Cody: Add a cache for inline completions (#51046)

This adds a simple LRU cache for completions. The main goal is to
further reduce the number of requests and to avoid annoying churn in
the scenario where a user receives a completion and then types the
exact same characters: previously, the newly typed characters would
trigger a new completion request, which could yield a different result.

Here's an example of how this cache works:

- Imagine the current file prefix looks like this with the cursor at the
end:
    ```ts
    const files = ["a", "b"];
    for(let i = 0;
    ```
- We receive the following completion:
   - ` i < files.length; i++) {`
- We now generate different versions of the input prefix (up to the
next `\n`) by concatenating characters from the completion, and cache
the completion under each one (only the last line is shown here to keep
it readable; a concrete sketch follows this list):
   1. `for(let i = 0;`
   2. `for(let i = 0; `
   3. `for(let i = 0; i`
   4. `for(let i = 0; i `
   5. `for(let i = 0; i <`
   6. `for(let i = 0; i < `
   7. `for(let i = 0; i < f`
   8. `for(let i = 0; i < fi`
   9. `for(let i = 0; i < fil`
   10. `for(let i = 0; i < file`
   11. ...
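
Concretely, here is a minimal sketch of how the `CompletionsCache` added in this PR behaves for the example above (the `prompt` value is an illustrative placeholder):

```ts
import { CompletionsCache } from './cache' // the cache added in this PR

const cache = new CompletionsCache()

// Cache the completion we received for the prefix ending in `for(let i = 0;`:
cache.add([
    {
        prefix: 'const files = ["a", "b"];\nfor(let i = 0;',
        prompt: '<prompt used for the request>', // illustrative placeholder
        content: ' i < files.length; i++) {',
    },
])

// The user types ` i < fi`, exactly what the completion suggested. The
// extended prefix is a cache hit, and the characters that are now part of
// the prefix are sliced off the cached content:
const hits = cache.get('const files = ["a", "b"];\nfor(let i = 0; i < fi')
console.log(hits?.[0].content) // => 'les.length; i++) {'
```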

## Additional thoughts

- I haven't added a cache to the multiline providers, since these are
triggered less often than inline suggestions anyway.
- The LRU cache is limited to 500 file prefixes, regardless of how large
these are. We might want to tweak this later; one option is sketched
after this list. It also currently retains the `prompt` as part of the
`Completion` interface, which may not be necessary.
- I've re-enabled the request that force-adds a `\n` to the prefix.
Those completions can now be reused when you press Enter and will
result in a faster suggestion for the next line.
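
On the cache size: if capping the entry count alone turns out to be the wrong bound, `lru-cache` also supports size-based eviction via `maxSize` and a `sizeCalculation` callback. A possible tweak (a sketch only, not part of this PR) would bound the total number of cached characters:

```ts
import { LRUCache } from 'lru-cache'

import { Completion } from '.'

const cache = new LRUCache<string, Completion[]>({
    max: 500, // still cap the number of prefixes
    maxSize: 1_000_000, // ~1M characters across all cached entries
    // Weigh each entry by its key plus the cached completion contents.
    sizeCalculation: (completions, key) =>
        Math.max(1, key.length + completions.reduce((sum, c) => sum + c.content.length, 0)),
})
```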

## Test plan

I've added a `console.log` when a cache hit is encountered to visualize
cache hits while playing around:
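
The log statement isn't part of the committed diff; it was a local debug aid, roughly like this (the message text is illustrative), in the cache-hit branch of `provideInlineCompletionItems`:

```ts
const cachedCompletions = inlineCompletionsCache.get(prefix)
if (cachedCompletions) {
    // Local debug aid only; the exact message is illustrative and not committed.
    console.log('Cody completions: cache hit for prefix', JSON.stringify(prefix))
    return cachedCompletions.map(r => new vscode.InlineCompletionItem(r.content))
}
```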



https://user-images.githubusercontent.com/458591/234050774-2215a146-904d-47ae-b82e-c90ef131fe3e.mov


Philipp Spiess, 2023-04-25 10:46:08 +02:00 (committed by GitHub)
parent 8fb0eea3c9, commit f5b0668415
4 changed files with 104 additions and 20 deletions

`package.json`

@@ -330,8 +330,9 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.4.2",
     "@sourcegraph/cody-shared": "workspace:*",
-    "openai": "^3.2.1",
     "@sourcegraph/cody-ui": "workspace:*",
+    "lru-cache": "^9.1.1",
+    "openai": "^3.2.1",
     "wink-eng-lite-web-model": "^1.5.0",
     "wink-nlp": "^1.13.1",
     "wink-nlp-utils": "^2.1.0"

`cache.ts` (new file)

@@ -0,0 +1,60 @@
import { LRUCache } from 'lru-cache'

import { Completion } from '.'

export class CompletionsCache {
    private cache = new LRUCache<string, Completion[]>({
        max: 500, // Maximum input prefixes in the cache.
    })

    // TODO: The caching strategy only takes the file content prefix into
    // account. We need to add additional information like file path or suffix
    // to make sure the cache does not return undesired results for other files
    // in the same project.
    public get(prefix: string): Completion[] | undefined {
        const results = this.cache.get(prefix)
        if (results) {
            return results.map(result => {
                if (prefix.length === result.prefix.length) {
                    return result
                }

                // Cached results can be created by appending characters from a
                // recommendation from a smaller input prompt. If that's the
                // case, we need to slightly change the content and remove
                // characters that are now part of the prefix.
                const sliceChars = prefix.length - result.prefix.length
                return {
                    ...result,
                    prefix,
                    content: result.content.slice(sliceChars),
                }
            })
        }
        return undefined
    }

    public add(completions: Completion[]): void {
        for (const completion of completions) {
            // Cache the exact prefix first and then append characters from the
            // completion one after the other until the first line is exceeded.
            //
            // If the completion starts with a `\n`, this logic will append the
            // second line instead.
            let maxCharsAppended = completion.content.indexOf('\n', completion.content.at(0) === '\n' ? 1 : 0)
            if (maxCharsAppended === -1) {
                maxCharsAppended = completion.content.length
            }

            for (let i = 0; i <= maxCharsAppended; i++) {
                const key = completion.prefix + completion.content.slice(0, i)

                if (!this.cache.has(key)) {
                    this.cache.set(key, [completion])
                } else {
                    const existingCompletions = this.cache.get(key)!
                    existingCompletions.push(completion)
                }
            }
        }
    }
}

`index.ts`

@@ -8,6 +8,7 @@ import {
     CodeCompletionResponse,
 } from '@sourcegraph/cody-shared/src/sourcegraph-api/completions/types'

+import { CompletionsCache } from './cache'
 import { ReferenceSnippet, getContext } from './context'
 import { CompletionsDocumentProvider } from './docprovider'
 import { History } from './history'
@@ -19,6 +20,7 @@ function lastNLines(text: string, n: number): string {
 }

 const estimatedLLMResponseLatencyMS = 700
+const inlineCompletionsCache = new CompletionsCache()

 export class CodyCompletionItemProvider implements vscode.InlineCompletionItemProvider {
     private promptTokens: number
@@ -86,6 +88,12 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
         }
         const { prefix, prevLine: precedingLine } = docContext

+        const cachedCompletions = inlineCompletionsCache.get(prefix)
+        if (cachedCompletions) {
+            return cachedCompletions.map(r => new vscode.InlineCompletionItem(r.content))
+        }
+
         let waitMs: number
         const remainingChars = this.tokToChar(this.promptTokens)
         const completers: CompletionProvider[] = []
@@ -99,7 +107,7 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
                     this.responseTokens,
                     prefix,
                     '',
-                    2
+                    2 // tries
                 )
             )
         } else if (context.triggerKind === vscode.InlineCompletionTriggerKind.Invoke || precedingLine.endsWith('.')) {
@@ -115,28 +123,26 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
                     this.responseTokens,
                     prefix,
                     '',
-                    2 // 2 tries
+                    2 // tries
                 ),
-                // Create a completion request for the current prefix with a new line added. This
-                // will make for faster recommendations when the user presses enter.
-                new EndOfLineCompletionProvider(
-                    this.completionsClient,
-                    remainingChars,
-                    this.responseTokens,
-                    prefix,
-                    '\n', // force a new line in the case we are at end of line
-                    1 // tries
-                )
+                // TODO: Figure out if this is really useful. Right now it seems that this is not
+                // rendered and a subsequent completion is not properly using the cache yet.
+                //
+                // new EndOfLineCompletionProvider(
+                //     this.completionsClient,
+                //     remainingChars,
+                //     this.responseTokens,
+                //     prefix,
+                //     '\n', // force a new line in the case we are at end of line
+                //     2 // 2 tries
+                // )
             )
         }

         // TODO(beyang): trigger on context quality (better context means longer completion)
-        const waiter = new Promise<void>(resolve =>
+        await new Promise<void>(resolve =>
             setTimeout(() => resolve(), Math.max(0, waitMs - estimatedLLMResponseLatencyMS))
         )
-        await waiter

         // We don't need to make a request at all if the signal is already aborted after the
         // debounce
@@ -146,6 +152,8 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
         const results = (await Promise.all(completers.map(c => c.generateCompletions(abortController.signal)))).flat()

+        inlineCompletionsCache.add(results)
+
         return results.map(r => new vscode.InlineCompletionItem(r.content))
     }
@@ -319,6 +327,7 @@ async function batchCompletions(
 }

 export interface Completion {
+    prefix: string
     prompt: string
     content: string
     stopReason?: string
@@ -414,6 +423,8 @@ export class MultilineCompletionProvider implements CompletionProvider {
     }

     public async generateCompletions(abortSignal: AbortSignal, n?: number): Promise<Completion[]> {
+        const prefix = this.prefix.trim()
+
         // Create prompt
         const prompt = this.makePrompt()
         if (prompt.length > this.promptChars) {
@@ -437,6 +448,7 @@ export class MultilineCompletionProvider implements CompletionProvider {
         )

         // Post-process
         return responses.map(resp => ({
+            prefix,
             prompt,
             content: this.postProcess(resp.completion),
             stopReason: resp.stopReason,
@@ -521,6 +533,8 @@ export class EndOfLineCompletionProvider implements CompletionProvider {
     }

     public async generateCompletions(abortSignal: AbortSignal, n?: number): Promise<Completion[]> {
+        const prefix = this.prefix + this.injectPrefix
+
         // Create prompt
         const prompt = this.makePrompt()
         if (prompt.length > this.promptChars) {
@@ -544,6 +558,7 @@ export class EndOfLineCompletionProvider implements CompletionProvider {
         )

         // Post-process
         return responses.map(resp => ({
+            prefix,
             prompt,
             content: this.postProcess(resp.completion),
             stopReason: resp.stopReason,

`pnpm-lock.yaml`

@@ -942,6 +942,7 @@ importers:
       '@anthropic-ai/sdk': ^0.4.2
       '@sourcegraph/cody-shared': workspace:*
       '@sourcegraph/cody-ui': workspace:*
+      lru-cache: ^9.1.1
       openai: ^3.2.1
       wink-eng-lite-web-model: ^1.5.0
       wink-nlp: ^1.13.1
@@ -950,6 +951,7 @@ importers:
       '@anthropic-ai/sdk': 0.4.2
       '@sourcegraph/cody-shared': link:../cody-shared
       '@sourcegraph/cody-ui': link:../cody-ui
+      lru-cache: 9.1.1
       openai: 3.2.1
       wink-eng-lite-web-model: 1.5.0_wink-nlp@1.13.1
       wink-nlp: 1.13.1
@@ -13035,7 +13037,7 @@ packages:
       postcss-modules-values: 4.0.0_postcss@8.4.21
       postcss-value-parser: 4.2.0
       semver: 7.3.8
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /css-minimizer-webpack-plugin/4.2.2_zj7shrtzhjuywytipisjis56au:
     resolution: {integrity: sha512-s3Of/4jKfw1Hj9CxEO1E5oXhQAxlayuHO2y/ML+C6I9sQ7FdzfEV6QgMLN3vI+qFsjJGIAFLKtQK7t8BOXAIyA==}
@@ -20271,6 +20273,11 @@ packages:
     engines: {node: '>=12'}
     dev: false

+  /lru-cache/9.1.1:
+    resolution: {integrity: sha512-65/Jky17UwSb0BuB9V+MyDpsOtXKmYwzhyl+cOa9XUiI4uV2Ouy/2voFP3+al0BjZbJgMBD8FojMpAf+Z+qn4A==}
+    engines: {node: 14 || >=16.14}
+    dev: false
+
   /lru-queue/0.1.0:
     resolution: {integrity: sha512-BpdYkt9EvGl8OfWHDQPISVpcl5xZthb+XPsbELj5AQXxIC8IriDZIQYjBJPEm5rS420sjZ0TLEzRcq5KdBhYrQ==}
     dependencies:
@@ -24974,7 +24981,7 @@ packages:
       klona: 2.0.5
       neo-async: 2.6.2
       sass: 1.32.4
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /sass/1.32.4:
     resolution: {integrity: sha512-N0BT0PI/t3+gD8jKa83zJJUb7ssfQnRRfqN+GIErokW6U4guBpfYl8qYB+OFLEho+QvnV5ZH1R9qhUC/Z2Ch9w==}
@@ -26163,7 +26170,7 @@ packages:
     peerDependencies:
       webpack: ^5.0.0
     dependencies:
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /style-mod/4.0.0:
     resolution: {integrity: sha512-OPhtyEjyyN9x3nhPsu76f52yUGXiZcgvsrFVtvTkyGRQJ0XK+GPc6ov1z+lRpbeabka+MYEQxOYRnt5nF30aMw==}
@@ -26707,7 +26714,7 @@ packages:
       schema-utils: 3.1.1
       serialize-javascript: 6.0.0
       terser: 5.16.8
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /terser/4.8.1:
     resolution: {integrity: sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==}
@@ -28506,6 +28513,7 @@ packages:
       - '@swc/core'
       - esbuild
       - uglify-js
+    dev: false

   /webpack/5.75.0_pdcrf7mb3dfag2zju4x4octu4a:
     resolution: {integrity: sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==}