Cody: Add a cache for inline completions (#51046)

This adds a simple LRU cache for completions. The main goal is to
further reduce the number of requests and to avoid annoying churn in
the scenario where a user receives a completion and then types the
exact same characters: previously, the newly typed characters would
trigger a new completion request, which could yield a different result.

Here's an example of how this cache works:

- Imagine the current file prefix looks like this with the cursor at the
end:
    ```ts
    const files = ["a", "b"];
    for(let i = 0;
    ```
- We receive the following completion:
   - ` i < files.length; i++) {`
- We now generate different versions of the input prefix (up to the
next `\n`) by concatenating characters from the completion, and cache
the completion under each one (only the last line is shown here to keep
it readable; a concrete sketch follows this list):
   1. `for(let i = 0;`
   2. `for(let i = 0; `
   3. `for(let i = 0; i`
   4. `for(let i = 0; i `
   5. `for(let i = 0; i <`
   6. `for(let i = 0; i < `
   7. `for(let i = 0; i < f`
   8. `for(let i = 0; i < fi`
   9. `for(let i = 0; i < fil`
   10. `for(let i = 0; i < file`
   11. ...
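
Concretely, here is a minimal sketch of how the `CompletionsCache` added in this PR behaves for the example above (the `prompt` value is an illustrative placeholder):

```ts
import { CompletionsCache } from './cache' // the cache added in this PR

const cache = new CompletionsCache()

// Cache the completion we received for the prefix ending in `for(let i = 0;`:
cache.add([
    {
        prefix: 'const files = ["a", "b"];\nfor(let i = 0;',
        prompt: '<prompt used for the request>', // illustrative placeholder
        content: ' i < files.length; i++) {',
    },
])

// The user types ` i < fi`, exactly what the completion suggested. The
// extended prefix is a cache hit, and the characters that are now part of
// the prefix are sliced off the cached content:
const hits = cache.get('const files = ["a", "b"];\nfor(let i = 0; i < fi')
console.log(hits?.[0].content) // => 'les.length; i++) {'
```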

## Additional thoughts

- I haven't added a cache to the multiline providers, since these are
triggered less often than inline suggestions anyway.
- The LRU cache is limited to 500 file prefixes, regardless of how large
these are. We might want to tweak this later; one option is sketched
after this list. It also currently retains the `prompt` as part of the
`Completion` interface, which may not be necessary.
- I've re-enabled the request that force-adds a `\n` to the prefix.
Those completions can now be reused when you press Enter and will
result in a faster suggestion for the next line.
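
On the cache size: if capping the entry count alone turns out to be the wrong bound, `lru-cache` also supports size-based eviction via `maxSize` and a `sizeCalculation` callback. A possible tweak (a sketch only, not part of this PR) would bound the total number of cached characters:

```ts
import { LRUCache } from 'lru-cache'

import { Completion } from '.'

const cache = new LRUCache<string, Completion[]>({
    max: 500, // still cap the number of prefixes
    maxSize: 1_000_000, // ~1M characters across all cached entries
    // Weigh each entry by its key plus the cached completion contents.
    sizeCalculation: (completions, key) =>
        Math.max(1, key.length + completions.reduce((sum, c) => sum + c.content.length, 0)),
})
```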

## Test plan

I've added a `console.log` when a cache hit is encountered to visualize
cache hits while playing around:
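
The log statement isn't part of the committed diff; it was a local debug aid, roughly like this (the message text is illustrative), in the cache-hit branch of `provideInlineCompletionItems`:

```ts
const cachedCompletions = inlineCompletionsCache.get(prefix)
if (cachedCompletions) {
    // Local debug aid only; the exact message is illustrative and not committed.
    console.log('Cody completions: cache hit for prefix', JSON.stringify(prefix))
    return cachedCompletions.map(r => new vscode.InlineCompletionItem(r.content))
}
```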



https://user-images.githubusercontent.com/458591/234050774-2215a146-904d-47ae-b82e-c90ef131fe3e.mov


Philipp Spiess, 2023-04-25 10:46:08 +02:00 (committed by GitHub)
parent 8fb0eea3c9, commit f5b0668415
4 changed files with 104 additions and 20 deletions

`package.json`

@@ -330,8 +330,9 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.4.2",
     "@sourcegraph/cody-shared": "workspace:*",
-    "openai": "^3.2.1",
     "@sourcegraph/cody-ui": "workspace:*",
+    "lru-cache": "^9.1.1",
+    "openai": "^3.2.1",
     "wink-eng-lite-web-model": "^1.5.0",
     "wink-nlp": "^1.13.1",
     "wink-nlp-utils": "^2.1.0"

`cache.ts` (new file)

@@ -0,0 +1,60 @@
import { LRUCache } from 'lru-cache'

import { Completion } from '.'

export class CompletionsCache {
    private cache = new LRUCache<string, Completion[]>({
        max: 500, // Maximum input prefixes in the cache.
    })

    // TODO: The caching strategy only takes the file content prefix into
    // account. We need to add additional information like file path or suffix
    // to make sure the cache does not return undesired results for other files
    // in the same project.
    public get(prefix: string): Completion[] | undefined {
        const results = this.cache.get(prefix)
        if (results) {
            return results.map(result => {
                if (prefix.length === result.prefix.length) {
                    return result
                }

                // Cached results can be created by appending characters from a
                // recommendation from a smaller input prompt. If that's the
                // case, we need to slightly change the content and remove
                // characters that are now part of the prefix.
                const sliceChars = prefix.length - result.prefix.length
                return {
                    ...result,
                    prefix,
                    content: result.content.slice(sliceChars),
                }
            })
        }
        return undefined
    }

    public add(completions: Completion[]): void {
        for (const completion of completions) {
            // Cache the exact prefix first and then append characters from the
            // completion one after the other until the first line is exceeded.
            //
            // If the completion starts with a `\n`, this logic will append the
            // second line instead.
            let maxCharsAppended = completion.content.indexOf('\n', completion.content.at(0) === '\n' ? 1 : 0)
            if (maxCharsAppended === -1) {
                maxCharsAppended = completion.content.length
            }

            for (let i = 0; i <= maxCharsAppended; i++) {
                const key = completion.prefix + completion.content.slice(0, i)

                if (!this.cache.has(key)) {
                    this.cache.set(key, [completion])
                } else {
                    const existingCompletions = this.cache.get(key)!
                    existingCompletions.push(completion)
                }
            }
        }
    }
}

`index.ts`

@@ -8,6 +8,7 @@ import {
     CodeCompletionResponse,
 } from '@sourcegraph/cody-shared/src/sourcegraph-api/completions/types'

+import { CompletionsCache } from './cache'
 import { ReferenceSnippet, getContext } from './context'
 import { CompletionsDocumentProvider } from './docprovider'
 import { History } from './history'
@@ -19,6 +20,7 @@ function lastNLines(text: string, n: number): string {
 }

 const estimatedLLMResponseLatencyMS = 700
+const inlineCompletionsCache = new CompletionsCache()

 export class CodyCompletionItemProvider implements vscode.InlineCompletionItemProvider {
     private promptTokens: number
@@ -86,6 +88,12 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
         }
         const { prefix, prevLine: precedingLine } = docContext

+        const cachedCompletions = inlineCompletionsCache.get(prefix)
+        if (cachedCompletions) {
+            return cachedCompletions.map(r => new vscode.InlineCompletionItem(r.content))
+        }
+
         let waitMs: number
         const remainingChars = this.tokToChar(this.promptTokens)
         const completers: CompletionProvider[] = []
@@ -99,7 +107,7 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
                     this.responseTokens,
                     prefix,
                     '',
-                    2
+                    2 // tries
                 )
             )
         } else if (context.triggerKind === vscode.InlineCompletionTriggerKind.Invoke || precedingLine.endsWith('.')) {
@@ -115,28 +123,26 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
                     this.responseTokens,
                     prefix,
                     '',
-                    2 // 2 tries
+                    2 // tries
                 ),
-                // Create a completion request for the current prefix with a new line added. This
-                // will make for faster recommendations when the user presses enter.
-                new EndOfLineCompletionProvider(
-                    this.completionsClient,
-                    remainingChars,
-                    this.responseTokens,
-                    prefix,
-                    '\n', // force a new line in the case we are at end of line
-                    1 // tries
-                )
+                // TODO: Figure out if this is really useful. Right now it seems that this is not
+                // rendered and a subsequent completion is not properly using the cache yet.
+                //
+                // new EndOfLineCompletionProvider(
+                //     this.completionsClient,
+                //     remainingChars,
+                //     this.responseTokens,
+                //     prefix,
+                //     '\n', // force a new line in the case we are at end of line
+                //     2 // 2 tries
+                // )
             )
         }

         // TODO(beyang): trigger on context quality (better context means longer completion)
-        const waiter = new Promise<void>(resolve =>
+        await new Promise<void>(resolve =>
             setTimeout(() => resolve(), Math.max(0, waitMs - estimatedLLMResponseLatencyMS))
         )
-        await waiter

         // We don't need to make a request at all if the signal is already aborted after the
         // debounce
@@ -146,6 +152,8 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
         const results = (await Promise.all(completers.map(c => c.generateCompletions(abortController.signal)))).flat()

+        inlineCompletionsCache.add(results)
+
         return results.map(r => new vscode.InlineCompletionItem(r.content))
     }
@@ -319,6 +327,7 @@ async function batchCompletions(
 }

 export interface Completion {
+    prefix: string
     prompt: string
     content: string
     stopReason?: string
@@ -414,6 +423,8 @@ export class MultilineCompletionProvider implements CompletionProvider {
     }

     public async generateCompletions(abortSignal: AbortSignal, n?: number): Promise<Completion[]> {
+        const prefix = this.prefix.trim()
+
         // Create prompt
         const prompt = this.makePrompt()
         if (prompt.length > this.promptChars) {
@@ -437,6 +448,7 @@ export class MultilineCompletionProvider implements CompletionProvider {
         )

         // Post-process
         return responses.map(resp => ({
+            prefix,
             prompt,
             content: this.postProcess(resp.completion),
             stopReason: resp.stopReason,
@@ -521,6 +533,8 @@ export class EndOfLineCompletionProvider implements CompletionProvider {
     }

     public async generateCompletions(abortSignal: AbortSignal, n?: number): Promise<Completion[]> {
+        const prefix = this.prefix + this.injectPrefix
+
         // Create prompt
         const prompt = this.makePrompt()
         if (prompt.length > this.promptChars) {
@@ -544,6 +558,7 @@ export class EndOfLineCompletionProvider implements CompletionProvider {
         )

         // Post-process
         return responses.map(resp => ({
+            prefix,
             prompt,
             content: this.postProcess(resp.completion),
             stopReason: resp.stopReason,

`pnpm-lock.yaml`

@@ -942,6 +942,7 @@ importers:
       '@anthropic-ai/sdk': ^0.4.2
       '@sourcegraph/cody-shared': workspace:*
       '@sourcegraph/cody-ui': workspace:*
+      lru-cache: ^9.1.1
       openai: ^3.2.1
       wink-eng-lite-web-model: ^1.5.0
       wink-nlp: ^1.13.1
@@ -950,6 +951,7 @@ importers:
       '@anthropic-ai/sdk': 0.4.2
       '@sourcegraph/cody-shared': link:../cody-shared
       '@sourcegraph/cody-ui': link:../cody-ui
+      lru-cache: 9.1.1
       openai: 3.2.1
       wink-eng-lite-web-model: 1.5.0_wink-nlp@1.13.1
       wink-nlp: 1.13.1
@@ -13035,7 +13037,7 @@ packages:
       postcss-modules-values: 4.0.0_postcss@8.4.21
       postcss-value-parser: 4.2.0
       semver: 7.3.8
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /css-minimizer-webpack-plugin/4.2.2_zj7shrtzhjuywytipisjis56au:
     resolution: {integrity: sha512-s3Of/4jKfw1Hj9CxEO1E5oXhQAxlayuHO2y/ML+C6I9sQ7FdzfEV6QgMLN3vI+qFsjJGIAFLKtQK7t8BOXAIyA==}
@@ -20271,6 +20273,11 @@ packages:
     engines: {node: '>=12'}
     dev: false

+  /lru-cache/9.1.1:
+    resolution: {integrity: sha512-65/Jky17UwSb0BuB9V+MyDpsOtXKmYwzhyl+cOa9XUiI4uV2Ouy/2voFP3+al0BjZbJgMBD8FojMpAf+Z+qn4A==}
+    engines: {node: 14 || >=16.14}
+    dev: false
+
   /lru-queue/0.1.0:
     resolution: {integrity: sha512-BpdYkt9EvGl8OfWHDQPISVpcl5xZthb+XPsbELj5AQXxIC8IriDZIQYjBJPEm5rS420sjZ0TLEzRcq5KdBhYrQ==}
     dependencies:
@@ -24974,7 +24981,7 @@ packages:
       klona: 2.0.5
       neo-async: 2.6.2
       sass: 1.32.4
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /sass/1.32.4:
     resolution: {integrity: sha512-N0BT0PI/t3+gD8jKa83zJJUb7ssfQnRRfqN+GIErokW6U4guBpfYl8qYB+OFLEho+QvnV5ZH1R9qhUC/Z2Ch9w==}
@@ -26163,7 +26170,7 @@ packages:
     peerDependencies:
       webpack: ^5.0.0
     dependencies:
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /style-mod/4.0.0:
     resolution: {integrity: sha512-OPhtyEjyyN9x3nhPsu76f52yUGXiZcgvsrFVtvTkyGRQJ0XK+GPc6ov1z+lRpbeabka+MYEQxOYRnt5nF30aMw==}
@@ -26707,7 +26714,7 @@ packages:
       schema-utils: 3.1.1
       serialize-javascript: 6.0.0
       terser: 5.16.8
-      webpack: 5.75.0_esbuild@0.17.14
+      webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a

   /terser/4.8.1:
     resolution: {integrity: sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==}
@@ -28506,6 +28513,7 @@ packages:
       - '@swc/core'
       - esbuild
       - uglify-js
+    dev: false

   /webpack/5.75.0_pdcrf7mb3dfag2zju4x4octu4a:
     resolution: {integrity: sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==}