mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 17:31:43 +00:00
Cody: Add a cache for inline completions (#51046)
This adds a simple LRU cache for completions with the main goal to
further reduce the number of requests and reduce annoying churn in the
scenario where a user receives a completion and types the exact same
characters. Previously, the new characters would cause a new completion
request, which could yield different results.
Here's an example of how this cache works:
- Imagine the current file prefix looks like this with the cursor at the
end:
```ts
const files = ["a", "b"];
for(let i = 0;
```
- We receive the following completion:
- ` i < files.length; i++) {`
- We now create different versions (up until the next `\n`) of
the input prefix by concatenating characters from the completion (only
the last line is shown here to visualize):
1. `for(let i = 0;`
2. `for(let i = 0; `
3. `for(let i = 0; i`
4. `for(let i = 0; i `
5. `for(let i = 0; i <`
6. `for(let i = 0; i < `
7. `for(let i = 0; i < f`
8. `for(let i = 0; i < fi`
9. `for(let i = 0; i < fil`
10. `for(let i = 0; i < file`
11. ...
## Additional thoughts
- I haven't added a cache to multiline providers, since these are
triggered less often than inline suggestions anyway.
- The LRU cache is limited to 500 file prefixes, regardless of how large
these are. We might want to tweak this later. It also currently retains
the `prompt` as part of the `Completion` interface, which may not be
necessary.
- I've re-enabled the request that forcibly adds a `\n` to the prefix.
These can now be reused if you type enter and will result in a faster
suggestion for the next line.
## Test plan
I've added a `console.log` when a cache hit is encountered to visualize
it while playing around with it:
https://user-images.githubusercontent.com/458591/234050774-2215a146-904d-47ae-b82e-c90ef131fe3e.mov
<!-- All pull requests REQUIRE a test plan:
https://docs.sourcegraph.com/dev/background-information/testing_principles
-->
This commit is contained in:
parent
8fb0eea3c9
commit
f5b0668415
@ -330,8 +330,9 @@
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.4.2",
|
||||
"@sourcegraph/cody-shared": "workspace:*",
|
||||
"openai": "^3.2.1",
|
||||
"@sourcegraph/cody-ui": "workspace:*",
|
||||
"lru-cache": "^9.1.1",
|
||||
"openai": "^3.2.1",
|
||||
"wink-eng-lite-web-model": "^1.5.0",
|
||||
"wink-nlp": "^1.13.1",
|
||||
"wink-nlp-utils": "^2.1.0"
|
||||
|
||||
60
client/cody/src/completions/cache.ts
Normal file
60
client/cody/src/completions/cache.ts
Normal file
@ -0,0 +1,60 @@
|
||||
import { LRUCache } from 'lru-cache'
|
||||
|
||||
import { Completion } from '.'
|
||||
|
||||
export class CompletionsCache {
|
||||
private cache = new LRUCache<string, Completion[]>({
|
||||
max: 500, // Maximum input prefixes in the cache.
|
||||
})
|
||||
|
||||
// TODO: The caching strategy only takes the file content prefix into
|
||||
// account. We need to add additional information like file path or suffix
|
||||
// to make sure the cache does not return undesired results for other files
|
||||
// in the same project.
|
||||
public get(prefix: string): Completion[] | undefined {
|
||||
const results = this.cache.get(prefix)
|
||||
if (results) {
|
||||
return results.map(result => {
|
||||
if (prefix.length === result.prefix.length) {
|
||||
return result
|
||||
}
|
||||
|
||||
// Cached results can be created by appending characters from a
|
||||
// recommendation from a smaller input prompt. If that's the
|
||||
// case, we need to slightly change the content and remove
|
||||
// characters that are now part of the prefix.
|
||||
const sliceChars = prefix.length - result.prefix.length
|
||||
return {
|
||||
...result,
|
||||
prefix,
|
||||
content: result.content.slice(sliceChars),
|
||||
}
|
||||
})
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
public add(completions: Completion[]): void {
|
||||
for (const completion of completions) {
|
||||
// Cache the exact prefix first and then append characters from the
|
||||
// completion one after the other until the first line is exceeded.
|
||||
//
|
||||
// If the completion starts with a `\n`, this logic will append the
|
||||
// second line instead.
|
||||
let maxCharsAppended = completion.content.indexOf('\n', completion.content.at(0) === '\n' ? 1 : 0)
|
||||
if (maxCharsAppended === -1) {
|
||||
maxCharsAppended = completion.content.length
|
||||
}
|
||||
|
||||
for (let i = 0; i <= maxCharsAppended; i++) {
|
||||
const key = completion.prefix + completion.content.slice(0, i)
|
||||
if (!this.cache.has(key)) {
|
||||
this.cache.set(key, [completion])
|
||||
} else {
|
||||
const existingCompletions = this.cache.get(key)!
|
||||
existingCompletions.push(completion)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -8,6 +8,7 @@ import {
|
||||
CodeCompletionResponse,
|
||||
} from '@sourcegraph/cody-shared/src/sourcegraph-api/completions/types'
|
||||
|
||||
import { CompletionsCache } from './cache'
|
||||
import { ReferenceSnippet, getContext } from './context'
|
||||
import { CompletionsDocumentProvider } from './docprovider'
|
||||
import { History } from './history'
|
||||
@ -19,6 +20,7 @@ function lastNLines(text: string, n: number): string {
|
||||
}
|
||||
|
||||
const estimatedLLMResponseLatencyMS = 700
|
||||
const inlineCompletionsCache = new CompletionsCache()
|
||||
|
||||
export class CodyCompletionItemProvider implements vscode.InlineCompletionItemProvider {
|
||||
private promptTokens: number
|
||||
@ -86,6 +88,12 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
|
||||
}
|
||||
|
||||
const { prefix, prevLine: precedingLine } = docContext
|
||||
|
||||
const cachedCompletions = inlineCompletionsCache.get(prefix)
|
||||
if (cachedCompletions) {
|
||||
return cachedCompletions.map(r => new vscode.InlineCompletionItem(r.content))
|
||||
}
|
||||
|
||||
let waitMs: number
|
||||
const remainingChars = this.tokToChar(this.promptTokens)
|
||||
const completers: CompletionProvider[] = []
|
||||
@ -99,7 +107,7 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
|
||||
this.responseTokens,
|
||||
prefix,
|
||||
'',
|
||||
2
|
||||
2 // tries
|
||||
)
|
||||
)
|
||||
} else if (context.triggerKind === vscode.InlineCompletionTriggerKind.Invoke || precedingLine.endsWith('.')) {
|
||||
@ -115,28 +123,26 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
|
||||
this.responseTokens,
|
||||
prefix,
|
||||
'',
|
||||
2 // 2 tries
|
||||
2 // tries
|
||||
),
|
||||
// Create a completion request for the current prefix with a new line added. This
|
||||
// will make for faster recommendations when the user presses enter.
|
||||
new EndOfLineCompletionProvider(
|
||||
this.completionsClient,
|
||||
remainingChars,
|
||||
this.responseTokens,
|
||||
prefix,
|
||||
'\n', // force a new line in the case we are at end of line
|
||||
1 // tries
|
||||
)
|
||||
// TODO: Figure out if this is really useful. Right now it seems that this is not
|
||||
// rendered and a subsequent completion is not properly using the cache yet.
|
||||
//
|
||||
// new EndOfLineCompletionProvider(
|
||||
// this.completionsClient,
|
||||
// remainingChars,
|
||||
// this.responseTokens,
|
||||
// prefix,
|
||||
// '\n', // force a new line in the case we are at end of line
|
||||
// 2 // 2 tries
|
||||
// )
|
||||
)
|
||||
}
|
||||
|
||||
// TODO(beyang): trigger on context quality (better context means longer completion)
|
||||
|
||||
const waiter = new Promise<void>(resolve =>
|
||||
await new Promise<void>(resolve =>
|
||||
setTimeout(() => resolve(), Math.max(0, waitMs - estimatedLLMResponseLatencyMS))
|
||||
)
|
||||
await waiter
|
||||
|
||||
// We don't need to make a request at all if the signal is already aborted after the
|
||||
// debounce
|
||||
@ -146,6 +152,8 @@ export class CodyCompletionItemProvider implements vscode.InlineCompletionItemPr
|
||||
|
||||
const results = (await Promise.all(completers.map(c => c.generateCompletions(abortController.signal)))).flat()
|
||||
|
||||
inlineCompletionsCache.add(results)
|
||||
|
||||
return results.map(r => new vscode.InlineCompletionItem(r.content))
|
||||
}
|
||||
|
||||
@ -319,6 +327,7 @@ async function batchCompletions(
|
||||
}
|
||||
|
||||
export interface Completion {
|
||||
prefix: string
|
||||
prompt: string
|
||||
content: string
|
||||
stopReason?: string
|
||||
@ -414,6 +423,8 @@ export class MultilineCompletionProvider implements CompletionProvider {
|
||||
}
|
||||
|
||||
public async generateCompletions(abortSignal: AbortSignal, n?: number): Promise<Completion[]> {
|
||||
const prefix = this.prefix.trim()
|
||||
|
||||
// Create prompt
|
||||
const prompt = this.makePrompt()
|
||||
if (prompt.length > this.promptChars) {
|
||||
@ -437,6 +448,7 @@ export class MultilineCompletionProvider implements CompletionProvider {
|
||||
)
|
||||
// Post-process
|
||||
return responses.map(resp => ({
|
||||
prefix,
|
||||
prompt,
|
||||
content: this.postProcess(resp.completion),
|
||||
stopReason: resp.stopReason,
|
||||
@ -521,6 +533,8 @@ export class EndOfLineCompletionProvider implements CompletionProvider {
|
||||
}
|
||||
|
||||
public async generateCompletions(abortSignal: AbortSignal, n?: number): Promise<Completion[]> {
|
||||
const prefix = this.prefix + this.injectPrefix
|
||||
|
||||
// Create prompt
|
||||
const prompt = this.makePrompt()
|
||||
if (prompt.length > this.promptChars) {
|
||||
@ -544,6 +558,7 @@ export class EndOfLineCompletionProvider implements CompletionProvider {
|
||||
)
|
||||
// Post-process
|
||||
return responses.map(resp => ({
|
||||
prefix,
|
||||
prompt,
|
||||
content: this.postProcess(resp.completion),
|
||||
stopReason: resp.stopReason,
|
||||
|
||||
@ -942,6 +942,7 @@ importers:
|
||||
'@anthropic-ai/sdk': ^0.4.2
|
||||
'@sourcegraph/cody-shared': workspace:*
|
||||
'@sourcegraph/cody-ui': workspace:*
|
||||
lru-cache: ^9.1.1
|
||||
openai: ^3.2.1
|
||||
wink-eng-lite-web-model: ^1.5.0
|
||||
wink-nlp: ^1.13.1
|
||||
@ -950,6 +951,7 @@ importers:
|
||||
'@anthropic-ai/sdk': 0.4.2
|
||||
'@sourcegraph/cody-shared': link:../cody-shared
|
||||
'@sourcegraph/cody-ui': link:../cody-ui
|
||||
lru-cache: 9.1.1
|
||||
openai: 3.2.1
|
||||
wink-eng-lite-web-model: 1.5.0_wink-nlp@1.13.1
|
||||
wink-nlp: 1.13.1
|
||||
@ -13035,7 +13037,7 @@ packages:
|
||||
postcss-modules-values: 4.0.0_postcss@8.4.21
|
||||
postcss-value-parser: 4.2.0
|
||||
semver: 7.3.8
|
||||
webpack: 5.75.0_esbuild@0.17.14
|
||||
webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a
|
||||
|
||||
/css-minimizer-webpack-plugin/4.2.2_zj7shrtzhjuywytipisjis56au:
|
||||
resolution: {integrity: sha512-s3Of/4jKfw1Hj9CxEO1E5oXhQAxlayuHO2y/ML+C6I9sQ7FdzfEV6QgMLN3vI+qFsjJGIAFLKtQK7t8BOXAIyA==}
|
||||
@ -20271,6 +20273,11 @@ packages:
|
||||
engines: {node: '>=12'}
|
||||
dev: false
|
||||
|
||||
/lru-cache/9.1.1:
|
||||
resolution: {integrity: sha512-65/Jky17UwSb0BuB9V+MyDpsOtXKmYwzhyl+cOa9XUiI4uV2Ouy/2voFP3+al0BjZbJgMBD8FojMpAf+Z+qn4A==}
|
||||
engines: {node: 14 || >=16.14}
|
||||
dev: false
|
||||
|
||||
/lru-queue/0.1.0:
|
||||
resolution: {integrity: sha512-BpdYkt9EvGl8OfWHDQPISVpcl5xZthb+XPsbELj5AQXxIC8IriDZIQYjBJPEm5rS420sjZ0TLEzRcq5KdBhYrQ==}
|
||||
dependencies:
|
||||
@ -24974,7 +24981,7 @@ packages:
|
||||
klona: 2.0.5
|
||||
neo-async: 2.6.2
|
||||
sass: 1.32.4
|
||||
webpack: 5.75.0_esbuild@0.17.14
|
||||
webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a
|
||||
|
||||
/sass/1.32.4:
|
||||
resolution: {integrity: sha512-N0BT0PI/t3+gD8jKa83zJJUb7ssfQnRRfqN+GIErokW6U4guBpfYl8qYB+OFLEho+QvnV5ZH1R9qhUC/Z2Ch9w==}
|
||||
@ -26163,7 +26170,7 @@ packages:
|
||||
peerDependencies:
|
||||
webpack: ^5.0.0
|
||||
dependencies:
|
||||
webpack: 5.75.0_esbuild@0.17.14
|
||||
webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a
|
||||
|
||||
/style-mod/4.0.0:
|
||||
resolution: {integrity: sha512-OPhtyEjyyN9x3nhPsu76f52yUGXiZcgvsrFVtvTkyGRQJ0XK+GPc6ov1z+lRpbeabka+MYEQxOYRnt5nF30aMw==}
|
||||
@ -26707,7 +26714,7 @@ packages:
|
||||
schema-utils: 3.1.1
|
||||
serialize-javascript: 6.0.0
|
||||
terser: 5.16.8
|
||||
webpack: 5.75.0_esbuild@0.17.14
|
||||
webpack: 5.75.0_pdcrf7mb3dfag2zju4x4octu4a
|
||||
|
||||
/terser/4.8.1:
|
||||
resolution: {integrity: sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==}
|
||||
@ -28506,6 +28513,7 @@ packages:
|
||||
- '@swc/core'
|
||||
- esbuild
|
||||
- uglify-js
|
||||
dev: false
|
||||
|
||||
/webpack/5.75.0_pdcrf7mb3dfag2zju4x4octu4a:
|
||||
resolution: {integrity: sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user