cody > keyword context: ignore ASCII-only keywords of length 2 or less (#51597)

This commit is contained in:
Beyang Liu 2023-05-08 18:32:48 -07:00 committed by GitHub
parent 947f528c6e
commit a44b7b2d25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 143 additions and 2 deletions

View File

@ -103,5 +103,6 @@ ts_project(
tsconfig = ":tsconfig",
deps = [
":cody",
"//:node_modules/@types/node",
],
)

View File

@ -1,6 +1,131 @@
import { regexForTerms, userQueryToKeywordQuery } from './local-keyword-context-fetcher'
import * as assert from 'assert'
import { Term, regexForTerms, userQueryToKeywordQuery } from './local-keyword-context-fetcher'
describe('keyword context', () => {
// Verifies that userQueryToKeywordQuery converts a free-form user query into the expected
// list of Terms (stem, originals, prefix, count). Each case pairs an input query with the
// exact Term array the function must return.
it('userQueryToKeywordQuery', () => {
const cases: { query: string; expected: Term[] }[] = [
{
// Plain-English question. The expected terms omit the short words 'is' and 'in'
// that appear in the query; only 'Where', 'auth' and 'Sourcegraph' remain.
query: 'Where is auth in Sourcegraph?',
expected: [
{
count: 1,
// originals keep the casing used in the query, while prefix/stem are lowercase.
originals: ['Where'],
prefix: 'where',
stem: 'where',
},
{
count: 1,
originals: ['auth'],
prefix: 'auth',
stem: 'auth',
},
{
count: 1,
originals: ['Sourcegraph'],
prefix: 'sourcegraph',
stem: 'sourcegraph',
},
],
},
{
// Natural-language instruction followed by a code snippet. The template literal's
// content, including its line breaks, is the query text. None of the expected terms
// below is two characters or shorter (e.g. 'at', 'r', 'g', 'b', '24', '16' from the
// query do not appear).
query: `Explain the following code at a high level:
uint32_t PackUInt32(const Color& color) {
uint32_t result = 0;
result |= static_cast<uint32_t>(color.r * 255 + 0.5f) << 24;
result |= static_cast<uint32_t>(color.g * 255 + 0.5f) << 16;
result |= static_cast<uint32_t>(color.b * 255 + 0.5f) << 8;
result |= static_cast<uint32_t>(color.a * 255 + 0.5f);
return result;
}
`,
expected: [
{
// Repeated tokens collapse into one Term: count is the number of occurrences
// and originals lists every occurrence.
count: 4,
originals: ['255', '255', '255', '255'],
prefix: '255',
stem: '255',
},
{
count: 1,
originals: ['Explain'],
prefix: 'explain',
stem: 'explain',
},
{
count: 1,
// Here the stem differs from the original word ('following' -> 'follow').
originals: ['following'],
prefix: 'follow',
stem: 'follow',
},
{
count: 1,
originals: ['code'],
prefix: 'code',
stem: 'code',
},
{
count: 1,
originals: ['high'],
prefix: 'high',
stem: 'high',
},
{
count: 1,
originals: ['level'],
prefix: 'level',
stem: 'level',
},
{
count: 6,
originals: ['uint32_t', 'uint32_t', 'uint32_t', 'uint32_t', 'uint32_t', 'uint32_t'],
// 'uint' is the leading part shared by the original and the stem.
// NOTE(review): the stem 'uinty2_t' looks unusual; presumably it is the stemmer's
// literal output for 'uint32_t' — confirm against the stemmer.
prefix: 'uint',
stem: 'uinty2_t',
},
{
count: 1,
originals: ['PackUInt32'],
prefix: 'packuint',
stem: 'packuinty2',
},
{
count: 1,
originals: ['const'],
prefix: 'const',
stem: 'const',
},
{
count: 6,
// 'Color' and 'color' are grouped under the same term; each keeps its own casing
// in originals.
originals: ['Color', 'color', 'color', 'color', 'color', 'color'],
prefix: 'color',
stem: 'color',
},
{
count: 6,
originals: ['result', 'result', 'result', 'result', 'result', 'result'],
prefix: 'result',
stem: 'result',
},
{
count: 4,
// Identifiers containing an underscore stay as a single term.
originals: ['static_cast', 'static_cast', 'static_cast', 'static_cast'],
prefix: 'static_cast',
stem: 'static_cast',
},
{
count: 1,
originals: ['return'],
prefix: 'return',
stem: 'return',
},
],
},
]
// Run every case. deepStrictEqual compares arrays element by element, so the order of the
// terms and of the entries inside originals must match exactly.
for (const testcase of cases) {
const actual = userQueryToKeywordQuery(testcase.query)
assert.deepStrictEqual(actual, testcase.expected)
}
})
it('query to regex', () => {
const trials: {
userQuery: string

View File

@ -18,7 +18,7 @@ const fileExtRipgrepParams = ['-Tmarkdown', '-Tyaml', '-Tjson', '-g', '!*.lock',
* For example, if the original is "cody" and the stem is "codi", the prefix is "cod"
* - The count is the number of times the keyword appears in the document/query.
*/
interface Term {
export interface Term {
stem: string
originals: string[]
prefix: string
@ -57,6 +57,21 @@ export function userQueryToKeywordQuery(query: string): Term[] {
const filteredWords = winkUtils.tokens.removeWords(origWords) as string[]
const terms: { [stem: string]: Term } = {}
for (const word of filteredWords) {
// Ignore ASCII-only strings of length 2 or less
if (word.length <= 2) {
let skip = true
for (let i = 0; i < word.length; i++) {
if (word.charCodeAt(i) >= 128) {
// non-ASCII
skip = false
break
}
}
if (skip) {
continue
}
}
const stem = winkUtils.string.stem(word)
if (terms[stem]) {
terms[stem].originals.push(word)