mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 19:51:50 +00:00
cody > keyword context: ignore ASCII-only keywords of length 2 or less (#51597)
This commit is contained in:
parent
947f528c6e
commit
a44b7b2d25
1
client/cody/BUILD.bazel
generated
1
client/cody/BUILD.bazel
generated
@ -103,5 +103,6 @@ ts_project(
|
||||
tsconfig = ":tsconfig",
|
||||
deps = [
|
||||
":cody",
|
||||
"//:node_modules/@types/node",
|
||||
],
|
||||
)
|
||||
|
||||
@ -1,6 +1,131 @@
|
||||
import { regexForTerms, userQueryToKeywordQuery } from './local-keyword-context-fetcher'
|
||||
import * as assert from 'assert'
|
||||
|
||||
import { Term, regexForTerms, userQueryToKeywordQuery } from './local-keyword-context-fetcher'
|
||||
|
||||
describe('keyword context', () => {
|
||||
it('userQueryToKeywordQuery', () => {
|
||||
const cases: { query: string; expected: Term[] }[] = [
|
||||
{
|
||||
query: 'Where is auth in Sourcegraph?',
|
||||
expected: [
|
||||
{
|
||||
count: 1,
|
||||
originals: ['Where'],
|
||||
prefix: 'where',
|
||||
stem: 'where',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['auth'],
|
||||
prefix: 'auth',
|
||||
stem: 'auth',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['Sourcegraph'],
|
||||
prefix: 'sourcegraph',
|
||||
stem: 'sourcegraph',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
query: `Explain the following code at a high level:
|
||||
uint32_t PackUInt32(const Color& color) {
|
||||
uint32_t result = 0;
|
||||
result |= static_cast<uint32_t>(color.r * 255 + 0.5f) << 24;
|
||||
result |= static_cast<uint32_t>(color.g * 255 + 0.5f) << 16;
|
||||
result |= static_cast<uint32_t>(color.b * 255 + 0.5f) << 8;
|
||||
result |= static_cast<uint32_t>(color.a * 255 + 0.5f);
|
||||
return result;
|
||||
}
|
||||
`,
|
||||
expected: [
|
||||
{
|
||||
count: 4,
|
||||
originals: ['255', '255', '255', '255'],
|
||||
prefix: '255',
|
||||
stem: '255',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['Explain'],
|
||||
prefix: 'explain',
|
||||
stem: 'explain',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['following'],
|
||||
prefix: 'follow',
|
||||
stem: 'follow',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['code'],
|
||||
prefix: 'code',
|
||||
stem: 'code',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['high'],
|
||||
prefix: 'high',
|
||||
stem: 'high',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['level'],
|
||||
prefix: 'level',
|
||||
stem: 'level',
|
||||
},
|
||||
{
|
||||
count: 6,
|
||||
originals: ['uint32_t', 'uint32_t', 'uint32_t', 'uint32_t', 'uint32_t', 'uint32_t'],
|
||||
prefix: 'uint',
|
||||
stem: 'uinty2_t',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['PackUInt32'],
|
||||
prefix: 'packuint',
|
||||
stem: 'packuinty2',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['const'],
|
||||
prefix: 'const',
|
||||
stem: 'const',
|
||||
},
|
||||
{
|
||||
count: 6,
|
||||
originals: ['Color', 'color', 'color', 'color', 'color', 'color'],
|
||||
prefix: 'color',
|
||||
stem: 'color',
|
||||
},
|
||||
{
|
||||
count: 6,
|
||||
originals: ['result', 'result', 'result', 'result', 'result', 'result'],
|
||||
prefix: 'result',
|
||||
stem: 'result',
|
||||
},
|
||||
{
|
||||
count: 4,
|
||||
originals: ['static_cast', 'static_cast', 'static_cast', 'static_cast'],
|
||||
prefix: 'static_cast',
|
||||
stem: 'static_cast',
|
||||
},
|
||||
{
|
||||
count: 1,
|
||||
originals: ['return'],
|
||||
prefix: 'return',
|
||||
stem: 'return',
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
for (const testcase of cases) {
|
||||
const actual = userQueryToKeywordQuery(testcase.query)
|
||||
assert.deepStrictEqual(actual, testcase.expected)
|
||||
}
|
||||
})
|
||||
it('query to regex', () => {
|
||||
const trials: {
|
||||
userQuery: string
|
||||
|
||||
@ -18,7 +18,7 @@ const fileExtRipgrepParams = ['-Tmarkdown', '-Tyaml', '-Tjson', '-g', '!*.lock',
|
||||
* For example, if the original is "cody" and the stem is "codi", the prefix is "cod"
|
||||
* - The count is the number of times the keyword appears in the document/query.
|
||||
*/
|
||||
interface Term {
|
||||
export interface Term {
|
||||
stem: string
|
||||
originals: string[]
|
||||
prefix: string
|
||||
@ -57,6 +57,21 @@ export function userQueryToKeywordQuery(query: string): Term[] {
|
||||
const filteredWords = winkUtils.tokens.removeWords(origWords) as string[]
|
||||
const terms: { [stem: string]: Term } = {}
|
||||
for (const word of filteredWords) {
|
||||
// Ignore ASCII-only words of length 2 or less; short words containing any non-ASCII character are kept
|
||||
if (word.length <= 2) {
|
||||
let skip = true
|
||||
for (let i = 0; i < word.length; i++) {
|
||||
if (word.charCodeAt(i) >= 128) {
|
||||
// non-ASCII
|
||||
skip = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if (skip) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
const stem = winkUtils.string.stem(word)
|
||||
if (terms[stem]) {
|
||||
terms[stem].originals.push(word)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user