diff --git a/client/web/src/components/fuzzyFinder/FuzzyFinder.tsx b/client/web/src/components/fuzzyFinder/FuzzyFinder.tsx new file mode 100644 index 00000000000..5ecc3b89b9f --- /dev/null +++ b/client/web/src/components/fuzzyFinder/FuzzyFinder.tsx @@ -0,0 +1,145 @@ +import { Shortcut } from '@slimsag/react-shortcuts' +import React, { useState } from 'react' + +import { gql } from '@sourcegraph/shared/src/graphql/graphql' +import { useLocalStorage } from '@sourcegraph/shared/src/util/useLocalStorage' + +import { requestGraphQL } from '../../backend/graphql' +import { FuzzySearch, SearchIndexing } from '../../fuzzyFinder/FuzzySearch' +import { FilesResult, FilesVariables } from '../../graphql-operations' +import { + KEYBOARD_SHORTCUT_CLOSE_FUZZY_FINDER, + KEYBOARD_SHORTCUT_FUZZY_FINDER, +} from '../../keyboardShortcuts/keyboardShortcuts' + +import { FuzzyModal } from './FuzzyModal' + +const DEFAULT_MAX_RESULTS = 100 + +export interface FuzzyFinderProps { + repoName: string + commitID: string +} + +export const FuzzyFinder: React.FunctionComponent = props => { + const [isVisible, setIsVisible] = useState(false) + // NOTE: the query is cached in local storage to mimic the file pickers in + // IntelliJ (by default) and VS Code (when "Workbench > Quick Open > + // Preserve Input" is enabled). + const [query, setQuery] = useLocalStorage(`fuzzy-modal.query.${props.repoName}`, '') + + // The "focus index" is the index of the file result that the user has + // selected with up/down arrow keys. The focused item is highlighted and the + // window.location is moved to that URL when the user presses the enter key. + const [focusIndex, setFocusIndex] = useState(0) + + // The maximum number of results to display in the fuzzy finder. For large + // repositories, a generic query like "src" may return thousands of results + // making DOM rendering slow. The user can increase this number by clicking + // on a button at the bottom of the result list. 
+ const [maxResults, setMaxResults] = useState(DEFAULT_MAX_RESULTS) + + // The state machine of the fuzzy finder. See `FuzzyFSM` for more details + // about the state transitions. + const [fsm, setFsm] = useState({ key: 'empty' }) + + return ( + <> + { + setIsVisible(true) + const input = document.querySelector('#fuzzy-modal-input') + input?.focus() + input?.select() + }} + /> + setIsVisible(false)} /> + {isVisible && ( + setIsVisible(false)} + query={query} + setQuery={setQuery} + focusIndex={focusIndex} + setFocusIndex={setFocusIndex} + maxResults={maxResults} + increaseMaxResults={() => setMaxResults(maxResults + DEFAULT_MAX_RESULTS)} + fsm={fsm} + setFsm={setFsm} + downloadFilenames={() => downloadFilenames(props)} + /> + )} + + ) +} + +/** + * The fuzzy finder modal is implemented as a state machine with the following transitions: + * + * ``` + * ╭────[cached]───────────────────────╮ ╭──╮ + * │ v │ v + * Empty ─[uncached]───> Downloading ──> Indexing ──> Ready + * ╰──────────────────────> Failed + * ``` + * + * - Empty: start state. + * - Downloading: downloading filenames from the remote server. The filenames + * are cached using the browser's CacheStorage, if available. + * - Indexing: processing the downloaded filenames. This step is usually + * instant, unless the repo is very large (>100k source files). + * In the torvalds/linux repo (~70k files), this step takes <1s + * on my computer but the chromium/chromium repo (~360k files) + * it takes ~3-5 seconds. This step is async so that the user can + * query against partially indexed results. + * - Ready: all filenames have been indexed. + * - Failed: something unexpected happened, the user can't fuzzy find files. 
+ */ +export type FuzzyFSM = Empty | Downloading | Indexing | Ready | Failed +export interface Empty { + key: 'empty' +} +export interface Downloading { + key: 'downloading' +} +export interface Indexing { + key: 'indexing' + indexing: SearchIndexing +} +export interface Ready { + key: 'ready' + fuzzy: FuzzySearch +} +export interface Failed { + key: 'failed' + errorMessage: string +} + +async function downloadFilenames(props: FuzzyFinderProps): Promise { + const gqlResult = await requestGraphQL( + gql` + query Files($repository: String!, $commit: String!) { + repository(name: $repository) { + commit(rev: $commit) { + tree(recursive: true) { + files(first: 1000000, recursive: true) { + path + } + } + } + } + } + `, + { + repository: props.repoName, + commit: props.commitID, + } + ).toPromise() + const filenames = gqlResult.data?.repository?.commit?.tree?.files?.map(file => file.path) + if (!filenames) { + throw new Error(JSON.stringify(gqlResult)) + } + return filenames +} diff --git a/client/web/src/components/fuzzyFinder/FuzzyModal.module.scss b/client/web/src/components/fuzzyFinder/FuzzyModal.module.scss new file mode 100644 index 00000000000..96f0331de40 --- /dev/null +++ b/client/web/src/components/fuzzyFinder/FuzzyModal.module.scss @@ -0,0 +1,51 @@ +.modal { + z-index: 1000; + position: fixed; + left: 0; + right: 0; + top: 0; + bottom: 0; + background-color: rgba(0, 0, 0, 0.5); + display: flex; + align-items: center; + justify-content: center; +} + +.content { + width: 80vw; + background-color: var(--color-bg-2); +} + +.header, +.footer { + padding: 1rem; +} + +.body { + height: 80vh; + overflow-y: scroll; + padding: 0.5rem; +} + +.input { + border: none; + width: 100%; + padding: 0.25rem; + font-size: 2em; +} + +.results { + text-align: left; + list-style-type: none; + padding: unset; + color: var(--body-color); +} + +.focused { + background-color: var(--color-bg-3); +} + +.footer { + display: flex; + justify-content: space-between; +} diff --git 
a/client/web/src/components/fuzzyFinder/FuzzyModal.tsx b/client/web/src/components/fuzzyFinder/FuzzyModal.tsx new file mode 100644 index 00000000000..c428710f958 --- /dev/null +++ b/client/web/src/components/fuzzyFinder/FuzzyModal.tsx @@ -0,0 +1,359 @@ +/* eslint-disable jsx-a11y/no-noninteractive-element-interactions */ +// NOTE: the eslint-disable above can't be a eslint-disable-next-line because +// JSX syntax doesn't support comments on the line where it's needed. + +import React from 'react' + +import { CaseSensitiveFuzzySearch } from '../../fuzzyFinder/CaseSensitiveFuzzySearch' +import { FuzzySearch, FuzzySearchResult, SearchIndexing } from '../../fuzzyFinder/FuzzySearch' + +import { FuzzyFinderProps, Indexing, FuzzyFSM } from './FuzzyFinder' +import styles from './FuzzyModal.module.scss' +import { HighlightedLink } from './HighlightedLink' + +// Enable this URL query parameter to see debugging information like latency +// numbers and the false-positive ratio for the bloom filter. +const IS_DEBUG = window.location.href.toString().includes('fuzzyFinder=debug') + +// Cache for the last fuzzy query. This value is only used to avoid redoing the +// full fuzzy search on every re-render when the user presses the down/up arrow +// keys to move the "focus index". +const lastFuzzySearchResult = new Map() + +// The number of results to jump by on PageUp/PageDown keyboard shortcuts. 
+const PAGE_DOWN_INCREMENT = 10 + +export interface FuzzyModalProps + extends VisibilityProps, + FuzzyFinderProps, + MaxResultsProps, + FocusIndexProps, + FuzzyFSMProps, + QueryProps { + downloadFilenames: () => Promise +} +interface VisibilityProps { + isVisible: boolean + onClose: () => void +} +interface QueryProps { + query: string + setQuery: (query: string) => void +} +interface MaxResultsProps { + maxResults: number + increaseMaxResults: () => void +} +interface FocusIndexProps { + focusIndex: number + setFocusIndex: (focusIndex: number) => void +} +interface FuzzyFSMProps { + fsm: FuzzyFSM + setFsm: (fsm: FuzzyFSM) => void +} + +/** + * Component that interactively displays filenames in the open repository when given fuzzy queries. + * + * Similar to "Go to file" in VS Code or the "t" keyboard shortcut on github.com + */ +export const FuzzyModal: React.FunctionComponent = props => { + const fuzzyResult = renderFuzzyResult(props) + + // Moves the "focus index" by `increment`, wrapping around the number of + // displayed filenames so that press-holding the down arrow cycles back to + // the top result. With zero results the index stays at 0 (`x % 0` is NaN). + function setRoundedFocusIndex(increment: number): void { + const newNumber = props.focusIndex + increment + const index = fuzzyResult.resultsCount > 0 ? newNumber % fuzzyResult.resultsCount : 0 + const nextIndex = index < 0 ? fuzzyResult.resultsCount + index : index + props.setFocusIndex(nextIndex) + document.querySelector(`#fuzzy-modal-result-${nextIndex}`)?.scrollIntoView(false) + } + + function onInputKeyDown(event: React.KeyboardEvent): void { + switch (event.key) { + case 'Escape': + props.onClose() + break + case 'ArrowDown': + event.preventDefault() // Don't move the cursor to the end of the input. + setRoundedFocusIndex(1) + break + case 'PageDown': + setRoundedFocusIndex(PAGE_DOWN_INCREMENT) + break + case 'ArrowUp': + event.preventDefault() // Don't move the cursor to the start of the input. 
+ setRoundedFocusIndex(-1) + break + case 'PageUp': + setRoundedFocusIndex(-PAGE_DOWN_INCREMENT) + break + case 'Enter': + if (props.focusIndex < fuzzyResult.resultsCount) { + const fileAnchor = document.querySelector( + `#fuzzy-modal-result-${props.focusIndex} a` + ) + fileAnchor?.click() + props.onClose() + } + break + default: + } + } + + return ( + // Use 'onMouseDown' instead of 'onClick' to allow selecting the text and mouse up outside the modal +
props.onClose()}> +
event.stopPropagation()}> +
+ { + props.setQuery(event.target.value) + props.setFocusIndex(0) + }} + type="text" + onKeyDown={onInputKeyDown} + /> +
+
{fuzzyResult.element}
+
+ + {fuzzyFooter(props.fsm, fuzzyResult)} +
+
+
+ ) +} + +function plural(what: string, count: number, isComplete: boolean): string { + return count.toLocaleString() + (isComplete ? '' : '+') + ' ' + what + (count === 1 ? '' : 's') +} + +function fuzzyFooter(fsm: FuzzyFSM, files: RenderedFuzzyResult): JSX.Element { + return IS_DEBUG ? ( + <> + {files.falsePositiveRatio && Math.round(files.falsePositiveRatio * 100)}fp + {files.elapsedMilliseconds && Math.round(files.elapsedMilliseconds).toLocaleString()}ms + + ) : ( + <> + {plural('result', files.resultsCount, files.isComplete)} + + {fsm.key === 'indexing' && indexingProgressBar(fsm)} + {plural('total file', files.totalFileCount, true)} + + + ) +} + +function indexingProgressBar(indexing: Indexing): JSX.Element { + const indexedFiles = indexing.indexing.indexedFileCount + const totalFiles = indexing.indexing.totalFileCount + const percentage = Math.round((indexedFiles / totalFiles) * 100) + return ( + + {percentage}% + + ) +} + +interface RenderedFuzzyResult { + element: JSX.Element + resultsCount: number + isComplete: boolean + totalFileCount: number + elapsedMilliseconds?: number + falsePositiveRatio?: number +} + +function renderFuzzyResult(props: FuzzyModalProps): RenderedFuzzyResult { + function empty(element: JSX.Element): RenderedFuzzyResult { + return { + element, + resultsCount: 0, + isComplete: true, + totalFileCount: 0, + } + } + + function onError(what: string): (error: Error) => void { + return error => { // JSON.stringify(error) is '{}' for Error instances, so report the message. + props.setFsm({ key: 'failed', errorMessage: error instanceof Error ? error.message : JSON.stringify(error) }) + throw new Error(what) + } + } + + const usuallyFast = + "This step is usually fast unless it's a very large repository. The result is cached so you only have to wait for it once :)" + + switch (props.fsm.key) { + case 'empty': + handleEmpty(props).then(() => {}, onError('onEmpty')) + return empty(<>) + case 'downloading': + return empty(

Downloading... {usuallyFast}

) + case 'failed': + return empty(

Error: {props.fsm.errorMessage}

) + case 'indexing': { + const loader = props.fsm.indexing + later() + .then(() => continueIndexing(loader)) + .then(next => props.setFsm(next), onError('onIndexing')) + return renderFiles(props, props.fsm.indexing.partialFuzzy, props.fsm.indexing) + } + case 'ready': + return renderFiles(props, props.fsm.fuzzy) + default: + return empty(

ERROR

) + } +} + +function renderFiles(props: FuzzyModalProps, search: FuzzySearch, indexing?: SearchIndexing): RenderedFuzzyResult { + const indexedFileCount = indexing ? indexing.indexedFileCount : '' + const cacheKey = `${props.query}-${props.maxResults}${indexedFileCount}` + let fuzzyResult = lastFuzzySearchResult.get(cacheKey) + if (!fuzzyResult) { + const start = window.performance.now() + fuzzyResult = search.search({ + query: props.query, + maxResults: props.maxResults, + createUrl: filename => `/${props.repoName}@${props.commitID}/-/blob/${filename}`, + onClick: () => props.onClose(), + }) + fuzzyResult.elapsedMilliseconds = window.performance.now() - start + lastFuzzySearchResult.clear() // Only cache the last query. + lastFuzzySearchResult.set(cacheKey, fuzzyResult) + } + const links = fuzzyResult.links + if (links.length === 0) { + return { + element:

No files matching '{props.query}'

, + resultsCount: 0, + totalFileCount: search.totalFileCount, + isComplete: fuzzyResult.isComplete, + } + } + const linksToRender = links.slice(0, props.maxResults) + return { + element: ( +
    + {linksToRender.map((file, fileIndex) => ( +
  • + +
  • + ))} + {!fuzzyResult.isComplete && ( +
  • + +
  • + )} +
+ ), + resultsCount: linksToRender.length, + totalFileCount: search.totalFileCount, + isComplete: fuzzyResult.isComplete, + elapsedMilliseconds: fuzzyResult.elapsedMilliseconds, + falsePositiveRatio: fuzzyResult.falsePositiveRatio, + } +} + +function filesCacheKey(props: FuzzyModalProps): string { + return `/fuzzy-modal.files.${props.repoName}.${props.commitID}` +} + +function openCaches(): Promise { + return caches.open('fuzzy-modal') +} + +async function later(): Promise { + return new Promise(resolve => setTimeout(() => resolve(), 0)) +} + +async function continueIndexing(indexing: SearchIndexing): Promise { + const next = await indexing.continue() + if (next.key === 'indexing') { + return { key: 'indexing', indexing: next } + } + return { + key: 'ready', + fuzzy: next.value, + } +} + +async function loadCachedIndex(props: FuzzyModalProps): Promise { + const cacheAvailable = 'caches' in self + if (!cacheAvailable) { + return Promise.resolve(undefined) + } + const cacheKey = filesCacheKey(props) + const cache = await openCaches() + const cacheRequest = new Request(cacheKey) + const fromCache = await cache.match(cacheRequest) + if (!fromCache) { + return undefined + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const filenames = JSON.parse(await fromCache.text()) + return handleFilenames(filenames) +} + +async function cacheFilenames(props: FuzzyModalProps, filenames: string[]): Promise { + const cacheAvailable = 'caches' in self + if (!cacheAvailable) { + return Promise.resolve() + } + const cacheKey = filesCacheKey(props) + const cache = await openCaches() + await cache.put(cacheKey, new Response(JSON.stringify(filenames))) +} + +async function handleEmpty(props: FuzzyModalProps): Promise { + const fromCache = await loadCachedIndex(props) + if (fromCache) { + props.setFsm(fromCache) + } else { + props.setFsm({ key: 'downloading' }) + try { + const filenames = await props.downloadFilenames() + props.setFsm(handleFilenames(filenames)) + 
+ cacheFilenames(props, filenames).then( + () => {}, + () => {} + ) + } catch (error) { // JSON.stringify drops Error fields (yields '{}'), so surface the message instead. + props.setFsm({ + key: 'failed', + errorMessage: error instanceof Error ? error.message : JSON.stringify(error), + }) + } + } +} + +function handleFilenames(filenames: string[]): FuzzyFSM { + const indexing = CaseSensitiveFuzzySearch.fromSearchValuesAsync(filenames.map(file => ({ text: file }))) + if (indexing.key === 'ready') { + return { + key: 'ready', + fuzzy: indexing.value, + } + } + return { + key: 'indexing', + indexing, + } +} diff --git a/client/web/src/components/fuzzyFinder/HighlightedLink.module.scss b/client/web/src/components/fuzzyFinder/HighlightedLink.module.scss new file mode 100644 index 00000000000..40490bc4c4c --- /dev/null +++ b/client/web/src/components/fuzzyFinder/HighlightedLink.module.scss @@ -0,0 +1,15 @@ +.highlighted { + color: var(--oc-black); +} +.fuzzy { + background-color: var(--oc-yellow-4); +} +.exact { + background-color: var(--oc-yellow-3); +} +.link { + display: inline-block; + width: 100%; + height: 100%; + max-height: 1em; +} diff --git a/client/web/src/components/fuzzyFinder/HighlightedLink.tsx b/client/web/src/components/fuzzyFinder/HighlightedLink.tsx new file mode 100644 index 00000000000..2c39b27624d --- /dev/null +++ b/client/web/src/components/fuzzyFinder/HighlightedLink.tsx @@ -0,0 +1,69 @@ +import React from 'react' +import { Link } from 'react-router-dom' + +import styles from './HighlightedLink.module.scss' + +export interface RangePosition { + startOffset: number + endOffset: number + /** + * Does this range enclose an exact word? + */ + isExact: boolean +} +export interface HighlightedLinkProps { + text: string + positions: RangePosition[] + url?: string + onClick?: () => void +} + +export function offsetSum(props: HighlightedLinkProps): number { + let sum = 0 + for (const position of props.positions) { + sum += position.startOffset + } + return sum +} + +/** + * React component that renders text with highlighted subranges. + * + * Used to render fuzzy finder results. 
For example, given the query "doc/read" + * we want to highlight 'Doc' and `READ' in the filename + * 'Documentation/README.md`. + */ +export const HighlightedLink: React.FunctionComponent = props => { + const spans: JSX.Element[] = [] + let start = 0 + function pushSpan(className: string, startOffset: number, endOffset: number): void { + if (startOffset >= endOffset) { + return + } + const text = props.text.slice(startOffset, endOffset) + const key = `${startOffset}-${endOffset}` + const span = ( + + {text} + + ) + spans.push(span) + } + for (const position of props.positions) { + if (position.startOffset > start) { + pushSpan('', start, position.startOffset) + } + start = position.endOffset + const classNameSuffix = position.isExact ? styles.exact : styles.fuzzy + pushSpan(`${styles.highlighted} ${classNameSuffix}`, position.startOffset, position.endOffset) + } + pushSpan('', start, props.text.length) + + return props.url ? ( + props.onClick?.()}> + {spans} + + ) : ( + <>{spans} + ) +} diff --git a/client/web/src/fuzzyFinder/CaseSensitiveFuzzySearch.test.ts b/client/web/src/fuzzyFinder/CaseSensitiveFuzzySearch.test.ts new file mode 100644 index 00000000000..2f35681efc2 --- /dev/null +++ b/client/web/src/fuzzyFinder/CaseSensitiveFuzzySearch.test.ts @@ -0,0 +1,96 @@ +import { CaseSensitiveFuzzySearch, allFuzzyParts, fuzzyMatchesQuery } from './CaseSensitiveFuzzySearch' +import { FuzzySearchParameters } from './FuzzySearch' + +const all = [ + 't1/README.md', + 't2/Readme.md', + 't1/READMES.md', + '.tsconfig.json', + 'to/the/moon.jpg', + 'lol/business.txt', + 'haha/business.txt', + 't3/siteConfig.json', + 'business/crazy.txt', + 'fuzzy/business.txt', + '.travis/workflows/config.json', + 'test/WorkspaceSymbolProvider.scala', +] + +const fuzzy = CaseSensitiveFuzzySearch.fromSearchValues(all.map(text => ({ text }))) + +function checkSearch(query: string, expected: string[]) { + test(`search-${query}`, () => { + const queryProps: FuzzySearchParameters = { query, 
maxResults: 1000 } + const actual = fuzzy.search(queryProps).links.map(link => link.text) + expect(actual).toStrictEqual(expected) + for (const result of expected) { + const individualFuzzy = CaseSensitiveFuzzySearch.fromSearchValues([{ text: result }]) + const individualActual = individualFuzzy.search(queryProps).links.map(link => link.text) + expect(individualActual).toStrictEqual([result]) + } + }) +} + +function checkParts(name: string, original: string, expected: string[]) { + test(`allFuzzyParts-${name}`, () => { + expect(allFuzzyParts(original, false)).toStrictEqual(expected) + }) +} + +function checkFuzzyMatch(name: string, query: string, value: string, expected: string[]) { + test(`fuzzyMatchesQuery-${name}`, () => { + const obtained = fuzzyMatchesQuery(query, value) + const parts: string[] = [] + for (const position of obtained) { + parts.push(value.slice(position.startOffset, position.endOffset)) + } + expect(parts).toStrictEqual(expected) + }) +} + +describe('case sensitive fuzzy search', () => { + describe('splitting a filename into parts works as expected', () => { + checkParts('basic', 'haha/business.txt', ['haha', 'business', 'txt']) + checkParts('snake_case', 'haha_business.txt', ['haha', 'business', 'txt']) + checkParts('camelCase', 'hahaBusiness.txt', ['haha', 'Business', 'txt']) + checkParts('CamelCase', 'HahaBusiness.txt', ['Haha', 'Business', 'txt']) + checkParts('kebab-case', 'haha-business.txt', ['haha', 'business', 'txt']) + checkParts('kebab-case', 'haha-business.txt', ['haha', 'business', 'txt']) + checkParts('dotfile', '.tsconfig.json', ['tsconfig', 'json']) + }) + + describe('fuzzy matching selects the correct substrings', () => { + checkFuzzyMatch('dotfile', 'ts', '.tsconfig.json', ['ts']) + checkFuzzyMatch('basic', 'ha/busi', 'haha/business.txt', ['ha', '/', 'busi']) + checkFuzzyMatch('all-lowercase', 'readme', 't1/README.md', ['README']) + checkFuzzyMatch('all-lowercase2', 'readme', 't2/Readme.md', ['Readme']) + 
checkFuzzyMatch('digits', 't2', 't2/Readme.md', ['t2']) + checkFuzzyMatch('consume-delimeter-negative', 'ts/json', '.tsconfig.json', []) + checkFuzzyMatch('consume-delimeter-positive', 'ts/json', '.tsconfig/json', ['ts', '/', 'json']) + checkFuzzyMatch('consume-delimeter-end-of-word', 'ts/', '.tsconfig/json', ['ts', '/']) + checkFuzzyMatch('consume-delimeter-start-of-word', '.ts/', '.tsconfig/json', ['.', 'ts', '/']) + }) + + describe('fuzzy searching against the bloom filter returns the correct results', () => { + checkSearch('h/bus', ['haha/business.txt']) + checkSearch('moon', ['to/the/moon.jpg']) + checkSearch('t/moon', ['to/the/moon.jpg']) + checkSearch('t/t/moon', ['to/the/moon.jpg']) + checkSearch('t.t.moon', []) + checkSearch('t t moon', []) + checkSearch('jpg', ['to/the/moon.jpg']) + checkSearch('t/mo', ['to/the/moon.jpg']) + checkSearch('mo', ['to/the/moon.jpg']) + checkSearch('t', all) + checkSearch('readme', ['t1/README.md', 't2/Readme.md', 't1/READMES.md']) + checkSearch('README', ['t1/README.md', 't1/READMES.md']) + checkSearch('Readme', ['t2/Readme.md']) + checkSearch('WSProvider', ['test/WorkspaceSymbolProvider.scala']) + checkSearch('t2', ['t2/Readme.md']) + }) + + describe('caveat: validate the fuzzy finder is quite strict with capitalization', () => { + checkSearch('sitecon', []) + checkFuzzyMatch('sitecon', 'sitecon', 'website/siteConfig.js', []) + }) +}) diff --git a/client/web/src/fuzzyFinder/CaseSensitiveFuzzySearch.ts b/client/web/src/fuzzyFinder/CaseSensitiveFuzzySearch.ts new file mode 100644 index 00000000000..8915a6bf088 --- /dev/null +++ b/client/web/src/fuzzyFinder/CaseSensitiveFuzzySearch.ts @@ -0,0 +1,468 @@ +import { BloomFilter } from 'bloomfilter' + +import { HighlightedLinkProps, offsetSum, RangePosition } from '../components/fuzzyFinder/HighlightedLink' + +import { FuzzySearch, IndexingFSM, FuzzySearchParameters, FuzzySearchResult, SearchValue } from './FuzzySearch' +import { Hasher } from './Hasher' + +/** + * We don't index 
filenames whose length is this value or larger. + */ +const MAX_VALUE_LENGTH = 100 + +// Normally, you need multiple hash functions to keep the false-positive ratio +// low. However, non-empirical observations indicate that a single hash function +// works fine and provides the fastest indexing time in large repositories like +// Chromium. +const DEFAULT_BLOOM_FILTER_HASH_FUNCTION_COUNT = 1 +// The number of filenames to group together in a single bucket, and the number +// of string prefixes that each bloom can contain. Currently, every bucket can +// contain up to 262,144 prefixes (conservatively large number). With bucket +// size 50, my off-the-napkin calculation is that total memory usage with 400k +// files (Chromium size) may be as large as ~261mb. It's usable on most +// computers, but still a bit high. +// Tracking issue to fine-tune these parameters: https://github.com/sourcegraph/sourcegraph/issues/21201 +const DEFAULT_BUCKET_SIZE = 50 +const DEFAULT_BLOOM_FILTER_SIZE = 2 << 17 + +/** + * Returns the ranges of the value that the query fuzzy matches, or an empty array if there is no match. + */ +export function fuzzyMatchesQuery(query: string, value: string): RangePosition[] { + return fuzzyMatches(allFuzzyParts(query, true), value) +} + +/** + * Case-sensitive fuzzy search that uses bloom filters for low-latency filtering + * in large repositories (>100k files). + * + * NOTE(olafur): this is a reimplementation of the fuzzy finder in the Scala + * language server that's documented in this blog post here + * https://scalameta.org/metals/blog/2019/01/22/bloom-filters.html#fuzzy-symbol-search + * + * In a nutshell, bloom filters improve performance by allowing us to skip a + * "bucket" of candidate files if we know that bucket does not match any words + * in that query. For example, the query "SymPro" is split into the words "Sym" + * and "Pro". 
If a bucket of 500 words is guaranteed to have no appearances of + * the words "Sym" and "Pro", then we can skip those 500 words and move on to + * the next bucket. + * + * One downside of the bloom filter approach is that it requires an indexing + * phase that can take a couple of seconds to complete on a large input size + * (>100k filenames). The indexing phase can take a while to complete because we + * need to compute all possible words that the user may query. For example, + * given the filename "SymbolProvider", we create a bloom filter with all + * possible prefixes of "Symbol" and "Provider". Fortunately, bloom filters can be + * serialized so that the indexing step only runs once per repoName/commitID pair. + */ +export class CaseSensitiveFuzzySearch extends FuzzySearch { + public totalFileCount = 0 + constructor(public readonly buckets: Bucket[]) { + super() + for (const bucket of buckets) { + this.totalFileCount += bucket.files.length + } + } + + public static fromSearchValuesAsync(files: SearchValue[], bucketSize: number = DEFAULT_BUCKET_SIZE): IndexingFSM { + files.sort((a, b) => a.text.length - b.text.length) + const indexer = new Indexer(files, bucketSize) + function loop(): IndexingFSM { + if (indexer.isDone()) { + return { key: 'ready', value: indexer.complete() } + } + indexer.processBuckets(25000) + return { + key: 'indexing', + indexedFileCount: indexer.indexedFileCount(), + totalFileCount: indexer.totalFileCount(), + partialFuzzy: indexer.complete(), + continue: () => new Promise(resolve => resolve(loop())), + } + } + return loop() + } + + public static fromSearchValues( + files: SearchValue[], + bucketSize: number = DEFAULT_BUCKET_SIZE + ): CaseSensitiveFuzzySearch { + const indexer = new Indexer(files, bucketSize) + while (!indexer.isDone()) { + indexer.processBuckets(bucketSize) + } + return indexer.complete() + } + + public search(query: FuzzySearchParameters): FuzzySearchResult { + if (query.query.length === 0) { + return 
this.emptyResult(query) + } + let falsePositives = 0 + const result: HighlightedLinkProps[] = [] + const hashParts = allQueryHashParts(query.query) + const queryParts = allFuzzyParts(query.query, true) + const complete = (isComplete: boolean): FuzzySearchResult => + this.sorted({ links: result, isComplete, falsePositiveRatio: falsePositives / this.buckets.length }) + for (const bucket of this.buckets) { + const matches = bucket.matches(query, queryParts, hashParts) + if (!matches.skipped && matches.value.length === 0) { + falsePositives++ + } + for (const value of matches.value) { + if (result.length >= query.maxResults) { + return complete(false) + } + result.push(value) + } + } + return complete(true) + } + + private sorted(result: FuzzySearchResult): FuzzySearchResult { + result.links.sort((a, b) => { + const byLength = a.text.length - b.text.length + if (byLength !== 0) { + return byLength + } + + const byEarliestMatch = offsetSum(a) - offsetSum(b) + if (byEarliestMatch !== 0) { + return byEarliestMatch + } + + return a.text.localeCompare(b.text) + }) + return result + } + + private emptyResult(query: FuzzySearchParameters): FuzzySearchResult { + const result: HighlightedLinkProps[] = [] + const complete = (isComplete: boolean): FuzzySearchResult => this.sorted({ links: result, isComplete }) + + for (const bucket of this.buckets) { + if (result.length > query.maxResults) { + return complete(false) + } + for (const value of bucket.files) { + result.push({ + text: value.text, + positions: [], + url: query.createUrl ? 
query.createUrl(value.text) : undefined, + onClick: query.onClick, + }) + if (result.length > query.maxResults) { + return complete(false) + } + } + } + return complete(true) + } +} + +export function allFuzzyParts(value: string, includeDelimeters: boolean): string[] { + const buf: string[] = [] + let start = 0 + let end = 0 + while (end < value.length) { + if (end > start) { + buf.push(value.slice(start, end)) + } + while (end < value.length && isDelimeter(value[end])) { + if (includeDelimeters) { + buf.push(value[end]) + } + end++ + } + start = end + end = nextFuzzyPart(value, end + 1) + } + + if (start < value.length && end > start) { + buf.push(value.slice(start, end)) + } + + return buf +} + +function isDigit(value: string): boolean { + return value >= '0' && value <= '9' +} +function isLowercaseCharacter(value: string): boolean { + return isLowercaseOrDigit(value) && !isDelimeter(value) +} +function isLowercaseOrDigit(value: string): boolean { + return isDigit(value) || (value.toLowerCase() === value && value !== value.toUpperCase()) +} + +function isUppercaseCharacter(value: string): boolean { + return isUppercase(value) && !isDelimeter(value) +} +function isUppercase(value: string): boolean { + return value.toUpperCase() === value && value !== value.toLowerCase() +} + +function isDelimeterOrUppercase(character: string): boolean { + return isDelimeter(character) || isUppercase(character) +} + +function isDelimeter(character: string): boolean { + switch (character) { + case '/': + case '_': + case '-': + case '.': + case ' ': + return true + default: + return false + } +} + +function fuzzyMatches(queries: string[], value: string): RangePosition[] { + const result: RangePosition[] = [] + const matcher = new FuzzyMatcher(queries, value) + while (!matcher.isDone()) { + const isCurrentQueryDelimeter = matcher.isQueryDelimeter() + while (!matcher.isQueryDelimeter() && matcher.isStartDelimeter()) { + matcher.start++ + } + if (matcher.matchesFromStart()) { + 
result.push(matcher.rangePositionFromStart()) + matcher.queryIndex++ + } + matcher.start = matcher.nextStart(isCurrentQueryDelimeter) + } + return matcher.queryIndex >= queries.length ? result : [] +} + +class FuzzyMatcher { + public queryIndex = 0 + public start = 0 + private lowercaseValue: string + constructor(private readonly queries: string[], private readonly value: string) { + this.lowercaseValue = value.toLowerCase() + } + public nextStart(isCurrentQueryDelimeter: boolean): number { + const offset = isCurrentQueryDelimeter ? this.start : this.start + 1 + let end = this.isQueryDelimeter() + ? this.indexOfDelimeter(this.query(), offset) + : nextFuzzyPart(this.value, offset) + while (end < this.value.length && !this.isQueryDelimeter() && isDelimeter(this.value[end])) { + end++ + } + return end + } + public rangePositionFromStart(): RangePosition { + const end = this.start + this.query().length + return { + startOffset: this.start, + endOffset: end, + isExact: end >= this.value.length || startsNewWord(this.value, end), + } + } + public matchesFromStart(): boolean { + const caseInsensitive = this.isCaseInsensitive() + const compareValue = caseInsensitive ? this.lowercaseValue : this.value + return ( + compareValue.startsWith(this.query(), this.start) && + (!caseInsensitive || isCapitalizedPart(this.value, this.start, this.query())) + ) + } + public isStartDelimeter(): boolean { + return isDelimeter(this.value[this.start]) + } + public isDone(): boolean { + return this.queryIndex >= this.queries.length || this.start >= this.value.length + } + public query(): string { + return this.queries[this.queryIndex] + } + public isCaseInsensitive(): boolean { + return isLowercaseOrDigit(this.query()) + } + public isQueryDelimeter(): boolean { + return isDelimeter(this.query()) + } + public indexOfDelimeter(delim: string, start: number): number { + const index = this.value.indexOf(delim, start) + return index < 0 ? 
this.value.length : index + } +} + +function startsNewWord(value: string, index: number): boolean { + return ( + isDelimeterOrUppercase(value[index]) || + (isLowercaseCharacter(value[index]) && !isLowercaseCharacter(value[index - 1])) + ) +} + +/** + * Returns true if value.substring(start, start + query.length) is "properly capitalized". + * + * The string is properly capitalized as long it contains no lowercase character + * that is followed by an uppercase character. For example: + * + * - Not properly capitalized: "InnerClasses" "innerClasses" + * - Properly capitalized: "Innerclasses" "INnerclasses" + */ +function isCapitalizedPart(value: string, start: number, query: string): boolean { + let previousIsLowercase = false + for (let index = start; index < value.length && index - start < query.length; index++) { + const nextIsLowercase = isLowercaseOrDigit(value[index]) + if (previousIsLowercase && !nextIsLowercase) { + return false + } + previousIsLowercase = nextIsLowercase + } + return true +} + +function nextFuzzyPart(value: string, start: number): number { + let end = start + while (end < value.length && !isDelimeterOrUppercase(value[end])) { + end++ + } + return end +} + +function populateBloomFilter(values: SearchValue[]): BloomFilter { + const hashes = new BloomFilter(DEFAULT_BLOOM_FILTER_SIZE, DEFAULT_BLOOM_FILTER_HASH_FUNCTION_COUNT) + for (const value of values) { + if (value.text.length < MAX_VALUE_LENGTH) { + updateHashParts(value.text, hashes) + } + } + return hashes +} + +function allQueryHashParts(query: string): number[] { + const fuzzyParts = allFuzzyParts(query, false) + const result: number[] = [] + const hasher = new Hasher() + for (const part of fuzzyParts) { + hasher.reset() + for (const character of part) { + hasher.update(character) + result.push(hasher.digest()) + } + } + return result +} + +function updateHashParts(value: string, buf: BloomFilter): void { + const words = new Hasher() + const lowercaseWords = new Hasher() + + for (let 
index = 0; index < value.length; index++) { + const character = value[index] + if (isDelimeterOrUppercase(character)) { + words.reset() + lowercaseWords.reset() + if (isUppercaseCharacter(character) && (index === 0 || !isUppercaseCharacter(value[index - 1]))) { + let uppercaseWordIndex = index + const upper = [] + while (uppercaseWordIndex < value.length && isUppercaseCharacter(value[uppercaseWordIndex])) { + upper.push(value[uppercaseWordIndex]) + lowercaseWords.update(value[uppercaseWordIndex].toLowerCase()) + buf.add(lowercaseWords.digest()) + uppercaseWordIndex++ + } + lowercaseWords.reset() + } + } + if (isDelimeter(character)) { + continue + } + words.update(character) + lowercaseWords.update(character.toLowerCase()) + + buf.add(words.digest()) + if (words.digest() !== lowercaseWords.digest()) { + buf.add(lowercaseWords.digest()) + } + } +} + +interface BucketResult { + skipped: boolean + value: HighlightedLinkProps[] +} + +class Bucket { + constructor( + public readonly files: SearchValue[], + public readonly filter: BloomFilter, + public readonly id: number + ) {} + public static fromSearchValues(files: SearchValue[]): Bucket { + files.sort((a, b) => a.text.length - b.text.length) + return new Bucket(files, populateBloomFilter(files), Math.random()) + } + + private matchesMaybe(hashParts: number[]): boolean { + for (const part of hashParts) { + if (!this.filter.test(part)) { + return false + } + } + return true + } + public matches(query: FuzzySearchParameters, queryParts: string[], hashParts: number[]): BucketResult { + const matchesMaybe = this.matchesMaybe(hashParts) + if (!matchesMaybe) { + return { skipped: true, value: [] } + } + const result: HighlightedLinkProps[] = [] + for (const file of this.files) { + const positions = fuzzyMatches(queryParts, file.text) + if (positions.length > 0) { + result.push({ + text: file.text, + positions, + url: query.createUrl ? 
query.createUrl(file.text) : undefined, + onClick: query.onClick, + }) + } + } + return { skipped: false, value: result } + } +} + +class Indexer { + private buffer: SearchValue[] = [] + private buckets: Bucket[] = [] + private index = 0 + constructor(private readonly files: SearchValue[], private readonly bucketSize: number) { + this.files.sort((a, b) => a.text.length - b.text.length) + } + + public complete(): CaseSensitiveFuzzySearch { + return new CaseSensitiveFuzzySearch(this.buckets) + } + + public isDone(): boolean { + return this.index >= this.files.length + } + public totalFileCount(): number { + return this.files.length + } + public indexedFileCount(): number { + return this.index + } + public processBuckets(fileCount: number): void { + let bucketCount = fileCount / this.bucketSize + while (bucketCount > 0 && !this.isDone()) { + const endIndex = Math.min(this.files.length, this.index + this.bucketSize) + while (this.index < endIndex) { + this.buffer.push(this.files[this.index]) + this.index++ + } + if (this.buffer) { + this.buckets.push(Bucket.fromSearchValues(this.buffer)) + this.buffer = [] + } + bucketCount-- + } + } +} diff --git a/client/web/src/fuzzyFinder/FuzzySearch.ts b/client/web/src/fuzzyFinder/FuzzySearch.ts new file mode 100644 index 00000000000..d4dbdc584db --- /dev/null +++ b/client/web/src/fuzzyFinder/FuzzySearch.ts @@ -0,0 +1,48 @@ +import { HighlightedLinkProps } from '../components/fuzzyFinder/HighlightedLink' + +export interface FuzzySearchParameters { + query: string + maxResults: number + createUrl?: (value: string) => string + onClick?: () => void +} + +export interface FuzzySearchResult { + links: HighlightedLinkProps[] + isComplete: boolean + elapsedMilliseconds?: number + falsePositiveRatio?: number +} + +export interface SearchValue { + text: string +} + +export type IndexingFSM = SearchIndexing | SearchReady +export interface SearchIndexing { + key: 'indexing' + indexedFileCount: number + totalFileCount: number + partialFuzzy: 
FuzzySearch + continue: () => Promise +} +export interface SearchReady { + key: 'ready' + value: FuzzySearch +} + +/** + * Superclass for different fuzzy finding algorithms. + * + * Currently, there is only one implementation that is case sensitive. This + * implementation is specifically tailored for large repos that have >400k + * source files. Most users will likely prefer case-insensitive fuzzy filtering, + * which is easy to support for small repos (<20k files) but it's not clear how + * to support that in larger repos without sacrificing latency. + * + * Tracking issue to add case-insensitive search: https://github.com/sourcegraph/sourcegraph/issues/21201 + */ +export abstract class FuzzySearch { + public abstract totalFileCount: number + public abstract search(parameters: FuzzySearchParameters): FuzzySearchResult +} diff --git a/client/web/src/fuzzyFinder/Hasher.ts b/client/web/src/fuzzyFinder/Hasher.ts new file mode 100644 index 00000000000..75a72e18ca0 --- /dev/null +++ b/client/web/src/fuzzyFinder/Hasher.ts @@ -0,0 +1,29 @@ +/** + * Computes the hashcode from a streaming input of characters. Every hashcode is + * computed in O(1) time. + * + * This class makes it possible to compute a hashcode for every prefix of a + * given string of length N in O(N) time. For example, given the string "Doc", + * we can compute the hashcode for the string "D", "Do" and "Doc" in three + * constant operations. If implemented naively, computing every individual + * hashcode would be a linear operation resulting in a total runtime of O(N^2). 
+ */ +export class Hasher { + private currentHash = 0 + public update(character: string): Hasher { + for (let index = 0; index < character.length; index++) { + this.currentHash = (Math.imul(31, this.currentHash) + character.charCodeAt(index)) | 0 + } + return this + } + public digest(): number { + return this.currentHash + } + public resetWith(character: string): void { + this.reset() + this.update(character) + } + public reset(): void { + this.currentHash = 0 + } +} diff --git a/client/web/src/keyboardShortcuts/keyboardShortcuts.ts b/client/web/src/keyboardShortcuts/keyboardShortcuts.ts index 54311a87558..c7568fd1c54 100644 --- a/client/web/src/keyboardShortcuts/keyboardShortcuts.ts +++ b/client/web/src/keyboardShortcuts/keyboardShortcuts.ts @@ -27,6 +27,18 @@ export const KEYBOARD_SHORTCUT_FOCUS_SEARCHBAR: KeyboardShortcut = { keybindings: [{ ordered: ['/'] }], } +export const KEYBOARD_SHORTCUT_FUZZY_FINDER: KeyboardShortcut = { + id: 'fuzzyFinder', + title: 'Fuzzy search files', + keybindings: [{ ordered: ['t'] }], +} + +export const KEYBOARD_SHORTCUT_CLOSE_FUZZY_FINDER: KeyboardShortcut = { + id: 'closeFuzzyFiles', + title: 'Close fuzzy search files', + keybindings: [{ ordered: ['Escape'] }], +} + export const KEYBOARD_SHORTCUT_COPY_FULL_QUERY: KeyboardShortcut = { id: 'copyFullQuery', title: 'Copy full query', @@ -42,6 +54,8 @@ export const KEYBOARD_SHORTCUTS: KeyboardShortcut[] = [ KEYBOARD_SHORTCUT_SWITCH_THEME, KEYBOARD_SHORTCUT_SHOW_HELP, KEYBOARD_SHORTCUT_FOCUS_SEARCHBAR, + KEYBOARD_SHORTCUT_FUZZY_FINDER, + KEYBOARD_SHORTCUT_CLOSE_FUZZY_FINDER, KEYBOARD_SHORTCUT_COPY_FULL_QUERY, ] diff --git a/client/web/src/repo/RepoContainer.tsx b/client/web/src/repo/RepoContainer.tsx index f4ae569146e..6db0c6a87cd 100644 --- a/client/web/src/repo/RepoContainer.tsx +++ b/client/web/src/repo/RepoContainer.tsx @@ -35,6 +35,7 @@ import { AuthenticatedUser } from '../auth' import { ErrorMessage } from '../components/alerts' import { BreadcrumbSetters, BreadcrumbsProps } 
from '../components/Breadcrumbs' import { ErrorBoundary } from '../components/ErrorBoundary' +import { FuzzyFinder } from '../components/fuzzyFinder/FuzzyFinder' import { HeroPage } from '../components/HeroPage' import { ActionItemsBarProps, useWebActionItems } from '../extensions/components/ActionItemsBar' import { ExternalLinkFields, RepositoryFields } from '../graphql-operations' @@ -383,6 +384,12 @@ export const RepoContainer: React.FunctionComponent = props return (
+ {!isErrorLike(props.settingsCascade.final) && + props.settingsCascade.final?.experimentalFeatures?.fuzzyFinder && + resolvedRevisionOrError && + !isErrorLike(resolvedRevisionOrError) && ( + + )} {showExtensionAlert && (