diff --git a/cmd/searcher/internal/search/BUILD.bazel b/cmd/searcher/internal/search/BUILD.bazel index 0dfcb76b858..f15b50fdb32 100644 --- a/cmd/searcher/internal/search/BUILD.bazel +++ b/cmd/searcher/internal/search/BUILD.bazel @@ -41,7 +41,7 @@ go_library( "//internal/search/backend", "//internal/search/casetransform", "//internal/search/query", - "//internal/search/zoekt", + "//internal/search/zoektquery", "//internal/searcher/v1:searcher", "//internal/trace", "//lib/codeintel/languages", diff --git a/cmd/searcher/internal/search/zoekt_search.go b/cmd/searcher/internal/search/zoekt_search.go index 9a1079c8021..24a7b85d62b 100644 --- a/cmd/searcher/internal/search/zoekt_search.go +++ b/cmd/searcher/internal/search/zoekt_search.go @@ -11,53 +11,53 @@ import ( "github.com/sourcegraph/conc/pool" "github.com/sourcegraph/log" "github.com/sourcegraph/zoekt" - zoektquery "github.com/sourcegraph/zoekt/query" + "github.com/sourcegraph/zoekt/query" "github.com/sourcegraph/sourcegraph/cmd/searcher/protocol" "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/comby" "github.com/sourcegraph/sourcegraph/internal/search" "github.com/sourcegraph/sourcegraph/internal/search/backend" - zoektutil "github.com/sourcegraph/sourcegraph/internal/search/zoekt" + "github.com/sourcegraph/sourcegraph/internal/search/zoektquery" "github.com/sourcegraph/sourcegraph/lib/errors" ) -func handleFilePathPatterns(query *protocol.PatternInfo) (zoektquery.Q, error) { - var and []zoektquery.Q +func handleFilePathPatterns(patternInfo *protocol.PatternInfo) (query.Q, error) { + var and []query.Q // Zoekt uses regular expressions for file paths. // Unhandled cases: PathPatternsAreCaseSensitive and whitespace in file path patterns. - for _, p := range query.IncludePaths { - q, err := zoektutil.FileRe(p, query.IsCaseSensitive) + for _, p := range patternInfo.IncludePaths { + q, err := zoektquery.FileRe(p, patternInfo.IsCaseSensitive) if err != nil { return nil, err } and = append(and, q) } - if query.ExcludePaths != "" { - q, err := zoektutil.FileRe(query.ExcludePaths, query.IsCaseSensitive) + if patternInfo.ExcludePaths != "" { + q, err := zoektquery.FileRe(patternInfo.ExcludePaths, patternInfo.IsCaseSensitive) if err != nil { return nil, err } - and = append(and, &zoektquery.Not{Child: q}) + and = append(and, &query.Not{Child: q}) } - return zoektquery.NewAnd(and...), nil + return query.NewAnd(and...), nil } -func buildQuery(pattern string, branchRepos []zoektquery.BranchRepos, filePathPatterns zoektquery.Q, shortcircuit bool) (zoektquery.Q, error) { +func buildQuery(pattern string, branchRepos []query.BranchRepos, filePathPatterns query.Q, shortcircuit bool) (query.Q, error) { regexString := comby.StructuralPatToRegexpQuery(pattern, shortcircuit) if len(regexString) == 0 { - return &zoektquery.Const{Value: true}, nil + return &query.Const{Value: true}, nil } re, err := syntax.Parse(regexString, syntax.ClassNL|syntax.PerlX|syntax.UnicodeGroups) if err != nil { return nil, err } - return zoektquery.NewAnd( - &zoektquery.BranchesRepos{List: branchRepos}, + return query.NewAnd( + &query.BranchesRepos{List: branchRepos}, filePathPatterns, - &zoektquery.Regexp{ + &query.Regexp{ Regexp: re, CaseSensitive: true, Content: true, @@ -71,7 +71,7 @@ func buildQuery(pattern string, branchRepos []zoektquery.BranchRepos, filePathPa // Timeouts are reported through the context, and as a special case errNoResultsInTimeout // is returned if no results are found in the given timeout (instead of the more common // case of finding partial or full results in the given timeout). -func zoektSearch(ctx context.Context, logger log.Logger, client zoekt.Streamer, args *protocol.PatternInfo, branchRepos []zoektquery.BranchRepos, contextLines int32, since func(t time.Time) time.Duration, repo api.RepoName, sender matchSender) (err error) { +func zoektSearch(ctx context.Context, logger log.Logger, client zoekt.Streamer, args *protocol.PatternInfo, branchRepos []query.BranchRepos, contextLines int32, since func(t time.Time) time.Duration, repo api.RepoName, sender matchSender) (err error) { if len(branchRepos) == 0 { return nil } diff --git a/dev/check/go-dbconn-import.sh b/dev/check/go-dbconn-import.sh index ae5fcb3273a..7c206866ab9 100755 --- a/dev/check/go-dbconn-import.sh +++ b/dev/check/go-dbconn-import.sh @@ -20,11 +20,7 @@ allowed_prefix=( github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker github.com/sourcegraph/sourcegraph/cmd/repo-updater - # Transitively depends on zoekt package which imports but does not use DB - github.com/sourcegraph/sourcegraph/cmd/searcher # Doesn't connect but uses db internals for use with sqlite - # Main entrypoint for running all services, so it must be allowed to import it. - github.com/sourcegraph/sourcegraph/cmd/sourcegraph github.com/sourcegraph/sourcegraph/cmd/symbols github.com/sourcegraph/sourcegraph/cmd/worker ) diff --git a/dev/linters/dbconn/dbconn.go b/dev/linters/dbconn/dbconn.go index 8ffe7345480..d427733b577 100644 --- a/dev/linters/dbconn/dbconn.go +++ b/dev/linters/dbconn/dbconn.go @@ -33,11 +33,7 @@ var allowedToImport = []string{ "github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker", "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker", "github.com/sourcegraph/sourcegraph/cmd/repo-updater", - // Transitively depends on zoekt package which imports but does not use DB - "github.com/sourcegraph/sourcegraph/cmd/searcher", // Doesn't connect but uses db internals for use with sqlite - // Main entrypoint for running all services, so it must be allowed to import it. - "github.com/sourcegraph/sourcegraph/cmd/sourcegraph", "github.com/sourcegraph/sourcegraph/cmd/symbols", "github.com/sourcegraph/sourcegraph/cmd/worker", } diff --git a/internal/search/symbol/BUILD.bazel b/internal/search/symbol/BUILD.bazel index 5f37f9998cd..f846588b5de 100644 --- a/internal/search/symbol/BUILD.bazel +++ b/internal/search/symbol/BUILD.bazel @@ -13,7 +13,7 @@ go_library( "//internal/authz", "//internal/search", "//internal/search/result", - "//internal/search/zoekt", + "//internal/search/zoektquery", "//internal/symbols", "//internal/trace/policy", "//internal/types", diff --git a/internal/search/symbol/symbol.go b/internal/search/symbol/symbol.go index 32b5c912378..82c83560829 100644 --- a/internal/search/symbol/symbol.go +++ b/internal/search/symbol/symbol.go @@ -9,14 +9,14 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/grafana/regexp" "github.com/sourcegraph/zoekt" - zoektquery "github.com/sourcegraph/zoekt/query" + "github.com/sourcegraph/zoekt/query" "github.com/sourcegraph/sourcegraph/internal/actor" "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/authz" "github.com/sourcegraph/sourcegraph/internal/search" "github.com/sourcegraph/sourcegraph/internal/search/result" - zoektutil "github.com/sourcegraph/sourcegraph/internal/search/zoekt" + "github.com/sourcegraph/sourcegraph/internal/search/zoektquery" "github.com/sourcegraph/sourcegraph/internal/symbols" "github.com/sourcegraph/sourcegraph/internal/trace/policy" "github.com/sourcegraph/sourcegraph/internal/types" @@ -205,28 +205,28 @@ func searchZoekt( return } - var query zoektquery.Q + var q query.Q if expr.Op == syntax.OpLiteral { - query = &zoektquery.Substring{ + q = &query.Substring{ Pattern: string(expr.Rune), Content: true, } } else { - query = &zoektquery.Regexp{ + q = &query.Regexp{ Regexp: expr, Content: true, } } - ands := []zoektquery.Q{ - &zoektquery.BranchesRepos{List: []zoektquery.BranchRepos{ + ands := []query.Q{ + &query.BranchesRepos{List: []query.BranchRepos{ {Branch: branch, Repos: roaring.BitmapOf(uint32(repoName.ID))}, }}, - &zoektquery.Symbol{Expr: query}, + &query.Symbol{Expr: q}, } if includePatterns != nil { for _, p := range *includePatterns { - q, err := zoektutil.FileRe(p, true) + q, err := zoektquery.FileRe(p, true) if err != nil { return nil, err } @@ -234,7 +234,7 @@ func searchZoekt( } } - final := zoektquery.Simplify(zoektquery.NewAnd(ands...)) + final := query.Simplify(query.NewAnd(ands...)) match := limitOrDefault(first) + 1 resp, err := z.Search(ctx, final, &zoekt.SearchOptions{ Trace: policy.ShouldTrace(ctx), diff --git a/internal/search/zoekt/BUILD.bazel b/internal/search/zoekt/BUILD.bazel index 6775d3cb364..ff97e196b4c 100644 --- a/internal/search/zoekt/BUILD.bazel +++ b/internal/search/zoekt/BUILD.bazel @@ -27,6 +27,7 @@ go_library( "//internal/search/query", "//internal/search/result", "//internal/search/streaming", + "//internal/search/zoektquery", "//internal/trace", "//internal/types", "//lib/errors", diff --git a/internal/search/zoekt/query.go b/internal/search/zoekt/query.go index 6b72e1b29e3..fcda84580fb 100644 --- a/internal/search/zoekt/query.go +++ b/internal/search/zoekt/query.go @@ -8,6 +8,7 @@ import ( "github.com/sourcegraph/sourcegraph/internal/search" "github.com/sourcegraph/sourcegraph/internal/search/query" "github.com/sourcegraph/sourcegraph/internal/search/result" + "github.com/sourcegraph/sourcegraph/internal/search/zoektquery" "github.com/sourcegraph/sourcegraph/lib/errors" zoekt "github.com/sourcegraph/zoekt/query" @@ -59,14 +60,14 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea // TODO PathPatternsAreCaseSensitive // TODO whitespace in file path patterns? for _, i := range filesInclude { - q, err := FileRe(i, isCaseSensitive) + q, err := zoektquery.FileRe(i, isCaseSensitive) if err != nil { return nil, err } and = append(and, q) } if len(filesExclude) > 0 { - q, err := FileRe(query.UnionRegExps(filesExclude), isCaseSensitive) + q, err := zoektquery.FileRe(query.UnionRegExps(filesExclude), isCaseSensitive) if err != nil { return nil, err } @@ -144,7 +145,7 @@ func toZoektPattern( fileNameOnly := patternMatchesPath && !patternMatchesContent contentOnly := !patternMatchesPath && patternMatchesContent - q, err = parseRe(n.RegExpPattern(), fileNameOnly, contentOnly, isCaseSensitive) + q, err = zoektquery.ParseRe(n.RegExpPattern(), fileNameOnly, contentOnly, isCaseSensitive) if err != nil { return nil, err } diff --git a/internal/search/zoekt/zoekt.go b/internal/search/zoekt/zoekt.go index e512c59d323..d2e6677c8a9 100644 --- a/internal/search/zoekt/zoekt.go +++ b/internal/search/zoekt/zoekt.go @@ -1,49 +1,11 @@ package zoekt import ( - "regexp/syntax" //nolint:depguard // zoekt requires this pkg - "github.com/sourcegraph/zoekt" - zoektquery "github.com/sourcegraph/zoekt/query" "github.com/sourcegraph/sourcegraph/internal/types" ) -func FileRe(pattern string, queryIsCaseSensitive bool) (zoektquery.Q, error) { - return parseRe(pattern, true, false, queryIsCaseSensitive) -} - -const regexpFlags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups - -func parseRe(pattern string, filenameOnly bool, contentOnly bool, queryIsCaseSensitive bool) (zoektquery.Q, error) { - // these are the flags used by zoekt, which differ to searcher. - re, err := syntax.Parse(pattern, regexpFlags) - if err != nil { - return nil, err - } - - // OptimizeRegexp currently only converts capture groups into non-capture - // groups (faster for stdlib regexp to execute). - re = zoektquery.OptimizeRegexp(re, regexpFlags) - - // zoekt decides to use its literal optimization at the query parser - // level, so we check if our regex can just be a literal. - if re.Op == syntax.OpLiteral { - return &zoektquery.Substring{ - Pattern: string(re.Rune), - CaseSensitive: queryIsCaseSensitive, - Content: contentOnly, - FileName: filenameOnly, - }, nil - } - return &zoektquery.Regexp{ - Regexp: re, - CaseSensitive: queryIsCaseSensitive, - Content: contentOnly, - FileName: filenameOnly, - }, nil -} - // repoRevFunc is a function which maps repository names returned from Zoekt // into the Sourcegraph's resolved repository revisions for the search. type repoRevFunc func(file *zoekt.FileMatch) (repo types.MinimalRepo, revs []string) diff --git a/internal/search/zoektquery/BUILD.bazel b/internal/search/zoektquery/BUILD.bazel new file mode 100644 index 00000000000..36472682ce2 --- /dev/null +++ b/internal/search/zoektquery/BUILD.bazel @@ -0,0 +1,9 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "zoektquery", + srcs = ["query.go"], + importpath = "github.com/sourcegraph/sourcegraph/internal/search/zoektquery", + visibility = ["//:__subpackages__"], + deps = ["@com_github_sourcegraph_zoekt//query"], +) diff --git a/internal/search/zoektquery/query.go b/internal/search/zoektquery/query.go new file mode 100644 index 00000000000..15c6fa0f96d --- /dev/null +++ b/internal/search/zoektquery/query.go @@ -0,0 +1,42 @@ +package zoektquery + +import ( + "regexp/syntax" //nolint:depguard // zoekt requires this pkg + + "github.com/sourcegraph/zoekt/query" +) + +const regexpFlags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups + +func FileRe(pattern string, queryIsCaseSensitive bool) (query.Q, error) { + return ParseRe(pattern, true, false, queryIsCaseSensitive) +} + +func ParseRe(pattern string, filenameOnly bool, contentOnly bool, queryIsCaseSensitive bool) (query.Q, error) { + // these are the flags used by zoekt, which differ to searcher. + re, err := syntax.Parse(pattern, regexpFlags) + if err != nil { + return nil, err + } + + // OptimizeRegexp currently only converts capture groups into non-capture + // groups (faster for stdlib regexp to execute). + re = query.OptimizeRegexp(re, regexpFlags) + + // zoekt decides to use its literal optimization at the query parser + // level, so we check if our regex can just be a literal. + if re.Op == syntax.OpLiteral { + return &query.Substring{ + Pattern: string(re.Rune), + CaseSensitive: queryIsCaseSensitive, + Content: contentOnly, + FileName: filenameOnly, + }, nil + } + return &query.Regexp{ + Regexp: re, + CaseSensitive: queryIsCaseSensitive, + Content: contentOnly, + FileName: filenameOnly, + }, nil +}