searcher: remove unused dependency on dbconn pkg (#61463)

In a recent incident it was bought up that hot fixing searcher had a
risk since it speaks to the DB. I investigated and noticed it doesn't
actually connect, but does have dbconn as an unused transitive dep.

This commit refactors out the bit of logic used by searcher into a pure
package. To do so I did a few renames, but functionally there is no
change in logic other than the searcher binary shedding some heavy
dependencies.

Additionally I updated the go-dbconn-import check to ensure searcher
isn't accidently using the database again.

Test Plan: CI
This commit is contained in:
Keegan Carruthers-Smith 2024-03-28 17:11:41 +02:00 committed by GitHub
parent 486b248183
commit 57a2330d98
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 84 additions and 77 deletions

View File

@ -41,7 +41,7 @@ go_library(
"//internal/search/backend",
"//internal/search/casetransform",
"//internal/search/query",
"//internal/search/zoekt",
"//internal/search/zoektquery",
"//internal/searcher/v1:searcher",
"//internal/trace",
"//lib/codeintel/languages",

View File

@ -11,53 +11,53 @@ import (
"github.com/sourcegraph/conc/pool"
"github.com/sourcegraph/log"
"github.com/sourcegraph/zoekt"
zoektquery "github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/sourcegraph/cmd/searcher/protocol"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/comby"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/backend"
zoektutil "github.com/sourcegraph/sourcegraph/internal/search/zoekt"
"github.com/sourcegraph/sourcegraph/internal/search/zoektquery"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
func handleFilePathPatterns(query *protocol.PatternInfo) (zoektquery.Q, error) {
var and []zoektquery.Q
func handleFilePathPatterns(patternInfo *protocol.PatternInfo) (query.Q, error) {
var and []query.Q
// Zoekt uses regular expressions for file paths.
// Unhandled cases: PathPatternsAreCaseSensitive and whitespace in file path patterns.
for _, p := range query.IncludePaths {
q, err := zoektutil.FileRe(p, query.IsCaseSensitive)
for _, p := range patternInfo.IncludePaths {
q, err := zoektquery.FileRe(p, patternInfo.IsCaseSensitive)
if err != nil {
return nil, err
}
and = append(and, q)
}
if query.ExcludePaths != "" {
q, err := zoektutil.FileRe(query.ExcludePaths, query.IsCaseSensitive)
if patternInfo.ExcludePaths != "" {
q, err := zoektquery.FileRe(patternInfo.ExcludePaths, patternInfo.IsCaseSensitive)
if err != nil {
return nil, err
}
and = append(and, &zoektquery.Not{Child: q})
and = append(and, &query.Not{Child: q})
}
return zoektquery.NewAnd(and...), nil
return query.NewAnd(and...), nil
}
func buildQuery(pattern string, branchRepos []zoektquery.BranchRepos, filePathPatterns zoektquery.Q, shortcircuit bool) (zoektquery.Q, error) {
func buildQuery(pattern string, branchRepos []query.BranchRepos, filePathPatterns query.Q, shortcircuit bool) (query.Q, error) {
regexString := comby.StructuralPatToRegexpQuery(pattern, shortcircuit)
if len(regexString) == 0 {
return &zoektquery.Const{Value: true}, nil
return &query.Const{Value: true}, nil
}
re, err := syntax.Parse(regexString, syntax.ClassNL|syntax.PerlX|syntax.UnicodeGroups)
if err != nil {
return nil, err
}
return zoektquery.NewAnd(
&zoektquery.BranchesRepos{List: branchRepos},
return query.NewAnd(
&query.BranchesRepos{List: branchRepos},
filePathPatterns,
&zoektquery.Regexp{
&query.Regexp{
Regexp: re,
CaseSensitive: true,
Content: true,
@ -71,7 +71,7 @@ func buildQuery(pattern string, branchRepos []zoektquery.BranchRepos, filePathPa
// Timeouts are reported through the context, and as a special case errNoResultsInTimeout
// is returned if no results are found in the given timeout (instead of the more common
// case of finding partial or full results in the given timeout).
func zoektSearch(ctx context.Context, logger log.Logger, client zoekt.Streamer, args *protocol.PatternInfo, branchRepos []zoektquery.BranchRepos, contextLines int32, since func(t time.Time) time.Duration, repo api.RepoName, sender matchSender) (err error) {
func zoektSearch(ctx context.Context, logger log.Logger, client zoekt.Streamer, args *protocol.PatternInfo, branchRepos []query.BranchRepos, contextLines int32, since func(t time.Time) time.Duration, repo api.RepoName, sender matchSender) (err error) {
if len(branchRepos) == 0 {
return nil
}

View File

@ -20,11 +20,7 @@ allowed_prefix=(
github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker
github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker
github.com/sourcegraph/sourcegraph/cmd/repo-updater
# Transitively depends on zoekt package which imports but does not use DB
github.com/sourcegraph/sourcegraph/cmd/searcher
# Doesn't connect but uses db internals for use with sqlite
# Main entrypoint for running all services, so it must be allowed to import it.
github.com/sourcegraph/sourcegraph/cmd/sourcegraph
github.com/sourcegraph/sourcegraph/cmd/symbols
github.com/sourcegraph/sourcegraph/cmd/worker
)

View File

@ -33,11 +33,7 @@ var allowedToImport = []string{
"github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker",
"github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker",
"github.com/sourcegraph/sourcegraph/cmd/repo-updater",
// Transitively depends on zoekt package which imports but does not use DB
"github.com/sourcegraph/sourcegraph/cmd/searcher",
// Doesn't connect but uses db internals for use with sqlite
// Main entrypoint for running all services, so it must be allowed to import it.
"github.com/sourcegraph/sourcegraph/cmd/sourcegraph",
"github.com/sourcegraph/sourcegraph/cmd/symbols",
"github.com/sourcegraph/sourcegraph/cmd/worker",
}

View File

@ -13,7 +13,7 @@ go_library(
"//internal/authz",
"//internal/search",
"//internal/search/result",
"//internal/search/zoekt",
"//internal/search/zoektquery",
"//internal/symbols",
"//internal/trace/policy",
"//internal/types",

View File

@ -9,14 +9,14 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/grafana/regexp"
"github.com/sourcegraph/zoekt"
zoektquery "github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/sourcegraph/internal/actor"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/authz"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/result"
zoektutil "github.com/sourcegraph/sourcegraph/internal/search/zoekt"
"github.com/sourcegraph/sourcegraph/internal/search/zoektquery"
"github.com/sourcegraph/sourcegraph/internal/symbols"
"github.com/sourcegraph/sourcegraph/internal/trace/policy"
"github.com/sourcegraph/sourcegraph/internal/types"
@ -205,28 +205,28 @@ func searchZoekt(
return
}
var query zoektquery.Q
var q query.Q
if expr.Op == syntax.OpLiteral {
query = &zoektquery.Substring{
q = &query.Substring{
Pattern: string(expr.Rune),
Content: true,
}
} else {
query = &zoektquery.Regexp{
q = &query.Regexp{
Regexp: expr,
Content: true,
}
}
ands := []zoektquery.Q{
&zoektquery.BranchesRepos{List: []zoektquery.BranchRepos{
ands := []query.Q{
&query.BranchesRepos{List: []query.BranchRepos{
{Branch: branch, Repos: roaring.BitmapOf(uint32(repoName.ID))},
}},
&zoektquery.Symbol{Expr: query},
&query.Symbol{Expr: q},
}
if includePatterns != nil {
for _, p := range *includePatterns {
q, err := zoektutil.FileRe(p, true)
q, err := zoektquery.FileRe(p, true)
if err != nil {
return nil, err
}
@ -234,7 +234,7 @@ func searchZoekt(
}
}
final := zoektquery.Simplify(zoektquery.NewAnd(ands...))
final := query.Simplify(query.NewAnd(ands...))
match := limitOrDefault(first) + 1
resp, err := z.Search(ctx, final, &zoekt.SearchOptions{
Trace: policy.ShouldTrace(ctx),

View File

@ -27,6 +27,7 @@ go_library(
"//internal/search/query",
"//internal/search/result",
"//internal/search/streaming",
"//internal/search/zoektquery",
"//internal/trace",
"//internal/types",
"//lib/errors",

View File

@ -8,6 +8,7 @@ import (
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/query"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/internal/search/zoektquery"
"github.com/sourcegraph/sourcegraph/lib/errors"
zoekt "github.com/sourcegraph/zoekt/query"
@ -59,14 +60,14 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
// TODO PathPatternsAreCaseSensitive
// TODO whitespace in file path patterns?
for _, i := range filesInclude {
q, err := FileRe(i, isCaseSensitive)
q, err := zoektquery.FileRe(i, isCaseSensitive)
if err != nil {
return nil, err
}
and = append(and, q)
}
if len(filesExclude) > 0 {
q, err := FileRe(query.UnionRegExps(filesExclude), isCaseSensitive)
q, err := zoektquery.FileRe(query.UnionRegExps(filesExclude), isCaseSensitive)
if err != nil {
return nil, err
}
@ -144,7 +145,7 @@ func toZoektPattern(
fileNameOnly := patternMatchesPath && !patternMatchesContent
contentOnly := !patternMatchesPath && patternMatchesContent
q, err = parseRe(n.RegExpPattern(), fileNameOnly, contentOnly, isCaseSensitive)
q, err = zoektquery.ParseRe(n.RegExpPattern(), fileNameOnly, contentOnly, isCaseSensitive)
if err != nil {
return nil, err
}

View File

@ -1,49 +1,11 @@
package zoekt
import (
"regexp/syntax" //nolint:depguard // zoekt requires this pkg
"github.com/sourcegraph/zoekt"
zoektquery "github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/sourcegraph/internal/types"
)
func FileRe(pattern string, queryIsCaseSensitive bool) (zoektquery.Q, error) {
return parseRe(pattern, true, false, queryIsCaseSensitive)
}
const regexpFlags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups
func parseRe(pattern string, filenameOnly bool, contentOnly bool, queryIsCaseSensitive bool) (zoektquery.Q, error) {
// these are the flags used by zoekt, which differ to searcher.
re, err := syntax.Parse(pattern, regexpFlags)
if err != nil {
return nil, err
}
// OptimizeRegexp currently only converts capture groups into non-capture
// groups (faster for stdlib regexp to execute).
re = zoektquery.OptimizeRegexp(re, regexpFlags)
// zoekt decides to use its literal optimization at the query parser
// level, so we check if our regex can just be a literal.
if re.Op == syntax.OpLiteral {
return &zoektquery.Substring{
Pattern: string(re.Rune),
CaseSensitive: queryIsCaseSensitive,
Content: contentOnly,
FileName: filenameOnly,
}, nil
}
return &zoektquery.Regexp{
Regexp: re,
CaseSensitive: queryIsCaseSensitive,
Content: contentOnly,
FileName: filenameOnly,
}, nil
}
// repoRevFunc is a function which maps repository names returned from Zoekt
// into the Sourcegraph's resolved repository revisions for the search.
type repoRevFunc func(file *zoekt.FileMatch) (repo types.MinimalRepo, revs []string)

View File

@ -0,0 +1,9 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "zoektquery",
srcs = ["query.go"],
importpath = "github.com/sourcegraph/sourcegraph/internal/search/zoektquery",
visibility = ["//:__subpackages__"],
deps = ["@com_github_sourcegraph_zoekt//query"],
)

View File

@ -0,0 +1,42 @@
package zoektquery
import (
"regexp/syntax" //nolint:depguard // zoekt requires this pkg
"github.com/sourcegraph/zoekt/query"
)
const regexpFlags = syntax.ClassNL | syntax.PerlX | syntax.UnicodeGroups
func FileRe(pattern string, queryIsCaseSensitive bool) (query.Q, error) {
return ParseRe(pattern, true, false, queryIsCaseSensitive)
}
func ParseRe(pattern string, filenameOnly bool, contentOnly bool, queryIsCaseSensitive bool) (query.Q, error) {
// these are the flags used by zoekt, which differ to searcher.
re, err := syntax.Parse(pattern, regexpFlags)
if err != nil {
return nil, err
}
// OptimizeRegexp currently only converts capture groups into non-capture
// groups (faster for stdlib regexp to execute).
re = query.OptimizeRegexp(re, regexpFlags)
// zoekt decides to use its literal optimization at the query parser
// level, so we check if our regex can just be a literal.
if re.Op == syntax.OpLiteral {
return &query.Substring{
Pattern: string(re.Rune),
CaseSensitive: queryIsCaseSensitive,
Content: contentOnly,
FileName: filenameOnly,
}, nil
}
return &query.Regexp{
Regexp: re,
CaseSensitive: queryIsCaseSensitive,
Content: contentOnly,
FileName: filenameOnly,
}, nil
}