mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 15:31:48 +00:00
search jobs: refactor SearchQuery interface (#59468)
Relates to #59329 This is mostly a refactor to prepare for a JSON writer that replaces our CSV writer. The high-level plan is to switch the internal (object storage) AND the external (download) format from CSV to JSON. Test plan: - CI - Note: I had to update `SearcherFake`, hence some of the test data changed. It only affects a few tests and the character of the tests remains the same.
This commit is contained in:
parent
f666295b6f
commit
0eaa40528e
@ -72,7 +72,10 @@ func (h *exhaustiveSearchRepoRevHandler) Handle(ctx context.Context, logger log.
|
||||
return err
|
||||
}
|
||||
|
||||
csvWriter := service.NewBlobstoreCSVWriter(ctx, h.uploadStore, fmt.Sprintf("%d-%d", jobID, record.ID))
|
||||
csvWriter, err := service.NewCSVWriter(ctx, h.uploadStore, fmt.Sprintf("%d-%d", jobID, record.ID))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = q.Search(ctx, repoRev, csvWriter)
|
||||
if closeErr := csvWriter.Close(); closeErr != nil {
|
||||
|
||||
@ -118,9 +118,9 @@ func TestExhaustiveSearch(t *testing.T) {
|
||||
}
|
||||
sort.Strings(vals)
|
||||
require.Equal([]string{
|
||||
"repo,revspec,revision\n1,spec,rev1\n",
|
||||
"repo,revspec,revision\n1,spec,rev2\n",
|
||||
"repo,revspec,revision\n2,spec,rev3\n",
|
||||
"repository,revision,file_path,match_count,first_match_url\n1,rev1,path/to/file.go,0,/1@rev1/-/blob/path/to/file.go\n",
|
||||
"repository,revision,file_path,match_count,first_match_url\n1,rev2,path/to/file.go,0,/1@rev2/-/blob/path/to/file.go\n",
|
||||
"repository,revision,file_path,match_count,first_match_url\n2,rev3,path/to/file.go,0,/2@rev3/-/blob/path/to/file.go\n",
|
||||
}, vals)
|
||||
}
|
||||
|
||||
|
||||
@ -1,31 +1,42 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strconv"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/conf"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/result"
|
||||
"github.com/sourcegraph/sourcegraph/internal/uploadstore"
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
)
|
||||
|
||||
type matchCSVWriter struct {
|
||||
type MatchCSVWriter struct {
|
||||
w CSVWriter
|
||||
headerTyp string
|
||||
host *url.URL
|
||||
}
|
||||
|
||||
func newMatchCSVWriter(w CSVWriter) (*matchCSVWriter, error) {
|
||||
func NewCSVWriter(ctx context.Context, store uploadstore.Store, prefix string) (*MatchCSVWriter, error) {
|
||||
csvWriter := newBlobstoreCSVWriter(ctx, store, prefix)
|
||||
return newMatchCSVWriter(csvWriter)
|
||||
}
|
||||
|
||||
func newMatchCSVWriter(w CSVWriter) (*MatchCSVWriter, error) {
|
||||
externalURL := conf.Get().ExternalURL
|
||||
u, err := url.Parse(externalURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &matchCSVWriter{w: w, host: u}, nil
|
||||
return &MatchCSVWriter{w: w, host: u}, nil
|
||||
}
|
||||
|
||||
func (w *matchCSVWriter) Write(match result.Match) error {
|
||||
func (w *MatchCSVWriter) Close() error {
|
||||
return w.w.Close()
|
||||
}
|
||||
|
||||
func (w *MatchCSVWriter) Write(match result.Match) error {
|
||||
// TODO compare to logic used by the webapp to convert
|
||||
// results into csv. See
|
||||
// client/web/src/search/results/export/searchResultsExport.ts
|
||||
@ -38,7 +49,7 @@ func (w *matchCSVWriter) Write(match result.Match) error {
|
||||
}
|
||||
}
|
||||
|
||||
func (w *matchCSVWriter) writeFileMatch(fm *result.FileMatch) error {
|
||||
func (w *MatchCSVWriter) writeFileMatch(fm *result.FileMatch) error {
|
||||
// Differences to "Export CSV" in webapp. We have removed columns since it
|
||||
// is easier to add columns than to remove them.
|
||||
//
|
||||
@ -159,7 +170,7 @@ func minRange(ranges result.Ranges) (result.Range, bool) {
|
||||
return min, true
|
||||
}
|
||||
|
||||
func (w *matchCSVWriter) writeHeader(typ string) (bool, error) {
|
||||
func (w *MatchCSVWriter) writeHeader(typ string) (bool, error) {
|
||||
if w.headerTyp == "" {
|
||||
w.headerTyp = typ
|
||||
return true, nil
|
||||
|
||||
@ -5,11 +5,15 @@ import (
|
||||
"context"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/actor"
|
||||
"github.com/sourcegraph/sourcegraph/internal/api"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/exhaustive/types"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/result"
|
||||
types2 "github.com/sourcegraph/sourcegraph/internal/types"
|
||||
"github.com/sourcegraph/sourcegraph/internal/uploadstore"
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
"github.com/sourcegraph/sourcegraph/lib/iterator"
|
||||
@ -72,15 +76,15 @@ type SearchQuery interface {
|
||||
|
||||
ResolveRepositoryRevSpec(context.Context, types.RepositoryRevSpecs) ([]types.RepositoryRevision, error)
|
||||
|
||||
Search(context.Context, types.RepositoryRevision, CSVWriter) error
|
||||
Search(context.Context, types.RepositoryRevision, MatchWriter) error
|
||||
}
|
||||
|
||||
// CSVWriter makes it so we can avoid caring about search types and leave it
|
||||
type MatchWriter interface {
|
||||
Write(match result.Match) error
|
||||
}
|
||||
|
||||
// CSVWriter makes it, so we can avoid caring about search types and leave it
|
||||
// up to the search job to decide the shape of data.
|
||||
//
|
||||
// Note: I expect the implementation of this to handle things like chunking up
|
||||
// the CSV/etc. EG once we hit 100MB of data it can write the data out then
|
||||
// start a new file. It takes care of remembering the header for the new file.
|
||||
type CSVWriter interface {
|
||||
// WriteHeader should be called first and only once.
|
||||
WriteHeader(...string) error
|
||||
@ -88,9 +92,11 @@ type CSVWriter interface {
|
||||
// WriteRow should have the same number of values as WriteHeader and can be
|
||||
// called zero or more times.
|
||||
WriteRow(...string) error
|
||||
|
||||
io.Closer
|
||||
}
|
||||
|
||||
// NewBlobstoreCSVWriter creates a new BlobstoreCSVWriter which writes a CSV to
|
||||
// newBlobstoreCSVWriter creates a new BlobstoreCSVWriter which writes a CSV to
|
||||
// the store. BlobstoreCSVWriter takes care of chunking the CSV into blobs of
|
||||
// 100MiB, each with the same header row. Blobs are named {prefix}-{shard}
|
||||
// except for the first blob, which is named {prefix}.
|
||||
@ -100,7 +106,7 @@ type CSVWriter interface {
|
||||
//
|
||||
// The caller is expected to call Close() once and only once after the last call
|
||||
// to WriteRow.
|
||||
func NewBlobstoreCSVWriter(ctx context.Context, store uploadstore.Store, prefix string) *BlobstoreCSVWriter {
|
||||
func newBlobstoreCSVWriter(ctx context.Context, store uploadstore.Store, prefix string) *BlobstoreCSVWriter {
|
||||
|
||||
c := &BlobstoreCSVWriter{
|
||||
maxBlobSizeBytes: 100 * 1024 * 1024,
|
||||
@ -312,15 +318,18 @@ func (s searcherFake) ResolveRepositoryRevSpec(ctx context.Context, repoRevSpec
|
||||
return repoRevs, nil
|
||||
}
|
||||
|
||||
func (s searcherFake) Search(ctx context.Context, r types.RepositoryRevision, w CSVWriter) error {
|
||||
func (s searcherFake) Search(ctx context.Context, r types.RepositoryRevision, w MatchWriter) error {
|
||||
if err := isSameUser(ctx, s.userID); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := w.WriteHeader("repo", "revspec", "revision"); err != nil {
|
||||
return err
|
||||
}
|
||||
return w.WriteRow(strconv.Itoa(int(r.Repository)), string(r.RevisionSpecifiers), string(r.Revision))
|
||||
return w.Write(&result.FileMatch{
|
||||
File: result.File{
|
||||
Repo: types2.MinimalRepo{Name: api.RepoName(strconv.Itoa(int(r.Repository)))},
|
||||
CommitID: api.CommitID(r.Revision),
|
||||
Path: "path/to/file.go",
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func isSameUser(ctx context.Context, userID int32) error {
|
||||
|
||||
@ -63,10 +63,14 @@ func (c *csvBuffer) WriteRow(row ...string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *csvBuffer) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestBlobstoreCSVWriter(t *testing.T) {
|
||||
mockStore := setupMockStore(t)
|
||||
|
||||
csvWriter := NewBlobstoreCSVWriter(context.Background(), mockStore, "blob")
|
||||
csvWriter := newBlobstoreCSVWriter(context.Background(), mockStore, "blob")
|
||||
csvWriter.maxBlobSizeBytes = 12
|
||||
|
||||
err := csvWriter.WriteHeader("h", "h", "h") // 3 bytes (letters) + 2 bytes (commas) + 1 byte (newline) = 6 bytes
|
||||
@ -116,7 +120,7 @@ func TestBlobstoreCSVWriter(t *testing.T) {
|
||||
|
||||
func TestNoUploadIfNotData(t *testing.T) {
|
||||
mockStore := setupMockStore(t)
|
||||
csvWriter := NewBlobstoreCSVWriter(context.Background(), mockStore, "blob")
|
||||
csvWriter := newBlobstoreCSVWriter(context.Background(), mockStore, "blob")
|
||||
|
||||
// No data written, so no upload should happen.
|
||||
err := csvWriter.Close()
|
||||
|
||||
@ -161,7 +161,7 @@ func (s searchQuery) toRepoRevSpecs(ctx context.Context, repoRevSpec types.Repos
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s searchQuery) Search(ctx context.Context, repoRev types.RepositoryRevision, w CSVWriter) error {
|
||||
func (s searchQuery) Search(ctx context.Context, repoRev types.RepositoryRevision, matchWriter MatchWriter) error {
|
||||
if err := isSameUser(ctx, s.userID); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -181,10 +181,6 @@ func (s searchQuery) Search(ctx context.Context, repoRev types.RepositoryRevisio
|
||||
|
||||
var mu sync.Mutex // serialize writes to w
|
||||
var writeRowErr error // capture if w.Write fails
|
||||
matchWriter, err := newMatchCSVWriter(w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO currently ignoring returned Alert
|
||||
_, err = job.Run(ctx, s.clients, streaming.StreamFunc(func(se streaming.SearchEvent) {
|
||||
|
||||
@ -38,10 +38,10 @@ func TestBackendFake(t *testing.T) {
|
||||
Query: "1@rev1 1@rev2 2@rev3",
|
||||
WantRefSpecs: "RepositoryRevSpec{1@spec} RepositoryRevSpec{2@spec}",
|
||||
WantRepoRevs: "RepositoryRevision{1@rev1} RepositoryRevision{1@rev2} RepositoryRevision{2@rev3}",
|
||||
WantCSV: autogold.Expect(`repo,revspec,revision
|
||||
1,spec,rev1
|
||||
1,spec,rev2
|
||||
2,spec,rev3
|
||||
WantCSV: autogold.Expect(`repository,revision,file_path,match_count,first_match_url
|
||||
1,rev1,path/to/file.go,0,/1@rev1/-/blob/path/to/file.go
|
||||
1,rev2,path/to/file.go,0,/1@rev2/-/blob/path/to/file.go
|
||||
2,rev3,path/to/file.go,0,/2@rev3/-/blob/path/to/file.go
|
||||
`),
|
||||
})
|
||||
}
|
||||
@ -365,8 +365,11 @@ func testNewSearcher(t *testing.T, ctx context.Context, newSearcher NewSearcher,
|
||||
|
||||
// Test Search
|
||||
var csv csvBuffer
|
||||
matchWriter, err := newMatchCSVWriter(&csv)
|
||||
assert.NoError(err)
|
||||
|
||||
for _, repoRev := range repoRevs {
|
||||
err := searcher.Search(ctx, repoRev, &csv)
|
||||
err := searcher.Search(ctx, repoRev, matchWriter)
|
||||
assert.NoError(err)
|
||||
}
|
||||
if tc.WantCSV != nil {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user