chore(search): update logging of search durations (#64269)

The current logging is outdated. This PR is not a complete rewrite. It
tries to keep semantics mostly as-is but it is more generous when it
comes to what we log. (For example we didn't log `(AND foo bar)` before)

IMO this is a good compromise we can ship quickly and then take some
time to figure out how to distinguish suggestions, searches, code-intel
and whether it is a good idea to mix search types and result types.

Updates:
- Prefer to log result types, fall back to pattern type. (This is
largely the same logic as before but we don't bail for complex queries)
- Don't guess repo and file results types if not explicitly specified.
We loose a bit of information here but it keeps the code much cleaner.
- Allow "AND" and "OR" operators.

The updated test case gives a good overview of how we would log things
in the future.

Kept as-is:
- I left "new" and "legacy" logging in place. I am not sure how we use
the target stores right now so I wanted to keep this as-is for now.
However we should see if we can retire the legacy logging.
- The previous and current logic translates `type:file` queries to
pattern types. I guess this makes sense from the perspective of how
things are implemented in the backend.
- "type:path" is logged as "file".

## Test plan:
Updated unit test
This commit is contained in:
Stefan Hengl 2024-08-06 07:26:01 +02:00 committed by GitHub
parent a310035006
commit 737e460a64
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 158 additions and 107 deletions

View File

@ -73,6 +73,11 @@ func (l *LogJob) MapChildren(fn job.MapFunc) job.Job {
// only be called after a search result is performed, because it relies on the
// invariant that query and pattern error checking has already been performed.
func (l *LogJob) logEvent(ctx context.Context, clients job.RuntimeClients, duration time.Duration) {
a := actor.FromContext(ctx)
if !a.IsAuthenticated() || a.IsMockUser() {
return
}
tr, ctx := trace.New(ctx, "LogSearchDuration")
defer tr.End()
@ -86,100 +91,64 @@ func (l *LogJob) logEvent(ctx context.Context, clients job.RuntimeClients, durat
// Map type:path to file
types = append(types, "file")
case "file":
switch {
case l.inputs.PatternType == query.SearchTypeStandard:
types = append(types, "standard")
case l.inputs.PatternType == query.SearchTypeStructural:
types = append(types, "structural")
case l.inputs.PatternType == query.SearchTypeLiteral:
types = append(types, "literal")
case l.inputs.PatternType == query.SearchTypeRegex:
types = append(types, "regexp")
}
types = append(types, l.inputs.PatternType.String())
}
}
// Don't record composite searches that specify more than one type:
// because we can't break down the search timings into multiple
// categories.
if len(types) > 1 {
return
// Prefer the result type if it is a single type, otherwise use the pattern
// type.
action := ""
if len(types) == 1 {
action = types[0]
} else {
action = l.inputs.PatternType.String()
}
// New events that get exported: https://docs-legacy.sourcegraph.com/dev/background-information/telemetry
events := telemetryrecorder.NewBestEffort(clients.Logger, clients.DB)
// For now, do not tee into event_logs in telemetryrecorder - retain the
// custom instrumentation of V1 events instead (usagestats.LogBackendEvent)
ctx = teestore.WithoutV1(ctx)
// Search.latencies
events.Record(ctx, "search.latencies", telemetry.SafeAction(action), &telemetry.EventParameters{
Metadata: telemetry.EventMetadata{
"durationMs": float64(duration.Milliseconds()),
},
})
// Legacy event
value := fmt.Sprintf(`{"durationMs": %d}`, duration.Milliseconds())
eventName := fmt.Sprintf("search.latencies.%s", action)
//lint:ignore SA1019 existing usage of deprecated functionality. TODO: Use only the new V2 event instead.
err := usagestats.LogBackendEvent(clients.DB, a.UID, deviceid.FromContext(ctx), eventName, json.RawMessage(value), json.RawMessage(value), featureflag.GetEvaluatedFlagSet(ctx), nil)
if err != nil {
clients.Logger.Warn("Could not log search latency", log.Error(err))
}
// Log usage of file:has.owners and select:file.owners
q, err := query.ToBasicQuery(l.inputs.Query)
if err != nil {
// Can't convert to a basic query, can't guarantee accurate reporting.
return
}
if !query.IsPatternAtom(q) {
// Not an atomic pattern, can't guarantee accurate reporting.
return
}
// If no type: was explicitly specified, infer the result type.
if len(types) == 0 {
// If a pattern was specified, a content search happened.
if q.IsLiteral() {
types = append(types, "literal")
} else if q.IsRegexp() {
types = append(types, "regexp")
} else if q.IsStructural() {
types = append(types, "structural")
} else if l.inputs.Query.Exists(query.FieldFile) {
// No search pattern specified and file: is specified.
types = append(types, "file")
} else {
// No search pattern or file: is specified, assume repo.
// This includes accounting for searches of fields that
// specify repohasfile: and repohascommitafter:.
types = append(types, "repo")
if _, _, ok := isOwnershipSearch(q); ok {
// New event
events.Record(ctx, "search", "file.hasOwners", nil)
//lint:ignore SA1019 existing usage of deprecated functionality. TODO: Use only the new V2 event instead.
err := usagestats.LogBackendEvent(clients.DB, a.UID, deviceid.FromContext(ctx), "FileHasOwnerSearch", nil, nil, featureflag.GetEvaluatedFlagSet(ctx), nil)
if err != nil {
clients.Logger.Warn("Could not log use of file:has.owners", log.Error(err))
}
}
// Only log the time if we successfully resolved one search type.
if len(types) == 1 {
// New events that get exported: https://docs-legacy.sourcegraph.com/dev/background-information/telemetry
events := telemetryrecorder.NewBestEffort(clients.Logger, clients.DB)
// For now, do not tee into event_logs in telemetryrecorder - retain the
// custom instrumentation of V1 events instead (usagestats.LogBackendEvent)
ctx = teestore.WithoutV1(ctx)
a := actor.FromContext(ctx)
if a.IsAuthenticated() && !a.IsMockUser() { // Do not log in tests
if v, _ := q.ToParseTree().StringValue(query.FieldSelect); v != "" {
if sp, err := filter.SelectPathFromString(v); err == nil && isSelectOwnersSearch(sp) {
// New event
events.Record(ctx, "search.latencies", telemetry.SafeAction(types[0]), &telemetry.EventParameters{
Metadata: telemetry.EventMetadata{
"durationMs": float64(duration.Milliseconds()),
},
})
// Legacy event
value := fmt.Sprintf(`{"durationMs": %d}`, duration.Milliseconds())
eventName := fmt.Sprintf("search.latencies.%s", types[0])
events.Record(ctx, "search", "select.fileOwners", nil)
//lint:ignore SA1019 existing usage of deprecated functionality. TODO: Use only the new V2 event instead.
err := usagestats.LogBackendEvent(clients.DB, a.UID, deviceid.FromContext(ctx), eventName, json.RawMessage(value), json.RawMessage(value), featureflag.GetEvaluatedFlagSet(ctx), nil)
err := usagestats.LogBackendEvent(clients.DB, a.UID, deviceid.FromContext(ctx), "SelectFileOwnersSearch", nil, nil, featureflag.GetEvaluatedFlagSet(ctx), nil)
if err != nil {
clients.Logger.Warn("Could not log search latency", log.Error(err))
}
if _, _, ok := isOwnershipSearch(q); ok {
// New event
events.Record(ctx, "search", "file.hasOwners", nil)
//lint:ignore SA1019 existing usage of deprecated functionality. TODO: Use only the new V2 event instead.
err := usagestats.LogBackendEvent(clients.DB, a.UID, deviceid.FromContext(ctx), "FileHasOwnerSearch", nil, nil, featureflag.GetEvaluatedFlagSet(ctx), nil)
if err != nil {
clients.Logger.Warn("Could not log use of file:has.owners", log.Error(err))
}
}
if v, _ := q.ToParseTree().StringValue(query.FieldSelect); v != "" {
if sp, err := filter.SelectPathFromString(v); err == nil && isSelectOwnersSearch(sp) {
// New event
events.Record(ctx, "search", "select.fileOwners", nil)
//lint:ignore SA1019 existing usage of deprecated functionality. TODO: Use only the new V2 event instead.
err := usagestats.LogBackendEvent(clients.DB, a.UID, deviceid.FromContext(ctx), "SelectFileOwnersSearch", nil, nil, featureflag.GetEvaluatedFlagSet(ctx), nil)
if err != nil {
clients.Logger.Warn("Could not log use of select:file.owners", log.Error(err))
}
}
clients.Logger.Warn("Could not log use of select:file.owners", log.Error(err))
}
}
}

View File

@ -5,7 +5,8 @@ import (
"sort"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/hexops/autogold/v2"
"github.com/stretchr/testify/require"
"github.com/sourcegraph/log/logtest"
@ -49,27 +50,114 @@ func (s *fakeEventLogStore) loggedEventNames() []string {
func TestOwnSearchEventNames(t *testing.T) {
type wantEvents struct {
legacy []string // we retain manual instrumentation of existing events
new []string // https://docs-legacy.sourcegraph.com/dev/background-information/telemetry
legacy autogold.Value // we retain manual instrumentation of existing events
new autogold.Value // https://docs-legacy.sourcegraph.com/dev/background-information/telemetry
}
for literal, wantEventNames := range map[string]wantEvents{
"file:has.owner(one@example.com)": {
legacy: []string{"FileHasOwnerSearch", "search.latencies.file"},
new: []string{"search.latencies - file", "search - file.hasOwners"},
cases := []struct {
query string
searchType query.SearchType
want wantEvents
}{
// result types
{
query: "sourcegraph type:repo",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.repo"}),
new: autogold.Expect([]string{"search.latencies - repo"}),
},
},
"select:file.owners": {
legacy: []string{"SelectFileOwnersSearch", "search.latencies.repo"},
new: []string{"search.latencies - repo", "search - select.fileOwners"},
{
query: "sourcegraph type:diff",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.diff"}),
new: autogold.Expect([]string{"search.latencies - diff"}),
},
},
} {
t.Run(literal, func(t *testing.T) {
q, err := query.ParseLiteral(literal)
if err != nil {
t.Fatalf("ParseLiteral: %s", err)
}
{
query: "search results type:file",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.keyword"}),
new: autogold.Expect([]string{"search.latencies - keyword"}),
},
},
{
query: "search results type:path",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.file"}),
new: autogold.Expect([]string{"search.latencies - file"}),
},
},
// pattern types
{
query: "bytes buffer",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.keyword"}),
new: autogold.Expect([]string{"search.latencies - keyword"}),
},
},
{
query: "bytes",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.keyword"}),
new: autogold.Expect([]string{"search.latencies - keyword"}),
},
},
{
query: "bytes buffer",
searchType: query.SearchTypeStandard,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.standard"}),
new: autogold.Expect([]string{"search.latencies - standard"}),
},
},
{
query: "bytes buffer",
searchType: query.SearchTypeRegex,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.regex"}),
new: autogold.Expect([]string{"search.latencies - regex"}),
},
},
{
query: "if ... else ...",
searchType: query.SearchTypeStructural,
want: wantEvents{
legacy: autogold.Expect([]string{"search.latencies.structural"}),
new: autogold.Expect([]string{"search.latencies - structural"}),
},
},
// other
{
query: "file:has.owner(one@example.com)",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"FileHasOwnerSearch", "search.latencies.keyword"}),
new: autogold.Expect([]string{"search.latencies - keyword", "search - file.hasOwners"}),
},
},
{
query: "select:file.owners",
searchType: query.SearchTypeKeyword,
want: wantEvents{
legacy: autogold.Expect([]string{"SelectFileOwnersSearch", "search.latencies.keyword"}),
new: autogold.Expect([]string{"search.latencies - keyword", "search - select.fileOwners"}),
},
},
}
for _, c := range cases {
t.Run(c.query, func(t *testing.T) {
q, err := query.ParseSearchType(c.query, c.searchType)
require.NoError(t, err)
inputs := &search.Inputs{
UserSettings: &schema.Settings{},
PatternType: query.SearchTypeLiteral,
PatternType: c.searchType,
Protocol: search.Streaming,
OnSourcegraphDotCom: true,
Query: q,
@ -90,20 +178,14 @@ func TestOwnSearchEventNames(t *testing.T) {
ctx := actor.WithActor(context.Background(), actor.FromUser(42))
childJob := mockjob.NewMockJob()
logJob := jobutil.NewLogJob(inputs, childJob)
if _, err := logJob.Run(ctx, job.RuntimeClients{
_, err = logJob.Run(ctx, job.RuntimeClients{
Logger: logtest.Scoped(t),
DB: db,
}, streaming.NewNullStream()); err != nil {
t.Fatalf("LogJob.Run: %s", err)
}
// legacy events
if diff := cmp.Diff(wantEventNames.legacy, legacyEvents.loggedEventNames()); diff != "" {
t.Errorf("logged legacy events, -want+got: %s", diff)
}
// new events
if diff := cmp.Diff(wantEventNames.new, newEvents.GetMockQueuedEvents().Summary()); diff != "" {
t.Errorf("logged new events, -want+got: %s", diff)
}
}, streaming.NewNullStream())
require.NoError(t, err)
c.want.legacy.Equal(t, legacyEvents.loggedEventNames())
c.want.new.Equal(t, newEvents.GetMockQueuedEvents().Summary())
})
}
}