batches: use "keyword" as default pattern type (#63613)

This is part of the Keyword GA Project.

Batch Changes uses Sourcegraph queries to define the list of repositories on which the batch change will run.

With this change we default to pattern type "keyword" instead of "standard". 

To make this a backward compatible change, we also introduce a version identifier to batch specs. Authors can specify `version: 2` in the spec, in which case we default to pattern type "keyword". Existing specs (without a specified version) and specs with `version: 1` will keep using pattern type "standard".

Notes:
- Corresponding doc update [PR](https://github.com/sourcegraph/docs/pull/477)
- We don't have a query input field, but instead the query is defined in a batch spec YAML. It didn't feel right to edit the YAML and append "patternType: " on save, which is what we do for Code Monitors and Insights.
- I misuse the pattern type query parameter to effectively override the version. Once we introduce "V4" we should come back here and clean up. I left a TODO in the code.

Test plan:
- New and updated unit tests
- manual testing
  - new batch changes use `version: 2` by default.
  - using an unsupported version returns an error
  - I ran various "on:" queries to verify that version 2 uses keyword search and version 1 uses standard search.
This commit is contained in:
Stefan Hengl 2024-07-09 10:35:01 +02:00 committed by GitHub
parent e223b3be41
commit 1af563b614
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 231 additions and 28 deletions

View File

@ -29,6 +29,8 @@ All notable changes to Sourcegraph are documented in this file.
- Added support for Google as an LLM provider for Cody, with the following models available through Cody Gateway: Gemini Pro (`gemini-pro-latest`), Gemini 1.5 Flash (`gemini-1.5-flash-latest`), and Gemini 1.5 Pro (`gemini-1.5-pro-latest`). [#63053](https://github.com/sourcegraph/sourcegraph/pull/63053)
- Added syntax highlighting for the Magik programming language. [#62919](https://github.com/sourcegraph/sourcegraph/pull/62919)
- Added syntax highlighting for the Hack programming language. [#62770](https://github.com/sourcegraph/sourcegraph/pull/62770)
- Batch Changes: The new (optional) field "version" of batch specs determines how the spec is processed. This allows us to introduce new features while maintaining backward compatability. [#63613](https://github.com/sourcegraph/sourcegraph/pull/63613)
- Batch Changes: A new version `2` is introduced. Batch specs specifying `version: 2` will use keyword search as the default pattern type to determine repos/workspaces. Batch specs with `version: 1` or without version field keep using pattern type "standard". [#63613](https://github.com/sourcegraph/sourcegraph/pull/63613)
### Changed

View File

@ -1,9 +1,10 @@
version: 2
name: sprintf-to-itoa
description: |
This batch change uses [Comby](https://comby.dev) to replace `fmt.Sprintf` calls
in Go code with the equivalent but clearer `strconv.Iota` call.
# Find all repositories that contain the `fmt.Sprintf` statement using structural search
# Find all repositories that contain the `fmt.Sprintf` statement using regular expression search
on:
- repositoriesMatchingQuery: lang:go fmt.Sprintf\("%d", \w+\) patterntype:regexp

View File

@ -1,3 +1,4 @@
version: 2
name: update-log15-import
description: This batch change updates Go import paths for the `log15` package from `gopkg.in/inconshreveable/log15.v2` to `github.com/inconshreveable/log15` using [Comby](https://comby.dev/)

View File

@ -1,3 +1,4 @@
version: 2
name: hello-world
description: Add Hello World to READMEs

View File

@ -1,3 +1,4 @@
version: 2
name: many-comby
description: |
Apply many comby match patterns

View File

@ -1,2 +1,3 @@
version: 2
name: my-batch-change
# Add your own description, query, steps, changesetTemplate, etc.

View File

@ -1,3 +1,4 @@
version: 2
name: monorepo-dynamic
description: |
Create one changeset per workspace in the sourcegraph frontend monorepo

View File

@ -1,3 +1,4 @@
version: 2
name: apply-regex-sed
description: Apply a regex using sed
@ -20,4 +21,3 @@ changesetTemplate:
branch: batch-changes/regex
commit:
message: batch changes - apply regex

View File

@ -43,6 +43,7 @@ go_library(
"//internal/metrics",
"//internal/observation",
"//internal/repoupdater",
"//internal/search/query",
"//internal/search/streaming/api",
"//internal/search/streaming/http",
"//internal/trace",

View File

@ -26,6 +26,7 @@ import (
"github.com/sourcegraph/sourcegraph/internal/gitserver"
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
searchquery "github.com/sourcegraph/sourcegraph/internal/search/query"
streamapi "github.com/sourcegraph/sourcegraph/internal/search/streaming/api"
streamhttp "github.com/sourcegraph/sourcegraph/internal/search/streaming/http"
"github.com/sourcegraph/sourcegraph/internal/trace"
@ -144,7 +145,7 @@ func (wr *workspaceResolver) determineRepositories(ctx context.Context, batchSpe
var errs error
// TODO: this could be trivially parallelised in the future.
for _, on := range batchSpec.On {
revs, ruleType, err := wr.resolveRepositoriesOn(ctx, &on)
revs, ruleType, err := wr.resolveRepositoriesOn(ctx, &on, batchSpec.Version)
if err != nil {
errs = errors.Append(errs, errors.Wrapf(err, "resolving %q", on.String()))
continue
@ -229,12 +230,12 @@ func findIgnoredRepositories(ctx context.Context, gitserverClient gitserver.Clie
var ErrMalformedOnQueryOrRepository = batcheslib.NewValidationError(errors.New("malformed 'on' field; missing either a repository name or a query"))
// resolveRepositoriesOn resolves a single on: entry in a batch spec.
func (wr *workspaceResolver) resolveRepositoriesOn(ctx context.Context, on *batcheslib.OnQueryOrRepository) (_ []*RepoRevision, _ onlib.RepositoryRuleType, err error) {
func (wr *workspaceResolver) resolveRepositoriesOn(ctx context.Context, on *batcheslib.OnQueryOrRepository, batchSpecVersion int) (_ []*RepoRevision, _ onlib.RepositoryRuleType, err error) {
tr, ctx := trace.New(ctx, "workspaceResolver.resolveRepositoriesOn")
defer tr.EndWithErr(&err)
if on.RepositoriesMatchingQuery != "" {
revs, err := wr.resolveRepositoriesMatchingQuery(ctx, on.RepositoriesMatchingQuery)
revs, err := wr.resolveRepositoriesMatchingQuery(ctx, on.RepositoriesMatchingQuery, batchSpecVersion)
return revs, onlib.RepositoryRuleTypeQuery, err
}
@ -312,7 +313,7 @@ func (wr *workspaceResolver) resolveRepositoryNameAndBranch(ctx context.Context,
}, nil
}
func (wr *workspaceResolver) resolveRepositoriesMatchingQuery(ctx context.Context, query string) (_ []*RepoRevision, err error) {
func (wr *workspaceResolver) resolveRepositoriesMatchingQuery(ctx context.Context, query string, batchSpecVersion int) (_ []*RepoRevision, err error) {
tr, ctx := trace.New(ctx, "workspaceResolver.resolveRepositorySearch")
defer tr.EndWithErr(&err)
@ -330,7 +331,7 @@ func (wr *workspaceResolver) resolveRepositoriesMatchingQuery(ctx context.Contex
repoMap[path] = true
}
}
if err := wr.runSearch(ctx, query, func(matches []streamhttp.EventMatch) {
if err := wr.runSearch(ctx, query, batchSpecVersion, func(matches []streamhttp.EventMatch) {
for _, match := range matches {
switch m := match.(type) {
case *streamhttp.EventRepoMatch:
@ -387,8 +388,16 @@ func (wr *workspaceResolver) resolveRepositoriesMatchingQuery(ctx context.Contex
const internalSearchClientUserAgent = "Batch Changes repository resolver"
func (wr *workspaceResolver) runSearch(ctx context.Context, query string, onMatches func(matches []streamhttp.EventMatch)) (err error) {
req, err := streamhttp.NewRequestWithVersion(wr.frontendInternalURL, query, searchAPIVersion)
func determineDefaultPatternType(batchSpecVersion int) searchquery.SearchType {
if batchSpecVersion <= 1 {
return searchquery.SearchTypeStandard
}
return searchquery.SearchTypeKeyword
}
func (wr *workspaceResolver) runSearch(ctx context.Context, query string, batchSpecVersion int, onMatches func(matches []streamhttp.EventMatch)) (err error) {
defaultPatternType := determineDefaultPatternType(batchSpecVersion)
req, err := streamhttp.NewRequestWithVersion(wr.frontendInternalURL, query, searchAPIVersion, &defaultPatternType)
if err != nil {
return err
}
@ -483,12 +492,12 @@ const findDirectoriesInReposConcurrency = 10
// The locations are paths relative to the root of the directory.
// No "/" at the beginning.
// A dot (".") represents the root directory.
func (wr *workspaceResolver) FindDirectoriesInRepos(ctx context.Context, fileName string, repos ...*RepoRevision) (map[repoRevKey][]string, error) {
func (wr *workspaceResolver) FindDirectoriesInRepos(ctx context.Context, fileName string, batchSpecVersion int, repos ...*RepoRevision) (map[repoRevKey][]string, error) {
findForRepoRev := func(repoRev *RepoRevision) ([]string, error) {
query := fmt.Sprintf(`file:(^|/)%s$ repo:^%s$@%s type:path count:all`, regexp.QuoteMeta(fileName), regexp.QuoteMeta(string(repoRev.Repo.Name)), repoRev.Commit)
query := fmt.Sprintf(`file:(^|/)%s$ repo:^%s$@%s type:path count:all patterntype:keyword`, regexp.QuoteMeta(fileName), regexp.QuoteMeta(string(repoRev.Repo.Name)), repoRev.Commit)
results := []string{}
err := wr.runSearch(ctx, query, func(matches []streamhttp.EventMatch) {
err := wr.runSearch(ctx, query, batchSpecVersion, func(matches []streamhttp.EventMatch) {
for _, match := range matches {
switch m := match.(type) {
case *streamhttp.EventPathMatch:
@ -550,7 +559,7 @@ func (wr *workspaceResolver) FindDirectoriesInRepos(ctx context.Context, fileNam
}
type directoryFinder interface {
FindDirectoriesInRepos(ctx context.Context, fileName string, repos ...*RepoRevision) (map[repoRevKey][]string, error)
FindDirectoriesInRepos(ctx context.Context, fileName string, batchSpecVersion int, repos ...*RepoRevision) (map[repoRevKey][]string, error)
}
// findWorkspaces matches the given repos to the workspace configs and
@ -623,7 +632,7 @@ func findWorkspaces(
workspacesByRepoRev := map[repoRevKey]repoWorkspaces{}
for idx, repoRevs := range matched {
conf := spec.Workspaces[idx]
repoRevDirs, err := finder.FindDirectoriesInRepos(ctx, conf.RootAtLocationOf, repoRevs...)
repoRevDirs, err := finder.FindDirectoriesInRepos(ctx, conf.RootAtLocationOf, spec.Version, repoRevs...)
if err != nil {
return nil, err
}

View File

@ -421,12 +421,15 @@ func resolveWorkspacesAndCompare(t *testing.T, s *store.Store, gs gitserver.Clie
frontendInternalURL: newStreamSearchTestServer(t, matches),
}
ctx := actor.WithActor(context.Background(), actor.FromUser(u.ID))
have, err := wr.ResolveWorkspacesForBatchSpec(ctx, spec)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if diff := cmp.Diff(want, have); diff != "" {
t.Fatalf("returned workspaces wrong. (-want +got):\n%s", diff)
for _, version := range []int{0, 1, 2} { // Test all versions
spec.Version = version
have, err := wr.ResolveWorkspacesForBatchSpec(ctx, spec)
if err != nil {
t.Fatalf("version: %d, unexpected error: %s", version, err)
}
if diff := cmp.Diff(want, have); diff != "" {
t.Fatalf("version: %d, returned workspaces wrong. (-want +got):\n%s", version, diff)
}
}
}
@ -693,6 +696,6 @@ type mockDirectoryFinder struct {
results map[repoRevKey][]string
}
func (m *mockDirectoryFinder) FindDirectoriesInRepos(ctx context.Context, fileName string, repos ...*RepoRevision) (map[repoRevKey][]string, error) {
func (m *mockDirectoryFinder) FindDirectoriesInRepos(ctx context.Context, fileName string, batchSpecVersion int, repos ...*RepoRevision) (map[repoRevKey][]string, error) {
return m.results, nil
}

View File

@ -15,6 +15,7 @@ go_library(
tags = [TAG_PLATFORM_SEARCH],
visibility = ["//:__subpackages__"],
deps = [
"//internal/search/query",
"//internal/search/streaming/api",
"//lib/errors",
],
@ -30,6 +31,7 @@ go_test(
embed = [":http"],
tags = [TAG_PLATFORM_SEARCH],
deps = [
"//internal/search/query",
"//internal/search/streaming/api",
"@com_github_google_go_cmp//cmp",
"@com_github_stretchr_testify//require",

View File

@ -8,26 +8,42 @@ import (
"net/http"
"net/url"
"github.com/sourcegraph/sourcegraph/internal/search/query"
"github.com/sourcegraph/sourcegraph/internal/search/streaming/api"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
const maxPayloadSize = 10 * 1024 * 1024 // 10mb
// TODO(stefan): Remove NewRequest in favor of NewRequestWithVersion.
// NewRequest returns an http.Request against the streaming API for query.
func NewRequest(baseURL string, query string) (*http.Request, error) {
// when an empty string is passed as version, the route handler defaults to using the
// latest supported version.
return NewRequestWithVersion(baseURL, query, "")
// We don't set version or pattern type and rely on the defaults of the route
// handler.
return NewRequestWithVersion(baseURL, query, "", nil)
}
// NewRequestWithVersion returns an http.Request against the streaming API for query with the specified version.
func NewRequestWithVersion(baseURL, query, version string) (*http.Request, error) {
u := fmt.Sprintf("%s/search/stream?v=%s&q=%s", baseURL, version, url.QueryEscape(query))
// NewRequestWithVersion returns an http.Request against the streaming API for
// query with the specified version and patternType.
func NewRequestWithVersion(baseURL, query string, version string, patternType *query.SearchType) (*http.Request, error) {
u := fmt.Sprintf("%s/search/stream?q=%s", baseURL, url.QueryEscape(query))
req, err := http.NewRequest("GET", u, nil)
if err != nil {
return nil, err
}
if version != "" || patternType != nil {
q := req.URL.Query()
if version != "" {
q.Add("v", version)
}
if patternType != nil {
q.Add("t", patternType.String())
}
req.URL.RawQuery = q.Encode()
}
req.Header.Set("Accept", "text/event-stream")
return req, nil
}

View File

@ -3,9 +3,13 @@ package http
import (
"net/http"
"net/http/httptest"
"net/url"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/sourcegraph/sourcegraph/internal/search/query"
"github.com/sourcegraph/sourcegraph/internal/search/streaming/api"
)
@ -120,3 +124,75 @@ func TestFrontendClient(t *testing.T) {
t.Fatalf("mismatch (-want +got):\n%s", d)
}
}
func TestNewRequestWithVersion(t *testing.T) {
baseURL := "http://example.com"
patternTypeKeyword := query.SearchTypeKeyword
tests := []struct {
name string
query string
version string
patternType *query.SearchType
expectedQuery string
}{
{
name: "No version, no patternType",
query: "test",
version: "",
patternType: nil,
expectedQuery: "q=test",
},
{
name: "Only version",
query: "test",
version: "V4",
patternType: nil,
expectedQuery: "q=test&v=V4",
},
{
name: "Only patternType",
query: "test",
version: "",
patternType: &patternTypeKeyword,
expectedQuery: "q=test&t=keyword",
},
{
name: "Version and patternType",
query: "test query",
version: "V3",
patternType: &patternTypeKeyword,
expectedQuery: "q=test+query&v=V3&t=keyword",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
req, err := NewRequestWithVersion(baseURL, tt.query, tt.version, tt.patternType)
require.NoError(t, err)
// Check the request method
require.Equal(t, "GET", req.Method)
// Check the request URL
parsedURL, err := url.Parse(req.URL.String())
require.NoError(t, err)
expectedBaseURL, err := url.Parse(baseURL)
require.NoError(t, err)
require.Equal(t, expectedBaseURL.Host, parsedURL.Host)
require.Equal(t, expectedBaseURL.Scheme, parsedURL.Scheme)
require.Equal(t, "/search/stream", parsedURL.Path)
// Check the query parameters
queryParams := parsedURL.Query()
expectedParams, err := url.ParseQuery(tt.expectedQuery)
require.NoError(t, err)
require.Equal(t, expectedParams, queryParams)
// Check the Accept header
require.Equal(t, "text/event-stream", req.Header.Get("Accept"))
})
}
}

View File

@ -29,6 +29,7 @@ import (
// pointers, which is ugly and inefficient.
type BatchSpec struct {
Version int `json:"version,omitempty" yaml:"version"`
Name string `json:"name,omitempty" yaml:"name"`
Description string `json:"description,omitempty" yaml:"description"`
On []OnQueryOrRepository `json:"on,omitempty" yaml:"on"`

View File

@ -11,7 +11,7 @@ import (
)
func TestParseBatchSpec(t *testing.T) {
t.Run("valid", func(t *testing.T) {
t.Run("valid_without_version", func(t *testing.T) {
const spec = `
name: hello-world
description: Add Hello World to READMEs
@ -36,6 +36,81 @@ changesetTemplate:
}
})
t.Run("valid with version 1", func(t *testing.T) {
const spec = `
version: 1
name: hello-world
description: Add Hello World to READMEs
on:
- repositoriesMatchingQuery: file:README.md
steps:
- run: echo Hello World | tee -a $(find -name README.md)
container: alpine:3
changesetTemplate:
title: Hello World
body: My first batch change!
branch: hello-world
commit:
message: Append Hello World to all README.md files
published: false
fork: false
`
_, err := ParseBatchSpec([]byte(spec))
if err != nil {
t.Fatalf("parsing valid spec returned error: %s", err)
}
})
t.Run("valid with version 2", func(t *testing.T) {
const spec = `
version: 2
name: hello-world
description: Add Hello World to READMEs
on:
- repositoriesMatchingQuery: file:README.md
steps:
- run: echo Hello World | tee -a $(find -name README.md)
container: alpine:3
changesetTemplate:
title: Hello World
body: My first batch change!
branch: hello-world
commit:
message: Append Hello World to all README.md files
published: false
fork: false
`
_, err := ParseBatchSpec([]byte(spec))
if err != nil {
t.Fatalf("parsing valid spec returned error: %s", err)
}
})
t.Run("invalid version", func(t *testing.T) {
const spec = `
version: 99
name: hello-world
description: Add Hello World to READMEs
on:
- repositoriesMatchingQuery: file:README.md
steps:
- run: echo Hello World | tee -a $(find -name README.md)
container: alpine:3
changesetTemplate:
title: Hello World
body: My first batch change!
branch: hello-world
commit:
message: Append Hello World to all README.md files
published: false
fork: false
`
_, err := ParseBatchSpec([]byte(spec))
assert.Equal(t, "version: version must be one of the following: 1, 2", err.Error())
})
t.Run("missing changesetTemplate", func(t *testing.T) {
const spec = `
name: hello-world

View File

@ -12,6 +12,11 @@ const BatchSpecJSON = `{
"additionalProperties": false,
"required": ["name"],
"properties": {
"version": {
"type": "number",
"description": "The version of the batch spec schema. Defaults to 1.",
"enum": [1, 2]
},
"name": {
"type": "string",
"description": "The name of the batch change, which is unique among all batch changes in the namespace. A batch change's name is case-preserving.",

View File

@ -7,6 +7,11 @@
"additionalProperties": false,
"required": ["name"],
"properties": {
"version": {
"type": "number",
"description": "The version of the batch spec schema. Defaults to 1.",
"enum": [1, 2]
},
"name": {
"type": "string",
"description": "The name of the batch change, which is unique among all batch changes in the namespace. A batch change's name is case-preserving.",

View File

@ -309,6 +309,8 @@ type BatchSpec struct {
Steps []*Step `json:"steps,omitempty"`
// TransformChanges description: Optional transformations to apply to the changes produced in each repository.
TransformChanges *TransformChanges `json:"transformChanges,omitempty"`
// Version description: The version of the batch spec schema. Defaults to 1.
Version float64 `json:"version,omitempty"`
// Workspaces description: Individual workspace configurations for one or more repositories that define which workspaces to use for the execution of steps in the repositories.
Workspaces []*WorkspaceConfiguration `json:"workspaces,omitempty"`
}