diff --git a/.github/workflows/buildchecker.yml b/.github/workflows/buildchecker.yml index 56ad67f00a2..c189591acac 100644 --- a/.github/workflows/buildchecker.yml +++ b/.github/workflows/buildchecker.yml @@ -24,3 +24,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.AUTOBUILDSHERRIF_GITHUB_TOKEN }} BUILDKITE_TOKEN: ${{ secrets.AUTOBUILDSHERRIF_BUILDKITE_TOKEN }} SLACK_WEBHOOK: ${{ secrets.AUTOBUILDSHERRIF_SLACK_WEBHOOK }} + SLACK_TOKEN: ${{ secrets.AUTOBUILDSHERRIF_SLACK_TOKEN }} diff --git a/dev/buildchecker/checker.go b/dev/buildchecker/checker.go index 46189d8161f..3c4a9e0b060 100644 --- a/dev/buildchecker/checker.go +++ b/dev/buildchecker/checker.go @@ -7,16 +7,19 @@ import ( "time" "github.com/buildkite/go-buildkite/v3/buildkite" + "github.com/google/go-github/v41/github" ) type CheckOptions struct { FailuresThreshold int BuildTimeout time.Duration + GitHubClient *github.Client } type CommitInfo struct { - Commit string - Author string + Commit string + SlackUserID string + Author string } type CheckResults struct { @@ -30,7 +33,7 @@ type CheckResults struct { // CheckBuilds is the main buildchecker program. It checks the given builds for relevant // failures and runs lock/unlock operations on the given branch. -func CheckBuilds(ctx context.Context, branch BranchLocker, builds []buildkite.Build, opts CheckOptions) (results *CheckResults, err error) { +func CheckBuilds(ctx context.Context, branch BranchLocker, slackUser SlackUserResolver, builds []buildkite.Build, opts CheckOptions) (results *CheckResults, err error) { results = &CheckResults{} // Scan for first build with a meaningful state @@ -69,6 +72,17 @@ func CheckBuilds(ctx context.Context, branch BranchLocker, builds []buildkite.Bu } fmt.Println("threshold exceeded, this is a big deal!") + + // annotate the failures with their author (Github handle), so we can reach them + // over Slack. + for i, info := range results.FailedCommits { + results.FailedCommits[i].SlackUserID, err = slackUser.ResolveByCommit(ctx, info.Commit) + if err != nil { + // If we can't resolve the user, do not interrupt the process. + fmt.Println(fmt.Errorf("slackUserResolve: %w", err)) + } + } + results.LockBranch = true results.Action, err = branch.Lock(ctx, results.FailedCommits, "dev-experience") if err != nil { @@ -124,11 +138,12 @@ func checkConsecutiveFailures(builds []buildkite.Build, threshold int, timeout t return } - consecutiveFailures += 1 var author string if b.Author != nil { author = fmt.Sprintf("%s (%s)", b.Author.Name, b.Author.Email) } + + consecutiveFailures += 1 failedCommits = append(failedCommits, CommitInfo{ Commit: *b.Commit, Author: author, diff --git a/dev/buildchecker/checker_test.go b/dev/buildchecker/checker_test.go index b5c9b016fb2..5b522abc012 100644 --- a/dev/buildchecker/checker_test.go +++ b/dev/buildchecker/checker_test.go @@ -26,6 +26,7 @@ func (m *mockBranchLocker) Lock(context.Context, []CommitInfo, string) (func() e func TestCheckBuilds(t *testing.T) { // Simple end-to-end tests of the buildchecker entrypoint with mostly fixed parameters ctx := context.Background() + slackUser := NewMockSlackUserResolver("commit", nil) testOptions := CheckOptions{ FailuresThreshold: 2, BuildTimeout: time.Hour, @@ -82,7 +83,7 @@ func TestCheckBuilds(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var lock = &mockBranchLocker{} - res, err := CheckBuilds(ctx, lock, tt.builds, testOptions) + res, err := CheckBuilds(ctx, lock, slackUser, tt.builds, testOptions) assert.NoError(t, err) assert.Equal(t, tt.wantLocked, res.LockBranch) // Mock always returns an action, check it's always assigned correctly diff --git a/dev/buildchecker/main.go b/dev/buildchecker/main.go index c87a859b233..00e1c203e40 100644 --- a/dev/buildchecker/main.go +++ b/dev/buildchecker/main.go @@ -9,6 +9,7 @@ import ( "github.com/buildkite/go-buildkite/v3/buildkite" "github.com/google/go-github/v41/github" + "github.com/slack-go/slack" "golang.org/x/oauth2" ) @@ -17,6 +18,7 @@ func main() { ctx = context.Background() buildkiteToken string githubToken string + slackToken string slackWebhook string pipeline string branch string @@ -26,6 +28,7 @@ func main() { flag.StringVar(&buildkiteToken, "buildkite.token", "", "mandatory buildkite token") flag.StringVar(&githubToken, "github.token", "", "mandatory github token") + flag.StringVar(&slackToken, "slack.token", "", "mandatory slack api token") flag.StringVar(&slackWebhook, "slack.webhook", "", "Slack Webhook URL to post the results on") flag.StringVar(&pipeline, "pipeline", "sourcegraph", "name of the pipeline to inspect") flag.StringVar(&branch, "branch", "main", "name of the branch to inspect") @@ -45,9 +48,13 @@ func main() { &oauth2.Token{AccessToken: githubToken}, ))) + // Slack client + slc := slack.New(slackToken) + // Newest is returned first https://buildkite.com/docs/apis/rest-api/builds#list-builds-for-a-pipeline builds, _, err := bkc.Builds.ListByPipeline("sourcegraph", pipeline, &buildkite.BuildsListOptions{ - Branch: branch, + // Branch: branch, + Branch: "main", // Fix to high page size just in case, default is 30 // https://buildkite.com/docs/apis/rest-api#pagination ListOptions: buildkite.ListOptions{PerPage: 99}, @@ -59,11 +66,13 @@ func main() { opts := CheckOptions{ FailuresThreshold: threshold, BuildTimeout: time.Duration(timeoutMins) * time.Minute, + GitHubClient: ghc, } log.Printf("running buildchecker over %d builds with option: %+v\n", len(builds), opts) results, err := CheckBuilds( ctx, NewBranchLocker(ghc, "sourcegraph", "sourcegraph", branch), + NewGithubSlackUserResolver(ghc, slc, "sourcegraph", "sourcegraph"), builds, opts, ) diff --git a/dev/buildchecker/run.sh b/dev/buildchecker/run.sh index a49d2c63b36..ee60dd38db3 100755 --- a/dev/buildchecker/run.sh +++ b/dev/buildchecker/run.sh @@ -9,4 +9,5 @@ echo "--- Running buildchecker" go run ./dev/buildchecker/ \ -buildkite.token="$BUILDKITE_TOKEN" \ -github.token="$GITHUB_TOKEN" \ + -slack.token="$SLACK_TOKEN" \ -slack.webhook="$SLACK_WEBHOOK" diff --git a/dev/buildchecker/slack.go b/dev/buildchecker/slack.go index aff78f86093..c9dd49b2622 100644 --- a/dev/buildchecker/slack.go +++ b/dev/buildchecker/slack.go @@ -9,6 +9,10 @@ import ( "time" ) +func slackMention(slackUserID string) string { + return fmt.Sprintf("<@%s>", slackUserID) +} + func slackSummary(locked bool, failedCommits []CommitInfo) string { if !locked { return ":white_check_mark: Pipeline healthy - branch unlocked!" @@ -16,9 +20,16 @@ func slackSummary(locked bool, failedCommits []CommitInfo) string { message := `:alert: *Consecutive build failures detected - branch has been locked.* :alert: The authors of the following failed commits who are Sourcegraph teammates have been granted merge access to investigate and resolve the issue: ` + for _, commit := range failedCommits { - message += fmt.Sprintf("\n- - %s", - commit.Commit, commit.Commit, commit.Author) + var mention string + if commit.SlackUserID != "" { + mention = slackMention(commit.SlackUserID) + } else { + mention = ":warning: Cannot find Slack user :warning:" + } + message += fmt.Sprintf("\n- - %s - %s", + commit.Commit, commit.Commit, commit.Author, mention) } message += ` diff --git a/dev/buildchecker/slack_test.go b/dev/buildchecker/slack_test.go index 43fa835c455..ba97a02de41 100644 --- a/dev/buildchecker/slack_test.go +++ b/dev/buildchecker/slack_test.go @@ -15,12 +15,17 @@ func TestSlackSummary(t *testing.T) { t.Run("locked", func(t *testing.T) { s := slackSummary(true, []CommitInfo{ - {Commit: "a", Author: "bob"}, - {Commit: "b", Author: "alice"}, + {Commit: "a", Author: "bob", SlackUserID: "123"}, + {Commit: "b", Author: "alice", SlackUserID: "124"}, + {Commit: "c", Author: "no_github", SlackUserID: ""}, }) t.Log(s) assert.Contains(t, s, "locked") assert.Contains(t, s, "bob") + assert.Contains(t, s, "<@123>") assert.Contains(t, s, "alice") + assert.Contains(t, s, "<@124>") + assert.Contains(t, s, "no_github") + assert.Contains(t, s, ":warning: Cannot find Slack user :warning:") }) } diff --git a/dev/buildchecker/slack_user_resolver.go b/dev/buildchecker/slack_user_resolver.go new file mode 100644 index 00000000000..71051ca53ee --- /dev/null +++ b/dev/buildchecker/slack_user_resolver.go @@ -0,0 +1,133 @@ +package main + +import ( + "context" + "io" + "net/http" + "sync" + + "github.com/cockroachdb/errors" + "github.com/google/go-github/v41/github" + "github.com/slack-go/slack" + "golang.org/x/net/context/ctxhttp" + "gopkg.in/yaml.v2" +) + +type SlackUserResolver interface { + ResolveByCommit(ctx context.Context, commit string) (string, error) +} + +const teamDataURL = "https://raw.githubusercontent.com/sourcegraph/handbook/main/data/team.yml" + +type teamMember struct { + Email string `yaml:"email"` + GitHub string `yaml:"github"` +} + +type githubSlackUserResolver struct { + ghClient *github.Client + slackClient *slack.Client + organization string + repository string + team map[string]teamMember + sync.Once +} + +func NewGithubSlackUserResolver(ghClient *github.Client, slackClient *slack.Client, organization string, repository string) SlackUserResolver { + return &githubSlackUserResolver{ + ghClient: ghClient, + slackClient: slackClient, + organization: organization, + repository: repository, + } +} + +func (r *githubSlackUserResolver) ResolveByCommit(ctx context.Context, commit string) (string, error) { + resp, _, err := r.ghClient.Repositories.GetCommit(ctx, r.organization, r.repository, commit, nil) + if err != nil { + return "", errors.Wrap(err, "cannot resolve author from commit") + } + return r.getSlackUserIDbyCommit(ctx, resp.Author.GetLogin()) +} + +func (r *githubSlackUserResolver) getSlackUserIDbyCommit(ctx context.Context, handle string) (string, error) { + err := r.fetchTeamData(ctx) + if err != nil { + return "", err + } + var email string + for _, member := range r.team { + if member.GitHub == handle { + email = member.Email + break + } + } + if email == "" { + return "", errors.Newf("cannot find slack user for GitHub handle %s", handle) + } + user, err := r.slackClient.GetUserByEmail(email) + if err != nil { + return "", err + } + return user.ID, nil +} + +func (r *githubSlackUserResolver) fetchTeamData(ctx context.Context) error { + var outerErr error + r.Once.Do(func() { + team, err := fetchTeamData(ctx) + if err != nil { + outerErr = err + return + } + r.team = team + }) + return outerErr +} + +func getEmailByGitHubHandle(team map[string]teamMember, handle string) string { + for _, member := range team { + if member.GitHub == handle { + return member.Email + } + } + return "" +} + +func fetchTeamData(ctx context.Context) (map[string]teamMember, error) { + resp, err := ctxhttp.Get(ctx, http.DefaultClient, teamDataURL) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + team := map[string]teamMember{} + err = yaml.Unmarshal(body, &team) + if err != nil { + return nil, err + } + return team, nil +} + +type mockSlackUserResolver struct { + commit string + err error +} + +func (r *mockSlackUserResolver) ResolveByCommit(_ context.Context, commit string) (string, error) { + if r.err != nil { + return "", r.err + } + return r.commit, nil +} + +func NewMockSlackUserResolver(commit string, err error) SlackUserResolver { + return &mockSlackUserResolver{ + commit: commit, + err: err, + } +}