From 27211dea73d1045bcc9982120fdce6712c1078b5 Mon Sep 17 00:00:00 2001 From: Robert Lin Date: Wed, 5 Jun 2024 09:09:22 -0700 Subject: [PATCH] feat/msp: update handbook link in alerts dashboard, sort custom alerts first (#63089) 1. The dashboard link still points to the old `go/msp-ops/...` which no longer work (CORE-105) 2. Alerts defined on top of the MSP defaults are probably of more interest, so let's sort these in front of the others ## Test plan Unit/golden tests --- .../stacks/monitoring/BUILD.bazel | 2 + .../stacks/monitoring/dashboard.go | 41 ++++-- .../stacks/monitoring/dashboard_test.go | 138 ++++++++++++++++-- .../stacks/monitoring/monitoring.go | 4 +- 4 files changed, 161 insertions(+), 24 deletions(-) diff --git a/dev/managedservicesplatform/stacks/monitoring/BUILD.bazel b/dev/managedservicesplatform/stacks/monitoring/BUILD.bazel index 9c29dcafc45..51c669d369a 100644 --- a/dev/managedservicesplatform/stacks/monitoring/BUILD.bazel +++ b/dev/managedservicesplatform/stacks/monitoring/BUILD.bazel @@ -66,6 +66,8 @@ go_test( embed = [":monitoring"], tags = [TAG_INFRA_CORESERVICES], deps = [ + "//dev/managedservicesplatform/spec", + "//lib/pointers", "@com_github_hexops_autogold_v2//:autogold", "@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//monitoringalertpolicy", ], diff --git a/dev/managedservicesplatform/stacks/monitoring/dashboard.go b/dev/managedservicesplatform/stacks/monitoring/dashboard.go index e4822a25283..74ce29e3a1f 100644 --- a/dev/managedservicesplatform/stacks/monitoring/dashboard.go +++ b/dev/managedservicesplatform/stacks/monitoring/dashboard.go @@ -8,9 +8,11 @@ import ( "github.com/hashicorp/terraform-cdk-go/cdktf" "github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringalertpolicy" "github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringdashboard" - "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid" - "github.com/sourcegraph/sourcegraph/lib/pointers" "golang.org/x/exp/maps" + + "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid" + "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec" + "github.com/sourcegraph/sourcegraph/lib/pointers" ) // Schema reference: https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#resource:-dashboard @@ -68,7 +70,7 @@ func createMonitoringDashboard(stack cdktf.TerraformStack, vars Variables, alertGroups map[string][]monitoringalertpolicy.MonitoringAlertPolicy, ) error { - dashboard := generateDashboard(vars.Service.ID, vars.EnvironmentID, alertGroups) + dashboard := generateDashboard(vars.Service, vars.EnvironmentID, alertGroups) dashboardJSON, err := json.Marshal(dashboard) if err != nil { @@ -87,15 +89,22 @@ const dashboardColumns = 48 const widgetWidth = 24 const widgetHeight = 16 -func generateDashboard(serviceID, envID string, alertGroups map[string][]monitoringalertpolicy.MonitoringAlertPolicy) dashboard { +// Groups with special treatment +const ( + customAlertsGroupName = "Custom Alerts" + responseCodeRatioAlertsGroupName = "Response Code Ratio Alerts" +) + +func generateDashboard(svc spec.ServiceSpec, envID string, alertGroups map[string][]monitoringalertpolicy.MonitoringAlertPolicy) dashboard { // Add a banner informing operators not to edit the dashboard infoBanner := tile{ Width: dashboardColumns, Height: 8, Widget: widget{ Text: &text{ - Content: fmt.Sprintf("Auto-generated - Please do not edit\n\nFor more details see: [go/msp-ops/%[1]s](https://handbook.sourcegraph.com/departments/engineering/managed-services/%[1]s/)", serviceID), - Format: "MARKDOWN", + Content: fmt.Sprintf("This dashboard is auto-generated - please do not edit!\n\nFor more details, see: %s", + svc.GetHandbookPageURL()), + Format: "MARKDOWN", Style: textStyle{ BackgroundColor: "#FFFFFF", FontSize: "FS_EXTRA_LARGE", @@ -114,9 +123,12 @@ func generateDashboard(serviceID, envID string, alertGroups map[string][]monitor // absolute distance from top of dashboard height := infoBanner.Height - // for consistency we sort the map keys first - keys := maps.Keys(alertGroups) + // for consistency we sort the map keys first, but place the custom alerts + // at the start + firstKeys := []string{customAlertsGroupName, responseCodeRatioAlertsGroupName} + keys := remove(maps.Keys(alertGroups), firstKeys) slices.Sort(keys) + keys = append(firstKeys, keys...) for _, section := range keys { alerts := alertGroups[section] // Ensure we don't create empty sections @@ -162,10 +174,21 @@ func generateDashboard(serviceID, envID string, alertGroups map[string][]monitor } return dashboard{ - DisplayName: fmt.Sprintf("MSP Alerts - %s-%s", serviceID, envID), + DisplayName: fmt.Sprintf("MSP Alerts - %s (%s)", svc.GetName(), envID), MosaicLayout: mosaicLayout{ Columns: dashboardColumns, Tiles: tiles, }, } } + +func remove(v []string, filter []string) []string { + for _, f := range filter { + for i, s := range v { + if s == f { + v = append(v[:i], v[i+1:]...) + } + } + } + return v +} diff --git a/dev/managedservicesplatform/stacks/monitoring/dashboard_test.go b/dev/managedservicesplatform/stacks/monitoring/dashboard_test.go index 1ef12fc0970..020c0e03773 100644 --- a/dev/managedservicesplatform/stacks/monitoring/dashboard_test.go +++ b/dev/managedservicesplatform/stacks/monitoring/dashboard_test.go @@ -5,6 +5,9 @@ import ( "github.com/hexops/autogold/v2" "github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringalertpolicy" + + "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec" + "github.com/sourcegraph/sourcegraph/lib/pointers" ) type mockAlertPolicy struct { @@ -34,7 +37,7 @@ func TestDashboardCreation(t *testing.T) { }, }, want: autogold.Expect(dashboard{ - DisplayName: "MSP Alerts - msp-testbed-dev", + DisplayName: "MSP Alerts - msp-testbed (dev)", MosaicLayout: mosaicLayout{ Columns: 48, Tiles: []tile{ @@ -42,9 +45,9 @@ func TestDashboardCreation(t *testing.T) { Width: 48, Height: 8, Widget: widget{Text: &text{ - Content: `Auto-generated - Please do not edit + Content: `This dashboard is auto-generated - please do not edit! -For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`, +For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`, Format: "MARKDOWN", Style: textStyle{ BackgroundColor: "#FFFFFF", @@ -87,7 +90,7 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ }, }, want: autogold.Expect(dashboard{ - DisplayName: "MSP Alerts - msp-testbed-dev", + DisplayName: "MSP Alerts - msp-testbed (dev)", MosaicLayout: mosaicLayout{ Columns: 48, Tiles: []tile{ @@ -95,9 +98,9 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ Width: 48, Height: 8, Widget: widget{Text: &text{ - Content: `Auto-generated - Please do not edit + Content: `This dashboard is auto-generated - please do not edit! -For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`, +For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`, Format: "MARKDOWN", Style: textStyle{ BackgroundColor: "#FFFFFF", @@ -148,7 +151,7 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ }, }, want: autogold.Expect(dashboard{ - DisplayName: "MSP Alerts - msp-testbed-dev", + DisplayName: "MSP Alerts - msp-testbed (dev)", MosaicLayout: mosaicLayout{ Columns: 48, Tiles: []tile{ @@ -156,9 +159,9 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ Width: 48, Height: 8, Widget: widget{Text: &text{ - Content: `Auto-generated - Please do not edit + Content: `This dashboard is auto-generated - please do not edit! -For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`, +For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`, Format: "MARKDOWN", Style: textStyle{ BackgroundColor: "#FFFFFF", @@ -220,7 +223,7 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ }, }, want: autogold.Expect(dashboard{ - DisplayName: "MSP Alerts - msp-testbed-dev", + DisplayName: "MSP Alerts - msp-testbed (dev)", MosaicLayout: mosaicLayout{ Columns: 48, Tiles: []tile{ @@ -228,9 +231,9 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ Width: 48, Height: 8, Widget: widget{Text: &text{ - Content: `Auto-generated - Please do not edit + Content: `This dashboard is auto-generated - please do not edit! -For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`, +For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`, Format: "MARKDOWN", Style: textStyle{ BackgroundColor: "#FFFFFF", @@ -303,11 +306,120 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/ }, }), }, + { + name: "multiple sections with custom alerts and response code ratio", + serviceID: "msp-testbed", + envID: "dev", + alertGroups: map[string][]monitoringalertpolicy.MonitoringAlertPolicy{ + "Container Alerts": { + &mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00000"}, + }, + responseCodeRatioAlertsGroupName: { + &mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00020responsecode"}, + }, + "Cloud SQL Alerts": { + &mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00010"}, + }, + customAlertsGroupName: { + &mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00020customalert"}, + }, + }, + want: autogold.Expect(dashboard{ + DisplayName: "MSP Alerts - msp-testbed (dev)", + MosaicLayout: mosaicLayout{ + Columns: 48, + Tiles: []tile{ + { + Width: 48, + Height: 8, + Widget: widget{Text: &text{ + Content: `This dashboard is auto-generated - please do not edit! + +For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`, + Format: "MARKDOWN", + Style: textStyle{ + BackgroundColor: "#FFFFFF", + FontSize: "FS_EXTRA_LARGE", + HorizontalAlignment: "H_CENTER", + Padding: "P_EXTRA_SMALL", + PointerLocation: "POINTER_LOCATION_UNSPECIFIED", + TextColor: "#000000", + VerticalAlignment: "V_CENTER", + }, + }}, + }, + { + YPos: 8, + Width: 24, + Height: 16, + Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00020customalert"}}, + }, + { + YPos: 8, + Width: 48, + Height: 16, + Widget: widget{ + Title: "Custom Alerts", + CollapsibleGroup: &collapsibleGroup{}, + }, + }, + { + YPos: 24, + Width: 24, + Height: 16, + Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00020responsecode"}}, + }, + { + YPos: 24, + Width: 48, + Height: 16, + Widget: widget{ + Title: "Response Code Ratio Alerts", + CollapsibleGroup: &collapsibleGroup{}, + }, + }, + { + YPos: 40, + Width: 24, + Height: 16, + Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00010"}}, + }, + { + YPos: 40, + Width: 48, + Height: 16, + Widget: widget{ + Title: "Cloud SQL Alerts", + CollapsibleGroup: &collapsibleGroup{}, + }, + }, + { + YPos: 56, + Width: 24, + Height: 16, + Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00000"}}, + }, + { + YPos: 56, + Width: 48, + Height: 16, + Widget: widget{ + Title: "Container Alerts", + CollapsibleGroup: &collapsibleGroup{}, + }, + }, + }, + }, + }), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - dashboard := generateDashboard(tt.serviceID, tt.envID, tt.alertGroups) + dashboard := generateDashboard(spec.ServiceSpec{ + ID: tt.serviceID, + NotionPageID: pointers.Ptr("0d0b709881674eee9dca4202de9f93b1"), + }, tt.envID, tt.alertGroups) tt.want.Equal(t, dashboard) }) } diff --git a/dev/managedservicesplatform/stacks/monitoring/monitoring.go b/dev/managedservicesplatform/stacks/monitoring/monitoring.go index d22e8a84a25..c68a656fcb5 100644 --- a/dev/managedservicesplatform/stacks/monitoring/monitoring.go +++ b/dev/managedservicesplatform/stacks/monitoring/monitoring.go @@ -390,7 +390,7 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) { if err != nil { return nil, errors.Wrap(err, "failed to create response code metrics") } - alertGroups["Response Code Ratio Alerts"] = responseCodeRatioAlerts + alertGroups[responseCodeRatioAlertsGroupName] = responseCodeRatioAlerts } case spec.ServiceKindJob: jobAlerts, err := createJobAlerts(stack, id.Group("job"), vars, channels) @@ -407,7 +407,7 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) { if err != nil { return nil, errors.Wrap(err, "failed to create custom alerts") } - alertGroups["Custom Alerts"] = customAlerts + alertGroups[customAlertsGroupName] = customAlerts } if vars.RedisInstanceID != nil {