feat/msp: update handbook link in alerts dashboard, sort custom alerts first (#63089)

1. The dashboard link still points to the old `go/msp-ops/...` which no
longer work (CORE-105)
2. Alerts defined on top of the MSP defaults are probably of more
interest, so let's sort these in front of the others

## Test plan

Unit/golden tests
This commit is contained in:
Robert Lin 2024-06-05 09:09:22 -07:00 committed by GitHub
parent 872e23f956
commit 27211dea73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 161 additions and 24 deletions

View File

@ -66,6 +66,8 @@ go_test(
embed = [":monitoring"],
tags = [TAG_INFRA_CORESERVICES],
deps = [
"//dev/managedservicesplatform/spec",
"//lib/pointers",
"@com_github_hexops_autogold_v2//:autogold",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//monitoringalertpolicy",
],

View File

@ -8,9 +8,11 @@ import (
"github.com/hashicorp/terraform-cdk-go/cdktf"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringalertpolicy"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringdashboard"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
"github.com/sourcegraph/sourcegraph/lib/pointers"
"golang.org/x/exp/maps"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
"github.com/sourcegraph/sourcegraph/lib/pointers"
)
// Schema reference: https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#resource:-dashboard
@ -68,7 +70,7 @@ func createMonitoringDashboard(stack cdktf.TerraformStack,
vars Variables,
alertGroups map[string][]monitoringalertpolicy.MonitoringAlertPolicy,
) error {
dashboard := generateDashboard(vars.Service.ID, vars.EnvironmentID, alertGroups)
dashboard := generateDashboard(vars.Service, vars.EnvironmentID, alertGroups)
dashboardJSON, err := json.Marshal(dashboard)
if err != nil {
@ -87,15 +89,22 @@ const dashboardColumns = 48
const widgetWidth = 24
const widgetHeight = 16
func generateDashboard(serviceID, envID string, alertGroups map[string][]monitoringalertpolicy.MonitoringAlertPolicy) dashboard {
// Groups with special treatment
const (
customAlertsGroupName = "Custom Alerts"
responseCodeRatioAlertsGroupName = "Response Code Ratio Alerts"
)
func generateDashboard(svc spec.ServiceSpec, envID string, alertGroups map[string][]monitoringalertpolicy.MonitoringAlertPolicy) dashboard {
// Add a banner informing operators not to edit the dashboard
infoBanner := tile{
Width: dashboardColumns,
Height: 8,
Widget: widget{
Text: &text{
Content: fmt.Sprintf("Auto-generated - Please do not edit\n\nFor more details see: [go/msp-ops/%[1]s](https://handbook.sourcegraph.com/departments/engineering/managed-services/%[1]s/)", serviceID),
Format: "MARKDOWN",
Content: fmt.Sprintf("This dashboard is auto-generated - please do not edit!\n\nFor more details, see: %s",
svc.GetHandbookPageURL()),
Format: "MARKDOWN",
Style: textStyle{
BackgroundColor: "#FFFFFF",
FontSize: "FS_EXTRA_LARGE",
@ -114,9 +123,12 @@ func generateDashboard(serviceID, envID string, alertGroups map[string][]monitor
// absolute distance from top of dashboard
height := infoBanner.Height
// for consistency we sort the map keys first
keys := maps.Keys(alertGroups)
// for consistency we sort the map keys first, but place the custom alerts
// at the start
firstKeys := []string{customAlertsGroupName, responseCodeRatioAlertsGroupName}
keys := remove(maps.Keys(alertGroups), firstKeys)
slices.Sort(keys)
keys = append(firstKeys, keys...)
for _, section := range keys {
alerts := alertGroups[section]
// Ensure we don't create empty sections
@ -162,10 +174,21 @@ func generateDashboard(serviceID, envID string, alertGroups map[string][]monitor
}
return dashboard{
DisplayName: fmt.Sprintf("MSP Alerts - %s-%s", serviceID, envID),
DisplayName: fmt.Sprintf("MSP Alerts - %s (%s)", svc.GetName(), envID),
MosaicLayout: mosaicLayout{
Columns: dashboardColumns,
Tiles: tiles,
},
}
}
func remove(v []string, filter []string) []string {
for _, f := range filter {
for i, s := range v {
if s == f {
v = append(v[:i], v[i+1:]...)
}
}
}
return v
}

View File

@ -5,6 +5,9 @@ import (
"github.com/hexops/autogold/v2"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringalertpolicy"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
"github.com/sourcegraph/sourcegraph/lib/pointers"
)
type mockAlertPolicy struct {
@ -34,7 +37,7 @@ func TestDashboardCreation(t *testing.T) {
},
},
want: autogold.Expect(dashboard{
DisplayName: "MSP Alerts - msp-testbed-dev",
DisplayName: "MSP Alerts - msp-testbed (dev)",
MosaicLayout: mosaicLayout{
Columns: 48,
Tiles: []tile{
@ -42,9 +45,9 @@ func TestDashboardCreation(t *testing.T) {
Width: 48,
Height: 8,
Widget: widget{Text: &text{
Content: `Auto-generated - Please do not edit
Content: `This dashboard is auto-generated - please do not edit!
For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`,
For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`,
Format: "MARKDOWN",
Style: textStyle{
BackgroundColor: "#FFFFFF",
@ -87,7 +90,7 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
},
},
want: autogold.Expect(dashboard{
DisplayName: "MSP Alerts - msp-testbed-dev",
DisplayName: "MSP Alerts - msp-testbed (dev)",
MosaicLayout: mosaicLayout{
Columns: 48,
Tiles: []tile{
@ -95,9 +98,9 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
Width: 48,
Height: 8,
Widget: widget{Text: &text{
Content: `Auto-generated - Please do not edit
Content: `This dashboard is auto-generated - please do not edit!
For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`,
For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`,
Format: "MARKDOWN",
Style: textStyle{
BackgroundColor: "#FFFFFF",
@ -148,7 +151,7 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
},
},
want: autogold.Expect(dashboard{
DisplayName: "MSP Alerts - msp-testbed-dev",
DisplayName: "MSP Alerts - msp-testbed (dev)",
MosaicLayout: mosaicLayout{
Columns: 48,
Tiles: []tile{
@ -156,9 +159,9 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
Width: 48,
Height: 8,
Widget: widget{Text: &text{
Content: `Auto-generated - Please do not edit
Content: `This dashboard is auto-generated - please do not edit!
For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`,
For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`,
Format: "MARKDOWN",
Style: textStyle{
BackgroundColor: "#FFFFFF",
@ -220,7 +223,7 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
},
},
want: autogold.Expect(dashboard{
DisplayName: "MSP Alerts - msp-testbed-dev",
DisplayName: "MSP Alerts - msp-testbed (dev)",
MosaicLayout: mosaicLayout{
Columns: 48,
Tiles: []tile{
@ -228,9 +231,9 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
Width: 48,
Height: 8,
Widget: widget{Text: &text{
Content: `Auto-generated - Please do not edit
Content: `This dashboard is auto-generated - please do not edit!
For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/departments/engineering/managed-services/msp-testbed/)`,
For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`,
Format: "MARKDOWN",
Style: textStyle{
BackgroundColor: "#FFFFFF",
@ -303,11 +306,120 @@ For more details see: [go/msp-ops/msp-testbed](https://handbook.sourcegraph.com/
},
}),
},
{
name: "multiple sections with custom alerts and response code ratio",
serviceID: "msp-testbed",
envID: "dev",
alertGroups: map[string][]monitoringalertpolicy.MonitoringAlertPolicy{
"Container Alerts": {
&mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00000"},
},
responseCodeRatioAlertsGroupName: {
&mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00020responsecode"},
},
"Cloud SQL Alerts": {
&mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00010"},
},
customAlertsGroupName: {
&mockAlertPolicy{name: "/projects/msp-testbed/alertPolicies/00020customalert"},
},
},
want: autogold.Expect(dashboard{
DisplayName: "MSP Alerts - msp-testbed (dev)",
MosaicLayout: mosaicLayout{
Columns: 48,
Tiles: []tile{
{
Width: 48,
Height: 8,
Widget: widget{Text: &text{
Content: `This dashboard is auto-generated - please do not edit!
For more details, see: https://sourcegraph.notion.site/0d0b709881674eee9dca4202de9f93b1`,
Format: "MARKDOWN",
Style: textStyle{
BackgroundColor: "#FFFFFF",
FontSize: "FS_EXTRA_LARGE",
HorizontalAlignment: "H_CENTER",
Padding: "P_EXTRA_SMALL",
PointerLocation: "POINTER_LOCATION_UNSPECIFIED",
TextColor: "#000000",
VerticalAlignment: "V_CENTER",
},
}},
},
{
YPos: 8,
Width: 24,
Height: 16,
Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00020customalert"}},
},
{
YPos: 8,
Width: 48,
Height: 16,
Widget: widget{
Title: "Custom Alerts",
CollapsibleGroup: &collapsibleGroup{},
},
},
{
YPos: 24,
Width: 24,
Height: 16,
Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00020responsecode"}},
},
{
YPos: 24,
Width: 48,
Height: 16,
Widget: widget{
Title: "Response Code Ratio Alerts",
CollapsibleGroup: &collapsibleGroup{},
},
},
{
YPos: 40,
Width: 24,
Height: 16,
Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00010"}},
},
{
YPos: 40,
Width: 48,
Height: 16,
Widget: widget{
Title: "Cloud SQL Alerts",
CollapsibleGroup: &collapsibleGroup{},
},
},
{
YPos: 56,
Width: 24,
Height: 16,
Widget: widget{AlertChart: &alertChart{Name: "/projects/msp-testbed/alertPolicies/00000"}},
},
{
YPos: 56,
Width: 48,
Height: 16,
Widget: widget{
Title: "Container Alerts",
CollapsibleGroup: &collapsibleGroup{},
},
},
},
},
}),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dashboard := generateDashboard(tt.serviceID, tt.envID, tt.alertGroups)
dashboard := generateDashboard(spec.ServiceSpec{
ID: tt.serviceID,
NotionPageID: pointers.Ptr("0d0b709881674eee9dca4202de9f93b1"),
}, tt.envID, tt.alertGroups)
tt.want.Equal(t, dashboard)
})
}

View File

@ -390,7 +390,7 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
if err != nil {
return nil, errors.Wrap(err, "failed to create response code metrics")
}
alertGroups["Response Code Ratio Alerts"] = responseCodeRatioAlerts
alertGroups[responseCodeRatioAlertsGroupName] = responseCodeRatioAlerts
}
case spec.ServiceKindJob:
jobAlerts, err := createJobAlerts(stack, id.Group("job"), vars, channels)
@ -407,7 +407,7 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
if err != nil {
return nil, errors.Wrap(err, "failed to create custom alerts")
}
alertGroups["Custom Alerts"] = customAlerts
alertGroups[customAlertsGroupName] = customAlerts
}
if vars.RedisInstanceID != nil {