mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 17:51:57 +00:00
monitoring: dashboards docs improvements (#24563)
- Render Description more prominently because that's how panels are identified in dashboards - Render link to panel and dashboards - Render full query in collapsible section for reference - Show number of alerts and indicate that no alerts are defined more prominently - Alerts now always link to the panel docs - Fix code-insights owner slug that led to a handbook 404 - Fix some `MetricDescriptionRoot` using underscore instead of space
This commit is contained in:
parent
586b0ee3a0
commit
4933ac4e81
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -75,7 +75,7 @@ func (codeInsights) NewInsightsQueryRunnerResetterGroup(containerName string) mo
|
||||
|
||||
ObservableConstructorOptions: ObservableConstructorOptions{
|
||||
MetricNameRoot: "insights_search_queue",
|
||||
MetricDescriptionRoot: "insights_search_queue",
|
||||
MetricDescriptionRoot: "insights search queue",
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@ -663,21 +663,21 @@ func (codeIntelligence) NewJanitorGroup(containerName string) monitoring.Group {
|
||||
{
|
||||
Standard.Count("records deleted")(ObservableConstructorOptions{
|
||||
MetricNameRoot: "codeintel_background_upload_records_removed",
|
||||
MetricDescriptionRoot: "lsif_upload",
|
||||
MetricDescriptionRoot: "lsif upload",
|
||||
})(containerName, monitoring.ObservableOwnerCodeIntel).WithNoAlerts(`
|
||||
Number of LSIF upload records deleted due to expiration or unreachability every 5m
|
||||
`).Observable(),
|
||||
|
||||
Standard.Count("records deleted")(ObservableConstructorOptions{
|
||||
MetricNameRoot: "codeintel_background_index_records_removed",
|
||||
MetricDescriptionRoot: "lsif_index",
|
||||
MetricDescriptionRoot: "lsif index",
|
||||
})(containerName, monitoring.ObservableOwnerCodeIntel).WithNoAlerts(`
|
||||
Number of LSIF index records deleted due to expiration or unreachability every 5m
|
||||
`).Observable(),
|
||||
|
||||
Standard.Count("data bundles deleted")(ObservableConstructorOptions{
|
||||
MetricNameRoot: "codeintel_background_uploads_purged",
|
||||
MetricDescriptionRoot: "lsif_upload",
|
||||
MetricDescriptionRoot: "lsif upload",
|
||||
})(containerName, monitoring.ObservableOwnerCodeIntel).WithNoAlerts(`
|
||||
Number of LSIF upload data bundles purged from the codeintel-db database every 5m
|
||||
`).Observable(),
|
||||
|
||||
@ -112,7 +112,7 @@ func Worker() *monitoring.Container {
|
||||
|
||||
ObservableConstructorOptions: shared.ObservableConstructorOptions{
|
||||
MetricNameRoot: "codeintel_background_upload",
|
||||
MetricDescriptionRoot: "lsif_upload",
|
||||
MetricDescriptionRoot: "lsif upload",
|
||||
},
|
||||
},
|
||||
|
||||
@ -132,7 +132,7 @@ func Worker() *monitoring.Container {
|
||||
|
||||
ObservableConstructorOptions: shared.ObservableConstructorOptions{
|
||||
MetricNameRoot: "codeintel_background_index",
|
||||
MetricDescriptionRoot: "lsif_index",
|
||||
MetricDescriptionRoot: "lsif index",
|
||||
},
|
||||
},
|
||||
|
||||
@ -152,7 +152,7 @@ func Worker() *monitoring.Container {
|
||||
|
||||
ObservableConstructorOptions: shared.ObservableConstructorOptions{
|
||||
MetricNameRoot: "codeintel_background_dependency_index",
|
||||
MetricDescriptionRoot: "lsif_dependency_index",
|
||||
MetricDescriptionRoot: "lsif dependency index",
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@ -78,20 +78,21 @@ func renderDocumentation(containers []*Container) (*documentation, error) {
|
||||
for _, c := range containers {
|
||||
fmt.Fprintf(&docs.dashboards, "## %s\n\n", c.Title)
|
||||
fprintSubtitle(&docs.dashboards, c.Description)
|
||||
fmt.Fprintf(&docs.dashboards, "To see this dashboard, visit `/-/debug/grafana/d/%[1]s/%[1]s` on your Sourcegraph instance.\n\n", c.Name)
|
||||
|
||||
for _, g := range c.Groups {
|
||||
for gIndex, g := range c.Groups {
|
||||
// the "General" group is top-level
|
||||
if g.Title != "General" {
|
||||
fmt.Fprintf(&docs.dashboards, "### %s: %s\n\n", c.Title, g.Title)
|
||||
}
|
||||
|
||||
for _, r := range g.Rows {
|
||||
for _, o := range r {
|
||||
for rIndex, r := range g.Rows {
|
||||
for oIndex, o := range r {
|
||||
if err := docs.renderAlertSolutionEntry(c, o); err != nil {
|
||||
return nil, errors.Errorf("error rendering alert solution entry %q %q: %w",
|
||||
c.Name, o.Name, err)
|
||||
}
|
||||
if err := docs.renderDashboardPanelEntry(c, o); err != nil {
|
||||
if err := docs.renderDashboardPanelEntry(c, g, o, observablePanelID(gIndex, rIndex, oIndex)); err != nil {
|
||||
return nil, errors.Errorf("error rendering dashboard panel entry %q %q: %w",
|
||||
c.Name, o.Name, err)
|
||||
}
|
||||
@ -140,16 +141,20 @@ func (d *documentation) renderAlertSolutionEntry(c *Container, o Observable) err
|
||||
possibleSolutions, _ := toMarkdown(o.PossibleSolutions, true)
|
||||
fmt.Fprintf(&d.alertSolutions, "%s\n", possibleSolutions)
|
||||
}
|
||||
if o.Interpretation != "" && o.Interpretation != "none" {
|
||||
// indicate help is available in dashboards reference
|
||||
fmt.Fprintf(&d.alertSolutions, "- More help interpreting this metric is available in the [dashboards reference](./%s#%s).\n",
|
||||
dashboardsDocsFile, observableDocAnchor(c, o))
|
||||
} else {
|
||||
// just show the panel reference
|
||||
fmt.Fprintf(&d.alertSolutions, "- Learn more about the related dashboard panel in the [dashboards reference](./%s#%s).\n",
|
||||
dashboardsDocsFile, observableDocAnchor(c, o))
|
||||
}
|
||||
// add silencing configuration as another solution
|
||||
fmt.Fprintf(&d.alertSolutions, "- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:\n\n")
|
||||
fmt.Fprintf(&d.alertSolutions, "```json\n%s\n```\n\n", fmt.Sprintf(`"observability.silenceAlerts": [
|
||||
%s
|
||||
]`, strings.Join(prometheusAlertNames, ",\n")))
|
||||
// add link to panel information IF there are additional details available
|
||||
if o.Interpretation != "" && o.Interpretation != "none" {
|
||||
fmt.Fprintf(&d.alertSolutions, "> NOTE: More help interpreting this metric is available in the [dashboards reference](./%s#%s).\n\n",
|
||||
dashboardsDocsFile, observableDocAnchor(c, o))
|
||||
}
|
||||
if o.Owner != "" {
|
||||
// add owner
|
||||
fprintOwnedBy(&d.alertSolutions, o.Owner)
|
||||
@ -159,23 +164,42 @@ func (d *documentation) renderAlertSolutionEntry(c *Container, o Observable) err
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *documentation) renderDashboardPanelEntry(c *Container, o Observable) error {
|
||||
func (d *documentation) renderDashboardPanelEntry(c *Container, g Group, o Observable, panelID uint) error {
|
||||
fprintObservableHeader(&d.dashboards, c, &o, 4)
|
||||
fmt.Fprintf(&d.dashboards, "This panel indicates %s.\n\n", o.Description)
|
||||
fprintSubtitle(&d.dashboards, fmt.Sprintf("%s\n\n", upperFirst(o.Description)))
|
||||
|
||||
// render interpretation reference if available
|
||||
if o.Interpretation != "" && o.Interpretation != "none" {
|
||||
interpretation, _ := toMarkdown(o.Interpretation, false)
|
||||
fmt.Fprintf(&d.dashboards, "%s\n\n", interpretation)
|
||||
}
|
||||
|
||||
// add link to alert solutions IF there is an alert attached
|
||||
if !o.NoAlert {
|
||||
fmt.Fprintf(&d.dashboards, "> NOTE: Alerts related to this panel are documented in the [alert solutions reference](./%s#%s).\n\n",
|
||||
alertSolutionsFile, observableDocAnchor(c, o))
|
||||
fmt.Fprintf(&d.dashboards, "Refer to the [alert solutions reference](./%s#%s) for %s related to this panel.\n\n",
|
||||
alertSolutionsFile, observableDocAnchor(c, o), pluralize("alert", o.alertsCount()))
|
||||
} else {
|
||||
fmt.Fprintf(&d.dashboards, "This panel has no related alerts.\n\n")
|
||||
}
|
||||
|
||||
// how to get to this panel
|
||||
fmt.Fprintf(&d.dashboards, "To see this panel, visit `/-/debug/grafana/d/%[1]s/%[1]s?viewPanel=%[2]d` on your Sourcegraph instance.\n\n",
|
||||
c.Name, panelID)
|
||||
|
||||
if o.Owner != "" {
|
||||
// add owner
|
||||
fprintOwnedBy(&d.dashboards, o.Owner)
|
||||
}
|
||||
|
||||
fmt.Fprintf(&d.dashboards, `
|
||||
<details>
|
||||
<summary>Technical details</summary>
|
||||
|
||||
Query: %s
|
||||
|
||||
</details>
|
||||
`, fmt.Sprintf("`%s`", o.Query))
|
||||
|
||||
// render break for readability
|
||||
fmt.Fprint(&d.dashboards, "\n<br />\n\n")
|
||||
return nil
|
||||
|
||||
@ -422,19 +422,21 @@ const (
|
||||
|
||||
// toMarkdown returns a Markdown string that also links to the owner's team page
|
||||
func (o ObservableOwner) toMarkdown() string {
|
||||
var teamName string
|
||||
var slug string
|
||||
// special cases for differences in how a team is named in ObservableOwner and how
|
||||
// they are named in the handbook.
|
||||
// see https://about.sourcegraph.com/company/team/org_chart#engineering
|
||||
switch o {
|
||||
case ObservableOwnerCodeIntel:
|
||||
teamName = "code-intelligence"
|
||||
slug = "code-intelligence"
|
||||
case ObservableOwnerCodeInsights:
|
||||
slug = "developer-insights/code-insights"
|
||||
default:
|
||||
teamName = string(o)
|
||||
slug = strings.ReplaceAll(string(o), " ", "-")
|
||||
}
|
||||
|
||||
slug := strings.ReplaceAll(teamName, " ", "-")
|
||||
return fmt.Sprintf("[Sourcegraph %s team](https://about.sourcegraph.com/handbook/engineering/%s)", upperFirst(teamName), slug)
|
||||
return fmt.Sprintf("[Sourcegraph %s team](https://about.sourcegraph.com/handbook/engineering/%s)",
|
||||
upperFirst(string(o)), slug)
|
||||
}
|
||||
|
||||
// Observable describes a metric about a container that can be observed. For example, memory usage.
|
||||
@ -574,7 +576,7 @@ func (o Observable) validate() error {
|
||||
return errors.New(`Panel.panelType must be "graph" or "heatmap"`)
|
||||
}
|
||||
|
||||
allAlertsEmpty := o.Warning.isEmpty() && o.Critical.isEmpty()
|
||||
allAlertsEmpty := o.alertsCount() == 0
|
||||
if allAlertsEmpty || o.NoAlert {
|
||||
// Ensure lack of alerts is intentional
|
||||
if allAlertsEmpty && !o.NoAlert {
|
||||
@ -619,6 +621,16 @@ func (o Observable) validate() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o Observable) alertsCount() (count int) {
|
||||
if !o.Warning.isEmpty() {
|
||||
count++
|
||||
}
|
||||
if !o.Critical.isEmpty() {
|
||||
count++
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Alert provides a builder for defining alerting on an Observable.
|
||||
func Alert() *ObservableAlertDefinition {
|
||||
return &ObservableAlertDefinition{}
|
||||
|
||||
@ -20,6 +20,13 @@ func withPeriod(s string) string {
|
||||
return s
|
||||
}
|
||||
|
||||
// pluralize formats count followed by noun, e.g. "1 alert" or "3 alerts".
//
// The noun is used as-is when count is exactly 1; for every other count
// (including zero and negative values) a plain "s" suffix is appended. No
// irregular plural forms are handled, so callers must pass nouns whose plural
// is formed by adding "s".
func pluralize(noun string, count int) string {
	if count != 1 {
		noun += "s"
	}
	return fmt.Sprintf("%d %s", count, noun)
}
|
||||
|
||||
// StringPtr converts a string value to a pointer, useful for setting fields in some APIs.
|
||||
func StringPtr(s string) *string {
	// s is this call's own copy of the argument, so the pointer handed back
	// never aliases the caller's variable.
	ptr := &s
	return ptr
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user