Reviewed patch: migrate prom-wrapper from log15 to github.com/sourcegraph/log.

Review adjustments relative to the submitted patch:
  * cmd.go: log cmd.Args as-is; exec.Cmd.Args already carries the command name
    as Args[0], so prepending cmd.Path duplicated it in the "cmd" field.
  * main.go: drop the os.Exit(1) left behind after logger.Fatal in the
    reverse-proxy goroutine; Fatal exits the process itself, matching the
    other two Fatal call sites in this patch.
The post-image blob ids on the "index" lines of those two files predate these
adjustments.

diff --git a/docker-images/prometheus/cmd/prom-wrapper/change.go b/docker-images/prometheus/cmd/prom-wrapper/change.go
index 8c4474fc9bb..7417f5aef87 100644
--- a/docker-images/prometheus/cmd/prom-wrapper/change.go
+++ b/docker-images/prometheus/cmd/prom-wrapper/change.go
@@ -8,12 +8,13 @@ import (
 	"time"
 
 	"github.com/go-openapi/strfmt"
-	"github.com/inconshreveable/log15"
 	amclient "github.com/prometheus/alertmanager/api/v2/client"
 	"github.com/prometheus/alertmanager/api/v2/client/silence"
 	"github.com/prometheus/alertmanager/api/v2/models"
 	amconfig "github.com/prometheus/alertmanager/config"
 
+	"github.com/sourcegraph/log"
+
 	"github.com/sourcegraph/sourcegraph/internal/conf"
 	"github.com/sourcegraph/sourcegraph/lib/errors"
 	"github.com/sourcegraph/sourcegraph/schema"
@@ -30,10 +31,10 @@ type ChangeResult struct {
 }
 
 // Change implements a change to configuration
-type Change func(ctx context.Context, log log15.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult)
+type Change func(ctx context.Context, logger log.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult)
 
 // changeReceivers applies `observability.alerts` as Alertmanager receivers.
-func changeReceivers(ctx context.Context, log log15.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult) {
+func changeReceivers(ctx context.Context, _ log.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult) {
 	// convenience function for creating a prefixed problem - this reflects the relevant site configuration fields
 	newProblem := func(err error) {
 		result.Problems = append(result.Problems, conf.NewSiteProblem(fmt.Sprintf("`observability.alerts`: %v", err)))
@@ -48,7 +49,7 @@ func changeReceivers(ctx context.Context, log log15.Logger, change ChangeContext
 }
 
 // changeSMTP applies SMTP server configuration.
-func changeSMTP(ctx context.Context, log log15.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult) {
+func changeSMTP(ctx context.Context, _ log.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult) {
 	if change.AMConfig.Global == nil {
 		change.AMConfig.Global = &amconfig.GlobalConfig{}
 	}
@@ -90,7 +91,7 @@ func changeSMTP(ctx context.Context, log log15.Logger, change ChangeContext, new
 }
 
 // changeSilences syncs Alertmanager silences with silences configured in observability.silenceAlerts
-func changeSilences(ctx context.Context, log log15.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult) {
+func changeSilences(ctx context.Context, logger log.Logger, change ChangeContext, newConfig *subscribedSiteConfig) (result ChangeResult) {
 	// convenience function for creating a prefixed problem - this reflects the relevant site configuration fields
 	newProblem := func(err error) {
 		result.Problems = append(result.Problems, conf.NewSiteProblem(fmt.Sprintf("`observability.silenceAlerts`: %v", err)))
@@ -137,7 +138,12 @@ func changeSilences(ctx context.Context, log log15.Logger, change ChangeContext,
 			}
 		}
 	}
-	log.Info("updating alert silences", "silences", activeSilences)
+
+	var activeSilencesNames []string
+	for s := range activeSilences {
+		activeSilencesNames = append(activeSilencesNames, s)
+	}
+	logger.Info("updating alert silences", log.Strings("activeSilences", activeSilencesNames))
 
 	// create or update silences
 	for alert, existingSilence := range activeSilences {
@@ -167,7 +173,9 @@ func changeSilences(ctx context.Context, log log15.Logger, change ChangeContext,
 		}
 		if err != nil {
 			silenceData, _ := json.Marshal(s)
-			log.Error("failed to update silence", "error", err, "silence", string(silenceData), "existingSilence", existingSilence)
+			logger.Error("failed to update silence", log.Error(err),
+				log.String("silence", string(silenceData)),
+				log.String("existingSilence", existingSilence))
 			newProblem(errors.Errorf("failed to update silence: %w", err))
 			return
 		}
diff --git a/docker-images/prometheus/cmd/prom-wrapper/cmd.go b/docker-images/prometheus/cmd/prom-wrapper/cmd.go
index f90e2ebedf6..9e44b7f1113 100644
--- a/docker-images/prometheus/cmd/prom-wrapper/cmd.go
+++ b/docker-images/prometheus/cmd/prom-wrapper/cmd.go
@@ -5,16 +5,14 @@ import (
 	"os"
 	"os/exec"
 
-	"github.com/inconshreveable/log15"
-
-	"github.com/sourcegraph/sourcegraph/lib/errors"
+	"github.com/sourcegraph/log"
 )
 
-func runCmd(log log15.Logger, errs chan<- error, cmd *exec.Cmd) {
-	log.Info(fmt.Sprintf("running: %+v", cmd.Args))
+func runCmd(logger log.Logger, errs chan<- error, cmd *exec.Cmd) {
+	logger = logger.With(log.Strings("cmd", cmd.Args))
+	logger.Info("running cmd")
 	if err := cmd.Run(); err != nil {
-		err := errors.Errorf("command %+v exited: %w", cmd.Args, err)
-		log.Error(err.Error())
+		logger.Error("command exited", log.Error(err))
 		errs <- err
 	}
 }
diff --git a/docker-images/prometheus/cmd/prom-wrapper/main.go b/docker-images/prometheus/cmd/prom-wrapper/main.go
index 3c3bfbcbb6e..82706297619 100644
--- a/docker-images/prometheus/cmd/prom-wrapper/main.go
+++ b/docker-images/prometheus/cmd/prom-wrapper/main.go
@@ -15,13 +15,16 @@ import (
 	"time"
 
 	"github.com/gorilla/mux"
-	"github.com/inconshreveable/log15"
 	amclient "github.com/prometheus/alertmanager/api/v2/client"
 	prometheusAPI "github.com/prometheus/client_golang/api"
 	prometheus "github.com/prometheus/client_golang/api/prometheus/v1"
 
+	"github.com/sourcegraph/log"
+
 	"github.com/sourcegraph/sourcegraph/internal/env"
+	"github.com/sourcegraph/sourcegraph/internal/hostname"
 	srcprometheus "github.com/sourcegraph/sourcegraph/internal/src-prometheus"
+	"github.com/sourcegraph/sourcegraph/internal/version"
 	"github.com/sourcegraph/sourcegraph/lib/errors"
 )
 
@@ -41,10 +44,21 @@ var (
 )
 
 func main() {
-	log := log15.New("cmd", "prom-wrapper")
+	liblog := log.Init(log.Resource{
+		Name:       env.MyName,
+		Version:    version.Version(),
+		InstanceID: hostname.Get(),
+	})
+	defer liblog.Sync()
+
+	logger := log.Scoped("prom-wrapper", "sourcegraph/prometheus wrapper program")
 	ctx := context.Background()
+
 	disableAlertmanager := noAlertmanager == "true"
 	disableSourcegraphConfig := noConfig == "true"
+	logger.Info("starting prom-wrapper",
+		log.Bool("disableAlertmanager", disableAlertmanager),
+		log.Bool("disableSourcegraphConfig", disableSourcegraphConfig))
 
 	// spin up prometheus and alertmanager
 	procErrs := make(chan error)
@@ -52,7 +66,7 @@ func main() {
 	if len(os.Args) > 1 {
 		promArgs = os.Args[1:] // propagate args to prometheus
 	}
-	go runCmd(log, procErrs, NewPrometheusCmd(promArgs, prometheusPort))
+	go runCmd(logger, procErrs, NewPrometheusCmd(promArgs, prometheusPort))
 
 	// router serves endpoints accessible from outside the container (defined by `exportPort`)
 	// this includes any endpoints from `siteConfigSubscriber`, reverse-proxying services, etc.
@@ -70,34 +84,32 @@ func main() {
 		Address: fmt.Sprintf("http://127.0.0.1:%s", prometheusPort),
 	})
 	if err != nil {
-		log.Crit("failed to initialize prometheus client",
-			"error", err)
-		os.Exit(1)
+		logger.Fatal("failed to initialize prometheus client",
+			log.Error(err))
 	}
 
 	// disable all components that depend on Alertmanager if DISABLE_ALERTMANAGER=true
 	if disableAlertmanager {
-		log.Warn("DISABLE_ALERTMANAGER=true; Alertmanager is disabled")
+		logger.Warn("DISABLE_ALERTMANAGER=true; Alertmanager is disabled")
 	} else {
 		// start alertmanager
-		go runCmd(log, procErrs, NewAlertmanagerCmd(alertmanagerConfigPath))
+		go runCmd(logger, procErrs, NewAlertmanagerCmd(alertmanagerConfigPath))
 
 		// wait for alertmanager to become available
-		log.Info("waiting for alertmanager")
+		logger.Info("waiting for alertmanager")
 		alertmanagerWaitCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
 		if err := waitForAlertmanager(alertmanagerWaitCtx, alertmanager); err != nil {
-			log.Crit("unable to reach Alertmanager", "error", err)
-			os.Exit(1)
+			logger.Fatal("unable to reach Alertmanager", log.Error(err))
 		}
 		cancel()
-		log.Debug("detected alertmanager ready")
+		logger.Debug("detected alertmanager ready")
 
 		// subscribe to configuration
 		if disableSourcegraphConfig {
-			log.Info("DISABLE_SOURCEGRAPH_CONFIG=true; configuration syncing is disabled")
+			logger.Info("DISABLE_SOURCEGRAPH_CONFIG=true; configuration syncing is disabled")
 		} else {
-			log.Info("initializing configuration")
-			subscriber := NewSiteConfigSubscriber(log, alertmanager)
+			logger.Info("initializing configuration")
+			subscriber := NewSiteConfigSubscriber(logger.Scoped("siteconfig", "site configuration subscriber"), alertmanager)
 
 			// watch for configuration updates in the background
 			go subscriber.Subscribe(ctx)
@@ -116,7 +128,7 @@ func main() {
 	}
 
 	// serve alerts summary status
-	alertsReporter := NewAlertsStatusReporter(log, alertmanager, prometheus.NewAPI(promClient))
+	alertsReporter := NewAlertsStatusReporter(logger, alertmanager, prometheus.NewAPI(promClient))
 	router.PathPrefix(srcprometheus.EndpointAlertsStatus).Handler(alertsReporter.Handler())
 
 	// serve prometheus by default via reverse proxy - place last so other prefixes get served first
@@ -128,9 +140,8 @@ func main() {
 	})
 
 	go func() {
-		log.Debug("serving endpoints and reverse proxy")
+		logger.Debug("serving endpoints and reverse proxy")
 		if err := http.ListenAndServe(fmt.Sprintf(":%s", exportPort), router); err != nil && !errors.Is(err, http.ErrServerClosed) {
-			log.Crit("error serving reverse proxy", "error", err)
-			os.Exit(1)
+			logger.Fatal("error serving reverse proxy", log.Error(err))
 		}
 		os.Exit(0)
@@ -142,7 +153,7 @@ func main() {
 	var exitCode int
 	select {
 	case sig := <-c:
-		log.Info(fmt.Sprintf("stopping on signal %s", sig))
+		logger.Info("stopping on signal", log.String("signal", sig.String()))
 		exitCode = 2
 	case err := <-procErrs:
 		if err != nil {
diff --git a/docker-images/prometheus/cmd/prom-wrapper/siteconfig.go b/docker-images/prometheus/cmd/prom-wrapper/siteconfig.go
index b55eb1cf005..d79d67c3e16 100644
--- a/docker-images/prometheus/cmd/prom-wrapper/siteconfig.go
+++ b/docker-images/prometheus/cmd/prom-wrapper/siteconfig.go
@@ -11,11 +11,12 @@ import (
 	"time"
 
 	"github.com/gorilla/mux"
-	"github.com/inconshreveable/log15"
 	amclient "github.com/prometheus/alertmanager/api/v2/client"
 	"github.com/prometheus/alertmanager/api/v2/client/general"
 	amconfig "github.com/prometheus/alertmanager/config"
 
+	"github.com/sourcegraph/log"
+
 	"github.com/sourcegraph/sourcegraph/internal/conf"
 	srcprometheus "github.com/sourcegraph/sourcegraph/internal/src-prometheus"
 	"github.com/sourcegraph/sourcegraph/schema"
@@ -80,6 +81,13 @@ type siteConfigDiff struct {
 	change Change
 }
 
+func siteConfigDiffTypes(diffs []siteConfigDiff) (types []string) {
+	for _, d := range diffs {
+		types = append(types, d.Type)
+	}
+	return types
+}
+
 // Diff returns a set of changes to apply.
 func (c *subscribedSiteConfig) Diff(other *subscribedSiteConfig) []siteConfigDiff {
 	var changes []siteConfigDiff
@@ -104,7 +112,7 @@ func (c *subscribedSiteConfig) Diff(other *subscribedSiteConfig) []siteConfigDif
 // SiteConfigSubscriber is a sidecar service that subscribes to Sourcegraph site configuration and
 // applies relevant (subscribedSiteConfig) changes to Grafana.
 type SiteConfigSubscriber struct {
-	log          log15.Logger
+	log          log.Logger
 	alertmanager *amclient.Alertmanager
 
 	mux      sync.RWMutex
@@ -112,11 +120,10 @@ type SiteConfigSubscriber struct {
 	problems conf.Problems // exported by handler
 }
 
-func NewSiteConfigSubscriber(logger log15.Logger, alertmanager *amclient.Alertmanager) *SiteConfigSubscriber {
-	log := logger.New("logger", "config-subscriber")
+func NewSiteConfigSubscriber(logger log.Logger, alertmanager *amclient.Alertmanager) *SiteConfigSubscriber {
 	zeroConfig := newSubscribedSiteConfig(schema.SiteConfiguration{})
 	return &SiteConfigSubscriber{
-		log:          log,
+		log:          logger,
 		alertmanager: alertmanager,
 		config:       zeroConfig,
 	}
@@ -135,7 +142,7 @@ func (c *SiteConfigSubscriber) Handler() http.Handler {
 		if _, err := c.alertmanager.General.GetStatus(&general.GetStatusParams{
 			Context: req.Context(),
 		}); err != nil {
-			c.log.Error("unable to get Alertmanager status", "error", err)
+			c.log.Error("unable to get Alertmanager status", log.Error(err))
 			problems = append(problems, conf.NewSiteProblem("`observability`: unable to reach Alertmanager - please refer to the Prometheus logs for more details"))
 		}
 
@@ -193,12 +200,12 @@ func (c *SiteConfigSubscriber) execDiffs(ctx context.Context, newConfig *subscri
 	c.mux.Lock()
 	defer c.mux.Unlock()
 
-	c.log.Debug("applying configuration diffs", "diffs", diffs)
+	c.log.Debug("applying configuration diffs", log.Strings("types", siteConfigDiffTypes(diffs)))
 	c.problems = nil // reset problems
 
 	amConfig, err := amconfig.LoadFile(alertmanagerConfigPath)
 	if err != nil {
-		c.log.Error("failed to load Alertmanager configuration", "error", err)
+		c.log.Error("failed to load Alertmanager configuration", log.Error(err))
 		c.problems = append(c.problems, conf.NewSiteProblem("`observability`: failed to load Alertmanager configuration, please refer to Prometheus logs for more details"))
 		return
 	}
@@ -210,7 +217,7 @@ func (c *SiteConfigSubscriber) execDiffs(ctx context.Context, newConfig *subscri
 	}
 	for _, diff := range diffs {
 		c.log.Info(fmt.Sprintf("applying changes for %q diff", diff.Type))
-		result := diff.change(ctx, c.log.New("change", diff.Type), changeContext, newConfig)
+		result := diff.change(ctx, c.log.With(log.String("change", diff.Type)), changeContext, newConfig)
 		c.problems = append(c.problems, result.Problems...)
 	}
 
@@ -218,12 +225,14 @@ func (c *SiteConfigSubscriber) execDiffs(ctx context.Context, newConfig *subscri
 	c.log.Debug("reloading with new configuration")
 	err = applyConfiguration(ctx, changeContext.AMConfig)
 	if err != nil {
-		c.log.Error("failed to apply new configuration", "error", err)
+		c.log.Error("failed to apply new configuration", log.Error(err))
 		c.problems = append(c.problems, conf.NewSiteProblem(fmt.Sprintf("`observability`: failed to update Alertmanager configuration (%s)", err.Error())))
 		return
 	}
 
 	// update state if changes applied
 	c.config = newConfig
-	c.log.Debug("configuration diffs applied", "diffs", diffs, "problems", c.problems)
+	c.log.Debug("configuration diffs applied",
+		log.Strings("types", siteConfigDiffTypes(diffs)),
+		log.Strings("problems", c.problems.Messages()))
 }
diff --git a/docker-images/prometheus/cmd/prom-wrapper/status.go b/docker-images/prometheus/cmd/prom-wrapper/status.go
index afbeefa9535..419a2cfbef8 100644
--- a/docker-images/prometheus/cmd/prom-wrapper/status.go
+++ b/docker-images/prometheus/cmd/prom-wrapper/status.go
@@ -8,25 +8,25 @@ import (
 	"time"
 
 	"github.com/gorilla/mux"
-	"github.com/inconshreveable/log15"
 	amclient "github.com/prometheus/alertmanager/api/v2/client"
 	"github.com/prometheus/alertmanager/api/v2/client/alert"
 	prometheus "github.com/prometheus/client_golang/api/prometheus/v1"
 	"github.com/prometheus/common/model"
 
+	"github.com/sourcegraph/log"
 	srcprometheus "github.com/sourcegraph/sourcegraph/internal/src-prometheus"
 )
 
 // AlertsStatusReporter summarizes alert activity from Alertmanager
 type AlertsStatusReporter struct {
-	log          log15.Logger
+	log          log.Logger
 	alertmanager *amclient.Alertmanager
 	prometheus   prometheus.API
 }
 
-func NewAlertsStatusReporter(logger log15.Logger, alertmanager *amclient.Alertmanager, prom prometheus.API) *AlertsStatusReporter {
+func NewAlertsStatusReporter(logger log.Logger, alertmanager *amclient.Alertmanager, prom prometheus.API) *AlertsStatusReporter {
 	return &AlertsStatusReporter{
-		log:          logger.New("logger", "alerts-status"),
+		log:          logger.Scoped("alerts-status", "alerts status reporter"),
 		alertmanager: alertmanager,
 		prometheus:   prom,
 	}
@@ -61,8 +61,8 @@ func (s *AlertsStatusReporter) Handler() http.Handler {
 		}
 		if len(warn) > 0 {
 			s.log.Warn("site.monitoring.alerts: warnings encountered on prometheus query",
-				"timespan", timespan.String(),
-				"warnings", warn)
+				log.String("timespan", timespan.String()),
+				log.Strings("warnings", warn))
 		}
 		if results.Type() != model.ValMatrix {
 			w.WriteHeader(http.StatusInternalServerError)