diff --git a/CHANGELOG.md b/CHANGELOG.md index d4f2c43a39a..e7beef12d3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ All notable changes to Sourcegraph are documented in this file. - Added support for `select:file.directory` in search queries, which returns unique directory paths for results that satisfy the query. [#22449](https://github.com/sourcegraph/sourcegraph/pull/22449) - An `sg_service` Postgres role has been introduced, as well as an `sg_repo_access_policy` policy on the `repo` table that restricts access to that role. The role that owns the `repo` table will continue to get unrestricted access. [#22303](https://github.com/sourcegraph/sourcegraph/pull/22303) +- Every service that connects to the database (i.e. Postgres) now has a "Database connections" monitoring section in its Grafana dashboard. [#22570](https://github.com/sourcegraph/sourcegraph/pull/22570) ### Changed diff --git a/cmd/frontend/internal/cli/serve_cmd.go b/cmd/frontend/internal/cli/serve_cmd.go index 1502c78b83f..50fca82f421 100644 --- a/cmd/frontend/internal/cli/serve_cmd.go +++ b/cmd/frontend/internal/cli/serve_cmd.go @@ -93,7 +93,8 @@ func defaultExternalURL(nginxAddr, httpAddr string) *url.URL { // InitDB initializes and returns the global database connection and sets the // version of the frontend in our versions table. 
func InitDB() (*sql.DB, error) { - if err := dbconn.SetupGlobalConnection(""); err != nil { + opts := dbconn.Opts{DSN: "", DBName: "frontend", AppName: "frontend"} + if err := dbconn.SetupGlobalConnection(opts); err != nil { return nil, fmt.Errorf("failed to connect to frontend database: %s", err) } diff --git a/cmd/gitserver/main.go b/cmd/gitserver/main.go index f6d4b4cd583..5bac4297698 100644 --- a/cmd/gitserver/main.go +++ b/cmd/gitserver/main.go @@ -236,5 +236,5 @@ func getDB() (dbutil.DB, error) { } }) - return dbconn.New(dsn, "gitserver") + return dbconn.New(dbconn.Opts{DSN: dsn, DBName: "frontend", AppName: "gitserver"}) } diff --git a/cmd/repo-updater/shared/main.go b/cmd/repo-updater/shared/main.go index 868b0fe4055..260639933dd 100644 --- a/cmd/repo-updater/shared/main.go +++ b/cmd/repo-updater/shared/main.go @@ -134,7 +134,7 @@ func Main(enterpriseInit EnterpriseInit) { log.Fatalf("error initialising encryption keyring: %v", err) } - db, err := dbconn.New(dsn, "repo-updater") + db, err := dbconn.New(dbconn.Opts{DSN: dsn, DBName: "frontend", AppName: "repo-updater"}) if err != nil { log.Fatalf("failed to initialize database store: %v", err) } diff --git a/cmd/worker/shared/db.go b/cmd/worker/shared/db.go index 95437c933e6..a7c49d7d619 100644 --- a/cmd/worker/shared/db.go +++ b/cmd/worker/shared/db.go @@ -23,7 +23,8 @@ var initDatabaseMemo = NewMemoizedConstructor(func() (interface{}, error) { return serviceConnections.PostgresDSN }) - if err := dbconn.SetupGlobalConnection(postgresDSN); err != nil { + opts := dbconn.Opts{DSN: postgresDSN, DBName: "frontend", AppName: "worker"} + if err := dbconn.SetupGlobalConnection(opts); err != nil { return nil, fmt.Errorf("failed to connect to frontend database: %s", err) } diff --git a/doc/admin/observability/dashboards.md b/doc/admin/observability/dashboards.md index b2a7b31d57d..07c7bb699d1 100644 --- a/doc/admin/observability/dashboards.md +++ b/doc/admin/observability/dashboards.md @@ -398,6 +398,80 @@ This panel 
indicates out-of-band down migration errors every 5m.
+### Frontend: Database connections + +#### frontend: max_open_conns + +This panel indicates maximum open. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: open_conns + +This panel indicates established. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: in_use + +This panel indicates used. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: idle + +This panel indicates idle. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: waited_for + +This panel indicates waited for. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: blocked_seconds + +This panel indicates blocked seconds. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: closed_max_idle + +This panel indicates closed by SetMaxIdleConns. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: closed_max_lifetime + +This panel indicates closed by SetConnMaxLifetime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### frontend: closed_max_idle_time + +This panel indicates closed by SetConnMaxIdleTime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ### Frontend: Internal service requests #### frontend: internal_indexed_search_error_responses @@ -874,6 +948,80 @@ Repositories removed due to disk pressure
+### Git Server: Database connections + +#### gitserver: max_open_conns + +This panel indicates maximum open. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: open_conns + +This panel indicates established. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: in_use + +This panel indicates used. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: idle + +This panel indicates idle. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: waited_for + +This panel indicates waited for. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: blocked_seconds + +This panel indicates blocked seconds. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: closed_max_idle + +This panel indicates closed by SetMaxIdleConns. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: closed_max_lifetime + +This panel indicates closed by SetConnMaxLifetime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### gitserver: closed_max_idle_time + +This panel indicates closed by SetConnMaxIdleTime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ### Git Server: Container monitoring (not available on server) #### gitserver: container_cpu_usage @@ -1471,6 +1619,80 @@ This panel indicates gitserver client errors every 5m.
+### Precise Code Intel Worker: Database connections + +#### precise-code-intel-worker: max_open_conns + +This panel indicates maximum open. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: open_conns + +This panel indicates established. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: in_use + +This panel indicates used. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: idle + +This panel indicates idle. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: waited_for + +This panel indicates waited for. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: blocked_seconds + +This panel indicates blocked seconds. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: closed_max_idle + +This panel indicates closed by SetMaxIdleConns. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: closed_max_lifetime + +This panel indicates closed by SetConnMaxLifetime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### precise-code-intel-worker: closed_max_idle_time + +This panel indicates closed by SetConnMaxIdleTime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ### Precise Code Intel Worker: Internal service requests #### precise-code-intel-worker: frontend_internal_api_error_responses @@ -1935,6 +2157,80 @@ This panel indicates index enqueuer errors every 5m.
+### Worker: Database connections + +#### worker: max_open_conns + +This panel indicates maximum open. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: open_conns + +This panel indicates established. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: in_use + +This panel indicates used. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: idle + +This panel indicates idle. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: waited_for + +This panel indicates waited for. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: blocked_seconds + +This panel indicates blocked seconds. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: closed_max_idle + +This panel indicates closed by SetMaxIdleConns. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: closed_max_lifetime + +This panel indicates closed by SetConnMaxLifetime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### worker: closed_max_idle_time + +This panel indicates closed by SetConnMaxIdleTime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ### Worker: Internal service requests #### worker: frontend_internal_api_error_responses @@ -2437,6 +2733,80 @@ Indicates how long we`re waiting on the rate limit once it has been exceeded
+### Repo Updater: Database connections + +#### repo-updater: max_open_conns + +This panel indicates maximum open. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: open_conns + +This panel indicates established. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: in_use + +This panel indicates used. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: idle + +This panel indicates idle. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: waited_for + +This panel indicates waited for. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: blocked_seconds + +This panel indicates blocked seconds. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: closed_max_idle + +This panel indicates closed by SetMaxIdleConns. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: closed_max_lifetime + +This panel indicates closed by SetConnMaxLifetime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### repo-updater: closed_max_idle_time + +This panel indicates closed by SetConnMaxIdleTime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ### Repo Updater: Container monitoring (not available on server) #### repo-updater: container_cpu_usage @@ -3544,6 +3914,80 @@ This panel indicates worker store errors every 5m.
+### Executor Queue: Database connections + +#### executor-queue: max_open_conns + +This panel indicates maximum open. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: open_conns + +This panel indicates established. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: in_use + +This panel indicates used. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: idle + +This panel indicates idle. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: waited_for + +This panel indicates waited for. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: blocked_seconds + +This panel indicates blocked seconds. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: closed_max_idle + +This panel indicates closed by SetMaxIdleConns. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: closed_max_lifetime + +This panel indicates closed by SetConnMaxLifetime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ +#### executor-queue: closed_max_idle_time + +This panel indicates closed by SetConnMaxIdleTime. + +*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).* + +
+ ### Executor Queue: Internal service requests #### executor-queue: frontend_internal_api_error_responses diff --git a/enterprise/cmd/executor-queue/main.go b/enterprise/cmd/executor-queue/main.go index 794220281ce..33ff12827b9 100644 --- a/enterprise/cmd/executor-queue/main.go +++ b/enterprise/cmd/executor-queue/main.go @@ -101,7 +101,7 @@ func connectToDatabase() *sql.DB { } }) - db, err := dbconn.New(postgresDSN, "") + db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "frontend", AppName: "executor-queue"}) if err != nil { log.Fatalf("failed to initialize store: %s", err) } diff --git a/enterprise/cmd/frontend/internal/codeintel/services.go b/enterprise/cmd/frontend/internal/codeintel/services.go index 0a55b6d3640..0655b214320 100644 --- a/enterprise/cmd/frontend/internal/codeintel/services.go +++ b/enterprise/cmd/frontend/internal/codeintel/services.go @@ -88,7 +88,7 @@ func mustInitializeCodeIntelDB() *sql.DB { } }) - db, err := dbconn.New(postgresDSN, "_codeintel") + db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "codeintel", AppName: "frontend"}) if err != nil { log.Fatalf("Failed to connect to codeintel database: %s", err) } diff --git a/enterprise/cmd/precise-code-intel-worker/main.go b/enterprise/cmd/precise-code-intel-worker/main.go index bfc33bb6fca..f07098bdbd7 100644 --- a/enterprise/cmd/precise-code-intel-worker/main.go +++ b/enterprise/cmd/precise-code-intel-worker/main.go @@ -125,7 +125,8 @@ func mustInitializeDB() *sql.DB { } }) - if err := dbconn.SetupGlobalConnection(postgresDSN); err != nil { + opts := dbconn.Opts{DSN: postgresDSN, DBName: "frontend", AppName: "precise-code-intel-worker"} + if err := dbconn.SetupGlobalConnection(opts); err != nil { log.Fatalf("Failed to connect to frontend database: %s", err) } @@ -154,7 +155,7 @@ func mustInitializeCodeIntelDB() *sql.DB { } }) - db, err := dbconn.New(postgresDSN, "_codeintel") + db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "codeintel", AppName: 
"precise-code-intel-worker"}) if err != nil { log.Fatalf("Failed to connect to codeintel database: %s", err) } diff --git a/enterprise/cmd/worker/internal/codeintel/codeinteldb.go b/enterprise/cmd/worker/internal/codeintel/codeinteldb.go index c851b1da9b4..9626439d4e1 100644 --- a/enterprise/cmd/worker/internal/codeintel/codeinteldb.go +++ b/enterprise/cmd/worker/internal/codeintel/codeinteldb.go @@ -20,7 +20,7 @@ var initCodeIntelDatabaseMemo = shared.NewMemoizedConstructor(func() (interface{ return serviceConnections.CodeIntelPostgresDSN }) - db, err := dbconn.New(postgresDSN, "_codeintel") + db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "codeintel", AppName: "worker"}) if err != nil { return nil, fmt.Errorf("failed to connect to codeintel database: %s", err) } diff --git a/enterprise/internal/insights/background/background.go b/enterprise/internal/insights/background/background.go index 89f0f05db08..09153f30ff5 100644 --- a/enterprise/internal/insights/background/background.go +++ b/enterprise/internal/insights/background/background.go @@ -33,7 +33,7 @@ func StartBackgroundJobs(ctx context.Context, mainAppDB *sql.DB) { } // Create a connection to TimescaleDB, so we can record results. - timescale, err := insights.InitializeCodeInsightsDB() + timescale, err := insights.InitializeCodeInsightsDB("repo-updater") if err != nil { // e.g. migration failed, DB unavailable, etc. code insights is non-functional so we do not // want to continue. @@ -72,7 +72,7 @@ func StartBackgroundJobs(ctx context.Context, mainAppDB *sql.DB) { // TODO(slimsag): future: register another worker here for webhook querying. 
} - //todo(insights) add setting to disable this indexer + // todo(insights) add setting to disable this indexer routines = append(routines, compression.NewCommitIndexerWorker(ctx, mainAppDB, timescale)) // Register the background goroutine which discovers historical gaps in data and enqueues diff --git a/enterprise/internal/insights/dbtesting/insights.go b/enterprise/internal/insights/dbtesting/insights.go index c89e863f317..209ee512224 100644 --- a/enterprise/internal/insights/dbtesting/insights.go +++ b/enterprise/internal/insights/dbtesting/insights.go @@ -66,7 +66,7 @@ func TimescaleDB(t testing.TB) (db *sql.DB, cleanup func()) { } u.Path = dbname timescaleDSN = u.String() - db, err = dbconn.New(timescaleDSN, dbname) + db, err = dbconn.NewRaw(timescaleDSN) if err != nil { t.Fatal(err) } diff --git a/enterprise/internal/insights/insights.go b/enterprise/internal/insights/insights.go index 98f86d84a50..06b55ff5feb 100644 --- a/enterprise/internal/insights/insights.go +++ b/enterprise/internal/insights/insights.go @@ -44,7 +44,7 @@ func Init(ctx context.Context, postgres dbutil.DB, outOfBandMigrationRunner *oob } return nil } - timescale, err := InitializeCodeInsightsDB() + timescale, err := InitializeCodeInsightsDB("frontend") if err != nil { return err } @@ -56,7 +56,7 @@ func Init(ctx context.Context, postgres dbutil.DB, outOfBandMigrationRunner *oob // database migrations before returning. It is safe to call from multiple services/containers (in // which case, one's migration will win and the other caller will receive an error and should exit // and restart until the other finishes.) 
-func InitializeCodeInsightsDB() (*sql.DB, error) { +func InitializeCodeInsightsDB(app string) (*sql.DB, error) { timescaleDSN := conf.Get().ServiceConnections.CodeInsightsTimescaleDSN conf.Watch(func() { if newDSN := conf.Get().ServiceConnections.CodeInsightsTimescaleDSN; timescaleDSN != newDSN { @@ -64,7 +64,7 @@ func InitializeCodeInsightsDB() (*sql.DB, error) { } }) - db, err := dbconn.New(timescaleDSN, "") + db, err := dbconn.New(dbconn.Opts{DSN: timescaleDSN, DBName: "codeinsights", AppName: app}) if err != nil { return nil, fmt.Errorf("Failed to connect to codeinsights database: %s", err) } diff --git a/internal/database/basestore/handle.go b/internal/database/basestore/handle.go index 1422d02a9ce..27170c1883a 100644 --- a/internal/database/basestore/handle.go +++ b/internal/database/basestore/handle.go @@ -6,7 +6,6 @@ import ( "github.com/hashicorp/go-multierror" - "github.com/sourcegraph/sourcegraph/internal/database/dbconn" "github.com/sourcegraph/sourcegraph/internal/database/dbutil" ) @@ -19,16 +18,6 @@ type TransactableHandle struct { txOptions sql.TxOptions } -// NewHandle returns a new transactable database handle connected to the given dsn (data store name). -func NewHandle(postgresDSN, app string, txOptions sql.TxOptions) (*TransactableHandle, error) { - db, err := dbconn.New(postgresDSN, app) - if err != nil { - return nil, err - } - - return NewHandleWithDB(db, txOptions), nil -} - // NewHandleWithDB returns a new transactable database handle using the given database connection. 
func NewHandleWithDB(db dbutil.DB, txOptions sql.TxOptions) *TransactableHandle { return &TransactableHandle{db: db, txOptions: txOptions} diff --git a/internal/database/basestore/store.go b/internal/database/basestore/store.go index f0cc69f025a..321f83668e4 100644 --- a/internal/database/basestore/store.go +++ b/internal/database/basestore/store.go @@ -50,16 +50,6 @@ type ShareableStore interface { var _ ShareableStore = &Store{} -// New returns a new base store connected to the given dsn (data store name). -func New(postgresDSN, app string, txOptions sql.TxOptions) (*Store, error) { - handle, err := NewHandle(postgresDSN, app, txOptions) - if err != nil { - return nil, err - } - - return NewWithHandle(handle), nil -} - // NewHandleWithDB returns a new base store connected to the given connection. func NewWithDB(db dbutil.DB, txOptions sql.TxOptions) *Store { return NewWithHandle(NewHandleWithDB(db, txOptions)) diff --git a/internal/database/dbconn/dbconn.go b/internal/database/dbconn/dbconn.go index 81dde23ef03..3fdb40f4ba3 100644 --- a/internal/database/dbconn/dbconn.go +++ b/internal/database/dbconn/dbconn.go @@ -40,30 +40,61 @@ var ( _ = env.Ensure("TZ", "UTC", "timezone used by time instances") ) +// Opts contains the arguments passed to database connection initialisation functions. +type Opts struct { + // DSN (data source name) is a URI-like string containing all data needed to connect to the database. + DSN string + + // DBName is used only for Prometheus metrics instead of whatever actual database name is set in DSN. + // This is needed because in our dev environment we use a single physical database (and DSN) for all our different + // logical databases. + DBName string + + // AppName is used as the application_name of the connection when the DSN does not already set one. This separate parameter is needed + // because we have multiple apps connecting to the same database, but have a single shared DSN configured. 
+ AppName string +} + // SetupGlobalConnection connects to the given data source and stores the handle // globally. // +// opts.DBName is used for the Prometheus label value instead of whatever actual database name is set in opts.DSN. +// This is needed because in our dev environment we use a single physical database (and DSN) for all our different +// logical databases. opts.AppName is set as the application_name of the connection unless opts.DSN already +// specifies one. This is needed because we have multiple apps connecting to the same database, but have a single shared DSN. +// // Note: github.com/jackc/pgx parses the environment as well. This function will // also use the value of PGDATASOURCE if supplied and dataSource is the empty // string. -func SetupGlobalConnection(dataSource string) (err error) { - Global, err = New(dataSource, "_app") +func SetupGlobalConnection(opts Opts) (err error) { + Global, err = New(opts) return err } // New connects to the given data source and returns the handle. // +// opts.DBName is used for the Prometheus label value instead of whatever actual database name is set in opts.DSN. +// This is needed because in our dev environment we use a single physical database (and DSN) for all our different +// logical databases. opts.AppName is set as the application_name of the connection unless opts.DSN already +// specifies one. This is needed because we have multiple apps connecting to the same database, but have a single shared DSN. +// // Note: github.com/jackc/pgx parses the environment as well. This function will // also use the value of PGDATASOURCE if supplied and dataSource is the empty // string. 
-func New(dataSource, dbNameSuffix string) (*sql.DB, error) { - db, err := NewRaw(dataSource) +func New(opts Opts) (*sql.DB, error) { + cfg, err := buildConfig(opts.DSN, opts.AppName) if err != nil { return nil, err } - registerPrometheusCollector(db, dbNameSuffix) + db, err := newWithConfig(cfg) + if err != nil { + return nil, err + } + + prometheus.MustRegister(newMetricsCollector(db, opts.DBName, opts.AppName)) configureConnectionPool(db) + return db, nil } @@ -72,11 +103,14 @@ func New(dataSource, dbNameSuffix string) (*sql.DB, error) { // Prefer to call New as it also configures a connection pool and metrics. // Use this method only in internal utilities (such as schemadoc). func NewRaw(dataSource string) (*sql.DB, error) { - cfg, err := buildConfig(dataSource) + cfg, err := buildConfig(dataSource, "") if err != nil { return nil, err } + return newWithConfig(cfg) +} +func newWithConfig(cfg *pgx.ConnConfig) (*sql.DB, error) { db, err := openDBWithStartupWait(cfg) if err != nil { return nil, errors.Wrap(err, "DB not available") @@ -121,11 +155,15 @@ var startupTimeout = func() time.Duration { // buildConfig takes either a Postgres connection string or connection URI, // parses it, and returns a config with additional parameters. -func buildConfig(dataSource string) (*pgx.ConnConfig, error) { +func buildConfig(dataSource, app string) (*pgx.ConnConfig, error) { if dataSource == "" { dataSource = defaultDataSource } + if app == "" { + app = defaultApplicationName + } + cfg, err := pgx.ParseConfig(dataSource) if err != nil { return nil, err @@ -145,7 +183,7 @@ func buildConfig(dataSource string) (*pgx.ConnConfig, error) { // by checking if application_name is set and setting a default // value if not. if _, ok := cfg.RuntimeParams["application_name"]; !ok { - cfg.RuntimeParams["application_name"] = defaultApplicationName + cfg.RuntimeParams["application_name"] = app } // Force PostgreSQL session timezone to UTC. 
@@ -334,22 +372,6 @@ func (h *hook) OnError(ctx context.Context, err error, query string, args ...int return err } -func registerPrometheusCollector(db *sql.DB, dbNameSuffix string) { - c := prometheus.NewGaugeFunc( - prometheus.GaugeOpts{ - Namespace: "src", - Subsystem: "pgsql" + strings.ReplaceAll(dbNameSuffix, "-", "_"), - Name: "open_connections", - Help: "Number of open connections to pgsql DB, as reported by pgsql.DB.Stats()", - }, - func() float64 { - s := db.Stats() - return float64(s.OpenConnections) - }, - ) - prometheus.MustRegister(c) -} - // configureConnectionPool sets reasonable sizes on the built in DB queue. By // default the connection pool is unbounded, which leads to the error `pq: // sorry too many clients already`. diff --git a/internal/database/dbconn/dbconn_test.go b/internal/database/dbconn/dbconn_test.go index 0691f20a90b..36e7e71d933 100644 --- a/internal/database/dbconn/dbconn_test.go +++ b/internal/database/dbconn/dbconn_test.go @@ -45,7 +45,7 @@ func TestBuildConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - cfg, err := buildConfig(tt.dataSource) + cfg, err := buildConfig(tt.dataSource, "") if tt.fails { if err == nil { t.Fatal("error expected") diff --git a/internal/database/dbconn/metrics.go b/internal/database/dbconn/metrics.go new file mode 100644 index 00000000000..738744b9277 --- /dev/null +++ b/internal/database/dbconn/metrics.go @@ -0,0 +1,159 @@ +package dbconn + +import ( + "database/sql" + + "github.com/prometheus/client_golang/prometheus" +) + +// metricsCollector implements the Prometheus collector interface. +// It reports all metrics returned by sql.DB.Stats(). 
+// Adapted from github.com/dlmiddlecote/sqlstats +type metricsCollector struct { + db *sql.DB + + // descriptions of exported metrics + maxOpenDesc *prometheus.Desc + openDesc *prometheus.Desc + inUseDesc *prometheus.Desc + idleDesc *prometheus.Desc + waitedForDesc *prometheus.Desc + blockedSecondsDesc *prometheus.Desc + closedMaxIdleDesc *prometheus.Desc + closedMaxLifetimeDesc *prometheus.Desc + closedMaxIdleTimeDesc *prometheus.Desc +} + +func newMetricsCollector(db *sql.DB, dbname, app string) *metricsCollector { + const ( + namespace = "src" + subsystem = "pgsql_conns" + ) + + labels := prometheus.Labels{ + "db_name": dbname, + "app_name": app, + } + + return &metricsCollector{ + db: db, + maxOpenDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "max_open"), + "Maximum number of open connections to the database.", + nil, + labels, + ), + openDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "open"), + "The number of established connections both in use and idle.", + nil, + labels, + ), + inUseDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "in_use"), + "The number of connections currently in use.", + nil, + labels, + ), + idleDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "idle"), + "The number of idle connections.", + nil, + labels, + ), + waitedForDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "waited_for"), + "The total number of connections waited for.", + nil, + labels, + ), + blockedSecondsDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "blocked_seconds"), + "The total time blocked waiting for a new connection.", + nil, + labels, + ), + closedMaxIdleDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "closed_max_idle"), + "The total number of connections closed due to SetMaxIdleConns.", + nil, + labels, + ), + closedMaxLifetimeDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, 
subsystem, "closed_max_lifetime"), + "The total number of connections closed due to SetConnMaxLifetime.", + nil, + labels, + ), + closedMaxIdleTimeDesc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "closed_max_idle_time"), + "The total number of connections closed due to SetConnMaxIdleTime.", + nil, + labels, + ), + } +} + +// Describe implements the prometheus.Collector interface. +func (c metricsCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.maxOpenDesc + ch <- c.openDesc + ch <- c.inUseDesc + ch <- c.idleDesc + ch <- c.waitedForDesc + ch <- c.blockedSecondsDesc + ch <- c.closedMaxIdleDesc + ch <- c.closedMaxLifetimeDesc + ch <- c.closedMaxIdleTimeDesc +} + +// Collect implements the prometheus.Collector interface. +func (c metricsCollector) Collect(ch chan<- prometheus.Metric) { + stats := c.db.Stats() + + ch <- prometheus.MustNewConstMetric( + c.maxOpenDesc, + prometheus.GaugeValue, + float64(stats.MaxOpenConnections), + ) + ch <- prometheus.MustNewConstMetric( + c.openDesc, + prometheus.GaugeValue, + float64(stats.OpenConnections), + ) + ch <- prometheus.MustNewConstMetric( + c.inUseDesc, + prometheus.GaugeValue, + float64(stats.InUse), + ) + ch <- prometheus.MustNewConstMetric( + c.idleDesc, + prometheus.GaugeValue, + float64(stats.Idle), + ) + ch <- prometheus.MustNewConstMetric( + c.waitedForDesc, + prometheus.CounterValue, + float64(stats.WaitCount), + ) + ch <- prometheus.MustNewConstMetric( + c.blockedSecondsDesc, + prometheus.CounterValue, + stats.WaitDuration.Seconds(), + ) + ch <- prometheus.MustNewConstMetric( + c.closedMaxIdleDesc, + prometheus.CounterValue, + float64(stats.MaxIdleClosed), + ) + ch <- prometheus.MustNewConstMetric( + c.closedMaxLifetimeDesc, + prometheus.CounterValue, + float64(stats.MaxLifetimeClosed), + ) + ch <- prometheus.MustNewConstMetric( + c.closedMaxIdleTimeDesc, + prometheus.CounterValue, + float64(stats.MaxIdleTimeClosed), + ) +} diff --git a/internal/database/dbtesting/dbtesting.go 
b/internal/database/dbtesting/dbtesting.go index c8cad5236c5..0f25c8bf8bf 100644 --- a/internal/database/dbtesting/dbtesting.go +++ b/internal/database/dbtesting/dbtesting.go @@ -22,8 +22,10 @@ import ( // MockHashPassword if non-nil is used instead of database.hashPassword. This is useful // when running tests since we can use a faster implementation. -var MockHashPassword func(password string) (sql.NullString, error) -var MockValidPassword func(hash, password string) bool +var ( + MockHashPassword func(password string) (sql.NullString, error) + MockValidPassword func(hash, password string) bool +) func useFastPasswordMocks() { // We can't care about security in tests, we care about speed. @@ -158,7 +160,8 @@ func initTest(nameSuffix string) error { } } - if err := dbconn.SetupGlobalConnection("dbname=" + dbname); err != nil { + opts := dbconn.Opts{DSN: "dbname=" + dbname, DBName: dbname, AppName: "tests"} + if err := dbconn.SetupGlobalConnection(opts); err != nil { return err } diff --git a/internal/insights/insights_test.go b/internal/insights/insights_test.go index 5350401a8d5..1e900897041 100644 --- a/internal/insights/insights_test.go +++ b/internal/insights/insights_test.go @@ -9,6 +9,10 @@ import ( "github.com/sourcegraph/sourcegraph/internal/database/dbtesting" ) +func init() { + dbtesting.DBNameSuffix = "insights" +} + func TestGetSearchInsights(t *testing.T) { ctx := context.Background() diff --git a/monitoring/definitions/executor_queue.go b/monitoring/definitions/executor_queue.go index bfa56f39ab2..cce98839f75 100644 --- a/monitoring/definitions/executor_queue.go +++ b/monitoring/definitions/executor_queue.go @@ -90,6 +90,11 @@ func ExecutorQueue() *monitoring.Container { }, }, }, + { + Title: shared.TitleDatabaseConnectionsMonitoring, + Hidden: true, + Rows: shared.DatabaseConnectionsMonitoring("executor-queue"), + }, { Title: "Internal service requests", Hidden: true, diff --git a/monitoring/definitions/frontend.go 
b/monitoring/definitions/frontend.go index 392a998296a..247abbd2a5c 100644 --- a/monitoring/definitions/frontend.go +++ b/monitoring/definitions/frontend.go @@ -490,6 +490,11 @@ func Frontend() *monitoring.Container { }, }, }, + { + Title: shared.TitleDatabaseConnectionsMonitoring, + Hidden: true, + Rows: shared.DatabaseConnectionsMonitoring("frontend"), + }, { Title: "Internal service requests", Hidden: true, diff --git a/monitoring/definitions/git_server.go b/monitoring/definitions/git_server.go index 14a29145594..2b35e1d4bee 100644 --- a/monitoring/definitions/git_server.go +++ b/monitoring/definitions/git_server.go @@ -210,7 +210,8 @@ func GitServer() *monitoring.Container { }), Owner: monitoring.ObservableOwnerCoreApplication, }, - }, { + }, + { { Name: "repository_clone_queue_size", Description: "repository clone queue size", @@ -236,7 +237,8 @@ func GitServer() *monitoring.Container { - **Check the gitserver logs for more information.** `, }, - }, { + }, + { { Name: "echo_command_duration_test", Description: "echo test command duration", @@ -297,6 +299,11 @@ func GitServer() *monitoring.Container { }, }, }, + { + Title: shared.TitleDatabaseConnectionsMonitoring, + Hidden: true, + Rows: shared.DatabaseConnectionsMonitoring("gitserver"), + }, { Title: shared.TitleContainerMonitoring, Hidden: true, diff --git a/monitoring/definitions/precise_code_intel_worker.go b/monitoring/definitions/precise_code_intel_worker.go index 8e560080816..b9e90cc2710 100644 --- a/monitoring/definitions/precise_code_intel_worker.go +++ b/monitoring/definitions/precise_code_intel_worker.go @@ -186,6 +186,11 @@ func PreciseCodeIntelWorker() *monitoring.Container { }, }, }, + { + Title: shared.TitleDatabaseConnectionsMonitoring, + Hidden: true, + Rows: shared.DatabaseConnectionsMonitoring("precise-code-intel-worker"), + }, { Title: "Internal service requests", Hidden: true, diff --git a/monitoring/definitions/repo_updater.go b/monitoring/definitions/repo_updater.go index 
b33db78f6da..cd1b9c6fb9d 100644 --- a/monitoring/definitions/repo_updater.go +++ b/monitoring/definitions/repo_updater.go @@ -426,6 +426,11 @@ func RepoUpdater() *monitoring.Container { }, }, }, + { + Title: shared.TitleDatabaseConnectionsMonitoring, + Hidden: true, + Rows: shared.DatabaseConnectionsMonitoring("repo-updater"), + }, { Title: shared.TitleContainerMonitoring, Hidden: true, diff --git a/monitoring/definitions/shared/dbconns.go b/monitoring/definitions/shared/dbconns.go new file mode 100644 index 00000000000..49a41575794 --- /dev/null +++ b/monitoring/definitions/shared/dbconns.go @@ -0,0 +1,104 @@ +package shared + +import ( + "fmt" + + "github.com/sourcegraph/sourcegraph/monitoring/monitoring" +) + +// Database connections monitoring overview. +const TitleDatabaseConnectionsMonitoring = "Database connections" + +func DatabaseConnectionsMonitoring(app string) []monitoring.Row { + return []monitoring.Row{ + { + { + Name: "max_open_conns", + Description: "maximum open", + Query: fmt.Sprintf(`sum by (app_name, db_name) (src_pgsql_conns_max_open{app_name=%q})`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + { + Name: "open_conns", + Description: "established", + Query: fmt.Sprintf(`sum by (app_name, db_name) (src_pgsql_conns_open{app_name=%q})`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + }, + { + { + Name: "in_use", + Description: "used", + Query: fmt.Sprintf(`sum by (app_name, db_name) (src_pgsql_conns_in_use{app_name=%q})`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + { + Name: "idle", + Description: "idle", + Query: fmt.Sprintf(`sum by (app_name, db_name) 
(src_pgsql_conns_idle{app_name=%q})`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + }, + { + { + Name: "waited_for", + Description: "waited for", + Query: fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_waited_for{app_name=%q}[1m]))`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + { + Name: "blocked_seconds", + Description: "blocked seconds", + Query: fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_blocked_seconds{app_name=%q}[1m]))`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}").Unit(monitoring.Seconds), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + }, + { + { + Name: "closed_max_idle", + Description: "closed by SetMaxIdleConns", + Query: fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle{app_name=%q}[1m]))`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + { + Name: "closed_max_lifetime", + Description: "closed by SetConnMaxLifetime", + Query: fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_lifetime{app_name=%q}[1m]))`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + Interpretation: "none", + }, + { + Name: "closed_max_idle_time", + Description: "closed by SetConnMaxIdleTime", + Query: fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_closed_max_idle_time{app_name=%q}[1m]))`, app), + Panel: monitoring.Panel().LegendFormat("dbname={{db_name}}"), + NoAlert: true, + Owner: monitoring.ObservableOwnerCoreApplication, + 
Interpretation: "none", + }, + }, + } +} diff --git a/monitoring/definitions/worker.go b/monitoring/definitions/worker.go index 815e01cb2d3..dd3aca703da 100644 --- a/monitoring/definitions/worker.go +++ b/monitoring/definitions/worker.go @@ -202,6 +202,11 @@ func Worker() *monitoring.Container { }, }, }, + { + Title: shared.TitleDatabaseConnectionsMonitoring, + Hidden: true, + Rows: shared.DatabaseConnectionsMonitoring("worker"), + }, { Title: "Internal service requests", Hidden: true, diff --git a/sg.config.yaml b/sg.config.yaml index f287c93be9c..398175bc550 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -26,7 +26,27 @@ env: JAEGER_SERVER_URL: http://localhost:16686 ZOEKT_HOST: localhost:3070 - SRC_PROF_HTTP: '' # This needs to be empty? + SRC_PROF_HTTP: '' + SRC_PROF_SERVICES: | + [ + { "Name": "frontend", "Host": "127.0.0.1:6063" }, + { "Name": "enterprise-frontend", "Host": "127.0.0.1:6063" }, + { "Name": "gitserver", "Host": "127.0.0.1:6068" }, + { "Name": "searcher", "Host": "127.0.0.1:6069" }, + { "Name": "symbols", "Host": "127.0.0.1:6071" }, + { "Name": "repo-updater", "Host": "127.0.0.1:6074" }, + { "Name": "enterprise-repo-updater", "Host": "127.0.0.1:6074" }, + { "Name": "query-runner", "Host": "127.0.0.1:6067" }, + { "Name": "precise-code-intel-worker", "Host": "127.0.0.1:6088" }, + { "Name": "worker", "Host": "127.0.0.1:6089" }, + { "Name": "enterprise-worker", "Host": "127.0.0.1:6089" }, + { "Name": "executor-queue", "Host": "127.0.0.1:6091" }, + { "Name": "executor", "Host": "127.0.0.1:6092" }, + { "Name": "zoekt-indexserver-0", "Host": "127.0.0.1:6072" }, + { "Name": "zoekt-indexserver-1", "Host": "127.0.0.1:6073" }, + { "Name": "zoekt-webserver-0", "Host": "127.0.0.1:3070", "DefaultPath": "/debug/requests/" }, + { "Name": "zoekt-webserver-1", "Host": "127.0.0.1:3071", "DefaultPath": "/debug/requests/" } + ] OVERRIDE_AUTH_SECRET: sSsNGlI8fBDftBz0LDQNXEnP6lrWdt9g0fK6hoFvGQ # Settings/config SITE_CONFIG_FILE: ./dev/site-config.json