monitoring: Database connections monitoring (#22570)

* Revert "Revert "dbconn: Better SQL pool stats (#22523)""

This reverts commit 18c8696687.

* Remove dependency on sqlstats

* codeintel: Avoid duplicate app name

* dbconn: Parameterise dbname for Prometheus label

* monitoring: Database connections monitoring

* Add CHANGELOG entry

* fixup! Add PR link to CHANGELOG

* fixup! Fix insights test

* fixup! Introduce dbconn.Opts for readability
This commit is contained in:
Tomás Senart 2021-07-05 12:24:20 +02:00 committed by GitHub
parent 5f554106d2
commit bce37c82aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 838 additions and 67 deletions

View File

@ -17,6 +17,7 @@ All notable changes to Sourcegraph are documented in this file.
- Added support for `select:file.directory` in search queries, which returns unique directory paths for results that satisfy the query. [#22449](https://github.com/sourcegraph/sourcegraph/pull/22449)
- An `sg_service` Postgres role has been introduced, as well as an `sg_repo_access_policy` policy on the `repo` table that restricts access to that role. The role that owns the `repo` table will continue to get unrestricted access. [#22303](https://github.com/sourcegraph/sourcegraph/pull/22303)
- Every service that connects to the database (i.e. Postgres) now has a "Database connections" monitoring section in its Grafana dashboard. [#22570](https://github.com/sourcegraph/sourcegraph/pull/22570)
### Changed

View File

@ -93,7 +93,8 @@ func defaultExternalURL(nginxAddr, httpAddr string) *url.URL {
// InitDB initializes and returns the global database connection and sets the
// version of the frontend in our versions table.
func InitDB() (*sql.DB, error) {
if err := dbconn.SetupGlobalConnection(""); err != nil {
opts := dbconn.Opts{DSN: "", DBName: "frontend", AppName: "frontend"}
if err := dbconn.SetupGlobalConnection(opts); err != nil {
return nil, fmt.Errorf("failed to connect to frontend database: %s", err)
}

View File

@ -236,5 +236,5 @@ func getDB() (dbutil.DB, error) {
}
})
return dbconn.New(dsn, "gitserver")
return dbconn.New(dbconn.Opts{DSN: dsn, DBName: "frontend", AppName: "gitserver"})
}

View File

@ -134,7 +134,7 @@ func Main(enterpriseInit EnterpriseInit) {
log.Fatalf("error initialising encryption keyring: %v", err)
}
db, err := dbconn.New(dsn, "repo-updater")
db, err := dbconn.New(dbconn.Opts{DSN: dsn, DBName: "frontend", AppName: "repo-updater"})
if err != nil {
log.Fatalf("failed to initialize database store: %v", err)
}

View File

@ -23,7 +23,8 @@ var initDatabaseMemo = NewMemoizedConstructor(func() (interface{}, error) {
return serviceConnections.PostgresDSN
})
if err := dbconn.SetupGlobalConnection(postgresDSN); err != nil {
opts := dbconn.Opts{DSN: postgresDSN, DBName: "frontend", AppName: "worker"}
if err := dbconn.SetupGlobalConnection(opts); err != nil {
return nil, fmt.Errorf("failed to connect to frontend database: %s", err)
}

View File

@ -398,6 +398,80 @@ This panel indicates out-of-band down migration errors every 5m.
<br />
### Frontend: Database connections
#### frontend: max_open_conns
This panel indicates maximum open.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: open_conns
This panel indicates established.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: in_use
This panel indicates used.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: idle
This panel indicates idle.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: waited_for
This panel indicates waited for.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: blocked_seconds
This panel indicates blocked seconds.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: closed_max_idle
This panel indicates closed by SetMaxIdleConns.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: closed_max_lifetime
This panel indicates closed by SetConnMaxLifetime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### frontend: closed_max_idle_time
This panel indicates closed by SetConnMaxIdleTime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
### Frontend: Internal service requests
#### frontend: internal_indexed_search_error_responses
@ -874,6 +948,80 @@ Repositories removed due to disk pressure
<br />
### Git Server: Database connections
#### gitserver: max_open_conns
This panel indicates maximum open.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: open_conns
This panel indicates established.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: in_use
This panel indicates used.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: idle
This panel indicates idle.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: waited_for
This panel indicates waited for.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: blocked_seconds
This panel indicates blocked seconds.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: closed_max_idle
This panel indicates closed by SetMaxIdleConns.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: closed_max_lifetime
This panel indicates closed by SetConnMaxLifetime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### gitserver: closed_max_idle_time
This panel indicates closed by SetConnMaxIdleTime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
### Git Server: Container monitoring (not available on server)
#### gitserver: container_cpu_usage
@ -1471,6 +1619,80 @@ This panel indicates gitserver client errors every 5m.
<br />
### Precise Code Intel Worker: Database connections
#### precise-code-intel-worker: max_open_conns
This panel indicates maximum open.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: open_conns
This panel indicates established.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: in_use
This panel indicates used.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: idle
This panel indicates idle.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: waited_for
This panel indicates waited for.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: blocked_seconds
This panel indicates blocked seconds.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: closed_max_idle
This panel indicates closed by SetMaxIdleConns.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: closed_max_lifetime
This panel indicates closed by SetConnMaxLifetime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### precise-code-intel-worker: closed_max_idle_time
This panel indicates closed by SetConnMaxIdleTime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
### Precise Code Intel Worker: Internal service requests
#### precise-code-intel-worker: frontend_internal_api_error_responses
@ -1935,6 +2157,80 @@ This panel indicates index enqueuer errors every 5m.
<br />
### Worker: Database connections
#### worker: max_open_conns
This panel indicates maximum open.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: open_conns
This panel indicates established.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: in_use
This panel indicates used.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: idle
This panel indicates idle.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: waited_for
This panel indicates waited for.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: blocked_seconds
This panel indicates blocked seconds.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: closed_max_idle
This panel indicates closed by SetMaxIdleConns.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: closed_max_lifetime
This panel indicates closed by SetConnMaxLifetime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### worker: closed_max_idle_time
This panel indicates closed by SetConnMaxIdleTime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
### Worker: Internal service requests
#### worker: frontend_internal_api_error_responses
@ -2437,6 +2733,80 @@ Indicates how long we`re waiting on the rate limit once it has been exceeded
<br />
### Repo Updater: Database connections
#### repo-updater: max_open_conns
This panel indicates maximum open.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: open_conns
This panel indicates established.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: in_use
This panel indicates used.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: idle
This panel indicates idle.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: waited_for
This panel indicates waited for.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: blocked_seconds
This panel indicates blocked seconds.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: closed_max_idle
This panel indicates closed by SetMaxIdleConns.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: closed_max_lifetime
This panel indicates closed by SetConnMaxLifetime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### repo-updater: closed_max_idle_time
This panel indicates closed by SetConnMaxIdleTime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
### Repo Updater: Container monitoring (not available on server)
#### repo-updater: container_cpu_usage
@ -3544,6 +3914,80 @@ This panel indicates worker store errors every 5m.
<br />
### Executor Queue: Database connections
#### executor-queue: max_open_conns
This panel indicates maximum open.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: open_conns
This panel indicates established.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: in_use
This panel indicates used.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: idle
This panel indicates idle.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: waited_for
This panel indicates waited for.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: blocked_seconds
This panel indicates blocked seconds.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: closed_max_idle
This panel indicates closed by SetMaxIdleConns.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: closed_max_lifetime
This panel indicates closed by SetConnMaxLifetime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
#### executor-queue: closed_max_idle_time
This panel indicates closed by SetConnMaxIdleTime.
<sub>*Managed by the [Sourcegraph Core application team](https://about.sourcegraph.com/handbook/engineering/core-application).*</sub>
<br />
### Executor Queue: Internal service requests
#### executor-queue: frontend_internal_api_error_responses

View File

@ -101,7 +101,7 @@ func connectToDatabase() *sql.DB {
}
})
db, err := dbconn.New(postgresDSN, "")
db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "frontend", AppName: "executor-queue"})
if err != nil {
log.Fatalf("failed to initialize store: %s", err)
}

View File

@ -88,7 +88,7 @@ func mustInitializeCodeIntelDB() *sql.DB {
}
})
db, err := dbconn.New(postgresDSN, "_codeintel")
db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "codeintel", AppName: "frontend"})
if err != nil {
log.Fatalf("Failed to connect to codeintel database: %s", err)
}

View File

@ -125,7 +125,8 @@ func mustInitializeDB() *sql.DB {
}
})
if err := dbconn.SetupGlobalConnection(postgresDSN); err != nil {
opts := dbconn.Opts{DSN: postgresDSN, DBName: "frontend", AppName: "precise-code-intel-worker"}
if err := dbconn.SetupGlobalConnection(opts); err != nil {
log.Fatalf("Failed to connect to frontend database: %s", err)
}
@ -154,7 +155,7 @@ func mustInitializeCodeIntelDB() *sql.DB {
}
})
db, err := dbconn.New(postgresDSN, "_codeintel")
db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "codeintel", AppName: "precise-code-intel-worker"})
if err != nil {
log.Fatalf("Failed to connect to codeintel database: %s", err)
}

View File

@ -20,7 +20,7 @@ var initCodeIntelDatabaseMemo = shared.NewMemoizedConstructor(func() (interface{
return serviceConnections.CodeIntelPostgresDSN
})
db, err := dbconn.New(postgresDSN, "_codeintel")
db, err := dbconn.New(dbconn.Opts{DSN: postgresDSN, DBName: "codeintel", AppName: "worker"})
if err != nil {
return nil, fmt.Errorf("failed to connect to codeintel database: %s", err)
}

View File

@ -33,7 +33,7 @@ func StartBackgroundJobs(ctx context.Context, mainAppDB *sql.DB) {
}
// Create a connection to TimescaleDB, so we can record results.
timescale, err := insights.InitializeCodeInsightsDB()
timescale, err := insights.InitializeCodeInsightsDB("repo-updater")
if err != nil {
// e.g. migration failed, DB unavailable, etc. code insights is non-functional so we do not
// want to continue.
@ -72,7 +72,7 @@ func StartBackgroundJobs(ctx context.Context, mainAppDB *sql.DB) {
// TODO(slimsag): future: register another worker here for webhook querying.
}
//todo(insights) add setting to disable this indexer
// todo(insights) add setting to disable this indexer
routines = append(routines, compression.NewCommitIndexerWorker(ctx, mainAppDB, timescale))
// Register the background goroutine which discovers historical gaps in data and enqueues

View File

@ -66,7 +66,7 @@ func TimescaleDB(t testing.TB) (db *sql.DB, cleanup func()) {
}
u.Path = dbname
timescaleDSN = u.String()
db, err = dbconn.New(timescaleDSN, dbname)
db, err = dbconn.NewRaw(timescaleDSN)
if err != nil {
t.Fatal(err)
}

View File

@ -44,7 +44,7 @@ func Init(ctx context.Context, postgres dbutil.DB, outOfBandMigrationRunner *oob
}
return nil
}
timescale, err := InitializeCodeInsightsDB()
timescale, err := InitializeCodeInsightsDB("frontend")
if err != nil {
return err
}
@ -56,7 +56,7 @@ func Init(ctx context.Context, postgres dbutil.DB, outOfBandMigrationRunner *oob
// database migrations before returning. It is safe to call from multiple services/containers (in
// which case, one's migration will win and the other caller will receive an error and should exit
// and restart until the other finishes.)
func InitializeCodeInsightsDB() (*sql.DB, error) {
func InitializeCodeInsightsDB(app string) (*sql.DB, error) {
timescaleDSN := conf.Get().ServiceConnections.CodeInsightsTimescaleDSN
conf.Watch(func() {
if newDSN := conf.Get().ServiceConnections.CodeInsightsTimescaleDSN; timescaleDSN != newDSN {
@ -64,7 +64,7 @@ func InitializeCodeInsightsDB() (*sql.DB, error) {
}
})
db, err := dbconn.New(timescaleDSN, "")
db, err := dbconn.New(dbconn.Opts{DSN: timescaleDSN, DBName: "codeinsights", AppName: app})
if err != nil {
return nil, fmt.Errorf("Failed to connect to codeinsights database: %s", err)
}

View File

@ -6,7 +6,6 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/sourcegraph/sourcegraph/internal/database/dbconn"
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
)
@ -19,16 +18,6 @@ type TransactableHandle struct {
txOptions sql.TxOptions
}
// NewHandle returns a new transactable database handle connected to the given DSN (data source name).
// app is passed through to dbconn.New together with postgresDSN; an error from dbconn.New is
// returned unchanged and no handle is created in that case.
func NewHandle(postgresDSN, app string, txOptions sql.TxOptions) (*TransactableHandle, error) {
db, err := dbconn.New(postgresDSN, app)
if err != nil {
return nil, err
}
return NewHandleWithDB(db, txOptions), nil
}
// NewHandleWithDB returns a new transactable database handle using the given database connection.
func NewHandleWithDB(db dbutil.DB, txOptions sql.TxOptions) *TransactableHandle {
return &TransactableHandle{db: db, txOptions: txOptions}

View File

@ -50,16 +50,6 @@ type ShareableStore interface {
var _ ShareableStore = &Store{}
// New returns a new base store connected to the given DSN (data source name).
// It creates the underlying handle with NewHandle and wraps it with NewWithHandle; an error
// from NewHandle is returned unchanged and no store is created in that case.
func New(postgresDSN, app string, txOptions sql.TxOptions) (*Store, error) {
handle, err := NewHandle(postgresDSN, app, txOptions)
if err != nil {
return nil, err
}
return NewWithHandle(handle), nil
}
// NewWithDB returns a new base store using the given database connection.
func NewWithDB(db dbutil.DB, txOptions sql.TxOptions) *Store {
return NewWithHandle(NewHandleWithDB(db, txOptions))

View File

@ -40,30 +40,61 @@ var (
_ = env.Ensure("TZ", "UTC", "timezone used by time instances")
)
// Opts contains the arguments passed to the database connection initialisation functions.
type Opts struct {
// DSN (data source name) is a URI-like string containing all data needed to connect to the database.
DSN string
// DBName is used only as a label on Prometheus metrics, instead of whatever actual database name is set in DSN.
// This is needed because in our dev environment we use a single physical database (and DSN) for all our different
// logical databases.
DBName string
// AppName is used as the application_name of the connection and as the app_name label on Prometheus metrics.
// This separate parameter is needed because we have multiple apps connecting to the same database, but have a
// single shared DSN configured.
// NOTE(review): buildConfig only sets application_name from this value when the parsed config does not
// already contain one, so an application_name present in the DSN appears to take precedence — confirm
// whether true "override" semantics are intended.
AppName string
}
// SetupGlobalConnection connects to the given data source and stores the handle
// globally.
//
// opts.DBName is used as the Prometheus label value instead of whatever database name is set in opts.DSN.
// This is needed because in our dev environment we use a single physical database (and DSN) for all our different
// logical databases. opts.AppName, however, is set as the application_name in the connection string. This is needed
// because we have multiple apps connecting to the same database, but have a single shared DSN.
//
// Note: github.com/jackc/pgx parses the environment as well. This function will
// also use the value of PGDATASOURCE if supplied and opts.DSN is the empty
// string.
func SetupGlobalConnection(dataSource string) (err error) {
Global, err = New(dataSource, "_app")
func SetupGlobalConnection(opts Opts) (err error) {
Global, err = New(opts)
return err
}
// New connects to the given data source and returns the handle.
//
// opts.DBName is used as the Prometheus label value instead of whatever database name is set in opts.DSN.
// This is needed because in our dev environment we use a single physical database (and DSN) for all our different
// logical databases. opts.AppName, however, is set as the application_name in the connection string. This is needed
// because we have multiple apps connecting to the same database, but have a single shared DSN.
//
// Note: github.com/jackc/pgx parses the environment as well. This function will
// also use the value of PGDATASOURCE if supplied and opts.DSN is the empty
// string.
func New(dataSource, dbNameSuffix string) (*sql.DB, error) {
db, err := NewRaw(dataSource)
func New(opts Opts) (*sql.DB, error) {
cfg, err := buildConfig(opts.DSN, opts.AppName)
if err != nil {
return nil, err
}
registerPrometheusCollector(db, dbNameSuffix)
db, err := newWithConfig(cfg)
if err != nil {
return nil, err
}
prometheus.MustRegister(newMetricsCollector(db, opts.DBName, opts.AppName))
configureConnectionPool(db)
return db, nil
}
@ -72,11 +103,14 @@ func New(dataSource, dbNameSuffix string) (*sql.DB, error) {
// Prefer to call New as it also configures a connection pool and metrics.
// Use this method only in internal utilities (such as schemadoc).
func NewRaw(dataSource string) (*sql.DB, error) {
cfg, err := buildConfig(dataSource)
cfg, err := buildConfig(dataSource, "")
if err != nil {
return nil, err
}
return newWithConfig(cfg)
}
func newWithConfig(cfg *pgx.ConnConfig) (*sql.DB, error) {
db, err := openDBWithStartupWait(cfg)
if err != nil {
return nil, errors.Wrap(err, "DB not available")
@ -121,11 +155,15 @@ var startupTimeout = func() time.Duration {
// buildConfig takes either a Postgres connection string or connection URI,
// parses it, and returns a config with additional parameters.
func buildConfig(dataSource string) (*pgx.ConnConfig, error) {
func buildConfig(dataSource, app string) (*pgx.ConnConfig, error) {
if dataSource == "" {
dataSource = defaultDataSource
}
if app == "" {
app = defaultApplicationName
}
cfg, err := pgx.ParseConfig(dataSource)
if err != nil {
return nil, err
@ -145,7 +183,7 @@ func buildConfig(dataSource string) (*pgx.ConnConfig, error) {
// by checking if application_name is set and setting a default
// value if not.
if _, ok := cfg.RuntimeParams["application_name"]; !ok {
cfg.RuntimeParams["application_name"] = defaultApplicationName
cfg.RuntimeParams["application_name"] = app
}
// Force PostgreSQL session timezone to UTC.
@ -334,22 +372,6 @@ func (h *hook) OnError(ctx context.Context, err error, query string, args ...int
return err
}
// registerPrometheusCollector registers a gauge that reports the number of open
// connections of db, read from db.Stats() each time the gauge is evaluated.
//
// The metric lives in the "src" namespace under the subsystem "pgsql" followed by
// dbNameSuffix, with every "-" in the suffix replaced by "_".
// Registration goes through prometheus.MustRegister, so a registration failure panics.
func registerPrometheusCollector(db *sql.DB, dbNameSuffix string) {
c := prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Namespace: "src",
Subsystem: "pgsql" + strings.ReplaceAll(dbNameSuffix, "-", "_"),
Name: "open_connections",
Help: "Number of open connections to pgsql DB, as reported by pgsql.DB.Stats()",
},
func() float64 {
s := db.Stats()
return float64(s.OpenConnections)
},
)
prometheus.MustRegister(c)
}
// configureConnectionPool sets reasonable sizes on the built in DB queue. By
// default the connection pool is unbounded, which leads to the error `pq:
// sorry too many clients already`.

View File

@ -45,7 +45,7 @@ func TestBuildConfig(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg, err := buildConfig(tt.dataSource)
cfg, err := buildConfig(tt.dataSource, "")
if tt.fails {
if err == nil {
t.Fatal("error expected")

View File

@ -0,0 +1,159 @@
package dbconn
import (
"database/sql"
"github.com/prometheus/client_golang/prometheus"
)
// metricsCollector implements the Prometheus collector interface.
// It reports all metrics returned by sql.DB.Stats().
// Adapted from github.com/dlmiddlecote/sqlstats
type metricsCollector struct {
// db is the handle whose Stats() are read on every Collect call.
db *sql.DB
// descriptions of exported metrics; each one is created once in
// newMetricsCollector and paired in Collect with the sql.DBStats field noted
// next to it.
maxOpenDesc *prometheus.Desc // MaxOpenConnections (gauge)
openDesc *prometheus.Desc // OpenConnections (gauge)
inUseDesc *prometheus.Desc // InUse (gauge)
idleDesc *prometheus.Desc // Idle (gauge)
waitedForDesc *prometheus.Desc // WaitCount (counter)
blockedSecondsDesc *prometheus.Desc // WaitDuration, in seconds (counter)
closedMaxIdleDesc *prometheus.Desc // MaxIdleClosed (counter)
closedMaxLifetimeDesc *prometheus.Desc // MaxLifetimeClosed (counter)
closedMaxIdleTimeDesc *prometheus.Desc // MaxIdleTimeClosed (counter)
}
// newMetricsCollector returns a collector that exposes the sql.DBStats of db as
// "src_pgsql_conns_*" metrics. dbname and app are attached to every metric as the
// constant labels "db_name" and "app_name" respectively.
func newMetricsCollector(db *sql.DB, dbname, app string) *metricsCollector {
	constLabels := prometheus.Labels{
		"db_name":  dbname,
		"app_name": app,
	}

	// describe builds the descriptor for a single metric. All metrics share the
	// same namespace, subsystem and constant labels, and none of them has
	// variable labels.
	describe := func(name, help string) *prometheus.Desc {
		return prometheus.NewDesc(
			prometheus.BuildFQName("src", "pgsql_conns", name),
			help,
			nil,
			constLabels,
		)
	}

	c := &metricsCollector{db: db}
	c.maxOpenDesc = describe("max_open", "Maximum number of open connections to the database.")
	c.openDesc = describe("open", "The number of established connections both in use and idle.")
	c.inUseDesc = describe("in_use", "The number of connections currently in use.")
	c.idleDesc = describe("idle", "The number of idle connections.")
	c.waitedForDesc = describe("waited_for", "The total number of connections waited for.")
	c.blockedSecondsDesc = describe("blocked_seconds", "The total time blocked waiting for a new connection.")
	c.closedMaxIdleDesc = describe("closed_max_idle", "The total number of connections closed due to SetMaxIdleConns.")
	c.closedMaxLifetimeDesc = describe("closed_max_lifetime", "The total number of connections closed due to SetConnMaxLifetime.")
	c.closedMaxIdleTimeDesc = describe("closed_max_idle_time", "The total number of connections closed due to SetConnMaxIdleTime.")
	return c
}
// Describe implements the prometheus.Collector interface by sending the
// descriptor of every exported metric to ch, in a fixed order.
func (c metricsCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		c.maxOpenDesc,
		c.openDesc,
		c.inUseDesc,
		c.idleDesc,
		c.waitedForDesc,
		c.blockedSecondsDesc,
		c.closedMaxIdleDesc,
		c.closedMaxLifetimeDesc,
		c.closedMaxIdleTimeDesc,
	}
	for _, d := range descs {
		ch <- d
	}
}
// Collect implements the prometheus.Collector interface. It takes one snapshot
// of the pool statistics via db.Stats() and sends one constant metric per
// statistic to ch: the current-state values as gauges, the cumulative totals as
// counters.
func (c metricsCollector) Collect(ch chan<- prometheus.Metric) {
	snapshot := c.db.Stats()

	samples := []struct {
		desc  *prometheus.Desc
		kind  prometheus.ValueType
		value float64
	}{
		{c.maxOpenDesc, prometheus.GaugeValue, float64(snapshot.MaxOpenConnections)},
		{c.openDesc, prometheus.GaugeValue, float64(snapshot.OpenConnections)},
		{c.inUseDesc, prometheus.GaugeValue, float64(snapshot.InUse)},
		{c.idleDesc, prometheus.GaugeValue, float64(snapshot.Idle)},
		{c.waitedForDesc, prometheus.CounterValue, float64(snapshot.WaitCount)},
		{c.blockedSecondsDesc, prometheus.CounterValue, snapshot.WaitDuration.Seconds()},
		{c.closedMaxIdleDesc, prometheus.CounterValue, float64(snapshot.MaxIdleClosed)},
		{c.closedMaxLifetimeDesc, prometheus.CounterValue, float64(snapshot.MaxLifetimeClosed)},
		{c.closedMaxIdleTimeDesc, prometheus.CounterValue, float64(snapshot.MaxIdleTimeClosed)},
	}

	// Emit in the same order as Describe.
	for _, s := range samples {
		ch <- prometheus.MustNewConstMetric(s.desc, s.kind, s.value)
	}
}

View File

@ -22,8 +22,10 @@ import (
// MockHashPassword if non-nil is used instead of database.hashPassword. This is useful
// when running tests since we can use a faster implementation.
var MockHashPassword func(password string) (sql.NullString, error)
var MockValidPassword func(hash, password string) bool
var (
MockHashPassword func(password string) (sql.NullString, error)
MockValidPassword func(hash, password string) bool
)
func useFastPasswordMocks() {
// We can't care about security in tests, we care about speed.
@ -158,7 +160,8 @@ func initTest(nameSuffix string) error {
}
}
if err := dbconn.SetupGlobalConnection("dbname=" + dbname); err != nil {
opts := dbconn.Opts{DSN: "dbname=" + dbname, DBName: dbname, AppName: "tests"}
if err := dbconn.SetupGlobalConnection(opts); err != nil {
return err
}

View File

@ -9,6 +9,10 @@ import (
"github.com/sourcegraph/sourcegraph/internal/database/dbtesting"
)
// init sets dbtesting.DBNameSuffix to "insights" when this test package is loaded.
// NOTE(review): presumably this gives the package's tests their own test database
// name — confirm against how dbtesting uses DBNameSuffix.
func init() {
dbtesting.DBNameSuffix = "insights"
}
func TestGetSearchInsights(t *testing.T) {
ctx := context.Background()

View File

@ -90,6 +90,11 @@ func ExecutorQueue() *monitoring.Container {
},
},
},
{
Title: shared.TitleDatabaseConnectionsMonitoring,
Hidden: true,
Rows: shared.DatabaseConnectionsMonitoring("executor-queue"),
},
{
Title: "Internal service requests",
Hidden: true,

View File

@ -490,6 +490,11 @@ func Frontend() *monitoring.Container {
},
},
},
{
Title: shared.TitleDatabaseConnectionsMonitoring,
Hidden: true,
Rows: shared.DatabaseConnectionsMonitoring("frontend"),
},
{
Title: "Internal service requests",
Hidden: true,

View File

@ -210,7 +210,8 @@ func GitServer() *monitoring.Container {
}),
Owner: monitoring.ObservableOwnerCoreApplication,
},
}, {
},
{
{
Name: "repository_clone_queue_size",
Description: "repository clone queue size",
@ -236,7 +237,8 @@ func GitServer() *monitoring.Container {
- **Check the gitserver logs for more information.**
`,
},
}, {
},
{
{
Name: "echo_command_duration_test",
Description: "echo test command duration",
@ -297,6 +299,11 @@ func GitServer() *monitoring.Container {
},
},
},
{
Title: shared.TitleDatabaseConnectionsMonitoring,
Hidden: true,
Rows: shared.DatabaseConnectionsMonitoring("gitserver"),
},
{
Title: shared.TitleContainerMonitoring,
Hidden: true,

View File

@ -186,6 +186,11 @@ func PreciseCodeIntelWorker() *monitoring.Container {
},
},
},
{
Title: shared.TitleDatabaseConnectionsMonitoring,
Hidden: true,
Rows: shared.DatabaseConnectionsMonitoring("precise-code-intel-worker"),
},
{
Title: "Internal service requests",
Hidden: true,

View File

@ -426,6 +426,11 @@ func RepoUpdater() *monitoring.Container {
},
},
},
{
Title: shared.TitleDatabaseConnectionsMonitoring,
Hidden: true,
Rows: shared.DatabaseConnectionsMonitoring("repo-updater"),
},
{
Title: shared.TitleContainerMonitoring,
Hidden: true,

View File

@ -0,0 +1,104 @@
package shared
import (
"fmt"
"github.com/sourcegraph/sourcegraph/monitoring/monitoring"
)
// TitleDatabaseConnectionsMonitoring is the title of the dashboard group
// whose rows are produced by DatabaseConnectionsMonitoring.
const TitleDatabaseConnectionsMonitoring = "Database connections"
// DatabaseConnectionsMonitoring returns the rows of the "Database connections"
// dashboard group for the service identified by app.
//
// Every panel queries one src_pgsql_conns_* metric restricted to
// app_name == app and summed by (app_name, db_name); the legend shows the
// db_name label so each database appears as its own series. None of the
// observables define alerts.
func DatabaseConnectionsMonitoring(app string) []monitoring.Row {
	// Legend shared by all panels in this group.
	const legend = "dbname={{db_name}}"

	// current builds the query for a metric plotted at its instantaneous value.
	current := func(metric string) string {
		return fmt.Sprintf(`sum by (app_name, db_name) (src_pgsql_conns_%s{app_name=%q})`, metric, app)
	}

	// perMinute builds the query for a metric plotted as its increase over
	// the trailing one-minute window.
	perMinute := func(metric string) string {
		return fmt.Sprintf(`sum by (app_name, db_name) (increase(src_pgsql_conns_%s{app_name=%q}[1m]))`, metric, app)
	}

	// Pool size: configured ceiling next to what is actually established.
	poolSize := monitoring.Row{
		{
			Name:           "max_open_conns",
			Description:    "maximum open",
			Query:          current("max_open"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
		{
			Name:           "open_conns",
			Description:    "established",
			Query:          current("open"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
	}

	// Pool usage: connections in use versus idle.
	poolUsage := monitoring.Row{
		{
			Name:           "in_use",
			Description:    "used",
			Query:          current("in_use"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
		{
			Name:           "idle",
			Description:    "idle",
			Query:          current("idle"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
	}

	// Contention: how often and for how long callers waited for a connection.
	contention := monitoring.Row{
		{
			Name:           "waited_for",
			Description:    "waited for",
			Query:          perMinute("waited_for"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
		{
			Name:        "blocked_seconds",
			Description: "blocked seconds",
			Query:       perMinute("blocked_seconds"),
			// The only panel in this group with an explicit unit.
			Panel:          monitoring.Panel().LegendFormat(legend).Unit(monitoring.Seconds),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
	}

	// Closures: connections closed by each of the pool's limits.
	closures := monitoring.Row{
		{
			Name:           "closed_max_idle",
			Description:    "closed by SetMaxIdleConns",
			Query:          perMinute("closed_max_idle"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
		{
			Name:           "closed_max_lifetime",
			Description:    "closed by SetConnMaxLifetime",
			Query:          perMinute("closed_max_lifetime"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
		{
			Name:           "closed_max_idle_time",
			Description:    "closed by SetConnMaxIdleTime",
			Query:          perMinute("closed_max_idle_time"),
			Panel:          monitoring.Panel().LegendFormat(legend),
			NoAlert:        true,
			Owner:          monitoring.ObservableOwnerCoreApplication,
			Interpretation: "none",
		},
	}

	return []monitoring.Row{poolSize, poolUsage, contention, closures}
}

View File

@ -202,6 +202,11 @@ func Worker() *monitoring.Container {
},
},
},
{
Title: shared.TitleDatabaseConnectionsMonitoring,
Hidden: true,
Rows: shared.DatabaseConnectionsMonitoring("worker"),
},
{
Title: "Internal service requests",
Hidden: true,

View File

@ -26,7 +26,27 @@ env:
JAEGER_SERVER_URL: http://localhost:16686
ZOEKT_HOST: localhost:3070
SRC_PROF_HTTP: '' # This needs to be empty?
SRC_PROF_HTTP: ''
SRC_PROF_SERVICES: |
[
{ "Name": "frontend", "Host": "127.0.0.1:6063" },
{ "Name": "enterprise-frontend", "Host": "127.0.0.1:6063" },
{ "Name": "gitserver", "Host": "127.0.0.1:6068" },
{ "Name": "searcher", "Host": "127.0.0.1:6069" },
{ "Name": "symbols", "Host": "127.0.0.1:6071" },
{ "Name": "repo-updater", "Host": "127.0.0.1:6074" },
{ "Name": "enterprise-repo-updater", "Host": "127.0.0.1:6074" },
{ "Name": "query-runner", "Host": "127.0.0.1:6067" },
{ "Name": "precise-code-intel-worker", "Host": "127.0.0.1:6088" },
{ "Name": "worker", "Host": "127.0.0.1:6089" },
{ "Name": "enterprise-worker", "Host": "127.0.0.1:6089" },
{ "Name": "executor-queue", "Host": "127.0.0.1:6091" },
{ "Name": "executor", "Host": "127.0.0.1:6092" },
{ "Name": "zoekt-indexserver-0", "Host": "127.0.0.1:6072" },
{ "Name": "zoekt-indexserver-1", "Host": "127.0.0.1:6073" },
{ "Name": "zoekt-webserver-0", "Host": "127.0.0.1:3070", "DefaultPath": "/debug/requests/" },
{ "Name": "zoekt-webserver-1", "Host": "127.0.0.1:3071", "DefaultPath": "/debug/requests/" }
]
OVERRIDE_AUTH_SECRET: sSsNGlI8fBDftBz0LDQNXEnP6lrWdt9g0fK6hoFvGQ
# Settings/config
SITE_CONFIG_FILE: ./dev/site-config.json