insights: expose job status info over GraphQL API (#20105)

This exposes some status/progress information we can use in the Code Insights
frontend to communicate to the viewer what is going on, how many points have
been collected, if errors occurred, etc.

For sure this isn't perfect, the most beautiful API, always super clear, etc.
but it's a _lot_ better than what we have right now (nothing.)

<img width="783" alt="image" src="https://user-images.githubusercontent.com/3173176/115091535-9bd6e180-9ecc-11eb-9856-ba74784ee8e2.png">

Missing tests, but it's a pretty straightforward implementation so I'm content
merging it as-is (especially given the other option is for us to not have this
until we get a backend engineer.)

Signed-off-by: Stephen Gutekanst <stephen@sourcegraph.com>
This commit is contained in:
Stephen Gutekanst 2021-04-16 16:10:38 -07:00 committed by GitHub
parent 81ba85eff6
commit ef666e3150
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 175 additions and 17 deletions

View File

@ -16,6 +16,13 @@ type InsightsDataPointResolver interface {
Value() float64
}
// InsightStatusResolver resolves status/progress information for a single
// insight data series (backs the InsightSeriesStatus GraphQL type).
type InsightStatusResolver interface {
	// TotalPoints is the total number of stored data points for the series.
	TotalPoints() int32
	// PendingJobs is the number of jobs still queued or in progress.
	PendingJobs() int32
	// CompletedJobs is the number of successfully completed jobs.
	CompletedJobs() int32
	// FailedJobs is the number of jobs that permanently failed.
	FailedJobs() int32
}
type InsightsPointsArgs struct {
From *DateTime
To *DateTime
@ -24,6 +31,7 @@ type InsightsPointsArgs struct {
type InsightSeriesResolver interface {
Label() string
Points(ctx context.Context, args *InsightsPointsArgs) ([]InsightsDataPointResolver, error)
Status(ctx context.Context) (InsightStatusResolver, error)
}
type InsightResolver interface {

View File

@ -797,6 +797,11 @@ type InsightsSeries {
If no 'to' time range is specified, the current point in time is assumed.
"""
points(from: DateTime, to: DateTime): [InsightDataPoint!]!
"""
The status of this series of data, e.g. progress collecting it.
"""
status: InsightSeriesStatus!
}
"""
@ -814,6 +819,58 @@ type InsightDataPoint {
value: Float!
}
"""
Status indicators for a specific series of insight data.
"""
type InsightSeriesStatus {
"""
The total number of points stored for this series, at the finest level
(e.g. per repository, or per-repository-per-language) Has no strict relation
to the data points shown in the web UI or returned by `points()`, because those
are aggregated and this number _can_ report some duplicates points which get
stored but removed at query time for the web UI.
Why its useful: an insight may look like "it is doing nothing" but in reality
this number will be increasing by e.g. several thousands of points rapidly.
"""
totalPoints: Int!
"""
The total number of jobs currently pending to add new data points for this series.
Each job may create multiple data points (e.g. a job may create one data point per
repo, or language, etc.) This number will go up and down over time until all work
is completed (discovering work takes almost as long as doing the work.)
Why its useful: signals "amount of work still to be done."
"""
pendingJobs: Int!
"""
The total number of jobs completed for this series. Note that since pendingJobs will
go up/down over time, you CANNOT divide these two numbers to get a percentage as it
would be nonsense ("it says 90% complete but has been like that for a really long
time!").
Does not include 'failedJobs'.
Why its useful: gives an indication of "how much work has been done?"
"""
completedJobs: Int!
"""
The total number of jobs that were tried multiple times and outright failed. They will
not be retried again, and indicates the series has incomplete data.
Use ((failedJobs / completedJobs) * 100.0) to get an approximate percentage of how
much data the series data may be missing (e.g. ((30 / 150)*100.0) == 20% of the series
data is incomplete (rough approximation, not precise).
Why its useful: signals if there are problems, and how severe they are.
"""
failedJobs: Int!
}
"""
A new external service.
"""

View File

@ -136,6 +136,42 @@ FROM insights_query_runner_jobs
WHERE id = %s;
`
// JobsStatus describes how many query-runner jobs exist for one series,
// grouped by job state.
type JobsStatus struct {
	// Queued and Processing count jobs that have not yet finished.
	Queued, Processing uint64
	// Completed counts jobs that finished successfully.
	Completed uint64
	// Errored counts jobs that failed but may still be retried; Failed counts
	// jobs that will not run again.
	Errored, Failed uint64
}
// QueryJobsStatus queries the current status of jobs for the specified series,
// issuing one COUNT query per job state.
func QueryJobsStatus(ctx context.Context, workerBaseStore *basestore.Store, seriesID string) (*JobsStatus, error) {
	var status JobsStatus

	// Map each worker job state name onto the field that should receive its count.
	counts := []struct {
		state string
		dst   *uint64
	}{
		{"queued", &status.Queued},
		{"processing", &status.Processing},
		{"completed", &status.Completed},
		{"errored", &status.Errored},
		{"failed", &status.Failed},
	}
	for _, c := range counts {
		n, _, err := basestore.ScanFirstInt(workerBaseStore.Query(
			ctx,
			sqlf.Sprintf(queryJobsStatusFmtStr, seriesID, c.state)),
		)
		if err != nil {
			return nil, err
		}
		*c.dst = uint64(n)
	}
	return &status, nil
}
// queryJobsStatusFmtStr counts jobs for a single (series_id, state) pair; it
// is interpolated via sqlf.Sprintf with the series ID and the state name.
const queryJobsStatusFmtStr = `
-- source: enterprise/internal/insights/background/queryrunner/worker.go:JobsStatus
SELECT COUNT(*) FROM insights_query_runner_jobs WHERE series_id=%s AND state=%s
`
// Job represents a single job for the query runner worker to perform. When enqueued, it is stored
// in the insights_query_runner_jobs table - then the worker dequeues it by reading it from that
// table.

View File

@ -9,14 +9,16 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/discovery"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/store"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/schema"
)
var _ graphqlbackend.InsightConnectionResolver = &insightConnectionResolver{}
type insightConnectionResolver struct {
store store.Interface
settingStore discovery.SettingStore
insightsStore store.Interface
workerBaseStore *basestore.Store
settingStore discovery.SettingStore
// cache results because they are used by multiple fields
once sync.Once
@ -32,7 +34,11 @@ func (r *insightConnectionResolver) Nodes(ctx context.Context) ([]graphqlbackend
}
resolvers := make([]graphqlbackend.InsightResolver, 0, len(nodes))
for _, insight := range nodes {
resolvers = append(resolvers, &insightResolver{store: r.store, insight: insight})
resolvers = append(resolvers, &insightResolver{
insightsStore: r.insightsStore,
workerBaseStore: r.workerBaseStore,
insight: insight,
})
}
return resolvers, nil
}
@ -65,8 +71,9 @@ func (r *insightConnectionResolver) compute(ctx context.Context) ([]*schema.Insi
var _ graphqlbackend.InsightResolver = &insightResolver{}
// insightResolver resolves a single insight (a titled set of data series).
type insightResolver struct {
	insightsStore   store.Interface  // store for insight series data points
	workerBaseStore *basestore.Store // store used to inspect query-runner job state
	insight         *schema.Insight  // the insight definition from settings
}
// Title returns the insight's configured display title.
func (r *insightResolver) Title() string { return r.insight.Title }
@ -77,7 +84,11 @@ func (r *insightResolver) Series() []graphqlbackend.InsightSeriesResolver {
series := r.insight.Series
resolvers := make([]graphqlbackend.InsightSeriesResolver, 0, len(series))
for _, series := range series {
resolvers = append(resolvers, &insightSeriesResolver{store: r.store, series: series})
resolvers = append(resolvers, &insightSeriesResolver{
insightsStore: r.insightsStore,
workerBaseStore: r.workerBaseStore,
series: series,
})
}
return resolvers
}

View File

@ -5,16 +5,19 @@ import (
"time"
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/background/queryrunner"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/discovery"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/store"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/schema"
)
var _ graphqlbackend.InsightSeriesResolver = &insightSeriesResolver{}
// insightSeriesResolver resolves a single series of an insight: its label,
// data points, and collection status.
type insightSeriesResolver struct {
	insightsStore   store.Interface       // store for series data points
	workerBaseStore *basestore.Store      // store used to inspect query-runner job state
	series          *schema.InsightSeries // the series definition from settings
}
// Label returns the series' configured display label.
func (r *insightSeriesResolver) Label() string { return r.series.Label }
@ -41,7 +44,7 @@ func (r *insightSeriesResolver) Points(ctx context.Context, args *graphqlbackend
}
// TODO(slimsag): future: Pass through opts.Limit
points, err := r.store.SeriesPoints(ctx, opts)
points, err := r.insightsStore.SeriesPoints(ctx, opts)
if err != nil {
return nil, err
}
@ -52,6 +55,35 @@ func (r *insightSeriesResolver) Points(ctx context.Context, args *graphqlbackend
return resolvers, nil
}
// Status reports progress information for this series: how many data points
// have been stored so far and how many background jobs are pending, completed,
// or permanently failed.
func (r *insightSeriesResolver) Status(ctx context.Context) (graphqlbackend.InsightStatusResolver, error) {
	seriesID, err := discovery.EncodeSeriesID(r.series)
	if err != nil {
		return nil, err
	}

	// Count every stored point for this series, at the finest granularity.
	pointCount, err := r.insightsStore.CountData(ctx, store.CountDataOpts{SeriesID: &seriesID})
	if err != nil {
		return nil, err
	}

	jobs, err := queryrunner.QueryJobsStatus(ctx, r.workerBaseStore, seriesID)
	if err != nil {
		return nil, err
	}

	// Errored jobs will be retried before becoming failures, so they still
	// count as pending work.
	return insightStatusResolver{
		totalPoints:   int32(pointCount),
		pendingJobs:   int32(jobs.Queued + jobs.Processing + jobs.Errored),
		completedJobs: int32(jobs.Completed),
		failedJobs:    int32(jobs.Failed),
	}, nil
}
var _ graphqlbackend.InsightsDataPointResolver = insightsDataPointResolver{}
type insightsDataPointResolver struct{ p store.SeriesPoint }
@ -61,3 +93,12 @@ func (i insightsDataPointResolver) DateTime() graphqlbackend.DateTime {
}
func (i insightsDataPointResolver) Value() float64 { return i.p.Value }
// insightStatusResolver implements graphqlbackend.InsightStatusResolver by
// simply returning counters computed ahead of time.
type insightStatusResolver struct {
	totalPoints   int32
	pendingJobs   int32
	completedJobs int32
	failedJobs    int32
}

func (i insightStatusResolver) TotalPoints() int32   { return i.totalPoints }
func (i insightStatusResolver) PendingJobs() int32   { return i.pendingJobs }
func (i insightStatusResolver) CompletedJobs() int32 { return i.completedJobs }
func (i insightStatusResolver) FailedJobs() int32    { return i.failedJobs }

View File

@ -34,9 +34,9 @@ func TestResolver_InsightSeries(t *testing.T) {
resolver := newWithClock(timescale, postgres, clock)
// Create a mock store, delegating any un-mocked methods to the DB store.
dbStore := resolver.store
dbStore := resolver.insightsStore
mockStore := store.NewMockInterfaceFrom(dbStore)
resolver.store = mockStore
resolver.insightsStore = mockStore
// Create the insights connection resolver and query series.
conn, err := resolver.Insights(ctx)

View File

@ -2,11 +2,13 @@ package resolvers
import (
"context"
"database/sql"
"time"
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/store"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
"github.com/sourcegraph/sourcegraph/internal/timeutil"
)
@ -15,8 +17,9 @@ var _ graphqlbackend.InsightsResolver = &Resolver{}
// Resolver is the GraphQL resolver of all things related to Insights.
// Resolver is the GraphQL resolver of all things related to Insights.
type Resolver struct {
	insightsStore   store.Interface          // Timescale-backed store of series data points
	workerBaseStore *basestore.Store         // Postgres store used to inspect query-runner jobs
	settingStore    *database.SettingStore   // settings store used to discover insight definitions
}
// New returns a new Resolver whose store uses the given Timescale and Postgres DBs.
@ -28,14 +31,16 @@ func New(timescale, postgres dbutil.DB) graphqlbackend.InsightsResolver {
// clock for timestamps.
// newWithClock returns a new Resolver whose store uses the given Timescale and
// Postgres DBs, and the given clock for timestamps.
func newWithClock(timescale, postgres dbutil.DB, clock func() time.Time) *Resolver {
	r := &Resolver{
		insightsStore:   store.NewWithClock(timescale, clock),
		workerBaseStore: basestore.NewWithDB(postgres, sql.TxOptions{}),
		settingStore:    database.Settings(postgres),
	}
	return r
}
// Insights returns a connection resolver over all insights discovered from
// settings, sharing this resolver's stores.
func (r *Resolver) Insights(ctx context.Context) (graphqlbackend.InsightConnectionResolver, error) {
	conn := &insightConnectionResolver{
		insightsStore:   r.insightsStore,
		workerBaseStore: r.workerBaseStore,
		settingStore:    r.settingStore,
	}
	return conn, nil
}