insights: expose job status info over GraphQL API (#20105)

This exposes some status/progress information we can use in the Code Insights
frontend to communicate to the viewer what is going on, how many points have
been collected, if errors occurred, etc.

For sure this isn't perfect, the most beautiful API, always super clear, etc.
but it's a _lot_ better than what we have right now (nothing.)

<img width="783" alt="image" src="https://user-images.githubusercontent.com/3173176/115091535-9bd6e180-9ecc-11eb-9856-ba74784ee8e2.png">

Missing tests, but it's a pretty straightforward implementation so I'm content
merging it as-is (especially given the other option is for us to not have this
until we get a backend engineer.)

Signed-off-by: Stephen Gutekanst <stephen@sourcegraph.com>
This commit is contained in:
Stephen Gutekanst 2021-04-16 16:10:38 -07:00 committed by GitHub
parent 81ba85eff6
commit ef666e3150
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 175 additions and 17 deletions

View File

@ -16,6 +16,13 @@ type InsightsDataPointResolver interface {
Value() float64
}
// InsightStatusResolver resolves status/progress information for a single
// insight data series (backs the InsightSeriesStatus GraphQL type).
type InsightStatusResolver interface {
	// TotalPoints is the total number of stored data points for the series.
	TotalPoints() int32
	// PendingJobs is the number of jobs still queued or in progress.
	PendingJobs() int32
	// CompletedJobs is the number of successfully completed jobs.
	CompletedJobs() int32
	// FailedJobs is the number of jobs that permanently failed.
	FailedJobs() int32
}
type InsightsPointsArgs struct {
From *DateTime
To *DateTime
@ -24,6 +31,7 @@ type InsightsPointsArgs struct {
type InsightSeriesResolver interface {
Label() string
Points(ctx context.Context, args *InsightsPointsArgs) ([]InsightsDataPointResolver, error)
Status(ctx context.Context) (InsightStatusResolver, error)
}
type InsightResolver interface {

View File

@ -797,6 +797,11 @@ type InsightsSeries {
If no 'to' time range is specified, the current point in time is assumed.
"""
points(from: DateTime, to: DateTime): [InsightDataPoint!]!
"""
The status of this series of data, e.g. progress collecting it.
"""
status: InsightSeriesStatus!
}
"""
@ -814,6 +819,58 @@ type InsightDataPoint {
value: Float!
}
"""
Status indicators for a specific series of insight data.
"""
type InsightSeriesStatus {
"""
The total number of points stored for this series, at the finest level
(e.g. per repository, or per-repository-per-language) Has no strict relation
to the data points shown in the web UI or returned by `points()`, because those
are aggregated and this number _can_ report some duplicates points which get
stored but removed at query time for the web UI.
Why its useful: an insight may look like "it is doing nothing" but in reality
this number will be increasing by e.g. several thousands of points rapidly.
"""
totalPoints: Int!
"""
The total number of jobs currently pending to add new data points for this series.
Each job may create multiple data points (e.g. a job may create one data point per
repo, or language, etc.) This number will go up and down over time until all work
is completed (discovering work takes almost as long as doing the work.)
Why its useful: signals "amount of work still to be done."
"""
pendingJobs: Int!
"""
The total number of jobs completed for this series. Note that since pendingJobs will
go up/down over time, you CANNOT divide these two numbers to get a percentage as it
would be nonsense ("it says 90% complete but has been like that for a really long
time!").
Does not include 'failedJobs'.
Why its useful: gives an indication of "how much work has been done?"
"""
completedJobs: Int!
"""
The total number of jobs that were tried multiple times and outright failed. They will
not be retried again, and indicates the series has incomplete data.
Use ((failedJobs / completedJobs) * 100.0) to get an approximate percentage of how
much data the series data may be missing (e.g. ((30 / 150)*100.0) == 20% of the series
data is incomplete (rough approximation, not precise).
Why its useful: signals if there are problems, and how severe they are.
"""
failedJobs: Int!
}
"""
A new external service.
"""

View File

@ -136,6 +136,42 @@ FROM insights_query_runner_jobs
WHERE id = %s;
`
// JobsStatus describes how many query-runner jobs exist for one series,
// grouped by job state.
type JobsStatus struct {
	// Queued and Processing count jobs that have not yet finished.
	Queued, Processing uint64
	// Completed counts jobs that finished successfully.
	Completed uint64
	// Errored counts jobs that failed but may still be retried; Failed counts
	// jobs that will not run again.
	Errored, Failed uint64
}
// QueryJobsStatus queries the current status of jobs for the specified series,
// issuing one COUNT query per job state.
func QueryJobsStatus(ctx context.Context, workerBaseStore *basestore.Store, seriesID string) (*JobsStatus, error) {
	var status JobsStatus

	// Map each worker job state name onto the field that should receive its count.
	counts := []struct {
		state string
		dst   *uint64
	}{
		{"queued", &status.Queued},
		{"processing", &status.Processing},
		{"completed", &status.Completed},
		{"errored", &status.Errored},
		{"failed", &status.Failed},
	}
	for _, c := range counts {
		n, _, err := basestore.ScanFirstInt(workerBaseStore.Query(
			ctx,
			sqlf.Sprintf(queryJobsStatusFmtStr, seriesID, c.state)),
		)
		if err != nil {
			return nil, err
		}
		*c.dst = uint64(n)
	}
	return &status, nil
}
// queryJobsStatusFmtStr counts jobs for a single (series_id, state) pair; it
// is interpolated via sqlf.Sprintf with the series ID and the state name.
const queryJobsStatusFmtStr = `
-- source: enterprise/internal/insights/background/queryrunner/worker.go:JobsStatus
SELECT COUNT(*) FROM insights_query_runner_jobs WHERE series_id=%s AND state=%s
`
// Job represents a single job for the query runner worker to perform. When enqueued, it is stored
// in the insights_query_runner_jobs table - then the worker dequeues it by reading it from that
// table.

View File

@ -9,14 +9,16 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/discovery"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/store"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/schema"
)
var _ graphqlbackend.InsightConnectionResolver = &insightConnectionResolver{}
type insightConnectionResolver struct {
store store.Interface
settingStore discovery.SettingStore
insightsStore store.Interface
workerBaseStore *basestore.Store
settingStore discovery.SettingStore
// cache results because they are used by multiple fields
once sync.Once
@ -32,7 +34,11 @@ func (r *insightConnectionResolver) Nodes(ctx context.Context) ([]graphqlbackend
}
resolvers := make([]graphqlbackend.InsightResolver, 0, len(nodes))
for _, insight := range nodes {
resolvers = append(resolvers, &insightResolver{store: r.store, insight: insight})
resolvers = append(resolvers, &insightResolver{
insightsStore: r.insightsStore,
workerBaseStore: r.workerBaseStore,
insight: insight,
})
}
return resolvers, nil
}
@ -65,8 +71,9 @@ func (r *insightConnectionResolver) compute(ctx context.Context) ([]*schema.Insi
var _ graphqlbackend.InsightResolver = &insightResolver{}
// insightResolver resolves a single insight (a titled set of data series).
type insightResolver struct {
	insightsStore   store.Interface  // store for insight series data points
	workerBaseStore *basestore.Store // store used to inspect query-runner job state
	insight         *schema.Insight  // the insight definition from settings
}
// Title returns the insight's configured display title.
func (r *insightResolver) Title() string { return r.insight.Title }
@ -77,7 +84,11 @@ func (r *insightResolver) Series() []graphqlbackend.InsightSeriesResolver {
series := r.insight.Series
resolvers := make([]graphqlbackend.InsightSeriesResolver, 0, len(series))
for _, series := range series {
resolvers = append(resolvers, &insightSeriesResolver{store: r.store, series: series})
resolvers = append(resolvers, &insightSeriesResolver{
insightsStore: r.insightsStore,
workerBaseStore: r.workerBaseStore,
series: series,
})
}
return resolvers
}

View File

@ -5,16 +5,19 @@ import (
"time"
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/background/queryrunner"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/discovery"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/store"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/schema"
)
var _ graphqlbackend.InsightSeriesResolver = &insightSeriesResolver{}
// insightSeriesResolver resolves a single series of an insight: its label,
// data points, and collection status.
type insightSeriesResolver struct {
	insightsStore   store.Interface       // store for series data points
	workerBaseStore *basestore.Store      // store used to inspect query-runner job state
	series          *schema.InsightSeries // the series definition from settings
}
// Label returns the series' configured display label.
func (r *insightSeriesResolver) Label() string { return r.series.Label }
@ -41,7 +44,7 @@ func (r *insightSeriesResolver) Points(ctx context.Context, args *graphqlbackend
}
// TODO(slimsag): future: Pass through opts.Limit
points, err := r.store.SeriesPoints(ctx, opts)
points, err := r.insightsStore.SeriesPoints(ctx, opts)
if err != nil {
return nil, err
}
@ -52,6 +55,35 @@ func (r *insightSeriesResolver) Points(ctx context.Context, args *graphqlbackend
return resolvers, nil
}
// Status reports progress information for this series: how many data points
// have been stored so far and how many background jobs are pending, completed,
// or permanently failed.
func (r *insightSeriesResolver) Status(ctx context.Context) (graphqlbackend.InsightStatusResolver, error) {
	seriesID, err := discovery.EncodeSeriesID(r.series)
	if err != nil {
		return nil, err
	}

	// Count every stored point for this series, at the finest granularity.
	pointCount, err := r.insightsStore.CountData(ctx, store.CountDataOpts{SeriesID: &seriesID})
	if err != nil {
		return nil, err
	}

	jobs, err := queryrunner.QueryJobsStatus(ctx, r.workerBaseStore, seriesID)
	if err != nil {
		return nil, err
	}

	// Errored jobs will be retried before becoming failures, so they still
	// count as pending work.
	return insightStatusResolver{
		totalPoints:   int32(pointCount),
		pendingJobs:   int32(jobs.Queued + jobs.Processing + jobs.Errored),
		completedJobs: int32(jobs.Completed),
		failedJobs:    int32(jobs.Failed),
	}, nil
}
var _ graphqlbackend.InsightsDataPointResolver = insightsDataPointResolver{}
type insightsDataPointResolver struct{ p store.SeriesPoint }
@ -61,3 +93,12 @@ func (i insightsDataPointResolver) DateTime() graphqlbackend.DateTime {
}
func (i insightsDataPointResolver) Value() float64 { return i.p.Value }
// insightStatusResolver implements graphqlbackend.InsightStatusResolver by
// simply returning counters computed ahead of time.
type insightStatusResolver struct {
	totalPoints   int32
	pendingJobs   int32
	completedJobs int32
	failedJobs    int32
}

func (i insightStatusResolver) TotalPoints() int32   { return i.totalPoints }
func (i insightStatusResolver) PendingJobs() int32   { return i.pendingJobs }
func (i insightStatusResolver) CompletedJobs() int32 { return i.completedJobs }
func (i insightStatusResolver) FailedJobs() int32    { return i.failedJobs }

View File

@ -34,9 +34,9 @@ func TestResolver_InsightSeries(t *testing.T) {
resolver := newWithClock(timescale, postgres, clock)
// Create a mock store, delegating any un-mocked methods to the DB store.
dbStore := resolver.store
dbStore := resolver.insightsStore
mockStore := store.NewMockInterfaceFrom(dbStore)
resolver.store = mockStore
resolver.insightsStore = mockStore
// Create the insights connection resolver and query series.
conn, err := resolver.Insights(ctx)

View File

@ -2,11 +2,13 @@ package resolvers
import (
"context"
"database/sql"
"time"
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend"
"github.com/sourcegraph/sourcegraph/enterprise/internal/insights/store"
"github.com/sourcegraph/sourcegraph/internal/database"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
"github.com/sourcegraph/sourcegraph/internal/timeutil"
)
@ -15,8 +17,9 @@ var _ graphqlbackend.InsightsResolver = &Resolver{}
// Resolver is the GraphQL resolver of all things related to Insights.
// Resolver is the GraphQL resolver of all things related to Insights.
type Resolver struct {
	insightsStore   store.Interface          // Timescale-backed store of series data points
	workerBaseStore *basestore.Store         // Postgres store used to inspect query-runner jobs
	settingStore    *database.SettingStore   // settings store used to discover insight definitions
}
// New returns a new Resolver whose store uses the given Timescale and Postgres DBs.
@ -28,14 +31,16 @@ func New(timescale, postgres dbutil.DB) graphqlbackend.InsightsResolver {
// clock for timestamps.
// newWithClock returns a new Resolver whose store uses the given Timescale and
// Postgres DBs, and the given clock for timestamps.
func newWithClock(timescale, postgres dbutil.DB, clock func() time.Time) *Resolver {
	r := &Resolver{
		insightsStore:   store.NewWithClock(timescale, clock),
		workerBaseStore: basestore.NewWithDB(postgres, sql.TxOptions{}),
		settingStore:    database.Settings(postgres),
	}
	return r
}
// Insights returns a connection resolver over all insights discovered from
// settings, sharing this resolver's stores.
func (r *Resolver) Insights(ctx context.Context) (graphqlbackend.InsightConnectionResolver, error) {
	conn := &insightConnectionResolver{
		insightsStore:   r.insightsStore,
		workerBaseStore: r.workerBaseStore,
		settingStore:    r.settingStore,
	}
	return conn, nil
}