mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 17:51:57 +00:00
API docs: codeintel: add OOB migration to index API docs for search (#25207)
* migrations/frontend: add OOB migration for API docs search indexing * codeintel/lsifstore: implement OOB migration for API docs search indexing Signed-off-by: Stephen Gutekanst <stephen@sourcegraph.com>
This commit is contained in:
parent
8e54706299
commit
b910fedad1
@ -22,6 +22,8 @@ type Config struct {
|
||||
ReferencesCountMigrationBatchInterval time.Duration
|
||||
DocumentColumnSplitMigrationBatchSize int
|
||||
DocumentColumnSplitMigrationBatchInterval time.Duration
|
||||
APIDocsSearchMigrationBatchSize int
|
||||
APIDocsSearchMigrationBatchInterval time.Duration
|
||||
CommittedAtMigrationBatchSize int
|
||||
CommittedAtMigrationBatchInterval time.Duration
|
||||
ReferenceCountMigrationBatchSize int
|
||||
@ -48,6 +50,8 @@ func init() {
|
||||
config.ReferencesCountMigrationBatchInterval = config.GetInterval("PRECISE_CODE_INTEL_REFERENCES_COUNT_MIGRATION_BATCH_INTERVAL", "1s", "The timeout between processing migration batches.")
|
||||
config.DocumentColumnSplitMigrationBatchSize = config.GetInt("PRECISE_CODE_INTEL_DOCUMENT_COLUMN_SPLIT_MIGRATION_BATCH_SIZE", "100", "The maximum number of document records to migrate at a time.")
|
||||
config.DocumentColumnSplitMigrationBatchInterval = config.GetInterval("PRECISE_CODE_INTEL_DOCUMENT_COLUMN_SPLIT_MIGRATION_BATCH_INTERVAL", "1s", "The timeout between processing migration batches.")
|
||||
config.APIDocsSearchMigrationBatchSize = config.GetInt("PRECISE_CODE_INTEL_API_DOCS_SEARCH_MIGRATION_BATCH_SIZE", "1", "The maximum number of bundles to migrate at a time.")
|
||||
config.APIDocsSearchMigrationBatchInterval = config.GetInterval("PRECISE_CODE_INTEL_API_DOCS_SEARCH_MIGRATION_BATCH_INTERVAL", "1s", "The timeout between processing migration batches.")
|
||||
config.CommittedAtMigrationBatchSize = config.GetInt("PRECISE_CODE_INTEL_COMMITTED_AT_MIGRATION_BATCH_SIZE", "100", "The maximum number of upload records to migrate at a time.")
|
||||
config.CommittedAtMigrationBatchInterval = config.GetInterval("PRECISE_CODE_INTEL_COMMITTED_AT_MIGRATION_BATCH_INTERVAL", "1s", "The timeout between processing migration batches.")
|
||||
config.ReferenceCountMigrationBatchSize = config.GetInt("PRECISE_CODE_INTEL_REFERENCE_COUNT_MIGRATION_BATCH_SIZE", "100", "The maximum number of upload records to migrate at a time.")
|
||||
|
||||
@ -44,6 +44,20 @@ func registerMigrations(ctx context.Context, db dbutil.DB, outOfBandMigrationRun
|
||||
return err
|
||||
}
|
||||
|
||||
if err := outOfBandMigrationRunner.Register(
|
||||
lsifmigrations.APIDocsSearchMigrationID, // 12
|
||||
lsifmigrations.NewAPIDocsSearchMigrator(
|
||||
services.lsifStore,
|
||||
services.dbStore,
|
||||
services.repoStore,
|
||||
services.gitserverClient,
|
||||
config.APIDocsSearchMigrationBatchSize,
|
||||
),
|
||||
oobmigration.MigratorOptions{Interval: config.APIDocsSearchMigrationBatchInterval},
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := outOfBandMigrationRunner.Register(
|
||||
dbmigrations.CommittedAtMigrationID, // 8
|
||||
dbmigrations.NewCommittedAtMigrator(services.dbStore, services.gitserverClient, config.CommittedAtMigrationBatchSize),
|
||||
|
||||
@ -17,6 +17,7 @@ import (
|
||||
"github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/stores/lsifstore"
|
||||
"github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/stores/uploadstore"
|
||||
"github.com/sourcegraph/sourcegraph/internal/conf"
|
||||
"github.com/sourcegraph/sourcegraph/internal/database"
|
||||
"github.com/sourcegraph/sourcegraph/internal/database/dbconn"
|
||||
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
|
||||
"github.com/sourcegraph/sourcegraph/internal/database/locker"
|
||||
@ -29,6 +30,7 @@ var services struct {
|
||||
dbStore *store.Store
|
||||
locker *locker.Locker
|
||||
lsifStore *lsifstore.Store
|
||||
repoStore *database.RepoStore
|
||||
uploadStore uploadstore.Store
|
||||
gitserverClient *gitserver.Client
|
||||
indexEnqueuer *enqueuer.IndexEnqueuer
|
||||
@ -72,6 +74,7 @@ func initServices(ctx context.Context, db dbutil.DB) error {
|
||||
services.dbStore = dbStore
|
||||
services.locker = locker
|
||||
services.lsifStore = lsifStore
|
||||
services.repoStore = database.ReposWith(dbStore.Store)
|
||||
services.uploadStore = uploadStore
|
||||
services.gitserverClient = gitserverClient
|
||||
services.indexEnqueuer = indexEnqueuer
|
||||
|
||||
@ -0,0 +1,207 @@
|
||||
package migration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/cockroachdb/errors"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/inconshreveable/log15"
|
||||
"github.com/keegancsmith/sqlf"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/stores/dbstore"
|
||||
"github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/stores/lsifstore"
|
||||
"github.com/sourcegraph/sourcegraph/internal/api"
|
||||
"github.com/sourcegraph/sourcegraph/internal/conf"
|
||||
"github.com/sourcegraph/sourcegraph/internal/database"
|
||||
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
|
||||
"github.com/sourcegraph/sourcegraph/internal/oobmigration"
|
||||
"github.com/sourcegraph/sourcegraph/lib/codeintel/precise"
|
||||
)
|
||||
|
||||
// APIDocsSearchMigrationID is the primary key of the migration record handled by an instance of
|
||||
// apiDocsSearchMigrator. This populates the new lsif_data_documentation_search_* tables using data
|
||||
// decoded from other tables. This is associated with the out-of-band migration record inserted in
|
||||
// migrations/frontend/1528395874_oob_lsif_data_documentation_search.up.sql.
|
||||
const APIDocsSearchMigrationID = 12
|
||||
|
||||
// NewAPIDocsSearchMigrator creates a new Migrator instance that reads records from the lsif_data_documentation_pages
|
||||
// table, decodes the GOB payloads, and populates the new lsif_data_documentation_search_* tables with
|
||||
// the information needed to search API docs.
|
||||
func NewAPIDocsSearchMigrator(
|
||||
store *lsifstore.Store,
|
||||
dbStore *dbstore.Store,
|
||||
repoStore *database.RepoStore,
|
||||
gitserverClient GitserverClient,
|
||||
batchSize int,
|
||||
) oobmigration.Migrator {
|
||||
return &apiDocsSearchMigrator{
|
||||
store: store,
|
||||
dbStore: dbStore,
|
||||
repoStore: repoStore,
|
||||
gitserverClient: gitserverClient,
|
||||
serializer: lsifstore.NewSerializer(),
|
||||
batchSize: batchSize,
|
||||
}
|
||||
}
|
||||
|
||||
// Implements the oobmigration.Migrator interface.
type apiDocsSearchMigrator struct {
	// store is the codeintel-db store holding the lsif_data_documentation_* tables.
	store *lsifstore.Store
	// dbStore provides access to upload records (GetUploadByID).
	dbStore *dbstore.Store
	// repoStore resolves repository IDs to repository records.
	repoStore *database.RepoStore
	// gitserverClient answers whether a commit is contained in the default branch.
	gitserverClient GitserverClient
	// serializer decodes the GOB payloads stored in lsif_data_documentation_pages.
	serializer *lsifstore.Serializer
	// batchSize is the maximum number of dumps processed per invocation of Up.
	batchSize int
}
|
||||
|
||||
// Progress returns a percentage (in the range range [0, 1]) of data records that need
|
||||
// to be upgraded in the forward direction. A value of 1 means that no further action
|
||||
// is required. A value < 1 denotes that a future invocation of the Up method could
|
||||
// migrate additional data (excluding error conditions and prerequisite migrations).
|
||||
func (m *apiDocsSearchMigrator) Progress(ctx context.Context) (float64, error) {
|
||||
progress, _, err := basestore.ScanFirstFloat(m.store.Query(ctx, sqlf.Sprintf(apiDocsSearchMigratorProgressQuery)))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return progress, nil
|
||||
}
|
||||
|
||||
const apiDocsSearchMigratorProgressQuery = `
|
||||
-- source: enterprise/internal/codeintel/stores/lsifstore/migration/apidocs_search.go:Progress
|
||||
SELECT CASE c2.count WHEN 0 THEN 1 ELSE cast(c1.count as float) / cast(c2.count as float) END FROM
|
||||
(SELECT count(DISTINCT dump_id) FROM lsif_data_documentation_pages WHERE search_indexed='true') c1,
|
||||
(SELECT count(DISTINCT dump_id) FROM lsif_data_documentation_pages) c2
|
||||
`
|
||||
|
||||
// Up runs a batch of the migration. This method is called repeatedly until the Progress
|
||||
// method reports completion. Errors returned from this method will be associated with the
|
||||
// migration record.
|
||||
func (m *apiDocsSearchMigrator) Up(ctx context.Context) error {
|
||||
if !conf.APIDocsSearchIndexingEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
tx, err := m.store.Transact(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { err = tx.Done(err) }()
|
||||
|
||||
dumpIDs, err := basestore.ScanInts(tx.Query(ctx, sqlf.Sprintf(apiDocsSearchMigratorUnprocessedDumpsQuery, m.batchSize)))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
done := make(chan error, m.batchSize)
|
||||
for _, dumpID := range dumpIDs {
|
||||
dumpID := dumpID
|
||||
go func() {
|
||||
err := m.processUpload(ctx, dumpID)
|
||||
done <- err
|
||||
}()
|
||||
}
|
||||
var errs error
|
||||
for range dumpIDs {
|
||||
err := <-done
|
||||
if err != nil {
|
||||
errs = multierror.Append(errs, err)
|
||||
}
|
||||
}
|
||||
return errs
|
||||
}
|
||||
|
||||
const apiDocsSearchMigratorUnprocessedDumpsQuery = `
|
||||
-- source: enterprise/internal/codeintel/stores/lsifstore/migration/apidocs_search.go:Up
|
||||
SELECT DISTINCT dump_id FROM lsif_data_documentation_pages
|
||||
WHERE search_indexed='false'
|
||||
LIMIT %s
|
||||
`
|
||||
|
||||
// processUpload indexes all of the API documentation for the given dump ID by decoding the information
|
||||
// in lsif_data_documentation_pages and inserting into the new lsif_data_documentation_search_* tables.
|
||||
func (m *apiDocsSearchMigrator) processUpload(ctx context.Context, uploadID int) error {
|
||||
upload, exists, err := m.dbStore.GetUploadByID(ctx, uploadID)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "GetUploadByID")
|
||||
}
|
||||
if !exists {
|
||||
// The upload doesn't exist anymore, don't error out - just skip migrating this one.
|
||||
log15.Error("API docs: migration: could not find LSIF upload, skipping", "id", uploadID)
|
||||
if err := m.store.Exec(ctx, sqlf.Sprintf(apiDocsSearchMigratorProcessedDumpQuery, uploadID)); err != nil {
|
||||
return errors.Wrap(err, "marking upload as migrated")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the associated repository.
|
||||
repos, err := m.repoStore.GetByIDs(ctx, api.RepoID(upload.RepositoryID))
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "RepoStore.GetByIDs")
|
||||
}
|
||||
if len(repos) == 0 {
|
||||
return fmt.Errorf("could not get repo id=%v name=%q", upload.RepositoryID, upload.RepositoryName) // Repository no longer exists? nothing we can do
|
||||
}
|
||||
repo := repos[0]
|
||||
|
||||
// Determine if this bundle was for the default branch or not.
|
||||
isDefaultBranch, err := m.gitserverClient.DefaultBranchContains(ctx, upload.RepositoryID, upload.Commit)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "gitserver.DefaultBranchContains")
|
||||
}
|
||||
|
||||
tx, err := m.store.Transact(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { err = tx.Done(err) }()
|
||||
|
||||
rows, err := m.store.Query(ctx, sqlf.Sprintf(apiDocsSearchMigratorPagesQuery, uploadID))
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "Query")
|
||||
}
|
||||
defer func() { err = basestore.CloseRows(rows, err) }()
|
||||
var (
|
||||
indexed = 0
|
||||
pages []*precise.DocumentationPageData
|
||||
)
|
||||
for rows.Next() {
|
||||
indexed++
|
||||
var pageBytes []byte
|
||||
if err := rows.Scan(&pageBytes); err != nil {
|
||||
return errors.Wrap(err, "Scan")
|
||||
}
|
||||
|
||||
page, err := m.serializer.UnmarshalDocumentationPageData(pageBytes)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "UnmarshalDocumentationPageData")
|
||||
}
|
||||
pages = append(pages, page)
|
||||
}
|
||||
if err := tx.WriteDocumentationSearch(ctx, upload, repo, isDefaultBranch, pages); err != nil {
|
||||
return errors.Wrap(err, "WriteDocumentationSearch")
|
||||
}
|
||||
if err := m.store.Exec(ctx, sqlf.Sprintf(apiDocsSearchMigratorProcessedDumpQuery, uploadID)); err != nil {
|
||||
return errors.Wrap(err, "marking upload as migrated")
|
||||
}
|
||||
log15.Info("Indexed API docs pages for search", "pages_indexed", indexed, "repo", upload.RepositoryName, "upload_id", uploadID)
|
||||
return nil
|
||||
}
|
||||
|
||||
const apiDocsSearchMigratorPagesQuery = `
|
||||
-- source: enterprise/internal/codeintel/stores/lsifstore/migration/apidocs_search.go:Up
|
||||
SELECT data FROM lsif_data_documentation_pages WHERE dump_id=%s
|
||||
`
|
||||
|
||||
const apiDocsSearchMigratorProcessedDumpQuery = `
|
||||
-- source: enterprise/internal/codeintel/stores/lsifstore/migration/apidocs_search.go:Up
|
||||
UPDATE lsif_data_documentation_pages SET search_indexed='true' WHERE dump_id=%s
|
||||
`
|
||||
|
||||
// Down runs a batch of the migration in reverse. This does not need to be implemented
// for migrations which are non-destructive. A non-destructive migration only adds data,
// and does not transform fields that were read by previous versions of Sourcegraph and
// therefore do not need to be undone prior to a downgrade.
func (m *apiDocsSearchMigrator) Down(ctx context.Context) error {
	return nil // our migration is non-destructive, it only populates a new table
}
|
||||
@ -0,0 +1,7 @@
|
||||
package migration
|
||||
|
||||
import "context"
|
||||
|
||||
// GitserverClient is the subset of the gitserver client needed by this package's
// migrators; defined at the consumer so implementations can be swapped in tests.
type GitserverClient interface {
	// DefaultBranchContains reports whether the given commit is contained in the
	// repository's default branch.
	DefaultBranchContains(ctx context.Context, repositoryID int, commit string) (bool, error)
}
|
||||
@ -0,0 +1,7 @@
|
||||
BEGIN;

-- The OOB migration doesn't add any new tables or columns or anything, so we don't need to do
-- anything on down migration. It migrates data from lsif_data_documentation_pages -> the new
-- lsif_data_documentation_search_* tables - but it's fine to just leave those.

COMMIT;
|
||||
@ -0,0 +1,16 @@
|
||||
BEGIN;

-- Create the OOB migration according to doc/dev/background-information/oobmigrations.md.
-- ON CONFLICT DO NOTHING makes this idempotent if the record was already inserted.
INSERT INTO out_of_band_migrations (id, team, component, description, introduced_version_major, introduced_version_minor, non_destructive)
VALUES (
    12,                                            -- This must be consistent across all Sourcegraph instances (matches APIDocsSearchMigrationID in Go)
    'apidocs',                                     -- Team owning migration
    'codeintel-db.lsif_data_documentation_search', -- Component being migrated
    'Index API docs for search',                   -- Description
    3,                                             -- Major version of the next minor release
    32,                                            -- Minor version of the next minor release
    true                                           -- Can be read with previous version without down migration
)
ON CONFLICT DO NOTHING;

COMMIT;
|
||||
Loading…
Reference in New Issue
Block a user