ranking: Make map and reduce steps atomic and non-continuous (#51214)
parent df34ca7e71
commit c42e5d75bd

deps.bzl (2 lines changed)
@@ -6334,13 +6334,13 @@ def go_dependencies():
        name = "com_github_sourcegraph_zoekt",
        build_file_proto_mode = "disable_global",
        importpath = "github.com/sourcegraph/zoekt",
        patch_args = ["-p1"],
        patches = [
            "//third_party/com_github_sourcegraph_zoekt:zoekt_archive_index.patch",
            "//third_party/com_github_sourcegraph_zoekt:zoekt_git_index.patch",
            "//third_party/com_github_sourcegraph_zoekt:zoekt_webserver.patch",
            "//third_party/com_github_sourcegraph_zoekt:zoekt_indexserver.patch",
        ],
        patch_args = ["-p1"],
        sum = "h1:lCxBFbLdzt0m789CNyYWEqPURhta69aRA9ixPyWilvI=",
        version = "v0.0.0-20230503105159-f818d968ddad",
    )

@@ -12,6 +12,16 @@ FLAKES = {
        "path": "dev/sg/linters",
        "prefix": "TestLibLogLinter",
        "reason": "Test was having incomplete data, fails now that constraints are in place"
    },
    {
        "path": "internal/database",
        "prefix": "TestRepos_List_LastChanged",
        "reason": "Shifting constraints on table; ranking is experimental"
    },
    {
        "path": "enterprise/internal/codeintel/ranking/internal/store",
        "prefix": "Test",
        "reason": "Shifting constraints on table; ranking is experimental"
    }
]
}

@@ -24,6 +24,7 @@ func (j *rankingJob) Description() string {
func (j *rankingJob) Config() []env.Config {
    return []env.Config{
        ranking.ExporterConfigInst,
        ranking.CoordinatorConfigInst,
        ranking.MapperConfigInst,
        ranking.ReducerConfigInst,
        ranking.JanitorConfigInst,
@@ -38,6 +39,7 @@ func (j *rankingJob) Routines(_ context.Context, observationCtx *observation.Con

    routines := []goroutine.BackgroundRoutine{}
    routines = append(routines, ranking.NewSymbolExporter(observationCtx, services.RankingService))
    routines = append(routines, ranking.NewCoordinator(observationCtx, services.RankingService))
    routines = append(routines, ranking.NewMapper(observationCtx, services.RankingService)...)
    routines = append(routines, ranking.NewReducer(observationCtx, services.RankingService))
    routines = append(routines, ranking.NewSymbolJanitor(observationCtx, services.RankingService)...)

@@ -17,8 +17,6 @@ type cleanupJobsConfig struct {
    FailedIndexMaxAge time.Duration
}

var ConfigCleanupInst = &cleanupJobsConfig{}

func (c *cleanupJobsConfig) Load() {
    minimumTimeSinceLastCheckName := env.ChooseFallbackVariableName("CODEINTEL_AUTOINDEXING_MINIMUM_TIME_SINCE_LAST_CHECK", "PRECISE_CODE_INTEL_COMMIT_RESOLVER_MINIMUM_TIME_SINCE_LAST_CHECK")
    commitResolverBatchSizeName := env.ChooseFallbackVariableName("CODEINTEL_AUTOINDEXING_COMMIT_RESOLVER_BATCH_SIZE", "PRECISE_CODE_INTEL_COMMIT_RESOLVER_BATCH_SIZE")

@@ -11,6 +11,7 @@ go_library(
    visibility = ["//enterprise:__subpackages__"],
    deps = [
        "//enterprise/internal/codeintel/ranking/internal/background",
        "//enterprise/internal/codeintel/ranking/internal/background/coordinator",
        "//enterprise/internal/codeintel/ranking/internal/background/exporter",
        "//enterprise/internal/codeintel/ranking/internal/background/janitor",
        "//enterprise/internal/codeintel/ranking/internal/background/mapper",

@@ -2,6 +2,7 @@ package ranking

import (
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/coordinator"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/exporter"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/janitor"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/mapper"
@@ -29,10 +30,11 @@ func NewService(
}

var (
    ExporterConfigInst = &exporter.Config{}
    MapperConfigInst   = &mapper.Config{}
    ReducerConfigInst  = &reducer.Config{}
    JanitorConfigInst  = &janitor.Config{}
    ExporterConfigInst    = &exporter.Config{}
    CoordinatorConfigInst = &coordinator.Config{}
    MapperConfigInst      = &mapper.Config{}
    ReducerConfigInst     = &reducer.Config{}
    JanitorConfigInst     = &janitor.Config{}
)

func NewSymbolExporter(observationCtx *observation.Context, rankingService *Service) goroutine.BackgroundRoutine {
@@ -44,6 +46,14 @@ func NewSymbolExporter(observationCtx *observation.Context, rankingService *Serv
    )
}

func NewCoordinator(observationCtx *observation.Context, rankingService *Service) goroutine.BackgroundRoutine {
    return background.NewCoordinator(
        scopedContext("coordinator", observationCtx),
        rankingService.store,
        CoordinatorConfigInst,
    )
}

func NewMapper(observationCtx *observation.Context, rankingService *Service) []goroutine.BackgroundRoutine {
    return background.NewMapper(
        scopedContext("mapper", observationCtx),

@@ -6,6 +6,7 @@ go_library(
    importpath = "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background",
    visibility = ["//enterprise:__subpackages__"],
    deps = [
        "//enterprise/internal/codeintel/ranking/internal/background/coordinator",
        "//enterprise/internal/codeintel/ranking/internal/background/exporter",
        "//enterprise/internal/codeintel/ranking/internal/background/janitor",
        "//enterprise/internal/codeintel/ranking/internal/background/mapper",

enterprise/internal/codeintel/ranking/internal/background/coordinator/BUILD.bazel (generated, new file, 19 lines)
@@ -0,0 +1,19 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
    name = "coordinator",
    srcs = [
        "config.go",
        "job.go",
    ],
    importpath = "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/coordinator",
    visibility = ["//enterprise:__subpackages__"],
    deps = [
        "//enterprise/internal/codeintel/ranking/internal/shared",
        "//enterprise/internal/codeintel/ranking/internal/store",
        "//internal/conf",
        "//internal/env",
        "//internal/goroutine",
        "//internal/observation",
    ],
)
@@ -0,0 +1,17 @@
package coordinator

import (
    "time"

    "github.com/sourcegraph/sourcegraph/internal/env"
)

type Config struct {
    env.BaseConfig

    Interval time.Duration
}

func (c *Config) Load() {
    c.Interval = c.GetInterval("CODEINTEL_RANKING_COORDINATOR_INTERVAL", "1s", "How frequently to run the ranking coordinator.")
}
@@ -0,0 +1,34 @@
package coordinator

import (
    "context"
    "time"

    rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/store"
    "github.com/sourcegraph/sourcegraph/internal/conf"
    "github.com/sourcegraph/sourcegraph/internal/goroutine"
    "github.com/sourcegraph/sourcegraph/internal/observation"
)

func NewCoordinator(
    observationCtx *observation.Context,
    store store.Store,
    config *Config,
) goroutine.BackgroundRoutine {
    name := "codeintel.ranking.file-reference-count-coordinator"

    return goroutine.NewPeriodicGoroutine(
        context.Background(),
        name,
        "Coordinates the state of the file reference count map and reduce jobs.",
        config.Interval,
        goroutine.HandlerFunc(func(ctx context.Context) error {
            if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
                return nil
            }

            return store.Coordinate(ctx, rankingshared.DerivativeGraphKeyFromTime(time.Now()))
        }),
    )
}
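Worth pausing on the shape of this job: NewPeriodicGoroutine hides the loop, but the behavior is a simple guarded tick. A minimal sketch of that loop, assuming a plain time.Ticker (the real helper also wires up observability and graceful shutdown):

package sketch

import (
    "context"
    "time"
)

// runCoordinator approximates the loop NewPeriodicGoroutine runs for this
// job. enabled stands in for the site-config check and coordinate for
// store.Coordinate; both are illustrative parameters, not the real API.
func runCoordinator(
    ctx context.Context,
    interval time.Duration,
    enabled func() bool,
    coordinate func(context.Context) error,
) error {
    ticker := time.NewTicker(interval)
    defer ticker.Stop()

    for {
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-ticker.C:
            if !enabled() {
                continue // feature flag off: the tick is a no-op
            }
            if err := coordinate(ctx); err != nil {
                return err
            }
        }
    }
}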
@@ -14,8 +14,6 @@ type Config struct {
    WriteBatchSize int
}

var ConfigInst = &Config{}

func (c *Config) Load() {
    c.Interval = c.GetInterval("CODEINTEL_RANKING_SYMBOL_EXPORTER_INTERVAL", "1s", "How frequently to serialize a batch of the code intel graph for ranking.")
    c.ReadBatchSize = c.GetInt("CODEINTEL_RANKING_SYMBOL_EXPORTER_READ_BATCH_SIZE", "16", "How many uploads to process at once.")

@@ -1,6 +1,7 @@
package background

import (
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/coordinator"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/exporter"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/janitor"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/background/mapper"
@@ -15,6 +16,10 @@ func NewSymbolExporter(observationCtx *observation.Context, store store.Store, l
    return exporter.NewSymbolExporter(observationCtx, store, lsifstore, config)
}

func NewCoordinator(observationCtx *observation.Context, store store.Store, config *coordinator.Config) goroutine.BackgroundRoutine {
    return coordinator.NewCoordinator(observationCtx, store, config)
}

func NewMapper(observationCtx *observation.Context, store store.Store, config *mapper.Config) []goroutine.BackgroundRoutine {
    return []goroutine.BackgroundRoutine{
        mapper.NewMapper(observationCtx, store, config),
@@ -31,6 +36,9 @@ func NewSymbolJanitor(observationCtx *observation.Context, store store.Store, co
        janitor.NewSymbolDefinitionsJanitor(observationCtx, store, config),
        janitor.NewSymbolReferencesJanitor(observationCtx, store, config),
        janitor.NewSymbolInitialPathsJanitor(observationCtx, store, config),
        janitor.NewDeletedSymbolDefinitionsJanitor(observationCtx, store, config),
        janitor.NewDeletedSymbolReferencesJanitor(observationCtx, store, config),
        janitor.NewDeletedSymbolInitialPathsJanitor(observationCtx, store, config),
        janitor.NewAbandonedDefinitionsJanitor(observationCtx, store, config),
        janitor.NewAbandonedReferencesJanitor(observationCtx, store, config),
        janitor.NewAbandonedInitialCountsJanitor(observationCtx, store, config),

@@ -12,8 +12,6 @@ type Config struct {
    Interval time.Duration
}

var ConfigInst = &Config{}

func (c *Config) Load() {
    c.Interval = c.GetInterval("CODEINTEL_RANKING_JANITOR_INTERVAL", "1s", "How frequently to run the ranking janitor.")
}

@@ -23,11 +23,11 @@ func NewSymbolDefinitionsJanitor(

    return background.NewJanitorJob(context.Background(), background.JanitorOptions{
        Name:        name,
        Description: "Removes stale data from the ranking definitions table.",
        Description: "Soft-deletes stale data from the ranking definitions table.",
        Interval:    config.Interval,
        Metrics:     background.NewJanitorMetrics(observationCtx, name, recordTypeName),
        CleanupFunc: func(ctx context.Context) (numRecordsScanned int, numRecordsAltered int, err error) {
            return vacuumStaleDefinitions(ctx, store)
            return softDeleteStaleDefinitions(ctx, store)
        },
    })
}
@@ -41,11 +41,11 @@ func NewSymbolReferencesJanitor(

    return background.NewJanitorJob(context.Background(), background.JanitorOptions{
        Name:        name,
        Description: "Removes stale data from the ranking references table.",
        Description: "Soft-deletes stale data from the ranking references table.",
        Interval:    config.Interval,
        Metrics:     background.NewJanitorMetrics(observationCtx, name, recordTypeName),
        CleanupFunc: func(ctx context.Context) (numRecordsScanned int, numRecordsAltered int, err error) {
            return vacuumStaleReferences(ctx, store)
            return softDeleteStaleReferences(ctx, store)
        },
    })
}
@@ -59,11 +59,68 @@ func NewSymbolInitialPathsJanitor(

    return background.NewJanitorJob(context.Background(), background.JanitorOptions{
        Name:        name,
        Description: "Removes stale data from the ranking initial paths table.",
        Description: "Soft-deletes stale data from the ranking initial paths table.",
        Interval:    config.Interval,
        Metrics:     background.NewJanitorMetrics(observationCtx, name, recordTypeName),
        CleanupFunc: func(ctx context.Context) (numRecordsScanned int, numRecordsAltered int, err error) {
            return vacuumStaleInitialPaths(ctx, store)
            return softDeleteStaleInitialPaths(ctx, store)
        },
    })
}

func NewDeletedSymbolDefinitionsJanitor(
    observationCtx *observation.Context,
    store store.Store,
    config *Config,
) goroutine.BackgroundRoutine {
    name := "codeintel.ranking.deleted-symbol-definitions-janitor"

    return background.NewJanitorJob(context.Background(), background.JanitorOptions{
        Name:        name,
        Description: "Removes soft-deleted data from the ranking definitions table no longer being read by a mapper process.",
        Interval:    config.Interval,
        Metrics:     background.NewJanitorMetrics(observationCtx, name, recordTypeName),
        CleanupFunc: func(ctx context.Context) (numRecordsScanned int, numRecordsAltered int, err error) {
            numDeleted, err := vacuumDeletedDefinitions(ctx, store)
            return numDeleted, numDeleted, err
        },
    })
}

func NewDeletedSymbolReferencesJanitor(
    observationCtx *observation.Context,
    store store.Store,
    config *Config,
) goroutine.BackgroundRoutine {
    name := "codeintel.ranking.deleted-symbol-references-janitor"

    return background.NewJanitorJob(context.Background(), background.JanitorOptions{
        Name:        name,
        Description: "Removes soft-deleted data from the ranking references table no longer being read by a mapper process.",
        Interval:    config.Interval,
        Metrics:     background.NewJanitorMetrics(observationCtx, name, recordTypeName),
        CleanupFunc: func(ctx context.Context) (numRecordsScanned int, numRecordsAltered int, err error) {
            numDeleted, err := vacuumDeletedReferences(ctx, store)
            return numDeleted, numDeleted, err
        },
    })
}

func NewDeletedSymbolInitialPathsJanitor(
    observationCtx *observation.Context,
    store store.Store,
    config *Config,
) goroutine.BackgroundRoutine {
    name := "codeintel.ranking.deleted-symbol-initial-paths-janitor"

    return background.NewJanitorJob(context.Background(), background.JanitorOptions{
        Name:        name,
        Description: "Removes soft-deleted data from the ranking initial paths table no longer being read by a seed mapper process.",
        Interval:    config.Interval,
        Metrics:     background.NewJanitorMetrics(observationCtx, name, recordTypeName),
        CleanupFunc: func(ctx context.Context) (numRecordsScanned int, numRecordsAltered int, err error) {
            numDeleted, err := vacuumDeletedInitialPaths(ctx, store)
            return numDeleted, numDeleted, err
        },
    })
}
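The janitor changes above split record removal into two phases: the soft-delete janitors mark rows whose uploads are no longer visible at tip, and the new Deleted* janitors reclaim marked rows only once no map pass still needs them. A small sketch of that lifecycle, with illustrative types that are not the real schema:

package sketch

import "time"

// record models a row in one of the ranking export tables (illustrative).
type record struct {
    visibleAtTip bool       // upload still visible at the tip of the default branch
    deletedAt    *time.Time // NULL until soft-deleted
}

// softDelete is phase one: rows for non-visible uploads are marked, not
// removed, so a mapper snapshot taken earlier can still read them.
func softDelete(r *record, now time.Time) {
    if !r.visibleAtTip && r.deletedAt == nil {
        r.deletedAt = &now
    }
}

// canHardDelete is phase two: the Deleted* janitors only drop a marked row
// once no mapper pass is still mid-flight over it.
func canHardDelete(r record, mapperInFlight bool) bool {
    return r.deletedAt != nil && !mapperInFlight
}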
@@ -162,33 +219,57 @@ func NewRankJanitor(
    })
}

func vacuumStaleDefinitions(ctx context.Context, store store.Store) (int, int, error) {
func softDeleteStaleDefinitions(ctx context.Context, store store.Store) (int, int, error) {
    if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
        return 0, 0, nil
    }

    numDefinitionRecordsScanned, numDefinitionRecordsRemoved, err := store.VacuumStaleDefinitions(ctx, rankingshared.GraphKey())
    numDefinitionRecordsScanned, numDefinitionRecordsRemoved, err := store.SoftDeleteStaleDefinitions(ctx, rankingshared.GraphKey())
    return numDefinitionRecordsScanned, numDefinitionRecordsRemoved, err
}

func vacuumStaleReferences(ctx context.Context, store store.Store) (int, int, error) {
func softDeleteStaleReferences(ctx context.Context, store store.Store) (int, int, error) {
    if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
        return 0, 0, nil
    }

    numReferenceRecordsScanned, numReferenceRecordsRemoved, err := store.VacuumStaleReferences(ctx, rankingshared.GraphKey())
    numReferenceRecordsScanned, numReferenceRecordsRemoved, err := store.SoftDeleteStaleReferences(ctx, rankingshared.GraphKey())
    return numReferenceRecordsScanned, numReferenceRecordsRemoved, err
}

func vacuumStaleInitialPaths(ctx context.Context, store store.Store) (int, int, error) {
func softDeleteStaleInitialPaths(ctx context.Context, store store.Store) (int, int, error) {
    if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
        return 0, 0, nil
    }

    numPathRecordsScanned, numStalePathRecordsDeleted, err := store.VacuumStaleInitialPaths(ctx, rankingshared.GraphKey())
    numPathRecordsScanned, numStalePathRecordsDeleted, err := store.SoftDeleteStaleInitialPaths(ctx, rankingshared.GraphKey())
    return numPathRecordsScanned, numStalePathRecordsDeleted, err
}

func vacuumDeletedDefinitions(ctx context.Context, store store.Store) (int, error) {
    if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
        return 0, nil
    }

    return store.VacuumDeletedDefinitions(ctx, rankingshared.DerivativeGraphKeyFromTime(time.Now()))
}

func vacuumDeletedReferences(ctx context.Context, store store.Store) (int, error) {
    if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
        return 0, nil
    }

    return store.VacuumDeletedReferences(ctx, rankingshared.DerivativeGraphKeyFromTime(time.Now()))
}

func vacuumDeletedInitialPaths(ctx context.Context, store store.Store) (int, error) {
    if enabled := conf.CodeIntelRankingDocumentReferenceCountsEnabled(); !enabled {
        return 0, nil
    }

    return store.VacuumDeletedInitialPaths(ctx, rankingshared.DerivativeGraphKeyFromTime(time.Now()))
}

const vacuumBatchSize = 100 // TODO - configure via envvar

func vacuumAbandonedDefinitions(ctx context.Context, store store.Store) (int, error) {

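Every cleanup function above repeats the same guard: bail out with zeroed counts while document reference counts are disabled. The pattern, factored out as a sketch (enabled stands in for conf.CodeIntelRankingDocumentReferenceCountsEnabled):

package sketch

import "context"

// guarded wraps a janitor cleanup function so it no-ops while the ranking
// feature flag is off.
func guarded(enabled func() bool, cleanup func(context.Context) (int, int, error)) func(context.Context) (int, int, error) {
    return func(ctx context.Context) (int, int, error) {
        if !enabled() {
            return 0, 0, nil // nothing scanned, nothing altered
        }
        return cleanup(ctx)
    }
}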
@@ -13,8 +13,6 @@ type Config struct {
    BatchSize int
}

var ConfigInst = &Config{}

func (c *Config) Load() {
    c.Interval = c.GetInterval("CODEINTEL_RANKING_MAPPER_INTERVAL", "1s", "How frequently to run the ranking mapper.")
    c.BatchSize = c.GetInt("CODEINTEL_RANKING_MAPPER_BATCH_SIZE", "100", "How many definitions and references to map at once.")

@@ -13,8 +13,6 @@ type Config struct {
    BatchSize int
}

var ConfigInst = &Config{}

func (c *Config) Load() {
    c.Interval = c.GetInterval("CODEINTEL_RANKING_REDUCER_INTERVAL", "1s", "How frequently to run the ranking reducer.")
    c.BatchSize = c.GetInt("CODEINTEL_RANKING_REDUCER_BATCH_SIZE", "100", "How many path counts to reduce at once.")

@@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
    name = "store",
    srcs = [
        "coordinator.go",
        "definitions.go",
        "mapper.go",
        "observability.go",

@@ -0,0 +1,65 @@
package store

import (
    "context"

    "github.com/keegancsmith/sqlf"

    rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)

func (s *store) Coordinate(
    ctx context.Context,
    derivativeGraphKey string,
) (err error) {
    ctx, _, endObservation := s.operations.coordinate.With(ctx, &err, observation.Args{})
    defer endObservation(1, observation.Args{})

    graphKey, ok := rankingshared.GraphKeyFromDerivativeGraphKey(derivativeGraphKey)
    if !ok {
        return errors.Newf("unexpected derivative graph key %q", derivativeGraphKey)
    }

    if err := s.db.Exec(ctx, sqlf.Sprintf(
        coordinateStartMapperQuery,
        derivativeGraphKey,
        graphKey,
        graphKey,
        graphKey,
    )); err != nil {
        return err
    }

    if err := s.db.Exec(ctx, sqlf.Sprintf(
        coordinateStartReducerQuery,
        derivativeGraphKey,
    )); err != nil {
        return err
    }

    return nil
}

const coordinateStartMapperQuery = `
INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at)
VALUES (
    %s,
    COALESCE((SELECT MAX(id) FROM codeintel_ranking_definitions WHERE graph_key = %s), 0),
    COALESCE((SELECT MAX(id) FROM codeintel_ranking_references WHERE graph_key = %s), 0),
    COALESCE((SELECT MAX(id) FROM codeintel_initial_path_ranks WHERE graph_key = %s), 0),
    NOW()
)
ON CONFLICT DO NOTHING
`

const coordinateStartReducerQuery = `
UPDATE codeintel_ranking_progress
SET reducer_started_at = NOW()
WHERE
    graph_key = %s AND
    mapper_completed_at IS NOT NULL AND
    seed_mapper_completed_at IS NOT NULL AND
    reducer_started_at IS NULL
`
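Taken together, the two queries implement a small state machine over codeintel_ranking_progress: the insert freezes a snapshot of the max exported IDs under ON CONFLICT DO NOTHING, so each derivative key gets exactly one row, and the update releases the reducer only after both mapper phases have finished. Sketched as Go, with field names mirroring the table and transitions simplified:

package sketch

import "time"

// progress mirrors the columns of codeintel_ranking_progress touched above.
type progress struct {
    mappersStartedAt      time.Time  // set when the row is inserted; freezes the max_*_id snapshot
    seedMapperCompletedAt *time.Time // stamped when the seed mapper drains its input
    mapperCompletedAt     *time.Time // stamped when the mapper drains its input
    reducerStartedAt      *time.Time // stamped by coordinateStartReducerQuery
    reducerCompletedAt    *time.Time // stamped when the reducer drains its input
}

// canStartReducer restates the WHERE clause of coordinateStartReducerQuery.
func canStartReducer(p progress) bool {
    return p.mapperCompletedAt != nil &&
        p.seedMapperCompletedAt != nil &&
        p.reducerStartedAt == nil
}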
@@ -7,10 +7,12 @@ import (
    "github.com/keegancsmith/sqlf"
    otlog "github.com/opentracing/opentracing-go/log"

    rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/shared"
    "github.com/sourcegraph/sourcegraph/internal/database/basestore"
    "github.com/sourcegraph/sourcegraph/internal/database/batch"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)

func (s *store) InsertDefinitionsForRanking(
@@ -78,16 +80,16 @@ deleted_definitions AS (
SELECT COUNT(*) FROM deleted_definitions
`

func (s *store) VacuumStaleDefinitions(ctx context.Context, graphKey string) (
func (s *store) SoftDeleteStaleDefinitions(ctx context.Context, graphKey string) (
    numDefinitionRecordsScanned int,
    numStaleDefinitionRecordsDeleted int,
    err error,
) {
    ctx, _, endObservation := s.operations.vacuumStaleDefinitions.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    ctx, _, endObservation := s.operations.softDeleteStaleDefinitions.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    defer endObservation(1, observation.Args{})

    rows, err := s.db.Query(ctx, sqlf.Sprintf(
        vacuumStaleDefinitionsQuery,
        softDeleteStaleDefinitionsQuery,
        graphKey, int(threshold/time.Hour), vacuumBatchSize,
    ))
    if err != nil {
@@ -107,17 +109,16 @@ func (s *store) VacuumStaleDefinitions(ctx context.Context, graphKey string) (
    return numDefinitionRecordsScanned, numStaleDefinitionRecordsDeleted, nil
}

const vacuumStaleDefinitionsQuery = `
const softDeleteStaleDefinitionsQuery = `
WITH
locked_definitions AS (
    SELECT
        rd.id,
        u.repository_id,
        rd.upload_id
    FROM codeintel_ranking_definitions rd
    JOIN lsif_uploads u ON u.id = rd.upload_id
    WHERE
        rd.graph_key = %s AND
        rd.deleted_at IS NULL AND
        (rd.last_scanned_at IS NULL OR NOW() - rd.last_scanned_at >= %s * '1 hour'::interval)
    ORDER BY rd.last_scanned_at ASC NULLS FIRST, rd.id
    FOR UPDATE SKIP LOCKED
@@ -128,7 +129,8 @@ candidates AS (
        ld.id,
        uvt.is_default_branch IS TRUE AS safe
    FROM locked_definitions ld
    LEFT JOIN lsif_uploads_visible_at_tip uvt ON uvt.repository_id = ld.repository_id AND uvt.upload_id = ld.upload_id
    LEFT JOIN lsif_uploads u ON u.id = ld.upload_id
    LEFT JOIN lsif_uploads_visible_at_tip uvt ON uvt.repository_id = u.repository_id AND uvt.upload_id = ld.upload_id
),
updated_definitions AS (
    UPDATE codeintel_ranking_definitions
@@ -136,7 +138,8 @@ updated_definitions AS (
    WHERE id IN (SELECT c.id FROM candidates c WHERE c.safe)
),
deleted_definitions AS (
    DELETE FROM codeintel_ranking_definitions
    UPDATE codeintel_ranking_definitions
    SET deleted_at = NOW()
    WHERE id IN (SELECT c.id FROM candidates c WHERE NOT c.safe)
    RETURNING 1
)
@@ -144,3 +147,51 @@ SELECT
    (SELECT COUNT(*) FROM candidates),
    (SELECT COUNT(*) FROM deleted_definitions)
`

func (s *store) VacuumDeletedDefinitions(ctx context.Context, derivativeGraphKey string) (
    numDefinitionRecordsDeleted int,
    err error,
) {
    ctx, _, endObservation := s.operations.vacuumDeletedDefinitions.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    defer endObservation(1, observation.Args{})

    graphKey, ok := rankingshared.GraphKeyFromDerivativeGraphKey(derivativeGraphKey)
    if !ok {
        return 0, errors.Newf("unexpected derivative graph key %q", derivativeGraphKey)
    }

    count, _, err := basestore.ScanFirstInt(s.db.Query(ctx, sqlf.Sprintf(
        vacuumDeletedDefinitionsQuery,
        graphKey,
        derivativeGraphKey,
        vacuumBatchSize,
    )))
    return count, err
}

const vacuumDeletedDefinitionsQuery = `
WITH
locked_definitions AS (
    SELECT rd.id
    FROM codeintel_ranking_definitions rd
    WHERE
        rd.graph_key = %s AND
        rd.deleted_at IS NOT NULL AND
        NOT EXISTS (
            SELECT 1
            FROM codeintel_ranking_progress crp
            WHERE
                crp.graph_key = %s AND
                crp.mapper_completed_at IS NULL
        )
    ORDER BY rd.id
    FOR UPDATE SKIP LOCKED
    LIMIT %s
),
deleted_definitions AS (
    DELETE FROM codeintel_ranking_definitions
    WHERE id IN (SELECT id FROM locked_definitions)
    RETURNING 1
)
SELECT COUNT(*) FROM deleted_definitions
`
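Both queries here share one shape: lock a bounded batch with FOR UPDATE SKIP LOCKED so concurrent janitor instances pass over rows another worker already holds, then act on just that batch. The generic skeleton, as a sketch (some_table is a placeholder, not a real table):

package sketch

// batchDeleteQuery shows the generic shape of the vacuum queries above.
const batchDeleteQuery = `
WITH
locked AS (
    SELECT id
    FROM some_table
    WHERE deleted_at IS NOT NULL
    ORDER BY id
    FOR UPDATE SKIP LOCKED -- concurrent janitors skip rows held by another worker
    LIMIT %s               -- vacuumBatchSize bounds the work done per pass
),
deleted AS (
    DELETE FROM some_table
    WHERE id IN (SELECT id FROM locked)
    RETURNING 1
)
SELECT COUNT(*) FROM deleted
`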
@@ -9,6 +9,7 @@ import (
    "github.com/keegancsmith/sqlf"
    "github.com/sourcegraph/log/logtest"

    rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
    "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/shared"
    uploadsshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/uploads/shared"
    "github.com/sourcegraph/sourcegraph/internal/database"
@@ -120,7 +121,7 @@ func TestVacuumAbandonedDefinitions(t *testing.T) {
    assertCounts(1*30 + 10)
}

func TestVacuumStaleDefinitionsAndReferences(t *testing.T) {
func TestSoftDeleteStaleDefinitionsAndReferences(t *testing.T) {
    logger := logtest.Scoped(t)
    ctx := context.Background()
    db := database.NewDB(logger, dbtest.NewDB(logger, t))
@@ -197,7 +198,7 @@ func TestVacuumStaleDefinitionsAndReferences(t *testing.T) {
    insertVisibleAtTip(t, db, 50, 2)

    // remove definitions for non-visible uploads
    _, numStaleDefinitionRecordsDeleted, err := store.VacuumStaleDefinitions(ctx, mockRankingGraphKey)
    _, numStaleDefinitionRecordsDeleted, err := store.SoftDeleteStaleDefinitions(ctx, mockRankingGraphKey)
    if err != nil {
        t.Fatalf("unexpected error vacuuming stale definitions: %s", err)
    }
@@ -206,7 +207,7 @@ func TestVacuumStaleDefinitionsAndReferences(t *testing.T) {
    }

    // remove references for non-visible uploads
    if _, _, err := store.VacuumStaleReferences(ctx, mockRankingGraphKey); err != nil {
    if _, _, err := store.SoftDeleteStaleReferences(ctx, mockRankingGraphKey); err != nil {
        t.Fatalf("unexpected error vacuuming stale references: %s", err)
    }

@@ -214,6 +215,24 @@ func TestVacuumStaleDefinitionsAndReferences(t *testing.T) {
    assertCounts(2, 3)
}

func TestVacuumDeletedDefinitions(t *testing.T) {
    logger := logtest.Scoped(t)
    ctx := context.Background()
    db := database.NewDB(logger, dbtest.NewDB(logger, t))
    store := New(&observation.TestContext, db)

    key := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)

    // TODO - setup

    _, err := store.VacuumDeletedDefinitions(ctx, key)
    if err != nil {
        t.Fatalf("unexpected error vacuuming deleted definitions: %s", err)
    }

    // TODO - assertions
}

//
//

@@ -224,7 +243,7 @@ func getRankingDefinitions(
    graphKey string,
) (_ []shared.RankingDefinitions, err error) {
    query := fmt.Sprintf(
        `SELECT upload_id, symbol_name, document_path FROM codeintel_ranking_definitions WHERE graph_key = '%s'`,
        `SELECT upload_id, symbol_name, document_path FROM codeintel_ranking_definitions WHERE graph_key = '%s' AND deleted_at IS NULL`,
        graphKey,
    )
    rows, err := db.QueryContext(ctx, query)

@@ -31,12 +31,14 @@ func (s *store) InsertPathCountInputs(

    rows, err := s.db.Query(ctx, sqlf.Sprintf(
        insertPathCountInputsQuery,
        derivativeGraphKey,
        graphKey,
        derivativeGraphKey,
        batchSize,
        derivativeGraphKey,
        derivativeGraphKey,
        graphKey,
        graphKey,
        derivativeGraphKey,
    ))
    if err != nil {
@@ -58,14 +60,38 @@ func (s *store) InsertPathCountInputs(

const insertPathCountInputsQuery = `
WITH
progress AS (
    SELECT
        crp.id,
        crp.max_definition_id,
        crp.max_reference_id,
        crp.mappers_started_at as started_at
    FROM codeintel_ranking_progress crp
    WHERE
        crp.graph_key = %s AND
        crp.mapper_completed_at IS NULL
),
refs AS (
    SELECT
        rr.id,
        rr.upload_id,
        rr.symbol_names
    FROM codeintel_ranking_references rr
    JOIN progress p ON TRUE
    WHERE
        rr.graph_key = %s AND

        -- Note that we do a check in the processable_symbols CTE below that will
        -- ensure that we don't process a record AND the one it shadows. We end up
        -- taking the lowest ID and no-oping any others that happened to fall into
        -- the window.

        -- Ensure that the record is within the bounds where it would be visible
        -- to the current "snapshot" defined by the ranking computation state row.
        rr.id <= p.max_reference_id AND
        (rr.deleted_at IS NULL OR rr.deleted_at > p.started_at) AND

        -- Ensure the record isn't already processed
        NOT EXISTS (
            SELECT 1
            FROM codeintel_ranking_references_processed rrp
@@ -104,6 +130,7 @@ processable_symbols AS (
            u.indexer = u2.indexer AND
            u.id != u2.id
    ) AND

    -- For multiple references for the same repository/root/indexer in THIS batch, we want to
    -- process the one associated with the most recently processed upload record. This should
    -- maximize fresh results.
@@ -127,12 +154,35 @@ referenced_definitions AS (
    SELECT
        u.repository_id,
        rd.document_path,
        rd.graph_key,
        COUNT(*) AS count
    FROM codeintel_ranking_definitions rd
    JOIN referenced_symbols rs ON rs.symbol_name = rd.symbol_name
    JOIN lsif_uploads u ON u.id = rd.upload_id
    WHERE rd.graph_key = %s
    JOIN progress p ON TRUE
    WHERE
        rd.graph_key = %s AND

        -- Ensure that the record is within the bounds where it would be visible
        -- to the current "snapshot" defined by the ranking computation state row.
        rd.id <= p.max_definition_id AND
        (rd.deleted_at IS NULL OR rd.deleted_at > p.started_at) AND

        -- If there are multiple uploads in the same repository/root/indexer, only
        -- consider definition records attached to the one with the highest id. This
        -- should prevent over-counting definitions when there are multiple uploads
        -- in the exported set, but the shadowed (newly non-visible) uploads have not
        -- yet been removed by the janitor processes.
        NOT EXISTS (
            SELECT 1
            FROM lsif_uploads u2
            JOIN codeintel_ranking_definitions rd2 ON rd2.upload_id = u2.id
            WHERE
                rd2.graph_key = %s AND
                u.repository_id = u2.repository_id AND
                u.root = u2.root AND
                u.indexer = u2.indexer AND
                u.id > u2.id
        )
    GROUP BY u.repository_id, rd.document_path, rd.graph_key
),
ins AS (
@@ -145,6 +195,13 @@ ins AS (
    FROM referenced_definitions rx
    GROUP BY rx.repository_id, rx.document_path
    RETURNING 1
),
set_progress AS (
    UPDATE codeintel_ranking_progress
    SET mapper_completed_at = NOW()
    WHERE
        id IN (SELECT id FROM progress) AND
        NOT EXISTS (SELECT 1 FROM refs)
)
SELECT
    (SELECT COUNT(*) FROM locked_refs),
@@ -170,6 +227,7 @@ func (s *store) InsertInitialPathCounts(

    rows, err := s.db.Query(ctx, sqlf.Sprintf(
        insertInitialPathCountsInputsQuery,
        derivativeGraphKey,
        graphKey,
        derivativeGraphKey,
        batchSize,
@@ -195,6 +253,16 @@ func (s *store) InsertInitialPathCounts(

const insertInitialPathCountsInputsQuery = `
WITH
progress AS (
    SELECT
        crp.id,
        crp.max_path_id,
        crp.mappers_started_at as started_at
    FROM codeintel_ranking_progress crp
    WHERE
        crp.graph_key = %s AND
        crp.seed_mapper_completed_at IS NULL
),
unprocessed_path_counts AS (
    SELECT
        ipr.id,
@@ -205,8 +273,20 @@ unprocessed_path_counts AS (
        ELSE ipr.document_paths
        END AS document_paths
    FROM codeintel_initial_path_ranks ipr
    JOIN progress p ON TRUE
    WHERE
        ipr.graph_key = %s AND

        -- Note that we don't do any special precautions here to de-duplicate the
        -- zero-rank path data, as duplicate paths add a zero count (no-op), and
        -- extra paths will not be resolvable in gitserver.

        -- Ensure that the record is within the bounds where it would be visible
        -- to the current "snapshot" defined by the ranking computation state row.
        ipr.id <= p.max_path_id AND
        (ipr.deleted_at IS NULL OR ipr.deleted_at > p.started_at) AND

        -- Ensure the record isn't already processed
        NOT EXISTS (
            SELECT 1
            FROM codeintel_initial_path_ranks_processed prp
@@ -218,6 +298,15 @@ unprocessed_path_counts AS (
    LIMIT %s
    FOR UPDATE SKIP LOCKED
),
locked_path_counts AS (
    INSERT INTO codeintel_initial_path_ranks_processed (graph_key, codeintel_initial_path_ranks_id)
    SELECT
        %s,
        eupc.id
    FROM unprocessed_path_counts eupc
    ON CONFLICT DO NOTHING
    RETURNING codeintel_initial_path_ranks_id
),
expanded_unprocessed_path_counts AS (
    SELECT
        upc.id,
@@ -226,15 +315,6 @@ expanded_unprocessed_path_counts AS (
        unnest(upc.document_paths) AS document_path
    FROM unprocessed_path_counts upc
),
locked_path_counts AS (
    INSERT INTO codeintel_initial_path_ranks_processed (graph_key, codeintel_initial_path_ranks_id)
    SELECT
        %s,
        eupc.id
    FROM expanded_unprocessed_path_counts eupc
    ON CONFLICT DO NOTHING
    RETURNING codeintel_initial_path_ranks_id
),
ins AS (
    INSERT INTO codeintel_ranking_path_counts_inputs (repository_id, document_path, count, graph_key)
    SELECT
@@ -246,6 +326,13 @@ ins AS (
    JOIN expanded_unprocessed_path_counts eupc on eupc.id = lpc.codeintel_initial_path_ranks_id
    JOIN lsif_uploads u ON u.id = eupc.upload_id
    RETURNING 1
),
set_progress AS (
    UPDATE codeintel_ranking_progress
    SET seed_mapper_completed_at = NOW()
    WHERE
        id IN (SELECT id FROM progress) AND
        NOT EXISTS (SELECT 1 FROM unprocessed_path_counts)
)
SELECT
    (SELECT COUNT(*) FROM locked_path_counts),

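The recurring pair of predicates is what makes a mapper pass behave like a snapshot read: records exported after the pass began (id above the frozen max) are invisible, and records soft-deleted mid-pass remain visible. The same check as a plain function, with illustrative types:

package sketch

import "time"

// visibleToSnapshot restates the recurring SQL predicate
//   id <= p.max_id AND (deleted_at IS NULL OR deleted_at > p.started_at)
func visibleToSnapshot(id, maxID int, deletedAt *time.Time, startedAt time.Time) bool {
    if id > maxID {
        return false // exported after this pass began; left for the next pass
    }
    return deletedAt == nil || deletedAt.After(startedAt) // rows soft-deleted mid-pass stay visible
}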
@@ -68,6 +68,16 @@ func TestInsertPathCountInputs(t *testing.T) {
        t.Fatalf("unexpected error inserting definitions: %s", err)
    }

    // Insert metadata to trigger mapper
    if _, err := db.ExecContext(ctx, `
        INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at)
        VALUES ($1, 1000, 1000, 1000, NOW())
    `,
        rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
    ); err != nil {
        t.Fatalf("failed to insert metadata: %s", err)
    }

    //
    // Basic test case

@@ -181,6 +191,16 @@ func TestInsertInitialPathCounts(t *testing.T) {
    // Creates repository 50
    insertUploads(t, db, uploadsshared.Upload{ID: 1})

    // Insert metadata to trigger mapper
    if _, err := db.ExecContext(ctx, `
        INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at)
        VALUES ($1, 1000, 1000, 1000, NOW())
    `,
        rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
    ); err != nil {
        t.Fatalf("failed to insert metadata: %s", err)
    }

    mockUploadID := 1
    mockPathNames := make(chan string, 3)
    mockPathNames <- "foo.go"

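These fixtures also show how a test can drive a whole mapper pass to completion: loop on InsertPathCountInputs until a batch locks nothing, at which point the set_progress CTE stamps mapper_completed_at. A sketch, assuming the first return value is the locked-refs count reported by the query's final SELECT:

package sketch

import "context"

// drainMapper drives one full mapper pass. insertPathCountInputs stands in
// for store.InsertPathCountInputs; it is passed in here so the sketch stays
// self-contained.
func drainMapper(
    ctx context.Context,
    derivativeGraphKey string,
    batchSize int,
    insertPathCountInputs func(context.Context, string, int) (int, int, error),
) error {
    for {
        numRefsLocked, _, err := insertPathCountInputs(ctx, derivativeGraphKey, batchSize)
        if err != nil {
            return err
        }
        if numRefsLocked == 0 {
            return nil // set_progress has stamped mapper_completed_at
        }
    }
}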
@@ -13,16 +13,20 @@ type operations struct {
    getReferenceCountStatistics      *observation.Operation
    lastUpdatedAt                    *observation.Operation
    getUploadsForRanking             *observation.Operation
    processStaleExportedUploads      *observation.Operation
    vacuumAbandonedExportedUploads   *observation.Operation
    insertDefinitionsForRanking      *observation.Operation
    vacuumAbandonedDefinitions       *observation.Operation
    vacuumStaleDefinitions           *observation.Operation
    softDeleteStaleDefinitions       *observation.Operation
    vacuumDeletedDefinitions         *observation.Operation
    insertReferencesForRanking       *observation.Operation
    vacuumAbandonedReferences        *observation.Operation
    vacuumStaleReferences            *observation.Operation
    softDeleteStaleReferences        *observation.Operation
    vacuumDeletedReferences          *observation.Operation
    insertInitialPathRanks           *observation.Operation
    vacuumAbandonedInitialPathCounts *observation.Operation
    vacuumStaleInitialPaths          *observation.Operation
    softDeleteStaleInitialPaths      *observation.Operation
    vacuumDeletedInitialPaths        *observation.Operation
    coordinate                       *observation.Operation
    insertPathCountInputs            *observation.Operation
    insertInitialPathCounts          *observation.Operation
    vacuumStaleGraphs                *observation.Operation
@@ -56,16 +60,20 @@ func newOperations(observationCtx *observation.Context) *operations {
        getReferenceCountStatistics:      op("GetReferenceCountStatistics"),
        lastUpdatedAt:                    op("LastUpdatedAt"),
        getUploadsForRanking:             op("GetUploadsForRanking"),
        processStaleExportedUploads:      op("ProcessStaleExportedUploads"),
        vacuumAbandonedExportedUploads:   op("VacuumAbandonedExportedUploads"),
        insertDefinitionsForRanking:      op("InsertDefinitionsForRanking"),
        vacuumAbandonedDefinitions:       op("VacuumAbandonedDefinitions"),
        vacuumStaleDefinitions:           op("VacuumStaleDefinitions"),
        softDeleteStaleDefinitions:       op("SoftDeleteStaleDefinitions"),
        vacuumDeletedDefinitions:         op("VacuumDeletedDefinitions"),
        insertReferencesForRanking:       op("InsertReferencesForRanking"),
        vacuumAbandonedReferences:        op("VacuumAbandonedReferences"),
        vacuumStaleReferences:            op("VacuumStaleReferences"),
        softDeleteStaleReferences:        op("SoftDeleteStaleReferences"),
        vacuumDeletedReferences:          op("VacuumDeletedReferences"),
        insertInitialPathRanks:           op("InsertInitialPathRanks"),
        vacuumAbandonedInitialPathCounts: op("VacuumAbandonedInitialPathCounts"),
        vacuumStaleInitialPaths:          op("VacuumStaleInitialPaths"),
        softDeleteStaleInitialPaths:      op("SoftDeleteStaleInitialPaths"),
        vacuumDeletedInitialPaths:        op("VacuumDeletedInitialPaths"),
        coordinate:                       op("Coordinate"),
        insertPathCountInputs:            op("InsertPathCountInputs"),
        insertInitialPathCounts:          op("InsertInitialPathCounts"),
        vacuumStaleGraphs:                op("VacuumStaleGraphs"),

@@ -8,9 +8,11 @@ import (
    "github.com/lib/pq"
    otlog "github.com/opentracing/opentracing-go/log"

    rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
    "github.com/sourcegraph/sourcegraph/internal/database/basestore"
    "github.com/sourcegraph/sourcegraph/internal/database/batch"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)

func (s *store) InsertInitialPathRanks(ctx context.Context, uploadID int, documentPaths chan string, batchSize int, graphKey string) (err error) {
@@ -91,16 +93,16 @@ deleted_initial_paths AS (
SELECT COUNT(*) FROM deleted_initial_paths
`

func (s *store) VacuumStaleInitialPaths(ctx context.Context, graphKey string) (
func (s *store) SoftDeleteStaleInitialPaths(ctx context.Context, graphKey string) (
    numPathRecordsScanned int,
    numStalePathRecordsDeleted int,
    err error,
) {
    ctx, _, endObservation := s.operations.vacuumStaleInitialPaths.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    ctx, _, endObservation := s.operations.softDeleteStaleInitialPaths.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    defer endObservation(1, observation.Args{})

    rows, err := s.db.Query(ctx, sqlf.Sprintf(
        vacuumStalePathsQuery,
        softDeleteStalePathsQuery,
        graphKey, int(threshold/time.Hour), vacuumBatchSize,
    ))
    if err != nil {
@@ -120,17 +122,16 @@ func (s *store) VacuumStaleInitialPaths(ctx context.Context, graphKey string) (
    return numPathRecordsScanned, numStalePathRecordsDeleted, nil
}

const vacuumStalePathsQuery = `
const softDeleteStalePathsQuery = `
WITH
locked_initial_path_ranks AS (
    SELECT
        ipr.id,
        u.repository_id,
        ipr.upload_id
    FROM codeintel_initial_path_ranks ipr
    JOIN lsif_uploads u ON u.id = ipr.upload_id
    WHERE
        ipr.graph_key = %s AND
        ipr.deleted_at IS NULL AND
        (ipr.last_scanned_at IS NULL OR NOW() - ipr.last_scanned_at >= %s * '1 hour'::interval)
    ORDER BY ipr.last_scanned_at ASC NULLS FIRST, ipr.id
    FOR UPDATE SKIP LOCKED
@@ -141,7 +142,8 @@ candidates AS (
        lipr.id,
        uvt.is_default_branch IS TRUE AS safe
    FROM locked_initial_path_ranks lipr
    LEFT JOIN lsif_uploads_visible_at_tip uvt ON uvt.repository_id = lipr.repository_id AND uvt.upload_id = lipr.upload_id
    LEFT JOIN lsif_uploads u ON u.id = lipr.upload_id
    LEFT JOIN lsif_uploads_visible_at_tip uvt ON uvt.repository_id = u.repository_id AND uvt.upload_id = lipr.upload_id
),
updated_initial_path_ranks AS (
    UPDATE codeintel_initial_path_ranks
@@ -149,7 +151,8 @@ updated_initial_path_ranks AS (
    WHERE id IN (SELECT c.id FROM candidates c WHERE c.safe)
),
deleted_initial_path_ranks AS (
    DELETE FROM codeintel_initial_path_ranks
    UPDATE codeintel_initial_path_ranks
    SET deleted_at = NOW()
    WHERE id IN (SELECT c.id FROM candidates c WHERE NOT c.safe)
    RETURNING 1
)
@@ -157,3 +160,51 @@ SELECT
    (SELECT COUNT(*) FROM candidates),
    (SELECT COUNT(*) FROM deleted_initial_path_ranks)
`

func (s *store) VacuumDeletedInitialPaths(ctx context.Context, derivativeGraphKey string) (
    numPathRecordsDeleted int,
    err error,
) {
    ctx, _, endObservation := s.operations.vacuumDeletedInitialPaths.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    defer endObservation(1, observation.Args{})

    graphKey, ok := rankingshared.GraphKeyFromDerivativeGraphKey(derivativeGraphKey)
    if !ok {
        return 0, errors.Newf("unexpected derivative graph key %q", derivativeGraphKey)
    }

    count, _, err := basestore.ScanFirstInt(s.db.Query(ctx, sqlf.Sprintf(
        vacuumDeletedStalePathsQuery,
        graphKey,
        derivativeGraphKey,
        vacuumBatchSize,
    )))
    return count, err
}

const vacuumDeletedStalePathsQuery = `
WITH
locked_initial_path_ranks AS (
    SELECT ipr.id
    FROM codeintel_initial_path_ranks ipr
    WHERE
        ipr.graph_key = %s AND
        ipr.deleted_at IS NOT NULL AND
        NOT EXISTS (
            SELECT 1
            FROM codeintel_ranking_progress crp
            WHERE
                crp.graph_key = %s AND
                crp.seed_mapper_completed_at IS NULL
        )
    ORDER BY ipr.id
    FOR UPDATE SKIP LOCKED
    LIMIT %s
),
deleted_initial_path_ranks AS (
    DELETE FROM codeintel_initial_path_ranks
    WHERE id IN (SELECT id FROM locked_initial_path_ranks)
    RETURNING 1
)
SELECT COUNT(*) FROM deleted_initial_path_ranks
`
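Note the guard repeated across VacuumDeletedDefinitions, VacuumDeletedInitialPaths, and VacuumStaleRanks: recover the parent graph key from the derivative key and fail fast otherwise. Factored out as a sketch (parse stands in for rankingshared.GraphKeyFromDerivativeGraphKey):

package sketch

import "fmt"

// parseParentKey restates the repeated guard against malformed keys.
func parseParentKey(derivativeGraphKey string, parse func(string) (string, bool)) (string, error) {
    graphKey, ok := parse(derivativeGraphKey)
    if !ok {
        return "", fmt.Errorf("unexpected derivative graph key %q", derivativeGraphKey)
    }
    return graphKey, nil
}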
@@ -8,6 +8,7 @@ import (
    "github.com/keegancsmith/sqlf"
    "github.com/sourcegraph/log/logtest"

    rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
    uploadsshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/uploads/shared"
    "github.com/sourcegraph/sourcegraph/internal/database"
    "github.com/sourcegraph/sourcegraph/internal/database/basestore"
@@ -93,7 +94,7 @@ func TestVacuumAbandonedInitialPathCounts(t *testing.T) {
    assertCounts(1*30 + 10)
}

func TestVacuumStaleInitialPaths(t *testing.T) {
func TestSoftDeleteStaleInitialPaths(t *testing.T) {
    logger := logtest.Scoped(t)
    ctx := context.Background()
    db := database.NewDB(logger, dbtest.NewDB(logger, t))
@@ -129,7 +130,7 @@ func TestVacuumStaleInitialPaths(t *testing.T) {
    insertVisibleAtTip(t, db, 50, 2)

    // remove path counts for non-visible uploads
    if _, _, err := store.VacuumStaleInitialPaths(ctx, mockRankingGraphKey); err != nil {
    if _, _, err := store.SoftDeleteStaleInitialPaths(ctx, mockRankingGraphKey); err != nil {
        t.Fatalf("unexpected error vacuuming stale initial counts: %s", err)
    }

@@ -137,6 +138,24 @@ func TestVacuumStaleInitialPaths(t *testing.T) {
    assertCounts(3)
}

func TestVacuumDeletedInitialPaths(t *testing.T) {
    logger := logtest.Scoped(t)
    ctx := context.Background()
    db := database.NewDB(logger, dbtest.NewDB(logger, t))
    store := New(&observation.TestContext, db)

    key := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)

    // TODO - setup

    _, err := store.VacuumDeletedInitialPaths(ctx, key)
    if err != nil {
        t.Fatalf("unexpected error vacuuming deleted initial paths: %s", err)
    }

    // TODO - assertions
}

//
//

@@ -157,7 +176,7 @@ func getInitialPathRanks(
        upload_id,
        unnest(document_paths) AS document_path
    FROM codeintel_initial_path_ranks
    WHERE graph_key LIKE %s || '%%'
    WHERE graph_key LIKE %s || '%%' AND deleted_at IS NULL
    )s
    GROUP BY upload_id, document_path
    ORDER BY upload_id, document_path

@@ -30,6 +30,7 @@ func (s *store) InsertPathRanks(
    rows, err := s.db.Query(ctx, sqlf.Sprintf(
        insertPathRanksQuery,
        derivativeGraphKey,
        derivativeGraphKey,
        batchSize,
        derivativeGraphKey,
    ))
@@ -51,6 +52,16 @@ func (s *store) InsertPathRanks(

const insertPathRanksQuery = `
WITH
progress AS (
    SELECT
        crp.id,
        crp.mappers_started_at as started_at
    FROM codeintel_ranking_progress crp
    WHERE
        crp.graph_key = %s and
        crp.reducer_started_at IS NOT NULL AND
        crp.reducer_completed_at IS NULL
),
input_ranks AS (
    SELECT
        pci.id,
@@ -58,6 +69,7 @@ input_ranks AS (
        pci.document_path AS path,
        pci.count
    FROM codeintel_ranking_path_counts_inputs pci
    JOIN progress p ON TRUE
    WHERE
        pci.graph_key = %s AND
        NOT pci.processed AND
@@ -79,10 +91,10 @@ processed AS (
    RETURNING 1
),
inserted AS (
    INSERT INTO codeintel_path_ranks AS pr (repository_id, graph_key, payload)
    INSERT INTO codeintel_path_ranks AS pr (graph_key, repository_id, payload)
    SELECT
        temp.repository_id,
        %s,
        temp.repository_id,
        jsonb_object_agg(temp.path, temp.count)
    FROM (
        SELECT
@@ -93,26 +105,27 @@ inserted AS (
        GROUP BY cr.repository_id, cr.path
    ) temp
    GROUP BY temp.repository_id
    ON CONFLICT (repository_id) DO UPDATE SET
        graph_key = EXCLUDED.graph_key,
        payload = CASE
            WHEN pr.graph_key != EXCLUDED.graph_key
                THEN EXCLUDED.payload
            ELSE
                (
                    SELECT jsonb_object_agg(key, sum) FROM (
                        SELECT key, SUM(value::int) AS sum
                        FROM
                            (
                                SELECT * FROM jsonb_each(pr.payload)
                                UNION
                                SELECT * FROM jsonb_each(EXCLUDED.payload)
                            ) AS both_payloads
                        GROUP BY key
                    ) AS combined_json
                )
        END
    ON CONFLICT (graph_key, repository_id) DO UPDATE SET
        payload = (
            SELECT jsonb_object_agg(key, sum) FROM (
                SELECT key, SUM(value::int) AS sum
                FROM
                    (
                        SELECT * FROM jsonb_each(pr.payload)
                        UNION
                        SELECT * FROM jsonb_each(EXCLUDED.payload)
                    ) AS both_payloads
                GROUP BY key
            ) AS combined_json
        )
    RETURNING 1
),
set_progress AS (
    UPDATE codeintel_ranking_progress
    SET reducer_completed_at = NOW()
    WHERE
        id IN (SELECT id FROM progress) AND
        NOT EXISTS (SELECT 1 FROM processed)
)
SELECT
    (SELECT COUNT(*) FROM processed) AS num_processed,
@@ -123,16 +136,13 @@ func (s *store) VacuumStaleRanks(ctx context.Context, derivativeGraphKey string)
    ctx, _, endObservation := s.operations.vacuumStaleRanks.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
    defer endObservation(1, observation.Args{})

    graphKey, ok := rankingshared.GraphKeyFromDerivativeGraphKey(derivativeGraphKey)
    if !ok {
    if _, ok := rankingshared.GraphKeyFromDerivativeGraphKey(derivativeGraphKey); !ok {
        return 0, 0, errors.Newf("unexpected derivative graph key %q", derivativeGraphKey)
    }

    rows, err := s.db.Query(ctx, sqlf.Sprintf(
        vacuumStaleRanksQuery,
        derivativeGraphKey,
        graphKey,
        derivativeGraphKey,
    ))
    defer func() { err = basestore.CloseRows(rows, err) }()

@@ -147,44 +157,32 @@ func (s *store) VacuumStaleRanks(ctx context.Context, derivativeGraphKey string)

const vacuumStaleRanksQuery = `
WITH
matching_graph_keys AS (
    SELECT DISTINCT graph_key
    FROM codeintel_path_ranks
    -- Implicit delete anything with a different graph key root
    WHERE graph_key != %s AND graph_key LIKE %s || '.%%'
),
valid_graph_keys AS (
    -- Select the current graph key as well as the highest graph key that
    -- shares the same parent graph key. Returning both will help bridge
    -- the gap that happens if we were to flush the entire table at the
    -- start of a new graph reduction.
    --
    -- This may have the effect of returning stale ranking data for a repo
    -- for which we no longer have SCIP data, but only from the previous
    -- graph reduction (and changing the parent graph key will flush all
    -- previous data (see the CTE definition above) if the need arises.
    -- Select current graph key
    SELECT %s AS graph_key
    -- Select previous graph key
    UNION (
        SELECT graph_key
        FROM matching_graph_keys
        ORDER BY reverse(split_part(reverse(graph_key), '-', 1))::int DESC
        SELECT crp.graph_key
        FROM codeintel_ranking_progress crp
        WHERE crp.reducer_completed_at IS NOT NULL
        ORDER BY crp.reducer_completed_at DESC
        LIMIT 1
    )
),
locked_records AS (
    -- Lock all path rank records that don't have a recent graph key
    SELECT repository_id
    -- Lock all path rank records that don't have a valid graph key
    SELECT id
    FROM codeintel_path_ranks
    WHERE graph_key NOT IN (SELECT graph_key FROM valid_graph_keys)
    ORDER BY repository_id
    ORDER BY id
    FOR UPDATE
),
del AS (
deleted_records AS (
    DELETE FROM codeintel_path_ranks
    WHERE repository_id IN (SELECT repository_id FROM locked_records)
    WHERE id IN (SELECT id FROM locked_records)
    RETURNING 1
)
SELECT
    (SELECT COUNT(*) FROM locked_records),
    (SELECT COUNT(*) FROM del)
    (SELECT COUNT(*) FROM deleted_records)
`
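The new ON CONFLICT arm merges an incoming payload into the stored one by summing counts per path. In Go terms the merge looks like the sketch below (the SQL's UNION additionally de-duplicates identical key/value pairs, which this sketch does not model):

package sketch

// mergePayloads is the Go analogue of the conflict arm: per-path counts from
// the stored payload and the incoming payload are summed key by key.
func mergePayloads(existing, incoming map[string]int) map[string]int {
    merged := make(map[string]int, len(existing)+len(incoming))
    for path, count := range existing {
        merged[path] += count
    }
    for path, count := range incoming {
        merged[path] += count
    }
    return merged
}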
@ -3,7 +3,6 @@ package store
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
@ -64,16 +63,31 @@ func TestInsertPathRanks(t *testing.T) {
|
||||
t.Fatalf("unexpected error inserting references: %s", err)
|
||||
}
|
||||
|
||||
// Insert metadata to trigger mapper
|
||||
if _, err := db.ExecContext(ctx, `
|
||||
INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at)
|
||||
VALUES ($1, 1000, 1000, 1000, NOW())
|
||||
`,
|
||||
rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
|
||||
); err != nil {
|
||||
t.Fatalf("failed to insert metadata: %s", err)
|
||||
}
|
||||
|
||||
// Test InsertPathCountInputs
|
||||
if _, _, err := store.InsertPathCountInputs(ctx, rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123), 1000); err != nil {
|
||||
t.Fatalf("unexpected error inserting path count inputs: %s", err)
|
||||
}
|
||||
|
||||
// Insert repos
|
||||
if _, err := db.ExecContext(ctx, fmt.Sprintf(`INSERT INTO repo (id, name) VALUES (1, 'deadbeef')`)); err != nil {
|
||||
if _, err := db.ExecContext(ctx, `INSERT INTO repo (id, name) VALUES (1, 'deadbeef')`); err != nil {
|
||||
t.Fatalf("failed to insert repos: %s", err)
|
||||
}
|
||||
|
||||
// Update metadata to trigger reducer
|
||||
if _, err := db.ExecContext(ctx, `UPDATE codeintel_ranking_progress SET reducer_started_at = NOW()`); err != nil {
|
||||
t.Fatalf("failed to update metadata: %s", err)
|
||||
}
|
||||
|
||||
// Finally! Test InsertPathRanks
|
||||
numPathRanksInserted, numInputsProcessed, err := store.InsertPathRanks(ctx, rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123), 10)
|
||||
if err != nil {

@ -100,15 +114,34 @@ func TestVacuumStaleRanks(t *testing.T) {
		t.Fatalf("failed to insert repos: %s", err)
	}

	key1 := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)
	key2 := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 234)
	key3 := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 345)
	key4 := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 456)

	// Insert metadata to rank progress by completion date
	if _, err := db.ExecContext(ctx, `
		INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at, reducer_completed_at)
		VALUES
			($1, 1000, 1000, 1000, NOW() - '80 second'::interval, NOW() - '70 second'::interval),
			($2, 1000, 1000, 1000, NOW() - '60 second'::interval, NOW() - '50 second'::interval),
			($3, 1000, 1000, 1000, NOW() - '40 second'::interval, NOW() - '30 second'::interval),
			($4, 1000, 1000, 1000, NOW() - '20 second'::interval, NULL)
	`,
		key1, key2, key3, key4,
	); err != nil {
		t.Fatalf("failed to insert metadata: %s", err)
	}

	for r, key := range map[string]string{
		"foo1": rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
		"foo2": rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
		"foo3": rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
		"foo4": rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
		"foo5": rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123),
		"bar":  rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 234),
		"baz":  rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 345),
		"bonk": rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 456),
		"foo1": key1,
		"foo2": key1,
		"foo3": key1,
		"foo4": key1,
		"foo5": key1,
		"bar":  key2,
		"baz":  key3,
		"bonk": key4,
	} {
		if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName(r), nil, key); err != nil {
			t.Fatalf("failed to insert document ranks: %s", err)
@ -137,7 +170,7 @@ func TestVacuumStaleRanks(t *testing.T) {
	assertNames([]string{"bar", "baz", "bonk", "foo1", "foo2", "foo3", "foo4", "foo5"})

	// remove sufficiently stale records associated with other ranking keys
	_, rankRecordsDeleted, err := store.VacuumStaleRanks(ctx, rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 456))
	_, rankRecordsDeleted, err := store.VacuumStaleRanks(ctx, key4)
	if err != nil {
		t.Fatalf("unexpected error vacuuming stale ranks: %s", err)
	}
@ -152,19 +185,18 @@ func TestVacuumStaleRanks(t *testing.T) {
//
//

func setDocumentRanks(ctx context.Context, db *basestore.Store, repoName api.RepoName, ranks map[string]float64, graphKey string) error {
func setDocumentRanks(ctx context.Context, db *basestore.Store, repoName api.RepoName, ranks map[string]float64, derivativeGraphKey string) error {
	serialized, err := json.Marshal(ranks)
	if err != nil {
		return err
	}

	return db.Exec(ctx, sqlf.Sprintf(setDocumentRanksQuery, repoName, serialized, graphKey))
	return db.Exec(ctx, sqlf.Sprintf(setDocumentRanksQuery, derivativeGraphKey, repoName, serialized))
}

const setDocumentRanksQuery = `
INSERT INTO codeintel_path_ranks AS pr (repository_id, payload, graph_key)
VALUES ((SELECT id FROM repo WHERE name = %s), %s, %s)
ON CONFLICT (repository_id) DO
UPDATE
SET payload = EXCLUDED.payload
INSERT INTO codeintel_path_ranks AS pr (graph_key, repository_id, payload)
VALUES (%s, (SELECT id FROM repo WHERE name = %s), %s)
ON CONFLICT (graph_key, repository_id) DO
UPDATE SET payload = EXCLUDED.payload
`
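
The conflict target of this upsert moved from repository_id alone to (graph_key, repository_id). A minimal sketch (hypothetical wiring, using plain database/sql rather than the repo's basestore) of the consequence: one payload per repository per graph key, so a new reduction writes alongside the previous one instead of overwriting it:

```
package main

import (
	"context"
	"database/sql"
)

const upsertRank = `
INSERT INTO codeintel_path_ranks AS pr (graph_key, repository_id, payload)
VALUES ($1, $2, $3)
ON CONFLICT (graph_key, repository_id) DO UPDATE SET payload = EXCLUDED.payload
`

func writeRanks(ctx context.Context, db *sql.DB) error {
	// Both rows survive: the repository now holds one payload per graph key,
	// and readers pick the row belonging to the last completed reduction.
	if _, err := db.ExecContext(ctx, upsertRank, "dev-123", 1, `{"a.go": 1}`); err != nil {
		return err
	}
	_, err := db.ExecContext(ctx, upsertRank, "dev-124", 1, `{"a.go": 2}`)
	return err
}
```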

@ -8,9 +8,11 @@ import (
	"github.com/lib/pq"
	otlog "github.com/opentracing/opentracing-go/log"

	rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
	"github.com/sourcegraph/sourcegraph/internal/database/basestore"
	"github.com/sourcegraph/sourcegraph/internal/database/batch"
	"github.com/sourcegraph/sourcegraph/internal/observation"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)

func (s *store) InsertReferencesForRanking(
@ -75,16 +77,16 @@ deleted_references AS (
SELECT COUNT(*) FROM deleted_references
`

func (s *store) VacuumStaleReferences(ctx context.Context, graphKey string) (
func (s *store) SoftDeleteStaleReferences(ctx context.Context, graphKey string) (
	numReferenceRecordsScanned int,
	numStaleReferenceRecordsDeleted int,
	err error,
) {
	ctx, _, endObservation := s.operations.vacuumStaleReferences.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
	ctx, _, endObservation := s.operations.softDeleteStaleReferences.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
	defer endObservation(1, observation.Args{})

	rows, err := s.db.Query(ctx, sqlf.Sprintf(
		vacuumStaleReferencesQuery,
		softDeleteStaleReferencesQuery,
		graphKey, int(threshold/time.Hour), vacuumBatchSize,
	))
	if err != nil {
@ -104,17 +106,16 @@ func (s *store) VacuumStaleReferences(ctx context.Context, graphKey string) (
	return numReferenceRecordsScanned, numStaleReferenceRecordsDeleted, nil
}

const vacuumStaleReferencesQuery = `
const softDeleteStaleReferencesQuery = `
WITH
locked_references AS (
	SELECT
		rr.id,
		u.repository_id,
		rr.upload_id
	FROM codeintel_ranking_references rr
	JOIN lsif_uploads u ON u.id = rr.upload_id
	WHERE
		rr.graph_key = %s AND
		rr.deleted_at IS NULL AND
		(rr.last_scanned_at IS NULL OR NOW() - rr.last_scanned_at >= %s * '1 hour'::interval)
	ORDER BY rr.last_scanned_at ASC NULLS FIRST, rr.id
	FOR UPDATE SKIP LOCKED
@ -125,7 +126,8 @@ candidates AS (
		lr.id,
		uvt.is_default_branch IS TRUE AS safe
	FROM locked_references lr
	LEFT JOIN lsif_uploads_visible_at_tip uvt ON uvt.repository_id = lr.repository_id AND uvt.upload_id = lr.upload_id
	LEFT JOIN lsif_uploads u ON u.id = lr.upload_id
	LEFT JOIN lsif_uploads_visible_at_tip uvt ON uvt.repository_id = u.repository_id AND uvt.upload_id = lr.upload_id
),
updated_references AS (
	UPDATE codeintel_ranking_references
@ -133,7 +135,8 @@ updated_references AS (
	WHERE id IN (SELECT c.id FROM candidates c WHERE c.safe)
),
deleted_references AS (
	DELETE FROM codeintel_ranking_references
	UPDATE codeintel_ranking_references
	SET deleted_at = NOW()
	WHERE id IN (SELECT c.id FROM candidates c WHERE NOT c.safe)
	RETURNING 1
)
@ -141,3 +144,51 @@ SELECT
	(SELECT COUNT(*) FROM candidates),
	(SELECT COUNT(*) FROM deleted_references)
`

func (s *store) VacuumDeletedReferences(ctx context.Context, derivativeGraphKey string) (
	numReferenceRecordsDeleted int,
	err error,
) {
	ctx, _, endObservation := s.operations.vacuumDeletedReferences.With(ctx, &err, observation.Args{LogFields: []otlog.Field{}})
	defer endObservation(1, observation.Args{})

	graphKey, ok := rankingshared.GraphKeyFromDerivativeGraphKey(derivativeGraphKey)
	if !ok {
		return 0, errors.Newf("unexpected derivative graph key %q", derivativeGraphKey)
	}

	count, _, err := basestore.ScanFirstInt(s.db.Query(ctx, sqlf.Sprintf(
		vacuumDeletedReferencesQuery,
		graphKey,
		derivativeGraphKey,
		vacuumBatchSize,
	)))
	return count, err
}

const vacuumDeletedReferencesQuery = `
WITH
locked_references AS (
	SELECT rr.id
	FROM codeintel_ranking_references rr
	WHERE
		rr.graph_key = %s AND
		rr.deleted_at IS NOT NULL AND
		NOT EXISTS (
			SELECT 1
			FROM codeintel_ranking_progress crp
			WHERE
				crp.graph_key = %s AND
				crp.mapper_completed_at IS NULL
		)
	ORDER BY rr.id
	FOR UPDATE SKIP LOCKED
	LIMIT %s
),
deleted_references AS (
	DELETE FROM codeintel_ranking_references
	WHERE id IN (SELECT id FROM locked_references)
	RETURNING 1
)
SELECT COUNT(*) FROM deleted_references
`
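
Taken together, the two functions above replace a single destructive vacuum with a two-phase cleanup. A minimal sketch (the wiring is assumed; only the method names come from this diff) of how a janitor might chain them:

```
package ranking

import "context"

// Phase 1 marks references no longer visible at the tip of the default
// branch (deleted_at = NOW()) without removing them, so mappers reading the
// current derivative graph key keep a stable snapshot. Phase 2 physically
// deletes soft-deleted rows, but only once codeintel_ranking_progress shows
// no mapper for the graph key is still mid-run.
func cleanReferences(ctx context.Context, store Store, graphKey, derivativeGraphKey string) error {
	if _, _, err := store.SoftDeleteStaleReferences(ctx, graphKey); err != nil {
		return err
	}
	_, err := store.VacuumDeletedReferences(ctx, derivativeGraphKey)
	return err
}
```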

@ -10,6 +10,7 @@ import (
	"github.com/lib/pq"
	"github.com/sourcegraph/log/logtest"

	rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
	"github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/shared"
	"github.com/sourcegraph/sourcegraph/internal/database"
	"github.com/sourcegraph/sourcegraph/internal/database/basestore"
@ -108,6 +109,24 @@ func TestVacuumAbandonedReferences(t *testing.T) {
	assertCounts(1*30 + 10)
}

func TestVacuumDeletedReferences(t *testing.T) {
	logger := logtest.Scoped(t)
	ctx := context.Background()
	db := database.NewDB(logger, dbtest.NewDB(logger, t))
	store := New(&observation.TestContext, db)

	key := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)

	// TODO - setup

	_, err := store.VacuumDeletedReferences(ctx, key)
	if err != nil {
		t.Fatalf("unexpected error vacuuming deleted references: %s", err)
	}

	// TODO - assertions
}

//
//

@ -118,7 +137,7 @@ func getRankingReferences(
	graphKey string,
) (_ []shared.RankingReferences, err error) {
	query := fmt.Sprintf(
		`SELECT upload_id, symbol_names FROM codeintel_ranking_references WHERE graph_key = '%s'`,
		`SELECT upload_id, symbol_names FROM codeintel_ranking_references WHERE graph_key = '%s' AND deleted_at IS NULL`,
		graphKey,
	)
	rows, err := db.QueryContext(ctx, query)

@ -64,10 +64,19 @@ func (s *store) GetDocumentRanks(ctx context.Context, repoName api.RepoName) (_
}

const getDocumentRanksQuery = `
WITH
last_completed_progress AS (
	SELECT crp.graph_key
	FROM codeintel_ranking_progress crp
	WHERE crp.reducer_completed_at IS NOT NULL
	ORDER BY crp.reducer_completed_at DESC
	LIMIT 1
)
SELECT payload
FROM codeintel_path_ranks pr
JOIN repo r ON r.id = pr.repository_id
WHERE
	pr.graph_key IN (SELECT graph_key FROM last_completed_progress) AND
	r.name = %s AND
	r.deleted_at IS NULL AND
	r.blocked IS NULL
@ -77,14 +86,7 @@ func (s *store) GetReferenceCountStatistics(ctx context.Context) (logmean float6
	ctx, _, endObservation := s.operations.getReferenceCountStatistics.With(ctx, &err, observation.Args{})
	defer endObservation(1, observation.Args{})

	rows, err := s.db.Query(ctx, sqlf.Sprintf(`
		SELECT CASE
			WHEN COALESCE(SUM(pr.num_paths), 0) = 0
			THEN 0.0
			ELSE SUM(pr.refcount_logsum) / SUM(pr.num_paths)::float
		END AS logmean
		FROM codeintel_path_ranks pr
	`))
	rows, err := s.db.Query(ctx, sqlf.Sprintf(getReferenceCountStatisticsQuery))
	if err != nil {
		return 0, err
	}
@ -99,6 +101,24 @@ func (s *store) GetReferenceCountStatistics(ctx context.Context) (logmean float6
	return logmean, nil
}

const getReferenceCountStatisticsQuery = `
WITH
last_completed_progress AS (
	SELECT crp.graph_key
	FROM codeintel_ranking_progress crp
	WHERE crp.reducer_completed_at IS NOT NULL
	ORDER BY crp.reducer_completed_at DESC
	LIMIT 1
)
SELECT
	CASE WHEN COALESCE(SUM(pr.num_paths), 0) = 0
		THEN 0.0
		ELSE SUM(pr.refcount_logsum) / SUM(pr.num_paths)::float
	END AS logmean
FROM codeintel_path_ranks pr
WHERE pr.graph_key IN (SELECT graph_key FROM last_completed_progress)
`
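
The statistic above is a weighted average over repositories. A minimal Go sketch (hypothetical types, not from this diff) of the same computation the SQL performs, where each codeintel_path_ranks row contributes its per-repo sums:

```
package main

import "fmt"

type repoRanks struct {
	numPaths       int
	refcountLogsum float64 // sum of log(refcount) over the repo's paths
}

// logMean divides the total log-sum by the total path count, matching
// SUM(pr.refcount_logsum) / SUM(pr.num_paths)::float in the query.
func logMean(repos []repoRanks) float64 {
	var paths int
	var logsum float64
	for _, r := range repos {
		paths += r.numPaths
		logsum += r.refcountLogsum
	}
	if paths == 0 {
		return 0 // mirrors the COALESCE(SUM(...), 0) = 0 guard
	}
	return logsum / float64(paths)
}

func main() {
	fmt.Println(logMean([]repoRanks{{3, 6.0}, {1, 2.0}})) // 2
}
```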

func (s *store) LastUpdatedAt(ctx context.Context, repoIDs []api.RepoID) (_ map[api.RepoID]time.Time, err error) {
	ctx, _, endObservation := s.operations.lastUpdatedAt.With(ctx, &err, observation.Args{})
	defer endObservation(1, observation.Args{})
@ -112,11 +132,21 @@ func (s *store) LastUpdatedAt(ctx context.Context, repoIDs []api.RepoID) (_ map[
}

const lastUpdatedAtQuery = `
WITH
last_completed_progress AS (
	SELECT crp.graph_key
	FROM codeintel_ranking_progress crp
	WHERE crp.reducer_completed_at IS NOT NULL
	ORDER BY crp.reducer_completed_at DESC
	LIMIT 1
)
SELECT
	repository_id,
	updated_at
FROM codeintel_path_ranks
WHERE repository_id = ANY(%s)
FROM codeintel_path_ranks pr
WHERE
	pr.graph_key IN (SELECT graph_key FROM last_completed_progress) AND
	repository_id = ANY(%s)
`

var scanLastUpdatedAtPairs = basestore.NewMapScanner(func(s dbutil.Scanner) (repoID api.RepoID, t time.Time, _ error) {

@ -8,6 +8,7 @@ import (
	"github.com/google/go-cmp/cmp"
	"github.com/sourcegraph/log/logtest"

	rankingshared "github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/ranking/internal/shared"
	"github.com/sourcegraph/sourcegraph/internal/api"
	"github.com/sourcegraph/sourcegraph/internal/database"
	"github.com/sourcegraph/sourcegraph/internal/database/basestore"
@ -73,6 +74,18 @@ func TestDocumentRanks(t *testing.T) {
	store := New(&observation.TestContext, db)
	repoName := api.RepoName("foo")

	key := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)

	if _, err := db.ExecContext(ctx, `
		INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at, reducer_completed_at)
		VALUES
			($1, 1000, 1000, 1000, NOW(), NOW())
	`,
		key,
	); err != nil {
		t.Fatalf("failed to insert metadata: %s", err)
	}

	if _, err := db.ExecContext(ctx, `INSERT INTO repo (name, stars) VALUES ('foo', 1000)`); err != nil {
		t.Fatalf("failed to insert repos: %s", err)
	}
@ -82,14 +95,14 @@ func TestDocumentRanks(t *testing.T) {
		"internal/secret.go": 3,
		"internal/util.go":   4,
		"README.md":          5, // no longer referenced
	}, mockRankingGraphKey+"-123"); err != nil {
	}, key); err != nil {
		t.Fatalf("unexpected error setting document ranks: %s", err)
	}
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), repoName, map[string]float64{
		"cmd/args.go":        8, // new
		"internal/secret.go": 7, // edited
		"internal/util.go":   6, // edited
	}, mockRankingGraphKey+"-123"); err != nil {
	}, key); err != nil {
		t.Fatalf("unexpected error setting document ranks: %s", err)
	}

@ -117,17 +130,29 @@ func TestGetReferenceCountStatistics(t *testing.T) {
	db := database.NewDB(logger, dbtest.NewDB(logger, t))
	store := New(&observation.TestContext, db)

	key := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)

	if _, err := db.ExecContext(ctx, `
		INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at, reducer_completed_at)
		VALUES
			($1, 1000, 1000, 1000, NOW(), NOW())
	`,
		key,
	); err != nil {
		t.Fatalf("failed to insert metadata: %s", err)
	}

	if _, err := db.ExecContext(ctx, `INSERT INTO repo (name) VALUES ('foo'), ('bar'), ('baz')`); err != nil {
		t.Fatalf("failed to insert repos: %s", err)
	}

	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName("foo"), map[string]float64{"foo": 18, "bar": 3985, "baz": 5260}, mockRankingGraphKey); err != nil {
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName("foo"), map[string]float64{"foo": 18, "bar": 3985, "baz": 5260}, key); err != nil {
		t.Fatalf("failed to set document ranks: %s", err)
	}
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName("bar"), map[string]float64{"foo": 5712, "bar": 5902, "baz": 79}, mockRankingGraphKey); err != nil {
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName("bar"), map[string]float64{"foo": 5712, "bar": 5902, "baz": 79}, key); err != nil {
		t.Fatalf("failed to set document ranks: %s", err)
	}
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName("baz"), map[string]float64{"foo": 86, "bar": 89, "baz": 9, "bonk": 918, "quux": 0}, mockRankingGraphKey); err != nil {
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), api.RepoName("baz"), map[string]float64{"foo": 86, "bar": 89, "baz": 9, "bonk": 918, "quux": 0}, key); err != nil {
		t.Fatalf("failed to set document ranks: %s", err)
	}

@ -150,15 +175,27 @@ func TestLastUpdatedAt(t *testing.T) {
	db := database.NewDB(logger, dbtest.NewDB(logger, t))
	store := New(&observation.TestContext, db)

	key := rankingshared.NewDerivativeGraphKeyKey(mockRankingGraphKey, "", 123)

	if _, err := db.ExecContext(ctx, `
		INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at, reducer_completed_at)
		VALUES
			($1, 1000, 1000, 1000, NOW(), NOW())
	`,
		key,
	); err != nil {
		t.Fatalf("failed to insert metadata: %s", err)
	}

	idFoo := api.RepoID(1)
	idBar := api.RepoID(2)
	if _, err := db.ExecContext(ctx, `INSERT INTO repo (id, name) VALUES (1, 'foo'), (2, 'bar'), (3, 'baz')`); err != nil {
		t.Fatalf("failed to insert repos: %s", err)
	}
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), "foo", nil, mockRankingGraphKey+"-123"); err != nil {
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), "foo", nil, key); err != nil {
		t.Fatalf("unexpected error setting document ranks: %s", err)
	}
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), "bar", nil, mockRankingGraphKey+"-123"); err != nil {
	if err := setDocumentRanks(ctx, basestore.NewWithHandle(db.Handle()), "bar", nil, key); err != nil {
		t.Fatalf("unexpected error setting document ranks: %s", err)
	}

@ -25,25 +25,31 @@ type Store interface {

	// Export uploads (metadata tracking) + cleanup
	GetUploadsForRanking(ctx context.Context, graphKey, objectPrefix string, batchSize int) ([]uploadsshared.ExportedUpload, error)
	ProcessStaleExportedUploads(ctx context.Context, graphKey string, batchSize int, deleter func(ctx context.Context, objectPrefix string) error) (totalDeleted int, _ error)
	VacuumAbandonedExportedUploads(ctx context.Context, graphKey string, batchSize int) (int, error)

	// Export definitions + cleanup
	InsertDefinitionsForRanking(ctx context.Context, rankingGraphKey string, definitions chan shared.RankingDefinitions) error
	InsertDefinitionsForRanking(ctx context.Context, graphKey string, definitions chan shared.RankingDefinitions) error
	VacuumAbandonedDefinitions(ctx context.Context, graphKey string, batchSize int) (int, error)
	VacuumStaleDefinitions(ctx context.Context, graphKey string) (numDefinitionRecordsScanned int, numStaleDefinitionRecordsDeleted int, _ error)
	SoftDeleteStaleDefinitions(ctx context.Context, graphKey string) (numDefinitionRecordsScanned int, numStaleDefinitionRecordsDeleted int, _ error)
	VacuumDeletedDefinitions(ctx context.Context, derivativeGraphKey string) (int, error)

	// Export references + cleanup
	InsertReferencesForRanking(ctx context.Context, rankingGraphKey string, batchSize int, uploadID int, references chan string) error
	InsertReferencesForRanking(ctx context.Context, graphKey string, batchSize int, uploadID int, references chan string) error
	VacuumAbandonedReferences(ctx context.Context, graphKey string, batchSize int) (int, error)
	VacuumStaleReferences(ctx context.Context, graphKey string) (numReferenceRecordsScanned int, numStaleReferenceRecordsDeleted int, _ error)
	SoftDeleteStaleReferences(ctx context.Context, graphKey string) (numReferenceRecordsScanned int, numStaleReferenceRecordsDeleted int, _ error)
	VacuumDeletedReferences(ctx context.Context, derivativeGraphKey string) (int, error)

	// Export upload paths + cleanup
	InsertInitialPathRanks(ctx context.Context, uploadID int, documentPaths chan string, batchSize int, graphKey string) error
	VacuumAbandonedInitialPathCounts(ctx context.Context, graphKey string, batchSize int) (int, error)
	VacuumStaleInitialPaths(ctx context.Context, graphKey string) (numPathRecordsScanned int, numStalePathRecordsDeleted int, _ error)
	SoftDeleteStaleInitialPaths(ctx context.Context, graphKey string) (numPathRecordsScanned int, numStalePathRecordsDeleted int, _ error)
	VacuumDeletedInitialPaths(ctx context.Context, derivativeGraphKey string) (int, error)

	// Coordinates mapper+reducer phases
	Coordinate(ctx context.Context, derivativeGraphKey string) error

	// Mapper behavior + cleanup
	InsertPathCountInputs(ctx context.Context, rankingGraphKey string, batchSize int) (numReferenceRecordsProcessed int, numInputsInserted int, err error)
	InsertPathCountInputs(ctx context.Context, derivativeGraphKey string, batchSize int) (numReferenceRecordsProcessed int, numInputsInserted int, err error)
	InsertInitialPathCounts(ctx context.Context, derivativeGraphKey string, batchSize int) (numInitialPathsProcessed int, numInitialPathRanksInserted int, err error)
	VacuumStaleGraphs(ctx context.Context, derivativeGraphKey string, batchSize int) (inputRecordsDeleted int, _ error)
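
The new Coordinate method is what makes the map and reduce steps atomic and non-continuous. A hypothetical sketch (not the shipped coordinator) of one pass driven by the codeintel_ranking_progress timestamps:

```
package ranking

import "context"

// Creating the progress row snapshots the maximum definition, reference,
// and path IDs the mappers may consume; mappers drain exported data up to
// that bound, and the reducer only becomes eligible once both
// mapper_completed_at and seed_mapper_completed_at are recorded.
func runOnePass(ctx context.Context, store Store, derivativeGraphKey string) error {
	if err := store.Coordinate(ctx, derivativeGraphKey); err != nil { // advance phase bookkeeping
		return err
	}
	if _, _, err := store.InsertPathCountInputs(ctx, derivativeGraphKey, 1000); err != nil { // mapper batch
		return err
	}
	_, _, err := store.InsertInitialPathCounts(ctx, derivativeGraphKey, 1000) // seed-mapper batch
	return err
}
```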

@ -4,7 +4,6 @@ import (
	"context"

	"github.com/keegancsmith/sqlf"
	"github.com/lib/pq"

	"github.com/sourcegraph/sourcegraph/enterprise/internal/codeintel/uploads/shared"
	"github.com/sourcegraph/sourcegraph/internal/database/basestore"
@ -81,63 +80,28 @@ var scanUploads = basestore.NewSliceScanner(func(s dbutil.Scanner) (u shared.Exp
	return u, err
})

func (s *store) ProcessStaleExportedUploads(
	ctx context.Context,
	graphKey string,
	batchSize int,
	deleter func(ctx context.Context, objectPrefix string) error,
) (totalDeleted int, err error) {
	ctx, _, endObservation := s.operations.processStaleExportedUploads.With(ctx, &err, observation.Args{})
func (s *store) VacuumAbandonedExportedUploads(ctx context.Context, graphKey string, batchSize int) (_ int, err error) {
	ctx, _, endObservation := s.operations.vacuumAbandonedExportedUploads.With(ctx, &err, observation.Args{})
	defer endObservation(1, observation.Args{})

	var a int
	err = s.withTransaction(ctx, func(tx *store) error {
		prefixByIDs, err := scanIntStringMap(tx.db.Query(ctx, sqlf.Sprintf(selectStaleExportedUploadsQuery, graphKey, batchSize)))
		if err != nil {
			return err
		}

		ids := make([]int, 0, len(prefixByIDs))
		for id, prefix := range prefixByIDs {
			if err := deleter(ctx, prefix); err != nil {
				return err
			}

			ids = append(ids, id)
		}

		if err := tx.db.Exec(ctx, sqlf.Sprintf(deleteStaleExportedUploadsQuery, pq.Array(ids))); err != nil {
			return err
		}

		a = len(ids)
		return nil
	})
	return a, err
	count, _, err := basestore.ScanFirstInt(s.db.Query(ctx, sqlf.Sprintf(vacuumAbandonedExportedUploadsQuery, graphKey, graphKey, batchSize)))
	return count, err
}

var scanIntStringMap = basestore.NewMapScanner(func(s dbutil.Scanner) (k int, v string, _ error) {
	err := s.Scan(&k, &v)
	return k, v, err
})

const selectStaleExportedUploadsQuery = `
SELECT
	re.id,
	re.object_prefix
FROM codeintel_ranking_exports re
WHERE
	re.graph_key = %s AND (re.upload_id IS NULL OR re.upload_id NOT IN (
		SELECT uvt.upload_id
		FROM lsif_uploads_visible_at_tip uvt
		WHERE uvt.is_default_branch
	))
ORDER BY re.upload_id DESC
LIMIT %s
FOR UPDATE OF re SKIP LOCKED
`

const deleteStaleExportedUploadsQuery = `
DELETE FROM codeintel_ranking_exports re
WHERE re.id = ANY(%s)
const vacuumAbandonedExportedUploadsQuery = `
WITH
locked_exported_uploads AS (
	SELECT id
	FROM codeintel_ranking_exports
	WHERE (graph_key < %s OR graph_key > %s)
	ORDER BY graph_key, id
	FOR UPDATE SKIP LOCKED
	LIMIT %s
),
deleted_uploads AS (
	DELETE FROM codeintel_ranking_exports
	WHERE id IN (SELECT id FROM locked_exported_uploads)
	RETURNING 1
)
SELECT COUNT(*) FROM deleted_uploads
`
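
The replacement query uses a pattern that recurs throughout this diff: lock a bounded batch, delete only what was locked, and count via RETURNING. A sketch (the `$n` placeholders are assumed; the table and columns come from the query above):

```
package store

// FOR UPDATE SKIP LOCKED lets concurrent janitors take disjoint batches
// without blocking each other; (graph_key < $1 OR graph_key > $1) is an
// index-friendly spelling of graph_key <> $1 over the btree on graph_key.
const batchedDelete = `
WITH
locked AS (
	SELECT id
	FROM codeintel_ranking_exports
	WHERE (graph_key < $1 OR graph_key > $1)
	ORDER BY graph_key, id
	FOR UPDATE SKIP LOCKED
	LIMIT $2
),
deleted AS (
	DELETE FROM codeintel_ranking_exports
	WHERE id IN (SELECT id FROM locked)
	RETURNING 1
)
SELECT COUNT(*) FROM deleted
`
```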

@ -2,7 +2,6 @@ package store

import (
	"context"
	"sort"
	"testing"

	"github.com/google/go-cmp/cmp"
@ -68,80 +67,18 @@ func TestGetUploadsForRanking(t *testing.T) {
	}
}

func TestProcessStaleExportedUploads(t *testing.T) {
	if testing.Short() {
		t.Skip()
	}

func TestVacuumAbandonedExportedUploads(t *testing.T) {
	logger := logtest.Scoped(t)
	ctx := context.Background()
	db := database.NewDB(logger, dbtest.NewDB(logger, t))
	store := New(&observation.TestContext, db)

	if _, err := db.ExecContext(ctx, `
		INSERT INTO repo (id, name, deleted_at) VALUES (50, 'foo', NULL);
		INSERT INTO repo (id, name, deleted_at) VALUES (51, 'bar', NULL);
		INSERT INTO repo (id, name, deleted_at) VALUES (52, 'baz', NULL);
		INSERT INTO lsif_uploads (id, repository_id, commit, indexer, num_parts, uploaded_parts, state) VALUES (100, 50, '0000000000000000000000000000000000000001', 'lsif-test', 1, '{}', 'completed');
		INSERT INTO lsif_uploads (id, repository_id, commit, indexer, num_parts, uploaded_parts, state) VALUES (101, 50, '0000000000000000000000000000000000000002', 'lsif-test', 1, '{}', 'completed');
		INSERT INTO lsif_uploads (id, repository_id, commit, indexer, num_parts, uploaded_parts, state) VALUES (102, 50, '0000000000000000000000000000000000000003', 'lsif-test', 1, '{}', 'completed');
		INSERT INTO lsif_uploads (id, repository_id, commit, indexer, num_parts, uploaded_parts, state) VALUES (103, 51, '0000000000000000000000000000000000000004', 'lsif-test', 1, '{}', 'completed');
		INSERT INTO lsif_uploads (id, repository_id, commit, indexer, num_parts, uploaded_parts, state) VALUES (104, 51, '0000000000000000000000000000000000000005', 'lsif-test', 1, '{}', 'completed');
		INSERT INTO lsif_uploads (id, repository_id, commit, indexer, num_parts, uploaded_parts, state) VALUES (105, 52, '0000000000000000000000000000000000000006', 'lsif-test', 1, '{}', 'completed');
		INSERT INTO lsif_uploads_visible_at_tip (upload_id, repository_id, is_default_branch) VALUES (100, 50, true);
		INSERT INTO lsif_uploads_visible_at_tip (upload_id, repository_id, is_default_branch) VALUES (103, 51, true);
		INSERT INTO lsif_uploads_visible_at_tip (upload_id, repository_id, is_default_branch) VALUES (105, 52, true);
	`); err != nil {
		t.Fatalf("unexpected error setting up test: %s", err)
	}
	// TODO - setup

	// Insert all records
	uploads, err := store.GetUploadsForRanking(ctx, "test", "ranking", 10)
	_, err := store.VacuumAbandonedExportedUploads(ctx, mockRankingGraphKey, 100)
	if err != nil {
		t.Fatalf("unexpected error getting uploads for ranking: %s", err)
	}
	expectedUploads := []uploadsshared.ExportedUpload{
		{ID: 100, Repo: "foo", RepoID: 50, ObjectPrefix: "ranking/test/100"}, // shadowed by upload 102
		{ID: 103, Repo: "bar", RepoID: 51, ObjectPrefix: "ranking/test/103"}, // repo gets deleted
		{ID: 105, Repo: "baz", RepoID: 52, ObjectPrefix: "ranking/test/105"}, // upload gets deleted
	}
	if diff := cmp.Diff(expectedUploads, uploads); diff != "" {
		t.Fatalf("unexpected uploads (-want +got):\n%s", diff)
		t.Fatalf("unexpected error vacuuming deleted exported uploads: %s", err)
	}

	// Mess some stuff up
	if _, err := db.ExecContext(ctx, `
		UPDATE repo SET deleted_at = NOW() WHERE id = 51;              -- delete repo (attached to upload 103)
		DELETE FROM lsif_uploads_visible_at_tip WHERE upload_id = 103; -- eventual effect (after janitor runs)
		DELETE FROM lsif_uploads WHERE id = 105;                       -- delete upload
		DELETE FROM lsif_uploads_visible_at_tip WHERE upload_id = 105; -- eventual effect (after janitor runs)

		DELETE FROM lsif_uploads_visible_at_tip WHERE upload_id = 100; -- Shadow upload 100 with upload 102
		INSERT INTO lsif_uploads_visible_at_tip (upload_id, repository_id, is_default_branch) VALUES (102, 50, true);
	`); err != nil {
		t.Fatalf("unexpected error setting up test: %s", err)
	}

	// Assert that these records will be marked for deletion
	var deletedObjectPrefixes []string
	numDeleted, err := store.ProcessStaleExportedUploads(ctx, "test", 100, func(ctx context.Context, objectPrefix string) error {
		deletedObjectPrefixes = append(deletedObjectPrefixes, objectPrefix)
		return nil
	})
	if err != nil {
		t.Fatalf("unexpected error processing stale exported uploads: %s", err)
	}
	if numDeleted != len(deletedObjectPrefixes) {
		t.Fatalf("expected numDeleted to match number of invocations. numDeleted=%d len(deleted)=%d", numDeleted, len(deletedObjectPrefixes))
	}

	expectedDeletedObjectPrefixes := []string{
		"ranking/test/100",
		"ranking/test/103",
		"ranking/test/105",
	}
	sort.Strings(deletedObjectPrefixes)
	if diff := cmp.Diff(expectedDeletedObjectPrefixes, deletedObjectPrefixes); diff != "" {
		t.Fatalf("unexpected deleted IDs (-want +got):\n%s", diff)
	}
	// TODO - assertions
}

1210 enterprise/internal/codeintel/ranking/mocks_test.go (generated)
File diff suppressed because it is too large
@ -1092,8 +1092,24 @@ func (s *repoStore) listSQL(ctx context.Context, tr *trace.Trace, opt ReposListO

	if !opt.MinLastChanged.IsZero() {
		conds := []*sqlf.Query{
			sqlf.Sprintf(`
				EXISTS (
					SELECT 1
					FROM codeintel_path_ranks pr
					WHERE
						pr.graph_key IN (
							SELECT crp.graph_key
							FROM codeintel_ranking_progress crp
							WHERE crp.reducer_completed_at IS NOT NULL
							ORDER BY crp.reducer_completed_at DESC
							LIMIT 1
						) AND
						pr.repository_id = repo.id AND
						pr.updated_at >= %s
				)
			`, opt.MinLastChanged),

			sqlf.Sprintf("EXISTS (SELECT 1 FROM gitserver_repos gr WHERE gr.repo_id = repo.id AND gr.last_changed >= %s)", opt.MinLastChanged),
			sqlf.Sprintf("EXISTS (SELECT 1 FROM codeintel_path_ranks pr WHERE pr.repository_id = repo.id AND pr.updated_at >= %s)", opt.MinLastChanged),
			sqlf.Sprintf("COALESCE(repo.updated_at, repo.created_at) >= %s", opt.MinLastChanged),
			sqlf.Sprintf("EXISTS (SELECT 1 FROM search_context_repos scr LEFT JOIN search_contexts sc ON scr.search_context_id = sc.id WHERE scr.repo_id = repo.id AND sc.updated_at >= %s)", opt.MinLastChanged),
		}
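
A minimal sketch (the composition is assumed; sqlf.Join is the library's real combinator) of how a list of conditions like the one above is typically folded into the outer WHERE clause as a disjunction:

```
package main

import "github.com/keegancsmith/sqlf"

// Any one signal (completed ranking data, gitserver activity, repo
// metadata, or search-context updates) marks the repo as changed.
func anyChangedSince(conds []*sqlf.Query) *sqlf.Query {
	return sqlf.Sprintf("(%s)", sqlf.Join(conds, " OR "))
}
```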

@ -758,8 +758,20 @@ func TestRepos_List_LastChanged(t *testing.T) {
	setGitserverRepoCloneStatus(t, db, r3.Name, types.CloneStatusCloned)
	setGitserverRepoLastChanged(t, db, r3.Name, now.Add(-time.Hour))
	{
		_, err := db.Handle().ExecContext(ctx, `INSERT INTO codeintel_path_ranks (repository_id, updated_at, payload) VALUES ($1, $2, '{}'::jsonb)`, r3.ID, now)
		if err != nil {
		if _, err := db.Handle().ExecContext(ctx, `
			INSERT INTO codeintel_path_ranks (graph_key, repository_id, updated_at, payload)
			VALUES ('test', $1, $2, '{}'::jsonb)
		`,
			r3.ID, now,
		); err != nil {
			t.Fatal(err)
		}

		if _, err := db.Handle().ExecContext(ctx, `
			INSERT INTO codeintel_ranking_progress(graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at, reducer_completed_at)
			VALUES ('test', 1000, 1000, 1000, NOW(), NOW())
		`,
		); err != nil {
			t.Fatal(err)
		}
	}

@ -492,6 +492,15 @@
	"Increment": 1,
	"CycleOption": "NO"
},
{
	"Name": "codeintel_ranking_progress_id_seq",
	"TypeName": "bigint",
	"StartValue": 1,
	"MinimumValue": 1,
	"MaximumValue": 9223372036854775807,
	"Increment": 1,
	"CycleOption": "NO"
},
{
	"Name": "codeintel_ranking_references_id_seq",
	"TypeName": "bigint",
@ -7156,6 +7165,19 @@
	"Name": "codeintel_initial_path_ranks",
	"Comment": "",
	"Columns": [
		{
			"Name": "deleted_at",
			"Index": 7,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "document_path",
			"Index": 3,
@ -7424,7 +7446,7 @@
			"Name": "graph_key",
			"Index": 5,
			"TypeName": "text",
			"IsNullable": true,
			"IsNullable": false,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
@ -7513,6 +7535,16 @@
		}
	],
	"Indexes": [
		{
			"Name": "codeintel_path_ranks_graph_key_repository_id",
			"IsPrimaryKey": false,
			"IsUnique": true,
			"IsExclusion": false,
			"IsDeferrable": false,
			"IndexDefinition": "CREATE UNIQUE INDEX codeintel_path_ranks_graph_key_repository_id ON codeintel_path_ranks USING btree (graph_key, repository_id)",
			"ConstraintType": "",
			"ConstraintDefinition": ""
		},
		{
			"Name": "codeintel_path_ranks_pkey",
			"IsPrimaryKey": true,
@ -7523,16 +7555,6 @@
			"ConstraintType": "p",
			"ConstraintDefinition": "PRIMARY KEY (id)"
		},
		{
			"Name": "codeintel_path_ranks_repository_id",
			"IsPrimaryKey": false,
			"IsUnique": true,
			"IsExclusion": false,
			"IsDeferrable": false,
			"IndexDefinition": "CREATE UNIQUE INDEX codeintel_path_ranks_repository_id ON codeintel_path_ranks USING btree (repository_id)",
			"ConstraintType": "",
			"ConstraintDefinition": ""
		},
		{
			"Name": "codeintel_path_ranks_graph_key",
			"IsPrimaryKey": false,
@ -7574,6 +7596,19 @@
	"Name": "codeintel_ranking_definitions",
	"Comment": "",
	"Columns": [
		{
			"Name": "deleted_at",
			"Index": 8,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "document_path",
			"Index": 5,
@ -7909,10 +7944,183 @@
	"Constraints": null,
	"Triggers": []
},
{
	"Name": "codeintel_ranking_progress",
	"Comment": "",
	"Columns": [
		{
			"Name": "graph_key",
			"Index": 2,
			"TypeName": "text",
			"IsNullable": false,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "id",
			"Index": 1,
			"TypeName": "bigint",
			"IsNullable": false,
			"Default": "nextval('codeintel_ranking_progress_id_seq'::regclass)",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "mapper_completed_at",
			"Index": 7,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "mappers_started_at",
			"Index": 6,
			"TypeName": "timestamp with time zone",
			"IsNullable": false,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "max_definition_id",
			"Index": 3,
			"TypeName": "integer",
			"IsNullable": false,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "max_path_id",
			"Index": 5,
			"TypeName": "integer",
			"IsNullable": false,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "max_reference_id",
			"Index": 4,
			"TypeName": "integer",
			"IsNullable": false,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "reducer_completed_at",
			"Index": 10,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "reducer_started_at",
			"Index": 9,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "seed_mapper_completed_at",
			"Index": 8,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		}
	],
	"Indexes": [
		{
			"Name": "codeintel_ranking_progress_graph_key_key",
			"IsPrimaryKey": false,
			"IsUnique": true,
			"IsExclusion": false,
			"IsDeferrable": false,
			"IndexDefinition": "CREATE UNIQUE INDEX codeintel_ranking_progress_graph_key_key ON codeintel_ranking_progress USING btree (graph_key)",
			"ConstraintType": "u",
			"ConstraintDefinition": "UNIQUE (graph_key)"
		},
		{
			"Name": "codeintel_ranking_progress_pkey",
			"IsPrimaryKey": true,
			"IsUnique": true,
			"IsExclusion": false,
			"IsDeferrable": false,
			"IndexDefinition": "CREATE UNIQUE INDEX codeintel_ranking_progress_pkey ON codeintel_ranking_progress USING btree (id)",
			"ConstraintType": "p",
			"ConstraintDefinition": "PRIMARY KEY (id)"
		}
	],
	"Constraints": null,
	"Triggers": []
},
{
	"Name": "codeintel_ranking_references",
	"Comment": "References for a given upload produced by background job consuming SCIP indexes.",
	"Columns": [
		{
			"Name": "deleted_at",
			"Index": 7,
			"TypeName": "timestamp with time zone",
			"IsNullable": true,
			"Default": "",
			"CharacterMaximumLength": 0,
			"IsIdentity": false,
			"IdentityGeneration": "",
			"IsGenerated": "NEVER",
			"GenerationExpression": "",
			"Comment": ""
		},
		{
			"Name": "graph_key",
			"Index": 4,

@ -852,6 +852,7 @@ Contains auto-index job inference Lua scripts as an alternative to setting via e
 graph_key       | text                     |           | not null |
 last_scanned_at | timestamp with time zone |           |          |
 document_paths  | text[]                   |           | not null | '{}'::text[]
 deleted_at      | timestamp with time zone |           |          |
Indexes:
    "codeintel_initial_path_ranks_pkey" PRIMARY KEY, btree (id)
    "codeintel_initial_path_ranks_graph_key_id" btree (graph_key, id)
@ -896,13 +897,13 @@ Indexes:
 repository_id   | integer                  |           | not null |
 payload         | jsonb                    |           | not null |
 updated_at      | timestamp with time zone |           | not null | now()
 graph_key       | text                     |           |          |
 graph_key       | text                     |           | not null |
 num_paths       | integer                  |           |          |
 refcount_logsum | double precision         |           |          |
 id              | bigint                   |           | not null | nextval('codeintel_path_ranks_id_seq'::regclass)
Indexes:
    "codeintel_path_ranks_pkey" PRIMARY KEY, btree (id)
    "codeintel_path_ranks_repository_id" UNIQUE, btree (repository_id)
    "codeintel_path_ranks_graph_key_repository_id" UNIQUE, btree (graph_key, repository_id)
    "codeintel_path_ranks_graph_key" btree (graph_key, updated_at NULLS FIRST, id)
    "codeintel_path_ranks_repository_id_updated_at_id" btree (repository_id, updated_at NULLS FIRST, id)
Triggers:
@ -922,6 +923,7 @@ Triggers:
 document_path   | text                     |           | not null |
 graph_key       | text                     |           | not null |
 last_scanned_at | timestamp with time zone |           |          |
 deleted_at      | timestamp with time zone |           |          |
Indexes:
    "codeintel_ranking_definitions_pkey" PRIMARY KEY, btree (id)
    "codeintel_ranking_definitions_graph_key_last_scanned_at_id" btree (graph_key, last_scanned_at NULLS FIRST, id)
@ -963,6 +965,26 @@ Indexes:

```

# Table "public.codeintel_ranking_progress"
```
          Column           |           Type           | Collation | Nullable |                         Default
---------------------------+--------------------------+-----------+----------+---------------------------------------------------------
 id                        | bigint                   |           | not null | nextval('codeintel_ranking_progress_id_seq'::regclass)
 graph_key                 | text                     |           | not null |
 max_definition_id         | integer                  |           | not null |
 max_reference_id          | integer                  |           | not null |
 max_path_id               | integer                  |           | not null |
 mappers_started_at        | timestamp with time zone |           | not null |
 mapper_completed_at       | timestamp with time zone |           |          |
 seed_mapper_completed_at  | timestamp with time zone |           |          |
 reducer_started_at        | timestamp with time zone |           |          |
 reducer_completed_at      | timestamp with time zone |           |          |
Indexes:
    "codeintel_ranking_progress_pkey" PRIMARY KEY, btree (id)
    "codeintel_ranking_progress_graph_key_key" UNIQUE CONSTRAINT, btree (graph_key)

```

# Table "public.codeintel_ranking_references"
```
     Column      |           Type           | Collation | Nullable | Default
@ -972,6 +994,7 @@ Indexes:
 symbol_names    | text[]                   |           | not null |
 graph_key       | text                     |           | not null |
 last_scanned_at | timestamp with time zone |           |          |
 deleted_at      | timestamp with time zone |           |          |
Indexes:
    "codeintel_ranking_references_pkey" PRIMARY KEY, btree (id)
    "codeintel_ranking_references_graph_key_id" btree (graph_key, id)

39 migrations/BUILD.bazel (generated)
@ -901,30 +901,39 @@ go_library(
        "frontend/1682114198_product_license_access_tokens/metadata.yaml",
        "frontend/1682114198_product_license_access_tokens/up.sql",
        "frontend/squashed.sql",
        "frontend/1682626931_subscription_llm_proxy_state/down.sql",
        "frontend/1682626931_subscription_llm_proxy_state/metadata.yaml",
        "frontend/1682626931_subscription_llm_proxy_state/up.sql",
        "frontend/1683053825_sg_telemetry_allowlist/down.sql",
        "frontend/1683053825_sg_telemetry_allowlist/metadata.yaml",
        "frontend/1683053825_sg_telemetry_allowlist/up.sql",
        "frontend/1682604499_add_softdelete_timestamp_to_ranking_exports/down.sql",
        "frontend/1682604499_add_softdelete_timestamp_to_ranking_exports/metadata.yaml",
        "frontend/1682604499_add_softdelete_timestamp_to_ranking_exports/up.sql",
        "frontend/1682967255_add_ranking_graph_key_table/down.sql",
        "frontend/1682967255_add_ranking_graph_key_table/metadata.yaml",
        "frontend/1682967255_add_ranking_graph_key_table/up.sql",
        "frontend/1683640362_expand_ranking_unique_key/down.sql",
        "frontend/1683640362_expand_ranking_unique_key/metadata.yaml",
        "frontend/1683640362_expand_ranking_unique_key/up.sql",
        "frontend/1682598027_add_github_app_installations_table/down.sql",
        "frontend/1682598027_add_github_app_installations_table/metadata.yaml",
        "frontend/1682598027_add_github_app_installations_table/up.sql",
        "frontend/1682626931_subscription_llm_proxy_state/down.sql",
        "frontend/1682626931_subscription_llm_proxy_state/metadata.yaml",
        "frontend/1682626931_subscription_llm_proxy_state/up.sql",
        "frontend/1682683129_add_recent_view_ownership_signal/down.sql",
        "frontend/1682683129_add_recent_view_ownership_signal/metadata.yaml",
        "frontend/1682683129_add_recent_view_ownership_signal/up.sql",
        "frontend/1683295546_add_app_url_column_for_github_apps/down.sql",
        "frontend/1683295546_add_app_url_column_for_github_apps/metadata.yaml",
        "frontend/1683295546_add_app_url_column_for_github_apps/up.sql",
        "frontend/1683290474_user_code_completions_quota/down.sql",
        "frontend/1683290474_user_code_completions_quota/metadata.yaml",
        "frontend/1683290474_user_code_completions_quota/up.sql",
        "frontend/1683561153_add_autoindexing_repo_exceptions_table/down.sql",
        "frontend/1683561153_add_autoindexing_repo_exceptions_table/metadata.yaml",
        "frontend/1683561153_add_autoindexing_repo_exceptions_table/up.sql",
        "frontend/1683246005_llmproxynoaccesstokenenable/down.sql",
        "frontend/1683246005_llmproxynoaccesstokenenable/metadata.yaml",
        "frontend/1683246005_llmproxynoaccesstokenenable/up.sql",
        "frontend/1683290474_user_code_completions_quota/down.sql",
        "frontend/1683290474_user_code_completions_quota/metadata.yaml",
        "frontend/1683290474_user_code_completions_quota/up.sql",
        "frontend/1683295546_add_app_url_column_for_github_apps/down.sql",
        "frontend/1683295546_add_app_url_column_for_github_apps/metadata.yaml",
        "frontend/1683295546_add_app_url_column_for_github_apps/up.sql",
        "frontend/1683053825_sg_telemetry_allowlist/down.sql",
        "frontend/1683053825_sg_telemetry_allowlist/metadata.yaml",
        "frontend/1683053825_sg_telemetry_allowlist/up.sql",
        "frontend/1683561153_add_autoindexing_repo_exceptions_table/down.sql",
        "frontend/1683561153_add_autoindexing_repo_exceptions_table/metadata.yaml",
        "frontend/1683561153_add_autoindexing_repo_exceptions_table/up.sql",
    ],
    importpath = "github.com/sourcegraph/sourcegraph/migrations",
    visibility = ["//visibility:public"],

@ -0,0 +1,3 @@
ALTER TABLE codeintel_ranking_definitions DROP COLUMN IF EXISTS deleted_at;
ALTER TABLE codeintel_ranking_references DROP COLUMN IF EXISTS deleted_at;
ALTER TABLE codeintel_initial_path_ranks DROP COLUMN IF EXISTS deleted_at;
@ -0,0 +1,2 @@
name: Add soft-delete timestamp to ranking exports
parents: [1681300431, 1682012624, 1681982430]
@ -0,0 +1,3 @@
ALTER TABLE codeintel_ranking_definitions ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ;
ALTER TABLE codeintel_ranking_references ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ;
ALTER TABLE codeintel_initial_path_ranks ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ;
@ -0,0 +1 @@
DROP TABLE IF EXISTS codeintel_ranking_progress;
@ -0,0 +1,2 @@
name: Add ranking graph key table
parents: [1682114198, 1682604499]
@ -0,0 +1,12 @@
CREATE TABLE IF NOT EXISTS codeintel_ranking_progress (
    id                       BIGSERIAL PRIMARY KEY,
    graph_key                TEXT NOT NULL UNIQUE,
    max_definition_id        INTEGER NOT NULL,
    max_reference_id         INTEGER NOT NULL,
    max_path_id              INTEGER NOT NULL,
    mappers_started_at       TIMESTAMP WITH TIME ZONE NOT NULL,
    mapper_completed_at      TIMESTAMP WITH TIME ZONE,
    seed_mapper_completed_at TIMESTAMP WITH TIME ZONE,
    reducer_started_at       TIMESTAMP WITH TIME ZONE,
    reducer_completed_at     TIMESTAMP WITH TIME ZONE
);
@ -0,0 +1,3 @@
CREATE UNIQUE INDEX IF NOT EXISTS codeintel_path_ranks_repository_id ON codeintel_path_ranks(repository_id);
DROP INDEX IF EXISTS codeintel_path_ranks_graph_key_repository_id;
ALTER TABLE codeintel_path_ranks ALTER COLUMN graph_key DROP NOT NULL;
@ -0,0 +1,2 @@
name: Expand ranking unique key
parents: [1682967255]
@ -0,0 +1,4 @@
DELETE FROM codeintel_path_ranks WHERE graph_key IS NULL;
ALTER TABLE codeintel_path_ranks ALTER COLUMN graph_key SET NOT NULL;
CREATE UNIQUE INDEX IF NOT EXISTS codeintel_path_ranks_graph_key_repository_id ON codeintel_path_ranks(graph_key, repository_id);
DROP INDEX IF EXISTS codeintel_path_ranks_repository_id;
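
The new codeintel_ranking_progress row is the bookkeeping record for phase state. A hypothetical sketch (these statements are assumed for illustration, not part of the migrations) of how its timestamps advance over one pass:

```
package store

const (
	// One row per derivative graph key; inserting it starts the mapper phase
	// and freezes the ID high-water marks the mappers may read.
	startPass     = `INSERT INTO codeintel_ranking_progress (graph_key, max_definition_id, max_reference_id, max_path_id, mappers_started_at) VALUES ($1, $2, $3, $4, NOW())`
	finishMappers = `UPDATE codeintel_ranking_progress SET mapper_completed_at = NOW(), seed_mapper_completed_at = NOW() WHERE graph_key = $1`
	// The reducer starts only once both mapper timestamps are set ...
	startReducer  = `UPDATE codeintel_ranking_progress SET reducer_started_at = NOW() WHERE graph_key = $1 AND mapper_completed_at IS NOT NULL AND seed_mapper_completed_at IS NOT NULL`
	// ... and its completion timestamp is what readers use to pick the
	// freshest finished graph.
	finishReducer = `UPDATE codeintel_ranking_progress SET reducer_completed_at = NOW() WHERE graph_key = $1`
)
```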

@ -1673,7 +1673,8 @@ CREATE TABLE codeintel_initial_path_ranks (
    document_path text DEFAULT ''::text NOT NULL,
    graph_key text NOT NULL,
    last_scanned_at timestamp with time zone,
    document_paths text[] DEFAULT '{}'::text[] NOT NULL
    document_paths text[] DEFAULT '{}'::text[] NOT NULL,
    deleted_at timestamp with time zone
);

CREATE SEQUENCE codeintel_initial_path_ranks_id_seq
@ -1720,7 +1721,7 @@ CREATE TABLE codeintel_path_ranks (
    repository_id integer NOT NULL,
    payload jsonb NOT NULL,
    updated_at timestamp with time zone DEFAULT now() NOT NULL,
    graph_key text,
    graph_key text NOT NULL,
    num_paths integer,
    refcount_logsum double precision,
    id bigint NOT NULL
@ -1741,7 +1742,8 @@ CREATE TABLE codeintel_ranking_definitions (
    symbol_name text NOT NULL,
    document_path text NOT NULL,
    graph_key text NOT NULL,
    last_scanned_at timestamp with time zone
    last_scanned_at timestamp with time zone,
    deleted_at timestamp with time zone
);

CREATE SEQUENCE codeintel_ranking_definitions_id_seq
@ -1789,12 +1791,35 @@ CREATE SEQUENCE codeintel_ranking_path_counts_inputs_id_seq

ALTER SEQUENCE codeintel_ranking_path_counts_inputs_id_seq OWNED BY codeintel_ranking_path_counts_inputs.id;

CREATE TABLE codeintel_ranking_progress (
    id bigint NOT NULL,
    graph_key text NOT NULL,
    max_definition_id integer NOT NULL,
    max_reference_id integer NOT NULL,
    max_path_id integer NOT NULL,
    mappers_started_at timestamp with time zone NOT NULL,
    mapper_completed_at timestamp with time zone,
    seed_mapper_completed_at timestamp with time zone,
    reducer_started_at timestamp with time zone,
    reducer_completed_at timestamp with time zone
);

CREATE SEQUENCE codeintel_ranking_progress_id_seq
    START WITH 1
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

ALTER SEQUENCE codeintel_ranking_progress_id_seq OWNED BY codeintel_ranking_progress.id;

CREATE TABLE codeintel_ranking_references (
    id bigint NOT NULL,
    upload_id integer NOT NULL,
    symbol_names text[] NOT NULL,
    graph_key text NOT NULL,
    last_scanned_at timestamp with time zone
    last_scanned_at timestamp with time zone,
    deleted_at timestamp with time zone
);

COMMENT ON TABLE codeintel_ranking_references IS 'References for a given upload produced by background job consuming SCIP indexes.';
@ -4673,6 +4698,8 @@ ALTER TABLE ONLY codeintel_ranking_exports ALTER COLUMN id SET DEFAULT nextval('

ALTER TABLE ONLY codeintel_ranking_path_counts_inputs ALTER COLUMN id SET DEFAULT nextval('codeintel_ranking_path_counts_inputs_id_seq'::regclass);

ALTER TABLE ONLY codeintel_ranking_progress ALTER COLUMN id SET DEFAULT nextval('codeintel_ranking_progress_id_seq'::regclass);

ALTER TABLE ONLY codeintel_ranking_references ALTER COLUMN id SET DEFAULT nextval('codeintel_ranking_references_id_seq'::regclass);

ALTER TABLE ONLY codeintel_ranking_references_processed ALTER COLUMN id SET DEFAULT nextval('codeintel_ranking_references_processed_id_seq'::regclass);
@ -4963,6 +4990,12 @@ ALTER TABLE ONLY codeintel_ranking_exports

ALTER TABLE ONLY codeintel_ranking_path_counts_inputs
    ADD CONSTRAINT codeintel_ranking_path_counts_inputs_pkey PRIMARY KEY (id);

ALTER TABLE ONLY codeintel_ranking_progress
    ADD CONSTRAINT codeintel_ranking_progress_graph_key_key UNIQUE (graph_key);

ALTER TABLE ONLY codeintel_ranking_progress
    ADD CONSTRAINT codeintel_ranking_progress_pkey PRIMARY KEY (id);

ALTER TABLE ONLY codeintel_ranking_references
    ADD CONSTRAINT codeintel_ranking_references_pkey PRIMARY KEY (id);

@ -5445,7 +5478,7 @@ CREATE UNIQUE INDEX codeintel_langugage_support_requests_user_id_language ON cod

CREATE INDEX codeintel_path_ranks_graph_key ON codeintel_path_ranks USING btree (graph_key, updated_at NULLS FIRST, id);

CREATE UNIQUE INDEX codeintel_path_ranks_repository_id ON codeintel_path_ranks USING btree (repository_id);
CREATE UNIQUE INDEX codeintel_path_ranks_graph_key_repository_id ON codeintel_path_ranks USING btree (graph_key, repository_id);

CREATE INDEX codeintel_path_ranks_repository_id_updated_at_id ON codeintel_path_ranks USING btree (repository_id, updated_at NULLS FIRST, id);