sentinel: Vulnerability matcher routine (#48582)

This commit is contained in:
Eric Fritz 2023-03-03 10:20:58 -06:00 committed by GitHub
parent 2113a9e813
commit 554175d597
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 254 additions and 52 deletions

View File

@ -11,6 +11,7 @@ type sentinelConfig struct {
DownloaderInterval time.Duration
MatcherInterval time.Duration
BatchSize int
}
var ConfigInst = &sentinelConfig{}
@ -18,4 +19,5 @@ var ConfigInst = &sentinelConfig{}
// Load reads the sentinel configuration from the environment, falling back
// to the documented defaults when a variable is unset.
func (c *sentinelConfig) Load() {
c.DownloaderInterval = c.GetInterval("CODEINTEL_SENTINEL_DOWNLOADER_INTERVAL", "1h", "How frequently to sync the vulnerability database.")
c.MatcherInterval = c.GetInterval("CODEINTEL_SENTINEL_MATCHER_INTERVAL", "1s", "How frequently to match existing records against known vulnerabilities.")
c.BatchSize = c.GetInt("CODEINTEL_SENTINEL_BATCH_SIZE", "100", "How many precise indexes to scan at once for vulnerabilities.")
}

View File

@ -35,6 +35,6 @@ func CVEScannerJob(observationCtx *observation.Context, service *Service) []goro
return []goroutine.BackgroundRoutine{
background.NewCVEDownloader(service.store, metrics, ConfigInst.DownloaderInterval),
background.NewCVEMatcher(service.store, metrics, ConfigInst.MatcherInterval),
background.NewCVEMatcher(service.store, metrics, ConfigInst.MatcherInterval, ConfigInst.BatchSize),
}
}

View File

@ -8,13 +8,19 @@ import (
"github.com/sourcegraph/sourcegraph/internal/goroutine"
)
func NewCVEMatcher(store store.Store, metrics *Metrics, interval time.Duration) goroutine.BackgroundRoutine {
func NewCVEMatcher(store store.Store, metrics *Metrics, interval time.Duration, batchSize int) goroutine.BackgroundRoutine {
return goroutine.NewPeriodicGoroutine(
context.Background(),
"codeintel.sentinel-cve-matcher", "TODO",
"codeintel.sentinel-cve-matcher", "Matches SCIP indexes against known vulnerabilities.",
interval,
goroutine.HandlerFunc(func(ctx context.Context) error {
// Currently unimplemented
numReferencesScanned, numVulnerabilityMatches, err := store.ScanMatches(ctx, batchSize)
if err != nil {
return err
}
metrics.numReferencesScanned.Add(float64(numReferencesScanned))
metrics.numVulnerabilityMatches.Add(float64(numVulnerabilityMatches))
return nil
}),
)

View File

@ -7,6 +7,8 @@ import (
)
type Metrics struct {
numReferencesScanned prometheus.Counter
numVulnerabilityMatches prometheus.Counter
numVulnerabilitiesInserted prometheus.Counter
}
@ -25,8 +27,18 @@ func NewMetrics(observationCtx *observation.Context) *Metrics {
"src_codeintel_sentinel_num_vulnerabilities_inserted_total",
"The number of vulnerability records inserted into Postgres.",
)
numReferencesScanned := counter(
"src_codeintel_sentinel_num_references_scanned_total",
"The total number of references scanned for vulnerabilities.",
)
numVulnerabilityMatches := counter(
"src_codeintel_sentinel_num_vulnerability_matches_total",
"The total number of vulnerability matches found.",
)
return &Metrics{
numReferencesScanned: numReferencesScanned,
numVulnerabilityMatches: numVulnerabilityMatches,
numVulnerabilitiesInserted: numVulnerabilitiesInserted,
}
}

View File

@ -138,43 +138,45 @@ var scanVulnerabilityMatchesAndCount = func(rows basestore.Rows, queryErr error)
return flattenMatches(matches), totalCount, nil
}
func (s *store) ScanMatches(ctx context.Context) (err error) {
func (s *store) ScanMatches(ctx context.Context, batchSize int) (numReferencesScanned int, numVulnerabilityMatches int, err error) {
ctx, _, endObservation := s.operations.scanMatches.With(ctx, &err, observation.Args{})
defer endObservation(1, observation.Args{})
tx, err := s.db.Transact(ctx)
if err != nil {
return err
return 0, 0, err
}
defer func() { err = tx.Done(err) }()
scipSchemeToVulnerabilityLanguage := map[string]string{
"gomod": "go",
"npm": "Javascript",
// TODO - java mapping
}
numScanned := 0
scanFilteredVulnerabilityMatches := basestore.NewFilteredSliceScanner(func(s dbutil.Scanner) (m VulnerabilityMatch, _ bool, _ error) {
var (
version string
versionConstraints []string
)
schemes := make([]string, 0, len(scipSchemeToVulnerabilityLanguage))
for scheme := range scipSchemeToVulnerabilityLanguage {
schemes = append(schemes, scheme)
}
sort.Strings(schemes)
if err := s.Scan(&m.UploadID, &m.VulnerabilityAffectedPackageID, &version, pq.Array(&versionConstraints)); err != nil {
return VulnerabilityMatch{}, false, err
}
mappings := make([]*sqlf.Query, 0, len(schemes))
for _, scheme := range schemes {
mappings = append(mappings, sqlf.Sprintf("(r.scheme = %s AND vap.language = %s)", scheme, scipSchemeToVulnerabilityLanguage[scheme]))
}
numScanned++
matches, valid := versionMatchesConstraints(version, versionConstraints)
_ = valid // TODO - log un-parseable versions
return m, matches, nil
})
matches, err := scanFilteredVulnerabilityMatches(tx.Query(ctx, sqlf.Sprintf(
scanMatchesQuery,
sqlf.Join(mappings, " OR "),
batchSize,
sqlf.Join(makeSchemeTtoVulnerabilityLanguageMappingConditions(), " OR "),
)))
if err != nil {
return err
return 0, 0, err
}
if err := tx.Exec(ctx, sqlf.Sprintf(scanMatchesTemporaryTableQuery)); err != nil {
return err
return 0, 0, err
}
if err := batch.WithInserter(
@ -200,25 +202,78 @@ func (s *store) ScanMatches(ctx context.Context) (err error) {
return nil
},
); err != nil {
return err
return 0, 0, err
}
if err := tx.Exec(ctx, sqlf.Sprintf(scanMatchesUpdateQuery)); err != nil {
return err
numMatched, _, err := basestore.ScanFirstInt(tx.Query(ctx, sqlf.Sprintf(scanMatchesUpdateQuery)))
if err != nil {
return 0, 0, err
}
return nil
return numScanned, numMatched, nil
}
// scipSchemeToVulnerabilityLanguage maps SCIP moniker schemes to the language
// names used by the vulnerability dataset (compared against vap.language in
// the scan query).
// NOTE(review): the values must match the dataset's casing exactly — confirm
// that "Javascript" (not "JavaScript") is what the rows actually contain.
var scipSchemeToVulnerabilityLanguage = map[string]string{
"gomod": "go",
"npm": "Javascript",
// TODO - add a mapping for the Java ecosystem (e.g. maven/semanticdb).
}
// makeSchemeTtoVulnerabilityLanguageMappingConditions builds one SQL condition
// per supported SCIP scheme, pairing the reference scheme with the language
// name used by the vulnerability dataset. The schemes are sorted so the
// generated query text is deterministic.
// NOTE(review): "Tto" in the name looks like a typo for "To"; renaming would
// also require updating the call site, so it is left as-is here.
func makeSchemeTtoVulnerabilityLanguageMappingConditions() []*sqlf.Query {
	keys := make([]string, 0, len(scipSchemeToVulnerabilityLanguage))
	for key := range scipSchemeToVulnerabilityLanguage {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	conditions := make([]*sqlf.Query, 0, len(keys))
	for _, key := range keys {
		language := scipSchemeToVulnerabilityLanguage[key]
		conditions = append(conditions, sqlf.Sprintf("(r.scheme = %s AND vap.language = %s)", key, language))
	}
	return conditions
}
const scanMatchesQuery = `
WITH
candidates AS (
SELECT u.id
FROM lsif_uploads u
JOIN repo r ON r.id = u.repository_id
WHERE
u.state = 'completed' AND
r.deleted_at IS NULL AND
r.blocked IS NULL AND
NOT EXISTS (
SELECT 1
FROM lsif_uploads_vulnerability_scan uvs
WHERE
uvs.upload_id = u.id AND
-- TODO: we'd rather compare this against vuln update times
uvs.last_scanned_at < NOW()
)
ORDER BY u.id
LIMIT %s
),
locked_candidates AS (
INSERT INTO lsif_uploads_vulnerability_scan (upload_id, last_scanned_at)
SELECT id, NOW() FROM candidates
ON CONFLICT DO NOTHING
RETURNING upload_id
)
SELECT
r.dump_id,
vap.id,
r.version,
vap.version_constraint
FROM vulnerability_affected_packages vap
-- TODO - do we need the inverse? need to refine? the resulting match?
JOIN lsif_references r ON r.name LIKE '%%' || vap.package_name || '%%'
FROM locked_candidates lc
JOIN lsif_references r ON r.dump_id = lc.upload_id
JOIN vulnerability_affected_packages vap ON
-- NOTE: This is currently a bit of a hack that works to find some
-- good matches with the dataset we have. We should have a better
-- way to match on a normalized name here, or have rules per types
-- of language ecosystem.
r.name LIKE '%%' || vap.package_name || '%%'
WHERE %s
`
@ -230,9 +285,13 @@ CREATE TEMPORARY TABLE t_vulnerability_affected_packages (
`
const scanMatchesUpdateQuery = `
INSERT INTO vulnerability_matches (upload_id, vulnerability_affected_package_id)
SELECT upload_id, vulnerability_affected_package_id FROM t_vulnerability_affected_packages
ON CONFLICT DO NOTHING
WITH ins AS (
INSERT INTO vulnerability_matches (upload_id, vulnerability_affected_package_id)
SELECT upload_id, vulnerability_affected_package_id FROM t_vulnerability_affected_packages
ON CONFLICT DO NOTHING
RETURNING 1
)
SELECT COUNT(*) FROM ins
`
type VulnerabilityMatch struct {
@ -240,22 +299,6 @@ type VulnerabilityMatch struct {
VulnerabilityAffectedPackageID int
}
var scanFilteredVulnerabilityMatches = basestore.NewFilteredSliceScanner(func(s dbutil.Scanner) (m VulnerabilityMatch, _ bool, _ error) {
var (
version string
versionConstraints []string
)
if err := s.Scan(&m.UploadID, &m.VulnerabilityAffectedPackageID, &version, pq.Array(&versionConstraints)); err != nil {
return VulnerabilityMatch{}, false, err
}
matches, valid := versionMatchesConstraints(version, versionConstraints)
_ = valid // TODO - log un-parseable versions
return m, matches, nil
})
func versionMatchesConstraints(versionString string, constraints []string) (matches, valid bool) {
v, err := version.NewVersion(versionString)
if err != nil {

View File

@ -33,7 +33,7 @@ func TestVulnerabilityMatchByID(t *testing.T) {
t.Fatalf("unexpected error inserting vulnerabilities: %s", err)
}
if err := store.ScanMatches(ctx); err != nil {
if _, _, err := store.ScanMatches(ctx, 100); err != nil {
t.Fatalf("unexpected error inserting vulnerabilities: %s", err)
}
@ -68,7 +68,7 @@ func TestGetVulnerabilityMatches(t *testing.T) {
t.Fatalf("unexpected error inserting vulnerabilities: %s", err)
}
if err := store.ScanMatches(ctx); err != nil {
if _, _, err := store.ScanMatches(ctx, 100); err != nil {
t.Fatalf("unexpected error inserting vulnerabilities: %s", err)
}

View File

@ -19,7 +19,7 @@ type Store interface {
VulnerabilityMatchByID(ctx context.Context, id int) (shared.VulnerabilityMatch, bool, error)
GetVulnerabilityMatches(ctx context.Context, args shared.GetVulnerabilityMatchesArgs) ([]shared.VulnerabilityMatch, int, error)
ScanMatches(ctx context.Context) error
ScanMatches(ctx context.Context, batchSize int) (numReferencesScanned int, numVulnerabilityMatches int, _ error)
}
type store struct {

View File

@ -799,6 +799,15 @@
"Increment": 1,
"CycleOption": "NO"
},
{
"Name": "lsif_uploads_vulnerability_scan_id_seq",
"TypeName": "bigint",
"StartValue": 1,
"MinimumValue": 1,
"MaximumValue": 9223372036854775807,
"Increment": 1,
"CycleOption": "NO"
},
{
"Name": "migration_logs_id_seq",
"TypeName": "integer",
@ -15992,6 +16001,83 @@
"Constraints": null,
"Triggers": []
},
{
"Name": "lsif_uploads_vulnerability_scan",
"Comment": "",
"Columns": [
{
"Name": "id",
"Index": 1,
"TypeName": "bigint",
"IsNullable": false,
"Default": "nextval('lsif_uploads_vulnerability_scan_id_seq'::regclass)",
"CharacterMaximumLength": 0,
"IsIdentity": false,
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": ""
},
{
"Name": "last_scanned_at",
"Index": 3,
"TypeName": "timestamp without time zone",
"IsNullable": false,
"Default": "now()",
"CharacterMaximumLength": 0,
"IsIdentity": false,
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": ""
},
{
"Name": "upload_id",
"Index": 2,
"TypeName": "integer",
"IsNullable": false,
"Default": "",
"CharacterMaximumLength": 0,
"IsIdentity": false,
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": ""
}
],
"Indexes": [
{
"Name": "lsif_uploads_vulnerability_scan_pkey",
"IsPrimaryKey": true,
"IsUnique": true,
"IsExclusion": false,
"IsDeferrable": false,
"IndexDefinition": "CREATE UNIQUE INDEX lsif_uploads_vulnerability_scan_pkey ON lsif_uploads_vulnerability_scan USING btree (id)",
"ConstraintType": "p",
"ConstraintDefinition": "PRIMARY KEY (id)"
},
{
"Name": "lsif_uploads_vulnerability_scan_upload_id",
"IsPrimaryKey": false,
"IsUnique": true,
"IsExclusion": false,
"IsDeferrable": false,
"IndexDefinition": "CREATE UNIQUE INDEX lsif_uploads_vulnerability_scan_upload_id ON lsif_uploads_vulnerability_scan USING btree (upload_id)",
"ConstraintType": "",
"ConstraintDefinition": ""
}
],
"Constraints": [
{
"Name": "fk_upload_id",
"ConstraintType": "f",
"RefTableName": "lsif_uploads",
"IsDeferrable": false,
"ConstraintDefinition": "FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE"
}
],
"Triggers": []
},
{
"Name": "migration_logs",
"Comment": "",

View File

@ -2274,6 +2274,7 @@ Check constraints:
Referenced by:
TABLE "codeintel_ranking_exports" CONSTRAINT "codeintel_ranking_exports_upload_id_fkey" FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE SET NULL
TABLE "vulnerability_matches" CONSTRAINT "fk_upload" FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
TABLE "lsif_uploads_vulnerability_scan" CONSTRAINT "fk_upload_id" FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
TABLE "lsif_dependency_syncing_jobs" CONSTRAINT "lsif_dependency_indexing_jobs_upload_id_fkey" FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
TABLE "lsif_dependency_indexing_jobs" CONSTRAINT "lsif_dependency_indexing_jobs_upload_id_fkey1" FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
TABLE "lsif_packages" CONSTRAINT "lsif_packages_dump_id_fkey" FOREIGN KEY (dump_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
@ -2393,6 +2394,21 @@ Associates a repository with the set of LSIF upload identifiers that can serve i
**upload_id**: The identifier of the upload visible from the tip of the specified branch or tag.
# Table "public.lsif_uploads_vulnerability_scan"
```
Column | Type | Collation | Nullable | Default
-----------------+-----------------------------+-----------+----------+-------------------------------------------------------------
id | bigint | | not null | nextval('lsif_uploads_vulnerability_scan_id_seq'::regclass)
upload_id | integer | | not null |
last_scanned_at | timestamp without time zone | | not null | now()
Indexes:
"lsif_uploads_vulnerability_scan_pkey" PRIMARY KEY, btree (id)
"lsif_uploads_vulnerability_scan_upload_id" UNIQUE, btree (upload_id)
Foreign-key constraints:
"fk_upload_id" FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
```
# Table "public.migration_logs"
```
Column | Type | Collation | Nullable | Default

View File

@ -0,0 +1 @@
-- Down migration: remove the per-upload vulnerability scan bookkeeping table.
DROP TABLE IF EXISTS lsif_uploads_vulnerability_scan;

View File

@ -0,0 +1,2 @@
name: Add vulnerability scan log table
parents: [1677483453, 1677607213, 1677700103]

View File

@ -0,0 +1,9 @@
-- Tracks, per precise-code-intel upload, when it was last scanned for
-- vulnerability matches so the matcher can page through unscanned uploads.
CREATE TABLE IF NOT EXISTS lsif_uploads_vulnerability_scan (
-- NOTE(review): unquoted "ID" is folded to "id" by Postgres, so this is
-- equivalent to the lowercase form; lowercase would be consistent with the
-- other columns.
ID BIGSERIAL PRIMARY KEY,
upload_id INT NOT NULL,
last_scanned_at TIMESTAMP NOT NULL DEFAULT NOW(),
-- Cascade so scan bookkeeping disappears along with its upload.
CONSTRAINT fk_upload_id FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE
);
-- At most one scan row per upload; also supports the matcher's NOT EXISTS
-- lookup by upload_id.
CREATE UNIQUE INDEX IF NOT EXISTS lsif_uploads_vulnerability_scan_upload_id ON lsif_uploads_vulnerability_scan(upload_id);

View File

@ -3124,6 +3124,21 @@ COMMENT ON COLUMN lsif_uploads_visible_at_tip.branch_or_tag_name IS 'The name of
COMMENT ON COLUMN lsif_uploads_visible_at_tip.is_default_branch IS 'Whether the specified branch is the default of the repository. Always false for tags.';
CREATE TABLE lsif_uploads_vulnerability_scan (
id bigint NOT NULL,
upload_id integer NOT NULL,
last_scanned_at timestamp without time zone DEFAULT now() NOT NULL
);
CREATE SEQUENCE lsif_uploads_vulnerability_scan_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE lsif_uploads_vulnerability_scan_id_seq OWNED BY lsif_uploads_vulnerability_scan.id;
CREATE VIEW lsif_uploads_with_repository_name AS
SELECT u.id,
u.commit,
@ -4508,6 +4523,8 @@ ALTER TABLE ONLY lsif_uploads ALTER COLUMN id SET DEFAULT nextval('lsif_dumps_id
ALTER TABLE ONLY lsif_uploads_audit_logs ALTER COLUMN sequence SET DEFAULT nextval('lsif_uploads_audit_logs_seq'::regclass);
ALTER TABLE ONLY lsif_uploads_vulnerability_scan ALTER COLUMN id SET DEFAULT nextval('lsif_uploads_vulnerability_scan_id_seq'::regclass);
ALTER TABLE ONLY namespace_permissions ALTER COLUMN id SET DEFAULT nextval('namespace_permissions_id_seq'::regclass);
ALTER TABLE ONLY notebooks ALTER COLUMN id SET DEFAULT nextval('notebooks_id_seq'::regclass);
@ -4857,6 +4874,9 @@ ALTER TABLE ONLY lsif_uploads
ALTER TABLE ONLY lsif_uploads_reference_counts
ADD CONSTRAINT lsif_uploads_reference_counts_upload_id_key UNIQUE (upload_id);
ALTER TABLE ONLY lsif_uploads_vulnerability_scan
ADD CONSTRAINT lsif_uploads_vulnerability_scan_pkey PRIMARY KEY (id);
ALTER TABLE ONLY names
ADD CONSTRAINT names_pkey PRIMARY KEY (name);
@ -5352,6 +5372,8 @@ CREATE INDEX lsif_uploads_visible_at_tip_is_default_branch ON lsif_uploads_visib
CREATE INDEX lsif_uploads_visible_at_tip_repository_id_upload_id ON lsif_uploads_visible_at_tip USING btree (repository_id, upload_id);
CREATE UNIQUE INDEX lsif_uploads_vulnerability_scan_upload_id ON lsif_uploads_vulnerability_scan USING btree (upload_id);
CREATE INDEX notebook_stars_user_id_idx ON notebook_stars USING btree (user_id);
CREATE INDEX notebooks_blocks_tsvector_idx ON notebooks USING gin (blocks_tsvector);
@ -5813,6 +5835,9 @@ ALTER TABLE ONLY codeintel_ranking_references_processed
ALTER TABLE ONLY vulnerability_matches
ADD CONSTRAINT fk_upload FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE;
ALTER TABLE ONLY lsif_uploads_vulnerability_scan
ADD CONSTRAINT fk_upload_id FOREIGN KEY (upload_id) REFERENCES lsif_uploads(id) ON DELETE CASCADE;
ALTER TABLE ONLY vulnerability_affected_packages
ADD CONSTRAINT fk_vulnerabilities FOREIGN KEY (vulnerability_id) REFERENCES vulnerabilities(id) ON DELETE CASCADE;