codeintel: Fix bad SCIP schema version triggers (#45300)

This commit is contained in:
Eric Fritz 2022-12-06 16:47:40 -06:00 committed by GitHub
parent c877318909
commit 32fa9a4dfc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 289 additions and 66 deletions

View File

@ -24,11 +24,11 @@
},
{
"Name": "update_codeintel_scip_document_lookup_schema_versions_insert",
"Definition": "CREATE OR REPLACE FUNCTION public.update_codeintel_scip_document_lookup_schema_versions_insert()\n RETURNS trigger\n LANGUAGE plpgsql\nAS $function$ BEGIN\n INSERT INTO codeintel_scip_document_lookup_schema_versions\n SELECT\n upload_id,\n MIN(documents.schema_version) as min_schema_version,\n MAX(documents.schema_version) as max_schema_version\n FROM newtab\n JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id\n GROUP BY newtab.upload_id\n ON CONFLICT (upload_id) DO UPDATE SET\n -- Update with min(old_min, new_min) and max(old_max, new_max)\n min_schema_version = LEAST(codeintel_scip_document_lookup_schema_versions.min_schema_version, EXCLUDED.min_schema_version),\n max_schema_version = GREATEST(codeintel_scip_document_lookup_schema_versions.max_schema_version, EXCLUDED.max_schema_version);\n RETURN NULL;\nEND $function$\n"
"Definition": "CREATE OR REPLACE FUNCTION public.update_codeintel_scip_document_lookup_schema_versions_insert()\n RETURNS trigger\n LANGUAGE plpgsql\nAS $function$ BEGIN\n INSERT INTO codeintel_scip_document_lookup_schema_versions\n SELECT\n upload_id,\n MIN(schema_version) as min_schema_version,\n MAX(schema_version) as max_schema_version\n FROM newtab\n JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id\n GROUP BY newtab.upload_id\n ON CONFLICT (upload_id) DO UPDATE SET\n -- Update with min(old_min, new_min) and max(old_max, new_max)\n min_schema_version = LEAST(codeintel_scip_document_lookup_schema_versions.min_schema_version, EXCLUDED.min_schema_version),\n max_schema_version = GREATEST(codeintel_scip_document_lookup_schema_versions.max_schema_version, EXCLUDED.max_schema_version);\n RETURN NULL;\nEND $function$\n"
},
{
"Name": "update_codeintel_scip_documents_schema_versions_insert",
"Definition": "CREATE OR REPLACE FUNCTION public.update_codeintel_scip_documents_schema_versions_insert()\n RETURNS trigger\n LANGUAGE plpgsql\nAS $function$ BEGIN\n INSERT INTO codeintel_scip_documents_schema_versions\n SELECT\n upload_id,\n MIN(documents.schema_version) as min_schema_version,\n MAX(documents.schema_version) as max_schema_version\n FROM newtab\n JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id\n GROUP BY newtab.upload_id\n ON CONFLICT (upload_id) DO UPDATE SET\n -- Update with min(old_min, new_min) and max(old_max, new_max)\n min_schema_version = LEAST(codeintel_scip_documents_schema_versions.min_schema_version, EXCLUDED.min_schema_version),\n max_schema_version = GREATEST(codeintel_scip_documents_schema_versions.max_schema_version, EXCLUDED.max_schema_version);\n RETURN NULL;\nEND $function$\n"
"Definition": "CREATE OR REPLACE FUNCTION public.update_codeintel_scip_documents_schema_versions_insert()\n RETURNS trigger\n LANGUAGE plpgsql\nAS $function$ BEGIN\n INSERT INTO codeintel_scip_documents_schema_versions\n SELECT\n metadata_shard_id,\n MIN(schema_version) as min_schema_version,\n MAX(schema_version) as max_schema_version\n FROM newtab\n JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id\n GROUP BY newtab.metadata_shard_id\n ON CONFLICT (metadata_shard_id) DO UPDATE SET\n -- Update with min(old_min, new_min) and max(old_max, new_max)\n min_schema_version = LEAST(codeintel_scip_documents_schema_versions.min_schema_version, EXCLUDED.min_schema_version),\n max_schema_version = GREATEST(codeintel_scip_documents_schema_versions.max_schema_version, EXCLUDED.max_schema_version);\n RETURN NULL;\nEND $function$\n"
},
{
"Name": "update_codeintel_scip_symbols_schema_versions_insert",
@ -271,7 +271,12 @@
"ConstraintDefinition": "FOREIGN KEY (document_id) REFERENCES codeintel_scip_documents(id)"
}
],
"Triggers": []
"Triggers": [
{
"Name": "codeintel_scip_document_lookup_schema_versions_insert",
"Definition": "CREATE TRIGGER codeintel_scip_document_lookup_schema_versions_insert AFTER INSERT ON codeintel_scip_document_lookup REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert()"
}
]
},
{
"Name": "codeintel_scip_document_lookup_schema_versions",
@ -330,12 +335,7 @@
}
],
"Constraints": null,
"Triggers": [
{
"Name": "codeintel_scip_document_lookup_schema_versions_insert",
"Definition": "CREATE TRIGGER codeintel_scip_document_lookup_schema_versions_insert AFTER INSERT ON codeintel_scip_document_lookup_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert()"
}
]
"Triggers": []
},
{
"Name": "codeintel_scip_documents",
@ -354,6 +354,19 @@
"GenerationExpression": "",
"Comment": "An auto-generated identifier. This column is used as a foreign key target to reduce occurrences of the full payload hash value."
},
{
"Name": "metadata_shard_id",
"Index": 5,
"TypeName": "integer",
"IsNullable": false,
"Default": "(floor(((random() * (128)::double precision) + (1)::double precision)))::integer",
"CharacterMaximumLength": 0,
"IsIdentity": false,
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": "A randomly generated integer used to arbitrarily bucket groups of documents for things like expiration checks and data migrations."
},
{
"Name": "payload_hash",
"Index": 2,
@ -421,7 +434,7 @@
},
{
"Name": "codeintel_scip_documents_schema_versions",
"Comment": "Tracks the range of `schema_versions` values associated with each SCIP index in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.",
"Comment": "Tracks the range of `schema_versions` values associated with each document metadata shard in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.",
"Columns": [
{
"Name": "max_schema_version",
@ -434,7 +447,20 @@
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": "An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index."
"Comment": "An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard."
},
{
"Name": "metadata_shard_id",
"Index": 1,
"TypeName": "integer",
"IsNullable": false,
"Default": "",
"CharacterMaximumLength": 0,
"IsIdentity": false,
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": "The identifier of the associated document metadata shard."
},
{
"Name": "min_schema_version",
@ -447,20 +473,7 @@
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": "A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index."
},
{
"Name": "upload_id",
"Index": 1,
"TypeName": "integer",
"IsNullable": false,
"Default": "",
"CharacterMaximumLength": 0,
"IsIdentity": false,
"IdentityGeneration": "",
"IsGenerated": "NEVER",
"GenerationExpression": "",
"Comment": "The identifier of the associated SCIP index."
"Comment": "A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard."
}
],
"Indexes": [
@ -470,9 +483,9 @@
"IsUnique": true,
"IsExclusion": false,
"IsDeferrable": false,
"IndexDefinition": "CREATE UNIQUE INDEX codeintel_scip_documents_schema_versions_pkey ON codeintel_scip_documents_schema_versions USING btree (upload_id)",
"IndexDefinition": "CREATE UNIQUE INDEX codeintel_scip_documents_schema_versions_pkey ON codeintel_scip_documents_schema_versions USING btree (metadata_shard_id)",
"ConstraintType": "p",
"ConstraintDefinition": "PRIMARY KEY (upload_id)"
"ConstraintDefinition": "PRIMARY KEY (metadata_shard_id)"
}
],
"Constraints": null,
@ -734,7 +747,12 @@
"ConstraintDefinition": "FOREIGN KEY (document_lookup_id) REFERENCES codeintel_scip_document_lookup(id) ON DELETE CASCADE"
}
],
"Triggers": []
"Triggers": [
{
"Name": "codeintel_scip_symbols_schema_versions_insert",
"Definition": "CREATE TRIGGER codeintel_scip_symbols_schema_versions_insert AFTER INSERT ON codeintel_scip_symbols REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert()"
}
]
},
{
"Name": "codeintel_scip_symbols_schema_versions",
@ -793,12 +811,7 @@
}
],
"Constraints": null,
"Triggers": [
{
"Name": "codeintel_scip_symbols_schema_versions_insert",
"Definition": "CREATE TRIGGER codeintel_scip_symbols_schema_versions_insert AFTER INSERT ON codeintel_scip_symbols_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert()"
}
]
"Triggers": []
},
{
"Name": "lsif_data_definitions",

View File

@ -28,6 +28,8 @@ Foreign-key constraints:
"codeintel_scip_document_lookup_document_id_fk" FOREIGN KEY (document_id) REFERENCES codeintel_scip_documents(id)
Referenced by:
TABLE "codeintel_scip_symbols" CONSTRAINT "codeintel_scip_symbols_document_lookup_id_fk" FOREIGN KEY (document_lookup_id) REFERENCES codeintel_scip_document_lookup(id) ON DELETE CASCADE
Triggers:
codeintel_scip_document_lookup_schema_versions_insert AFTER INSERT ON codeintel_scip_document_lookup REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert()
```
@ -50,8 +52,6 @@ A mapping from file paths to document references within a particular SCIP index.
max_schema_version | integer | | |
Indexes:
"codeintel_scip_document_lookup_schema_versions_pkey" PRIMARY KEY, btree (upload_id)
Triggers:
codeintel_scip_document_lookup_schema_versions_insert AFTER INSERT ON codeintel_scip_document_lookup_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert()
```
@ -65,12 +65,13 @@ Tracks the range of `schema_versions` values associated with each SCIP index in
# Table "public.codeintel_scip_documents"
```
Column | Type | Collation | Nullable | Default
------------------+---------+-----------+----------+------------------------------------------------------
id | bigint | | not null | nextval('codeintel_scip_documents_id_seq'::regclass)
payload_hash | bytea | | not null |
schema_version | integer | | not null |
raw_scip_payload | bytea | | not null |
Column | Type | Collation | Nullable | Default
-------------------+---------+-----------+----------+----------------------------------------------------------------------------------
id | bigint | | not null | nextval('codeintel_scip_documents_id_seq'::regclass)
payload_hash | bytea | | not null |
schema_version | integer | | not null |
raw_scip_payload | bytea | | not null |
metadata_shard_id | integer | | not null | (floor(((random() * (128)::double precision) + (1)::double precision)))::integer
Indexes:
"codeintel_scip_documents_pkey" PRIMARY KEY, btree (id)
"codeintel_scip_documents_payload_hash_key" UNIQUE CONSTRAINT, btree (payload_hash)
@ -83,6 +84,8 @@ A lookup of SCIP [Document](https://sourcegraph.com/search?q=context:%40sourcegr
**id**: An auto-generated identifier. This column is used as a foreign key target to reduce occurrences of the full payload hash value.
**metadata_shard_id**: A randomly generated integer used to arbitrarily bucket groups of documents for things like expiration checks and data migrations.
**payload_hash**: A deterministic hash of the raw SCIP payload. We use this as a unique value to enforce deduplication between indexes with the same document data.
**raw_scip_payload**: The raw, canonicalized SCIP [Document](https://sourcegraph.com/search?q=context:%40sourcegraph/all+repo:%5Egithub%5C.com/sourcegraph/scip%24+file:%5Escip%5C.proto+message+Document&patternType=standard) payload.
@ -93,23 +96,23 @@ A lookup of SCIP [Document](https://sourcegraph.com/search?q=context:%40sourcegr
```
Column | Type | Collation | Nullable | Default
--------------------+---------+-----------+----------+---------
upload_id | integer | | not null |
metadata_shard_id | integer | | not null |
min_schema_version | integer | | |
max_schema_version | integer | | |
Indexes:
"codeintel_scip_documents_schema_versions_pkey" PRIMARY KEY, btree (upload_id)
"codeintel_scip_documents_schema_versions_pkey" PRIMARY KEY, btree (metadata_shard_id)
Triggers:
codeintel_scip_documents_schema_versions_insert AFTER INSERT ON codeintel_scip_documents_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_documents_schema_versions_insert()
```
Tracks the range of `schema_versions` values associated with each SCIP index in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.
Tracks the range of `schema_versions` values associated with each document metadata shard in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.
**max_schema_version**: An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index.
**max_schema_version**: An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard.
**min_schema_version**: A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index.
**metadata_shard_id**: The identifier of the associated document metadata shard.
**upload_id**: The identifier of the associated SCIP index.
**min_schema_version**: A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard.
# Table "public.codeintel_scip_metadata"
```
@ -160,6 +163,8 @@ Indexes:
"codeintel_scip_symbols_document_lookup_id" btree (document_lookup_id)
Foreign-key constraints:
"codeintel_scip_symbols_document_lookup_id_fk" FOREIGN KEY (document_lookup_id) REFERENCES codeintel_scip_document_lookup(id) ON DELETE CASCADE
Triggers:
codeintel_scip_symbols_schema_versions_insert AFTER INSERT ON codeintel_scip_symbols REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert()
```
@ -190,8 +195,6 @@ A mapping from SCIP [Symbol names](https://sourcegraph.com/search?q=context:%40s
max_schema_version | integer | | |
Indexes:
"codeintel_scip_symbols_schema_versions_pkey" PRIMARY KEY, btree (upload_id)
Triggers:
codeintel_scip_symbols_schema_versions_insert AFTER INSERT ON codeintel_scip_symbols_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert()
```

View File

@ -0,0 +1,75 @@
--
-- Restore the trigger function definitions as they were before this migration
--
-- Trigger function (rollback copy): upserts one row per upload_id into
-- codeintel_scip_documents_schema_versions from the rows in the `newtab` transition table,
-- joined to codeintel_scip_documents on newtab.document_id. On conflict the stored range is
-- widened with LEAST/GREATEST. Always returns NULL.
--
-- NOTE(review): the aggregates reference `documents.schema_version`, but the FROM clause only
-- names `newtab` and `codeintel_scip_documents` -- there is no relation or alias `documents`.
-- The body is stored without validation, so this presumably fails only when the trigger fires.
-- It looks like this is the old definition being restored verbatim -- confirm before changing.
CREATE OR REPLACE FUNCTION update_codeintel_scip_documents_schema_versions_insert() RETURNS trigger
LANGUAGE plpgsql
AS $$ BEGIN
INSERT INTO codeintel_scip_documents_schema_versions
SELECT
upload_id,
MIN(documents.schema_version) as min_schema_version,
MAX(documents.schema_version) as max_schema_version
FROM newtab
JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id
GROUP BY newtab.upload_id
ON CONFLICT (upload_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_documents_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_documents_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
RETURN NULL;
END $$;
-- Trigger function (rollback copy): upserts one row per upload_id into
-- codeintel_scip_document_lookup_schema_versions from the rows in the `newtab` transition
-- table, joined to codeintel_scip_documents on newtab.document_id. On conflict the stored
-- range is widened with LEAST/GREATEST. Always returns NULL.
--
-- NOTE(review): `documents.schema_version` refers to a relation/alias `documents` that does
-- not appear in the FROM clause, so this body presumably errors when executed. It looks like
-- the old definition is being restored verbatim -- confirm before changing.
CREATE OR REPLACE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert() RETURNS trigger
LANGUAGE plpgsql
AS $$ BEGIN
INSERT INTO codeintel_scip_document_lookup_schema_versions
SELECT
upload_id,
MIN(documents.schema_version) as min_schema_version,
MAX(documents.schema_version) as max_schema_version
FROM newtab
JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id
GROUP BY newtab.upload_id
ON CONFLICT (upload_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_document_lookup_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_document_lookup_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
RETURN NULL;
END $$;
-- Trigger function: groups the rows of the `newtab` transition table by upload_id and upserts
-- the MIN/MAX of their schema_version into codeintel_scip_symbols_schema_versions.
-- When a row for the upload_id already exists, the stored range is only ever widened
-- (LEAST of the minimums, GREATEST of the maximums). Always returns NULL.
CREATE OR REPLACE FUNCTION update_codeintel_scip_symbols_schema_versions_insert() RETURNS trigger
LANGUAGE plpgsql
AS $$ BEGIN
INSERT INTO codeintel_scip_symbols_schema_versions
SELECT
upload_id,
MIN(schema_version) as min_schema_version,
MAX(schema_version) as max_schema_version
FROM newtab
GROUP BY upload_id
ON CONFLICT (upload_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_symbols_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_symbols_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
RETURN NULL;
END $$;
--
-- Actual triggers
--
-- For each of the three trigger names: detach it from both tables it could be attached to,
-- then attach it to the corresponding *_schema_versions table.

-- documents
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert
    ON codeintel_scip_documents_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert
    ON codeintel_scip_documents;
CREATE TRIGGER codeintel_scip_documents_schema_versions_insert
    AFTER INSERT ON codeintel_scip_documents_schema_versions
    REFERENCING NEW TABLE AS newtab
    FOR EACH STATEMENT
    EXECUTE FUNCTION update_codeintel_scip_documents_schema_versions_insert();

-- document lookup
DROP TRIGGER IF EXISTS codeintel_scip_document_lookup_schema_versions_insert
    ON codeintel_scip_document_lookup_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_document_lookup_schema_versions_insert
    ON codeintel_scip_document_lookup;
CREATE TRIGGER codeintel_scip_document_lookup_schema_versions_insert
    AFTER INSERT ON codeintel_scip_document_lookup_schema_versions
    REFERENCING NEW TABLE AS newtab
    FOR EACH STATEMENT
    EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert();

-- symbols
DROP TRIGGER IF EXISTS codeintel_scip_symbols_schema_versions_insert
    ON codeintel_scip_symbols_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_symbols_schema_versions_insert
    ON codeintel_scip_symbols;
CREATE TRIGGER codeintel_scip_symbols_schema_versions_insert
    AFTER INSERT ON codeintel_scip_symbols_schema_versions
    REFERENCING NEW TABLE AS newtab
    FOR EACH STATEMENT
    EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert();

View File

@ -0,0 +1,2 @@
name: Fix SCIP schema version triggers
parents: [1670001463]

View File

@ -0,0 +1,48 @@
-- documents: remove the trigger from wherever it is attached, then remove its function.
-- The function can only be dropped once no trigger depends on it, so it goes last.
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert
    ON codeintel_scip_documents_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert
    ON codeintel_scip_documents;
DROP FUNCTION IF EXISTS update_codeintel_scip_documents_schema_versions_insert();

-- document lookup: move the trigger onto the data table (codeintel_scip_document_lookup).
DROP TRIGGER IF EXISTS codeintel_scip_document_lookup_schema_versions_insert
    ON codeintel_scip_document_lookup_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_document_lookup_schema_versions_insert
    ON codeintel_scip_document_lookup;
CREATE TRIGGER codeintel_scip_document_lookup_schema_versions_insert
    AFTER INSERT ON codeintel_scip_document_lookup
    REFERENCING NEW TABLE AS newtab
    FOR EACH STATEMENT
    EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert();

-- symbols: move the trigger onto the data table (codeintel_scip_symbols).
DROP TRIGGER IF EXISTS codeintel_scip_symbols_schema_versions_insert
    ON codeintel_scip_symbols_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_symbols_schema_versions_insert
    ON codeintel_scip_symbols;
CREATE TRIGGER codeintel_scip_symbols_schema_versions_insert
    AFTER INSERT ON codeintel_scip_symbols
    REFERENCING NEW TABLE AS newtab
    FOR EACH STATEMENT
    EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert();
-- Trigger function: for the rows in the `newtab` transition table, joins each row to its
-- document in codeintel_scip_documents (via newtab.document_id), aggregates the MIN/MAX
-- schema_version per upload_id, and upserts the result into
-- codeintel_scip_document_lookup_schema_versions. An existing row's range is only ever
-- widened (LEAST of the minimums, GREATEST of the maximums). Always returns NULL.
--
-- NOTE(review): `schema_version` is unqualified; it presumably resolves to
-- codeintel_scip_documents.schema_version -- confirm the table behind `newtab` has no column
-- of the same name, otherwise the reference becomes ambiguous.
CREATE OR REPLACE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert() RETURNS trigger
LANGUAGE plpgsql
AS $$ BEGIN
INSERT INTO codeintel_scip_document_lookup_schema_versions
SELECT
upload_id,
MIN(schema_version) as min_schema_version,
MAX(schema_version) as max_schema_version
FROM newtab
JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id
GROUP BY newtab.upload_id
ON CONFLICT (upload_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_document_lookup_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_document_lookup_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
RETURN NULL;
END $$;
-- Trigger function: groups the rows of the `newtab` transition table by upload_id and upserts
-- the MIN/MAX of their schema_version into codeintel_scip_symbols_schema_versions.
-- When a row for the upload_id already exists, the stored range is only ever widened
-- (LEAST of the minimums, GREATEST of the maximums). Always returns NULL.
CREATE OR REPLACE FUNCTION update_codeintel_scip_symbols_schema_versions_insert() RETURNS trigger
LANGUAGE plpgsql
AS $$ BEGIN
INSERT INTO codeintel_scip_symbols_schema_versions
SELECT
upload_id,
MIN(schema_version) as min_schema_version,
MAX(schema_version) as max_schema_version
FROM newtab
GROUP BY upload_id
ON CONFLICT (upload_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_symbols_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_symbols_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
RETURN NULL;
END $$;

View File

@ -0,0 +1,38 @@
-- Restore table and triggers
--
-- Replace the bounds table with its upload_id-keyed shape. The rows are derived data and the
-- key column differs, so the table is dropped and re-created rather than altered.
-- IF EXISTS keeps this script re-runnable when the table is already gone.
DROP TABLE IF EXISTS codeintel_scip_documents_schema_versions;
CREATE TABLE IF NOT EXISTS codeintel_scip_documents_schema_versions (
    upload_id integer NOT NULL,
    min_schema_version integer,
    max_schema_version integer,
    PRIMARY KEY(upload_id)
);
COMMENT ON TABLE codeintel_scip_documents_schema_versions IS 'Tracks the range of `schema_versions` values associated with each SCIP index in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.upload_id IS 'The identifier of the associated SCIP index.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.min_schema_version IS 'A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.max_schema_version IS 'An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index.';
-- Trigger function (restored definition): upserts one row per upload_id into
-- codeintel_scip_documents_schema_versions from the rows in the `newtab` transition table,
-- joined to codeintel_scip_documents on newtab.document_id. On conflict the stored range is
-- widened with LEAST/GREATEST. Always returns NULL.
--
-- NOTE(review): the aggregates reference `documents.schema_version`, but the FROM clause only
-- names `newtab` and `codeintel_scip_documents` -- there is no relation or alias `documents`,
-- so this body presumably errors when the trigger fires. It looks like the earlier definition
-- is being restored verbatim -- confirm before changing.
CREATE OR REPLACE FUNCTION update_codeintel_scip_documents_schema_versions_insert() RETURNS trigger
LANGUAGE plpgsql
AS $$ BEGIN
INSERT INTO codeintel_scip_documents_schema_versions
SELECT
upload_id,
MIN(documents.schema_version) as min_schema_version,
MAX(documents.schema_version) as max_schema_version
FROM newtab
JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id
GROUP BY newtab.upload_id
ON CONFLICT (upload_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_documents_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_documents_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
RETURN NULL;
END $$;
-- Detach the trigger from every table it could be attached to before re-creating it.
-- A trigger left behind on codeintel_scip_documents would keep firing the function restored
-- by this script on every document insert. Both drops are no-ops when nothing is attached.
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert ON codeintel_scip_documents;
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert ON codeintel_scip_documents_schema_versions;
CREATE TRIGGER codeintel_scip_documents_schema_versions_insert AFTER INSERT ON codeintel_scip_documents_schema_versions
REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_documents_schema_versions_insert();
-- Restore documents table
ALTER TABLE codeintel_scip_documents DROP COLUMN IF EXISTS metadata_shard_id;

View File

@ -0,0 +1,2 @@
name: Fix SCIP document schema counting
parents: [1670363942]

View File

@ -0,0 +1,39 @@
-- Give every document a shard number. floor(random() * 128 + 1) yields an integer in 1..128.
ALTER TABLE codeintel_scip_documents
    ADD COLUMN IF NOT EXISTS metadata_shard_id integer NOT NULL
    DEFAULT floor(random() * 128 + 1)::integer;

COMMENT ON COLUMN codeintel_scip_documents.metadata_shard_id IS 'A randomly generated integer used to arbitrarily bucket groups of documents for things like expiration checks and data migrations.';

-- Swap the bounds table for one keyed by shard.
DROP TABLE IF EXISTS codeintel_scip_documents_schema_versions;

CREATE TABLE codeintel_scip_documents_schema_versions (
    metadata_shard_id  integer NOT NULL,
    min_schema_version integer,
    max_schema_version integer,
    -- Same name PostgreSQL would generate for an unnamed primary key on this table.
    CONSTRAINT codeintel_scip_documents_schema_versions_pkey PRIMARY KEY (metadata_shard_id)
);

COMMENT ON TABLE codeintel_scip_documents_schema_versions IS 'Tracks the range of `schema_versions` values associated with each document metadata shard in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.metadata_shard_id IS 'The identifier of the associated document metadata shard.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.min_schema_version IS 'A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.max_schema_version IS 'An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard.';
-- Maintains per-shard schema_version bounds as documents are inserted.
--
-- Trigger function: reads the statement-level transition table `newtab` (the rows just
-- inserted into codeintel_scip_documents), aggregates MIN/MAX schema_version per
-- metadata_shard_id, and upserts the result into codeintel_scip_documents_schema_versions.
-- An existing row's range is only ever widened. Always returns NULL (the return value of an
-- AFTER ... FOR EACH STATEMENT trigger is ignored).
--
-- Both metadata_shard_id and schema_version are columns of codeintel_scip_documents itself,
-- so they are read straight from `newtab`; the table has no document_id column to join on.
--
-- NOTE(review): rows inserted into codeintel_scip_documents before this trigger exists are
-- not reflected in the bounds table -- confirm whether a backfill is required.
CREATE OR REPLACE FUNCTION update_codeintel_scip_documents_schema_versions_insert() RETURNS trigger
    LANGUAGE plpgsql
    AS $$ BEGIN
    INSERT INTO codeintel_scip_documents_schema_versions (metadata_shard_id, min_schema_version, max_schema_version)
    SELECT
        newtab.metadata_shard_id,
        MIN(newtab.schema_version) AS min_schema_version,
        MAX(newtab.schema_version) AS max_schema_version
    FROM newtab
    GROUP BY newtab.metadata_shard_id
    ON CONFLICT (metadata_shard_id) DO UPDATE SET
        -- Update with min(old_min, new_min) and max(old_max, new_max)
        min_schema_version = LEAST(codeintel_scip_documents_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
        max_schema_version = GREATEST(codeintel_scip_documents_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
    RETURN NULL;
END $$;

-- Attach the trigger to the data table (codeintel_scip_documents), not to the bounds table:
-- the bounds must be refreshed when documents are inserted. Drop it from both tables first so
-- the script is re-runnable and no stale attachment survives.
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert ON codeintel_scip_documents_schema_versions;
DROP TRIGGER IF EXISTS codeintel_scip_documents_schema_versions_insert ON codeintel_scip_documents;
CREATE TRIGGER codeintel_scip_documents_schema_versions_insert AFTER INSERT ON codeintel_scip_documents
REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_documents_schema_versions_insert();

View File

@ -45,8 +45,8 @@ CREATE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert() R
INSERT INTO codeintel_scip_document_lookup_schema_versions
SELECT
upload_id,
MIN(documents.schema_version) as min_schema_version,
MAX(documents.schema_version) as max_schema_version
MIN(schema_version) as min_schema_version,
MAX(schema_version) as max_schema_version
FROM newtab
JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id
GROUP BY newtab.upload_id
@ -62,13 +62,13 @@ CREATE FUNCTION update_codeintel_scip_documents_schema_versions_insert() RETURNS
AS $$ BEGIN
INSERT INTO codeintel_scip_documents_schema_versions
SELECT
upload_id,
MIN(documents.schema_version) as min_schema_version,
MAX(documents.schema_version) as max_schema_version
metadata_shard_id,
MIN(schema_version) as min_schema_version,
MAX(schema_version) as max_schema_version
FROM newtab
JOIN codeintel_scip_documents ON codeintel_scip_documents.id = newtab.document_id
GROUP BY newtab.upload_id
ON CONFLICT (upload_id) DO UPDATE SET
GROUP BY newtab.metadata_shard_id
ON CONFLICT (metadata_shard_id) DO UPDATE SET
-- Update with min(old_min, new_min) and max(old_max, new_max)
min_schema_version = LEAST(codeintel_scip_documents_schema_versions.min_schema_version, EXCLUDED.min_schema_version),
max_schema_version = GREATEST(codeintel_scip_documents_schema_versions.max_schema_version, EXCLUDED.max_schema_version);
@ -227,7 +227,8 @@ CREATE TABLE codeintel_scip_documents (
id bigint NOT NULL,
payload_hash bytea NOT NULL,
schema_version integer NOT NULL,
raw_scip_payload bytea NOT NULL
raw_scip_payload bytea NOT NULL,
metadata_shard_id integer DEFAULT (floor(((random() * (128)::double precision) + (1)::double precision)))::integer NOT NULL
);
COMMENT ON TABLE codeintel_scip_documents IS 'A lookup of SCIP [Document](https://sourcegraph.com/search?q=context:%40sourcegraph/all+repo:%5Egithub%5C.com/sourcegraph/scip%24+file:%5Escip%5C.proto+message+Document&patternType=standard) payloads by their hash.';
@ -240,6 +241,8 @@ COMMENT ON COLUMN codeintel_scip_documents.schema_version IS 'The schema version
COMMENT ON COLUMN codeintel_scip_documents.raw_scip_payload IS 'The raw, canonicalized SCIP [Document](https://sourcegraph.com/search?q=context:%40sourcegraph/all+repo:%5Egithub%5C.com/sourcegraph/scip%24+file:%5Escip%5C.proto+message+Document&patternType=standard) payload.';
COMMENT ON COLUMN codeintel_scip_documents.metadata_shard_id IS 'A randomly generated integer used to arbitrarily bucket groups of documents for things like expiration checks and data migrations.';
CREATE SEQUENCE codeintel_scip_documents_id_seq
START WITH 1
INCREMENT BY 1
@ -250,18 +253,18 @@ CREATE SEQUENCE codeintel_scip_documents_id_seq
ALTER SEQUENCE codeintel_scip_documents_id_seq OWNED BY codeintel_scip_documents.id;
CREATE TABLE codeintel_scip_documents_schema_versions (
upload_id integer NOT NULL,
metadata_shard_id integer NOT NULL,
min_schema_version integer,
max_schema_version integer
);
COMMENT ON TABLE codeintel_scip_documents_schema_versions IS 'Tracks the range of `schema_versions` values associated with each SCIP index in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.';
COMMENT ON TABLE codeintel_scip_documents_schema_versions IS 'Tracks the range of `schema_versions` values associated with each document metadata shard in the [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) table.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.upload_id IS 'The identifier of the associated SCIP index.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.metadata_shard_id IS 'The identifier of the associated document metadata shard.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.min_schema_version IS 'A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.min_schema_version IS 'A lower-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.max_schema_version IS 'An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `upload_id` column matches the associated SCIP index.';
COMMENT ON COLUMN codeintel_scip_documents_schema_versions.max_schema_version IS 'An upper-bound on the `schema_version` values of the records in the table [`codeintel_scip_documents`](#table-publiccodeintel_scip_documents) where the `metadata_shard_id` column matches the associated document metadata shard.';
CREATE TABLE codeintel_scip_metadata (
id bigint NOT NULL,
@ -610,7 +613,7 @@ ALTER TABLE ONLY codeintel_scip_documents
ADD CONSTRAINT codeintel_scip_documents_pkey PRIMARY KEY (id);
ALTER TABLE ONLY codeintel_scip_documents_schema_versions
ADD CONSTRAINT codeintel_scip_documents_schema_versions_pkey PRIMARY KEY (upload_id);
ADD CONSTRAINT codeintel_scip_documents_schema_versions_pkey PRIMARY KEY (metadata_shard_id);
ALTER TABLE ONLY codeintel_scip_metadata
ADD CONSTRAINT codeintel_scip_metadata_pkey PRIMARY KEY (id);
@ -698,11 +701,11 @@ CREATE INDEX rockskip_symbols_gin ON rockskip_symbols USING gin (singleton_integ
CREATE INDEX rockskip_symbols_repo_id_path_name ON rockskip_symbols USING btree (repo_id, path, name);
CREATE TRIGGER codeintel_scip_document_lookup_schema_versions_insert AFTER INSERT ON codeintel_scip_document_lookup_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert();
CREATE TRIGGER codeintel_scip_document_lookup_schema_versions_insert AFTER INSERT ON codeintel_scip_document_lookup REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_document_lookup_schema_versions_insert();
CREATE TRIGGER codeintel_scip_documents_schema_versions_insert AFTER INSERT ON codeintel_scip_documents_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_documents_schema_versions_insert();
CREATE TRIGGER codeintel_scip_symbols_schema_versions_insert AFTER INSERT ON codeintel_scip_symbols_schema_versions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert();
CREATE TRIGGER codeintel_scip_symbols_schema_versions_insert AFTER INSERT ON codeintel_scip_symbols REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_codeintel_scip_symbols_schema_versions_insert();
CREATE TRIGGER lsif_data_definitions_schema_versions_insert AFTER INSERT ON lsif_data_definitions REFERENCING NEW TABLE AS newtab FOR EACH STATEMENT EXECUTE FUNCTION update_lsif_data_definitions_schema_versions_insert();