API docs: introduce running tally tables for optimized stats/counting (#26136)

In this change, we introduce a few running tally tables whose names are self-describing:

* `lsif_data_apidocs_num_pages` - used to optimize a query in lsifstore.
* `lsif_data_apidocs_num_dumps` - generally useful info, and used by the OOB migration.
* `lsif_data_apidocs_num_search_results_public` - generally useful info I would like to expose in the future
* `lsif_data_apidocs_num_search_results_private` - generally useful info I would like to expose in the future

We also introduce a running tally table, `lsif_data_apidocs_num_dumps_indexed`, which records the number of LSIF dumps that have been indexed for search by the OOB migrator. This makes the `Progress` percentage calculation for the API docs OOB search migrator very cheap; previously it caused high CPU usage in codeintel-db because of the expensive `count()` involved. See #25856.

Additionally, this concludes the major DB-schema rework I have been doing for the API docs search backend and re-enables API docs search by default, so that indexing will once again continue on Sourcegraph.com.

Fixes #25856

Helps #21938

Signed-off-by: Stephen Gutekanst <stephen@sourcegraph.com>
This commit is contained in:
Stephen Gutekanst 2021-10-18 08:54:31 -07:00 committed by GitHub
parent c2064705fa
commit 52a4d556d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 209 additions and 7 deletions

View File

@ -508,11 +508,9 @@ func (s *Store) truncateDocumentationSearchIndexSize(ctx context.Context, tableS
return nil
}
// TODO(apidocs): future: introduce materialized count for this table and for other interesting API
// docs data points in general. https://github.com/sourcegraph/sourcegraph/pull/25206#discussion_r714270738
const countDocumentationSearchRowsQuery = `
-- source: enterprise/internal/codeintel/stores/lsifstore/data_write_documentation.go:truncateDocumentationSearchIndexSize
SELECT count(*)::bigint FROM lsif_data_docs_search_$SUFFIX
SELECT count::bigint FROM lsif_data_apidocs_num_search_results_$SUFFIX
`
const truncateDocumentationSearchRowsQuery = `

View File

@ -69,9 +69,10 @@ func (m *apiDocsSearchMigrator) Progress(ctx context.Context) (float64, error) {
const apiDocsSearchMigratorProgressQuery = `
-- source: enterprise/internal/codeintel/stores/lsifstore/migration/apidocs_search.go:Progress
SELECT CASE c2.count WHEN 0 THEN 1 ELSE cast(c1.count as float) / cast(c2.count as float) END FROM
(SELECT count(DISTINCT dump_id) FROM lsif_data_documentation_pages WHERE search_indexed='true') c1,
(SELECT count(DISTINCT dump_id) FROM lsif_data_documentation_pages) c2
SELECT CASE c2.count WHEN 0 THEN 1 ELSE cast(c1.count as float) / cast(c2.count as float) END
FROM
(SELECT * FROM lsif_data_apidocs_num_dumps_indexed) c1,
(SELECT * FROM lsif_data_apidocs_num_dumps) c2
`
// Up runs a batch of the migration. This method is called repeatedly until the Progress

View File

@ -301,7 +301,7 @@ func EventLoggingEnabled() bool {
func APIDocsSearchIndexingEnabled() bool {
val := ExperimentalFeatures().ApidocsSearchIndexing
if val == "" {
return false // default to off for now until we've fixed https://github.com/sourcegraph/sourcegraph/issues/25856
return true
}
return val == "enabled"
}

View File

@ -9,6 +9,46 @@ Indexes:
```
# Table "public.lsif_data_apidocs_num_dumps"
```
Column | Type | Collation | Nullable | Default
--------+--------+-----------+----------+---------
count | bigint | | |
```
# Table "public.lsif_data_apidocs_num_dumps_indexed"
```
Column | Type | Collation | Nullable | Default
--------+--------+-----------+----------+---------
count | bigint | | |
```
# Table "public.lsif_data_apidocs_num_pages"
```
Column | Type | Collation | Nullable | Default
--------+--------+-----------+----------+---------
count | bigint | | |
```
# Table "public.lsif_data_apidocs_num_search_results_private"
```
Column | Type | Collation | Nullable | Default
--------+--------+-----------+----------+---------
count | bigint | | |
```
# Table "public.lsif_data_apidocs_num_search_results_public"
```
Column | Type | Collation | Nullable | Default
--------+--------+-----------+----------+---------
count | bigint | | |
```
# Table "public.lsif_data_definitions"
```
Column | Type | Collation | Nullable | Default
@ -142,6 +182,9 @@ Foreign-key constraints:
"lsif_data_docs_search_private_lang_name_id_fk" FOREIGN KEY (lang_name_id) REFERENCES lsif_data_docs_search_lang_names_private(id)
"lsif_data_docs_search_private_repo_name_id_fk" FOREIGN KEY (repo_name_id) REFERENCES lsif_data_docs_search_repo_names_private(id)
"lsif_data_docs_search_private_tags_id_fk" FOREIGN KEY (tags_id) REFERENCES lsif_data_docs_search_tags_private(id)
Triggers:
lsif_data_docs_search_private_delete AFTER DELETE ON lsif_data_docs_search_private REFERENCING OLD TABLE AS oldtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_docs_search_private_delete()
lsif_data_docs_search_private_insert AFTER INSERT ON lsif_data_docs_search_private REFERENCING NEW TABLE AS newtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_docs_search_private_insert()
```
@ -209,6 +252,9 @@ Foreign-key constraints:
"lsif_data_docs_search_public_lang_name_id_fk" FOREIGN KEY (lang_name_id) REFERENCES lsif_data_docs_search_lang_names_public(id)
"lsif_data_docs_search_public_repo_name_id_fk" FOREIGN KEY (repo_name_id) REFERENCES lsif_data_docs_search_repo_names_public(id)
"lsif_data_docs_search_public_tags_id_fk" FOREIGN KEY (tags_id) REFERENCES lsif_data_docs_search_tags_public(id)
Triggers:
lsif_data_docs_search_public_delete AFTER DELETE ON lsif_data_docs_search_public REFERENCING OLD TABLE AS oldtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_docs_search_public_delete()
lsif_data_docs_search_public_insert AFTER INSERT ON lsif_data_docs_search_public REFERENCING NEW TABLE AS newtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_docs_search_public_insert()
```
@ -382,6 +428,10 @@ Maps documentation path IDs to their corresponding integral documentationResult
search_indexed | boolean | | | false
Indexes:
"lsif_data_documentation_pages_pkey" PRIMARY KEY, btree (dump_id, path_id)
Triggers:
lsif_data_documentation_pages_delete AFTER DELETE ON lsif_data_documentation_pages REFERENCING OLD TABLE AS oldtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_documentation_pages_delete()
lsif_data_documentation_pages_insert AFTER INSERT ON lsif_data_documentation_pages REFERENCING NEW TABLE AS newtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_documentation_pages_insert()
lsif_data_documentation_pages_update AFTER UPDATE ON lsif_data_documentation_pages REFERENCING OLD TABLE AS oldtbl NEW TABLE AS newtbl FOR EACH STATEMENT EXECUTE FUNCTION lsif_data_documentation_pages_update()
```

View File

@ -0,0 +1,25 @@
-- Down migration: removes the API docs running-tally tables together with the
-- triggers and trigger functions that maintain them.
--
-- Every DROP uses IF EXISTS so the script is idempotent: it still succeeds when
-- some (or all) of the objects are already gone, instead of aborting the whole
-- transaction on the first missing object. Objects are dropped in dependency
-- order: triggers first, then the functions they execute, then the tables.
BEGIN;

-- Tally maintenance on lsif_data_documentation_pages.
DROP TRIGGER IF EXISTS lsif_data_documentation_pages_delete ON lsif_data_documentation_pages;
DROP TRIGGER IF EXISTS lsif_data_documentation_pages_insert ON lsif_data_documentation_pages;
DROP TRIGGER IF EXISTS lsif_data_documentation_pages_update ON lsif_data_documentation_pages;
DROP FUNCTION IF EXISTS lsif_data_documentation_pages_delete();
DROP FUNCTION IF EXISTS lsif_data_documentation_pages_insert();
DROP FUNCTION IF EXISTS lsif_data_documentation_pages_update();
DROP TABLE IF EXISTS lsif_data_apidocs_num_pages;
DROP TABLE IF EXISTS lsif_data_apidocs_num_dumps;
DROP TABLE IF EXISTS lsif_data_apidocs_num_dumps_indexed;

-- Tally maintenance on lsif_data_docs_search_public.
DROP TRIGGER IF EXISTS lsif_data_docs_search_public_delete ON lsif_data_docs_search_public;
DROP TRIGGER IF EXISTS lsif_data_docs_search_public_insert ON lsif_data_docs_search_public;
DROP FUNCTION IF EXISTS lsif_data_docs_search_public_delete();
DROP FUNCTION IF EXISTS lsif_data_docs_search_public_insert();
DROP TABLE IF EXISTS lsif_data_apidocs_num_search_results_public;

-- Tally maintenance on lsif_data_docs_search_private.
DROP TRIGGER IF EXISTS lsif_data_docs_search_private_delete ON lsif_data_docs_search_private;
DROP TRIGGER IF EXISTS lsif_data_docs_search_private_insert ON lsif_data_docs_search_private;
DROP FUNCTION IF EXISTS lsif_data_docs_search_private_delete();
DROP FUNCTION IF EXISTS lsif_data_docs_search_private_insert();
DROP TABLE IF EXISTS lsif_data_apidocs_num_search_results_private;

COMMIT;

View File

@ -0,0 +1,128 @@
BEGIN;
--------------------------------------------------------
-- Stats for the lsif_data_documentation_pages table. --
--------------------------------------------------------
-- Seed the tally tables from the current contents of
-- lsif_data_documentation_pages. Each table holds one row with a single
-- bigint column named "count".

-- Total number of documentation page rows.
CREATE TABLE lsif_data_apidocs_num_pages AS
    SELECT count(*) AS count
    FROM lsif_data_documentation_pages;

-- Number of distinct dumps that have at least one documentation page.
CREATE TABLE lsif_data_apidocs_num_dumps AS
    SELECT count(DISTINCT dump_id) AS count
    FROM lsif_data_documentation_pages;

-- Number of distinct dumps that have at least one page marked search_indexed.
CREATE TABLE lsif_data_apidocs_num_dumps_indexed AS
    SELECT count(DISTINCT dump_id) AS count
    FROM lsif_data_documentation_pages
    WHERE search_indexed = 'true';
-- Trigger function fired once per DELETE statement on
-- lsif_data_documentation_pages. "oldtbl" is the transition table holding the
-- rows removed by that statement; the three page tallies are decremented by
-- what it contains.
--
-- NOTE(review): the per-dump tallies subtract the distinct dump_ids seen in
-- this one statement. That presumably relies on all pages of a dump being
-- deleted by a single statement - confirm against the callers.
CREATE OR REPLACE FUNCTION lsif_data_documentation_pages_delete()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    removed_pages         bigint;
    removed_dumps         bigint;
    removed_dumps_indexed bigint;
BEGIN
    -- Gather all three deltas from the deleted rows.
    SELECT
        count(*),
        count(DISTINCT dump_id),
        count(DISTINCT dump_id) FILTER (WHERE search_indexed = 'true')
    INTO removed_pages, removed_dumps, removed_dumps_indexed
    FROM oldtbl;

    -- Decrement tally counting tables.
    UPDATE lsif_data_apidocs_num_pages         SET count = count - removed_pages;
    UPDATE lsif_data_apidocs_num_dumps         SET count = count - removed_dumps;
    UPDATE lsif_data_apidocs_num_dumps_indexed SET count = count - removed_dumps_indexed;

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_documentation_pages_delete
    AFTER DELETE ON lsif_data_documentation_pages
    REFERENCING OLD TABLE AS oldtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_documentation_pages_delete();
-- Trigger function fired once per INSERT statement on
-- lsif_data_documentation_pages. "newtbl" is the transition table holding the
-- rows added by that statement; the three page tallies are incremented by
-- what it contains.
--
-- NOTE(review): the per-dump tallies add the distinct dump_ids seen in this
-- one statement. That presumably relies on all pages of a dump being inserted
-- by a single statement - confirm against the callers.
CREATE OR REPLACE FUNCTION lsif_data_documentation_pages_insert()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    added_pages         bigint;
    added_dumps         bigint;
    added_dumps_indexed bigint;
BEGIN
    -- Gather all three deltas from the inserted rows.
    SELECT
        count(*),
        count(DISTINCT dump_id),
        count(DISTINCT dump_id) FILTER (WHERE search_indexed = 'true')
    INTO added_pages, added_dumps, added_dumps_indexed
    FROM newtbl;

    -- Increment tally counting tables.
    UPDATE lsif_data_apidocs_num_pages         SET count = count + added_pages;
    UPDATE lsif_data_apidocs_num_dumps         SET count = count + added_dumps;
    UPDATE lsif_data_apidocs_num_dumps_indexed SET count = count + added_dumps_indexed;

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_documentation_pages_insert
    AFTER INSERT ON lsif_data_documentation_pages
    REFERENCING NEW TABLE AS newtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_documentation_pages_insert();
-- Trigger function fired once per UPDATE statement on
-- lsif_data_documentation_pages. It compares how many distinct dumps had a
-- search_indexed page before the statement ("oldtbl") and after it ("newtbl"),
-- and applies the difference to lsif_data_apidocs_num_dumps_indexed.
--
-- NOTE(review): only the rows touched by the statement are compared, so this
-- presumably relies on a dump's pages being flipped together in one statement
-- - confirm against the migrator.
CREATE OR REPLACE FUNCTION lsif_data_documentation_pages_update()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
DECLARE
    indexed_before bigint;
    indexed_after  bigint;
BEGIN
    SELECT count(DISTINCT dump_id) INTO indexed_before
    FROM oldtbl
    WHERE search_indexed = 'true';

    SELECT count(DISTINCT dump_id) INTO indexed_after
    FROM newtbl
    WHERE search_indexed = 'true';

    -- The delta is negative when the statement un-indexes dumps.
    UPDATE lsif_data_apidocs_num_dumps_indexed
    SET count = count + (indexed_after - indexed_before);

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_documentation_pages_update
    AFTER UPDATE ON lsif_data_documentation_pages
    REFERENCING OLD TABLE AS oldtbl NEW TABLE AS newtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_documentation_pages_update();
-------------------------------------------------------
-- Stats for the lsif_data_docs_search_public table. --
-------------------------------------------------------
-- Seed the tally with the current number of rows in
-- lsif_data_docs_search_public (one row, one bigint column named "count").
CREATE TABLE lsif_data_apidocs_num_search_results_public AS
    SELECT count(*) AS count
    FROM lsif_data_docs_search_public;
-- Trigger function fired once per DELETE statement on
-- lsif_data_docs_search_public. Lowers the public search-result tally by the
-- number of rows the statement removed (exposed as transition table "oldtbl").
CREATE OR REPLACE FUNCTION lsif_data_docs_search_public_delete()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE lsif_data_apidocs_num_search_results_public
    SET count = count - (SELECT count(*) FROM oldtbl);

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_docs_search_public_delete
    AFTER DELETE ON lsif_data_docs_search_public
    REFERENCING OLD TABLE AS oldtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_docs_search_public_delete();
-- Trigger function fired once per INSERT statement on
-- lsif_data_docs_search_public. Raises the public search-result tally by the
-- number of rows the statement added (exposed as transition table "newtbl").
CREATE OR REPLACE FUNCTION lsif_data_docs_search_public_insert()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE lsif_data_apidocs_num_search_results_public
    SET count = count + (SELECT count(*) FROM newtbl);

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_docs_search_public_insert
    AFTER INSERT ON lsif_data_docs_search_public
    REFERENCING NEW TABLE AS newtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_docs_search_public_insert();
-------------------------------------------------------
-- Stats for the lsif_data_docs_search_private table. --
-------------------------------------------------------
-- Seed the tally with the current number of rows in
-- lsif_data_docs_search_private (one row, one bigint column named "count").
CREATE TABLE lsif_data_apidocs_num_search_results_private AS
    SELECT count(*) AS count
    FROM lsif_data_docs_search_private;
-- Trigger function fired once per DELETE statement on
-- lsif_data_docs_search_private. Lowers the private search-result tally by the
-- number of rows the statement removed (exposed as transition table "oldtbl").
CREATE OR REPLACE FUNCTION lsif_data_docs_search_private_delete()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE lsif_data_apidocs_num_search_results_private
    SET count = count - (SELECT count(*) FROM oldtbl);

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_docs_search_private_delete
    AFTER DELETE ON lsif_data_docs_search_private
    REFERENCING OLD TABLE AS oldtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_docs_search_private_delete();
-- Trigger function fired once per INSERT statement on
-- lsif_data_docs_search_private. Raises the private search-result tally by the
-- number of rows the statement added (exposed as transition table "newtbl").
CREATE OR REPLACE FUNCTION lsif_data_docs_search_private_insert()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE lsif_data_apidocs_num_search_results_private
    SET count = count + (SELECT count(*) FROM newtbl);

    -- The return value of an AFTER ... FOR EACH STATEMENT trigger is ignored.
    RETURN NULL;
END $$;

CREATE TRIGGER lsif_data_docs_search_private_insert
    AFTER INSERT ON lsif_data_docs_search_private
    REFERENCING NEW TABLE AS newtbl
    FOR EACH STATEMENT
    EXECUTE PROCEDURE lsif_data_docs_search_private_insert();
COMMIT;