mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 13:51:46 +00:00
chore/sg: remove 'sg telemetry' and related docs (#63763)
Removes the `sg telemetry` command that pertains to the legacy V1 exporter that is specific to Cloud instances. I got asked about this recently, and especially with the new `sg analytics` for usage of the `sg` CLI, this has the potential to be pretty confusing. Part of https://linear.app/sourcegraph/issue/CORE-104 ## Test plan n/a ## Changelog - `sg`: the deprecated `sg telemetry` command for allowlisting export of V1 telemetry from Cloud instances has been removed. Use telemetry V2 instead.
This commit is contained in:
parent
cb19d6f0a9
commit
a07a1b9ed0
@ -1,4 +1,3 @@
|
||||
// NOTE(naman): Remember to add events to allow list: https://docs-legacy.sourcegraph.com/dev/background-information/data-usage-pipeline#allow-list
|
||||
export const enum EventName {
|
||||
CODY_CHAT_PAGE_VIEWED = 'web:codyChat:pageViewed',
|
||||
CODY_CHAT_SUBMIT = 'web:codyChat:submit',
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import type { AuthProvider } from '../jscontext'
|
||||
|
||||
// NOTE(naman): Remember to add events to allow list: https://docs-legacy.sourcegraph.com/dev/background-information/data-usage-pipeline#allow-list
|
||||
export const enum EventName {
|
||||
CODY_CHAT_PAGE_VIEWED = 'web:codyChat:pageViewed',
|
||||
CODY_CHAT_SUBMIT = 'web:codyChat:submit',
|
||||
|
||||
@ -39,7 +39,6 @@ go_library(
|
||||
"sg_src.go",
|
||||
"sg_start.go",
|
||||
"sg_teammate.go",
|
||||
"sg_telemetry.go",
|
||||
"sg_tests.go",
|
||||
"sg_update.go",
|
||||
"sg_version.go",
|
||||
|
||||
@ -282,7 +282,6 @@ var sg = &cli.App{
|
||||
dbCommand,
|
||||
migrationCommand,
|
||||
insightsCommand,
|
||||
telemetryCommand,
|
||||
monitoringCommand,
|
||||
contextCommand,
|
||||
deployCommand,
|
||||
|
||||
@ -1,176 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/lib/output"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/dev/sg/internal/category"
|
||||
"github.com/sourcegraph/sourcegraph/dev/sg/internal/migration"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/dev/sg/internal/db"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/dev/sg/internal/std"
|
||||
)
|
||||
|
||||
// telemetryV1DeprecationWarning is the deprecation notice for the legacy
// telemetry V1 tooling. It is printed by renderTelemetryV1DeprecationWarning
// and doubles as the Description of telemetryCommand.
const telemetryV1DeprecationWarning = `WARNING: Telemetry V1 and related mechanisms (event_logs export) have been DEPRECATED.
This includes the 'sg telemetry' commands.

It will be replaced with Telemetry V2 - code that is instrumented with the new
Telemetry SDKs will automatically export safe telemetry in accordance with the
Sourcegraph instance's allowed policies.

For more details, see https://docs-legacy.sourcegraph.com/dev/background-information/telemetry
or reach out in #discuss-analytics.`
|
||||
|
||||
func renderTelemetryV1DeprecationWarning() {
|
||||
std.Out.WriteWarningf(telemetryV1DeprecationWarning)
|
||||
}
|
||||
|
||||
var telemetryCommand = &cli.Command{
|
||||
Name: "telemetry",
|
||||
Before: func(ctx *cli.Context) error {
|
||||
renderTelemetryV1DeprecationWarning()
|
||||
return nil
|
||||
},
|
||||
Usage: "[DEPRECATED] Operations relating to Sourcegraph telemetry v1",
|
||||
Description: telemetryV1DeprecationWarning,
|
||||
Category: category.Dev,
|
||||
Subcommands: []*cli.Command{
|
||||
allowlistCommand,
|
||||
},
|
||||
}
|
||||
|
||||
var allowlistCommand = &cli.Command{
|
||||
Name: "allowlist",
|
||||
Usage: "Edit the legacy telemetry v1 usage data export allow list",
|
||||
Flags: []cli.Flag{},
|
||||
Description: `
|
||||
Utility that will generate SQL to add and remove events from the usage data allow list.
|
||||
https://docs-legacy.sourcegraph.com/dev/background-information/data-usage-pipeline#allow-list
|
||||
|
||||
Events are keyed by event name and passed in as additional arguments to the add and remove subcommands.
|
||||
`,
|
||||
UsageText: `
|
||||
# Generate SQL to add events from the allow list
|
||||
sg telemetry allowlist add EVENT_ONE EVENT_TWO
|
||||
|
||||
# Generate SQL to remove events from the allow list
|
||||
sg telemetry allowlist remove EVENT_ONE EVENT_TWO
|
||||
|
||||
# Automatically generate migration files associated with the allow list modification
|
||||
sg telemetry allowlist add --migration EVENT_ONE EVENT_TWO
|
||||
|
||||
# Provide a specific migration name for the migration files
|
||||
sg telemetry allowlist add --migration --name my_migration_name EVENT_ONE EVENT_TWO
|
||||
`,
|
||||
Subcommands: []*cli.Command{
|
||||
addAllowlistCommand,
|
||||
removeAllowlistCommand,
|
||||
},
|
||||
}
|
||||
|
||||
var addAllowlistCommand = &cli.Command{
|
||||
Name: "add",
|
||||
ArgsUsage: "[event]",
|
||||
Usage: "Generate the SQL required to add events to the legacy telemetry v1 export allow list",
|
||||
UsageText: `
|
||||
# Generate SQL to add events from the allow list
|
||||
sg telemetry allowlist add EVENT_ONE EVENT_TWO
|
||||
|
||||
# Automatically generate migration files associated with the allow list modification
|
||||
sg telemetry allowlist add --migration EVENT_ONE EVENT_TWO
|
||||
|
||||
# Provide a specific migration name for the migration files
|
||||
sg telemetry allowlist add --migration --name my_migration_name EVENT_ONE EVENT_TWO
|
||||
`,
|
||||
Flags: []cli.Flag{
|
||||
allowlistCreateMigrationFlag,
|
||||
allowlistMigrationNameOverrideFlag,
|
||||
},
|
||||
Action: addAllowList,
|
||||
}
|
||||
|
||||
var removeAllowlistCommand = &cli.Command{
|
||||
Name: "remove",
|
||||
ArgsUsage: "[event]",
|
||||
Usage: "Generate the SQL required to remove events from the legacy telemetry v1 export allow list",
|
||||
UsageText: `
|
||||
# Generate SQL to add events from the allow list
|
||||
sg telemetry allowlist remove EVENT_ONE EVENT_TWO
|
||||
|
||||
# Automatically generate migration files associated with the allow list modification
|
||||
sg telemetry allowlist remove --migration EVENT_ONE EVENT_TWO
|
||||
|
||||
# Provide a specific migration name for the migration files
|
||||
sg telemetry allowlist remove --migration --name my_migration_name EVENT_ONE EVENT_TWO
|
||||
`,
|
||||
Flags: []cli.Flag{
|
||||
allowlistCreateMigrationFlag,
|
||||
allowlistMigrationNameOverrideFlag,
|
||||
},
|
||||
Action: removeAllowList,
|
||||
}
|
||||
|
||||
var createMigrationFiles bool
|
||||
var allowlistCreateMigrationFlag = &cli.BoolFlag{
|
||||
Name: "migration",
|
||||
Usage: "Create migration files with the generated SQL.",
|
||||
Value: false,
|
||||
Destination: &createMigrationFiles,
|
||||
}
|
||||
|
||||
var allowlistMigrationName string
|
||||
var allowlistMigrationNameOverrideFlag = &cli.StringFlag{
|
||||
Name: "name",
|
||||
Usage: "Specifies the name of the resulting migration.",
|
||||
Required: false,
|
||||
Value: "sg_telemetry_allowlist",
|
||||
Destination: &allowlistMigrationName,
|
||||
}
|
||||
|
||||
func addAllowList(ctx *cli.Context) (err error) {
|
||||
events := ctx.Args().Slice()
|
||||
if len(events) == 0 {
|
||||
return cli.Exit("no events provided", 1)
|
||||
}
|
||||
|
||||
return editAllowlist(ctx, events, false)
|
||||
}
|
||||
|
||||
func removeAllowList(ctx *cli.Context) (err error) {
|
||||
events := ctx.Args().Slice()
|
||||
if len(events) == 0 {
|
||||
return cli.Exit("no events provided", 1)
|
||||
}
|
||||
|
||||
return editAllowlist(ctx, events, true)
|
||||
}
|
||||
|
||||
func editAllowlist(ctx *cli.Context, events []string, reverse bool) error {
|
||||
header := fmt.Sprintf("-- This migration was generated by the command `sg telemetry %s`", ctx.Command.FullName())
|
||||
arrayStr := fmt.Sprintf(`'{%v}'`, strings.Join(events, ","))
|
||||
upQuery := fmt.Sprintf("INSERT INTO event_logs_export_allowlist (event_name) VALUES (UNNEST(%s::TEXT[])) ON CONFLICT DO NOTHING;", arrayStr)
|
||||
downQuery := fmt.Sprintf("DELETE FROM event_logs_export_allowlist WHERE event_name IN (SELECT * FROM UNNEST(%s::TEXT[]));", arrayStr)
|
||||
|
||||
if reverse {
|
||||
upQuery, downQuery = downQuery, upQuery
|
||||
}
|
||||
|
||||
std.Out.WriteLine(output.Styledf(output.StylePending, "\ngenerating output..."))
|
||||
std.Out.WriteLine(output.Styledf(output.StyleSuccess, "%s", upQuery))
|
||||
std.Out.WriteLine(output.Styledf(output.StyleWarning, "revert:\n%s", downQuery))
|
||||
|
||||
if !createMigrationFiles {
|
||||
return nil
|
||||
}
|
||||
std.Out.WriteLine(output.Styledf(output.StylePending, "\ncreating migration files with name: %s...\n", allowlistMigrationName))
|
||||
database, ok := db.DatabaseByName("frontend")
|
||||
if !ok {
|
||||
return cli.Exit("frontend database not found", 1)
|
||||
}
|
||||
return migration.AddWithTemplate(database, allowlistMigrationName, fmt.Sprintf("%s\n%s", header, upQuery), fmt.Sprintf("%s\n%s", header, downQuery))
|
||||
}
|
||||
@ -1,63 +0,0 @@
|
||||
# DEPRECATED: Adding, changing, and debugging user event data
|
||||
|
||||
> WARNING: **This process is deprecated.** To export Telemetry events from Sourcegraph instances, refer to the new [telemetry reference](./telemetry/index.md).
|
||||
|
||||
This document outlines the process for adding or changing the raw user event data collected from Sourcegraph instances. This is limited to certain managed instances (cloud) where the customer has signed a corresponding data collection agreement.
|
||||
|
||||
### User event data philosophy
|
||||
|
||||
[Raw user event data](https://docs.sourcegraph.com/dev/background-information/data-usage-pipeline) is collected from logs in the `event_logs` table in the instance primary database and sent to Sourcegraph centralized analytics. These [events](https://sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/sourcegraph%24+file:internal/database/event_logs.go+Event+type:symbol+select:symbol.struct&patternType=standard) are a product of events performed by users or the system and represent our customers’ most sensitive data. We must preserve and build trust through only careful additions and changes to events added to the egress pipeline.
|
||||
|
||||
All user event data must be:
|
||||
1. Anonymous (with only one exception, the email address of the initial site installer)
|
||||
2. Non-specific (e.g., no repo names, no usernames, no file names, no specific search queries, etc.)
|
||||
|
||||
### Adding events to the raw user event data pipeline
|
||||
|
||||
Ensure that any events added to the data pipeline are consistent with the user level data [FAQ](https://docs.google.com/document/d/1vXHoMBnvI_SlOjft4Q1Zhb5ZoScS1IjZ4V1LSKgVxv8/edit#).
|
||||
|
||||
### Changing the BigQuery view
|
||||
|
||||
Edit the query from the [cloud console](https://console.cloud.google.com/bigquery?project=telligentsourcegraph&ws=!1m5!1m4!4m3!1stelligentsourcegraph!2sdotcom_events!3sevents_usage) to add additional data that is passed through the event batches. Any field that does not exist in an event will persist as null rather than cause a failure, which ensures backward compatibility.
|
||||
|
||||
```sql
(
|
||||
WITH data as (
|
||||
SELECT JSON_EXTRACT_ARRAY(data, '$') as json
|
||||
FROM `telligentsourcegraph.dotcom_events.events_usage_raw`
|
||||
)
|
||||
select
|
||||
--flattened_data,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.name') as name,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.url') as url,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.user_id') as user_id,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.anonymous_user_id') as anonymous_user_id,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.source') as source,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.argument') as argument,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.version') as version,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.timestamp') as timestamp,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.firstSourceURL') as firstSourceURL,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.first_source_url') as first_source_url,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.feature_flags') as feature_flags,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.cohort_id') as cohort_id,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.referrer') as referrer,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.public_argument') as public_argument,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.device_id') as device_id,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.insert_id') as insert_id,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.last_source_url') as last_source_url,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.site_id') as site_id,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.license_key') as license_key,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.initial_admin_email') as initial_admin_email,
|
||||
JSON_EXTRACT_SCALAR(flattened_data, '$.deploy_type') as deploy_type,
|
||||
from data
|
||||
cross join unnest(data.json) as flattened_data
|
||||
)
|
||||
```
|
||||
|
||||
### Debugging
|
||||
|
||||
Working backward from where the data is coming from, we should receive alerts for anomalies along each step of the process here.
|
||||
1. At the [view](https://console.cloud.google.com/bigquery?project=telligentsourcegraph&ws=!1m5!1m4!4m3!1stelligentsourcegraph!2sdotcom_events!3sevents_usage), the details expose the raw SQL of flattening out the payload batches then extracting the desired field from the JSON structure.
|
||||
2. In the [raw table](https://console.cloud.google.com/bigquery?project=telligentsourcegraph&ws=!1m5!1m4!4m3!1stelligentsourcegraph!2sdotcom_events!3sevents_usage_raw), the subscription unloads any new data into this table as an array of JSON objects. The payloads are capped at source to 10MB.
|
||||
3. In the [subscription](https://console.cloud.google.com/cloudpubsub/subscription/detail/dotcom-events-usage?project=telligentsourcegraph), there are metrics available in the cloud console. There is not currently any automated snapshot generation.
|
||||
4. In the [pub/sub topic](https://console.cloud.google.com/cloudpubsub/topic/detail/dotcom-events-usage?project=telligentsourcegraph), there are also metrics available in the cloud console. There is not currently any automated snapshot generation. There is currently a 7 day retention duration for messages in the queue.
|
||||
5. In the instance, this would point to an issue with the instance sending data at all. Check if there is also an interruption in [ping](https://docs.sourcegraph.com/dev/background-information/adding_ping_data) or other telemetry data.
|
||||
@ -1,65 +0,0 @@
|
||||
# DEPRECATED: Event level data usage pipeline
|
||||
|
||||
> WARNING: **This process is deprecated.** To export Telemetry events from Sourcegraph instances, refer to the new [telemetry reference](./telemetry/index.md).
|
||||
|
||||
This document outlines information about the ability to export raw user event data from Sourcegraph. This is limited
|
||||
to certain managed instances (cloud) where the customer has signed a corresponding data collection agreement.
|
||||
|
||||
### What is it?
|
||||
|
||||
This process is a background job that can be enabled that will periodically scrape the `event_logs` table in the primary database
|
||||
and send it to Sourcegraph centralized analytics. [Events](https://sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/sourcegraph%24+file:internal/database/event_logs.go+Event+type:symbol+select:symbol.struct&patternType=standard) stored in `event_logs` are product events performed by users or the system. More information can be found in [RFC 719: Managed Instance Telemetry](https://docs.google.com/document/d/1N9aO0uTlvwXI7FzdPjIUCn_d1tRkJUfsc0urWigRf6s/edit).
|
||||
|
||||
The [job interval](https://sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/sourcegraph%24+file:%5Ecmd/worker/internal/telemetry/telemetry_job%5C.go+JobCooldownDuration&patternType=standard) determines how often the job is executed. The [batch size option](https://sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/sourcegraph%24+file:%5Ecmd/worker/internal/telemetry/telemetry_job%5C.go+getBatchSize+type:symbol&patternType=standard) determines how many records can be pulled in a single scrape. The batch size has a [default value](https://sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/sourcegraph%24+file:%5Ecmd/worker/internal/telemetry/telemetry_job%5C.go+MaxEventsCountDefault&patternType=standard) and can be overridden with a site setting:
|
||||
``` json
|
||||
"exportUsageTelemetry": {
|
||||
"batchSize": 100,
|
||||
}
|
||||
```
|
||||
|
||||
The scraping job maintains state using a bookmark stored in the primary postgres database in the table `event_logs_scrape_state`. [If the bookmark is not found, one will be inserted](https://sourcegraph.com/github.com/sourcegraph/sourcegraph/-/blob/cmd/worker/internal/telemetry/telemetry_job.go?L424-440) such that the bookmark is the most recent event at the time.
|
||||
|
||||
The scraping process provides a crude at-least-once guarantee: if any scrape fails, the bookmark state is not updated, so future scrapes retry the same set of events.
|
||||
|
||||
### Allow list
|
||||
|
||||
Only events that [exist in an allow list](https://sourcegraph.com/github.com/sourcegraph/sourcegraph@735bc0f69ce417ecce55a9194dbf349c954043e3/-/blob/internal/database/event_logs.go?L321-324) will be scraped. Events are keyed in the allow list by the `event_logs.name` column. The allow list can be found in the primary
|
||||
postgres database in the table `event_logs_export_allowlist`.
|
||||
|
||||
#### Adding to the allow list
|
||||
Modifying the allow list is performed using database migrations. To simplify this process and ensure consistency, use the [sg](https://docs.sourcegraph.com/dev/background-information/sg) tool:
|
||||
``` shell
|
||||
sg telemetry allowlist add --migration [event]
|
||||
```
|
||||
|
||||
``` shell
|
||||
sg telemetry allowlist remove --migration [event]
|
||||
```
|
||||
|
||||
If you want to modify many events, you can pass a file of newline-delimited event names using `xargs`:
|
||||
``` shell
|
||||
cat /location/of/my/events/file | xargs sg telemetry allowlist add --migration
|
||||
```
|
||||
|
||||
Create a pull request and get a review from the Data Engineering team.
|
||||
|
||||
|
||||
#### Determine if an event is in the allow list
|
||||
Currently, there is not a single document that shows the entire allow list. There are two options:
|
||||
1. Start Sourcegraph and migrate to the latest version, and query the database
|
||||
```postgresql
|
||||
select * from event_logs_export_allowlist;
|
||||
```
|
||||
2. [Look through migration files](https://sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/sourcegraph%24+file:migrations+lang:sql+MY_EVENT_NAME&patternType=standard) to see if the event you are looking for has been added and not deleted
|
||||
|
||||
|
||||
### How to enable for a managed instance
|
||||
1. Ensure the managed instance has the [appropriate IAM policy](https://sourcegraph.sourcegraph.com/github.com/sourcegraph/deploy-sourcegraph-managed/-/blob/modules/terraform-managed-instance-new/iam.tf?L19-31&utm_source=raycast-sourcegraph&utm_campaign=search) applied
|
||||
2. Update the managed instance deployment manifest to include the following environment variables:
|
||||
1. `EXPORT_USAGE_DATA_ENABLED=true`
|
||||
2. [`EXPORT_USAGE_DATA_TOPIC_NAME`](https://sourcegraph.sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/deploy-sourcegraph-managed%24+EXPORT_USAGE_DATA_TOPIC_NAME&patternType=standard)
|
||||
3. [`EXPORT_USAGE_DATA_TOPIC_PROJECT`](https://sourcegraph.sourcegraph.com/search?q=context:global+repo:%5Egithub%5C.com/sourcegraph/deploy-sourcegraph-managed%24+EXPORT_USAGE_DATA_TOPIC_PROJECT&patternType=standard)
|
||||
3. Deploy the updated deployment manifest and restart the `worker` service.
|
||||
|
||||
### Monitoring
|
||||
Each Sourcegraph instance with this export job enabled will emit metrics that are prefixed with `src_telemetry_job`.
|
||||
@ -13,7 +13,7 @@
|
||||
> NOTE: This document is a work-in-progress.
|
||||
|
||||
Telemetry describes the logging of user events, such as a page view or search. Telemetry data is collected by each Sourcegraph instance and is not sent to Sourcegraph.com (except in aggregate form as documented in "[Pings](../../../admin/pings.md)"). Some select managed instances enable
|
||||
event level (non-aggregated) [telemetry](../data-usage-pipeline.md).
|
||||
event level (non-aggregated) V1 telemetry.
|
||||
|
||||
## Browser extension telemetry
|
||||
|
||||
|
||||
@ -161,8 +161,6 @@ Clarification and discussion about key concepts, architecture, and development s
|
||||
|
||||
- [Telemetry](background-information/telemetry/index.md)
|
||||
- [Adding, changing and debugging pings](background-information/adding_ping_data.md)
|
||||
- [DEPRECATED: Event level data usage pipeline](background-information/data-usage-pipeline.md)
|
||||
- [DEPRECATED: Adding, changing and debugging user event data](background-information/adding_event_level_data.md)
|
||||
- [Deploy Sourcegraph with Helm chart (BETA)](../../../admin/deploy/kubernetes/helm.md)
|
||||
- [GitHub API oddities](background-information/github-api-oddities.md)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user