From c0a0f71deec2ee1c56a842a5b57f917fa5caa60f Mon Sep 17 00:00:00 2001 From: Aditya Kalia <32119652+akalia25@users.noreply.github.com> Date: Mon, 5 Dec 2022 16:46:42 -0500 Subject: [PATCH] adding `originalReferrer`, `sessionReferrer`, `sessionFirstUrl` to our events (#44749) * adding originalReferrer to our events adding original referrer which is cookie name "_mkto_referrer" as our original referrer * updated to remove omitEmpty clause * new name reference fix * refactored code * fix to maintain string type (flagged by buildkite) * fixed undefined value issue for originalReferrer * refactoring to make sg lint happy * Creating cookie `originalReferrer` and updated getOriginalReferrer func * Update client/web/src/tracking/eventLogger.ts Co-authored-by: Brett Hayes * run sg-prettier :D * adding SessionReferrer as a field we collect * Satisfied Sg Lint :rocket: * Update client/web/src/tracking/eventLogger.ts Co-authored-by: Brett Hayes * Update client/web/src/tracking/eventLogger.ts Co-authored-by: Brett Hayes * adding `sessionFirstUrl` to our events * code cleanup * logic fixes to make sure cookies are being set and values returned properly * sg lint * fix `sessionFirstUrl` to `sessionFirstURL` * bug fix Co-authored-by: Brett Hayes --- client/web/src/tracking/eventLogger.ts | 85 +++++++++++++++ client/web/src/user/settings/backend.tsx | 3 + cmd/frontend/graphqlbackend/schema.graphql | 24 +++++ .../graphqlbackend/user_usage_stats.go | 36 ++++--- internal/cookie/cookie.go | 39 +++++++ internal/usagestats/event_handlers.go | 101 +++++++++++------- 6 files changed, 233 insertions(+), 55 deletions(-) diff --git a/client/web/src/tracking/eventLogger.ts b/client/web/src/tracking/eventLogger.ts index 9798f9dd75a..04981b22eab 100644 --- a/client/web/src/tracking/eventLogger.ts +++ b/client/web/src/tracking/eventLogger.ts @@ -19,6 +19,10 @@ export const FIRST_SOURCE_URL_KEY = 'sourcegraphSourceUrl' export const LAST_SOURCE_URL_KEY = 'sourcegraphRecentSourceUrl' export const 
DEVICE_ID_KEY = 'sourcegraphDeviceId' export const DEVICE_SESSION_ID_KEY = 'sourcegraphSessionId' +export const ORIGINAL_REFERRER_KEY = 'originalReferrer' +export const MKTO_ORIGINAL_REFERRER_KEY = '_mkto_referrer' +export const SESSION_REFERRER_KEY = 'sessionReferrer' +export const SESSION_FIRST_URL_KEY = 'sessionFirstUrl' const EXTENSION_MARKER_ID = '#sourcegraph-app-background' @@ -77,6 +81,9 @@ export class EventLogger implements TelemetryService, SharedEventLogger { private deviceID = '' private eventID = 0 private listeners: Set<(eventName: string) => void> = new Set() + private originalReferrer?: string + private sessionReferrer?: string + private sessionFirstURL?: string private readonly cookieSettings: CookieAttributes = { // 365 days expiry, but renewed on activity. @@ -233,6 +240,66 @@ export class EventLogger implements TelemetryService, SharedEventLogger { return lastSourceURL } + public getOriginalReferrer(): string { + // Gets the original referrer: the cached value, else the originalReferrer cookie, else the _mkto_referrer cookie, else document.referrer. + const originalReferrer = + this.originalReferrer || + cookies.get(ORIGINAL_REFERRER_KEY) || + cookies.get(MKTO_ORIGINAL_REFERRER_KEY) || + document.referrer + try { + // 🚨 SECURITY: If the referrer is a valid Sourcegraph.com URL, + // store and send an empty string instead of the URL to avoid + // leaking private repository names and files into our data. 
+ const url = new URL(originalReferrer) + if (url.hostname === 'sourcegraph.com') { + this.originalReferrer = '' + cookies.set(ORIGINAL_REFERRER_KEY, this.originalReferrer, this.cookieSettings) + return this.originalReferrer + } + cookies.set(ORIGINAL_REFERRER_KEY, originalReferrer, this.cookieSettings) + return originalReferrer + } catch { + this.originalReferrer = '' + cookies.set(ORIGINAL_REFERRER_KEY, this.originalReferrer, this.cookieSettings) + return this.originalReferrer + } + } + + public getSessionReferrer(): string { + // Gets the session referrer: the cached value, else the sessionReferrer cookie, else document.referrer + const sessionReferrer = this.sessionReferrer || cookies.get(SESSION_REFERRER_KEY) || document.referrer + try { + // 🚨 SECURITY: If the referrer is a valid Sourcegraph.com URL, + // store and send an empty string instead of the URL to avoid + // leaking private repository names and files into our data. + const url = new URL(sessionReferrer) + if (url.hostname === 'sourcegraph.com') { + this.sessionReferrer = '' + cookies.set(SESSION_REFERRER_KEY, this.sessionReferrer, this.deviceSessionCookieSettings) + return this.sessionReferrer + } + cookies.set(SESSION_REFERRER_KEY, sessionReferrer, this.deviceSessionCookieSettings) + return sessionReferrer + } catch { + this.sessionReferrer = '' + cookies.set(SESSION_REFERRER_KEY, this.sessionReferrer, this.deviceSessionCookieSettings) + return this.sessionReferrer + } + } + + public getSessionFirstURL(): string { + const sessionFirstURL = this.sessionFirstURL || cookies.get(SESSION_FIRST_URL_KEY) || location.href + + const redactedURL = redactSensitiveInfoFromAppURL(sessionFirstURL) + + // Use cookies instead of localStorage so that the URL can be shared with subdomains (about.sourcegraph.com). 
+ // Always re-set the cookie so it stores the redacted URL; nothing is read from localStorage here + cookies.set(SESSION_FIRST_URL_KEY, redactedURL, this.deviceSessionCookieSettings) + this.sessionFirstURL = redactedURL + return this.sessionFirstURL + } + public getDeviceSessionID(): string { let deviceSessionID = cookies.get(DEVICE_SESSION_ID_KEY) if (!deviceSessionID || deviceSessionID === '') { @@ -311,9 +378,27 @@ export class EventLogger implements TelemetryService, SharedEventLogger { cookies.set(DEVICE_ID_KEY, deviceID, this.cookieSettings) } + let originalReferrer = cookies.get(ORIGINAL_REFERRER_KEY) + if (!originalReferrer) { + originalReferrer = this.getOriginalReferrer() + } + + let sessionReferrer = cookies.get(SESSION_REFERRER_KEY) + if (!sessionReferrer) { + sessionReferrer = this.getSessionReferrer() + } + + let sessionFirstURL = cookies.get(SESSION_FIRST_URL_KEY) + if (!sessionFirstURL) { + sessionFirstURL = this.getSessionFirstURL() + } + this.anonymousUserID = anonymousUserID this.cohortID = cohortID this.deviceID = deviceID + this.originalReferrer = originalReferrer + this.sessionReferrer = sessionReferrer + this.sessionFirstURL = sessionFirstURL } public addEventLogListener(callback: (eventName: string) => void): () => void { diff --git a/client/web/src/user/settings/backend.tsx b/client/web/src/user/settings/backend.tsx index 011c9dae873..6897703e82b 100644 --- a/client/web/src/user/settings/backend.tsx +++ b/client/web/src/user/settings/backend.tsx @@ -168,6 +168,9 @@ function createEvent(event: string, eventProperties?: unknown, publicArgument?: firstSourceURL: eventLogger.getFirstSourceURL(), lastSourceURL: eventLogger.getLastSourceURL(), referrer: eventLogger.getReferrer(), + originalReferrer: eventLogger.getOriginalReferrer(), + sessionReferrer: eventLogger.getSessionReferrer(), + sessionFirstURL: eventLogger.getSessionFirstURL(), deviceSessionID: eventLogger.getDeviceSessionID(), url: window.location.href, source: EventSource.WEB, diff --git 
a/cmd/frontend/graphqlbackend/schema.graphql b/cmd/frontend/graphqlbackend/schema.graphql index 410ad1047ab..6900b55a026 100755 --- a/cmd/frontend/graphqlbackend/schema.graphql +++ b/cmd/frontend/graphqlbackend/schema.graphql @@ -486,6 +486,18 @@ type Mutation { """ referrer: String """ + The original referrer for a user + """ + originalReferrer: String + """ + The session referrer for a user + """ + sessionReferrer: String + """ + The sessions first url for a user + """ + sessionFirstURL: String + """ Device session ID to identify the user's session for analytics. """ deviceSessionID: String @@ -859,6 +871,18 @@ input Event { """ referrer: String """ + The original referrer for a user + """ + originalReferrer: String + """ + The session referrer for a user + """ + sessionReferrer: String + """ + The sessions first url for a user + """ + sessionFirstURL: String + """ Device session ID to identify the user's session for analytics. """ deviceSessionID: String diff --git a/cmd/frontend/graphqlbackend/user_usage_stats.go b/cmd/frontend/graphqlbackend/user_usage_stats.go index 3488ac749e8..62ea7b57c4f 100644 --- a/cmd/frontend/graphqlbackend/user_usage_stats.go +++ b/cmd/frontend/graphqlbackend/user_usage_stats.go @@ -78,21 +78,24 @@ func (*schemaResolver) LogUserEvent(ctx context.Context, args *struct { } type Event struct { - Event string - UserCookieID string - FirstSourceURL *string - LastSourceURL *string - URL string - Source string - Argument *string - CohortID *string - Referrer *string - DeviceSessionID *string - PublicArgument *string - UserProperties *string - DeviceID *string - InsertID *string - EventID *int32 + Event string + UserCookieID string + FirstSourceURL *string + LastSourceURL *string + URL string + Source string + Argument *string + CohortID *string + Referrer *string + OriginalReferrer *string + SessionReferrer *string + SessionFirstURL *string + DeviceSessionID *string + PublicArgument *string + UserProperties *string + DeviceID *string + 
InsertID *string + EventID *int32 } type EventBatch struct { @@ -176,6 +179,9 @@ func (r *schemaResolver) LogEvents(ctx context.Context, args *EventBatch) (*Empt EvaluatedFlagSet: featureflag.GetEvaluatedFlagSet(ctx), CohortID: args.CohortID, Referrer: args.Referrer, + OriginalReferrer: args.OriginalReferrer, + SessionReferrer: args.SessionReferrer, + SessionFirstURL: args.SessionFirstURL, PublicArgument: publicArgumentPayload, UserProperties: userPropertiesPayload, DeviceID: args.DeviceID, diff --git a/internal/cookie/cookie.go b/internal/cookie/cookie.go index f5f77073d2f..3d24e7e9d81 100644 --- a/internal/cookie/cookie.go +++ b/internal/cookie/cookie.go @@ -29,3 +29,42 @@ func DeviceID(r *http.Request) (string, bool) { } return cookie.Value, true } + +// OriginalReferrer returns our originalReferrer and bool indicating whether the +// value exists. +func OriginalReferrer(r *http.Request) (string, bool) { + if r == nil { + return "", false + } + cookie, err := r.Cookie("originalReferrer") + if err != nil { + return "", false + } + return cookie.Value, true +} + +// SessionReferrer returns our sessionReferrer and bool indicating whether the +// value exists. +func SessionReferrer(r *http.Request) (string, bool) { + if r == nil { + return "", false + } + cookie, err := r.Cookie("sessionReferrer") + if err != nil { + return "", false + } + return cookie.Value, true +} + +// SessionFirstURL returns our sessionFirstUrl and bool indicating whether the +// value exists. 
+func SessionFirstURL(r *http.Request) (string, bool) { + if r == nil { + return "", false + } + cookie, err := r.Cookie("sessionFirstUrl") + if err != nil { + return "", false + } + return cookie.Value, true +} diff --git a/internal/usagestats/event_handlers.go b/internal/usagestats/event_handlers.go index a63ed989581..05ea72e8ba1 100644 --- a/internal/usagestats/event_handlers.go +++ b/internal/usagestats/event_handlers.go @@ -40,14 +40,17 @@ type Event struct { CohortID *string // Referrer is only logged for Cloud events; therefore, this only goes to the BigQuery database // and does not go to the Postgres DB. - Referrer *string - Argument json.RawMessage - PublicArgument json.RawMessage - UserProperties json.RawMessage - DeviceID *string - InsertID *string - EventID *int32 - DeviceSessionID *string + Referrer *string + OriginalReferrer *string + SessionReferrer *string + SessionFirstURL *string + Argument json.RawMessage + PublicArgument json.RawMessage + UserProperties json.RawMessage + DeviceID *string + InsertID *string + EventID *int32 + DeviceSessionID *string } // LogBackendEvent is a convenience function for logging backend events. 
@@ -99,22 +102,25 @@ func LogEvents(ctx context.Context, db database.DB, events []Event) error { } type bigQueryEvent struct { - EventName string `json:"name"` - URL string `json:"url"` - AnonymousUserID string `json:"anonymous_user_id"` - FirstSourceURL string `json:"first_source_url"` - LastSourceURL string `json:"last_source_url"` - UserID int `json:"user_id"` - Source string `json:"source"` - Timestamp string `json:"timestamp"` - Version string `json:"version"` - FeatureFlags string `json:"feature_flags"` - CohortID *string `json:"cohort_id,omitempty"` - Referrer string `json:"referrer,omitempty"` - PublicArgument string `json:"public_argument"` - DeviceID *string `json:"device_id,omitempty"` - InsertID *string `json:"insert_id,omitempty"` - DeviceSessionID *string `json:"device_session_id,omitempty"` + EventName string `json:"name"` + URL string `json:"url"` + AnonymousUserID string `json:"anonymous_user_id"` + FirstSourceURL string `json:"first_source_url"` + LastSourceURL string `json:"last_source_url"` + UserID int `json:"user_id"` + Source string `json:"source"` + Timestamp string `json:"timestamp"` + Version string `json:"version"` + FeatureFlags string `json:"feature_flags"` + CohortID *string `json:"cohort_id,omitempty"` + Referrer string `json:"referrer,omitempty"` + OriginalReferrer string `json:"original_referrer"` + SessionReferrer string `json:"session_referrer"` + SessionFirstURL string `json:"session_first_url"` + PublicArgument string `json:"public_argument"` + DeviceID *string `json:"device_id,omitempty"` + InsertID *string `json:"insert_id,omitempty"` + DeviceSessionID *string `json:"device_session_id,omitempty"` } // publishSourcegraphDotComEvents publishes Sourcegraph.com events to BigQuery. 
@@ -154,6 +160,18 @@ func serializePublishSourcegraphDotComEvents(events []Event) ([]string, error) { if event.Referrer != nil { referrer = *event.Referrer } + originalReferrer := "" + if event.OriginalReferrer != nil { + originalReferrer = *event.OriginalReferrer + } + sessionReferrer := "" + if event.SessionReferrer != nil { + sessionReferrer = *event.SessionReferrer + } + sessionFirstURL := "" + if event.SessionFirstURL != nil { + sessionFirstURL = *event.SessionFirstURL + } featureFlagJSON, err := json.Marshal(event.EvaluatedFlagSet) if err != nil { return nil, err @@ -165,22 +183,25 @@ func serializePublishSourcegraphDotComEvents(events []Event) ([]string, error) { } pubsubEvent, err := json.Marshal(bigQueryEvent{ - EventName: event.EventName, - UserID: int(event.UserID), - AnonymousUserID: event.UserCookieID, - URL: url, - FirstSourceURL: firstSourceURL, - LastSourceURL: lastSourceURL, - Referrer: referrer, - Source: event.Source, - Timestamp: time.Now().UTC().Format(time.RFC3339), - Version: version.Version(), - FeatureFlags: string(featureFlagJSON), - CohortID: event.CohortID, - PublicArgument: string(event.PublicArgument), - DeviceID: event.DeviceID, - InsertID: event.InsertID, - DeviceSessionID: event.DeviceSessionID, + EventName: event.EventName, + UserID: int(event.UserID), + AnonymousUserID: event.UserCookieID, + URL: url, + FirstSourceURL: firstSourceURL, + LastSourceURL: lastSourceURL, + Referrer: referrer, + OriginalReferrer: originalReferrer, + SessionReferrer: sessionReferrer, + SessionFirstURL: sessionFirstURL, + Source: event.Source, + Timestamp: time.Now().UTC().Format(time.RFC3339), + Version: version.Version(), + FeatureFlags: string(featureFlagJSON), + CohortID: event.CohortID, + PublicArgument: string(event.PublicArgument), + DeviceID: event.DeviceID, + InsertID: event.InsertID, + DeviceSessionID: event.DeviceSessionID, }) if err != nil { return nil, err