mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 20:51:43 +00:00
Update sensitive metadata allowlist to filter on keys (#62830)
* Fix bedrock URL encoding to mimic AWS CLI (#62695) * Fix bedrock URL encoding to mimic AWS CLI * Update changelog * appliance: namespace scoping (#62663) Allow a namespace to be configured, defaulting to all namespaces. Without this setting, if an admin deploys the appliance with namespace-scoped RBAC, it would throw errors due to not being able to watch ConfigMaps in all namespaces. * bazel: migrate legacy postgres-12 dockerfile to rules_oci (#61963) * build-tracker: include error if failing to write to bigquery (#62699) Without this, this error won't be logged to Sentry, resulting in us missing it unless we check GCP ## Test plan Discussed with @jac * Svelte: Fix global header navigation layers (#62697) Fix global header navigation layers * msp/rollouts: remove Cloud Deploy target import (#62687) Now that #62644 (CORE-23) is rolled out, this import block is no longer needed (and may even be disruptive when provisioning new rollout pipelines). The change was rolled out in: - https://github.com/sourcegraph/managed-services/pull/1416 - https://github.com/sourcegraph/managed-services/pull/1417 - https://github.com/sourcegraph/managed-services/pull/1403 ## Test plan n/a * msp/cloudrun: use GA launch stage (#62685) VPC direct egress is now GA: see example in https://registry.terraform.io/providers/hashicorp/google/5.29.0/docs/resources/cloud_run_v2_service#example-usage---cloudrunv2-service-directvpc and https://cloud.google.com/run/docs/configuring/vpc-direct-vpc This also fixes the infinite `GA` -> `BETA` drift we have in TFC * Symbols: new backend integration test (#62686) This PR creates a new GraphQL integration test file focused on symbol search. It exercises the same searches the web client uses for code navigation. In a follow-up, we will add cases for older commits and enable Rockskip. * fix: update search timeout docs (#62692) * update telemetry sensitivemetadataallowlist to filter based on keys * fix main merge * Update BUILD.bazel * Update teestore_test.go * add better code-comments and error messaging * add test coverage on non-string types getting redacted with proper error value return * fix spacing! --------- Co-authored-by: Rik <rik.nauta@sourcegraph.com> Co-authored-by: Craig Furman <craig.furman@sourcegraph.com> Co-authored-by: Noah S-C <noah@sourcegraph.com> Co-authored-by: Vova Kulikov <vovakulikov@icloud.com> Co-authored-by: Robert Lin <robert@bobheadxi.dev> Co-authored-by: Julie Tibshirani <julietibs@apache.org> Co-authored-by: Michael Bahr <1830132+bahrmichael@users.noreply.github.com>
This commit is contained in:
parent
f12393dd4d
commit
9123e602da
@ -16,6 +16,7 @@ go_library(
|
||||
"//internal/telemetry",
|
||||
"//lib/errors",
|
||||
"//lib/telemetrygateway/v1:telemetrygateway",
|
||||
"@org_golang_google_protobuf//types/known/structpb",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@ -1,6 +1,11 @@
|
||||
package sensitivemetadataallowlist
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
|
||||
"google.golang.org/protobuf/types/known/structpb"
|
||||
|
||||
telemetrygatewayv1 "github.com/sourcegraph/sourcegraph/lib/telemetrygateway/v1"
|
||||
)
|
||||
|
||||
@ -10,23 +15,38 @@ type redactMode int
|
||||
|
||||
const (
|
||||
redactAllSensitive redactMode = iota
|
||||
// redactMarketing only redacts marketing-related fields.
|
||||
redactMarketing
|
||||
// redactMarketingAndUnallowedPrivateMetadataKeys only redacts marketing-related fields as well
|
||||
// as unallowed private metadata keys.
|
||||
redactMarketingAndUnallowedPrivateMetadataKeys
|
||||
// redactNothing is only used in dotocm mode.
|
||||
redactNothing
|
||||
)
|
||||
|
||||
// 🚨 SECURITY: Be very careful with the redaction mechanisms here, as it impacts
|
||||
// what data we export from customer Sourcegraph instances.
|
||||
func redactEvent(event *telemetrygatewayv1.Event, mode redactMode) {
|
||||
// redactNothing
|
||||
func redactEvent(event *telemetrygatewayv1.Event, mode redactMode, allowedPrivateMetadataKeys []string) {
|
||||
// redactNothing (generally only in dotcom)
|
||||
if mode >= redactNothing {
|
||||
return
|
||||
}
|
||||
|
||||
// redactMarketing
|
||||
// redactMarketingAndUnallowedPrivateMetadataKeys
|
||||
event.MarketingTracking = nil
|
||||
if mode >= redactMarketing {
|
||||
if mode >= redactMarketingAndUnallowedPrivateMetadataKeys {
|
||||
// Do private metadata redaction in this if case, as the next case will strip
|
||||
// everything
|
||||
for key, value := range event.Parameters.PrivateMetadata.Fields {
|
||||
if !slices.Contains(allowedPrivateMetadataKeys, key) {
|
||||
// Strip all keys that are NOT in the allowlist, as they are considered sensitive.
|
||||
// No need to check data types, since we're only dealing with keys.
|
||||
delete(event.Parameters.PrivateMetadata.Fields, key)
|
||||
} else if _, isString := value.Kind.(*structpb.Value_StringValue); !isString {
|
||||
// Strip all non-string values,even if they are in the allowlist
|
||||
// 🚨 This prevents exporting arbitrary data types with deep values, which could lead to uncontrolled data exposure.
|
||||
event.Parameters.PrivateMetadata.Fields[key] =
|
||||
structpb.NewStringValue(fmt.Sprintf("ERROR: value of allowlisted key was not a string, got: %T", value.Kind))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@ -3,7 +3,9 @@ package sensitivemetadataallowlist
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/hexops/autogold/v2"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"google.golang.org/protobuf/types/known/structpb"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/lib/pointers"
|
||||
@ -16,7 +18,15 @@ func TestRedactEvent(t *testing.T) {
|
||||
Parameters: &telemetrygatewayv1.EventParameters{
|
||||
PrivateMetadata: &structpb.Struct{
|
||||
Fields: map[string]*structpb.Value{
|
||||
"testField": structpb.NewBoolValue(true),
|
||||
"testField": structpb.NewStringValue("TestValue"),
|
||||
"notTestField": structpb.NewStringValue("notTestValue"),
|
||||
"nestedTestField": structpb.NewStructValue(&structpb.Struct{
|
||||
Fields: map[string]*structpb.Value{
|
||||
"testField": structpb.NewStringValue("TestValue"),
|
||||
"notTestField": structpb.NewStringValue("notTestValue"),
|
||||
},
|
||||
}),
|
||||
"boolTestField": structpb.NewBoolValue(true),
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -25,12 +35,12 @@ func TestRedactEvent(t *testing.T) {
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
mode redactMode
|
||||
event *telemetrygatewayv1.Event
|
||||
assert func(t *testing.T, got *telemetrygatewayv1.Event)
|
||||
name string
|
||||
mode redactMode
|
||||
event *telemetrygatewayv1.Event
|
||||
allowedKeys []string
|
||||
assert func(t *testing.T, got *telemetrygatewayv1.Event)
|
||||
}{
|
||||
{
|
||||
name: "redact all sensitive",
|
||||
@ -52,11 +62,30 @@ func TestRedactEvent(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "redact marketing",
|
||||
mode: redactMarketing,
|
||||
mode: redactMarketingAndUnallowedPrivateMetadataKeys,
|
||||
event: makeFullEvent(),
|
||||
allowedKeys: []string{
|
||||
"testField",
|
||||
},
|
||||
assert: func(t *testing.T, got *telemetrygatewayv1.Event) {
|
||||
assert.NotNil(t, got.Parameters.PrivateMetadata)
|
||||
assert.Nil(t, got.MarketingTracking)
|
||||
require.NotNil(t, got.Parameters.PrivateMetadata)
|
||||
assert.NotNil(t, got.Parameters.PrivateMetadata.Fields["testField"])
|
||||
assert.Nil(t, got.Parameters.PrivateMetadata.Fields["notTestField"])
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "redact non-string type on allowlist",
|
||||
mode: redactMarketingAndUnallowedPrivateMetadataKeys,
|
||||
event: makeFullEvent(),
|
||||
allowedKeys: []string{
|
||||
"boolTestField",
|
||||
"nestedTestField",
|
||||
},
|
||||
assert: func(t *testing.T, got *telemetrygatewayv1.Event) {
|
||||
// check that non-string types are redacted, only string types are allowed on allowlist
|
||||
autogold.Expect("ERROR: value of allowlisted key was not a string, got: *structpb.Value_BoolValue").Equal(t, got.Parameters.PrivateMetadata.Fields["boolTestField"].GetStringValue())
|
||||
autogold.Expect("ERROR: value of allowlisted key was not a string, got: *structpb.Value_StructValue").Equal(t, got.Parameters.PrivateMetadata.Fields["nestedTestField"].GetStringValue())
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -72,7 +101,7 @@ func TestRedactEvent(t *testing.T) {
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ev := makeFullEvent()
|
||||
redactEvent(ev, tc.mode)
|
||||
redactEvent(ev, tc.mode, tc.allowedKeys)
|
||||
tc.assert(t, ev)
|
||||
})
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@ import (
|
||||
)
|
||||
|
||||
var rawAdditionalAllowedEventTypes = env.Get("SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES", "",
|
||||
"Additional event types to include in sensitivemetadataallowlist.AllowedEventTypes, in comma-separated '${feature}::${action}' format.")
|
||||
"Additional event types to include in sensitivemetadataallowlist.AllowedEventTypes, in comma-separated '${feature}::${action}::${key1}::${key2}' format.")
|
||||
var additionalAllowedEventTypes = func() []EventType {
|
||||
types, err := parseAdditionalAllowedEventTypes(rawAdditionalAllowedEventTypes)
|
||||
if err != nil {
|
||||
@ -27,23 +27,47 @@ var additionalAllowedEventTypes = func() []EventType {
|
||||
func AllowedEventTypes() EventTypes {
|
||||
return eventTypes(append(additionalAllowedEventTypes,
|
||||
// Example event for testing.
|
||||
// Always provide a reason for allowlisting an event.
|
||||
EventType{
|
||||
Feature: string(telemetry.FeatureExample),
|
||||
Action: string(telemetry.ActionExample),
|
||||
AllowedPrivateMetadataKeys: []string{
|
||||
"testField",
|
||||
},
|
||||
},
|
||||
// The 'languageId' key is included for feature:'cody.completions' action:suggested/accepted events to provide
|
||||
// customers with valuable language-specific insights from the analytics we offer.
|
||||
// This information helps them better understand code completion usage patterns.
|
||||
EventType{
|
||||
Feature: "cody.completions",
|
||||
Action: "suggested",
|
||||
AllowedPrivateMetadataKeys: []string{
|
||||
"languageId",
|
||||
},
|
||||
},
|
||||
// The 'languageId' key is included for feature:'cody.completions' action:suggested/accepted events to provide
|
||||
// customers with valuable language-specific insights from the analytics we offer.
|
||||
// This information helps them better understand code completion usage patterns.
|
||||
EventType{
|
||||
Feature: "cody.completions",
|
||||
Action: "accepted",
|
||||
AllowedPrivateMetadataKeys: []string{
|
||||
"languageId",
|
||||
},
|
||||
},
|
||||
)...)
|
||||
}
|
||||
|
||||
type EventTypes struct {
|
||||
types []EventType
|
||||
// index of '{feature}.{action}' for checking
|
||||
index map[string]struct{}
|
||||
// index of '{feature}.{action}:{allowedfields}' for checking
|
||||
index map[string][]string
|
||||
}
|
||||
|
||||
func eventTypes(types ...EventType) EventTypes {
|
||||
index := make(map[string]struct{}, len(types))
|
||||
index := make(map[string][]string, len(types))
|
||||
for _, t := range types {
|
||||
index[fmt.Sprintf("%s.%s", t.Feature, t.Action)] = struct{}{}
|
||||
index[fmt.Sprintf("%s.%s", t.Feature, t.Action)] = t.AllowedPrivateMetadataKeys
|
||||
}
|
||||
return EventTypes{types: types, index: index}
|
||||
}
|
||||
@ -53,20 +77,20 @@ func eventTypes(types ...EventType) EventTypes {
|
||||
// 🚨 SECURITY: Be very careful with the redaction modes used here, as it impacts
|
||||
// what data we export from customer Sourcegraph instances.
|
||||
func (e EventTypes) Redact(event *telemetrygatewayv1.Event) {
|
||||
rm := redactAllSensitive
|
||||
if dotcom.SourcegraphDotComMode() {
|
||||
rm = redactNothing
|
||||
} else if e.IsAllowed(event) {
|
||||
rm = redactMarketing
|
||||
redactEvent(event, redactNothing, nil)
|
||||
} else if keys, allowed := e.IsAllowed(event); allowed {
|
||||
redactEvent(event, redactMarketingAndUnallowedPrivateMetadataKeys, keys)
|
||||
}
|
||||
redactEvent(event, rm)
|
||||
redactEvent(event, redactAllSensitive, nil)
|
||||
}
|
||||
|
||||
// IsAllowed indicates an event is on the sensitive telemetry allowlist.
|
||||
func (e EventTypes) IsAllowed(event *telemetrygatewayv1.Event) bool {
|
||||
// IsAllowed indicates an event is on the sensitive telemetry allowlist, and the fields that
|
||||
// are allowed.
|
||||
func (e EventTypes) IsAllowed(event *telemetrygatewayv1.Event) ([]string, bool) {
|
||||
key := fmt.Sprintf("%s.%s", event.GetFeature(), event.GetAction())
|
||||
_, allowed := e.index[key]
|
||||
return allowed
|
||||
allowedKeys, allowed := e.index[key]
|
||||
return allowedKeys, allowed
|
||||
}
|
||||
|
||||
func (e EventTypes) validate() error {
|
||||
@ -81,14 +105,20 @@ func (e EventTypes) validate() error {
|
||||
type EventType struct {
|
||||
Feature string
|
||||
Action string
|
||||
|
||||
// Future: maybe restrict to specific, known private metadata fields as well
|
||||
// AllowedPrivateMetadataKeys is a slice of strings representing the top-level field names
|
||||
// from the `privateMetadata` object that ARE allowed to be exported for ALL users. Any field not present
|
||||
// in this slice will be considered sensitive and redacted during the export process.
|
||||
// ONLY STRING fields at the top-level of the `privateMetadata` object are permitted.
|
||||
AllowedPrivateMetadataKeys []string
|
||||
}
|
||||
|
||||
func (e EventType) validate() error {
|
||||
if e.Feature == "" || e.Action == "" {
|
||||
return errors.New("feature and action are required")
|
||||
}
|
||||
if len(e.AllowedPrivateMetadataKeys) == 0 {
|
||||
return errors.New("allowedPrivateMetadataKeys are required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -105,15 +135,22 @@ func parseAdditionalAllowedEventTypes(config string) ([]EventType, error) {
|
||||
|
||||
var types []EventType
|
||||
for _, rawType := range strings.Split(config, ",") {
|
||||
parts := strings.SplitN(rawType, "::", 2)
|
||||
if len(parts) != 2 {
|
||||
parts := strings.Split(rawType, "::")
|
||||
if len(parts) < 2 {
|
||||
return nil, errors.Newf(
|
||||
"cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value %q",
|
||||
rawType)
|
||||
}
|
||||
// indicates that the user has not specified any allowlisted fields
|
||||
if len(parts) < 3 {
|
||||
return nil, errors.Newf(
|
||||
"cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value %q, missing allowlisted fields",
|
||||
rawType)
|
||||
}
|
||||
types = append(types, EventType{
|
||||
Feature: parts[0],
|
||||
Action: parts[1],
|
||||
Feature: parts[0],
|
||||
Action: parts[1],
|
||||
AllowedPrivateMetadataKeys: parts[2:],
|
||||
})
|
||||
}
|
||||
return types, nil
|
||||
|
||||
@ -14,14 +14,48 @@ import (
|
||||
func TestIsAllowed(t *testing.T) {
|
||||
allowedTypes := AllowedEventTypes()
|
||||
require.NotEmpty(t, allowedTypes)
|
||||
assert.True(t, allowedTypes.IsAllowed(&v1.Event{
|
||||
Feature: string(telemetry.FeatureExample),
|
||||
Action: string(telemetry.ActionExample),
|
||||
}))
|
||||
assert.False(t, allowedTypes.IsAllowed(&v1.Event{
|
||||
Feature: "disallowedFeature",
|
||||
Action: "disallowedAction",
|
||||
}))
|
||||
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
event *v1.Event
|
||||
expectAllowed bool
|
||||
expectAllowlist []string
|
||||
}{
|
||||
{
|
||||
name: "allowed event",
|
||||
event: &v1.Event{
|
||||
Feature: string(telemetry.FeatureExample),
|
||||
Action: string(telemetry.ActionExample),
|
||||
},
|
||||
expectAllowed: true,
|
||||
expectAllowlist: []string{"testField"},
|
||||
},
|
||||
{
|
||||
name: "disallowed event",
|
||||
event: &v1.Event{
|
||||
Feature: "disallowedFeature",
|
||||
Action: "disallowedAction",
|
||||
},
|
||||
expectAllowed: false,
|
||||
},
|
||||
{
|
||||
name: "disallowed event with additional allowed event type",
|
||||
event: &v1.Event{
|
||||
Feature: "cody.completions",
|
||||
Action: "accepted",
|
||||
},
|
||||
expectAllowed: true,
|
||||
expectAllowlist: []string{
|
||||
"languageId",
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
allowedKeys, allowed := allowedTypes.IsAllowed(tc.event)
|
||||
assert.Equal(t, tc.expectAllowed, allowed)
|
||||
assert.Equal(t, tc.expectAllowlist, allowedKeys)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAdditionalAllowedEventTypes(t *testing.T) {
|
||||
@ -37,25 +71,39 @@ func TestParseAdditionalAllowedEventTypes(t *testing.T) {
|
||||
config: "asdf,foobar",
|
||||
expectError: autogold.Expect(`cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value "asdf"`),
|
||||
},
|
||||
{
|
||||
name: "invalid, no fields",
|
||||
config: "foo::bar",
|
||||
expectError: autogold.Expect(`cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value "foo::bar", missing allowlisted fields`),
|
||||
},
|
||||
{
|
||||
name: "1 type",
|
||||
config: "foo::bar",
|
||||
config: "foo::bar::field",
|
||||
expect: autogold.Expect([]EventType{{
|
||||
Feature: "foo",
|
||||
Action: "bar",
|
||||
AllowedPrivateMetadataKeys: []string{
|
||||
"field",
|
||||
},
|
||||
}}),
|
||||
},
|
||||
{
|
||||
name: "multiple types",
|
||||
config: "foo::bar,baz.bar::bar.baz",
|
||||
config: "foo::bar::field::field2,baz.bar::bar.baz::field",
|
||||
expect: autogold.Expect([]EventType{
|
||||
{
|
||||
Feature: "foo",
|
||||
Action: "bar",
|
||||
AllowedPrivateMetadataKeys: []string{
|
||||
"field", "field2",
|
||||
},
|
||||
},
|
||||
{
|
||||
Feature: "baz.bar",
|
||||
Action: "bar.baz",
|
||||
AllowedPrivateMetadataKeys: []string{
|
||||
"field",
|
||||
},
|
||||
},
|
||||
}),
|
||||
},
|
||||
|
||||
@ -130,7 +130,8 @@ func toEventLogs(now func() time.Time, telemetryEvents []*telemetrygatewayv1.Eve
|
||||
|
||||
// Attach a simple indicator to denote if this metadata will
|
||||
// be exported.
|
||||
md["telemetry.privateMetadata.exportable"] = sensitiveMetadataAllowlist.IsAllowed(e)
|
||||
md["telemetry.privateMetadata.exportableFields"], md["telemetry.privateMetadata.exportable"] =
|
||||
sensitiveMetadataAllowlist.IsAllowed(e)
|
||||
|
||||
data, err := json.Marshal(md)
|
||||
if err != nil {
|
||||
|
||||
@ -253,7 +253,8 @@ func TestToEventLogs(t *testing.T) {
|
||||
"AnonymousUserID": "anonymous",
|
||||
"Argument": {
|
||||
"private": "sensitive-data",
|
||||
"telemetry.privateMetadata.exportable": false
|
||||
"telemetry.privateMetadata.exportable": false,
|
||||
"telemetry.privateMetadata.exportableFields": null
|
||||
},
|
||||
"PublicArgument": {
|
||||
"public": 2,
|
||||
@ -318,7 +319,8 @@ func TestToEventLogs(t *testing.T) {
|
||||
"AnonymousUserID": "anonymous",
|
||||
"Argument": {
|
||||
"private": "sensitive-data",
|
||||
"telemetry.privateMetadata.exportable": false
|
||||
"telemetry.privateMetadata.exportable": false,
|
||||
"telemetry.privateMetadata.exportableFields": null
|
||||
},
|
||||
"PublicArgument": {
|
||||
"interaction.traceID": "01020304050607080102040810203040",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user