Update sensitive metadata allowlist to filter on keys (#62830)

* Fix bedrock URL encoding to mimic AWS CLI (#62695)

* Fix bedrock URL encoding to mimic AWS CLI

* Update changelog

* appliance: namespace scoping (#62663)

Allow a namespace to be configured, defaulting to all namespaces.
Without this setting, if an admin deploys the appliance with
namespace-scoped RBAC, it would throw errors due to not being able to
watch ConfigMaps in all namespaces.

* bazel: migrate legacy postgres-12 dockerfile to rules_oci (#61963)

* build-tracker: include error if failing to write to bigquery (#62699)

Without this, this error won't be logged to Sentry, resulting in us missing it unless we check GCP

## Test plan

Discussed with @jac

* Svelte: Fix global header navigation layers (#62697)

Fix global header navigation layers

* msp/rollouts: remove Cloud Deploy target import (#62687)

Now that #62644 (CORE-23) is rolled out, this import block is no longer needed (and may even be disruptive when provisioning new rollout pipelines). The change was rolled out in:

- https://github.com/sourcegraph/managed-services/pull/1416
- https://github.com/sourcegraph/managed-services/pull/1417
- https://github.com/sourcegraph/managed-services/pull/1403

## Test plan

n/a

* msp/cloudrun: use GA launch stage (#62685)

VPC direct egress is now GA: see example in https://registry.terraform.io/providers/hashicorp/google/5.29.0/docs/resources/cloud_run_v2_service#example-usage---cloudrunv2-service-directvpc and https://cloud.google.com/run/docs/configuring/vpc-direct-vpc

This also fixes the infinite `GA` -> `BETA` drift we have in TFC

* Symbols: new backend integration test (#62686)

This PR creates a new GraphQL integration test file focused on symbol search.
It exercises the same searches the web client uses for code navigation.

In a follow-up, we will add cases for older commits and enable Rockskip.

* fix: update search timeout docs (#62692)

* update telemetry sensitivemetadataallowlist to filter based on keys

* fix main merge

* Update BUILD.bazel

* Update teestore_test.go

* add better code-comments and error messaging

* add test coverage on non-string types getting redacted with proper error value return

* fix spacing!

---------

Co-authored-by: Rik <rik.nauta@sourcegraph.com>
Co-authored-by: Craig Furman <craig.furman@sourcegraph.com>
Co-authored-by: Noah S-C <noah@sourcegraph.com>
Co-authored-by: Vova Kulikov <vovakulikov@icloud.com>
Co-authored-by: Robert Lin <robert@bobheadxi.dev>
Co-authored-by: Julie Tibshirani <julietibs@apache.org>
Co-authored-by: Michael Bahr <1830132+bahrmichael@users.noreply.github.com>
This commit is contained in:
Aditya Kalia 2024-05-22 16:29:06 -04:00 committed by GitHub
parent f12393dd4d
commit 9123e602da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 186 additions and 48 deletions

View File

@ -16,6 +16,7 @@ go_library(
"//internal/telemetry",
"//lib/errors",
"//lib/telemetrygateway/v1:telemetrygateway",
"@org_golang_google_protobuf//types/known/structpb",
],
)

View File

@ -1,6 +1,11 @@
package sensitivemetadataallowlist
import (
"fmt"
"slices"
"google.golang.org/protobuf/types/known/structpb"
telemetrygatewayv1 "github.com/sourcegraph/sourcegraph/lib/telemetrygateway/v1"
)
@ -10,23 +15,38 @@ type redactMode int
const (
redactAllSensitive redactMode = iota
// redactMarketing only redacts marketing-related fields.
redactMarketing
// redactMarketingAndUnallowedPrivateMetadataKeys only redacts marketing-related fields as well
// as unallowed private metadata keys.
redactMarketingAndUnallowedPrivateMetadataKeys
// redactNothing is only used in dotocm mode.
redactNothing
)
// 🚨 SECURITY: Be very careful with the redaction mechanisms here, as it impacts
// what data we export from customer Sourcegraph instances.
func redactEvent(event *telemetrygatewayv1.Event, mode redactMode) {
// redactNothing
func redactEvent(event *telemetrygatewayv1.Event, mode redactMode, allowedPrivateMetadataKeys []string) {
// redactNothing (generally only in dotcom)
if mode >= redactNothing {
return
}
// redactMarketing
// redactMarketingAndUnallowedPrivateMetadataKeys
event.MarketingTracking = nil
if mode >= redactMarketing {
if mode >= redactMarketingAndUnallowedPrivateMetadataKeys {
// Do private metadata redaction in this if case, as the next case will strip
// everything
for key, value := range event.Parameters.PrivateMetadata.Fields {
if !slices.Contains(allowedPrivateMetadataKeys, key) {
// Strip all keys that are NOT in the allowlist, as they are considered sensitive.
// No need to check data types, since we're only dealing with keys.
delete(event.Parameters.PrivateMetadata.Fields, key)
} else if _, isString := value.Kind.(*structpb.Value_StringValue); !isString {
// Strip all non-string values,even if they are in the allowlist
// 🚨 This prevents exporting arbitrary data types with deep values, which could lead to uncontrolled data exposure.
event.Parameters.PrivateMetadata.Fields[key] =
structpb.NewStringValue(fmt.Sprintf("ERROR: value of allowlisted key was not a string, got: %T", value.Kind))
}
}
return
}

View File

@ -3,7 +3,9 @@ package sensitivemetadataallowlist
import (
"testing"
"github.com/hexops/autogold/v2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/types/known/structpb"
"github.com/sourcegraph/sourcegraph/lib/pointers"
@ -16,7 +18,15 @@ func TestRedactEvent(t *testing.T) {
Parameters: &telemetrygatewayv1.EventParameters{
PrivateMetadata: &structpb.Struct{
Fields: map[string]*structpb.Value{
"testField": structpb.NewBoolValue(true),
"testField": structpb.NewStringValue("TestValue"),
"notTestField": structpb.NewStringValue("notTestValue"),
"nestedTestField": structpb.NewStructValue(&structpb.Struct{
Fields: map[string]*structpb.Value{
"testField": structpb.NewStringValue("TestValue"),
"notTestField": structpb.NewStringValue("notTestValue"),
},
}),
"boolTestField": structpb.NewBoolValue(true),
},
},
},
@ -25,12 +35,12 @@ func TestRedactEvent(t *testing.T) {
},
}
}
tests := []struct {
name string
mode redactMode
event *telemetrygatewayv1.Event
assert func(t *testing.T, got *telemetrygatewayv1.Event)
name string
mode redactMode
event *telemetrygatewayv1.Event
allowedKeys []string
assert func(t *testing.T, got *telemetrygatewayv1.Event)
}{
{
name: "redact all sensitive",
@ -52,11 +62,30 @@ func TestRedactEvent(t *testing.T) {
},
{
name: "redact marketing",
mode: redactMarketing,
mode: redactMarketingAndUnallowedPrivateMetadataKeys,
event: makeFullEvent(),
allowedKeys: []string{
"testField",
},
assert: func(t *testing.T, got *telemetrygatewayv1.Event) {
assert.NotNil(t, got.Parameters.PrivateMetadata)
assert.Nil(t, got.MarketingTracking)
require.NotNil(t, got.Parameters.PrivateMetadata)
assert.NotNil(t, got.Parameters.PrivateMetadata.Fields["testField"])
assert.Nil(t, got.Parameters.PrivateMetadata.Fields["notTestField"])
},
},
{
name: "redact non-string type on allowlist",
mode: redactMarketingAndUnallowedPrivateMetadataKeys,
event: makeFullEvent(),
allowedKeys: []string{
"boolTestField",
"nestedTestField",
},
assert: func(t *testing.T, got *telemetrygatewayv1.Event) {
// check that non-string types are redacted, only string types are allowed on allowlist
autogold.Expect("ERROR: value of allowlisted key was not a string, got: *structpb.Value_BoolValue").Equal(t, got.Parameters.PrivateMetadata.Fields["boolTestField"].GetStringValue())
autogold.Expect("ERROR: value of allowlisted key was not a string, got: *structpb.Value_StructValue").Equal(t, got.Parameters.PrivateMetadata.Fields["nestedTestField"].GetStringValue())
},
},
{
@ -72,7 +101,7 @@ func TestRedactEvent(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
ev := makeFullEvent()
redactEvent(ev, tc.mode)
redactEvent(ev, tc.mode, tc.allowedKeys)
tc.assert(t, ev)
})
}

View File

@ -13,7 +13,7 @@ import (
)
var rawAdditionalAllowedEventTypes = env.Get("SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES", "",
"Additional event types to include in sensitivemetadataallowlist.AllowedEventTypes, in comma-separated '${feature}::${action}' format.")
"Additional event types to include in sensitivemetadataallowlist.AllowedEventTypes, in comma-separated '${feature}::${action}::${key1}::${key2}' format.")
var additionalAllowedEventTypes = func() []EventType {
types, err := parseAdditionalAllowedEventTypes(rawAdditionalAllowedEventTypes)
if err != nil {
@ -27,23 +27,47 @@ var additionalAllowedEventTypes = func() []EventType {
func AllowedEventTypes() EventTypes {
return eventTypes(append(additionalAllowedEventTypes,
// Example event for testing.
// Always provide a reason for allowlisting an event.
EventType{
Feature: string(telemetry.FeatureExample),
Action: string(telemetry.ActionExample),
AllowedPrivateMetadataKeys: []string{
"testField",
},
},
// The 'languageId' key is included for feature:'cody.completions' action:suggested/accepted events to provide
// customers with valuable language-specific insights from the analytics we offer.
// This information helps them better understand code completion usage patterns.
EventType{
Feature: "cody.completions",
Action: "suggested",
AllowedPrivateMetadataKeys: []string{
"languageId",
},
},
// The 'languageId' key is included for feature:'cody.completions' action:suggested/accepted events to provide
// customers with valuable language-specific insights from the analytics we offer.
// This information helps them better understand code completion usage patterns.
EventType{
Feature: "cody.completions",
Action: "accepted",
AllowedPrivateMetadataKeys: []string{
"languageId",
},
},
)...)
}
type EventTypes struct {
types []EventType
// index of '{feature}.{action}' for checking
index map[string]struct{}
// index of '{feature}.{action}:{allowedfields}' for checking
index map[string][]string
}
func eventTypes(types ...EventType) EventTypes {
index := make(map[string]struct{}, len(types))
index := make(map[string][]string, len(types))
for _, t := range types {
index[fmt.Sprintf("%s.%s", t.Feature, t.Action)] = struct{}{}
index[fmt.Sprintf("%s.%s", t.Feature, t.Action)] = t.AllowedPrivateMetadataKeys
}
return EventTypes{types: types, index: index}
}
@ -53,20 +77,20 @@ func eventTypes(types ...EventType) EventTypes {
// 🚨 SECURITY: Be very careful with the redaction modes used here, as it impacts
// what data we export from customer Sourcegraph instances.
func (e EventTypes) Redact(event *telemetrygatewayv1.Event) {
rm := redactAllSensitive
if dotcom.SourcegraphDotComMode() {
rm = redactNothing
} else if e.IsAllowed(event) {
rm = redactMarketing
redactEvent(event, redactNothing, nil)
} else if keys, allowed := e.IsAllowed(event); allowed {
redactEvent(event, redactMarketingAndUnallowedPrivateMetadataKeys, keys)
}
redactEvent(event, rm)
redactEvent(event, redactAllSensitive, nil)
}
// IsAllowed indicates an event is on the sensitive telemetry allowlist.
func (e EventTypes) IsAllowed(event *telemetrygatewayv1.Event) bool {
// IsAllowed indicates an event is on the sensitive telemetry allowlist, and the fields that
// are allowed.
func (e EventTypes) IsAllowed(event *telemetrygatewayv1.Event) ([]string, bool) {
key := fmt.Sprintf("%s.%s", event.GetFeature(), event.GetAction())
_, allowed := e.index[key]
return allowed
allowedKeys, allowed := e.index[key]
return allowedKeys, allowed
}
func (e EventTypes) validate() error {
@ -81,14 +105,20 @@ func (e EventTypes) validate() error {
type EventType struct {
Feature string
Action string
// Future: maybe restrict to specific, known private metadata fields as well
// AllowedPrivateMetadataKeys is a slice of strings representing the top-level field names
// from the `privateMetadata` object that ARE allowed to be exported for ALL users. Any field not present
// in this slice will be considered sensitive and redacted during the export process.
// ONLY STRING fields at the top-level of the `privateMetadata` object are permitted.
AllowedPrivateMetadataKeys []string
}
func (e EventType) validate() error {
if e.Feature == "" || e.Action == "" {
return errors.New("feature and action are required")
}
if len(e.AllowedPrivateMetadataKeys) == 0 {
return errors.New("allowedPrivateMetadataKeys are required")
}
return nil
}
@ -105,15 +135,22 @@ func parseAdditionalAllowedEventTypes(config string) ([]EventType, error) {
var types []EventType
for _, rawType := range strings.Split(config, ",") {
parts := strings.SplitN(rawType, "::", 2)
if len(parts) != 2 {
parts := strings.Split(rawType, "::")
if len(parts) < 2 {
return nil, errors.Newf(
"cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value %q",
rawType)
}
// indicates that the user has not specified any allowlisted fields
if len(parts) < 3 {
return nil, errors.Newf(
"cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value %q, missing allowlisted fields",
rawType)
}
types = append(types, EventType{
Feature: parts[0],
Action: parts[1],
Feature: parts[0],
Action: parts[1],
AllowedPrivateMetadataKeys: parts[2:],
})
}
return types, nil

View File

@ -14,14 +14,48 @@ import (
func TestIsAllowed(t *testing.T) {
allowedTypes := AllowedEventTypes()
require.NotEmpty(t, allowedTypes)
assert.True(t, allowedTypes.IsAllowed(&v1.Event{
Feature: string(telemetry.FeatureExample),
Action: string(telemetry.ActionExample),
}))
assert.False(t, allowedTypes.IsAllowed(&v1.Event{
Feature: "disallowedFeature",
Action: "disallowedAction",
}))
for _, tc := range []struct {
name string
event *v1.Event
expectAllowed bool
expectAllowlist []string
}{
{
name: "allowed event",
event: &v1.Event{
Feature: string(telemetry.FeatureExample),
Action: string(telemetry.ActionExample),
},
expectAllowed: true,
expectAllowlist: []string{"testField"},
},
{
name: "disallowed event",
event: &v1.Event{
Feature: "disallowedFeature",
Action: "disallowedAction",
},
expectAllowed: false,
},
{
name: "disallowed event with additional allowed event type",
event: &v1.Event{
Feature: "cody.completions",
Action: "accepted",
},
expectAllowed: true,
expectAllowlist: []string{
"languageId",
},
},
} {
t.Run(tc.name, func(t *testing.T) {
allowedKeys, allowed := allowedTypes.IsAllowed(tc.event)
assert.Equal(t, tc.expectAllowed, allowed)
assert.Equal(t, tc.expectAllowlist, allowedKeys)
})
}
}
func TestParseAdditionalAllowedEventTypes(t *testing.T) {
@ -37,25 +71,39 @@ func TestParseAdditionalAllowedEventTypes(t *testing.T) {
config: "asdf,foobar",
expectError: autogold.Expect(`cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value "asdf"`),
},
{
name: "invalid, no fields",
config: "foo::bar",
expectError: autogold.Expect(`cannot parse SRC_TELEMETRY_SENSITIVEMETADATA_ADDITIONAL_ALLOWED_EVENT_TYPES value "foo::bar", missing allowlisted fields`),
},
{
name: "1 type",
config: "foo::bar",
config: "foo::bar::field",
expect: autogold.Expect([]EventType{{
Feature: "foo",
Action: "bar",
AllowedPrivateMetadataKeys: []string{
"field",
},
}}),
},
{
name: "multiple types",
config: "foo::bar,baz.bar::bar.baz",
config: "foo::bar::field::field2,baz.bar::bar.baz::field",
expect: autogold.Expect([]EventType{
{
Feature: "foo",
Action: "bar",
AllowedPrivateMetadataKeys: []string{
"field", "field2",
},
},
{
Feature: "baz.bar",
Action: "bar.baz",
AllowedPrivateMetadataKeys: []string{
"field",
},
},
}),
},

View File

@ -130,7 +130,8 @@ func toEventLogs(now func() time.Time, telemetryEvents []*telemetrygatewayv1.Eve
// Attach a simple indicator to denote if this metadata will
// be exported.
md["telemetry.privateMetadata.exportable"] = sensitiveMetadataAllowlist.IsAllowed(e)
md["telemetry.privateMetadata.exportableFields"], md["telemetry.privateMetadata.exportable"] =
sensitiveMetadataAllowlist.IsAllowed(e)
data, err := json.Marshal(md)
if err != nil {

View File

@ -253,7 +253,8 @@ func TestToEventLogs(t *testing.T) {
"AnonymousUserID": "anonymous",
"Argument": {
"private": "sensitive-data",
"telemetry.privateMetadata.exportable": false
"telemetry.privateMetadata.exportable": false,
"telemetry.privateMetadata.exportableFields": null
},
"PublicArgument": {
"public": 2,
@ -318,7 +319,8 @@ func TestToEventLogs(t *testing.T) {
"AnonymousUserID": "anonymous",
"Argument": {
"private": "sensitive-data",
"telemetry.privateMetadata.exportable": false
"telemetry.privateMetadata.exportable": false,
"telemetry.privateMetadata.exportableFields": null
},
"PublicArgument": {
"interaction.traceID": "01020304050607080102040810203040",