feat/msp: allow enablement of logical replication features for Datastream (#63092)

Adds a new `postgreSQL.logicalReplication` configuration to allow MSP to
generate prerequisite setup for integration with Datastream:
https://cloud.google.com/datastream/docs/sources-postgresql. Integration
with Datastream allows the Data Analytics team to self-serve data
enrichment needs for the Telemetry V2 pipeline.

Enabling this feature entails downtime (Cloud SQL instance restart), so
enabling the logical replication feature at the Cloud SQL level
(`cloudsql.logical_decoding`) is gated behind
`postgreSQL.logicalReplication: {}`.

Setting up the required stuff in Postgres is a bit complicated,
requiring 3 Postgres provider instances:

1. The default admin one, authenticated with our admin user
2. New: a workload identity provider, using
https://github.com/cyrilgdn/terraform-provider-postgresql/pull/448 /
https://github.com/sourcegraph/managed-services-platform-cdktf/pull/11.
This is required for creating a publication on selected tables, which
requires being owner of said table. Because tables are created by
application using e.g. auto-migrate, the workload identity is always the
table owner, so we need to impersonate the IAM user
3. New: a "replication user" which is created with the replication
permission. Replication seems to not be a propagated permission so we
need a role/user that has replication enabled.

A bit more context scattered here and there in the docstrings.

Beyond the Postgres configuration we also introduce some additional
resources to enable easy Datastream configuration:

1. Datastream Private Connection, which peers to the service private
network
2. Cloud SQL Proxy VM, which only allows connections to `:5432` from the
range specified in 1, allowing a connection to the Cloud SQL instance
3. Datastream Connection Profile attached to 1

From there, data team can click-ops or manage the Datastream Stream and
BigQuery destination on their own.

Closes CORE-165
Closes CORE-212

Sample config:

```yaml
  resources:
    postgreSQL:
      databases:
        - "primary"
      logicalReplication:
        publications:
          - name: testing
            database: primary
            tables:
              - users
```

## Test plan

https://github.com/sourcegraph/managed-services/pull/1569

## Changelog

- MSP services can now configure `postgreSQL.logicalReplication` to
enable Data Analytics team to replicate selected database tables into
BigQuery.
This commit is contained in:
Robert Lin 2024-07-05 11:24:44 -07:00 committed by GitHub
parent f6fe8df922
commit 28348e7c80
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 667 additions and 25 deletions

View File

@ -5809,8 +5809,8 @@ def go_dependencies():
name = "com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql",
build_file_proto_mode = "disable_global",
importpath = "github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql",
sum = "h1:Jy9vKM1mtyJYgx/DRSDftIuhL2MFO6esU84uj4deNn4=",
version = "v0.0.0-20240513203650-e2b1273f1c1a",
sum = "h1:t0hSCAvffnF3VAlSW3M9eeeubpMF6TieVc52vKLT98o=",
version = "v0.0.0-20240617210115-f286e77e83e8",
)
go_repository(
name = "com_github_sourcegraph_managed_services_platform_cdktf_gen_random",

View File

@ -77,6 +77,13 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
Value: pointers.Stringf("%d", *config.Spec.MaxConnections),
})
}
if config.Spec.LogicalReplication != nil {
// https://cloud.google.com/sql/docs/postgres/replication/configure-logical-replication#set-up-native-postgresql-logical-replication
databaseFlags = append(databaseFlags, sqldatabaseinstance.SqlDatabaseInstanceSettingsDatabaseFlags{
Name: pointers.Ptr("cloudsql.logical_decoding"),
Value: pointers.Ptr("on"),
})
}
instance := sqldatabaseinstance.NewSqlDatabaseInstance(scope, id.TerraformID("instance"), &sqldatabaseinstance.SqlDatabaseInstanceConfig{
Project: &config.ProjectID,
@ -148,7 +155,12 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
IpConfiguration: &sqldatabaseinstance.SqlDatabaseInstanceSettingsIpConfiguration{
Ipv4Enabled: pointers.Ptr(true),
PrivateNetwork: config.Network.Id(),
RequireSsl: pointers.Ptr(true),
// https://cloud.google.com/sql/docs/postgres/admin-api/rest/v1beta4/instances#SslMode
RequireSsl: pointers.Ptr(true),
SslMode: pointers.Ptr("TRUSTED_CLIENT_CERTIFICATE_REQUIRED"),
EnablePrivatePathForGoogleCloudServices: pointers.Ptr(true),
},
},
@ -194,6 +206,7 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
Length: pointers.Float64(32),
Special: pointers.Ptr(false),
})
// sqluser.NewSqlUser has 'cloudsqlsuperuser' by default
adminUser := sqluser.NewSqlUser(scope, id.TerraformID("admin_user"), &sqluser.SqlUserConfig{
Instance: instance.Name(),
Project: &config.ProjectID,

View File

@ -0,0 +1,22 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# Go library target for the datastreamconnection resource package. It is only
# visible to packages under //dev/managedservicesplatform.
go_library(
name = "datastreamconnection",
srcs = ["datastreamconnection.go"],
importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/datastreamconnection",
visibility = ["//dev/managedservicesplatform:__subpackages__"],
deps = [
"//dev/managedservicesplatform/internal/resource/cloudsql",
"//dev/managedservicesplatform/internal/resource/postgresqllogicalreplication",
"//dev/managedservicesplatform/internal/resource/privatenetwork",
"//dev/managedservicesplatform/internal/resource/serviceaccount",
"//dev/managedservicesplatform/internal/resourceid",
"//lib/pointers",
"@com_github_aws_constructs_go_constructs_v10//:constructs",
"@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computefirewall",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computeinstance",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//datastreamconnectionprofile",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//datastreamprivateconnection",
],
)

View File

@ -0,0 +1,193 @@
package datastreamconnection
import (
"fmt"
"github.com/aws/constructs-go/constructs/v10"
"github.com/hashicorp/terraform-cdk-go/cdktf"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computefirewall"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computeinstance"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/datastreamprivateconnection"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/datastreamconnectionprofile"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/cloudsql"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqllogicalreplication"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/privatenetwork"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/serviceaccount"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
"github.com/sourcegraph/sourcegraph/lib/pointers"
)
// Config describes the inputs New needs to provision the Datastream
// connection resources.
type Config struct {
// VPC is the private network that the Cloud SQL proxy VM is attached to and
// that the Datastream private connection peers with.
VPC *privatenetwork.Output
// CloudSQL is the Cloud SQL instance the proxy connects to. Its region is
// also used as the location of the resources created by New.
CloudSQL *cloudsql.Output
// CloudSQLClientServiceAccount is used for establishing a proxy that can
// connect to the Cloud SQL instance.
CloudSQLClientServiceAccount serviceaccount.Output
// Publications are the logical replication publications to expose; New
// creates one Datastream connection profile per publication.
Publications []postgresqllogicalreplication.PublicationOutput
// PublicationUserGrants are added as dependencies of every connection
// profile created by New.
PublicationUserGrants []cdktf.ITerraformDependable
}
// Output is the result of New. It currently has no fields.
type Output struct {
}
// New provisions everything needed for Datastream to connect to Cloud SQL
// through a proxy:
//
//	Datastream --peering-> Private Network -> Cloud SQL Proxy VM -> Cloud SQL
//
// We need an additional VM proxying connections to Cloud SQL because Datastream
// and Cloud SQL both have their own internal VPCs, and we cannot transitively
// peer them over the private network we manage.
//
// The following resources are created in scope:
//
//   - a Compute Engine VM running the Cloud SQL proxy container
//   - a Datastream private connection peered with config.VPC
//   - firewall rules allowing ingress to the proxy VM from the Datastream
//     private connection range (tcp:5432) and from IAP (tcp:22)
//   - one Datastream connection profile per entry in config.Publications
//
// The returned Output is currently empty, and the returned error is always nil.
func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output, error) {
	const proxyInstanceName = "msp-datastream-cloudsqlproxy"

	cloudsqlproxyInstance := computeinstance.NewComputeInstance(scope, id.TerraformID("cloudsqlproxy"), &computeinstance.ComputeInstanceConfig{
		Name:        pointers.Ptr(proxyInstanceName),
		Description: pointers.Ptr("Cloud SQL proxy to allow Datastream to connect to Cloud SQL over private network"),
		// Just use a random zone in the same region as the Cloud SQL instance.
		// NOTE(review): this assumes the region has a zone with the '-a'
		// suffix - confirm for every region this is deployed to.
		Zone:        pointers.Stringf("%s-a", *config.CloudSQL.Instance.Region()),
		MachineType: pointers.Ptr("e2-micro"),
		NetworkInterface: []computeinstance.ComputeInstanceNetworkInterface{{
			Network:    config.VPC.Network.Name(),
			Subnetwork: config.VPC.Subnetwork.Name(),
		}},
		ServiceAccount: &computeinstance.ComputeInstanceServiceAccount{
			Email:  &config.CloudSQLClientServiceAccount.Email,
			Scopes: &[]*string{pointers.Ptr("https://www.googleapis.com/auth/cloud-platform")},
		},
		BootDisk: &computeinstance.ComputeInstanceBootDisk{
			InitializeParams: &computeinstance.ComputeInstanceBootDiskInitializeParams{
				Image: pointers.Ptr("cos-cloud/cos-stable"),
				Size:  pointers.Float64(10), // Gb
			},
		},
		// The tag is what the firewall rules below target.
		Tags: &[]*string{pointers.Ptr(proxyInstanceName)},
		// See docstring of newMetadataGCEContainerDeclaration for details about
		// the label and metadata.
		Labels: &map[string]*string{
			"container-vm": pointers.Ptr(proxyInstanceName),
			"msp":          pointers.Ptr("true"),
		},
		Metadata: &map[string]*string{
			"gce-container-declaration": pointers.Ptr(
				newMetadataGCEContainerDeclaration(proxyInstanceName, *config.CloudSQL.Instance.ConnectionName())),
		},
	})

	const dsPrivateConnectionSubnet = "10.126.0.0/29" // any '/29' range
	datastreamConnection := datastreamprivateconnection.NewDatastreamPrivateConnection(scope, id.TerraformID("cloudsqlproxy-privateconnection"), &datastreamprivateconnection.DatastreamPrivateConnectionConfig{
		DisplayName:         pointers.Ptr(proxyInstanceName),
		PrivateConnectionId: pointers.Ptr(proxyInstanceName),
		Location:            config.CloudSQL.Instance.Region(),
		VpcPeeringConfig: &datastreamprivateconnection.DatastreamPrivateConnectionVpcPeeringConfig{
			Vpc:    config.VPC.Network.Id(),
			Subnet: pointers.Ptr(dsPrivateConnectionSubnet),
		},
		Labels: &map[string]*string{"msp": pointers.Ptr("true")},
	})

	// Allow ingress from Datastream
	datastreamIngressFirewall := computefirewall.NewComputeFirewall(scope, id.TerraformID("cloudsqlproxy-firewall-datastream-ingress"), &computefirewall.ComputeFirewallConfig{
		Name:        pointers.Stringf("%s-datastream-ingress", proxyInstanceName),
		Description: pointers.Ptr("Allow incoming connections from a Datastream private connection to the Cloud SQL Proxy VM"),
		Network:     config.VPC.Network.Name(),
		Priority:    pointers.Float64(1000),
		Direction:   pointers.Ptr("INGRESS"),
		SourceRanges: &[]*string{
			pointers.Ptr(dsPrivateConnectionSubnet),
		},
		Allow: []computefirewall.ComputeFirewallAllow{{
			Protocol: pointers.Ptr("tcp"),
			Ports:    &[]*string{pointers.Ptr("5432")},
		}},
		TargetTags: cloudsqlproxyInstance.Tags(),
	})

	// Allow IAP ingress for debug https://cloud.google.com/iap/docs/using-tcp-forwarding
	_ = computefirewall.NewComputeFirewall(scope, id.TerraformID("cloudsqlproxy-firewall-iap-ingress"), &computefirewall.ComputeFirewallConfig{
		Name:        pointers.Stringf("%s-iap-ingress", proxyInstanceName),
		Description: pointers.Ptr("Allow incoming connections from GCP IAP to the Cloud SQL Proxy VM"),
		Network:     config.VPC.Network.Name(),
		Priority:    pointers.Float64(1000),
		Direction:   pointers.Ptr("INGRESS"),
		SourceRanges: &[]*string{
			pointers.Ptr("35.235.240.0/20"),
		},
		Allow: []computefirewall.ComputeFirewallAllow{{
			Protocol: pointers.Ptr("tcp"),
			Ports:    &[]*string{pointers.Ptr("22")},
		}},
		TargetTags: cloudsqlproxyInstance.Tags(),
	})

	// Every connection profile depends on the publication user grants and on
	// the Datastream ingress firewall. Build that list once, in a slice we own:
	// appending directly onto config.PublicationUserGrants could write into
	// the caller's backing array when it has spare capacity.
	profileDependencies := make([]cdktf.ITerraformDependable, 0, len(config.PublicationUserGrants)+1)
	profileDependencies = append(profileDependencies, config.PublicationUserGrants...)
	profileDependencies = append(profileDependencies, datastreamIngressFirewall)

	for _, pub := range config.Publications {
		id := id.Group(pub.Name)

		// The Datastream Connection Profile is what the data team will click-ops
		// during their creation of the actual Datastream "Stream".
		// https://cloud.google.com/datastream/docs/create-a-stream
		//
		// This is where we stop managing things directly in MSP.
		_ = datastreamconnectionprofile.NewDatastreamConnectionProfile(scope, id.TerraformID("cloudsqlproxy-connectionprofile"), &datastreamconnectionprofile.DatastreamConnectionProfileConfig{
			DisplayName:         pointers.Stringf("MSP Publication - %s", pub.Name),
			ConnectionProfileId: pointers.Stringf("msp-publication-%s", pub.Name),
			Labels: &map[string]*string{
				"msp":                 pointers.Ptr("true"),
				"database":            pointers.Ptr(pub.Database),
				"pg_replication_slot": pub.ReplicationSlotName,
				"pg_publication":      pub.PublicationName,
			},
			Location: config.CloudSQL.Instance.Region(),
			PostgresqlProfile: &datastreamconnectionprofile.DatastreamConnectionProfilePostgresqlProfile{
				Hostname: cloudsqlproxyInstance.NetworkInterface().
					Get(pointers.Float64(0)).
					NetworkIp(), // internal IP of the instance
				Port:     pointers.Float64(5432),
				Database: pointers.Ptr(pub.Database),
				Username: pub.User.Name(),
				Password: pub.User.Password(),
			},
			PrivateConnectivity: &datastreamconnectionprofile.DatastreamConnectionProfilePrivateConnectivity{
				PrivateConnection: datastreamConnection.Name(),
			},
			DependsOn: pointers.Ptr(profileDependencies),
		})
	}

	return &Output{}, nil
}
// newMetadataGCEContainerDeclaration renders the container declaration for the
// Cloud SQL proxy container, mirroring the metadata value that GCP provides
// when you click-ops a Compute Engine VM that runs a container. GCP manages
// the container lifecycle for us, which is quite nice. Sadly this isn't
// available via an official Terraform API, but we can replicate what GCP does
// and hope they don't change anything.
//
// containerName is used as the container's name, and cloudSQLConnectionString
// is passed to the proxy as its final argument.
func newMetadataGCEContainerDeclaration(containerName, cloudSQLConnectionString string) string {
	// The trailing notice about this format not being a public API is part of
	// what GCP generates, so it is kept in the template as well.
	const declarationTemplate = `
spec:
restartPolicy: Always
containers:
- name: %s
image: gcr.io/cloud-sql-connectors/cloud-sql-proxy
args:
- '--auto-iam-authn'
- '--private-ip'
- '--address=0.0.0.0'
- '%s'
# This container declaration format is not public API and may change without notice. Please
# use gcloud command-line tool or Google Cloud Console to run Containers on Google Compute Engine.`

	return fmt.Sprintf(declarationTemplate, containerName, cloudSQLConnectionString)
}

View File

@ -0,0 +1,20 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# Go library target for the postgresqllogicalreplication resource package. It
# is only visible to packages under //dev/managedservicesplatform.
go_library(
name = "postgresqllogicalreplication",
srcs = ["postgresqllogicalreplication.go"],
importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqllogicalreplication",
visibility = ["//dev/managedservicesplatform:__subpackages__"],
deps = [
"//dev/managedservicesplatform/internal/resource/cloudsql",
"//dev/managedservicesplatform/internal/resourceid",
"//dev/managedservicesplatform/spec",
"//lib/pointers",
"@com_github_aws_constructs_go_constructs_v10//:constructs",
"@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//publication",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//replicationslot",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//role",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_random//password",
],
)

View File

@ -0,0 +1,122 @@
package postgresqllogicalreplication
import (
"github.com/aws/constructs-go/constructs/v10"
"github.com/hashicorp/terraform-cdk-go/cdktf"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/publication"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/replicationslot"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/role"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/random/password"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/cloudsql"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
"github.com/sourcegraph/sourcegraph/lib/pointers"
)
// Config describes the inputs New needs to set up logical replication.
type Config struct {
// AdminPostgreSQLProvider is the provider used to create the per-publication
// user role.
AdminPostgreSQLProvider cdktf.TerraformProvider
// WorkloadUserPostgreSQLProvider is the provider used to create
// publications.
WorkloadUserPostgreSQLProvider cdktf.TerraformProvider
// ReplicationPostgreSQLProvider is the provider used to create replication
// slots.
ReplicationPostgreSQLProvider cdktf.TerraformProvider
// CloudSQL is the Cloud SQL instance output.
// NOTE(review): not referenced by New in this file - confirm whether it is
// still needed.
CloudSQL *cloudsql.Output
// Spec lists the publications to provision.
Spec spec.EnvironmentResourcePostgreSQLLogicalReplicationSpec
// DependsOn is set as the dependencies of every role, publication, and
// replication slot created by New.
DependsOn []cdktf.ITerraformDependable
}
// PublicationOutput describes the resources New provisioned for a single
// publication, alongside the spec it was created from.
type PublicationOutput struct {
// The name of the publication in Postgres.
PublicationName *string
// The name of the replication slot in Postgres.
ReplicationSlotName *string
// User for subscribing to the publication.
User role.Role
// The original publication spec.
spec.EnvironmentResourcePostgreSQLLogicalReplicationPublicationsSpec
}
// Output is the result of New.
type Output struct {
// Publications has one entry per publication in the spec, in spec order.
Publications []PublicationOutput
}
// New applies the PostgreSQL runtime configuration required for logical
// replication. For each publication in config.Spec it provisions a dedicated
// login user with replication enabled, the publication itself, and a
// 'pgoutput' replication slot, and reports them in the returned Output. The
// returned error is always nil.
//
// When tearing down a database only (i.e. not destroying the entire
// environment), resources managed by this provider must be manually removed
// from state in order to apply the diff:
//
//	terraform state list | grep postgresql_publication | xargs terraform state rm
//	terraform state list | grep postgresql_replication_slot | xargs terraform state rm
//
// This is because we cannot instantiate the provider when removing the
// database, causing plans and applies to fail. We'll likely be stuck with the
// workaround for a while, which is acceptable because CloudSQL-only teardowns
// should be rare - we'll more likely be removing entire environments in general.
//
// TODO(@bobheadxi): Improve documentation around this teardown scenario.
func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output, error) {
	var outputs []PublicationOutput

	for _, pubSpec := range config.Spec.Publications {
		pubID := id.Group("publications").Group(pubSpec.Name)

		// Create user for Datastream:
		// https://cloud.google.com/datastream/docs/configure-cloudsql-psql#cloudsqlforpostgres-create-datastream-user
		userPassword := password.NewPassword(scope, pubID.TerraformID("user_password"), &password.PasswordConfig{
			Length:  pointers.Float64(32),
			Special: pointers.Ptr(false),
		})
		user := role.NewRole(scope, pubID.TerraformID("user"), &role.RoleConfig{
			Provider:    config.AdminPostgreSQLProvider,
			Name:        pointers.Stringf("msp-publication-%s-user", pubSpec.Name),
			Password:    userPassword.Result(),
			Login:       pointers.Ptr(true),
			Replication: pointers.Ptr(true),
			DependsOn:   &config.DependsOn,
		})

		// Provision publication and replication slot:
		// https://cloud.google.com/datastream/docs/configure-cloudsql-psql#cloudsqlforpostgres-create-publication-and-replication-slot

		// Avoid infinite drift as the table name needs the schema, and we
		// assume tables are created in 'public'.
		qualifiedTables := mapPrefix(pubSpec.Tables, "public.")
		pub := publication.NewPublication(scope, pubID.TerraformID("publication"), &publication.PublicationConfig{
			// Tables are created (and therefore owned) by the application
			// workload user by default, so we use the provider authenticated
			// as the workload user.
			Provider:  config.WorkloadUserPostgreSQLProvider,
			Name:      pointers.Ptr(pubSpec.Name),
			Database:  pointers.Ptr(pubSpec.Database),
			Tables:    pointers.Ptr(pointers.Slice(qualifiedTables)),
			DependsOn: &config.DependsOn,
		})

		slot := replicationslot.NewReplicationSlot(scope, pubID.TerraformID("replication_slot"), &replicationslot.ReplicationSlotConfig{
			Provider:  config.ReplicationPostgreSQLProvider,
			Name:      pointers.Ptr(pubSpec.Name + "_pgoutput"),
			Database:  pointers.Ptr(pubSpec.Database),
			Plugin:    pointers.Ptr("pgoutput"),
			DependsOn: &config.DependsOn,
		})

		outputs = append(outputs, PublicationOutput{
			PublicationName:     pub.Name(),
			ReplicationSlotName: slot.Name(),
			User:                user,
			EnvironmentResourcePostgreSQLLogicalReplicationPublicationsSpec: pubSpec,
		})
	}

	return &Output{Publications: outputs}, nil
}
// mapPrefix returns a new slice containing each element of values with prefix
// prepended, in the same order. The input slice is not modified, and the
// result is never nil.
func mapPrefix(values []string, prefix string) []string {
	prefixed := make([]string, 0, len(values))
	for _, value := range values {
		prefixed = append(prefixed, prefix+value)
	}
	return prefixed
}

View File

@ -8,6 +8,7 @@ go_library(
visibility = ["//dev/managedservicesplatform:__subpackages__"],
deps = [
"//dev/managedservicesplatform/internal/resource/cloudsql",
"//dev/managedservicesplatform/internal/resource/postgresqllogicalreplication",
"//dev/managedservicesplatform/internal/resourceid",
"//lib/pointers",
"@com_github_aws_constructs_go_constructs_v10//:constructs",
@ -15,6 +16,5 @@ go_library(
"@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//grant",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//grantrole",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//provider",
],
)

View File

@ -7,9 +7,9 @@ import (
"github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/grant"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/grantrole"
postgresql "github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/provider"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/cloudsql"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqllogicalreplication"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
"github.com/sourcegraph/sourcegraph/lib/pointers"
)
@ -18,11 +18,17 @@ type Output struct {
// WorkloadSuperuserGrant should be referenced as a dependency before
// WorkloadUser is used.
WorkloadSuperuserGrant cdktf.ITerraformDependable
// PublicationUserGrants should be referenced as a dependency before
// Publications[*].User is used.
PublicationUserGrants []cdktf.ITerraformDependable
}
type Config struct {
Databases []string
CloudSQL *cloudsql.Output
PostgreSQLProvider cdktf.TerraformProvider
Databases []string
CloudSQL *cloudsql.Output
Publications []postgresqllogicalreplication.PublicationOutput
}
// New applies PostgreSQL roles to a Cloud SQL database.
@ -40,17 +46,8 @@ type Config struct {
//
// TODO(@bobheadxi): Improve documentation around this teardown scenario.
func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output, error) {
pgProvider := postgresql.NewPostgresqlProvider(scope, id.TerraformID("postgresql_provider"), &postgresql.PostgresqlProviderConfig{
Scheme: pointers.Ptr("gcppostgres"),
Host: config.CloudSQL.Instance.ConnectionName(),
Username: config.CloudSQL.AdminUser.Name(),
Password: config.CloudSQL.AdminUser.Password(),
Port: jsii.Number(5432),
Superuser: jsii.Bool(false),
})
workloadSuperuserGrant := grantrole.NewGrantRole(scope, id.TerraformID("workload_service_account_role_cloudsqlsuperuser"), &grantrole.GrantRoleConfig{
Provider: pgProvider,
Provider: config.PostgreSQLProvider,
Role: config.CloudSQL.WorkloadUser.Name(),
GrantRole: jsii.String("cloudsqlsuperuser"),
WithAdminOption: jsii.Bool(true),
@ -61,7 +58,7 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
for _, db := range config.Databases {
id := id.Group(db)
_ = grant.NewGrant(scope, id.TerraformID("operator_access_service_account_connect_grant"), &grant.GrantConfig{
Provider: pgProvider,
Provider: config.PostgreSQLProvider,
Database: &db,
Role: config.CloudSQL.OperatorAccessUser.Name(),
ObjectType: pointers.Ptr("database"),
@ -71,7 +68,7 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
DependsOn: &config.CloudSQL.Databases,
})
_ = grant.NewGrant(scope, id.TerraformID("operator_access_service_account_table_grant"), &grant.GrantConfig{
Provider: pgProvider,
Provider: config.PostgreSQLProvider,
Database: &db,
Role: config.CloudSQL.OperatorAccessUser.Name(),
Schema: pointers.Ptr("public"),
@ -86,7 +83,46 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
})
}
var publicationUserGrants []cdktf.ITerraformDependable
if len(config.Publications) > 0 {
// Assign publication users permissions as required for GCP Datastream.
// https://cloud.google.com/datastream/docs/configure-cloudsql-psql#cloudsqlforpostgres-create-datastream-user
id := id.Group("publication")
for _, p := range config.Publications {
id := id.Group(p.Name)
// Grant SELECT privileges to the publication's tables
publicationUserGrants = append(publicationUserGrants,
grant.NewGrant(scope, id.TerraformID("user_table_select_grant"), &grant.GrantConfig{
Provider: config.PostgreSQLProvider,
Database: &p.Database,
Role: p.User.Name(),
Schema: pointers.Ptr("public"),
ObjectType: pointers.Ptr("table"),
Objects: pointers.Ptr(pointers.Slice(p.Tables)),
// Restricted privileges only
Privileges: pointers.Ptr(pointers.Slice([]string{
"SELECT",
})),
}))
// Grant USAGE on the public schema of the publication's database
publicationUserGrants = append(publicationUserGrants,
grant.NewGrant(scope, id.TerraformID("user_schema_usage_grant"), &grant.GrantConfig{
Provider: config.PostgreSQLProvider,
Database: &p.Database,
Role: p.User.Name(),
ObjectType: pointers.Ptr("schema"),
Schema: pointers.Ptr("public"),
Privileges: pointers.Ptr(pointers.Slice([]string{
"USAGE",
})),
}))
}
}
return &Output{
WorkloadSuperuserGrant: workloadSuperuserGrant,
PublicationUserGrants: publicationUserGrants,
}, nil
}

View File

@ -790,7 +790,23 @@ type EnvironmentResourcePostgreSQLSpec struct {
// - https://cloud.google.com/sql/pricing
//
// Also see: https://sourcegraph.notion.site/655e89d164b24727803f5e5a603226d8
//
// Toggling highAvailability will incur a small amount of downtime.
HighAvailability *bool `yaml:"highAvailability,omitempty"`
// LogicalReplication configures native logical replication for PostgreSQL:
// https://www.postgresql.org/docs/current/logical-replication.html
//
// Enabling logicalReplication will incur a small amount of downtime. If you
// plan to use logical replication, you should configure an empty
// 'logicalReplication' block to initialize the database instance with the
// prerequisite configuration:
//
// logicalReplication: {}
//
// The primary use case for logicalReplication is to integrate with GCP
// Datastream to make tables available in BigQuery:
// https://cloud.google.com/datastream/docs/sources-postgresql
LogicalReplication *EnvironmentResourcePostgreSQLLogicalReplicationSpec `yaml:"logicalReplication,omitempty"`
}
func (EnvironmentResourcePostgreSQLSpec) ResourceKind() string { return "PostgreSQL instance" }
@ -823,9 +839,67 @@ func (s *EnvironmentResourcePostgreSQLSpec) Validate() []error {
errs = append(errs, errors.New("postgreSQL.memoryGB must be <= 6*postgreSQL.cpu"))
}
}
if s.LogicalReplication != nil {
errs = append(errs, s.LogicalReplication.Validate()...)
}
return errs
}
// EnvironmentResourcePostgreSQLLogicalReplicationSpec configures PostgreSQL
// logical replication for a PostgreSQL instance.
type EnvironmentResourcePostgreSQLLogicalReplicationSpec struct {
// Publications configure PostgreSQL logical replication publications for
// consumption in tools like GCP Datastream.
//
// Configuring publications also configures all required Datastream
// connection resources and configuration to set up a Datastream "Stream"
// https://cloud.google.com/datastream/docs/create-a-stream, which must be
// set up separately.
Publications []EnvironmentResourcePostgreSQLLogicalReplicationPublicationsSpec `yaml:"publications,omitempty"`
}
// Validate checks every configured publication and returns all problems found
// rather than stopping at the first one. For each publication it requires a
// non-empty name that has not been used by an earlier publication, a non-empty
// database, and at least one table, none of which may be empty. A nil receiver
// is valid and yields no errors.
func (s *EnvironmentResourcePostgreSQLLogicalReplicationSpec) Validate() []error {
	if s == nil {
		return nil
	}

	var errs []error
	// Names seen so far, used to detect duplicates.
	usedNames := map[string]struct{}{}
	for idx := range s.Publications {
		pub := s.Publications[idx]

		if pub.Name == "" {
			errs = append(errs, errors.Newf("publication[%d].name is required", idx))
		}
		if _, duplicate := usedNames[pub.Name]; duplicate {
			errs = append(errs, errors.Newf("publication[%d].name must be unique", idx))
		}
		usedNames[pub.Name] = struct{}{}

		if pub.Database == "" {
			errs = append(errs, errors.Newf("publication[%d].database is required", idx))
		}

		if len(pub.Tables) == 0 {
			errs = append(errs, errors.Newf("publication[%d].tables is required", idx))
		}
		for tableIdx, table := range pub.Tables {
			if table == "" {
				errs = append(errs, errors.Newf("publication[%d].tables[%d] must not be empty", idx, tableIdx))
			}
		}
	}
	return errs
}
// EnvironmentResourcePostgreSQLLogicalReplicationPublicationsSpec configures a
// single logical replication publication.
type EnvironmentResourcePostgreSQLLogicalReplicationPublicationsSpec struct {
// Name of the publication. Must be machine-friendly and unique. Required.
Name string `yaml:"name"`
// Database containing the tables you want to replicate and publish. Required.
Database string `yaml:"database"`
// Tables to replicate and publish. Required.
//
// Note that currently, referenced tables MUST exist BEFORE a publication
// is provisioned on them. Database tables should be created and owned by
// the application workload identity.
Tables []string `yaml:"tables"`
}
type EnvironmentResourceBigQueryDatasetSpec struct {
// Tables are the IDs of tables to create within the service's BigQuery
// dataset. Required.

View File

@ -13,8 +13,10 @@ go_library(
"//dev/managedservicesplatform/googlesecretsmanager",
"//dev/managedservicesplatform/internal/resource/bigquery",
"//dev/managedservicesplatform/internal/resource/cloudsql",
"//dev/managedservicesplatform/internal/resource/datastreamconnection",
"//dev/managedservicesplatform/internal/resource/deliverypipeline",
"//dev/managedservicesplatform/internal/resource/gsmsecret",
"//dev/managedservicesplatform/internal/resource/postgresqllogicalreplication",
"//dev/managedservicesplatform/internal/resource/postgresqlroles",
"//dev/managedservicesplatform/internal/resource/privatenetwork",
"//dev/managedservicesplatform/internal/resource/random",
@ -43,6 +45,9 @@ go_library(
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//storagebucket",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//storagebucketiammember",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//storagebucketobject",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//provider",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_postgresql//role",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_random//password",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_sentry//datasentryorganization",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_sentry//datasentryteam",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_sentry//key",

View File

@ -22,6 +22,9 @@ import (
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/storagebucket"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/storagebucketiammember"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/storagebucketobject"
postgresql "github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/provider"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql/role"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/random/password"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/sentry/datasentryorganization"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/sentry/datasentryteam"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/sentry/key"
@ -30,8 +33,10 @@ import (
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/googlesecretsmanager"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/bigquery"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/cloudsql"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/datastreamconnection"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/deliverypipeline"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/gsmsecret"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqllogicalreplication"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/postgresqlroles"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/privatenetwork"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/random"
@ -279,15 +284,121 @@ func NewStack(stacks *stack.Set, vars Variables) (crossStackOutput *CrossStackOu
// magically handles certs for us, so we don't need to mount certs in
// Cloud Run.
// Apply additional runtime configuration
// There is additional runtime configuration we need to apply directly
// in the PostgreSQL instance. To do this we use a different provider
// authenticated by the users we just created.
//
// Some of the providers are only used if certain configurations are
// enabled, but we create them all up-front to make teardown scenarios
// easier to manage.
pgRuntimeAdminProvider := postgresql.NewPostgresqlProvider(stack,
id.TerraformID("postgresql_admin_provider"),
&postgresql.PostgresqlProviderConfig{
Alias: pointers.Ptr("postgresql_admin_provider"),
Scheme: pointers.Ptr("gcppostgres"),
Host: sqlInstance.Instance.ConnectionName(),
Port: pointers.Float64(5432),
Superuser: pointers.Ptr(false),
Username: sqlInstance.AdminUser.Name(),
Password: sqlInstance.AdminUser.Password(),
})
// Some configurations require impersonating the workload identity, for
// things like database tables that are likely provisioned by the
// application.
pgRuntimeWorkloadUserProvider := postgresql.NewPostgresqlProvider(stack,
id.TerraformID("postgresql_workloaduser_provider"),
&postgresql.PostgresqlProviderConfig{
Alias: pointers.Ptr("postgresql_workloaduser_provider"),
Scheme: pointers.Ptr("gcppostgres"),
Host: sqlInstance.Instance.ConnectionName(),
Port: pointers.Float64(5432),
Superuser: pointers.Ptr(false),
// Impersonate the workload identity
Username: sqlInstance.WorkloadUser.Name(),
GcpIamImpersonateServiceAccount: &vars.IAM.CloudRunWorkloadServiceAccount.Email,
})
// The admin user's cloudsqlsuperuser does not have replication enabled,
// so we need another user that does have it enabled, because replication
// permission in roles is not inherited. We use the Postgres provider
// instead of the Cloud SQL providers in 'cloudsql.New' so that we can
// enable replication on this user.
replicationUser := role.NewRole(stack, id.TerraformID("postgresql_replicationuser"), &role.RoleConfig{
Provider: pgRuntimeAdminProvider,
Name: pointers.Ptr("msp-replicationuser"),
Password: password.NewPassword(stack, id.TerraformID("postgresql_replicationuser_password"), &password.PasswordConfig{
Length: pointers.Float64(32),
Special: pointers.Ptr(false),
}).Result(),
Login: pointers.Ptr(true),
Replication: pointers.Ptr(true),
})
pgRuntimeReplicationProvider := postgresql.NewPostgresqlProvider(stack,
id.TerraformID("postgresql_replicationuser_provider"),
&postgresql.PostgresqlProviderConfig{
Alias: pointers.Ptr("postgresql_replicationuser_provider"),
Scheme: pointers.Ptr("gcppostgres"),
Host: sqlInstance.Instance.ConnectionName(),
Port: pointers.Float64(5432),
Superuser: pointers.Ptr(false),
Username: replicationUser.Name(),
Password: replicationUser.Password(),
})
// Apply runtime configuration
var publications []postgresqllogicalreplication.PublicationOutput
if pgSpec.LogicalReplication != nil {
replication, err := postgresqllogicalreplication.New(stack,
id.Group("postgresqllogicalreplication"),
postgresqllogicalreplication.Config{
AdminPostgreSQLProvider: pgRuntimeAdminProvider,
WorkloadUserPostgreSQLProvider: pgRuntimeWorkloadUserProvider,
ReplicationPostgreSQLProvider: pgRuntimeReplicationProvider,
CloudSQL: sqlInstance,
Spec: *pgSpec.LogicalReplication,
DependsOn: []cdktf.ITerraformDependable{
// Since tables are managed by the application, in the
// future, we may need to wait for the application before we
// provision a publication on tables that may not yet
// exist. This is currently a circular dependency - the
// Cloud Run resource does not need logical replication
// config to start, but we cannot reference the Cloud Run
// resource the way the codebase is structured now without
// a bit of trickery or refactoring.
},
})
if err != nil {
return nil, errors.Wrap(err, "failed to render Cloud SQL PostgreSQL logical replication")
}
publications = replication.Publications // for role grants
}
pgRoles, err := postgresqlroles.New(stack, id.Group("postgresqlroles"), postgresqlroles.Config{
Databases: pgSpec.Databases,
CloudSQL: sqlInstance,
PostgreSQLProvider: pgRuntimeAdminProvider,
Databases: pgSpec.Databases,
CloudSQL: sqlInstance,
Publications: publications,
})
if err != nil {
return nil, errors.Wrap(err, "failed to render Cloud SQL PostgreSQL roles")
}
if len(publications) > 0 {
// Configure datastream connection resources for publications
_, err = datastreamconnection.New(stack, id.Group("publication_datastream"), datastreamconnection.Config{
VPC: privateNetwork(),
CloudSQL: sqlInstance,
CloudSQLClientServiceAccount: *vars.IAM.DatastreamToCloudSQLServiceAccount,
Publications: publications,
PublicationUserGrants: pgRoles.PublicationUserGrants,
})
if err != nil {
return nil, errors.Wrap(err, "failed to render datastream configuration")
}
}
// We need the workload superuser role to be granted before Cloud Run
// can correctly use the database instance
cloudRunBuilder.AddDependency(pgRoles.WorkloadSuperuserGrant)

View File

@ -25,6 +25,7 @@ go_library(
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//projectiamcustomrole",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//projectiammember",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//secretmanagersecretiammember",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//serviceaccountiammember",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google_beta//googleprojectserviceidentity",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google_beta//provider",
"@org_golang_x_exp//maps",

View File

@ -12,6 +12,7 @@ import (
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/projectiamcustomrole"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/projectiammember"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/secretmanagersecretiammember"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/serviceaccountiammember"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google_beta/googleprojectserviceidentity"
google_beta "github.com/sourcegraph/managed-services-platform-cdktf/gen/google_beta/provider"
@ -33,6 +34,9 @@ type CrossStackOutput struct {
// IsFinalStageOfRollout is true for this environment.
CloudDeployExecutionServiceAccount *serviceaccount.Output
CloudDeployReleaserServiceAccount *serviceaccount.Output
// DatastreamToCloudSQLServiceAccount is a service account for a proxy
// to Cloud SQL to allow Datastream to connect to Cloud SQL for replication.
DatastreamToCloudSQLServiceAccount *serviceaccount.Output
}
type Variables struct {
@ -61,6 +65,10 @@ const (
OutputOperatorServiceAccount = "operator_access_service_account"
OutputCloudDeployReleaserServiceAccountID = "cloud_deploy_releaser_service_account_id"
// tfcRobotMember is the service account used as the identity for our
// Terraform Cloud runners.
tfcRobotMember = "serviceAccount:terraform-cloud@sourcegraph-ci.iam.gserviceaccount.com"
)
func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
@ -134,6 +142,28 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
PreventDestroys: vars.PreventDestroys,
})
// Let the TFC robot impersonate the workload service account to provision
// things on its behalf if needed.
{
id := id.Group("tfc_impersonate_workload")
workloadSAID := pointers.Stringf("projects/%s/serviceAccounts/%s",
vars.ProjectID, workloadServiceAccount.Email)
_ = serviceaccountiammember.NewServiceAccountIamMember(stack,
id.TerraformID("serviceaccountuser"),
&serviceaccountiammember.ServiceAccountIamMemberConfig{
ServiceAccountId: workloadSAID,
Role: pointers.Ptr("roles/iam.serviceAccountUser"),
Member: pointers.Ptr(tfcRobotMember),
})
_ = serviceaccountiammember.NewServiceAccountIamMember(stack,
id.TerraformID("serviceaccounttokencreator"),
&serviceaccountiammember.ServiceAccountIamMemberConfig{
ServiceAccountId: workloadSAID,
Role: pointers.Ptr("roles/iam.serviceAccountTokenCreator"),
Member: pointers.Ptr(tfcRobotMember),
})
}
// Create a service account for operators to impersonate to access other
// provisioned MSP resources. We use a randomized ID for more predictable
// ID lengths and to indicate this is only used by human operators for MSP
@ -260,6 +290,19 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
"Service Account ID for Cloud Deploy release creation - intended for workload identity federation in CI")
}
datastreamToCloudSQLServiceAccount := serviceaccount.New(stack,
id.Group("datastream-to-cloudsql"),
serviceaccount.Config{
ProjectID: vars.ProjectID,
AccountID: "datastream-to-cloudsql",
DisplayName: fmt.Sprintf("%s Datastream-to-Cloud-SQL service account", vars.Service.GetName()),
Roles: []serviceaccount.Role{{
ID: resourceid.New("role_cloudsql_client"),
Role: "roles/cloudsql.client",
}},
},
)
// Collect outputs
locals.Add(OutputCloudRunServiceAccount, workloadServiceAccount.Email,
"Service Account email used as Cloud Run resource workload identity")
@ -271,6 +314,7 @@ func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
OperatorAccessServiceAccount: operatorAccessServiceAccount,
CloudDeployExecutionServiceAccount: cloudDeployExecutorServiceAccount,
CloudDeployReleaserServiceAccount: cloudDeployReleaserServiceAccount,
DatastreamToCloudSQLServiceAccount: datastreamToCloudSQLServiceAccount,
}, nil
}

View File

@ -40,6 +40,7 @@ var gcpServices = []string{
"bigqueryconnection.googleapis.com",
"accesscontextmanager.googleapis.com", // for VPC SC perimeter management
"pubsub.googleapis.com", // for Cloud Deploy notifications
"datastream.googleapis.com", // for Datastream
}
const (

2
go.mod
View File

@ -303,7 +303,7 @@ require (
github.com/sourcegraph/managed-services-platform-cdktf/gen/google_beta v0.0.0-20240513203650-e2b1273f1c1a
github.com/sourcegraph/managed-services-platform-cdktf/gen/nobl9 v0.0.0-20240513203650-e2b1273f1c1a
github.com/sourcegraph/managed-services-platform-cdktf/gen/opsgenie v0.0.0-20240513203650-e2b1273f1c1a
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20240513203650-e2b1273f1c1a
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20240617210115-f286e77e83e8
github.com/sourcegraph/managed-services-platform-cdktf/gen/random v0.0.0-20240513203650-e2b1273f1c1a
github.com/sourcegraph/managed-services-platform-cdktf/gen/sentry v0.0.0-20240513203650-e2b1273f1c1a
github.com/sourcegraph/managed-services-platform-cdktf/gen/slack v0.0.0-20240513203650-e2b1273f1c1a

4
go.sum
View File

@ -1654,8 +1654,8 @@ github.com/sourcegraph/managed-services-platform-cdktf/gen/nobl9 v0.0.0-20240513
github.com/sourcegraph/managed-services-platform-cdktf/gen/nobl9 v0.0.0-20240513203650-e2b1273f1c1a/go.mod h1:1HTn7/xUQWdIjhr9ROmAVAwgG2mdJB6aJjKyi5aCDCg=
github.com/sourcegraph/managed-services-platform-cdktf/gen/opsgenie v0.0.0-20240513203650-e2b1273f1c1a h1:a5CffqYMpDTeVZnz3a6ovY4AvKSuzhvjVJihE24BcKU=
github.com/sourcegraph/managed-services-platform-cdktf/gen/opsgenie v0.0.0-20240513203650-e2b1273f1c1a/go.mod h1:hPo1VnFnbG4CpmG0+jU/mt2afL0oQ48uJep/zm2jDQw=
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20240513203650-e2b1273f1c1a h1:Jy9vKM1mtyJYgx/DRSDftIuhL2MFO6esU84uj4deNn4=
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20240513203650-e2b1273f1c1a/go.mod h1:lQ1E8rSHgTmL8GmtcQFXS75rqQrCmuQRXZWh7A+Fp6s=
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20240617210115-f286e77e83e8 h1:t0hSCAvffnF3VAlSW3M9eeeubpMF6TieVc52vKLT98o=
github.com/sourcegraph/managed-services-platform-cdktf/gen/postgresql v0.0.0-20240617210115-f286e77e83e8/go.mod h1:p6IV7VaCthZ4wlVAEDewHkM+2iwiew4jbaiXQyu6vWg=
github.com/sourcegraph/managed-services-platform-cdktf/gen/random v0.0.0-20240513203650-e2b1273f1c1a h1:Te08CFBDZrBcmICwwOvV5t3GW5GtRDaUSJEuyY0YDNw=
github.com/sourcegraph/managed-services-platform-cdktf/gen/random v0.0.0-20240513203650-e2b1273f1c1a/go.mod h1:TiUqRvYs/Gah8bGw/toyVWCaP3dnCB4tBh3jf5HGdo0=
github.com/sourcegraph/managed-services-platform-cdktf/gen/sentry v0.0.0-20240513203650-e2b1273f1c1a h1:lws+6zhwD9KFG5c0mt0Xnhqw6uV/3LNv9ymQku0FbO0=