msp/cloudrun: use VPC direct egress (#60466)

Adopts [Cloud Run VPC direct egress](https://cloud.google.com/run/docs/configuring/vpc-direct-vpc) for private networks. Private networks are used by MSP services that connect to Cloud SQL, Memorystore (Redis), or other MSP services via VPC-SC perimeters. On paper, this should give us:

- A likely smaller bill, as we no longer pay for serverless VPC connector VMs
- Reduced latency on traffic through the private network
- Reduced latency during traffic spikes, as there is no longer a serverless VPC connector that needs to scale out

There are some caveats we are discussing in Slack: https://sourcegraph.slack.com/archives/C05E2LHPQLX/p1713324250520539

Closes https://github.com/sourcegraph/managed-services/issues/317

## Test plan

Rolled this out without downtime to `msp-testbed: robert`. The VPC private networking test used in https://github.com/sourcegraph/managed-services/pull/1024 still works.
This commit is contained in:
Robert Lin 2024-04-17 10:13:50 -07:00 committed by GitHub
parent ffaac9e591
commit 2189b2991f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 59 additions and 36 deletions

View File

@ -6,12 +6,14 @@ go_library(
importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/privatenetwork",
visibility = ["//dev/managedservicesplatform:__subpackages__"],
deps = [
"//dev/managedservicesplatform/internal/resource/random",
"//dev/managedservicesplatform/internal/resourceid",
"//lib/pointers",
"@com_github_aws_constructs_go_constructs_v10//:constructs",
"@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computeglobaladdress",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computenetwork",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//computesubnetwork",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//servicenetworkingconnection",
"@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//vpcaccessconnector",
],
)

View File

@ -4,12 +4,14 @@ import (
"fmt"
"github.com/aws/constructs-go/constructs/v10"
"github.com/hashicorp/terraform-cdk-go/cdktf"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computeglobaladdress"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computenetwork"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/computesubnetwork"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/servicenetworkingconnection"
"github.com/sourcegraph/managed-services-platform-cdktf/gen/google/vpcaccessconnector"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/random"
"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
"github.com/sourcegraph/sourcegraph/lib/pointers"
)
@ -22,18 +24,17 @@ type Config struct {
type Output struct {
// Network is the private network for GCP resources that the Cloud Run
// workload needs to access.
Network computenetwork.ComputeNetwork
Network computenetwork.ComputeNetwork
Subnetwork computesubnetwork.ComputeSubnetwork
// ServiceNetworkingConnection is required for Cloud SQL access, and is
// provisioned by default.
ServiceNetworkingConnection servicenetworkingconnection.ServiceNetworkingConnection
// Connector is used by Cloud Run to connect to the private network.
Connector vpcaccessconnector.VpcAccessConnector
}
// New sets up a network for the Cloud Run service to interface with other GCP
// services. This should only be created once, hence why it does not accept
// a resourceid.ID
func New(scope constructs.Construct, config Config) *Output {
func New(scope constructs.Construct, id resourceid.ID, config Config) *Output {
network := computenetwork.NewComputeNetwork(
scope,
pointers.Ptr("cloudrun-network"),
@ -44,22 +45,34 @@ func New(scope constructs.Construct, config Config) *Output {
AutoCreateSubnetworks: false,
})
// Cloud Run supports a small set of IPv4 ranges for the subnet:
// https://cloud.google.com/run/docs/configuring/vpc-direct-vpc#supported-ip-ranges
// We choose one with a generous IP range to avoid limitations documented
// here: https://cloud.google.com/run/docs/configuring/vpc-direct-vpc#limitations
subnetworkIPCIDRRange := "172.16.0.0/12"
subnetworkName := random.New(scope, id.Group("subnetwork-name"), random.Config{
Prefix: config.ServiceID,
ByteLength: 4,
Keepers: map[string]*string{
// Range change requires recreation of the subnetwork, so we need
// to change the randomized suffix to avoid a conflict and respect
// CreateBeforeDestroy
"ipcidrrange": pointers.Ptr(subnetworkIPCIDRRange),
},
})
subnetwork := computesubnetwork.NewComputeSubnetwork(
scope,
pointers.Ptr("cloudrun-subnetwork"),
&computesubnetwork.ComputeSubnetworkConfig{
Project: &config.ProjectID,
Region: &config.Region,
Name: &config.ServiceID,
Network: network.Id(),
Project: &config.ProjectID,
Region: &config.Region,
Name: &subnetworkName.HexValue,
Network: network.Id(),
IpCidrRange: pointers.Ptr(subnetworkIPCIDRRange),
// Allow usage of private Google access: https://cloud.google.com/vpc/docs/private-google-access
PrivateIpGoogleAccess: true,
// This is similar to the setup in Cloud v1.1 for connecting to Cloud SQL - we
// set up an arbitrary ip_cidr_range that covers enough IPs for most needs.
// We must use a /28 range because that's the range supported by VPC connectors.
IpCidrRange: pointers.Ptr("10.0.0.0/28"),
//checkov:skip=CKV_GCP_76: Enabling dual-stack support for subnetworks is destructive and requires re-creating the subnet and all dependent resources (e.g. NEG)
PrivateIpv6GoogleAccess: pointers.Ptr("DISABLE_GOOGLE_ACCESS"),
// Checkov requirement: https://docs.bridgecrew.io/docs/bc_gcp_logging_1
@ -68,6 +81,11 @@ func New(scope constructs.Construct, config Config) *Output {
FlowSampling: pointers.Float64(0.5),
Metadata: pointers.Ptr("INCLUDE_ALL_METADATA"),
},
Lifecycle: &cdktf.TerraformResourceLifecycle{
// Recreation also requires rename of randomized subnetworkName.
CreateBeforeDestroy: pointers.Ptr(true),
},
},
)
@ -93,25 +111,9 @@ func New(scope constructs.Construct, config Config) *Output {
ReservedPeeringRanges: &[]*string{serviceNetworkingConnectionIP.Name()},
})
// Cloud Run services can't connect directly to networks, and seem to require a
// VPC connector, so we provision one to allow Cloud Run services to talk to
// other GCP services (like Redis)
connector := vpcaccessconnector.NewVpcAccessConnector(
scope,
pointers.Ptr("cloudrun-connector"),
&vpcaccessconnector.VpcAccessConnectorConfig{
Project: &config.ProjectID,
Region: &config.Region,
Name: pointers.Ptr(config.ServiceID),
Subnet: &vpcaccessconnector.VpcAccessConnectorSubnet{
Name: subnetwork.Name(),
},
},
)
return &Output{
Network: network,
Subnetwork: subnetwork,
ServiceNetworkingConnection: serviceNetworkingConnection,
Connector: connector,
}
}

View File

@ -177,7 +177,7 @@ func NewStack(stacks *stack.Set, vars Variables) (crossStackOutput *CrossStackOu
// once. If called, it always returns a non-nil value.
privateNetwork := sync.OnceValue(func() *privatenetwork.Output {
privateNetworkEnabled = true
return privatenetwork.New(stack, privatenetwork.Config{
return privatenetwork.New(stack, resourceid.New("privatenetwork"), privatenetwork.Config{
ProjectID: vars.ProjectID,
ServiceID: vars.Service.ID,
Region: GCPRegion,

View File

@ -81,12 +81,20 @@ func (b *jobBuilder) AddDependency(dep cdktf.ITerraformDependable) {
func (b *jobBuilder) Build(stack cdktf.TerraformStack, vars builder.Variables) (builder.Resource, error) {
var vpcAccess *cloudrunv2job.CloudRunV2JobTemplateTemplateVpcAccess
var launchStage *string
if vars.PrivateNetwork != nil {
// https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_service#example-usage---cloudrunv2-service-directvpc
// https://cloud.google.com/run/docs/configuring/vpc-direct-vpc
launchStage = pointers.Ptr("BETA") // Direct VPC is still in beta.
vpcAccess = &cloudrunv2job.CloudRunV2JobTemplateTemplateVpcAccess{
Connector: vars.PrivateNetwork.Connector.SelfLink(),
Egress: pointers.Ptr("PRIVATE_RANGES_ONLY"),
NetworkInterfaces: &[]*cloudrunv2job.CloudRunV2JobTemplateTemplateVpcAccessNetworkInterfaces{{
Network: vars.PrivateNetwork.Network.Id(),
Subnetwork: vars.PrivateNetwork.Subnetwork.Id(),
}},
Egress: pointers.Ptr("PRIVATE_RANGES_ONLY"),
}
}
name, err := vars.Name()
if err != nil {
return nil, err
@ -97,6 +105,8 @@ func (b *jobBuilder) Build(stack cdktf.TerraformStack, vars builder.Variables) (
Location: pointers.Ptr(vars.GCPRegion),
DependsOn: &b.dependencies,
LaunchStage: launchStage,
Template: &cloudrunv2job.CloudRunV2JobTemplate{
TaskCount: pointers.Ptr(float64(1)),

View File

@ -86,10 +86,17 @@ func (b *serviceBuilder) AddDependency(dep cdktf.ITerraformDependable) {
func (b *serviceBuilder) Build(stack cdktf.TerraformStack, vars builder.Variables) (builder.Resource, error) {
var vpcAccess *cloudrunv2service.CloudRunV2ServiceTemplateVpcAccess
var launchStage *string
if vars.PrivateNetwork != nil {
// https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_service#example-usage---cloudrunv2-service-directvpc
// https://cloud.google.com/run/docs/configuring/vpc-direct-vpc
launchStage = pointers.Ptr("BETA") // Direct VPC is still in beta.
vpcAccess = &cloudrunv2service.CloudRunV2ServiceTemplateVpcAccess{
Connector: vars.PrivateNetwork.Connector.SelfLink(),
Egress: pointers.Ptr("PRIVATE_RANGES_ONLY"),
NetworkInterfaces: &[]*cloudrunv2service.CloudRunV2ServiceTemplateVpcAccessNetworkInterfaces{{
Network: vars.PrivateNetwork.Network.Id(),
Subnetwork: vars.PrivateNetwork.Subnetwork.Id(),
}},
Egress: pointers.Ptr("PRIVATE_RANGES_ONLY"),
}
}
@ -133,6 +140,8 @@ func (b *serviceBuilder) Build(stack cdktf.TerraformStack, vars builder.Variable
DependsOn: &b.dependencies,
Lifecycle: lifecycle,
LaunchStage: launchStage,
// Disallows direct traffic from public internet, we have a LB set up for that.
Ingress: pointers.Ptr("INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER"),