msp: add monitoring stack (#58816)
Adds GCP Monitoring Alert Policies for Cloud Run Services, Cloud Run Jobs, and, if enabled, Cloud Redis.
Parent: b344c534e9
Commit: 8c9e114549
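For context, here is a minimal sketch (all values hypothetical) of the `monitoring` block a service spec can now carry, built with the spec types added in dev/managedservicesplatform/spec/monitoring.go below and printed as JSON:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
	"github.com/sourcegraph/sourcegraph/lib/pointers"
)

func main() {
	// Hypothetical values: alert when more than 10% of responses are 5xx,
	// ignoring 501s, evaluated over a 120s window.
	monitoring := spec.MonitoringSpec{
		Alerts: spec.MonitoringAlertsSpec{
			ResponseCodeRatios: []spec.ResponseCodeRatioSpec{{
				ID:           "5xx-ratio",
				Name:         "High 5xx Response Ratio",
				CodeClass:    pointers.Ptr("5xx"),
				ExcludeCodes: []string{"501"},
				Duration:     pointers.Ptr("120s"),
				Ratio:        0.1,
			}},
		},
	}
	out, _ := json.MarshalIndent(monitoring, "", "  ")
	fmt.Println(string(out)) // the `monitoring` block as it would appear in a service spec
}
```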
@@ -12,6 +12,7 @@ go_library(
        "//dev/managedservicesplatform/internal/stack",
        "//dev/managedservicesplatform/internal/stack/cloudrun",
        "//dev/managedservicesplatform/internal/stack/iam",
        "//dev/managedservicesplatform/internal/stack/monitoring",
        "//dev/managedservicesplatform/internal/stack/options/terraformversion",
        "//dev/managedservicesplatform/internal/stack/options/tfcbackend",
        "//dev/managedservicesplatform/internal/stack/project",
@@ -0,0 +1,26 @@
load("//dev:go_defs.bzl", "go_test")
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
    name = "monitoringalertpolicy",
    srcs = ["monitoringalertpolicy.go"],
    importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/monitoringalertpolicy",
    visibility = ["//dev/managedservicesplatform:__subpackages__"],
    deps = [
        "//dev/managedservicesplatform/internal/resourceid",
        "//lib/errors",
        "//lib/pointers",
        "@com_github_aws_constructs_go_constructs_v10//:constructs",
        "@com_github_sourcegraph_managed_services_platform_cdktf_gen_google//monitoringalertpolicy",
    ],
)

go_test(
    name = "monitoringalertpolicy_test",
    srcs = ["monitoringalertpolicy_test.go"],
    embed = [":monitoringalertpolicy"],
    deps = [
        "//lib/pointers",
        "@com_github_hexops_autogold_v2//:autogold",
    ],
)
@@ -0,0 +1,314 @@
package monitoringalertpolicy

import (
    "fmt"
    "sort"
    "strconv"
    "strings"

    "github.com/sourcegraph/sourcegraph/lib/errors"

    "github.com/aws/constructs-go/constructs/v10"
    "github.com/sourcegraph/managed-services-platform-cdktf/gen/google/monitoringalertpolicy"

    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
    "github.com/sourcegraph/sourcegraph/lib/pointers"
)

type Aligner string

const (
    MonitoringAlignNone          Aligner = "ALIGN_NONE"
    MonitoringAlignDelta         Aligner = "ALIGN_DELTA"
    MonitoringAlignRate          Aligner = "ALIGN_RATE"
    MonitoringAlignInterpolate   Aligner = "ALIGN_INTERPOLATE"
    MonitoringAlignNextOrder     Aligner = "ALIGN_NEXT_ORDER"
    MonitoringAlignMin           Aligner = "ALIGN_MIN"
    MonitoringAlignMax           Aligner = "ALIGN_MAX"
    MonitoringAlignMean          Aligner = "ALIGN_MEAN"
    MonitoringAlignCount         Aligner = "ALIGN_COUNT"
    MonitoringAlignSum           Aligner = "ALIGN_SUM"
    MonitoringAlignStddev        Aligner = "ALIGN_STDDEV"
    MonitoringAlignCountTrue     Aligner = "ALIGN_COUNT_TRUE"
    MonitoringAlignCountFalse    Aligner = "ALIGN_COUNT_FALSE"
    MonitoringAlignFractionTrue  Aligner = "ALIGN_FRACTION_TRUE"
    MonitoringAlignPercentile99  Aligner = "ALIGN_PERCENTILE_99"
    MonitoringAlignPercentile95  Aligner = "ALIGN_PERCENTILE_95"
    MonitoringAlignPercentile50  Aligner = "ALIGN_PERCENTILE_50"
    MonitoringAlignPercentile05  Aligner = "ALIGN_PERCENTILE_05"
    MonitoringAlignPercentChange Aligner = "ALIGN_PERCENT_CHANGE"
)

type Reducer string

const (
    MonitoringReduceNone         Reducer = "REDUCE_NONE"
    MonitoringReduceMean         Reducer = "REDUCE_MEAN"
    MonitoringReduceMin          Reducer = "REDUCE_MIN"
    MonitoringReduceMax          Reducer = "REDUCE_MAX"
    MonitoringReduceSum          Reducer = "REDUCE_SUM"
    MonitoringReduceStddev       Reducer = "REDUCE_STDDEV"
    MonitoringReduceCount        Reducer = "REDUCE_COUNT"
    MonitoringReduceCountTrue    Reducer = "REDUCE_COUNT_TRUE"
    MonitoringReduceCountFalse   Reducer = "REDUCE_COUNT_FALSE"
    MonitoringReduceFractionTrue Reducer = "REDUCE_FRACTION_TRUE"
    MonitoringReducePercentile99 Reducer = "REDUCE_PERCENTILE_99"
    MonitoringReducePercentile95 Reducer = "REDUCE_PERCENTILE_95"
    MonitoringReducePercentile50 Reducer = "REDUCE_PERCENTILE_50"
    MonitoringReducePercentile05 Reducer = "REDUCE_PERCENTILE_05"
)

type Comparison string

const (
    ComparisonGT Comparison = "COMPARISON_GT"
    ComparisonLT Comparison = "COMPARISON_LT"
)

// ThresholdAggregation for alerting when a metric exceeds a defined threshold
//
// Must specify a `metric.type` filter. Additional filters are optional.
// All filters are joined with ` AND `
//
// GroupByFields is an optional field specifying time series labels to aggregate:
//   - For services it defaults to `["resource.label.revision_name"]`; additional fields are appended
//   - For jobs there is no default
type ThresholdAggregation struct {
    Filters       map[string]string
    GroupByFields []string
    Comparison    Comparison
    Aligner       Aligner
    Reducer       Reducer
    Period        string
    Threshold     float64
    Duration      string
}

// ResponseCodeMetric for alerting when the number of a certain response code exceeds a threshold
//
// Must specify either `Code` (e.g. 404) or `CodeClass` (e.g. 4xx)
//
// `ExcludeCodes` allows filtering out specific response codes from the `CodeClass`
type ResponseCodeMetric struct {
    Code         *int
    CodeClass    *string
    ExcludeCodes []string
    Ratio        float64
    Duration     *string
}

type CloudService int

const (
    CloudRunService CloudService = iota
    CloudRunJob
    CloudRedis
)

// Config for a Monitoring Alert Policy
// Must define either `ThresholdAggregation` or `ResponseCodeMetric`
type Config struct {
    // A unique identifier
    ID          string
    Name        string
    Description *string
    ProjectID   string
    // Name of the service/job/redis to filter the alert on
    ServiceName string
    // Type of the service/job/redis
    ServiceKind CloudService

    ThresholdAggregation *ThresholdAggregation
    ResponseCodeMetric   *ResponseCodeMetric
}

type Output struct {
}
func New(scope constructs.Construct, id resourceid.ID, config *Config) (*Output, error) {
    if config.ThresholdAggregation == nil && config.ResponseCodeMetric == nil {
        return nil, errors.New("must provide either a ThresholdAggregation or ResponseCodeMetric config")
    }

    if config.ThresholdAggregation != nil && config.ResponseCodeMetric != nil {
        return nil, errors.New("must provide either a ThresholdAggregation or ResponseCodeMetric config, not both")
    }

    if config.ThresholdAggregation != nil {
        if len(config.ThresholdAggregation.Filters) == 0 {
            return nil, errors.New("must specify at least one filter for threshold aggregation")
        }

        if _, ok := config.ThresholdAggregation.Filters["metric.type"]; !ok {
            return nil, errors.New("must specify filter for `metric.type`")
        }
        return thresholdAggregation(scope, id, config)
    }
    return responseCodeMetric(scope, id, config)
}

// thresholdAggregation defines a monitoring alert policy based on a single metric threshold
func thresholdAggregation(scope constructs.Construct, id resourceid.ID, config *Config) (*Output, error) {
    // Set some defaults
    switch config.ServiceKind {
    case CloudRunService:
        config.ThresholdAggregation.GroupByFields = append([]string{"resource.label.revision_name"}, config.ThresholdAggregation.GroupByFields...)
    case CloudRunJob:
        // No defaults
    case CloudRedis:
        // No defaults
    default:
        return nil, errors.Newf("invalid service kind %q", config.ServiceKind)
    }

    if config.ThresholdAggregation.Comparison == "" {
        config.ThresholdAggregation.Comparison = ComparisonGT
    }

    if config.ThresholdAggregation.Duration == "" {
        config.ThresholdAggregation.Duration = "0s"
    }

    _ = monitoringalertpolicy.NewMonitoringAlertPolicy(scope,
        id.TerraformID(config.ID), &monitoringalertpolicy.MonitoringAlertPolicyConfig{
            Project:     pointers.Ptr(config.ProjectID),
            DisplayName: pointers.Ptr(config.Name),
            Documentation: &monitoringalertpolicy.MonitoringAlertPolicyDocumentation{
                Content:  config.Description,
                MimeType: pointers.Ptr("text/markdown"),
            },
            Combiner: pointers.Ptr("OR"),
            Conditions: []monitoringalertpolicy.MonitoringAlertPolicyConditions{
                {
                    DisplayName: pointers.Ptr(config.Name),
                    ConditionThreshold: &monitoringalertpolicy.MonitoringAlertPolicyConditionsConditionThreshold{
                        Aggregations: []monitoringalertpolicy.MonitoringAlertPolicyConditionsConditionThresholdAggregations{
                            {
                                AlignmentPeriod:    pointers.Ptr(config.ThresholdAggregation.Period),
                                PerSeriesAligner:   pointers.Ptr(string(config.ThresholdAggregation.Aligner)),
                                CrossSeriesReducer: pointers.Ptr(string(config.ThresholdAggregation.Reducer)),
                                GroupByFields:      pointers.Ptr(pointers.Slice(config.ThresholdAggregation.GroupByFields)),
                            },
                        },
                        Comparison:     pointers.Ptr(string(config.ThresholdAggregation.Comparison)),
                        Duration:       pointers.Ptr(config.ThresholdAggregation.Duration),
                        Filter:         pointers.Ptr(buildFilter(config)),
                        ThresholdValue: pointers.Float64(config.ThresholdAggregation.Threshold),
                        Trigger: &monitoringalertpolicy.MonitoringAlertPolicyConditionsConditionThresholdTrigger{
                            Count: pointers.Float64(1),
                        },
                    },
                },
            },
            AlertStrategy: &monitoringalertpolicy.MonitoringAlertPolicyAlertStrategy{
                AutoClose: pointers.Ptr("604800s"),
            },
        })
    return &Output{}, nil
}

// buildFilter creates the Filter string for a ThresholdAggregation monitoring alert policy
func buildFilter(config *Config) string {
    filters := make([]string, 0)
    for key, val := range config.ThresholdAggregation.Filters {
        filters = append(filters, fmt.Sprintf(`%s = "%s"`, key, val))
    }

    // Sort to ensure stable output for testing, because
    // config.ThresholdAggregation.Filters is a map.
    sort.Strings(filters)

    switch config.ServiceKind {
    case CloudRunService:
        filters = append(filters,
            `resource.type = "cloud_run_revision"`,
            fmt.Sprintf(`resource.labels.service_name = "%s"`, config.ServiceName),
        )
    case CloudRunJob:
        filters = append(filters,
            `resource.type = "cloud_run_job"`,
            fmt.Sprintf(`resource.labels.job_name = "%s"`, config.ServiceName),
        )
    case CloudRedis:
        filters = append(filters,
            `resource.type = "redis_instance"`,
            fmt.Sprintf(`resource.labels.redis_instance_id = "%s"`, config.ServiceName),
        )
    }

    return strings.Join(filters, " AND ")
}

// responseCodeMetric defines the MonitoringAlertPolicy for response code metrics
// Supports a single Code e.g. 404 or an entire Code Class e.g. 4xx
// Optionally when using a Code Class, codes to exclude can be defined
func responseCodeMetric(scope constructs.Construct, id resourceid.ID, config *Config) (*Output, error) {
    query := responseCodeBuilder(config)

    if config.ResponseCodeMetric.Duration == nil {
        config.ResponseCodeMetric.Duration = pointers.Ptr("60s")
    }

    _ = monitoringalertpolicy.NewMonitoringAlertPolicy(scope,
        id.TerraformID(config.ID), &monitoringalertpolicy.MonitoringAlertPolicyConfig{
            Project:     pointers.Ptr(config.ProjectID),
            DisplayName: pointers.Ptr(fmt.Sprintf("High Ratio of %s Responses", config.Name)),
            Documentation: &monitoringalertpolicy.MonitoringAlertPolicyDocumentation{
                Content:  config.Description,
                MimeType: pointers.Ptr("text/markdown"),
            },
            Combiner: pointers.Ptr("OR"),
            Conditions: []monitoringalertpolicy.MonitoringAlertPolicyConditions{
                {
                    DisplayName: pointers.Ptr(fmt.Sprintf("High Ratio of %s Responses", config.Name)),
                    ConditionMonitoringQueryLanguage: &monitoringalertpolicy.MonitoringAlertPolicyConditionsConditionMonitoringQueryLanguage{
                        Query:    pointers.Ptr(query),
                        Duration: config.ResponseCodeMetric.Duration,
                        Trigger: &monitoringalertpolicy.MonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTrigger{
                            Count: pointers.Float64(1),
                        },
                    },
                },
            },
            AlertStrategy: &monitoringalertpolicy.MonitoringAlertPolicyAlertStrategy{
                AutoClose: pointers.Ptr("604800s"),
            },
        })
    return &Output{}, nil
}

// responseCodeBuilder builds the MQL for a response code metric alert
func responseCodeBuilder(config *Config) string {
    var builder strings.Builder

    builder.WriteString(`fetch cloud_run_revision
| metric 'run.googleapis.com/request_count'
| group_by 15s, [value_request_count_aggregate: aggregate(value.request_count)]
| every 15s
| {
`)
    if config.ResponseCodeMetric.CodeClass != nil {
        builder.WriteString(" group_by [metric.response_code, metric.response_code_class],\n")
    } else {
        builder.WriteString(" group_by [metric.response_code],\n")
    }
    builder.WriteString(" [response_code_count_aggregate: aggregate(value_request_count_aggregate)]\n")
    if config.ResponseCodeMetric.Code != nil {
        builder.WriteString(fmt.Sprintf(" | filter (metric.response_code = '%d')\n", *config.ResponseCodeMetric.Code))
    } else {
        builder.WriteString(fmt.Sprintf(" | filter (metric.response_code_class = '%s')\n", *config.ResponseCodeMetric.CodeClass))
    }
    if len(config.ResponseCodeMetric.ExcludeCodes) > 0 {
        for _, code := range config.ResponseCodeMetric.ExcludeCodes {
            builder.WriteString(fmt.Sprintf(" | filter (metric.response_code != '%s')\n", code))
        }
    }
    builder.WriteString(`; group_by [],
[value_request_count_aggregate_aggregate: aggregate(value_request_count_aggregate)]
}
| join
| value [response_code_ratio: val(0) / val(1)]
`)
    builder.WriteString(fmt.Sprintf("| condition gt(val(), %s)\n", strconv.FormatFloat(config.ResponseCodeMetric.Ratio, 'f', -1, 64)))
    return builder.String()
}
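As a usage illustration of the resource above, here is a minimal sketch of wiring a threshold-based alert onto a CDKTF stack; the metric choice, period, and threshold are hypothetical, not values set by this change:

```go
package example

import (
	"github.com/hashicorp/terraform-cdk-go/cdktf"

	"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/monitoringalertpolicy"
	"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
	"github.com/sourcegraph/sourcegraph/lib/pointers"
)

// addRequestLatencyAlert is a sketch of consuming the new resource from a CDKTF
// stack; the metric, period, and threshold values here are hypothetical.
func addRequestLatencyAlert(stack cdktf.TerraformStack, projectID, serviceName string) error {
	_, err := monitoringalertpolicy.New(stack, resourceid.New("monitoring"), &monitoringalertpolicy.Config{
		ID:          "latency",
		Name:        "High Request Latency",
		Description: pointers.Ptr("p99 request latency is above the configured threshold"),
		ProjectID:   projectID,
		ServiceName: serviceName,
		ServiceKind: monitoringalertpolicy.CloudRunService,
		ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
			Filters:   map[string]string{"metric.type": "run.googleapis.com/request_latencies"},
			Aligner:   monitoringalertpolicy.MonitoringAlignPercentile99,
			Reducer:   monitoringalertpolicy.MonitoringReduceMax,
			Period:    "300s",
			Threshold: 2000, // milliseconds; hypothetical
		},
	})
	return err
}
```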
@@ -0,0 +1,136 @@
package monitoringalertpolicy

import (
    "testing"

    "github.com/hexops/autogold/v2"

    "github.com/sourcegraph/sourcegraph/lib/pointers"
)

func TestBuildFilter(t *testing.T) {
    for _, tc := range []struct {
        name   string
        config Config
        want   autogold.Value
    }{
        {
            name: "Service Metric",
            config: Config{
                ServiceName: "my-service-name",
                ServiceKind: CloudRunService,
                ThresholdAggregation: &ThresholdAggregation{
                    Filters: map[string]string{
                        "metric.type": "run.googleapis.com/container/startup_latencies",
                    },
                },
            },
            want: autogold.Expect(`metric.type = "run.googleapis.com/container/startup_latencies" AND resource.type = "cloud_run_revision" AND resource.labels.service_name = "my-service-name"`),
        },
        {
            name: "Job Metric",
            config: Config{
                ServiceName: "my-job-name",
                ServiceKind: CloudRunJob,
                ThresholdAggregation: &ThresholdAggregation{
                    Filters: map[string]string{
                        "metric.type":          "run.googleapis.com/job/completed_task_attempt_count",
                        "metric.labels.result": "failed",
                    },
                },
            },
            want: autogold.Expect(`metric.labels.result = "failed" AND metric.type = "run.googleapis.com/job/completed_task_attempt_count" AND resource.type = "cloud_run_job" AND resource.labels.job_name = "my-job-name"`),
        },
    } {
        t.Run(tc.name, func(t *testing.T) {
            got := buildFilter(&tc.config)
            tc.want.Equal(t, got)
        })
    }
}

func TestResponseCodeBuilder(t *testing.T) {
    for _, tc := range []struct {
        name string
        ResponseCodeMetric
        want autogold.Value
    }{
        {
            name: "Single Response Code",
            ResponseCodeMetric: ResponseCodeMetric{
                Code:  pointers.Ptr(404),
                Ratio: 0.1,
            },
            want: autogold.Expect(`fetch cloud_run_revision
| metric 'run.googleapis.com/request_count'
| group_by 15s, [value_request_count_aggregate: aggregate(value.request_count)]
| every 15s
| {
group_by [metric.response_code],
[response_code_count_aggregate: aggregate(value_request_count_aggregate)]
| filter (metric.response_code = '404')
; group_by [],
[value_request_count_aggregate_aggregate: aggregate(value_request_count_aggregate)]
}
| join
| value [response_code_ratio: val(0) / val(1)]
| condition gt(val(), 0.1)
`),
        },
        {
            name: "Response Code Class",
            ResponseCodeMetric: ResponseCodeMetric{
                CodeClass: pointers.Ptr("4xx"),
                Ratio:     0.4,
            },
            want: autogold.Expect(`fetch cloud_run_revision
| metric 'run.googleapis.com/request_count'
| group_by 15s, [value_request_count_aggregate: aggregate(value.request_count)]
| every 15s
| {
group_by [metric.response_code, metric.response_code_class],
[response_code_count_aggregate: aggregate(value_request_count_aggregate)]
| filter (metric.response_code_class = '4xx')
; group_by [],
[value_request_count_aggregate_aggregate: aggregate(value_request_count_aggregate)]
}
| join
| value [response_code_ratio: val(0) / val(1)]
| condition gt(val(), 0.4)
`),
        },
        {
            name: "Response Code Class + Exclude",
            ResponseCodeMetric: ResponseCodeMetric{
                CodeClass:    pointers.Ptr("4xx"),
                ExcludeCodes: []string{"404", "429"},
                Ratio:        0.8,
            },
            want: autogold.Expect(`fetch cloud_run_revision
| metric 'run.googleapis.com/request_count'
| group_by 15s, [value_request_count_aggregate: aggregate(value.request_count)]
| every 15s
| {
group_by [metric.response_code, metric.response_code_class],
[response_code_count_aggregate: aggregate(value_request_count_aggregate)]
| filter (metric.response_code_class = '4xx')
| filter (metric.response_code != '404')
| filter (metric.response_code != '429')
; group_by [],
[value_request_count_aggregate_aggregate: aggregate(value_request_count_aggregate)]
}
| join
| value [response_code_ratio: val(0) / val(1)]
| condition gt(val(), 0.8)
`),
        },
    } {
        t.Run(tc.name, func(t *testing.T) {
            got := responseCodeBuilder(&Config{
                ServiceName:        "test-service",
                ResponseCodeMetric: &tc.ResponseCodeMetric,
            })
            tc.want.Equal(t, got)
        })
    }
}
@@ -15,6 +15,7 @@ import (
)

type Output struct {
    ID          *string
    Endpoint    string
    Certificate gsmsecret.Output
}
@@ -64,5 +65,6 @@ func New(scope constructs.Construct, id resourceid.ID, config Config) (*Output,
        Endpoint: fmt.Sprintf("rediss://:%s@%s:%v",
            *redis.AuthString(), *redis.Host(), *redis.Port()),
        Certificate: *redisCACert,
        ID:          redis.Id(),
    }, nil
}
@@ -36,7 +36,9 @@ import (
    "github.com/sourcegraph/sourcegraph/lib/pointers"
)

type CrossStackOutput struct{}
type CrossStackOutput struct {
    RedisInstanceID *string
}

type Variables struct {
    ProjectID string
@@ -143,6 +145,8 @@ func NewStack(stacks *stack.Set, vars Variables) (crossStackOutput *CrossStackOu

    // redisInstance is only created and non-nil if Redis is configured for the
    // environment.
    // If Redis is configured, populate cross-stack output with Redis ID.
    var redisInstanceID *string
    if vars.Environment.Resources != nil && vars.Environment.Resources.Redis != nil {
        redisInstance, err := redis.New(stack,
            resourceid.New("redis"),
@@ -156,6 +160,8 @@ func NewStack(stacks *stack.Set, vars Variables) (crossStackOutput *CrossStackOu
            return nil, errors.Wrap(err, "failed to render Redis instance")
        }

        redisInstanceID = redisInstance.ID

        // Configure endpoint string.
        cloudRunBuilder.AddEnv("REDIS_ENDPOINT", redisInstance.Endpoint)
@@ -265,7 +271,9 @@ func NewStack(stacks *stack.Set, vars Variables) (crossStackOutput *CrossStackOu
        "Cloud Run resource location")
    locals.Add("image_tag", imageTag.StringValue,
        "Resolved tag of service image to deploy")
    return &CrossStackOutput{}, nil
    return &CrossStackOutput{
        RedisInstanceID: redisInstanceID,
    }, nil
}

type envVariablesData struct {
@@ -0,0 +1,18 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
    name = "monitoring",
    srcs = ["monitoring.go"],
    importpath = "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/monitoring",
    visibility = ["//dev/managedservicesplatform:__subpackages__"],
    deps = [
        "//dev/managedservicesplatform/internal/resource/monitoringalertpolicy",
        "//dev/managedservicesplatform/internal/resourceid",
        "//dev/managedservicesplatform/internal/stack",
        "//dev/managedservicesplatform/internal/stack/options/googleprovider",
        "//dev/managedservicesplatform/spec",
        "//lib/errors",
        "//lib/pointers",
        "@com_github_hashicorp_terraform_cdk_go_cdktf//:cdktf",
    ],
)
@@ -0,0 +1,288 @@
package monitoring

import (
    "github.com/hashicorp/terraform-cdk-go/cdktf"

    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/monitoringalertpolicy"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/options/googleprovider"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
    "github.com/sourcegraph/sourcegraph/lib/errors"
    "github.com/sourcegraph/sourcegraph/lib/pointers"
)

// Common
// - Container (8)
//   - run.googleapis.com/container/billable_instance_time
//   - run.googleapis.com/container/cpu/allocation_time
//   * run.googleapis.com/container/cpu/utilizations
//   - run.googleapis.com/container/memory/allocation_time
//   * run.googleapis.com/container/memory/utilizations
//   * run.googleapis.com/container/startup_latencies
//   - run.googleapis.com/container/network/received_bytes_count
//   - run.googleapis.com/container/network/sent_bytes_count
// - Log-based metrics (2)
//   - logging.googleapis.com/byte_count
//   - logging.googleapis.com/log_entry_count
// Cloud Run Job
// - Job (4)
//   - run.googleapis.com/job/completed_execution_count
//   * run.googleapis.com/job/completed_task_attempt_count
//   - run.googleapis.com/job/running_executions
//   - run.googleapis.com/job/running_task_attempts
// Cloud Run Service
// - Container (9)
//   - run.googleapis.com/container/completed_probe_attempt_count
//   - run.googleapis.com/container/completed_probe_count
//   - run.googleapis.com/container/probe_attempt_latencies
//   - run.googleapis.com/container/probe_latencies
//   * run.googleapis.com/container/instance_count
//   - run.googleapis.com/container/max_request_concurrencies
//   - run.googleapis.com/container/cpu/usage
//   - run.googleapis.com/container/containers
//   - run.googleapis.com/container/memory/usage
// - Request_count (1)
//   - run.googleapis.com/request_count
// - Request_latencies (1)
//   * run.googleapis.com/request_latencies
// - Pending_queue (1)
//   - run.googleapis.com/pending_queue/pending_requests
type CrossStackOutput struct{}

type Variables struct {
    ProjectID  string
    Service    spec.ServiceSpec
    Monitoring spec.MonitoringSpec
    MaxCount   *int

    // If Redis is enabled we configure alerts for it
    RedisInstanceID *string
}

const StackName = "monitoring"

func NewStack(stacks *stack.Set, vars Variables) (*CrossStackOutput, error) {
    stack, _, err := stacks.New(StackName, googleprovider.With(vars.ProjectID))
    if err != nil {
        return nil, err
    }

    id := resourceid.New("monitoring")
    err = commonAlerts(stack, id.Group("common"), vars)
    if err != nil {
        return nil, errors.Wrap(err, "failed to create common alerts")
    }

    switch pointers.Deref(vars.Service.Kind, spec.ServiceKindService) {
    case spec.ServiceKindService:
        if err = serviceAlerts(stack, id.Group("service"), vars); err != nil {
            return nil, errors.Wrap(err, "failed to create service alerts")
        }

        if vars.Monitoring.Alerts.ResponseCodeRatios != nil {
            if err = responseCodeMetrics(stack, id.Group("response-code"), vars); err != nil {
                return nil, errors.Wrap(err, "failed to create response code metrics")
            }
        }
    case spec.ServiceKindJob:
        if err = jobAlerts(stack, id.Group("job"), vars); err != nil {
            return nil, errors.Wrap(err, "failed to create job alerts")
        }
    default:
        return nil, errors.New("unknown service kind")
    }

    if vars.RedisInstanceID != nil {
        if err = redisAlerts(stack, id.Group("redis"), vars); err != nil {
            return nil, errors.Wrap(err, "failed to create redis alerts")
        }
    }

    return &CrossStackOutput{}, nil
}
func commonAlerts(stack cdktf.TerraformStack, id resourceid.ID, vars Variables) error {
    // Convert a spec.ServiceKind into a monitoringalertpolicy.CloudService
    serviceKind := monitoringalertpolicy.CloudRunService
    kind := pointers.Deref(vars.Service.Kind, "service")
    if kind == spec.ServiceKindJob {
        serviceKind = monitoringalertpolicy.CloudRunJob
    }

    for _, config := range []monitoringalertpolicy.Config{
        {
            ID:          "cpu",
            Name:        "High Container CPU Utilization",
            Description: pointers.Ptr("High CPU Usage - it may be necessary to reduce load or increase CPU allocation"),
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters:   map[string]string{"metric.type": "run.googleapis.com/container/cpu/utilizations"},
                Aligner:   monitoringalertpolicy.MonitoringAlignPercentile99,
                Reducer:   monitoringalertpolicy.MonitoringReduceMax,
                Period:    "300s",
                Threshold: 0.8,
            },
        },
        {
            ID:          "memory",
            Name:        "High Container Memory Utilization",
            Description: pointers.Ptr("High Memory Usage - it may be necessary to reduce load or increase memory allocation"),
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters:   map[string]string{"metric.type": "run.googleapis.com/container/memory/utilizations"},
                Aligner:   monitoringalertpolicy.MonitoringAlignPercentile99,
                Reducer:   monitoringalertpolicy.MonitoringReduceMax,
                Period:    "300s",
                Threshold: 0.8,
            },
        },
        {
            ID:          "startup",
            Name:        "Container Startup Latency",
            Description: pointers.Ptr("Instance is taking a long time to start up - something may be blocking startup"),
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters:   map[string]string{"metric.type": "run.googleapis.com/container/startup_latencies"},
                Aligner:   monitoringalertpolicy.MonitoringAlignPercentile99,
                Reducer:   monitoringalertpolicy.MonitoringReduceMax,
                Period:    "60s",
                Threshold: 10000,
            },
        },
    } {
        config.ProjectID = vars.ProjectID
        config.ServiceName = vars.Service.ID
        config.ServiceKind = serviceKind
        if _, err := monitoringalertpolicy.New(stack, id, &config); err != nil {
            return err
        }
    }

    return nil
}
func serviceAlerts(stack cdktf.TerraformStack, id resourceid.ID, vars Variables) error {
    // Only provision if MaxCount is specified above 5
    if pointers.Deref(vars.MaxCount, 0) > 5 {
        if _, err := monitoringalertpolicy.New(stack, id, &monitoringalertpolicy.Config{
            ID:          "instance_count",
            Name:        "Container Instance Count",
            Description: pointers.Ptr("There are a lot of Cloud Run instances running - we may need to increase per-instance requests to make sure we won't hit the configured max instance count"),
            ProjectID:   vars.ProjectID,
            ServiceName: vars.Service.ID,
            ServiceKind: monitoringalertpolicy.CloudRunService,
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters: map[string]string{"metric.type": "run.googleapis.com/container/instance_count"},
                Aligner: monitoringalertpolicy.MonitoringAlignMax,
                Reducer: monitoringalertpolicy.MonitoringReduceMax,
                Period:  "60s",
            },
        }); err != nil {
            return err
        }
    }
    return nil
}
func jobAlerts(stack cdktf.TerraformStack, id resourceid.ID, vars Variables) error {
    // Alert whenever a Cloud Run Job fails
    if _, err := monitoringalertpolicy.New(stack, id, &monitoringalertpolicy.Config{
        ID:          "job_failures",
        Name:        "Cloud Run Job Failures",
        Description: pointers.Ptr("Failed executions of Cloud Run Job"),
        ProjectID:   vars.ProjectID,
        ServiceName: vars.Service.ID,
        ServiceKind: monitoringalertpolicy.CloudRunJob,
        ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
            Filters: map[string]string{
                "metric.type":          "run.googleapis.com/job/completed_task_attempt_count",
                "metric.labels.result": "failed",
            },
            GroupByFields: []string{"metric.label.result"},
            Aligner:       monitoringalertpolicy.MonitoringAlignCount,
            Reducer:       monitoringalertpolicy.MonitoringReduceSum,
            Period:        "60s",
            Threshold:     0,
        },
    }); err != nil {
        return err
    }

    return nil
}
func responseCodeMetrics(stack cdktf.TerraformStack, id resourceid.ID, vars Variables) error {
    for _, config := range vars.Monitoring.Alerts.ResponseCodeRatios {
        if _, err := monitoringalertpolicy.New(stack, id, &monitoringalertpolicy.Config{
            ID:          config.ID,
            ProjectID:   vars.ProjectID,
            Name:        config.Name,
            ServiceName: vars.Service.ID,
            ServiceKind: monitoringalertpolicy.CloudRunService,
            ResponseCodeMetric: &monitoringalertpolicy.ResponseCodeMetric{
                Code:         config.Code,
                CodeClass:    config.CodeClass,
                ExcludeCodes: config.ExcludeCodes,
                Ratio:        config.Ratio,
                Duration:     config.Duration,
            },
        }); err != nil {
            return err
        }
    }

    return nil
}
func redisAlerts(stack cdktf.TerraformStack, id resourceid.ID, vars Variables) error {
    for _, config := range []monitoringalertpolicy.Config{
        {
            ID:          "memory",
            Name:        "Cloud Redis - System Memory Utilization",
            Description: pointers.Ptr("This alert fires if the system memory utilization is above the set threshold. The utilization is measured on a scale of 0 to 1."),
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters:   map[string]string{"metric.type": "redis.googleapis.com/stats/memory/system_memory_usage_ratio"},
                Aligner:   monitoringalertpolicy.MonitoringAlignMean,
                Reducer:   monitoringalertpolicy.MonitoringReduceNone,
                Period:    "300s",
                Threshold: 0.8,
            },
        },
        {
            ID:          "cpu",
            Name:        "Cloud Redis - System CPU Utilization",
            Description: pointers.Ptr("This alert fires if the Redis Engine CPU Utilization goes above the set threshold. The utilization is measured on a scale of 0 to 1."),
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters:       map[string]string{"metric.type": "redis.googleapis.com/stats/cpu_utilization_main_thread"},
                GroupByFields: []string{"resource.label.instance_id", "resource.label.node_id"},
                Aligner:       monitoringalertpolicy.MonitoringAlignRate,
                Reducer:       monitoringalertpolicy.MonitoringReduceSum,
                Period:        "300s",
                Threshold:     0.9,
            },
        },
        {
            ID:          "failover",
            Name:        "Cloud Redis - Standard Instance Failover",
            Description: pointers.Ptr("This alert fires if failover occurs for a standard tier instance."),
            ThresholdAggregation: &monitoringalertpolicy.ThresholdAggregation{
                Filters:       map[string]string{"metric.type": "redis.googleapis.com/stats/cpu_utilization_main_thread"},
                GroupByFields: []string{"resource.label.instance_id", "resource.label.node_id"},
                Aligner:       monitoringalertpolicy.MonitoringAlignStddev,
                Reducer:       monitoringalertpolicy.MonitoringReduceNone,
                Period:        "300s",
                Threshold:     0,
            },
        },
    } {
        config.ProjectID = vars.ProjectID
        config.ServiceName = *vars.RedisInstanceID
        config.ServiceKind = monitoringalertpolicy.CloudRedis
        if _, err := monitoringalertpolicy.New(stack, id, &config); err != nil {
            return err
        }
    }

    return nil
}
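For the other Config mode the resource supports, a minimal sketch mirroring what responseCodeMetrics above renders from the spec; the 5xx class, ratio, and duration values here are hypothetical:

```go
package example

import (
	"github.com/hashicorp/terraform-cdk-go/cdktf"

	"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resource/monitoringalertpolicy"
	"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/resourceid"
	"github.com/sourcegraph/sourcegraph/lib/pointers"
)

// addServerErrorAlert is a sketch of an MQL-based response-code alert; the
// values are hypothetical, not defaults introduced by this change.
func addServerErrorAlert(stack cdktf.TerraformStack, projectID, serviceName string) error {
	_, err := monitoringalertpolicy.New(stack, resourceid.New("monitoring"), &monitoringalertpolicy.Config{
		ID:          "5xx",
		Name:        "5xx", // rendered as "High Ratio of 5xx Responses"
		Description: pointers.Ptr("The ratio of 5xx responses is above the configured threshold"),
		ProjectID:   projectID,
		ServiceName: serviceName,
		ServiceKind: monitoringalertpolicy.CloudRunService,
		ResponseCodeMetric: &monitoringalertpolicy.ResponseCodeMetric{
			CodeClass: pointers.Ptr("5xx"),
			Ratio:     0.05,
			Duration:  pointers.Ptr("120s"),
		},
	})
	return err
}
```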
@@ -11,6 +11,7 @@ import (
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/cloudrun"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/iam"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/monitoring"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/options/terraformversion"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/options/tfcbackend"
    "github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/internal/stack/project"
@@ -53,6 +54,7 @@ func (r *Renderer) RenderEnvironment(
    svc spec.ServiceSpec,
    build spec.BuildSpec,
    env spec.EnvironmentSpec,
    monitoringSpec spec.MonitoringSpec,
) (*CDKTF, error) {
    terraformVersion := terraform.Version
    stackSetOptions := []stack.NewStackOption{
@@ -107,7 +109,7 @@ func (r *Renderer) RenderEnvironment(
    if err != nil {
        return nil, errors.Wrap(err, "failed to create IAM stack")
    }
    if _, err := cloudrun.NewStack(stacks, cloudrun.Variables{
    cloudrunOutput, err := cloudrun.NewStack(stacks, cloudrun.Variables{
        ProjectID:                      *projectOutput.Project.ProjectId(),
        CloudRunWorkloadServiceAccount: iamOutput.CloudRunWorkloadServiceAccount,
@@ -116,10 +118,26 @@ func (r *Renderer) RenderEnvironment(
        Environment: env,

        StableGenerate: r.StableGenerate,
    }); err != nil {
    })
    if err != nil {
        return nil, errors.Wrap(err, "failed to create cloudrun stack")
    }

    if _, err := monitoring.NewStack(stacks, monitoring.Variables{
        ProjectID:  *projectOutput.Project.ProjectId(),
        Service:    svc,
        Monitoring: monitoringSpec,
        MaxCount: func() *int {
            if env.Instances.Scaling != nil {
                return env.Instances.Scaling.MaxCount
            }
            return nil
        }(),
        RedisInstanceID: cloudrunOutput.RedisInstanceID,
    }); err != nil {
        return nil, errors.Wrap(err, "failed to create monitoring stack")
    }

    // Return CDKTF representation for caller to synthesize
    return &CDKTF{
        app: stack.ExtractApp(stacks),
@@ -6,6 +6,7 @@ go_library(
    srcs = [
        "build.go",
        "environment.go",
        "monitoring.go",
        "service.go",
        "spec.go",
    ],
dev/managedservicesplatform/spec/monitoring.go (new file, 95 lines)
@@ -0,0 +1,95 @@
package spec

import (
    "time"

    "github.com/grafana/regexp"

    "github.com/sourcegraph/sourcegraph/lib/errors"
)

var codeClassPattern = regexp.MustCompile(`\dx+`)

type MonitoringSpec struct {
    // Alerts is a list of alert configurations for the deployment
    Alerts MonitoringAlertsSpec `json:"alerts"`
}

func (s *MonitoringSpec) Validate() []error {
    var errs []error
    errs = append(errs, s.Alerts.Validate()...)
    return errs
}

type MonitoringAlertsSpec struct {
    ResponseCodeRatios []ResponseCodeRatioSpec `json:"responseCodeRatios"`
}

type ResponseCodeRatioSpec struct {
    ID           string   `json:"id"`
    Name         string   `json:"name"`
    Description  *string  `json:"description,omitempty"`
    Code         *int     `json:"code,omitempty"`
    CodeClass    *string  `json:"codeClass,omitempty"`
    ExcludeCodes []string `json:"excludeCodes,omitempty"`
    Duration     *string  `json:"duration,omitempty"`
    Ratio        float64  `json:"ratio"`
}

func (s *MonitoringAlertsSpec) Validate() []error {
    var errs []error
    // Use map to contain seen IDs to ensure uniqueness
    ids := make(map[string]struct{})
    for _, r := range s.ResponseCodeRatios {
        if r.ID == "" {
            errs = append(errs, errors.New("responseCodeRatios[].id is required and cannot be empty"))
        }
        if _, ok := ids[r.ID]; ok {
            errs = append(errs, errors.Newf("response code alert IDs must be unique, found duplicate ID: %s", r.ID))
        }
        ids[r.ID] = struct{}{}
        errs = append(errs, r.Validate()...)
    }
    return errs
}

func (r *ResponseCodeRatioSpec) Validate() []error {
    var errs []error

    if r.ID == "" {
        errs = append(errs, errors.New("responseCodeRatios[].id is required"))
    }

    if r.Name == "" {
        errs = append(errs, errors.New("responseCodeRatios[].name is required"))
    }

    if r.Ratio < 0 || r.Ratio > 1 {
        errs = append(errs, errors.New("responseCodeRatios[].ratio must be between 0 and 1"))
    }

    if r.CodeClass != nil && r.Code != nil {
        errs = append(errs, errors.New("only one of responseCodeRatios[].code or responseCodeRatios[].codeClass should be specified"))
    }

    if r.Code != nil && *r.Code <= 0 {
        errs = append(errs, errors.New("responseCodeRatios[].code must be positive"))
    }

    if r.CodeClass != nil {
        if !codeClassPattern.MatchString(*r.CodeClass) {
            errs = append(errs, errors.New("responseCodeRatios[].codeClass must match the format Nxx (e.g. 4xx, 5xx)"))
        }
    }

    if r.Duration != nil {
        duration, err := time.ParseDuration(*r.Duration)
        if err != nil {
            errs = append(errs, errors.Wrap(err, "responseCodeRatios[].duration must be in the format of XXs"))
        } else if duration%time.Minute != 0 {
            errs = append(errs, errors.New("responseCodeRatios[].duration must be a multiple of 60s"))
        }
    }

    return errs
}
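A small sketch of how the duration validation above behaves; the alert values are hypothetical:

```go
package main

import (
	"fmt"

	"github.com/sourcegraph/sourcegraph/dev/managedservicesplatform/spec"
	"github.com/sourcegraph/sourcegraph/lib/pointers"
)

func main() {
	// Hypothetical alert: "90s" parses as a duration but is not a whole
	// multiple of a minute, so Validate reports an error for it.
	alert := spec.ResponseCodeRatioSpec{
		ID:        "5xx-ratio",
		Name:      "High 5xx Response Ratio",
		CodeClass: pointers.Ptr("5xx"),
		Ratio:     0.1,
		Duration:  pointers.Ptr("90s"),
	}
	for _, err := range alert.Validate() {
		fmt.Println(err) // responseCodeRatios[].duration must be a multiple of 60s
	}
}
```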
@@ -25,6 +25,7 @@ type Spec struct {
    Service      ServiceSpec       `json:"service"`
    Build        BuildSpec         `json:"build"`
    Environments []EnvironmentSpec `json:"environments"`
    Monitoring   MonitoringSpec    `json:"monitoring"`
}

// Open a specification file, validate it, unmarshal the data as a MSP spec,
@@ -83,6 +84,7 @@ func (s Spec) Validate() []error {
    for _, env := range s.Environments {
        errs = append(errs, env.Validate()...)
    }
    errs = append(errs, s.Monitoring.Validate()...)
    return errs
}
@@ -274,7 +274,7 @@ Supports completions on services and environments.`,
            return errors.Newf("environment %q not found in service spec", targetEnv)
        }

        if err := syncEnvironmentWorkspaces(c, tfcClient, service.Service, service.Build, *env); err != nil {
        if err := syncEnvironmentWorkspaces(c, tfcClient, service.Service, service.Build, *env, service.Monitoring); err != nil {
            return errors.Wrapf(err, "sync env %q", env.ID)
        }
    } else {
@@ -282,7 +282,7 @@ Supports completions on services and environments.`,
            return errors.New("second argument environment ID is required without the '-all' flag")
        }
        for _, env := range service.Environments {
            if err := syncEnvironmentWorkspaces(c, tfcClient, service.Service, service.Build, env); err != nil {
            if err := syncEnvironmentWorkspaces(c, tfcClient, service.Service, service.Build, env, service.Monitoring); err != nil {
                return errors.Wrapf(err, "sync env %q", env.ID)
            }
        }
@@ -323,7 +323,7 @@ Supports completions on services and environments.`,
        }
    }

func syncEnvironmentWorkspaces(c *cli.Context, tfc *terraformcloud.Client, service spec.ServiceSpec, build spec.BuildSpec, env spec.EnvironmentSpec) error {
func syncEnvironmentWorkspaces(c *cli.Context, tfc *terraformcloud.Client, service spec.ServiceSpec, build spec.BuildSpec, env spec.EnvironmentSpec, monitoring spec.MonitoringSpec) error {
    if os.TempDir() == "" {
        return errors.New("no temp dir available")
    }
@@ -341,7 +341,7 @@ func syncEnvironmentWorkspaces(c *cli.Context, tfc *terraformcloud.Client, servi
    renderPending := std.Out.Pending(output.Styledf(output.StylePending,
        "[%s] Rendering required Terraform Cloud workspaces for environment %q",
        service.ID, env.ID))
    cdktf, err := renderer.RenderEnvironment(service, build, env)
    cdktf, err := renderer.RenderEnvironment(service, build, env, monitoring)
    if err != nil {
        return err
    }
@@ -452,7 +452,7 @@ func generateTerraform(serviceID string, opts generateTerraformOptions) error {
    }

    // Render environment
    cdktf, err := renderer.RenderEnvironment(service.Service, service.Build, env)
    cdktf, err := renderer.RenderEnvironment(service.Service, service.Build, env, service.Monitoring)
    if err != nil {
        return err
    }
@@ -53,3 +53,13 @@ func Float64[T numberType](v T) *float64 {
func Stringf(format string, a ...any) *string {
    return Ptr(fmt.Sprintf(format, a...))
}

// Slice takes a slice of values and turns it into a slice of pointers.
func Slice[S []V, V any](s S) []*V {
    slice := make([]*V, len(s))
    for i, v := range s {
        v := v // copy
        slice[i] = &v
    }
    return slice
}
@@ -203,3 +203,11 @@ func TestDeref(t *testing.T) {
        runDerefTest(t, tc)
    }
}

func TestSlice(t *testing.T) {
    values := []string{"1", "2", "3"}
    pointified := Slice(values)
    for i, p := range pointified {
        assert.Equal(t, values[i], *p)
    }
}