From 22832dcb0cd74e1af8a0483fba51b199ad7dcec1 Mon Sep 17 00:00:00 2001
From: Craig Furman
Date: Wed, 29 May 2024 10:04:33 +0100
Subject: [PATCH] appliance: deploy cadvisor (#62942)

There are a few intentional differences from our Helm chart:

* The cadvisor privileged option is removed. Cadvisor always runs with a
  privileged containerSecurityContext, but the component as a whole is now
  opt-in (disabled by default).
* The PodSecurityPolicy is removed entirely, since the PodSecurityPolicy API
  was removed in recent versions of Kubernetes.
---
 internal/appliance/config/defaults.go         |   8 +
 internal/appliance/config/spec.go             |   6 +
 internal/appliance/reconciler/BUILD.bazel     |   3 +
 internal/appliance/reconciler/cadvisor.go     | 110 +++++++++
 .../appliance/reconciler/cadvisor_test.go     |  22 ++
 internal/appliance/reconciler/golden_test.go  |   8 +
 internal/appliance/reconciler/reconcile.go    |   4 +
 .../golden-fixtures/cadvisor/default.yaml     | 217 ++++++++++++++++++
 .../testdata/sg/cadvisor/default.yaml         |  59 +++++
 internal/k8s/resource/daemonset/BUILD.bazel   |  13 ++
 internal/k8s/resource/daemonset/daemonset.go  |  32 +++
 internal/k8s/resource/pod/pod.go              |  11 +
 12 files changed, 493 insertions(+)
 create mode 100644 internal/appliance/reconciler/cadvisor.go
 create mode 100644 internal/appliance/reconciler/cadvisor_test.go
 create mode 100644 internal/appliance/reconciler/testdata/golden-fixtures/cadvisor/default.yaml
 create mode 100644 internal/appliance/reconciler/testdata/sg/cadvisor/default.yaml
 create mode 100644 internal/k8s/resource/daemonset/BUILD.bazel
 create mode 100644 internal/k8s/resource/daemonset/daemonset.go

diff --git a/internal/appliance/config/defaults.go b/internal/appliance/config/defaults.go
index 021e02e94a8..21ce42a7d34 100644
--- a/internal/appliance/config/defaults.go
+++ b/internal/appliance/config/defaults.go
@@ -116,6 +116,13 @@ func NewDefaultConfig() Sourcegraph {
 					},
 				},
 			},
+			Cadvisor: CadvisorSpec{
+				StandardConfig: StandardConfig{
+					// cadvisor is opt-in due to the privilege requirements
+					Disabled:       true,
+					PrometheusPort: pointers.Ptr(48080),
+				},
+			},
 		},
 	}
 }
@@ -130,6 +137,7 @@ var defaultImages = map[string]map[string]string{
 var defaultImagesForVersion_5_3_9104 = map[string]string{
 	"alpine":       "alpine-3.14:5.3.2@sha256:982220e0fd8ce55a73798fa7e814a482c4807c412f054c8440c5970b610239b7",
 	"blobstore":    "blobstore:5.3.2@sha256:d625be1eefe61cc42f94498e3c588bf212c4159c8b20c519db84eae4ff715efa",
+	"cadvisor":     "cadvisor:5.3.2@sha256:3860cce1f7ef0278c0d785f66baf69dd2bece19610a2fd6eaa54c03095f2f105",
 	"codeintel-db": "codeintel-db:5.3.2@sha256:1e0e93661a65c832b9697048c797f9894dfb502e2e1da2b8209f0018a6632b79",
 	"gitserver":    "gitserver:5.3.2@sha256:6c6042cf3e5f3f16de9b82e3d4ab1647f8bb924cd315245bd7a3162f5489e8c4",
 	"pgsql":        "postgres-12-alpine:5.3.2@sha256:1e0e93661a65c832b9697048c797f9894dfb502e2e1da2b8209f0018a6632b79",
diff --git a/internal/appliance/config/spec.go b/internal/appliance/config/spec.go
index 60a5c400dc4..27c82c23f22 100644
--- a/internal/appliance/config/spec.go
+++ b/internal/appliance/config/spec.go
@@ -30,6 +30,10 @@ type BlobstoreSpec struct {
 	StandardConfig
 }
 
+type CadvisorSpec struct {
+	StandardConfig
+}
+
 // CodeInsightsDBSpec defines the desired state of Code Insights database.
 type CodeInsightsDBSpec struct {
 	// Disabled defines if Code Insights is enabled or not.
@@ -225,6 +229,8 @@ type SourcegraphSpec struct {
 	// Blobstore defines the desired state of the Blobstore service.
 	Blobstore BlobstoreSpec `json:"blobstore,omitempty"`
 
+	Cadvisor CadvisorSpec `json:"cadvisor,omitempty"`
+
 	// CodeInsights defines the desired state of the Code Insights service.
 	CodeInsights CodeInsightsDBSpec `json:"codeInsights,omitempty"`
 
diff --git a/internal/appliance/reconciler/BUILD.bazel b/internal/appliance/reconciler/BUILD.bazel
index 71d538ab90f..b1b7e1af5a3 100644
--- a/internal/appliance/reconciler/BUILD.bazel
+++ b/internal/appliance/reconciler/BUILD.bazel
@@ -5,6 +5,7 @@ go_library(
     name = "reconciler",
     srcs = [
         "blobstore.go",
+        "cadvisor.go",
         "codeintel.go",
         "gitserver.go",
         "kubernetes.go",
@@ -23,6 +24,7 @@ go_library(
         "//internal/appliance/config",
         "//internal/k8s/resource/configmap",
         "//internal/k8s/resource/container",
+        "//internal/k8s/resource/daemonset",
         "//internal/k8s/resource/deployment",
         "//internal/k8s/resource/pod",
         "//internal/k8s/resource/pvc",
@@ -61,6 +63,7 @@ go_test(
     name = "reconciler_test",
     srcs = [
         "blobstore_test.go",
+        "cadvisor_test.go",
         "codeintel_test.go",
         "gitserver_test.go",
         "golden_test.go",
diff --git a/internal/appliance/reconciler/cadvisor.go b/internal/appliance/reconciler/cadvisor.go
new file mode 100644
index 00000000000..f0c5fb38286
--- /dev/null
+++ b/internal/appliance/reconciler/cadvisor.go
@@ -0,0 +1,110 @@
+package reconciler
+
+import (
+	"context"
+	"fmt"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/sourcegraph/sourcegraph/internal/appliance/config"
+	"github.com/sourcegraph/sourcegraph/internal/k8s/resource/container"
+	"github.com/sourcegraph/sourcegraph/internal/k8s/resource/daemonset"
+	"github.com/sourcegraph/sourcegraph/internal/k8s/resource/pod"
+	"github.com/sourcegraph/sourcegraph/internal/k8s/resource/serviceaccount"
+	"github.com/sourcegraph/sourcegraph/lib/errors"
+	"github.com/sourcegraph/sourcegraph/lib/pointers"
+)
+
+func (r *Reconciler) reconcileCadvisor(ctx context.Context, sg *config.Sourcegraph, owner client.Object) error {
+	if err := r.reconcileCadvisorDaemonset(ctx, sg, owner); err != nil {
+		return errors.Wrap(err, "reconciling DaemonSet")
+	}
+	if err := r.reconcileCadvisorServiceAccount(ctx, sg, owner); err != nil {
+		return errors.Wrap(err, "reconciling ServiceAccount")
+	}
+	return nil
+}
+
+func (r *Reconciler) reconcileCadvisorDaemonset(ctx context.Context, sg *config.Sourcegraph, owner client.Object) error {
+	name := "cadvisor"
+	cfg := sg.Spec.Cadvisor
+
+	defaultImage, err := config.GetDefaultImage(sg, name)
+	if err != nil {
+		return err
+	}
+	ctr := container.NewContainer(name, cfg, config.ContainerConfig{
+		Image: defaultImage,
+		Resources: &corev1.ResourceRequirements{
+			Requests: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("150m"),
+				corev1.ResourceMemory: resource.MustParse("200Mi"),
+			},
+			Limits: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("300m"),
+				corev1.ResourceMemory: resource.MustParse("2000Mi"),
+			},
+		},
+	})
+	ctr.Args = []string{
+		"--store_container_labels=false",
+		"--whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace,io.kubernetes.pod.uid",
+	}
+	ctr.VolumeMounts = []corev1.VolumeMount{
+		{Name: "rootfs", MountPath: "/rootfs", ReadOnly: true},
+		{Name: "var-run", MountPath: "/var/run", ReadOnly: true},
+		{Name: "sys", MountPath: "/sys", ReadOnly: true},
+		{Name: "docker", MountPath: "/var/lib/docker", ReadOnly: true},
+		{Name: "disk", MountPath: "/dev/disk", ReadOnly: true},
+		{Name: "kmsg", MountPath: "/dev/kmsg", ReadOnly: true},
+	}
+	ctr.Ports = []corev1.ContainerPort{
+		{Name: "http", ContainerPort: 48080},
+	}
+	ctr.SecurityContext = &corev1.SecurityContext{
+		Privileged: pointers.Ptr(true),
+	}
+
+	podTemplate := pod.NewPodTemplate(name, cfg)
+	podTemplate.Template.Spec.Containers = []corev1.Container{ctr}
+	podTemplate.Template.Spec.ServiceAccountName = name
+	podTemplate.Template.Spec.AutomountServiceAccountToken = pointers.Ptr(false)
+	podTemplate.Template.Spec.Volumes = []corev1.Volume{
+		pod.NewVolumeHostPath("rootfs", "/"),
+		pod.NewVolumeHostPath("var-run", "/var/run"),
+		pod.NewVolumeHostPath("sys", "/sys"),
+		pod.NewVolumeHostPath("docker", "/var/lib/docker"),
+		pod.NewVolumeHostPath("disk", "/dev/disk"),
+		pod.NewVolumeHostPath("kmsg", "/dev/kmsg"),
+	}
+	podTemplate.Template.Spec.SecurityContext = nil
+
+	// Usually we set the prometheus scrape annotations on a Service (and scrape
+	// its endpoints rather than the load balancer), but it doesn't usually make
+	// sense to deploy services alongside daemonsets. We set the scrape
+	// annotation directly on the pod template here instead.
+	// Even though this uses the PrometheusPort standard config feature, we
+	// shouldn't move this code into pod.go, because otherwise every pod
+	// template will have such an annotation.
+	if promPort := cfg.GetPrometheusPort(); promPort != nil {
+		annotations := map[string]string{
+			"prometheus.io/port":            fmt.Sprintf("%d", *promPort),
+			"sourcegraph.prometheus/scrape": "true",
+		}
+		podTemplate.Template.Annotations = annotations
+	}
+
+	ds := daemonset.New(name, sg.Namespace, sg.Spec.RequestedVersion)
+	ds.Spec.Template = podTemplate.Template
+
+	return reconcileObject(ctx, r, cfg, &ds, &appsv1.DaemonSet{}, sg, owner)
+}
+
+func (r *Reconciler) reconcileCadvisorServiceAccount(ctx context.Context, sg *config.Sourcegraph, owner client.Object) error {
+	cfg := sg.Spec.Cadvisor
+	sa := serviceaccount.NewServiceAccount("cadvisor", sg.Namespace, cfg)
+	return reconcileObject(ctx, r, cfg, &sa, &corev1.ServiceAccount{}, sg, owner)
+}
diff --git a/internal/appliance/reconciler/cadvisor_test.go b/internal/appliance/reconciler/cadvisor_test.go
new file mode 100644
index 00000000000..aaa82f5026e
--- /dev/null
+++ b/internal/appliance/reconciler/cadvisor_test.go
@@ -0,0 +1,22 @@
+package reconciler
+
+import "time"
+
+func (suite *ApplianceTestSuite) TestDeployCadvisor() {
+	for _, tc := range []struct {
+		name string
+	}{
+		{name: "cadvisor/default"},
+	} {
+		suite.Run(tc.name, func() {
+			namespace := suite.createConfigMap(tc.name)
+
+			// Wait for reconciliation to be finished.
+			suite.Require().Eventually(func() bool {
+				return suite.getConfigMapReconcileEventCount(namespace) > 0
+			}, time.Second*10, time.Millisecond*200)
+
+			suite.makeGoldenAssertions(namespace, tc.name)
+		})
+	}
+}
diff --git a/internal/appliance/reconciler/golden_test.go b/internal/appliance/reconciler/golden_test.go
index 205c4b4d011..16a4784bd90 100644
--- a/internal/appliance/reconciler/golden_test.go
+++ b/internal/appliance/reconciler/golden_test.go
@@ -74,6 +74,14 @@ func (suite *ApplianceTestSuite) gatherResources(namespace string) []client.Obje
 		normalizeObj(&obj)
 		objs = append(objs, &obj)
 	}
+	daemonsets, err := suite.k8sClient.AppsV1().DaemonSets(namespace).List(suite.ctx, metav1.ListOptions{})
+	suite.Require().NoError(err)
+	for _, obj := range daemonsets.Items {
+		obj := obj
+		obj.SetGroupVersionKind(schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "DaemonSet"})
+		normalizeObj(&obj)
+		objs = append(objs, &obj)
+	}
 	ssets, err := suite.k8sClient.AppsV1().StatefulSets(namespace).List(suite.ctx, metav1.ListOptions{})
 	suite.Require().NoError(err)
 	for _, obj := range ssets.Items {
diff --git a/internal/appliance/reconciler/reconcile.go b/internal/appliance/reconciler/reconcile.go
index cf916208268..ffd03511ddf 100644
--- a/internal/appliance/reconciler/reconcile.go
+++ b/internal/appliance/reconciler/reconcile.go
@@ -102,6 +102,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
 	if err := r.reconcilePrometheus(ctx, &sourcegraph, &applianceSpec); err != nil {
 		return ctrl.Result{}, errors.Newf("failed to reconcile prometheus: %w", err)
 	}
+	if err := r.reconcileCadvisor(ctx, &sourcegraph, &applianceSpec); err != nil {
+		return ctrl.Result{}, errors.Newf("failed to reconcile cadvisor: %w", err)
+	}
 
 	// Set the current version annotation in case migration logic depends on it.
 	applianceSpec.Annotations[config.AnnotationKeyCurrentVersion] = sourcegraph.Spec.RequestedVersion
@@ -123,6 +126,7 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
 		WithEventFilter(applianceAnnotationPredicate).
 		For(&corev1.ConfigMap{}).
 		Owns(&appsv1.Deployment{}).
+		Owns(&appsv1.DaemonSet{}).
 		Owns(&appsv1.StatefulSet{}).
 		Owns(&corev1.ConfigMap{}).
 		Owns(&corev1.PersistentVolumeClaim{}).
diff --git a/internal/appliance/reconciler/testdata/golden-fixtures/cadvisor/default.yaml b/internal/appliance/reconciler/testdata/golden-fixtures/cadvisor/default.yaml
new file mode 100644
index 00000000000..574d684dd7c
--- /dev/null
+++ b/internal/appliance/reconciler/testdata/golden-fixtures/cadvisor/default.yaml
@@ -0,0 +1,217 @@
+resources:
+- apiVersion: apps/v1
+  kind: DaemonSet
+  metadata:
+    annotations:
+      appliance.sourcegraph.com/configHash: 6ad4c69be1e1c5d02dbd71d1f427fc7d197bf3d18c4acb8d53092dc659455920
+      deprecated.daemonset.template.generation: "1"
+    creationTimestamp: "2024-04-19T00:00:00Z"
+    generation: 1
+    labels:
+      app.kubernetes.io/component: cadvisor
+      app.kubernetes.io/name: sourcegraph
+      app.kubernetes.io/version: 5.3.9104
+      deploy: sourcegraph
+    name: cadvisor
+    namespace: NORMALIZED_FOR_TESTING
+    ownerReferences:
+    - apiVersion: v1
+      blockOwnerDeletion: true
+      controller: true
+      kind: ConfigMap
+      name: sg
+      uid: NORMALIZED_FOR_TESTING
+    resourceVersion: NORMALIZED_FOR_TESTING
+    uid: NORMALIZED_FOR_TESTING
+  spec:
+    minReadySeconds: 10
+    revisionHistoryLimit: 10
+    selector:
+      matchLabels:
+        app: cadvisor
+    template:
+      metadata:
+        annotations:
+          prometheus.io/port: "48080"
+          sourcegraph.prometheus/scrape: "true"
+        creationTimestamp: null
+        labels:
+          app: cadvisor
+          deploy: sourcegraph
+        name: cadvisor
+      spec:
+        automountServiceAccountToken: false
+        containers:
+        - args:
+          - --store_container_labels=false
+          - --whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace,io.kubernetes.pod.uid
+          image: index.docker.io/sourcegraph/cadvisor:5.3.2@sha256:3860cce1f7ef0278c0d785f66baf69dd2bece19610a2fd6eaa54c03095f2f105
+          imagePullPolicy: IfNotPresent
+          name: cadvisor
+          ports:
+          - containerPort: 48080
+            name: http
+            protocol: TCP
+          resources:
+            limits:
+              cpu: 300m
+              memory: 2000Mi
+            requests:
+              cpu: 150m
+              memory: 200Mi
+          securityContext:
+            privileged: true
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: FallbackToLogsOnError
+          volumeMounts:
+          - mountPath: /rootfs
+            name: rootfs
+            readOnly: true
+          - mountPath: /var/run
+            name: var-run
+            readOnly: true
+          - mountPath: /sys
+            name: sys
+            readOnly: true
+          - mountPath: /var/lib/docker
+            name: docker
+            readOnly: true
+          - mountPath: /dev/disk
+            name: disk
+            readOnly: true
+          - mountPath: /dev/kmsg
+            name: kmsg
+            readOnly: true
+        dnsPolicy: ClusterFirst
+        restartPolicy: Always
+        schedulerName: default-scheduler
+        securityContext: {}
+        serviceAccount: cadvisor
+        serviceAccountName: cadvisor
+        terminationGracePeriodSeconds: 30
+        volumes:
+        - hostPath:
+            path: /
+            type: ""
+          name: rootfs
+        - hostPath:
+            path: /var/run
+            type: ""
+          name: var-run
+        - hostPath:
+            path: /sys
+            type: ""
+          name: sys
+        - hostPath:
+            path: /var/lib/docker
+            type: ""
+          name: docker
+        - hostPath:
+            path: /dev/disk
+            type: ""
+          name: disk
+        - hostPath:
+            path: /dev/kmsg
+            type: ""
+          name: kmsg
+    updateStrategy:
+      rollingUpdate:
+        maxSurge: 0
+        maxUnavailable: 1
+      type: RollingUpdate
+  status:
+    currentNumberScheduled: 0
+    desiredNumberScheduled: 0
+    numberMisscheduled: 0
+    numberReady: 0
+- apiVersion: v1
+  data:
+    spec: |
+      spec:
+        requestedVersion: "5.3.9104"
+
+        blobstore:
+          disabled: true
+
+        codeInsights:
+          disabled: true
+
+        codeIntel:
+          disabled: true
+
+        frontend:
+          disabled: true
+
+        gitServer:
+          disabled: true
+
+        indexedSearch:
+          disabled: true
+
+        indexedSearchIndexer:
+          disabled: true
+
+        pgsql:
+          disabled: true
+
+        postgresExporter:
+          disabled: true
+
+        preciseCodeIntel:
+          disabled: true
+
+        redisCache:
+          disabled: true
+
+        redisStore:
+          disabled: true
+
+        repoUpdater:
+          disabled: true
+
+        searcher:
+          disabled: true
+
+        symbols:
+          disabled: true
+
+        syntectServer:
+          disabled: true
+
+        worker:
+          disabled: true
+
+        prometheus:
+          disabled: true
+
+        cadvisor:
+          disabled: false
+  kind: ConfigMap
+  metadata:
+    annotations:
+      appliance.sourcegraph.com/currentVersion: 5.3.9104
+      appliance.sourcegraph.com/managed: "true"
+    creationTimestamp: "2024-04-19T00:00:00Z"
+    name: sg
+    namespace: NORMALIZED_FOR_TESTING
+    resourceVersion: NORMALIZED_FOR_TESTING
+    uid: NORMALIZED_FOR_TESTING
+- apiVersion: v1
+  kind: ServiceAccount
+  metadata:
+    annotations:
+      appliance.sourcegraph.com/configHash: 6ad4c69be1e1c5d02dbd71d1f427fc7d197bf3d18c4acb8d53092dc659455920
+    creationTimestamp: "2024-04-19T00:00:00Z"
+    labels:
+      deploy: sourcegraph
+    name: cadvisor
+    namespace: NORMALIZED_FOR_TESTING
+    ownerReferences:
+    - apiVersion: v1
+      blockOwnerDeletion: true
+      controller: true
+      kind: ConfigMap
+      name: sg
+      uid: NORMALIZED_FOR_TESTING
+    resourceVersion: NORMALIZED_FOR_TESTING
+    uid: NORMALIZED_FOR_TESTING
diff --git a/internal/appliance/reconciler/testdata/sg/cadvisor/default.yaml b/internal/appliance/reconciler/testdata/sg/cadvisor/default.yaml
new file mode 100644
index 00000000000..1d4eead5639
--- /dev/null
+++ b/internal/appliance/reconciler/testdata/sg/cadvisor/default.yaml
@@ -0,0 +1,59 @@
+spec:
+  requestedVersion: "5.3.9104"
+
+  blobstore:
+    disabled: true
+
+  codeInsights:
+    disabled: true
+
+  codeIntel:
+    disabled: true
+
+  frontend:
+    disabled: true
+
+  gitServer:
+    disabled: true
+
+  indexedSearch:
+    disabled: true
+
+  indexedSearchIndexer:
+    disabled: true
+
+  pgsql:
+    disabled: true
+
+  postgresExporter:
+    disabled: true
+
+  preciseCodeIntel:
+    disabled: true
+
+  redisCache:
+    disabled: true
+
+  redisStore:
+    disabled: true
+
+  repoUpdater:
+    disabled: true
+
+  searcher:
+    disabled: true
+
+  symbols:
+    disabled: true
+
+  syntectServer:
+    disabled: true
+
+  worker:
+    disabled: true
+
+  prometheus:
+    disabled: true
+
+  cadvisor:
+    disabled: false
diff --git a/internal/k8s/resource/daemonset/BUILD.bazel b/internal/k8s/resource/daemonset/BUILD.bazel
new file mode 100644
index 00000000000..3f9f4153262
--- /dev/null
+++ b/internal/k8s/resource/daemonset/BUILD.bazel
@@ -0,0 +1,13 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "daemonset",
+    srcs = ["daemonset.go"],
+    importpath = "github.com/sourcegraph/sourcegraph/internal/k8s/resource/daemonset",
+    visibility = ["//:__subpackages__"],
+    deps = [
+        "//lib/pointers",
+        "@io_k8s_api//apps/v1:apps",
+        "@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
+    ],
+)
diff --git a/internal/k8s/resource/daemonset/daemonset.go b/internal/k8s/resource/daemonset/daemonset.go
new file mode 100644
index 00000000000..db15b3bbc67
--- /dev/null
+++ b/internal/k8s/resource/daemonset/daemonset.go
@@ -0,0 +1,32 @@
+package daemonset
+
+import (
+	appsv1 "k8s.io/api/apps/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	"github.com/sourcegraph/sourcegraph/lib/pointers"
+)
+
+func New(name, namespace, version string) appsv1.DaemonSet {
+	return appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/component": name,
+				"app.kubernetes.io/name":      "sourcegraph",
+				"app.kubernetes.io/version":   version,
+				"deploy":                      "sourcegraph",
+			},
+		},
+		Spec: appsv1.DaemonSetSpec{
+			MinReadySeconds:      int32(10),
+			RevisionHistoryLimit: pointers.Ptr[int32](10),
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app": name,
+				},
+			},
+		},
+	}
+}
diff --git a/internal/k8s/resource/pod/pod.go b/internal/k8s/resource/pod/pod.go
index db6f6344fdf..7157d8fd94d 100644
--- a/internal/k8s/resource/pod/pod.go
+++ b/internal/k8s/resource/pod/pod.go
@@ -68,6 +68,17 @@ func NewVolumeFromConfigMap(name, configMapName string) corev1.Volume {
 	}
 }
 
+func NewVolumeHostPath(name, path string) corev1.Volume {
+	return corev1.Volume{
+		Name: name,
+		VolumeSource: corev1.VolumeSource{
+			HostPath: &corev1.HostPathVolumeSource{
+				Path: path,
+			},
+		},
+	}
+}
+
 func NewVolumeEmptyDir(name string) corev1.Volume {
 	return corev1.Volume{
 		Name: name,
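
Because the component ships disabled, opting in amounts to setting disabled: false under the cadvisor key of the appliance spec, as the test fixture above does. A minimal sketch of such an operator-facing spec, reusing the fixture's requestedVersion and assuming every other component falls back to the built-in defaults:

spec:
  requestedVersion: "5.3.9104"

  cadvisor:
    disabled: false

The sourcegraph.prometheus/scrape and prometheus.io/port annotations that end up on the DaemonSet's pod template are intended for the bundled Prometheus's pod-based discovery, since there is no Service in front of the DaemonSet to carry them.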
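
The new daemonset and pod helpers are meant to compose the same way the existing deployment helpers do. A minimal sketch of a caller outside the reconciler, where the package name, buildNodeAgent, and the "node-agent" component are purely illustrative and not part of this patch:

package example

import (
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"

	"github.com/sourcegraph/sourcegraph/internal/k8s/resource/daemonset"
	"github.com/sourcegraph/sourcegraph/internal/k8s/resource/pod"
)

// buildNodeAgent is a hypothetical caller of the helpers added in this patch:
// daemonset.New supplies the standard labels and selector, and
// pod.NewVolumeHostPath builds the hostPath volumes that node agents such as
// cadvisor typically need.
func buildNodeAgent(namespace, version string) appsv1.DaemonSet {
	ds := daemonset.New("node-agent", namespace, version)
	ds.Spec.Template.Spec.Volumes = []corev1.Volume{
		pod.NewVolumeHostPath("sys", "/sys"),
	}
	return ds
}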