feat(appliance): add wait for admin state (#64042)

Add basic health check to switch state to `waitForAdmin` after
Sourcegraph frontend is ready. As noted in the code, this is a temporary
health check and will/should be replaced with something more
comprehensive in the near future.

The wait-for-admin page now appears successfully once the Sourcegraph
frontend is "ready".

Co-authored-by: Jacob Pleiness <jdpleiness@users.noreply.github.com>
Co-authored-by: Craig Furman <craig.furman@sourcegraph.com>
This commit is contained in:
Jacob Pleiness 2024-07-29 06:21:55 -04:00 committed by GitHub
parent e02bd3d025
commit 7e82c27ab5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 118 additions and 81 deletions

View File

@ -147,7 +147,7 @@ func Start(ctx context.Context, observationCtx *observation.Context, ready servi
return nil
})
g.Go(func() error {
if err := healthChecker.ManageIngressFacingService(ctx, beginHealthCheckLoop, "app=sourcegraph-frontend"); err != nil {
if err := healthChecker.ManageIngressFacingService(ctx, beginHealthCheckLoop, "app=sourcegraph-frontend", config.namespace); err != nil {
logger.Error("problem running HealthChecker", log.Error(err))
return err
}

View File

@ -5,6 +5,7 @@ import (
"dario.cat/mergo"
"golang.org/x/crypto/bcrypt"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
@ -40,7 +41,6 @@ type Appliance struct {
const (
// Secret and key names
dataSecretName = "appliance-data"
dataSecretJWTSigningKeyKey = "jwt-signing-key"
dataSecretEncryptedPasswordKey = "encrypted-admin-password"
initialPasswordSecretName = "appliance-password"
initialPasswordSecretPasswordKey = "password"
@ -153,27 +153,29 @@ func (a *Appliance) reconcileConfigMap(ctx context.Context, configMap *corev1.Co
return errors.Wrap(err, "failed to marshal configmap yaml")
}
existingCfgMap.Name = config.ConfigmapName
existingCfgMap.Namespace = a.namespace
cfgMap := &corev1.ConfigMap{}
cfgMap.Name = config.ConfigmapName
cfgMap.Namespace = a.namespace
existingCfgMap.Labels = map[string]string{
cfgMap.Labels = map[string]string{
"deploy": "sourcegraph",
}
existingCfgMap.Annotations = map[string]string{
cfgMap.Annotations = map[string]string{
// required annotation for our controller filter.
config.AnnotationKeyManaged: "true",
config.AnnotationKeyStatus: string(config.StatusUnknown),
config.AnnotationConditions: "",
}
if configMap.ObjectMeta.Annotations != nil {
existingCfgMap.ObjectMeta.Annotations = configMap.ObjectMeta.Annotations
cfgMap.ObjectMeta.Annotations = configMap.ObjectMeta.Annotations
}
existingCfgMap.Immutable = pointers.Ptr(false)
existingCfgMap.Data = map[string]string{"spec": string(spec)}
cfgMap.Immutable = pointers.Ptr(false)
cfgMap.Data = map[string]string{"spec": string(spec)}
return a.client.Create(ctx, existingCfgMap)
return a.client.Create(ctx, cfgMap)
}
return errors.Wrap(err, "getting configmap")
@ -186,3 +188,49 @@ func (a *Appliance) reconcileConfigMap(ctx context.Context, configMap *corev1.Co
return a.client.Update(ctx, existingCfgMap)
}
// isSourcegraphFrontendReady reports whether the "sourcegraph-frontend"
// deployment in the appliance namespace is ready, as judged by IsObjectReady.
// A deployment that does not exist yet is reported as not ready with no error;
// any other lookup failure is returned wrapped.
//
// This is a deliberately minimal, temporary health check and should be
// replaced with a more comprehensive one in the very near future.
func (a *Appliance) isSourcegraphFrontendReady(ctx context.Context) (bool, error) {
	deployment := &appsv1.Deployment{}
	key := types.NamespacedName{Namespace: a.namespace, Name: "sourcegraph-frontend"}

	err := a.client.Get(ctx, key, deployment)
	if err == nil {
		return IsObjectReady(deployment)
	}
	// A missing frontend deployment simply means it is not ready yet.
	if apierrors.IsNotFound(err) {
		return false, nil
	}
	return false, errors.Wrap(err, "fetching frontend deployment")
}
// getStatus reads the appliance status from the status annotation of the
// appliance ConfigMap. A missing ConfigMap yields StatusUnknown with a nil
// error; any other lookup failure yields StatusUnknown together with the
// error. The annotation value is converted to a Status without validation.
func (a *Appliance) getStatus(ctx context.Context) (config.Status, error) {
	cm := &corev1.ConfigMap{}
	key := types.NamespacedName{Namespace: a.namespace, Name: config.ConfigmapName}

	err := a.client.Get(ctx, key, cm)
	switch {
	case err == nil:
		return config.Status(cm.ObjectMeta.Annotations[config.AnnotationKeyStatus]), nil
	case apierrors.IsNotFound(err):
		return config.StatusUnknown, nil
	default:
		return config.StatusUnknown, err
	}
}
// setStatus stores status in the status annotation of the appliance ConfigMap
// and persists the change with an Update call.
//
// The error from fetching the ConfigMap is returned as-is (not wrapped) so
// that callers can classify it, e.g. with an IsNotFound check. A failure to
// update is returned wrapped.
func (a *Appliance) setStatus(ctx context.Context, status config.Status) error {
	configMapName := types.NamespacedName{Name: config.ConfigmapName, Namespace: a.namespace}
	configMap := &corev1.ConfigMap{}
	if err := a.client.Get(ctx, configMapName, configMap); err != nil {
		return err
	}

	// A ConfigMap stored without annotations comes back with a nil map, and
	// assigning into a nil map panics, so make sure the map exists first.
	if configMap.Annotations == nil {
		configMap.Annotations = make(map[string]string, 1)
	}
	configMap.Annotations[config.AnnotationKeyStatus] = string(status)

	if err := a.client.Update(ctx, configMap); err != nil {
		return errors.Wrap(err, "failed set status")
	}
	return nil
}

View File

@ -22,14 +22,16 @@ const (
StatusUnknown Status = "unknown"
StatusInstall Status = "install"
StatusInstalling Status = "installing"
StatusIdle Status = "idle"
StatusUpgrading Status = "upgrading"
StatusWaitingForAdmin Status = "wait-for-admin"
StatusRefresh Status = "refresh"
StatusMaintenance Status = "maintenance"
)
// TODO think about this
func IsPostInstallStatus(status Status) bool {
return status == StatusRefresh
switch status {
case StatusUnknown, StatusInstall, StatusInstalling, StatusWaitingForAdmin:
return false
}
return true
}

View File

@ -34,7 +34,7 @@ export const OperatorStatus: React.FC<ContextProps> = ({ context }) => {
switch (context.stage) {
case 'refresh':
document.location = '/?cacheBust=' + Date.now()
document.location.reload()
break
}

View File

@ -2,13 +2,11 @@ import React, { useEffect, useState } from 'react'
import { Button, CircularProgress, Stack, Typography } from '@mui/material'
import { changeStage } from './state'
import { changeStage } from './state.ts'
const TestAdminUIGoodMs = 1000
const WaitBeforeLaunchMs = 3000
const WaitBeforeLaunchMs = 3 * 1000
export const WaitForAdmin: React.FC = () => {
const [waitingForBalancer, setWaitingForBalancer] = useState<boolean>(false)
const [launching, setLaunching] = useState<boolean>(false)
useEffect(() => {
@ -20,24 +18,9 @@ export const WaitForAdmin: React.FC = () => {
}
}, [launching])
useEffect(() => {
const timer = setInterval(() => {
fetch('/sign-in')
.then(result => {
console.log('waiting for admin ui', result)
if (result.ok) {
setLaunching(true)
setWaitingForBalancer(false)
}
})
.catch(console.error)
}, TestAdminUIGoodMs)
return () => clearInterval(timer)
}, [waitingForBalancer])
return (
<div className="wait-for-admin">
<Typography variant="h5">Waiting For The Admin To Return</Typography>
<Typography variant="h4">Waiting For The Admin To Return</Typography>
<div>
<Typography sx={{ m: 2 }}>
The appliance is ready. We were waiting for you to set its security before opening it up.
@ -46,11 +29,7 @@ export const WaitForAdmin: React.FC = () => {
Now that you're back, please press the button below to launch the Administration UI.
</Typography>
</div>
<Button
variant="contained"
onClick={() => setWaitingForBalancer(true)}
disabled={launching || waitingForBalancer}
>
<Button variant="contained" onClick={() => setLaunching(true)} disabled={launching}>
Launch Admin UI
</Button>
{launching && (
@ -59,12 +38,6 @@ export const WaitForAdmin: React.FC = () => {
<Typography variant="h5">Launching Admin UI... Please wait...</Typography>
</Stack>
)}
{waitingForBalancer && (
<Stack direction="row" spacing={2}>
<CircularProgress size={32} />
<Typography variant="h5">Waiting for Admin UI to be ready... Please wait...</Typography>
</Stack>
)}
</div>
)
}

View File

@ -8,11 +8,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
type Probe interface {
CheckPods(ctx context.Context, labelSelector string) error
CheckPods(ctx context.Context, labelSelector, namespace string) error
}
type HealthChecker struct {
@ -25,12 +26,12 @@ type HealthChecker struct {
Graceperiod time.Duration
}
// Waits for the begin channel to close, then periodically monitors the frontend
// ManageIngressFacingService waits for the `begin` channel to close, then periodically monitors the frontend
// service (the ingress-facing service). When there is at least one ready
// frontend pod, it ensures that the service points at the frontend pods. When
// there are no ready pods, it ensures that the service points to the appliance,
// so that the admin can log in and view maintenance status.
func (h *HealthChecker) ManageIngressFacingService(ctx context.Context, begin <-chan struct{}, labelSelector string) error {
func (h *HealthChecker) ManageIngressFacingService(ctx context.Context, begin <-chan struct{}, labelSelector, namespace string) error {
h.Logger.Info("waiting for signal to begin managing ingress-facing service for the appliance")
select {
case <-begin:
@ -47,13 +48,13 @@ func (h *HealthChecker) ManageIngressFacingService(ctx context.Context, begin <-
defer ticker.Stop()
// Do one iteration without having to wait for the first tick
if err := h.maybeFlipServiceOnce(ctx, labelSelector); err != nil {
if err := h.maybeFlipServiceOnce(ctx, labelSelector, namespace); err != nil {
return err
}
for {
select {
case <-ticker.C:
if err := h.maybeFlipServiceOnce(ctx, labelSelector); err != nil {
if err := h.maybeFlipServiceOnce(ctx, labelSelector, namespace); err != nil {
return err
}
@ -64,14 +65,14 @@ func (h *HealthChecker) ManageIngressFacingService(ctx context.Context, begin <-
}
}
func (h *HealthChecker) maybeFlipServiceOnce(ctx context.Context, labelSelector string) error {
func (h *HealthChecker) maybeFlipServiceOnce(ctx context.Context, labelSelector, namespace string) error {
h.Logger.Info("checking deployment health")
if err := h.Probe.CheckPods(ctx, labelSelector); err != nil {
if err := h.Probe.CheckPods(ctx, labelSelector, namespace); err != nil {
h.Logger.Error("found unhealthy state, waiting for the grace period", log.Error(err), log.String("gracePeriod", h.Graceperiod.String()))
time.Sleep(h.Graceperiod)
if err := h.Probe.CheckPods(ctx, labelSelector); err != nil {
if err := h.Probe.CheckPods(ctx, labelSelector, namespace); err != nil {
h.Logger.Error("found unhealthy state, setting service selector to appliance", log.Error(err))
return h.setServiceSelector(ctx, "sourcegraph-appliance")
return h.setServiceSelector(ctx, "sourcegraph-appliance-frontend")
}
}

View File

@ -85,11 +85,11 @@ func TestManageIngressFacingService(t *testing.T) {
{Name: "http", Port: 30080, TargetPort: intstr.FromString("http")},
}
svc.Spec.Selector = map[string]string{
"app": "sourcegraph-appliance",
"app": "sourcegraph-appliance-frontend",
}
err = k8sClient.Create(ctx, &svc)
require.NoError(t, err)
runHealthCheckAndAssertSelector(t, checker, serviceName, "sourcegraph-appliance")
runHealthCheckAndAssertSelector(t, checker, serviceName, ns.GetName(), "sourcegraph-appliance-frontend")
// Simulate some frontend pods existing but with no readiness conditions.
pod1 := mkPod("pod1", ns.GetName())
@ -98,7 +98,7 @@ func TestManageIngressFacingService(t *testing.T) {
pod2 := mkPod("pod2", ns.GetName())
err = k8sClient.Create(ctx, pod2)
require.NoError(t, err)
runHealthCheckAndAssertSelector(t, checker, serviceName, "sourcegraph-appliance")
runHealthCheckAndAssertSelector(t, checker, serviceName, ns.GetName(), "sourcegraph-appliance-frontend")
// Simulate one pod becoming ready to receive traffic
pod1.Status.Conditions = []corev1.PodCondition{
@ -117,10 +117,10 @@ func TestManageIngressFacingService(t *testing.T) {
}
err = k8sClient.Status().Update(ctx, pod2)
require.NoError(t, err)
runHealthCheckAndAssertSelector(t, checker, serviceName, "sourcegraph-frontend")
runHealthCheckAndAssertSelector(t, checker, serviceName, ns.GetName(), "sourcegraph-frontend")
// test idempotency of the monitor
runHealthCheckAndAssertSelector(t, checker, serviceName, "sourcegraph-frontend")
runHealthCheckAndAssertSelector(t, checker, serviceName, ns.GetName(), "sourcegraph-frontend")
// Simulate pods becoming unready
pod1.Status.Conditions = []corev1.PodCondition{
@ -131,11 +131,11 @@ func TestManageIngressFacingService(t *testing.T) {
}
err = k8sClient.Status().Update(ctx, pod1)
require.NoError(t, err)
runHealthCheckAndAssertSelector(t, checker, serviceName, "sourcegraph-appliance")
runHealthCheckAndAssertSelector(t, checker, serviceName, ns.GetName(), "sourcegraph-appliance-frontend")
}
func runHealthCheckAndAssertSelector(t *testing.T, checker *HealthChecker, serviceName types.NamespacedName, expectedSelectorValue string) {
err := checker.maybeFlipServiceOnce(ctx, "app=sourcegraph-frontend")
func runHealthCheckAndAssertSelector(t *testing.T, checker *HealthChecker, serviceName types.NamespacedName, namespace, expectedSelectorValue string) {
err := checker.maybeFlipServiceOnce(ctx, "app=sourcegraph-frontend", namespace)
require.NoError(t, err)
var svc corev1.Service

View File

@ -14,13 +14,13 @@ type PodProbe struct {
K8sClient client.Client
}
func (p *PodProbe) CheckPods(ctx context.Context, labelSelector string) error {
func (p *PodProbe) CheckPods(ctx context.Context, labelSelector, namespace string) error {
var pods corev1.PodList
selector, err := labels.Parse(labelSelector)
if err != nil {
return errors.Wrap(err, "parsing label selector")
}
if err := p.K8sClient.List(ctx, &pods, &client.ListOptions{LabelSelector: selector}); err != nil {
if err := p.K8sClient.List(ctx, &pods, &client.ListOptions{LabelSelector: selector, Namespace: namespace}); err != nil {
return errors.Wrap(err, "listing pods")
}
for _, pod := range pods.Items {

View File

@ -8,8 +8,10 @@ import (
"strings"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/sourcegraph/log"
"github.com/sourcegraph/sourcegraph/internal/appliance/config"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
@ -124,6 +126,16 @@ func (a *Appliance) getInstallProgressJSONHandler() http.Handler {
Tasks: currentTasks,
}
ok, err := a.isSourcegraphFrontendReady(r.Context())
if err != nil {
a.logger.Error("failed to get sourcegraph frontend status")
return
}
if ok {
a.status = config.StatusWaitingForAdmin
}
if err := a.writeJSON(w, http.StatusOK, responseData{"progress": installProgress}, http.Header{}); err != nil {
a.serverErrorResponse(w, r, err)
}
@ -166,7 +178,17 @@ func (a *Appliance) postStatusJSONHandler() http.Handler {
return
}
newStatus := config.Status(input.State)
a.logger.Info("state transition", log.String("state", string(newStatus)))
a.sourcegraph.Spec.RequestedVersion = input.Data
if err := a.setStatus(r.Context(), newStatus); err != nil {
if kerrors.IsNotFound(err) {
a.logger.Info("no configmap found, will not set status")
} else {
a.serverErrorResponse(w, r, err)
return
}
}
//TODO(jdpleiness) check form for value if this should be set or not
a.sourcegraph.SetLocalDevMode()
@ -182,6 +204,6 @@ func (a *Appliance) postStatusJSONHandler() http.Handler {
a.serverErrorResponse(w, r, err)
}
a.status = config.StatusInstalling
a.status = newStatus
})
}

View File

@ -2,6 +2,7 @@ package reconciler
import (
"context"
"sync"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/types"
@ -24,6 +25,7 @@ import (
var _ reconcile.Reconciler = &Reconciler{}
type Reconciler struct {
sync.Mutex
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
@ -31,6 +33,9 @@ type Reconciler struct {
}
func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
r.Mutex.Lock()
defer r.Mutex.Unlock()
reqLog := log.FromContext(ctx)
reqLog.Info("reconciling sourcegraph appliance")
@ -52,8 +57,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
defer r.Recorder.Event(&applianceSpec, "Normal", "ReconcileFinished", "Reconcile finished.")
status := applianceSpec.GetAnnotations()[config.AnnotationKeyStatus]
if config.IsPostInstallStatus(config.Status(status)) {
if r.BeginHealthCheckLoop != nil && config.IsPostInstallStatus(config.Status(status)) {
close(r.BeginHealthCheckLoop)
r.BeginHealthCheckLoop = nil
}
// TODO place holder code until we get the configmap spec'd out and working'

View File

@ -21,16 +21,8 @@ type Task struct {
LastUpdate time.Time `json:"lastUpdate"`
}
// TODO this needs to be created on install init and statefully stored somewhere
func installTasks() []Task {
return []Task{
{
Title: "Warming up",
Description: "Setting up basic resources",
Started: true,
Finished: false,
Weight: 1,
},
{
Title: "Setup",
Description: "Setting up Sourcegraph Search",
@ -38,13 +30,6 @@ func installTasks() []Task {
Finished: false,
Weight: 25,
},
{
Title: "Start",
Description: "Start Sourcegraph",
Started: false,
Finished: false,
Weight: 1,
},
}
}