Kubernetes Interview Questions

CRDs and Operator Pattern

questions
Scroll to track progress

Design a Postgres operator that handles: failover (if primary node fails, promote replica), backup (hourly snapshots to S3), and scaling (add read replicas on demand). Outline the CRD structure, controller logic, and how you'd handle the primary node failure scenario.

CRD Design

Step 1: Define the Postgres CRD with spec for desired state and status for observed state:

apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: postgresinstances.db.example.com spec: group: db.example.com names: kind: PostgresInstance plural: postgresinstances scope: Namespaced versions: - name: v1 served: true storage: true schema: openAPIV3Schema: type: object properties: spec: type: object properties: version: type: string default: "14" primaryReplicas: type: integer default: 1 readReplicas: type: integer default: 2 backup: type: object properties: schedule: type: string default: "0 * * * *" # hourly retentionDays: type: integer default: 30 s3Bucket: type: string failoverTimeout: type: string default: "300s" status: type: object properties: phase: type: string enum: ["Pending", "Creating", "Ready", "Failing", "Failed"] primaryPod: type: string readReplicaPods: type: array items: type: string lastBackup: type: string lastFailover: type: string conditions: type: array items: type: object properties: type: type: string status: type: string message: type: string

Controller Logic

Step 2: Implement reconciliation for each responsibility:

type PostgresReconciler struct { client.Client s3Client S3Interface k8sClient kubernetes.Interface }

func (r *PostgresReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pg := &dbv1.PostgresInstance{} r.Get(ctx, req.NamespacedName, pg)

// Reconciliation phases

// 1. CLUSTER CREATION if pg.Status.Phase == "" { r.createPostgresCluster(ctx, pg) pg.Status.Phase = "Creating" r.Status().Update(ctx, pg) return ctrl.Result{RequeueAfter: 10*time.Second}, nil }

// 2. PRIMARY NODE HEALTH CHECK if err := r.checkPrimaryHealth(ctx, pg); err != nil { // Primary is dead, initiate failover r.initiateFailover(ctx, pg) pg.Status.Phase = "Failing" r.Status().Update(ctx, pg) return ctrl.Result{RequeueAfter: 5*time.Second}, nil }

// 3. REPLICA SCALING if err := r.reconcileReplicaCount(ctx, pg); err != nil { return ctrl.Result{}, err }

// 4. BACKUP if time.Since(pg.Status.LastBackup.Time) > 1*time.Hour { r.performBackup(ctx, pg) pg.Status.LastBackup = &metav1.Time{Time: time.Now()} }

// 5. FINALIZATION if pg.DeletionTimestamp != nil { r.performCleanup(ctx, pg) controllerutil.RemoveFinalizer(pg, "postgres.example.com/finalizer") }

pg.Status.Phase = "Ready" r.Status().Update(ctx, pg)

return ctrl.Result{RequeueAfter: 30*time.Second}, nil }

Failover Handling

Step 3: Detect and execute failover when primary is unavailable:

func (r *PostgresReconciler) checkPrimaryHealth(ctx context.Context, pg *dbv1.PostgresInstance) error { primaryPod := &corev1.Pod{} err := r.Get(ctx, types.NamespacedName{ Name: pg.Status.PrimaryPod, Namespace: pg.Namespace, }, primaryPod)

if err != nil && errors.IsNotFound(err) { // Primary pod is gone return fmt.Errorf("primary pod not found") }

// Health check: can we connect to primary? if !r.canConnectToPostgres(ctx, primaryPod) { return fmt.Errorf("cannot connect to primary") }

return nil }

// initiateFailover promotes the healthiest read replica to primary after
// the current primary has been declared dead, records the new topology in
// status, backfills a replacement replica, and emits an audit event.
func (r *PostgresReconciler) initiateFailover(ctx context.Context, pg *dbv1.PostgresInstance) error {
	// 1. Choose the best replica to promote (e.g. least replication lag).
	replicas := pg.Status.ReadReplicaPods
	if len(replicas) == 0 {
		return fmt.Errorf("no replicas available for failover")
	}
	newPrimary := r.selectBestReplica(ctx, replicas)

	// 2. Promote: pg_promote() ends recovery and makes the replica writable.
	// BUG FIX: the original used a Unicode en dash ("–") instead of "--",
	// so kubectl would have parsed "–" as the command to exec.
	// NOTE(review): shelling out to kubectl from a controller is fragile;
	// prefer the client-go pod-exec API in a follow-up.
	cmd := fmt.Sprintf("kubectl exec %s -n %s -- psql -U postgres -c 'SELECT pg_promote();'", newPrimary, pg.Namespace)
	if err := exec.CommandContext(ctx, "sh", "-c", cmd).Run(); err != nil {
		return fmt.Errorf("failed to promote replica: %v", err)
	}

	// 3. Record the new topology for auditing (status.lastFailover).
	pg.Status.PrimaryPod = newPrimary
	pg.Status.LastFailover = &metav1.Time{Time: time.Now()}

	// 4. Backfill a replica to replace the lost primary.
	// BUG FIX: this error was silently dropped in the original.
	if err := r.createNewReplica(ctx, pg); err != nil {
		return fmt.Errorf("failed to create replacement replica: %v", err)
	}

	// 5. Surface the failover in `kubectl describe` / events.
	r.Recorder.Event(pg, corev1.EventTypeWarning, "FailoverExecuted",
		fmt.Sprintf("Promoted replica %s to primary", newPrimary))
	return nil
}

Backup Implementation

Step 4: Scheduled backups to S3:

func (r *PostgresReconciler) performBackup(ctx context.Context, pg *dbv1.PostgresInstance) error { // 1. Create backup from primary using pg_dump backupFile := fmt.Sprintf("/tmp/postgres-backup-%s-%d.sql", pg.Name, time.Now().Unix())

cmd := fmt.Sprintf("kubectl exec %s -n %s – pg_dump -U postgres -d postgres > %s", pg.Status.PrimaryPod, pg.Namespace, backupFile) if err := exec.CommandContext(ctx, "sh", "-c", cmd).Run(); err != nil { return fmt.Errorf("backup failed: %v", err) }

// 2. Upload to S3 file, err := os.Open(backupFile) if err != nil { return err } defer file.Close()

uploadPath := fmt.Sprintf("backups/%s/%s-%d.sql.gz", pg.Namespace, pg.Name, time.Now().Unix()) if err := r.s3Client.PutObject(ctx, &s3.PutObjectInput{ Bucket: aws.String(pg.Spec.Backup.S3Bucket), Key: aws.String(uploadPath), Body: file, }); err != nil { return fmt.Errorf("failed to upload backup to S3: %v", err) }

// 3. Cleanup old backups (retention policy) r.cleanupOldBackups(ctx, pg)

return nil }

Key Design Decisions

- CRD spec is declarative (desired state), status is observational (actual state)

- Failover is automatic but recorded in status.lastFailover for auditing

- Backups run on schedule via reconciliation loop (not external CronJob)

- Replicas are managed as child resources (owned by PostgresInstance, deleted when parent deleted)

Follow-up: How would you handle network partitioning where primary and replicas can't communicate but both think they're healthy? Design a quorum-based failover.

Your operator creates a StatefulSet for the Postgres cluster. During failover, you delete the old primary pod and promote a replica. But the StatefulSet controller immediately recreates the deleted pod, undoing your failover. How do you prevent this?

StatefulSet ownership conflict: StatefulSet wants to maintain replicas, but your operator needs surgical control during failover. Solve this:

1. Don't use StatefulSet for Postgres pods. Use Deployment instead (gives you more control):

// BEFORE: StatefulSet apiVersion: apps/v1 kind: StatefulSet metadata: name: postgres-cluster spec: serviceName: postgres replicas: 3 selector: matchLabels: app: postgres template: metadata: labels: app: postgres spec: containers: - name: postgres image: postgres:14

// AFTER: Deployment with pod affinity to spread replicas apiVersion: apps/v1 kind: Deployment metadata: name: postgres-cluster spec: replicas: 3 selector: matchLabels: app: postgres template: metadata: labels: app: postgres spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - weight: 100 podAffinityTerm: labelSelector: matchExpressions: - key: app operator: In values: - postgres topologyKey: kubernetes.io/hostname containers: - name: postgres image: postgres:14

2. With Deployment, you have control. During failover, delete the old primary pod and immediately patch the Deployment to remove that pod from the selector:

// During failover: // 1. Promote replica newPrimary := replicas[0] r.promoteReplica(ctx, newPrimary)

// 2. Delete old primary pod oldPrimary := pg.Status.PrimaryPod kubectl.Delete(ctx, oldPrimary)

// 3. Update CRD to track that old primary is gone pg.Status.PrimaryPod = newPrimary pg.Status.DeadPods = append(pg.Status.DeadPods, oldPrimary)

// 4. Optional: patch Deployment to reduce replicas temporarily // so it doesn’t immediately respawn the old primary deployment := &appsv1.Deployment{} r.Get(ctx, types.NamespacedName{Name: "postgres-cluster"}, deployment) // We can modify replica count, labels, etc. to prevent respawning

3. Alternative: Use Deployment with a custom init container that checks the pod’s role before starting:

// Postgres pod init container // Checks: "Is this pod supposed to be primary or replica?" // If it thinks it should be primary but CRD says replica, exit and wait

initContainers:

  • name: postgres-role-check image: postgres-role-checker:1.0 env:
    • name: POD_NAME valueFrom: fieldRef: fieldPath: metadata.name
    • name: POSTGRES_INSTANCE value: postgres-cluster command:
    • /bin/sh
    • -c
    • |

      # Check if this pod should be running

      EXPECTED_ROLE=$(kubectl get PostgresInstance $POSTGRES_INSTANCE -o jsonpath='{.status.primaryPod}') if [ "$POD_NAME" != "$EXPECTED_ROLE" ]; then

      # I'm not the primary, wait for replica setup

      sleep 1000 # Sleep forever, pod will be terminated fi

      4. Best practice: Use an Operator framework (Kubebuilder, Operator SDK) that handles this complexity:

      // With kubebuilder:

// Define owner references so your PostgresInstance owns the Deployment // When PostgresInstance is deleted, Deployment is auto-deleted // You control Deployment replicas and labels programmatically

ctrl.NewControllerManagedBy(mgr). For(&dbv1.PostgresInstance{}). Owns(&appsv1.Deployment{}). Complete(&PostgresReconciler{})

// This ensures clean ownership and prevents conflicts

Follow-up: Design a test that simulates primary node failure and verifies failover completes without StatefulSet interference.

You want to support custom backup strategies: users can specify "hourly", "daily", "on-demand", or "never". Your operator must execute the right strategy. But operators can't schedule cron jobs well inside the reconciliation loop. Design a solution that supports user-defined backup policies without external cron infrastructure.

Backup scheduling inside operator: use the reconciliation loop + status tracking to decide when to backup:

1. Add backup policy to the CRD spec:

apiVersion: db.example.com/v1 kind: PostgresInstance metadata: name: my-postgres spec: backup: policy: "hourly" # or "daily", "on-demand", "never" schedule: "0 * * * *" # cron format for advanced users s3Bucket: "my-backups" retentionDays: 30 status: lastBackupTime: "2026-04-07T10:00:00Z" nextBackupTime: "2026-04-07T11:00:00Z" backupStatus: "Success"

2. In reconciliation, check if it's time to backup:

func (r *PostgresReconciler) shouldBackup(ctx context.Context, pg *dbv1.PostgresInstance) bool { policy := pg.Spec.Backup.Policy lastBackup := pg.Status.LastBackupTime now := time.Now()

switch policy { case "never": return false case "on-demand": // Check if user manually triggered backup if pg.Annotations["backup.example.com/trigger"] == "true" { delete(pg.Annotations, "backup.example.com/trigger") return true } return false case "hourly": if lastBackup == nil || now.Sub(lastBackup.Time) > 1time.Hour { return true } case "daily": if lastBackup == nil || now.Sub(lastBackup.Time) > 24time.Hour { return true } case "custom": // Parse cron schedule from spec.backup.schedule nextBackup := r.parseNextBackupTime(pg.Spec.Backup.Schedule, lastBackup) if now.After(nextBackup) { return true } } return false }

func (r *PostgresReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pg := &dbv1.PostgresInstance{} r.Get(ctx, req.NamespacedName, pg)

// Check if it’s time to backup if r.shouldBackup(ctx, pg) { if err := r.performBackup(ctx, pg); err != nil { pg.Status.BackupStatus = "Failed" r.Status().Update(ctx, pg) return ctrl.Result{RequeueAfter: 5*time.Minute}, err } pg.Status.LastBackupTime = &metav1.Time{Time: time.Now()} pg.Status.BackupStatus = "Success" }

// Calculate next backup time for scheduling if pg.Spec.Backup.Policy != "never" { nextBackup := r.calculateNextBackupTime(pg) pg.Status.NextBackupTime = &metav1.Time{Time: nextBackup}

// Requeue at next backup time requeue := time.Until(nextBackup) r.Status().Update(ctx, pg) return ctrl.Result{RequeueAfter: requeue}, nil }

return ctrl.Result{RequeueAfter: 5*time.Minute}, nil }

3. Support on-demand backups via annotation:

// User manually triggers backup: kubectl annotate PostgresInstance my-postgres backup.example.com/trigger=true --overwrite

// Operator detects annotation and immediately backs up // After backup, annotation is cleared

4. Handle cron schedule parsing for advanced users:

import "github.com/robfig/cron/v3"

func (r *PostgresReconciler) calculateNextBackupTime(pg dbv1.PostgresInstance) time.Time { if pg.Spec.Backup.Policy == "custom" && pg.Spec.Backup.Schedule != "" { // Parse cron expression parser := cron.NewParser(cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow) schedule, err := parser.Parse(pg.Spec.Backup.Schedule) if err != nil { return time.Now().Add(1time.Hour) // Default fallback } return schedule.Next(time.Now()) }

switch pg.Spec.Backup.Policy { case "hourly": return time.Now().Add(1time.Hour) case "daily": return time.Now().Add(24time.Hour) } return time.Now().Add(1*time.Hour) }

5. This approach has advantages:

- No external cron infrastructure needed

- Flexible: users can change policy without redeploying operator

- On-demand backups via annotation

- Status shows exact next backup time

Follow-up: How would you implement a backup verification that ensures backups are restorable without restoring them?

You deployed your Postgres operator. Users create PostgresInstance CRDs, and backups start automatically. But after a month, you realize backups are silently failing due to S3 permission errors, and no one noticed. The operator logs the error but keeps running. How do you make backup failures visible and actionable?

Silent failures are dangerous. Make failures visible via multiple channels:

1. Add conditions to the CRD status to surface issues:

apiVersion: db.example.com/v1 kind: PostgresInstance metadata: name: my-postgres status: conditions: - type: BackupHealthy status: "False" reason: "S3PermissionDenied" message: "Cannot write to S3 bucket: AccessDenied" lastTransitionTime: "2026-04-07T12:00:00Z" - type: PrimaryHealthy status: "True" - type: ReplicasReady status: "True"

2. In reconciliation, set conditions based on actual state:

func (r *PostgresReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pg := &dbv1.PostgresInstance{} r.Get(ctx, req.NamespacedName, pg)

// Perform backup if r.shouldBackup(ctx, pg) { if err := r.performBackup(ctx, pg); err != nil { // Set condition to indicate backup failure meta.SetStatusCondition(&pg.Status.Conditions, metav1.Condition{ Type: "BackupHealthy", Status: "False", Reason: "BackupFailed", Message: fmt.Sprintf("Backup failed: %v", err), ObservedGeneration: pg.Generation, }) r.Status().Update(ctx, pg)

// Emit event so it shows in kubectl describe r.Recorder.Event(pg, corev1.EventTypeWarning, "BackupFailed", err.Error())

// Requeue sooner to retry return ctrl.Result{RequeueAfter: 5*time.Minute}, nil }

// Success: clear the condition meta.SetStatusCondition(&pg.Status.Conditions, metav1.Condition{ Type: "BackupHealthy", Status: "True", Reason: "BackupSucceeded", Message: "Backup completed successfully", ObservedGeneration: pg.Generation, }) }

r.Status().Update(ctx, pg) return ctrl.Result{RequeueAfter: 1*time.Hour}, nil }

3. Export metrics to monitoring (Prometheus):

import "github.com/prometheus/client_golang/prometheus"

var (
	// backupFailures counts failed backups, labeled so alerts can show
	// which instance failed and why (e.g. S3PermissionDenied).
	backupFailures = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "postgres_backup_failures_total",
			Help: "Total number of failed backups",
		},
		[]string{"instance", "reason"},
	)

	// lastBackupTimestamp enables "backup is stale" alerting: time() minus
	// this gauge exceeding a threshold fires PostgresBackupStale.
	lastBackupTimestamp = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "postgres_last_backup_timestamp_seconds",
			Help: "Unix timestamp of last successful backup",
		},
		[]string{"instance"},
	)
)

// BUG FIX: the original never registered the collectors, so neither metric
// would ever appear on the /metrics endpoint. Register once at package init.
func init() {
	prometheus.MustRegister(backupFailures, lastBackupTimestamp)
}

// In reconciliation: if err := r.performBackup(ctx, pg); err != nil { backupFailures.WithLabelValues(pg.Name, extractErrorReason(err)).Inc() return ctrl.Result{}, err } lastBackupTimestamp.WithLabelValues(pg.Name).Set(float64(time.Now().Unix()))

4. Add alerting rules to Prometheus:

apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: name: postgres-operator-alerts spec: groups:

  • name: postgres rules:
    • alert: PostgresBackupFailed expr: | increase(postgres_backup_failures_total[1h]) > 0 for: 1m annotations: summary: "Postgres backup failed for {{ $labels.instance }}" description: "Backup failure: {{ $labels.reason }}"

    • alert: PostgresBackupStale expr: | time() - postgres_last_backup_timestamp_seconds > 86400 # 24 hours for: 10m annotations: summary: "No backup for {{ $labels.instance }} in 24 hours"

    5. Add a kubectl plugin to surface issues easily:

    #!/bin/bash

# kubectl-pg-status plugin

INSTANCE=$1 kubectl get PostgresInstance $INSTANCE -o json | jq '.status.conditions[] | select(.status=="False") | {type, reason, message}'

Usage:

kubectl pg-status my-postgres

Output:

{

"type": "BackupHealthy",

"reason": "S3PermissionDenied",

"message": "Cannot write to S3: AccessDenied"

}

6. Users can now see issues immediately:

kubectl describe PostgresInstance my-postgres # Shows conditions with reasons

kubectl logs -n postgres-system deployment/postgres-operator | grep -iE "backup|error"

Shows detailed logs

Prometheus alerts notify on-call team

Best Practice: Every operator action should be auditable: conditions for state, events for history, metrics for monitoring, logs for debugging.

Follow-up: Design a dashboard that shows backup health, failover history, and replica status across all PostgresInstance objects in the cluster.

Your operator manages Postgres clusters. A user creates a PostgresInstance with 10 read replicas. The operator creates 10 pods successfully. But during scaling down from 10 to 3, the operator kills 7 pods, but the data on those pods' PVCs remains. After accidental deletion, the PVCs still consume 1TB. Design how your operator should handle PVC cleanup during scale-down.

Scale-down with PVC cleanup is dangerous. Must be explicit and auditable:

1. Distinguish between pod deletion and data deletion:

// In reconciliation, when scaling down:

// BEFORE: scaling = 10 // AFTER: scaling = 3 // Must delete 7 replicas and their data

// Step 1: Delete pods first (but keep PVCs) for i := 3; i < 10; i++ { pod := &corev1.Pod{} r.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("postgres-replica-%d", i)}, pod) r.Delete(ctx, pod) }

// Wait for pods to terminate time.Sleep(30*time.Second)

// Step 2: Decide what to do with PVCs // Option A: Keep them (safe, but wastes storage) // Option B: Delete them (risky, data is lost)

2. Add deletion policy to CRD to make it explicit:

apiVersion: db.example.com/v1 kind: PostgresInstance metadata: name: my-postgres spec: readReplicas: 3 dataRetention: policy: "delete" # or "retain" confirmDeletion: true # Requires explicit annotation to delete status: replicaCount: 3 replicaPVCs:

  • postgres-replica-1-pvc
  • postgres-replica-2-pvc
  • postgres-replica-3-pvc

    3. Implement safe PVC deletion with confirmation:

    func (r *PostgresReconciler) reconcileReplicaCount(ctx context.Context, pg *dbv1.PostgresInstance) error {

// Current replicas currentPods := r.getReplicaPods(ctx, pg)

// Desired replicas desired := pg.Spec.ReadReplicas current := len(currentPods)

if current > desired { // Scaling down podsToDelete := currentPods[desired:]

// Check deletion policy if pg.Spec.DataRetention.Policy == "delete" { if pg.Spec.DataRetention.ConfirmDeletion { // Require explicit confirmation via annotation if pg.Annotations["postgres.example.com/confirm-deletion"] != "true" { // Emit warning event r.Recorder.Event(pg, corev1.EventTypeWarning, "DeletionPending", fmt.Sprintf("Scale down requested but requires confirmation. Annotate with confirm-deletion=true")) return fmt.Errorf("deletion not confirmed") } }

// Perform deletion for _, pod := range podsToDelete { pvc := r.getPVCForPod(ctx, pod)

// Delete pod first r.Delete(ctx, pod)

// Wait for pod to terminate time.Sleep(10*time.Second)

// Delete PVC r.Delete(ctx, pvc)

// Record in status pg.Status.DeletedPVCs = append(pg.Status.DeletedPVCs, pvc.Name) } } else if pg.Spec.DataRetention.Policy == "retain" { // Just delete pods, keep PVCs for _, pod := range podsToDelete { r.Delete(ctx, pod) }

// PVCs remain for recovery/inspection log.Info("Retained PVCs for deleted replicas", "pvcs", r.getPVCNames(podsToDelete)) } }

return nil }

4. Usage: require explicit confirmation:

// Scale down request kubectl patch PostgresInstance my-postgres -p '{"spec":{"readReplicas":3}}'

// Operator detects pending deletion, emits event kubectl get events

PostgresInstance my-postgres: Scale down requires confirmation

User explicitly confirms

kubectl annotate PostgresInstance my-postgres postgres.example.com/confirm-deletion=true --overwrite

// Operator detects confirmation, deletes replicas and PVCs

5. Add a safeguard: require manual annotation for destructive operations:

// Prevent accidental deletions if pg.Spec.DataRetention.Policy == "delete" && pg.Spec.DataRetention.ConfirmDeletion { if pg.Annotations["postgres.example.com/confirm-deletion"] != "true" { return fmt.Errorf("deletion not confirmed") }

// Auto-clear confirmation after 1 hour to prevent accidental re-deletes lastConfirmed := pg.Annotations["postgres.example.com/last-confirmed"] if lastConfirmed != "" { lastTime, _ := time.Parse(time.RFC3339, lastConfirmed) if time.Since(lastTime) > 1*time.Hour { delete(pg.Annotations, "postgres.example.com/confirm-deletion") return fmt.Errorf("deletion confirmation expired") } } }

6. Alternative: Move PVCs to archive storage instead of deleting:

// Instead of deleting PVC immediately: // 1. Copy data to long-term storage (S3, archive bucket) r.archivePVCData(ctx, pvc)

// 2. Mark PVC for deletion after archive succeeds pvc.Annotations["archive.example.com/archived"] = "true" pvc.Annotations["archive.example.com/delete-after"] = time.Now().Add(30*time.Day).Format(time.RFC3339) r.Update(ctx, pvc)

// 3. A separate cleanup controller periodically deletes PVCs marked for deletion

Best Practice: Data deletion should ALWAYS require explicit confirmation. Offer retention options (keep PVCs, archive to S3) to prevent accidental data loss.

Follow-up: Design an audit log that tracks every destructive operation (pod deletion, PVC deletion) with who triggered it and when.

Your operator has a webhook that validates PostgresInstance CRDs before they're created. The webhook rejects invalid configurations (e.g., readReplicas > 10, s3Bucket not accessible). But the webhook pod crashes periodically, and when it does, all PostgresInstance operations hang (kubectl apply blocks). Design a resilient validation strategy.

Webhook crashes block all CRD operations. Must be resilient:

1. Implement the webhook with failure policy set to "ignore" to allow operations even if webhook fails:

apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: postgres-validator webhooks: - name: postgres-validator.example.com failurePolicy: Ignore # Allow operations if webhook is unavailable timeoutSeconds: 5 # Don't wait forever sideEffects: None admissionReviewVersions: ["v1"] clientConfig: service: name: postgres-operator namespace: postgres-system path: "/validate" caBundle: LS0tLS... (base64 CA cert) rules: - operations: ["CREATE", "UPDATE"] apiGroups: ["db.example.com"] apiVersions: ["v1"] resources: ["postgresinstances"] scope: "Namespaced"

2. Implement webhook with proper error handling:

func validatePostgres(w http.ResponseWriter, r *http.Request) { admissionReview := admissionv1.AdmissionReview{} json.NewDecoder(r.Body).Decode(&admissionReview)

pg := &dbv1.PostgresInstance{} json.Unmarshal(admissionReview.Request.Object.Raw, pg)

allowed := true var reason string

// Validate readReplicas if pg.Spec.ReadReplicas > 10 { allowed = false reason = "readReplicas cannot exceed 10" }

// Validate S3 bucket accessibility if !r.canAccessS3Bucket(pg.Spec.Backup.S3Bucket) { allowed = false reason = fmt.Sprintf("S3 bucket %s not accessible", pg.Spec.Backup.S3Bucket) }

// Return admission review response admissionResponse := &admissionv1.AdmissionResponse{ UID: admissionReview.Request.UID, Allowed: allowed, Result: &metav1.Status{ Message: reason, }, }

json.NewEncoder(w).Encode(admissionv1.AdmissionReview{ TypeMeta: metav1.TypeMeta{ APIVersion: "admission.k8s.io/v1", Kind: "AdmissionReview", }, Response: admissionResponse, }) }

3. Add health check endpoint to detect webhook failures:

// Webhook health endpoint func healthCheck(w http.ResponseWriter, r *http.Request) { // Check if webhook can access dependencies (S3, external APIs) if r.s3Client == nil || !r.canReachS3() { w.WriteHeader(http.StatusInternalServerError) w.Write([]byte("S3 unreachable")) return }

w.WriteHeader(http.StatusOK) w.Write([]byte("ok")) }

// In webhook config, add liveness/readiness livenessProbe: httpGet: path: /health port: 8443 scheme: HTTPS failureThreshold: 2 periodSeconds: 10

readinessProbe: httpGet: path: /health port: 8443 scheme: HTTPS failureThreshold: 1 periodSeconds: 5

4. Use multiple webhook replicas for redundancy:

apiVersion: apps/v1 kind: Deployment metadata: name: postgres-operator-webhook namespace: postgres-system spec: replicas: 3 # Multiple replicas selector: matchLabels: app: postgres-operator-webhook template: metadata: labels: app: postgres-operator-webhook spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - weight: 100 podAffinityTerm: labelSelector: matchExpressions: - key: app operator: In values: - postgres-operator-webhook topologyKey: kubernetes.io/hostname

5. Alternative: Move validation to controller instead of webhook

// Instead of blocking at admission time: // Allow CRD creation, but immediately validate in reconciliation

func (r *PostgresReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pg := &dbv1.PostgresInstance{} r.Get(ctx, req.NamespacedName, pg)

// Validate validationErrors := r.validatePostgres(pg) if len(validationErrors) > 0 { // Set condition instead of failing meta.SetStatusCondition(&pg.Status.Conditions, metav1.Condition{ Type: "Valid", Status: "False", Reason: "ValidationFailed", Message: strings.Join(validationErrors, "; "), }) r.Status().Update(ctx, pg) return ctrl.Result{RequeueAfter: 5*time.Minute}, nil }

// Proceed with reconciliation }

Best Practice: Use webhook for fast rejection of clearly invalid configs, but make failure graceful. Use controller validation as fallback. This way, temporary webhook failures don’t block users.

Follow-up: Design a system that allows webhook to be updated/redeployed without blocking any PostgresInstance operations.

Your operator creates a network policy that allows Postgres pods to communicate with S3 for backups. A cluster admin later deletes the network policy for security audit reasons. The operator reconciles and immediately recreates it. The admin is frustrated: "Why can't I delete policies I don't want?" Design how your operator should respect manual deletions.

Operator ownership conflict: operator creates resources, admin deletes them, operator recreates. Respect the delete intent:

1. Implement a "pause" annotation that lets admins opt out of operator management:

apiVersion: db.example.com/v1 kind: PostgresInstance metadata: name: my-postgres annotations: postgres.example.com/paused: "true" # Admin can pause operator spec: readReplicas: 3 status: paused: true # Operator stops reconciling this object

2. In reconciliation, check for pause:

func (r *PostgresReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pg := &dbv1.PostgresInstance{} r.Get(ctx, req.NamespacedName, pg)

// Check for pause annotation if pg.Annotations["postgres.example.com/paused"] == "true" { pg.Status.Paused = true r.Status().Update(ctx, pg)

// Stop reconciliation, but don’t delete child resources // This allows admin to manually manage them return ctrl.Result{}, nil }

pg.Status.Paused = false // Normal reconciliation continues }

3. For fine-grained control, allow admins to disable specific features:

apiVersion: db.example.com/v1 kind: PostgresInstance metadata: name: my-postgres spec: features: backup: enabled: true networkPolicy: enabled: false # Admin disabled network policy creation managed: false # Don’t recreate if manually deleted failover: enabled: true

4. In reconciliation, respect feature flags:

func (r *PostgresReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { pg := &dbv1.PostgresInstance{} r.Get(ctx, req.NamespacedName, pg)

// Create network policy only if enabled and managed if pg.Spec.Features.NetworkPolicy.Enabled { np := &networkingv1.NetworkPolicy{} err := r.Get(ctx, types.NamespacedName{Name: pg.Name + "-policy"}, np)

if errors.IsNotFound(err) { // Create if missing r.createNetworkPolicy(ctx, pg) } else if err != nil { return ctrl.Result{}, err } else { // Exists: check if admin manually deleted/modified it if pg.Spec.Features.NetworkPolicy.Managed { // Operator owns it, update if needed r.updateNetworkPolicy(ctx, pg, np) } else { // Admin owns it, don’t modify log.Info("NetworkPolicy not managed by operator", "instance", pg.Name) } } } else { // Feature disabled: delete if operator created it r.deleteNetworkPolicy(ctx, pg) } }

5. Use owner references to identify operator-managed resources:

// When creating a resource, set owner reference networkPolicy := &networkingv1.NetworkPolicy{ ObjectMeta: metav1.ObjectMeta{ Name: pg.Name + "-policy", Namespace: pg.Namespace, OwnerReferences: []metav1.OwnerReference{ *metav1.NewControllerRef(pg, dbv1.GroupVersion.WithKind("PostgresInstance")), }, }, }

// Later, if admin deletes the NetworkPolicy: // 1. Operator detects it’s missing // 2. Checks if it has an owner reference to this PostgresInstance // 3. If yes and managed=true, recreates it // 4. If yes and managed=false, respects deletion and doesn’t recreate

6. Allow granular opt-out via finalizers:

// Operator doesn’t automatically delete child resources // Instead, document how admin can safely delete them:

// For NetworkPolicy: kubectl delete networkpolicy my-postgres-policy -n default

// For Backup Job: kubectl delete cronjob my-postgres-backup -n default

// Operator will just re-create them on next reconcile // To permanently disable: annotate with managed=false

kubectl annotate PostgresInstance my-postgres postgres.example.com/managed-networkpolicy=false

Best Practice: Operator should be conservative: create resources if they don’t exist, update if they drift, but respect deletion intent if admin removes them and marks as "not managed".

Follow-up: Design a governance policy that prevents cluster admins from accidentally deleting critical operator-managed resources.

Want to go deeper?