Skip to content

Kubernetes Operators

Operators are a method of packaging, deploying, and managing Kubernetes applications. They extend Kubernetes with custom controllers that understand application-specific operational knowledge.

An Operator:

  • Is a custom controller that watches Custom Resources
  • Encodes operational knowledge into code
  • Automates complex application management
  • Handles deployment, scaling, backups, and upgrades
┌─────────────────────────────────────────────────────────────────┐
│ Operator Architecture │
│ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Custom Resource (CR) │ │
│ │ apiVersion: example.com/v1 │ │
│ │ kind: Database │ │
│ │ metadata: │ │
│ │ name: mydb │ │
│ │ spec: │ │
│ │ version: "1.0" │ │
│ │ size: small │ │
│ └──────────────────────┬──────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Operator Controller │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
│ │ │ Watch │─▶│ Reconcile │─▶│ Apply │ │ │
│ │ │ CRs │ │ Logic │ │ Resources │ │ │
│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
│ └──────────────────────┬──────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Kubernetes Resources │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌─────────┐ │ │
│ │ │Deployment│ │ Service │ │ Config │ │ PVC │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └─────────┘ │ │
│ └─────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘

CRDs extend the Kubernetes API with custom resources.

# CustomResourceDefinition for the Database kind used throughout this page.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # Must be <plural>.<group>.
  name: databases.example.com
spec:
  group: example.com
  names:
    kind: Database
    plural: databases
    singular: database
    shortNames:
      - db
  scope: Namespaced
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                version:
                  type: string
                size:
                  type: string
                  enum: [small, medium, large]
                # Declared so the backup settings used by the example
                # Database resource are not pruned as unknown fields.
                backupEnabled:
                  type: boolean
                backupSchedule:
                  type: string
            status:
              type: object
              # Status is written by the controller; its fields are left open here.
              x-kubernetes-preserve-unknown-fields: true
      # Required for the controller's r.Status().Update(...) calls.
      subresources:
        status: {}
---
# CustomResourceDefinition for a Website kind with a validated spec.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: websites.example.com
spec:
  group: example.com
  names:
    kind: Website
    plural: websites
  scope: Namespaced
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          required: ["spec"]
          properties:
            spec:
              type: object
              required: ["domain", "tls"]
              properties:
                domain:
                  type: string
                  # Lower-case DNS name: dot-separated labels and a TLD of 2+ letters.
                  pattern: '^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}$'
                tls:
                  type: object
                  properties:
                    enabled:
                      type: boolean
                    secretName:
                      type: string
                replicas:
                  type: integer
                  minimum: 1
                  maximum: 10
                  default: 1
---
# Custom Resource: one Database instance handled by the operator.
apiVersion: example.com/v1
kind: Database
metadata:
  name: my-production-db
spec:
  version: "14"
  size: medium
  backupEnabled: true
  # Cron expression: every day at 02:00.
  backupSchedule: "0 2 * * *"
# Operator Controller (pseudocode)
// DatabaseReconciler is the controller type whose Reconcile method handles
// Database resources (pseudocode).
type DatabaseReconciler struct {
	// Embedded API client; presumably the source of the Get, Create and
	// Status calls that Reconcile makes on the receiver.
	client.Client
	// Scheme is a runtime.Scheme handle; it is not referenced by the code
	// shown in this snippet.
	Scheme *runtime.Scheme
}
// Reconcile brings one Database to its desired state (pseudocode).
//
// It loads the Database named by req, ensures its Deployment, Service and
// PVC exist (plus a backup CronJob when Spec.BackupEnabled is set), and then
// marks the resource ready in its status. The first step that fails aborts
// the pass and its error is returned to the caller.
func (r *DatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) error {
	// 1. Fetch the Database instance. Stop on failure instead of continuing
	// with an empty object. A production controller would additionally treat
	// "not found" as success, since the object may already be deleted.
	db := &examplev1.Database{}
	if err := r.Get(ctx, req.NamespacedName, db); err != nil {
		return err
	}

	// 2. Create or update resources. Reconcile runs repeatedly for the same
	// object, so a plain Create would fail once the children exist; use the
	// same CreateOrUpdate helper as the backup step below (pseudocode helper,
	// assumed to return an error).
	if err := r.CreateOrUpdate(ctx, r.buildDeployment(db)); err != nil {
		return err
	}
	if err := r.CreateOrUpdate(ctx, r.buildService(db)); err != nil {
		return err
	}
	if err := r.CreateOrUpdate(ctx, r.buildPVC(db)); err != nil {
		return err
	}

	// 3. Handle backup if enabled
	if db.Spec.BackupEnabled {
		if err := r.CreateOrUpdate(ctx, r.buildBackupCronJob(db)); err != nil {
			return err
		}
	}

	// 4. Update status only after every child resource was applied.
	db.Status.Ready = true
	return r.Status().Update(ctx, db)
}
Terminal window
# Install using Helm
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# Refresh the local chart index so the newly added repository is usable.
helm repo update
helm install prometheus-operator prometheus-community/kube-prometheus-stack

# Install using OperatorHub (the manifest creates OLM objects, so OLM must
# already be running in the cluster)
kubectl apply -f https://operatorhub.io/install/prometheus.yaml

# Install using OLM (Operator Lifecycle Manager) via the kubectl-operator plugin
kubectl operator install prometheus --create-operator-group
| Operator              | Purpose                    |
|-----------------------|----------------------------|
| Prometheus Operator   | Monitoring and alerting    |
| etcd Operator         | etcd cluster management    |
| Prometheus Operator   | Service monitoring         |
| Rook Operator         | Block and file storage     |
| Istio Operator        | Service mesh               |
| ArgoCD Operator       | GitOps                     |
| cert-manager Operator | TLS certificate management |
Terminal window
# Create (or update) a custom resource from its manifest file
kubectl apply -f my-database.yaml
# List custom resources of the Database kind (plural resource name)
kubectl get databases
# Describe a single custom resource by name
kubectl describe database my-production-db
# Delete that custom resource
kubectl delete database my-production-db
Terminal window
# Install Operator SDK
brew install operator-sdk

# Initialize new operator.
# --repo sets the Go module path, matching the "my-operator/api/v1" import
# used by the controller code.
operator-sdk init --domain com --repo my-operator --project-name my-operator

# Create new CRD and controller.
# The API group is "<group>.<domain>", so group "example" plus domain "com"
# gives "example.com" (passing example.com for both would produce
# "example.com.example.com").
operator-sdk create api --group example --version v1 --kind Database --resource --controller

# Generate deepcopy code and CRD/RBAC manifests
make generate
make manifests

# Build operator image
make docker-build IMG=my-operator:latest

# Deploy operator
make deploy IMG=my-operator:latest
my-operator/
├── config/
│ ├── crd/
│ │ └── bases/
│ │ └── example.com_databases.yaml
│ ├── rbac/
│ │ ├── role.yaml
│ │ └── role_binding.yaml
│ └── manager/
│ └── manager.yaml
├── controllers/
│ ├── suite_test.go
│ └── database_controller.go
├── api/
│ └── v1/
│ ├── groupversion_info.go
│ └── database_types.go
├── main.go
└── Dockerfile
package controllers
import (
	"context"
	"fmt"

	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	examplecomv1 "my-operator/api/v1"
)
// DatabaseReconciler reconciles a Database object.
type DatabaseReconciler struct {
	// Embedded API client; presumably the source of the Get, Update and
	// Status calls made on the receiver by the methods in this file.
	client.Client
	// Log is the base logger; Reconcile derives a per-request logger from it.
	Log logr.Logger
	// Scheme is a runtime.Scheme handle; it is not referenced by the methods
	// shown in this file.
	Scheme *runtime.Scheme
}
//+kubebuilder:rbac:groups=example.com,resources=databases,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=example.com,resources=databases/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=example.com,resources=databases/finalizers,verbs=update
//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=pods;services;persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete

// Reconcile handles one reconcile request for a Database.
//
// It fetches the Database named by req (a missing object is treated as
// success), delegates to handleFinalizer when the object is being deleted,
// otherwise reconciles the child resources through reconcileDatabase and
// records readiness in the status. Errors are wrapped with the failing step
// and returned; they are not logged here as well, so each failure is
// reported once by whoever receives the returned error.
func (r *DatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := r.Log.WithValues("database", req.NamespacedName)

	// Fetch the Database instance.
	database := &examplecomv1.Database{}
	if err := r.Get(ctx, req.NamespacedName, database); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// The instance is marked for deletion: run cleanup and release the
	// finalizer instead of reconciling child resources.
	if database.GetDeletionTimestamp() != nil {
		log.Info("Database is being deleted, running finalizer cleanup")
		if err := r.handleFinalizer(database); err != nil {
			return ctrl.Result{}, fmt.Errorf("finalizing database: %w", err)
		}
		return ctrl.Result{}, nil
	}

	// Reconcile logic.
	if err := r.reconcileDatabase(database); err != nil {
		return ctrl.Result{}, fmt.Errorf("reconciling database resources: %w", err)
	}

	// Update status only after the child resources were reconciled.
	database.Status.Ready = true
	database.Status.Message = "Database is ready"
	if err := r.Status().Update(ctx, database); err != nil {
		return ctrl.Result{}, fmt.Errorf("updating database status: %w", err)
	}
	return ctrl.Result{}, nil
}
// reconcileDatabase builds the child objects (Deployment, Service, PVC) for
// database. Applying them to the cluster is still a placeholder, so this
// skeleton currently always returns nil.
func (r *DatabaseReconciler) reconcileDatabase(database *examplecomv1.Database) error {
	// Desired state of each child resource, derived from the Database.
	deployment := r.buildDeployment(database)
	service := r.buildService(database)
	pvc := r.buildPVC(database)

	// TODO: create or update the deployment, service and pvc in the cluster.
	// Until that is implemented the values are blank-assigned: Go rejects
	// locals that are declared but never used, so the skeleton would
	// otherwise not compile.
	_, _, _ = deployment, service, pvc

	return nil
}
// SetupWithManager registers r with mgr as the controller for Database
// objects, declaring Pod, Service and PersistentVolumeClaim as owned types.
// It returns the error reported by the builder's Complete call.
//
// NOTE(review): reconcileDatabase builds a Deployment, which is not listed
// here, while Pods are. Confirm whether the Deployment type was intended.
func (r *DatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error {
	b := ctrl.NewControllerManagedBy(mgr)
	b = b.For(&examplecomv1.Database{})
	b = b.Owns(&corev1.Pod{})
	b = b.Owns(&corev1.Service{})
	b = b.Owns(&corev1.PersistentVolumeClaim{})
	return b.Complete(r)
}

OLM helps manage Operators in a cluster:

# OperatorGroup: selects the namespaces the installed operator targets.
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  name: my-operator-group
  namespace: my-namespace
spec:
  targetNamespaces:
    - my-namespace
---
# Subscription: asks OLM to install my-operator from the given catalog
# source and channel.
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: my-operator
  namespace: my-namespace
spec:
  channel: stable
  name: my-operator
  source: operatorhubio-catalog
  sourceNamespace: olm
  1. Use CRD validation: Define OpenAPI schema for your CR
  2. Implement status subresource: Track resource state
  3. Add finalizers: Handle cleanup properly
  4. Use labels and annotations: Provide metadata
  5. Handle errors gracefully: Implement retry logic
  6. Add observability: Add metrics and logging
  7. Test thoroughly: Unit and integration tests
# Finalizers are part of the object metadata, not of the spec.
metadata:
  finalizers:
    - example.com/database-finalizer
status:
  phase: Running
// Add finalizer on create: append finalizerName to the object's finalizer
// list unless it is already present.
// NOTE(review): this only changes the in-memory object. Unlike the removal
// path in handleFinalizer, no r.Update call is shown here, so confirm that
// the surrounding code persists the change.
if !containsString(database.Finalizers, finalizerName) {
	database.Finalizers = append(database.Finalizers, finalizerName)
}
// handleFinalizer removes the finalizer on delete.
//
// If database does not carry finalizerName it returns nil without doing
// anything. Otherwise it runs cleanupResources, and only when that succeeds
// strips finalizerName from the finalizer list and writes the object back
// with r.Update. A cleanup failure is returned unchanged and leaves the
// finalizer in place.
//
// NOTE(review): the update uses context.Background() because this method
// receives no context from its caller.
func (r *DatabaseReconciler) handleFinalizer(database *examplecomv1.Database) error {
	if present := containsString(database.GetFinalizers(), finalizerName); !present {
		return nil
	}

	// Cleanup must succeed before the finalizer is released.
	cleanupErr := r.cleanupResources(database)
	if cleanupErr != nil {
		return cleanupErr
	}

	// Re-read the list after cleanup, then store it without our finalizer.
	remaining := removeString(database.GetFinalizers(), finalizerName)
	database.SetFinalizers(remaining)
	return r.Update(context.Background(), database)
}
# Enable the status subresource in the CRD.
# In apiextensions.k8s.io/v1 it is set per version, under spec.versions[]:
versions:
  - name: v1
    subresources:
      status: {}

Operators are essential for:

  • Automating complex applications: Handle stateful workloads
  • Extending Kubernetes: Add custom functionality
  • Managing lifecycle: Handle upgrades, backups, recovery
  • Self-healing: Automatic failover and recovery
  • GitOps: Declarative configuration management

Key concepts:

  • CRD: Custom Resource Definition
  • Custom Resource: Instance of CRD
  • Controller: Reconciles desired state
  • Operator: CRD + Controller
  • OLM: Operator Lifecycle Manager