---
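# Deploys a native Ceph cluster on a k3s cluster via the Rook operator:
#   1. Adds the Rook Helm repo and installs the rook-ceph operator.
#   2. Verifies the dedicated NVMe partition (/dev/nvme0n1p3) on all targeted nodes.
#   3. Creates a CephCluster (3 mons, 2 mgrs, one OSD per node on blade001-blade005).
#   4. Creates a replicated RBD pool plus a default 'ceph-rbd' StorageClass and
#      validates it with a throwaway test PVC.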
- name: Install Rook Ceph operator via Helm
  become: true
  hosts: bootstrapMaster
  tasks:
    - name: Ping host
      ping:

    - name: Install rook Helm repo
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        helm repo add rook-release https://charts.rook.io/release
        helm repo update

    - name: Install rook-ceph operator
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        helm upgrade --install rook rook-release/rook-ceph --namespace rook-ceph --create-namespace \
          --set crds.enabled=true
        kubectl wait --for=condition=Available --timeout=5m -n rook-ceph deployment/rook-ceph-operator
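    # Optional manual check (not run by this play): confirm the operator is healthy
    # before the CephCluster is applied, e.g.
    #   KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n rook-ceph get pods
    #   KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl -n rook-ceph logs deploy/rook-ceph-operator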
- name: Prepare Ceph storage devices on all nodes (partitioned)
  become: true
  vars:
    ceph_partition: "/dev/nvme0n1p3"
  hosts: [bootstrapMaster, masters, workers]
  tasks:
    - name: Check NVMe Ceph partition exists
      stat:
        path: "{{ ceph_partition }}"
      register: nvme_ceph_partition

    - name: Fail if Ceph partition not found
      fail:
        msg: "Ceph partition {{ ceph_partition }} not found. Run nvme-partitioning playbook first."
      when: not nvme_ceph_partition.stat.exists

    - name: Verify Ceph partition is clean
      shell: |
        echo "=== Ceph partition status ==="
        lsblk {{ ceph_partition }}
        echo ""
        echo "=== Filesystem check ==="
        blkid {{ ceph_partition }} || echo "No filesystem detected (good for Ceph)"
      register: partition_status

    - name: Show partition status
      debug:
        msg: "{{ partition_status.stdout_lines }}"
- name: Deploy native Ceph cluster via Rook
  become: true
  vars:
    ceph_partition: "/dev/nvme0n1p3"
  hosts: bootstrapMaster
  tasks:
    - name: Create native CephCluster
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        cat <<EOF | kubectl apply -f -
        apiVersion: ceph.rook.io/v1
        kind: CephCluster
        metadata:
          name: rook-ceph
          namespace: rook-ceph
        spec:
          cephVersion:
            image: quay.io/ceph/ceph:v19.2.2
          dataDirHostPath: /var/lib/rook
          skipUpgradeChecks: false
          continueUpgradeAfterChecksEvenIfNotHealthy: false
          waitTimeoutForHealthyOSDInMinutes: 10
          mon:
            count: 3
            allowMultiplePerNode: false
          mgr:
            count: 2
            allowMultiplePerNode: false
            modules:
              - name: pg_autoscaler
                enabled: true
          dashboard:
            enabled: true
            port: 8443
            ssl: true
          monitoring:
            enabled: false
          network:
            requireMsgr2: false
          crashCollector:
            disable: false
          logCollector:
            enabled: true
            periodicity: daily
            maxLogSize: 500M
          cleanupPolicy:
            confirmation: ""
            sanitizeDisks:
              method: quick
              dataSource: zero
              iteration: 1
            allowUninstallWithVolumes: false
          placement:
            all:
              nodeAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                  nodeSelectorTerms:
                    - matchExpressions:
                        - key: kubernetes.io/hostname
                          operator: In
                          values:
                            - blade001
                            - blade002
                            - blade003
                            - blade004
                            - blade005
              tolerations:
                - effect: NoSchedule
                  key: node.kubernetes.io/unschedulable
                  operator: Exists
                - effect: NoSchedule
                  key: node.cloudprovider.kubernetes.io/uninitialized
                  operator: Exists
          annotations:
          labels:
          resources:
          removeOSDsIfOutAndSafeToRemove: false
          storage:
            useAllNodes: false
            useAllDevices: false
            config:
              osdsPerDevice: "1"
              encryptedDevice: "false"
              databaseSizeMB: "1024"
              walSizeMB: "1024"
            nodes:
              - name: "blade001"
                devices:
                  - name: "{{ ceph_partition }}"
                    config:
                      osdsPerDevice: "1"
              - name: "blade002"
                devices:
                  - name: "{{ ceph_partition }}"
                    config:
                      osdsPerDevice: "1"
              - name: "blade003"
                devices:
                  - name: "{{ ceph_partition }}"
                    config:
                      osdsPerDevice: "1"
              - name: "blade004"
                devices:
                  - name: "{{ ceph_partition }}"
                    config:
                      osdsPerDevice: "1"
              - name: "blade005"
                devices:
                  - name: "{{ ceph_partition }}"
                    config:
                      osdsPerDevice: "1"
          disruptionManagement:
            managePodBudgets: false
            osdMaintenanceTimeout: 30
            pgHealthCheckTimeout: 0
        EOF
        echo "Waiting for CephCluster to be ready..."
        kubectl wait --for=condition=Ready --timeout=10m -n rook-ceph cephcluster/rook-ceph
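    # Note: the placement block above pins all Ceph daemons to blade001-blade005, and
    # mon count=3 / mgr count=2 with allowMultiplePerNode=false means at least three of
    # those nodes must be schedulable for the cluster to reach Ready.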
    - name: Verify Ceph OSDs are created
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        echo "Checking Ceph cluster status..."
        # Wait for OSDs to be created
        echo "Waiting for OSDs to be ready..."
        for i in {1..30}; do
          OSD_COUNT=$(kubectl get pods -n rook-ceph -l app=rook-ceph-osd --no-headers 2>/dev/null | wc -l)
          if [ "$OSD_COUNT" -ge 5 ]; then
            echo "Found $OSD_COUNT OSDs"
            break
          fi
          echo "  Waiting for OSDs... ($OSD_COUNT/5 found)"
          sleep 10
        done
        # Show OSD status
        kubectl get pods -n rook-ceph -l app=rook-ceph-osd
      ignore_errors: true
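    # The OSD check above is informational only (ignore_errors: true), so a slow OSD
    # rollout does not abort the play; the blocking gate is the CephBlockPool wait in
    # the next task.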
    - name: Create RBD StorageClass and set as default
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        # Wait for Ceph cluster to be ready first
        echo "Waiting for Ceph cluster to be healthy..."
        kubectl wait --for=condition=Ready --timeout=20m -n rook-ceph cephcluster/rook-ceph || true
        # Create RBD pool
        cat <<EOF | kubectl apply -f -
        apiVersion: ceph.rook.io/v1
        kind: CephBlockPool
        metadata:
          name: replicapool
          namespace: rook-ceph
        spec:
          failureDomain: host
          replicated:
            size: 3
        EOF
        # Wait for pool to be ready
        kubectl wait --for=jsonpath='{.status.phase}'=Ready --timeout=15m -n rook-ceph cephblockpool/replicapool
        # Remove default annotation from local-path StorageClass
        kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' || true
        # Create RBD StorageClass and set as default
        cat <<EOF | kubectl apply -f -
        apiVersion: storage.k8s.io/v1
        kind: StorageClass
        metadata:
          name: ceph-rbd
          annotations:
            storageclass.kubernetes.io/is-default-class: "true"
        provisioner: rook-ceph.rbd.csi.ceph.com
        parameters:
          clusterID: rook-ceph
          pool: replicapool
          imageFormat: "2"
          imageFeatures: layering
          csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
          csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
          csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
          csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
          csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
          csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
          csi.storage.k8s.io/fstype: ext4
        allowVolumeExpansion: true
        reclaimPolicy: Delete
        EOF
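    # Note: replicated size=3 with failureDomain=host needs OSDs on at least three
    # distinct hosts to place all three replicas; the five-node layout above satisfies
    # this even with one or two nodes down.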
    - name: Create test PVC to validate Ceph integration
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        # Create test PVC
        cat <<EOF | kubectl apply -f -
        apiVersion: v1
        kind: PersistentVolumeClaim
        metadata:
          name: test-ceph-pvc
          namespace: default
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 1Gi
          storageClassName: ceph-rbd
        EOF
        # Wait for PVC to be bound (PVCs report a phase, not a "Bound" condition)
        echo "Waiting for test PVC to be bound..."
        kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/test-ceph-pvc -n default --timeout=5m
        # Show PVC status
        kubectl get pvc test-ceph-pvc -n default
        # Show PV details
        PV_NAME=$(kubectl get pvc test-ceph-pvc -n default -o jsonpath='{.spec.volumeName}')
        kubectl get pv $PV_NAME
        echo "Native Ceph integration test successful!"
      ignore_errors: true
    - name: Clean up test PVC
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        # Delete test PVC after successful validation
        kubectl delete pvc test-ceph-pvc -n default --ignore-not-found=true
        echo "Test PVC cleaned up"
      ignore_errors: true

    - name: Display Ceph cluster summary
      debug:
        msg:
          - "=================================================="
          - "Native Ceph Cluster Deployed Successfully"
          - "=================================================="
          - ""
          - "Rook operator installed and configured"
          - "Native Ceph cluster deployed on NVMe drives"
          - "StorageClass 'ceph-rbd' created and set as default"
          - "local-path StorageClass no longer default"
          - "Test PVC validation completed"
          - ""
          - "Cluster Details:"
          - " • Monitors: 3 (on blade001, blade002, blade003)"
          - " • Managers: 2"
          - " • OSDs: 5 (NVMe partition 3 on all nodes)"
          - " • Dashboard: Enabled on port 8443"
          - ""
          - "Storage Configuration:"
          - " • Pool: replicapool (3 replicas)"
          - " • Features: layering"
          - " • Reclaim Policy: Delete"
          - " • Volume Expansion: Enabled"
          - " • Default StorageClass: ceph-rbd"
          - ""
          - "Management Commands:"
          - " • Cluster status: kubectl get cephcluster -n rook-ceph"
          - " • Ceph status: kubectl exec -n rook-ceph deployment/rook-ceph-tools -- ceph status"
          - " • Dashboard: kubectl get svc -n rook-ceph rook-ceph-mgr-dashboard"
    - name: Final validation of Ceph cluster
      shell: |-
        set -euxo pipefail
        export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
        echo "Performing final validation..."
        # Check Rook operator status
        OPERATOR_READY=$(kubectl get deployment rook-ceph-operator -n rook-ceph -o jsonpath='{.status.readyReplicas}')
        echo "Rook operator ready replicas: ${OPERATOR_READY:-0}"
        # Check CephCluster status
        CLUSTER_PHASE=$(kubectl get cephcluster rook-ceph -n rook-ceph -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
        echo "CephCluster phase: ${CLUSTER_PHASE}"
        # Check StorageClass
        DEFAULT_SC=$(kubectl get storageclass -o jsonpath='{.items[?(@.metadata.annotations.storageclass\.kubernetes\.io/is-default-class=="true")].metadata.name}')
        echo "Default StorageClass: ${DEFAULT_SC}"
        # Check CSI driver pods
        CSI_PODS=$(kubectl get pods -n rook-ceph -l app=csi-rbdplugin --no-headers 2>/dev/null | wc -l)
        echo "CSI RBD driver pods: ${CSI_PODS}"
        # Check OSDs
        OSD_PODS=$(kubectl get pods -n rook-ceph -l app=rook-ceph-osd --no-headers 2>/dev/null | wc -l)
        echo "Ceph OSD pods: ${OSD_PODS}"
        echo "Native Ceph cluster validation completed!"
      ignore_errors: true
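# Example invocation (playbook and inventory file names are illustrative):
#   ansible-playbook -i inventory/hosts.ini rook-ceph.yml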