This setup submits 1,000 "sleep" workloads to Kueue across 2 LocalQueues.
- KinD cluster
# Write the kind cluster definition (1 control-plane node + 3 workers), then
# create the cluster from it.
cat <<'EOF' > kind-config.yaml
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
  - role: control-plane
    # Sets the kubelet maxPods value to 250.
    # NOTE(review): the patch is attached to the control-plane node entry only;
    # confirm the worker kubelets also end up with maxPods=250.
    kubeadmConfigPatches:
      - |
        kind: KubeletConfiguration
        maxPods: 250
  - role: worker
  - role: worker
  - role: worker
EOF
kind create cluster --name kueue-testing --config kind-config.yaml
- Running the dev / commit version of kueue
# Deploy Kueue from the local checkout, build the dev image, and side-load it
# into the kind cluster ("..." is a placeholder, presumably for the image built
# by `make kind-image-build`).
make deploy
make kind-image-build
kind load docker-image --name kueue-testing ...
# JSON-patch the first container of the manager Deployment so its
# imagePullPolicy is IfNotPresent.
kubectl --namespace kueue-system patch deployment/kueue-controller-manager \
  --type json \
  --patch '[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "IfNotPresent"}]'
- (optional) Ingestion of metrics by prometheus / grafana
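Metrics are shipped from the kind cluster to a remote Prometheus instance via remote-write. The remote (receiving) Prometheus must have its remote-write receiver enabled: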
prometheus:
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--web.external-url=https://prometheus.overcastlab.com'
    # Lets this instance accept samples pushed to /api/v1/write.
    - '--web.enable-remote-write-receiver'  # ADD THIS
Add agent RBAC / deployment:
# Create the monitoring namespace, the agent's ServiceAccount, and the RBAC it
# needs to scrape metrics. The Namespace is part of the manifest, so a separate
# `kubectl create ns monitoring` is not needed and the command can be re-run.
kubectl apply -f - <<'EOF'
---
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-agent
  namespace: monitoring
---
# Permission to scrape metrics via kube-rbac-proxy
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-agent-metrics
rules:
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["nodes", "nodes/metrics", "services", "endpoints", "pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-agent-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-agent-metrics
subjects:
  - kind: ServiceAccount
    name: prometheus-agent
    namespace: monitoring
---
# Also bind the kueue metrics-reader role if it exists
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-agent-kueue-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kueue-metrics-reader
subjects:
  - kind: ServiceAccount
    name: prometheus-agent
    namespace: monitoring
---
# Long-lived token secret (so we don't need projected tokens for a dev setup)
# NOTE(review): the agent scrape config in this file reads the token from
# /var/run/secrets/kubernetes.io/serviceaccount/token and the agent Deployment
# does not mount this Secret, so it appears to be unused - confirm.
apiVersion: v1
kind: Secret
metadata:
  name: prometheus-agent-token
  namespace: monitoring
  annotations:
    kubernetes.io/service-account.name: prometheus-agent
type: kubernetes.io/service-account-token
EOF
# Deploy a Prometheus agent that scrapes the Kueue controller metrics service
# every 10s and forwards the samples to the remote instance via remote_write.
kubectl apply -f - <<'EOF'
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-agent-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 10s
      external_labels:
        cluster: "local-kind-kueue"
        source: "prometheus-agent"
    scrape_configs:
      - job_name: "kueue-controller"
        scheme: https
        tls_config:
          insecure_skip_verify: true
        authorization:
          credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        # Scrape the kueue metrics service directly via cluster DNS
        static_configs:
          - targets:
              - "kueue-controller-manager-metrics-service.kueue-system.svc.cluster.local:8443"
    remote_write:
      - url: "https://prometheus.overcastlab.com/api/v1/write"
        tls_config:
          insecure_skip_verify: true
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-agent
  namespace: monitoring
  labels:
    app: prometheus-agent
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-agent
  template:
    metadata:
      labels:
        app: prometheus-agent
    spec:
      serviceAccountName: prometheus-agent
      containers:
        - name: prometheus
          # Pinned instead of :latest. Prometheus 3.x replaced the
          # --enable-feature=agent feature flag with the dedicated --agent
          # flag, so the old flag no longer switches on agent mode there.
          image: prom/prometheus:v3.0.0
          args:
            - "--agent"
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--web.listen-address=:9090"
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: config
              mountPath: /etc/prometheus
              readOnly: true
          resources:
            requests:
              cpu: 50m
              memory: 128Mi
            limits:
              memory: 256Mi
      volumes:
        - name: config
          configMap:
            name: prometheus-agent-config
EOF
- WaitForPodsReady enabled
# Uncomment the waitForPodsReady stanza in the Kueue manager ConfigMap, write
# the ConfigMap back, and restart the controller.
# NOTE(review): only the comment markers are removed; the values inside the
# stanza (for example enable) are left as shipped - confirm they are what you want.
kubectl get cm -n kueue-system kueue-manager-config -o json | python3 -c "
import json,sys,re
d=json.load(sys.stdin)
k='controller_manager_config.yaml'
lines=d['data'][k].split('\n')
out=[]
i=0
while i<len(lines):
    l=lines[i]
    if re.match(r'^#waitForPodsReady:',l):
        # Drop the leading # from the stanza header ...
        out.append(l[1:])
        i+=1
        # ... and from each directly following line that starts with # plus a
        # space or tab, stopping at the first line that does not.
        while i<len(lines) and re.match(r'^#[ \t]',lines[i]):
            out.append(lines[i][1:])
            i+=1
    else:
        # Every other line is copied through unchanged.
        out.append(l)
        i+=1
d['data'][k]='\n'.join(out)
print(json.dumps(d))
" | kubectl replace -f -
kubectl rollout restart deploy kueue-controller-manager -n kueue-system
- LQ metrics feature gate enabled
# Turn on the LocalQueueMetrics feature gate in the Kueue manager ConfigMap,
# write the ConfigMap back, and restart the controller.
# If the gate cannot be enabled the script prints an error and emits no JSON,
# so kubectl replace fails instead of silently re-applying an unchanged config.
kubectl get cm -n kueue-system kueue-manager-config -o json | python3 -c "
import json,sys,re
d=json.load(sys.stdin)
k='controller_manager_config.yaml'
cfg=d['data'][k]
gate='  LocalQueueMetrics: true'
if re.search(r'^[ \t]+LocalQueueMetrics:',cfg,re.M):
    # Gate already listed (possibly as false): force its value to true.
    cfg=re.sub(r'^([ \t]+LocalQueueMetrics:)[^\n]*',lambda m:m.group(1)+' true',cfg,count=1,flags=re.M)
elif re.search(r'^featureGates:',cfg,re.M):
    # An active featureGates block exists: add the gate right below its header.
    # Anchoring at line start keeps a commented-out #featureGates: from matching.
    cfg=re.sub(r'^(featureGates:[^\n]*)',lambda m:m.group(1)+'\n'+gate,cfg,count=1,flags=re.M)
else:
    # No active featureGates block: create one right after the kind line.
    cfg=re.sub(r'(kind: Configuration\n)',lambda m:m.group(1)+'featureGates:\n'+gate+'\n',cfg,count=1)
if not re.search(r'^[ \t]+LocalQueueMetrics: true',cfg,re.M):
    sys.exit('could not enable LocalQueueMetrics in '+k)
d['data'][k]=cfg
print(json.dumps(d))
" | kubectl replace -f -
kubectl rollout restart deploy kueue-controller-manager -n kueue-system
Lastly, ensure the queue used has a good amount of nominal resources:
# Apply the example single-ClusterQueue setup from the local Kueue checkout.
kubectl apply --filename "${HOME}/kueue/examples/admin/single-clusterqueue-setup.yaml"
# Then edit the applied queue so its nominal resources are large enough.