@@ -1298,3 +1298,107 @@ tests:
1298
1298
description : ' Cluster has overcommitted memory resource requests for Namespaces.'
1299
1299
runbook_url : " https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit"
1300
1300
summary : " Cluster has overcommitted memory resource requests."
1301
+
1302
# KubeCPUOvercommit on a single-node cluster.
# Two namespaces request 1 CPU each (2 total) against one node exposing 1.9
# allocatable CPU. No alert is expected at 9m; one warning is expected at 10m.
# NOTE(review): 0.385 = 2 - 0.85 * 1.9, so the rule presumably keeps 15% headroom
# when there is only one node, with a 10m `for:` window — confirm against the rule.
# `interval` and the keys below belong to the same list item as `name`; giving
# `interval` its own dash would split the name off into a separate, empty test group.
- name: KubeCPUOvercommit alert (single-node)
  interval: 1m
  input_series:
  # `1x10` keeps the sample value constant from 0m through 10m.
  - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
    values: '1x10'
  - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
    values: '1x10'
  - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
    values: '1.9x10'  # allocatable CPU observed on a node with 2 vCPUs
  - series: 'kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
    values: '1x10'
  alert_rule_test:
  # No exp_alerts here: the alert must not be firing yet at 9m.
  - eval_time: 9m
    alertname: KubeCPUOvercommit
  - eval_time: 10m
    alertname: KubeCPUOvercommit
    exp_alerts:
    - exp_labels:
        severity: warning
      exp_annotations:
        description: Cluster has overcommitted CPU resource requests for Pods by 0.385 CPU shares and cannot tolerate node failure.
        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
        summary: Cluster has overcommitted CPU resource requests.
1325
+
1326
# KubeCPUOvercommit on a two-node cluster.
# Two namespaces request 2 CPU each (4 total) against two nodes exposing 1.9
# allocatable CPU each. No alert is expected at 9m; one warning is expected at 10m.
# NOTE(review): 2.1 = 4 - (3.8 - 1.9), i.e. requests minus the capacity left after
# losing the largest node — presumably what the rule computes; confirm against it.
# `interval` and the keys below belong to the same list item as `name`; giving
# `interval` its own dash would split the name off into a separate, empty test group.
- name: KubeCPUOvercommit alert (multi-node)
  interval: 1m
  input_series:
  # `2x10` keeps the sample value constant from 0m through 10m.
  - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
    values: '2x10'
  - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
    values: '2x10'
  - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
    values: '1.9x10'  # allocatable CPU observed on a node with 2 vCPUs
  - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="cpu", job="kube-state-metrics"}'
    values: '1.9x10'
  - series: 'kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
    values: '1x10'
  - series: 'kube_node_info{cluster="kubernetes", node="n2", job="kube-state-metrics"}'
    values: '1x10'
  alert_rule_test:
  # No exp_alerts here: the alert must not be firing yet at 9m.
  - eval_time: 9m
    alertname: KubeCPUOvercommit
  - eval_time: 10m
    alertname: KubeCPUOvercommit
    exp_alerts:
    - exp_labels:
        severity: warning
      exp_annotations:
        description: Cluster has overcommitted CPU resource requests for Pods by 2.1 CPU shares and cannot tolerate node failure.
        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
        summary: Cluster has overcommitted CPU resource requests.
1353
+
1354
# KubeMemoryOvercommit on a single-node cluster.
# Two namespaces request 1 GB each (2 GB total) against one node exposing 1 GB of
# allocatable memory. No alert is expected at 9m; one warning is expected at 10m.
# NOTE(review): 1.15G = 2e9 - 0.85 * 1e9, so the rule presumably keeps 15% headroom
# when there is only one node, with a 10m `for:` window — confirm against the rule.
# `interval` and the keys below belong to the same list item as `name`; giving
# `interval` its own dash would split the name off into a separate, empty test group.
- name: KubeMemoryOvercommit alert (single-node)
  interval: 1m
  input_series:
  # `<value>x10` keeps the sample value constant from 0m through 10m.
  - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
    values: '1000000000x10'  # 1 GB (10^9 bytes)
  - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
    values: '1000000000x10'
  - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
    values: '1000000000x10'
  - series: 'kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
    values: '1x10'
  alert_rule_test:
  # No exp_alerts here: the alert must not be firing yet at 9m.
  - eval_time: 9m
    alertname: KubeMemoryOvercommit
  - eval_time: 10m
    alertname: KubeMemoryOvercommit
    exp_alerts:
    - exp_labels:
        severity: warning
      exp_annotations:
        description: Cluster has overcommitted memory resource requests for Pods by 1.15G bytes and cannot tolerate node failure.
        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
        summary: Cluster has overcommitted memory resource requests.
1377
+
1378
# KubeMemoryOvercommit on a two-node cluster.
# Two namespaces request 2 GB each (4 GB total) against two nodes exposing 1 GB of
# allocatable memory each. No alert is expected at 9m; one warning is expected at 10m.
# NOTE(review): 3G = 4e9 - (2e9 - 1e9), i.e. requests minus the capacity left after
# losing the largest node — presumably what the rule computes; confirm against it.
# `interval` and the keys below belong to the same list item as `name`; giving
# `interval` its own dash would split the name off into a separate, empty test group.
- name: KubeMemoryOvercommit alert (multi-node)
  interval: 1m
  input_series:
  # `<value>x10` keeps the sample value constant from 0m through 10m.
  - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
    values: '2000000000x10'  # 2 GB (2 * 10^9 bytes)
  - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
    values: '2000000000x10'
  - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
    values: '1000000000x10'
  - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="memory", job="kube-state-metrics"}'
    values: '1000000000x10'
  - series: 'kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
    values: '1x10'
  - series: 'kube_node_info{cluster="kubernetes", node="n2", job="kube-state-metrics"}'
    values: '1x10'
  alert_rule_test:
  # No exp_alerts here: the alert must not be firing yet at 9m.
  - eval_time: 9m
    alertname: KubeMemoryOvercommit
  - eval_time: 10m
    alertname: KubeMemoryOvercommit
    exp_alerts:
    - exp_labels:
        severity: warning
      exp_annotations:
        description: Cluster has overcommitted memory resource requests for Pods by 3G bytes and cannot tolerate node failure.
        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
        summary: Cluster has overcommitted memory resource requests.
0 commit comments