diff --git a/README.md b/README.md index 5a9e85d..6f9dc91 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ## About this project -A set of Plutono and Perses dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes. +A set of Perses dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes. # Content @@ -23,8 +23,6 @@ kubernetes-operations │ ├── alerts Prometheus alerts for kubernetes. │ - ├── dashboards Plutono dashboards for visualizing key metrics. - │ ├── perses-dashboards Perses dashboards for visualizing key metrics. │ └── Chart.yaml Helm chart manifest. @@ -40,7 +38,6 @@ The content of the repository can be installed independently or as part of the [ |-----|------|---------|-------------| | dashboards.create | bool | `true` | Enables ConfigMap resources with dashboards to be created | | dashboards.persesSelectors | list | `[{"name":"perses.dev/resource","value":"\"true\""}]` | Label selectors for the Perses dashboards to be picked up by Perses. | -| dashboards.plutonoSelectors | list | `[{"name":"plutono-dashboard","value":"\"true\""}]` | Label selectors for the Plutono dashboards to be picked up by Plutono. | | global.commonLabels | object | `{}` | Common labels to add to all resources # | | prometheusRules.NodeInMaintenance | object | `{"label":"maintenance_state","value":"in-maintenance"}` | The label value pair that marks a Kubernetes node as 'in maintenance' | | prometheusRules.additionalRuleAnnotations | object | `{}` | Additional annotations for PrometheusRule alerts | @@ -48,6 +45,7 @@ The content of the repository can be installed independently or as part of the [ | prometheusRules.annotations | object | `{}` | Annotations for PrometheusRules | | prometheusRules.create | bool | `true` | Enables PrometheusRule resources to be created | | prometheusRules.disabled | object | `{}` | Disabled PrometheusRule alerts | +| prometheusRules.kubeLabels | list | `[]` | Enrich pod- and deployment-level alert expressions with labels from kube_pod_labels / kube_deployment_labels. Provide a list of kube-state-metrics label names to include in group_left(). Affects: KubernetesPodRestartingTooMuch, KubePodNotReady (join on pod+namespace), KubernetesDeploymentReplicasMismatch (join on namespace+deployment). | | prometheusRules.labels | object | `{}` | Labels for PrometheusRules | | prometheusRules.ruleSelectors | string | `nil` | Label selectors for the Prometheus rules to be picked up by Prometheus. | diff --git a/README.md.gotmpl b/README.md.gotmpl index fb80169..0f6e35f 100644 --- a/README.md.gotmpl +++ b/README.md.gotmpl @@ -4,7 +4,7 @@ ## About this project -A set of Plutono and Perses dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes. +A set of Perses dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes. # Content @@ -23,8 +23,6 @@ kubernetes-operations │ ├── alerts Prometheus alerts for kubernetes. │ - ├── dashboards Plutono dashboards for visualizing key metrics. - │ ├── perses-dashboards Perses dashboards for visualizing key metrics. │ └── Chart.yaml Helm chart manifest. diff --git a/charts/kubernetes-operations/Chart.yaml b/charts/kubernetes-operations/Chart.yaml index a143990..c4ac50e 100644 --- a/charts/kubernetes-operations/Chart.yaml +++ b/charts/kubernetes-operations/Chart.yaml @@ -3,15 +3,14 @@ apiVersion: v2 name: kubernetes-operations -version: 1.2.11 -description: A set of Plutono dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes. +version: 1.3.0 +description: A set of Perses dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Kubernetes. maintainers: - name: richardtief - email: richard.tief@sap.com + - name: trouaux keywords: - Helm Chart - Kubernetes operations - - Plutono Dashboards - Perses Dashboards - Prometheus Alerting - Alert Rules diff --git a/charts/kubernetes-operations/alerts/kubernetes-health.yaml b/charts/kubernetes-operations/alerts/kubernetes-health.yaml index f97027d..b36ad29 100644 --- a/charts/kubernetes-operations/alerts/kubernetes-health.yaml +++ b/charts/kubernetes-operations/alerts/kubernetes-health.yaml @@ -56,7 +56,9 @@ groups: {{- if not (.Values.prometheusRules.disabled.KubernetesPodRestartingTooMuch | default false) }} - alert: KubernetesPodRestartingTooMuch - expr: sum by(pod, namespace, container) (rate(kube_pod_container_status_restarts_total[15m])) > 0 + expr: | + sum by(pod, namespace, container) (rate(kube_pod_container_status_restarts_total[15m])) + {{- include "kubernetes-operations.kubePodLabelsJoin" . }} > 0 for: {{ dig "KubernetesPodRestartingTooMuch" "for" "1h" .Values.prometheusRules }} labels: severity: {{ dig "KubernetesPodRestartingTooMuch" "severity" "warning" .Values.prometheusRules }} @@ -93,6 +95,7 @@ groups: == 0 ) + {{- include "kubernetes-operations.kubeDeploymentLabelsJoin" . }} for: {{ dig "KubernetesDeploymentReplicasMismatch" "for" "10m" .Values.prometheusRules }} labels: severity: {{ dig "KubernetesDeploymentReplicasMismatch" "severity" "warning" .Values.prometheusRules }} @@ -116,6 +119,7 @@ groups: * on(node) group_left() kube_node_status_condition{condition="Ready",status="true"}==1 ) + {{- include "kubernetes-operations.kubePodLabelsJoin" . }} for: {{ dig "KubePodNotReady" "for" "30m" .Values.prometheusRules }} labels: severity: {{ dig "KubePodNotReady" "severity" "warning" .Values.prometheusRules }} diff --git a/charts/kubernetes-operations/dashboards/apiserver.json b/charts/kubernetes-operations/dashboards/apiserver.json deleted file mode 100644 index e6b7d34..0000000 --- a/charts/kubernetes-operations/dashboards/apiserver.json +++ /dev/null @@ -1,1042 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Plutono --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": 15761, - "graphTooltip": 1, - "iteration": 1727864391671, - "links": [], - "panels": [ - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "0": { - "text": "DOWN" - }, - "1": { - "text": "UP" - } - }, - "text": "UP", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": null, - "text": "", - "to": "", - "type": 1 - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 42, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "value" - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "up{job=~\".*apiserver.*\"}", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "API Server - Health Status", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 5 - }, - "hiddenSeries": false, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (code) (rate(apiserver_request_total[5m]))", - "interval": "", - "legendFormat": " {{ code }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HTTP Requests by code", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:2572", - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2573", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 5 - }, - "hiddenSeries": false, - "id": 39, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (verb) (rate(apiserver_request_total[5m]))", - "interval": "", - "legendFormat": " {{ verb}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HTTP Requests by verb", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:2632", - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2633", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "ms" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 13 - }, - "hiddenSeries": false, - "id": 54, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{verb!=\"WATCH\"}[5m])) by (verb, le))", - "interval": "", - "legendFormat": "{{ verb }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HTTP Requests Latency (99th percentile) by verb", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 21 - }, - "hiddenSeries": false, - "id": 61, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "topk(10, max by (resource) (apiserver_storage_objects{job=\"apiserver\"}))", - "interval": "", - "legendFormat": "{{ resource }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Top 10 Objects by kind", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:22", - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:23", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 29 - }, - "id": 50, - "options": { - "legend": { - "calcs": [], - "displayMode": "hidden", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\", code=~\"5..\"}[5m]))\n/ \nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Errors", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 29 - }, - "id": 51, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "exemplar": true, - "expr": "sum by(verb) (rate(apiserver_request_total{job=\"apiserver\", code=~\"5..\"}[5m]))\n /\nsum by(verb) (rate(apiserver_request_total{job=\"apiserver\"}[5m]))", - "interval": "", - "legendFormat": "{{ verb }}", - "refId": "A" - } - ], - "title": "Errors by verb", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 37 - }, - "id": 40, - "options": { - "legend": { - "calcs": [], - "displayMode": "hidden", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) by (job, instance)", - "interval": "", - "legendFormat": "{{ job }}:{{ instance }}", - "refId": "A" - } - ], - "title": "Stacked HTTP Requests", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 37 - }, - "id": 56, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(workqueue_depth{job=\"apiserver\"}[5m])) by (instance)", - "interval": "", - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "title": "Work Queue", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 45 - }, - "id": 47, - "options": { - "legend": { - "calcs": [], - "displayMode": "hidden", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{job=\"apiserver\"}[5m])", - "interval": "", - "legendFormat": "{{ job }}:{{ instance }}", - "refId": "A" - } - ], - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 25, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 45 - }, - "id": 48, - "options": { - "legend": { - "calcs": [], - "displayMode": "hidden", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "8.3.3", - "targets": [ - { - "exemplar": true, - "expr": "process_resident_memory_bytes{job=\"apiserver\"}", - "interval": "", - "legendFormat": "{{ job }}:{{ instance }}", - "refId": "A" - } - ], - "title": "Memory Usage", - "type": "timeseries" - } - ], - "refresh": "30s", - "schemaVersion": 27, - "style": "dark", - "tags": [ - "Kubernetes" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "kube-monitoring-prometheus", - "value": "kube-monitoring-prometheus" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - } - ] - }, - "time": { - "from": "now-12h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "APIServer", - "uid": "apiserver", - "version": 2 -} diff --git a/charts/kubernetes-operations/dashboards/container-resources.json b/charts/kubernetes-operations/dashboards/container-resources.json deleted file mode 100644 index d761795..0000000 --- a/charts/kubernetes-operations/dashboards/container-resources.json +++ /dev/null @@ -1,1443 +0,0 @@ -{ - "annotations": { - "list": [ - { - "$$hashKey": "object:974", - "builtIn": 1, - "datasource": "-- Plutono --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Kubernetes Container resources", - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 6, - "iteration": 1727875708353, - "links": [], - "panels": [ - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 17, - "panels": [], - "repeat": null, - "title": "Summary", - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* [container_cpu_usage_seconds_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) to calculate the median CPU usage in % based on the cumulative CPU time consumed. If multiple threads are used their times are added to the sum", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 1.5, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0.8 - }, - { - "color": "#d44a3a", - "value": 1 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 0, - "y": 1 - }, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "quantile(0.5, (rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container=~\"$container\", pod=~\"$pod\"}[5m])))", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "CPU usage", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* ratio between [container_cpu_cfs_throttled_periods_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) and [container_cpu_cfs_periods_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) to calculate the median CPU throttling in %", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0.25 - }, - { - "color": "#d44a3a", - "value": 0.5 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 2, - "y": 1 - }, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "quantile(0.50, rate(container_cpu_cfs_throttled_periods_total{namespace=~\"$namespace\",pod=~\"$pod\", container=~\"$container\"}[5m]) / rate(container_cpu_cfs_periods_total{namespace=~\"$namespace\",pod=~\"$pod\", container=~\"$container\"}[5m]))", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "CPU throttling", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* [kube_pod_container_resource_limits](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/pod-metrics.md) is used to calculate the total configured CPU limits of the selected pods or containers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 2, - "displayName": "Cores", - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 137438953472, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#badff4", - "value": null - }, - { - "color": "#82b5d8", - "value": 1000000000 - }, - { - "color": "#65c5db", - "value": 10000000000 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 4, - "y": 1 - }, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": false, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\",resource=\"cpu\",container=~\"$container\", pod=~\"$pod\"})", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "CPU limits", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* [kube_pod_container_status_ready](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/pod-metrics.md) is used to calculate the number of ready containers of the selected pods or containers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 137438953472, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#badff4", - "value": null - }, - { - "color": "#82b5d8", - "value": 1000000000 - }, - { - "color": "#65c5db", - "value": 10000000000 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 6, - "y": 1 - }, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": false, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "sum(min_over_time(kube_pod_container_status_ready{namespace=~\"$namespace\",container=~\"$container\", pod=~\"$pod\"}[5m]))", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "# Containers", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* median ratio between [container_memory_working_set_bytes](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) and [kube_pod_container_resource_requests](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/pod-metrics.md) in % (provided by the [container_memory_utilization_ratio](https://github.com/sapcc/helm-charts/blob/f1c6d7fe8c9093b16e73d292e2454816a192ec22/prometheus-rules/metrics-regional-rules/templates/aggregations/collector/_resource.rules.tpl#L32) formula)", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 1.5, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0.5 - }, - { - "color": "#d44a3a", - "value": 1 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "quantile(0.50, (\n sum(container_memory_working_set_bytes) by (namespace, pod, container)\n /\n sum(kube_pod_container_resource_requests{resource=\"memory\"}) by (namespace, pod, container)\n )\n)", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "RAM requests usage", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* median ratio between [container_memory_working_set_bytes](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) and [kube_pod_container_resource_limits](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/pod-metrics.md) in % (provided by the [container_memory_saturation_ratio](https://github.com/sapcc/helm-charts/blob/f1c6d7fe8c9093b16e73d292e2454816a192ec22/prometheus-rules/metrics-regional-rules/templates/aggregations/collector/_resource.rules.tpl#L26) formula)\n* 100% saturation means OOMKill!", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0.5 - }, - { - "color": "#d44a3a", - "value": 0.8 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "quantile(0.50, (\n sum(container_memory_working_set_bytes) by (namespace, pod, container)\n /\n sum(kube_pod_container_resource_limits{resource=\"memory\"}) by (namespace, pod, container)\n )\n)", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "RAM limits usage", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* [container_memory_working_set_bytes](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) is used to calculate the total RAM usage of the selected pods or containers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 137438953472, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#cffaff", - "value": null - }, - { - "color": "#70dbed", - "value": 1000000000 - }, - { - "color": "#6ed0e0", - "value": 10000000000 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": false, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"})", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "RAM usage", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* [kube_pod_container_resource_requests](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/pod-metrics.md) is used to calculate the total RAM requests of the selected pods or containers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 137438953472, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#badff4", - "value": null - }, - { - "color": "#82b5d8", - "value": 1000000000 - }, - { - "color": "#65c5db", - "value": 10000000000 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 20, - "y": 1 - }, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": false, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\",resource=\"memory\",container=~\"$container\",pod=~\"$pod\"})", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "RAM requests", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${datasource}", - "description": "# used formulas\n* [kube_pod_container_resource_limits](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/pod-metrics.md) is used to calculate the total configured RAM limits of the selected pods or containers", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 137438953472, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#badff4", - "value": null - }, - { - "color": "#82b5d8", - "value": 1000000000 - }, - { - "color": "#65c5db", - "value": 10000000000 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 22, - "y": 1 - }, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": false, - "text": {} - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\",resource=\"memory\",container=~\"$container\", pod=~\"$pod\"})", - "interval": "", - "legendFormat": "", - "queryType": "randomWalk", - "refId": "A" - } - ], - "title": "RAM limits", - "type": "gauge" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 19, - "panels": [], - "repeat": null, - "title": "CPU", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "description": "# used formulas\n* [container_cpu_usage_seconds_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) to calculate the CPU usage in % based on the cumulative CPU time consumed. If multiple threads are used their times are added to the sum\n* [container_cpu_cfs_throttled_periods_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) to calculate the CPU throttling in % because of defined limits", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 6 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/throttled .*/", - "bars": true, - "lines": false, - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (pod, container) (rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\",container=~\"$container\", pod=~\"$pod\"}[5m]))", - "interval": "", - "legendFormat": "cpu usage | {{pod}}/{{container}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum by (pod, container) (rate(container_cpu_cfs_throttled_periods_total{namespace=~\"$namespace\",pod=~\"$pod\", container=~\"$container\"}[5m]) / rate(container_cpu_cfs_periods_total[5m]))", - "hide": false, - "interval": "", - "legendFormat": "cpu throttling | {{pod}}/{{container}}", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": false, - "op": "gt", - "value": 2 - }, - { - "colorMode": "critical", - "fill": true, - "line": false, - "op": "lt", - "value": 0.2 - }, - { - "colorMode": "warning", - "fill": true, - "line": false, - "op": "gt", - "value": 1.2 - }, - { - "colorMode": "warning", - "fill": true, - "line": false, - "op": "lt", - "value": 0.5 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU usage and throttling ratio", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "transformations": [], - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1097", - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1098", - "decimals": null, - "format": "percentunit", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "decimals": null, - "description": "# used formulas\n* [container_cpu_cfs_periods_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) is used to count the amount of CPU request periods that have triggered limit checks\n* [container_cpu_cfs_throttled_periods_total](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) is used to count the amount of throttled CPU request periods because of limit checks", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 - }, - "hiddenSeries": false, - "id": 5, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:525", - "alias": "/throttled .*/", - "bars": true, - "lines": false, - "zindex": -3 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "round(rate(container_cpu_cfs_periods_total{container=~\"$container\",pod=~\"$pod\"}[5m]), 0.01)", - "interval": "", - "legendFormat": "periods | {{pod}}/ {{container}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "round(rate(container_cpu_cfs_throttled_periods_total{namespace=~\"$namespace\",container=~\"$container\", pod=~\"$pod\"}[5m]), 0.01)", - "hide": false, - "interval": "", - "legendFormat": "throttled periods | {{pod}}/{{container}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU usage and throttling periods", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:503", - "decimals": null, - "format": "string", - "label": "periods", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:504", - "format": "short", - "label": "seconds", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 18, - "panels": [], - "repeat": null, - "title": "RAM", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "decimals": null, - "description": "# used formulas\n* [container_memory_working_set_bytes](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) to calculate the RAM usage of the container", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 15 - }, - "hiddenSeries": false, - "id": 1, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 100, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\",container=~\"$container\",pod=~\"$pod\"}) by (namespace, pod, container)", - "interval": "", - "legendFormat": "{{pod}}/{{container}}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:3446", - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:3447", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "description": "# used formulas\n* [container_memory_saturation_ratio](https://github.com/sapcc/helm-charts/blob/f1c6d7fe8c9093b16e73d292e2454816a192ec22/prometheus-rules/metrics-regional-rules/templates/aggregations/collector/_resource.rules.tpl#L26) is used to calculate the used memory to configured limits ratio\n* [container_memory_utilization_ratio](https://github.com/sapcc/helm-charts/blob/f1c6d7fe8c9093b16e73d292e2454816a192ec22/prometheus-rules/metrics-regional-rules/templates/aggregations/collector/_resource.rules.tpl#L32) is used to calculate the used memory to configured requests ratio", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 15 - }, - "hiddenSeries": false, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes) by (namespace, pod, container)\n/\nsum(kube_pod_container_resource_limits{resource=\"memory\"}) by (namespace, pod, container)", - "interval": "", - "legendFormat": "used limits | {{pod}}/{{container}}", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes) by (namespace, pod, container)\n/\nsum(kube_pod_container_resource_requests{resource=\"memory\"}) by (namespace, pod, container)", - "hide": false, - "interval": "", - "legendFormat": "used request | {{pod}}/{{container}}", - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": false, - "op": "gt", - "value": 0.8 - }, - { - "colorMode": "warning", - "fill": true, - "line": false, - "op": "gt", - "value": 0.5 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory usage to requests/limits ratio", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:3018", - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:3019", - "format": "percentunit", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "schemaVersion": 27, - "style": "dark", - "tags": [ - "kubernetes" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "kube-monitoring-prometheus", - "value": "kube-monitoring-prometheus" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": "", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": "${datasource}", - "definition": "label_values(namespace)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "", - "multi": true, - "name": "namespace", - "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": "${datasource}", - "definition": "label_values(container_memory_working_set_bytes{namespace=~\"$namespace\"}, pod)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "pod", - "options": [], - "query": { - "query": "label_values(container_memory_working_set_bytes{namespace=~\"$namespace\"}, pod)", - "refId": "StandardVariableQuery" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": "${datasource}", - "definition": "label_values(container_memory_working_set_bytes{pod=~\"$pod\"}, container)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "container", - "options": [], - "query": { - "query": "label_values(container_memory_working_set_bytes{pod=~\"$pod\"}, container)", - "refId": "StandardVariableQuery" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Kubernetes Container Resources", - "uid": "kubernetes-container-resources", - "version": 1 -} diff --git a/charts/kubernetes-operations/dashboards/core-dns.json b/charts/kubernetes-operations/dashboards/core-dns.json deleted file mode 100644 index 543f48e..0000000 --- a/charts/kubernetes-operations/dashboards/core-dns.json +++ /dev/null @@ -1,1120 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Plutono --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": 7, - "iteration": 1727875635782, - "links": [], - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "container_memory_working_set_bytes{pod=~\"coredns-.*\", namespace=\"kube-system\"}", - "interval": "", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:450", - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:451", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "label_replace((go_goroutines{kubernetes_pod_name=~\"coredns-.*\", kubernetes_namespace=\"kube-system\"} or go_goroutines{pod=~\"coredns-.*\", namespace=\"kube-system\"}), \"pod\", \"$1\", \"kubernetes_pod_name\", \"(.+)\")", - "interval": "", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Goroutines", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 16, - "title": "Throughput", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum (rate(coredns_dns_requests_total[5m])) or sum (rate(coredns_dns_request_count_total[5m]))", - "interval": "", - "legendFormat": "total", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum (rate(coredns_forward_requests_total[5m])) or sum (rate(coredns_forward_request_count_total[5m]))", - "hide": false, - "interval": "", - "legendFormat": "forwarded", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:216", - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:217", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "hiddenSeries": false, - "id": 18, - "interval": null, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (pod) (label_replace(rate(coredns_dns_requests_total[5m]), \"pod\", \"$1\", \"kubernetes_pod_name\", \"(.+)\")) or sum by (pod) (label_replace(rate(coredns_dns_request_count_total[5m]), \"pod\", \"$1\", \"kubernetes_pod_name\", \"(.+)\"))", - "interval": "", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Requests per instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1642", - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1643", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket[5m])) by (le))", - "interval": "", - "legendFormat": "total", - "refId": "A" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket[5m])) by (le))", - "hide": false, - "interval": "", - "legendFormat": "forwarded", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Request latency (99th percentile)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:752", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:753", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 17 - }, - "hiddenSeries": false, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (type) (rate(coredns_dns_requests_total[5m])) or sum by (type) (rate(coredns_dns_request_type_count_total[5m]))", - "interval": "", - "legendFormat": "{{type}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Requests by type", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:296", - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:297", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 25 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (rcode) (rate(coredns_dns_response_rcode_count_total[5m])) or sum by (rcode) (rate(coredns_dns_responses_total[5m]))", - "interval": "", - "legendFormat": "{{rcode}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Respones by rcode", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:672", - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:673", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 25 - }, - "hiddenSeries": false, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (rcode) (rate(coredns_forward_responses_total[5m])) or sum by (rcode) (rate(coredns_forward_response_rcode_count_total[5m]))", - "interval": "", - "legendFormat": "{{rcode}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Forwarded responses by rcode", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:672", - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:673", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 2, - "panels": [], - "title": "Cache", - "type": "row" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 3000 - }, - { - "color": "semi-dark-red", - "value": 6000 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 34 - }, - "id": 21, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "max(coredns_cache_entries{type=\"success\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Success Cache Size", - "type": "stat" - }, - { - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 3000 - }, - { - "color": "semi-dark-red", - "value": 6000 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 34 - }, - "id": 22, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.33", - "targets": [ - { - "exemplar": true, - "expr": "max(coredns_cache_entries{type=\"denial\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Denial Cache Size", - "type": "stat" - }, - { - "aliasColors": { - " misses": "dark-red", - "hit ratio": "semi-dark-purple" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 34 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.33", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:2268", - "alias": "hit ratio", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (type) (rate(coredns_cache_hits_total[5m]))", - "interval": "", - "legendFormat": "{{type}} hits", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum (rate(coredns_cache_misses_total[5m]))", - "hide": false, - "interval": "", - "legendFormat": "{{type}} misses", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(rate(coredns_cache_hits_total[5m]))/(sum(rate(coredns_cache_hits_total[5m]))+sum(rate(coredns_cache_misses_total[5m])))", - "hide": false, - "interval": "", - "legendFormat": "hit ratio", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Cache hits/misses", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:136", - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:137", - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "schemaVersion": 27, - "style": "dark", - "tags": [ - "kubernetes" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "kube-monitoring-prometheus", - "value": "kube-monitoring-prometheus" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "CoreDNS", - "uid": "coredns", - "version": 1 -} diff --git a/charts/kubernetes-operations/dashboards/nodes.json b/charts/kubernetes-operations/dashboards/nodes.json deleted file mode 100644 index fad789e..0000000 --- a/charts/kubernetes-operations/dashboards/nodes.json +++ /dev/null @@ -1,1840 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Plutono --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "", - "editable": true, - "gnetId": 704, - "graphTooltip": 0, - "id": 16, - "iteration": 1732622485987, - "links": [ - { - "icon": "external link", - "includeVars": true, - "tags": [ - "kubernetes" - ], - "type": "dashboards" - } - ], - "panels": [ - { - "cacheTimeout": null, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(255, 255, 255)", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "7.5.34", - "targets": [ - { - "exemplar": true, - "expr": "count without(cpu, mode) (node_cpu_seconds_total{mode=\"idle\", node=~\"$server.*\"})", - "format": "time_series", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "title": "Cores", - "type": "stat" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 0 - }, - "id": 14, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(node_filesystem_size_bytes{node=~\"$server\", device=~\"/dev/.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 300 - } - ], - "thresholds": "", - "title": "Disk Total", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 0 - }, - "hideTimeOverride": true, - "id": 13, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_memory_MemTotal_bytes{node=~\"$server\"}", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 300 - } - ], - "thresholds": "", - "timeFrom": "1m", - "title": "Total Memory", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 0 - }, - "id": 11, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "100 - (avg by (node) (irate(node_cpu_seconds_total{node=~\"$server\",mode=\"idle\"}[5m])) * 100)", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B" - } - ], - "thresholds": "0.8,0.9", - "title": "CPU Used", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 1, - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 0 - }, - "hideTimeOverride": true, - "id": 15, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "time() - node_boot_time_seconds{node=~\"$server\"}", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 300 - } - ], - "thresholds": "", - "timeFrom": "2m", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 2, - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 0 - }, - "id": 17, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(node_scrape_collector_duration_seconds{node=~\"$server\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "", - "title": "Scrape CPU use", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 0, - "y": 4 - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(node_memory_MemFree_bytes{node=~\"$server\"} / node_memory_MemTotal_bytes{node=~\"$server\"}) * 100", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600, - "target": "" - } - ], - "thresholds": "10, 20", - "title": "Available memory", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 6, - "x": 4, - "y": 4 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Free", - "zindex": 3 - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal_bytes{node=~\"$server\"} - node_memory_MemFree_bytes{node=~\"$server\"} - node_memory_Cached_bytes{node=~\"$server\"} - node_memory_Buffers_bytes{node=~\"$server\"} - node_memory_Slab_bytes{node=~\"$server\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Used", - "metric": "memo", - "refId": "A", - "step": 30, - "target": "" - }, - { - "expr": "node_memory_Buffers_bytes{node=~\"$server\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Buffers", - "refId": "C", - "step": 30 - }, - { - "expr": "node_memory_Cached_bytes{node=~\"$server\"} + node_memory_Slab_bytes{node=~\"$server\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cached", - "refId": "D", - "step": 30 - }, - { - "expr": "node_memory_MemFree_bytes{node=~\"$server\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Free", - "refId": "B", - "step": 30 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 14, - "x": 10, - "y": 4 - }, - "height": "", - "hiddenSeries": false, - "id": 27, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*outbound/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "topk(5, container_memory_usage_bytes{pod=~\".+\"} AND on(pod) (kube_pod_info{node=\"$server\"}))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ namespace }}/{{ pod }}", - "refId": "A", - "step": 30 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pods Memory Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "format": "percentunit", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 0, - "y": 12 - }, - "height": "", - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "min(node_filesystem_free_bytes{fstype=~\"xfs|ext4\",node=~\"$server\"} / node_filesystem_size_bytes{fstype=~\"xfs|ext4\",node=~\"$server\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600, - "target": "" - } - ], - "thresholds": "0.10, 0.25", - "title": "Free Filesystem Space (Lowest)", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 3, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 6, - "x": 4, - "y": 12 - }, - "hiddenSeries": false, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(mode)(irate(node_cpu_seconds_total{node=~\"$server\", mode!=\"idle\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ mode }}", - "refId": "A", - "step": 30, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": "", - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 14, - "x": 10, - "y": 12 - }, - "height": "", - "hiddenSeries": false, - "id": 26, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*outbound/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "topk(5, sum(rate(container_cpu_usage_seconds_total{pod=~\".+\"}[5m])) by (pod, namespace) AND on (pod, namespace) (kube_pod_info{node=\"$server\"}))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ namespace }}/{{ pod }}", - "refId": "A", - "step": 30 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pods CPU Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 20 - }, - "height": "", - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*outbound/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_transmit_bytes_total{node=~\"$server\", device!~\"lo|cbr[0-9]|veth.*\"}[5m]) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} outbound", - "refId": "A", - "step": 30 - }, - { - "expr": "irate(node_network_receive_bytes_total{node=~\"$server\", device!~\"lo|cbr[0-9]|veth.*\"}[5m]) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} inbound", - "refId": "B", - "step": 30 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 29 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 - }, - { - "alias": "{node=\"172.17.0.1:9100\"}", - "yaxis": 2 - }, - { - "alias": "io time", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (node) (irate(node_disk_reads_completed_total{node=~\"$server\"}[5m]))", - "format": "time_series", - "hide": false, - "intervalFactor": 4, - "legendFormat": "reads per second", - "refId": "A", - "step": 40, - "target": "" - }, - { - "expr": "sum by (node) (irate(node_disk_writes_completed_total{node=~\"$server\"}[5m]))", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "writes per second", - "refId": "B", - "step": 40 - }, - { - "expr": "sum by (node) (irate(node_disk_io_time_seconds_total{node=~\"$server\"}[5m]))", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "io time", - "refId": "C", - "step": 40 - }, - { - "expr": "sum by (node) (irate(node_disk_reads_completed_total{node=~\"$server\"}[5m])) + sum by (node) (irate(node_disk_writes_completed_total{node=~\"$server\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "IOPS", - "refId": "D", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "IOPs", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": false, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 29 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_disk_io_time_seconds_total{node=~\"$server\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ device }}", - "refId": "A", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "IO Speed by Device", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 37 - }, - "hiddenSeries": false, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (method) (rate(node_nfsd_requests_total{node=~\"$server\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ method }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NFS requests by method", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 37 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.34", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (error) (rate(node_nfsd_rpc_errors_total[5m]))", - "interval": "", - "legendFormat": "{{ error }}", - "queryType": "randomWalk", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NFS RPC Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "30s", - "schemaVersion": 27, - "style": "dark", - "tags": [ - "kubernetes" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "kube-monitoring-prometheus", - "value": "kube-monitoring-prometheus" - }, - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "node-1", - "value": "node-1" - }, - "datasource": "$datasource", - "definition": "label_values(kube_node_info, node)", - "description": null, - "error": null, - "hide": 1, - "includeAll": false, - "label": "Node", - "multi": false, - "name": "server", - "options": [], - "query": { - "query": "label_values(kube_node_info, node)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": null, - "tags": [], - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes Node", - "version": 4 -} diff --git a/charts/kubernetes-operations/plugindefinition.yaml b/charts/kubernetes-operations/plugindefinition.yaml index b584127..2f09942 100644 --- a/charts/kubernetes-operations/plugindefinition.yaml +++ b/charts/kubernetes-operations/plugindefinition.yaml @@ -9,7 +9,7 @@ kind: PluginDefinition metadata: name: kubernetes-operations spec: - version: 1.1.6 + version: 1.2.0 displayName: Kubernetes operations bundle description: Operations bundle for Kubernetes docMarkDownUrl: https://raw.githubusercontent.com/cloudoperators/kubernetes-operations/main/README.md @@ -17,7 +17,7 @@ spec: helmChart: name: kubernetes-operations repository: oci://ghcr.io/cloudoperators/kubernetes-operations/charts - version: 1.2.10 + version: 1.3.0 options: - name: prometheusRules.create description: Create Prometheus rules @@ -37,7 +37,3 @@ spec: required: false default: true type: bool - - name: dashboards.dashboardSelector - description: Selector for dashboards to be picked up by the Plutono. List of key-value pairs. - required: false - type: list diff --git a/charts/kubernetes-operations/templates/_helpers.tpl b/charts/kubernetes-operations/templates/_helpers.tpl index 846e061..4c1b96f 100644 --- a/charts/kubernetes-operations/templates/_helpers.tpl +++ b/charts/kubernetes-operations/templates/_helpers.tpl @@ -29,16 +29,29 @@ plugin: {{ $root.Release.Name }} {{- end }} {{- end }} -{{- define "kubernetes-operations.dashboardSelectorLabels" }} -{{- $path := index . 0 -}} -{{- $root := index . 1 -}} -plugin: {{ $root.Release.Name }} -{{- if $root.Values.dashboards.plutonoSelectors }} -{{- range $i, $target := $root.Values.dashboards.plutonoSelectors }} -{{ $target.name | required (printf "$.Values.dashboards.plutonoSelectors.[%v].name missing" $i) }}: {{ tpl ($target.value | required (printf "$.Values.dashboards.plutonoSelectors.[%v].value missing" $i)) $ }} -{{- end }} -{{- end }} -{{- end }} +{{/* +Optionally append a kube_pod_labels join to pod-level alert expressions. +Enriches alerts with configurable labels from kube_pod_labels. +Enable via .Values.prometheusRules.kubeLabels (list of label names, e.g. [label_app, label_team, label_environment]) +*/}} +{{- define "kubernetes-operations.kubePodLabelsJoin" -}} +{{- if .Values.prometheusRules.kubeLabels }} + * on(pod, namespace) group_left({{ join ", " .Values.prometheusRules.kubeLabels }}) + kube_pod_labels +{{- end -}} +{{- end -}} + +{{/* +Optionally append a kube_deployment_labels join to deployment-level alert expressions. +Enriches alerts with configurable labels from kube_deployment_labels. +Enable via .Values.prometheusRules.kubeLabels (list of label names, e.g. [label_app, label_team, label_environment]) +*/}} +{{- define "kubernetes-operations.kubeDeploymentLabelsJoin" -}} +{{- if .Values.prometheusRules.kubeLabels }} + * on(namespace, deployment) group_left({{ join ", " .Values.prometheusRules.kubeLabels }}) + kube_deployment_labels +{{- end -}} +{{- end -}} {{- define "kubernetes-operations.persesDashboardSelectorLabels" }} {{- $path := index . 0 -}} diff --git a/charts/kubernetes-operations/templates/dashboards.yaml b/charts/kubernetes-operations/templates/dashboards.yaml deleted file mode 100644 index b709fdc..0000000 --- a/charts/kubernetes-operations/templates/dashboards.yaml +++ /dev/null @@ -1,17 +0,0 @@ -{{- if .Values.dashboards.create }} -{{ $root := . }} -{{- range $path, $bytes := .Files.Glob "dashboards/*.json" }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ printf "%s-%s" $root.Release.Name $path | replace "/" "-" | trunc 63 }} - labels: -{{ include "kubernetes-operations.dashboardSelectorLabels" (list $path $root) | indent 4 }} -{{ include "kubernetes-operations.labels" (list $path $root) | indent 4 }} -data: -{{ printf "%s: |-" $path | replace "/" "-" | indent 2 }} -{{ printf "%s" $bytes | indent 4 }} - -{{- end }} -{{- end }} diff --git a/charts/kubernetes-operations/values.yaml b/charts/kubernetes-operations/values.yaml index 514d490..f607781 100644 --- a/charts/kubernetes-operations/values.yaml +++ b/charts/kubernetes-operations/values.yaml @@ -39,6 +39,15 @@ prometheusRules: # KubernetesApiServerDown: true # KubeletDown: true + # -- Enrich pod- and deployment-level alert expressions with labels from kube_pod_labels / kube_deployment_labels. + # Provide a list of kube-state-metrics label names to include in group_left(). + # Affects: KubernetesPodRestartingTooMuch, KubePodNotReady (join on pod+namespace), + # KubernetesDeploymentReplicasMismatch (join on namespace+deployment). + kubeLabels: [] + # - label_app + # - label_team + # - label_environment + # -- The label value pair that marks a Kubernetes node as 'in maintenance' NodeInMaintenance: label: maintenance_state @@ -50,11 +59,6 @@ dashboards: # -- Enables ConfigMap resources with dashboards to be created create: true - # -- Label selectors for the Plutono dashboards to be picked up by Plutono. - plutonoSelectors: - - name: plutono-dashboard - value: '"true"' - # -- Label selectors for the Perses dashboards to be picked up by Perses. persesSelectors: - name: perses.dev/resource