Add executive summary Grafana dashboard

Single-pane dashboard combining app status (UP/DOWN, response time,
7-day uptime, SSL expiry), pod health (status counts, restarts, not
ready pods, namespace distribution), cluster resources (CPU/memory/disk
gauges, node/pod/PVC counts), and a firing alerts table. Auto-deploys
via the existing grafana-dashboards ArgoCD app.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-09 15:37:09 -07:00
parent a462bb5c21
commit 87015b19b1
2 changed files with 1590 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,566 @@
{
"description": "Single-pane view of application health, pod status, cluster resource utilization, and firing alerts",
"editable": true,
"graphTooltip": 1,
"id": null,
"links": [
{
"icon": "external link",
"title": "App Uptime Detail",
"url": "/d/app-uptime-dashboard",
"type": "link"
}
],
"panels": [
{
"title": "Application Status",
"type": "stat",
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 0 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "options": { "0": { "color": "red", "text": "DOWN" } }, "type": "value" },
{ "options": { "1": { "color": "green", "text": "UP" } }, "type": "value" }
],
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "green", "value": 1 }
]
}
}
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto",
"orientation": "horizontal"
},
"targets": [
{
"expr": "probe_success{job=\"blackbox-http\"}",
"legendFormat": "{{ app }}",
"refId": "A"
}
]
},
{
"title": "Response Time",
"type": "stat",
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 4 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"unit": "s",
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "orange", "value": 3 },
{ "color": "red", "value": 5 }
]
}
}
},
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto",
"orientation": "horizontal"
},
"targets": [
{
"expr": "probe_http_duration_seconds{job=\"blackbox-http\"}",
"legendFormat": "{{ app }}",
"refId": "A"
}
]
},
{
"title": "Uptime (7d)",
"type": "gauge",
"gridPos": { "h": 5, "w": 12, "x": 0, "y": 8 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"decimals": 3,
"max": 100,
"min": 0,
"unit": "percent",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "orange", "value": 95 },
{ "color": "yellow", "value": 99 },
{ "color": "green", "value": 99.9 }
]
}
}
},
"options": {
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"expr": "avg_over_time(probe_success{job=\"blackbox-http\"}[7d]) * 100",
"legendFormat": "{{ app }}",
"refId": "A"
}
]
},
{
"title": "SSL Certificate Expiry",
"type": "stat",
"gridPos": { "h": 5, "w": 12, "x": 12, "y": 8 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"unit": "dtdurations",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "orange", "value": 604800 },
{ "color": "yellow", "value": 2592000 },
{ "color": "green", "value": 7776000 }
]
}
}
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto",
"orientation": "horizontal"
},
"targets": [
{
"expr": "probe_ssl_earliest_cert_expiry{job=\"blackbox-http\"} - time()",
"legendFormat": "{{ app }}",
"refId": "A"
}
]
},
{
"title": "",
"type": "text",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 },
"options": {
"mode": "markdown",
"content": "### Pod Health"
}
},
{
"title": "Pods by Status",
"type": "stat",
"gridPos": { "h": 4, "w": 8, "x": 0, "y": 14 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null }
]
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "Failed" },
"properties": [
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }] } }
]
},
{
"matcher": { "id": "byName", "options": "Pending" },
"properties": [
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "yellow", "value": null }] } }
]
}
]
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto",
"orientation": "horizontal"
},
"targets": [
{
"expr": "sum(kube_pod_status_phase{phase=\"Running\"})",
"legendFormat": "Running",
"refId": "A"
},
{
"expr": "sum(kube_pod_status_phase{phase=\"Pending\"})",
"legendFormat": "Pending",
"refId": "B"
},
{
"expr": "sum(kube_pod_status_phase{phase=\"Failed\"})",
"legendFormat": "Failed",
"refId": "C"
}
]
},
{
"title": "Pod Restarts (last 1h)",
"type": "table",
"gridPos": { "h": 4, "w": 16, "x": 8, "y": 14 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {},
"overrides": [
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "Restarts" },
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 5 }
]
}
},
{ "id": "custom.displayMode", "value": "color-background" }
]
}
]
},
"options": {
"showHeader": true,
"sortBy": [{ "displayName": "Restarts", "desc": true }]
},
"transformations": [
{ "id": "reduce", "options": { "reducers": ["lastNotNull"] } },
{ "id": "sortBy", "options": { "fields": {}, "sort": [{ "field": "Value", "desc": true }] } }
],
"targets": [
{
"expr": "sum by (namespace, pod) (increase(kube_pod_container_status_restarts_total[1h])) > 0",
"legendFormat": "{{ namespace }}/{{ pod }}",
"refId": "A",
"instant": true
}
]
},
{
"title": "Not Ready Pods",
"type": "table",
"gridPos": { "h": 5, "w": 12, "x": 0, "y": 18 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {},
"overrides": [
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "Ready" },
{ "id": "mappings", "value": [
{ "options": { "0": { "text": "NOT READY", "color": "red" } }, "type": "value" },
{ "options": { "1": { "text": "Ready", "color": "green" } }, "type": "value" }
]},
{ "id": "custom.displayMode", "value": "color-text" }
]
}
]
},
"options": { "showHeader": true },
"targets": [
{
"expr": "kube_pod_status_ready{condition=\"true\"} == 0",
"legendFormat": "{{ namespace }}/{{ pod }}",
"refId": "A",
"instant": true
}
]
},
{
"title": "Pods per Namespace",
"type": "piechart",
"gridPos": { "h": 5, "w": 12, "x": 12, "y": 18 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false } }
}
},
"options": {
"pieType": "donut",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"legend": { "displayMode": "table", "placement": "right", "values": ["value"] }
},
"targets": [
{
"expr": "sum by (namespace) (kube_pod_status_phase{phase=\"Running\"})",
"legendFormat": "{{ namespace }}",
"refId": "A"
}
]
},
{
"title": "",
"type": "text",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
"options": {
"mode": "markdown",
"content": "### Cluster Resources"
}
},
{
"title": "CPU Utilization",
"type": "gauge",
"gridPos": { "h": 5, "w": 8, "x": 0, "y": 24 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"decimals": 1,
"max": 100,
"min": 0,
"unit": "percent",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 70 },
{ "color": "orange", "value": 85 },
{ "color": "red", "value": 95 }
]
}
}
},
"options": {
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
"legendFormat": "Cluster CPU",
"refId": "A"
}
]
},
{
"title": "Memory Utilization",
"type": "gauge",
"gridPos": { "h": 5, "w": 8, "x": 8, "y": 24 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"decimals": 1,
"max": 100,
"min": 0,
"unit": "percent",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 70 },
{ "color": "orange", "value": 85 },
{ "color": "red", "value": 95 }
]
}
}
},
"options": {
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"expr": "(1 - sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)) * 100",
"legendFormat": "Cluster Memory",
"refId": "A"
}
]
},
{
"title": "Disk Utilization",
"type": "gauge",
"gridPos": { "h": 5, "w": 8, "x": 16, "y": 24 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"decimals": 1,
"max": 100,
"min": 0,
"unit": "percent",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 70 },
{ "color": "orange", "value": 85 },
{ "color": "red", "value": 95 }
]
}
}
},
"options": {
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"expr": "(1 - sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\"})) * 100",
"legendFormat": "Cluster Disk",
"refId": "A"
}
]
},
{
"title": "Node Count",
"type": "stat",
"gridPos": { "h": 3, "w": 8, "x": 0, "y": 29 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "blue", "value": null }]
}
}
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"expr": "count(kube_node_info)",
"legendFormat": "Nodes",
"refId": "A"
}
]
},
{
"title": "Running Pods",
"type": "stat",
"gridPos": { "h": 3, "w": 8, "x": 8, "y": 29 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "blue", "value": null }]
}
}
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"expr": "sum(kube_pod_status_phase{phase=\"Running\"})",
"legendFormat": "Running Pods",
"refId": "A"
}
]
},
{
"title": "PVC Count",
"type": "stat",
"gridPos": { "h": 3, "w": 8, "x": 16, "y": 29 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "blue", "value": null }]
}
}
},
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "center",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"expr": "count(kube_persistentvolumeclaim_info)",
"legendFormat": "PVCs",
"refId": "A"
}
]
},
{
"title": "",
"type": "text",
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 },
"options": {
"mode": "markdown",
"content": "### Active Alerts"
}
},
{
"title": "Firing Alerts",
"type": "table",
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 33 },
"datasource": { "type": "prometheus" },
"fieldConfig": {
"defaults": {},
"overrides": [
{
"matcher": { "id": "byName", "options": "Value" },
"properties": [
{ "id": "displayName", "value": "Active" },
{ "id": "mappings", "value": [
{ "options": { "1": { "text": "FIRING", "color": "red" } }, "type": "value" }
]},
{ "id": "custom.displayMode", "value": "color-text" }
]
}
]
},
"options": { "showHeader": true },
"targets": [
{
"expr": "ALERTS{alertstate=\"firing\"}",
"legendFormat": "{{ alertname }} — {{ app }} {{ namespace }} {{ pod }}",
"refId": "A",
"instant": true
}
]
}
],
"schemaVersion": 39,
"tags": ["executive", "overview", "cluster", "apps"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "Executive Summary",
"uid": "executive-summary"
}