Refactor mixin dashboards (#885)

This is a complete refactor of the dashboard system. It brings dashboard creation, metrics, and alerting into alignment with other projects that use jsonnet/grafonnet/mixins, so users can customize what we have created and deploy it into their own environments. The dashboard was the focus of this iteration and reaches parity with the previous dashboard; a sketch of how a downstream project might customize the mixin follows the list below.

- Add in jsonnet and grafonnet
- Add scripts to compile and lint mixin
- Add CI for the mixin
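
For illustration, a minimal sketch of how a downstream project could consume and customize this mixin once it is vendored with jsonnet-bundler. The import path, override values, and output handling are assumptions for the example, not part of this change:

```jsonnet
// Sketch only: assumes the mixin was vendored with jsonnet-bundler, e.g.
//   jb install github.com/prometheus-community/elasticsearch_exporter/elasticsearch-mixin@master
// so the import path below resolves; adjust it to your vendor layout.
local esMixin =
  (import 'github.com/prometheus-community/elasticsearch_exporter/elasticsearch-mixin/mixin.libsonnet')
  + {
    // Overrides merge into the defaults defined in config.libsonnet.
    _config+:: {
      dashboardNamePrefix: 'My Team / Elasticsearch ',  // illustrative value
      dashboardTags: ['elasticsearch', 'my-team'],      // illustrative value
    },
  };

// One JSON object per dashboard, keyed by file name (e.g. 'cluster.json'),
// suitable for multi-file jsonnet output.
{
  [name]: esMixin.grafanaDashboards[name]
  for name in std.objectFields(esMixin.grafanaDashboards)
}
```

Rendering that file with `jsonnet -J vendor -m <outdir>` writes each customized dashboard to its own file, which is roughly what scripts/compile-mixin.sh automates via mixtool.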

---------

Signed-off-by: Joe Adams <github@joeadams.io>
Joe Adams, 2024-12-14 11:52:05 -05:00 (committed by GitHub)
parent bb6320875c
commit 3774123827
27 changed files with 1296 additions and 2250 deletions

.github/workflows/mixin.yml (new file)

@@ -0,0 +1,34 @@
---
name: mixin
on:
pull_request:
paths:
- "elasticsearch-mixin/**"
jobs:
check-mixin:
name: check
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: 1.22.5
- name: Install dependencies
run: |
go install github.com/google/go-jsonnet/cmd/jsonnet@v0.20.0
go install github.com/google/go-jsonnet/cmd/jsonnetfmt@v0.20.0
go install github.com/google/go-jsonnet/cmd/jsonnet-lint@v0.20.0
go install github.com/monitoring-mixins/mixtool/cmd/mixtool@16dc166166d91e93475b86b9355a4faed2400c18
go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@v0.5.1
- name: Lint
run: bash ./scripts/lint-jsonnet.sh
- name: Compile mixin
run: bash ./scripts/compile-mixin.sh
- name: Verify compiled mixin matches repo
run: |
git diff --exit-code -- ./elasticsearch-mixin || (echo "Compiled mixin does not match repo" && exit 1)
# Check if there are any new untracked files
test -z "$(git status --porcelain)" || (echo "Untracked files found, please run ./scripts/compile-mixin.sh" && exit 1)

.gitignore

@@ -4,3 +4,4 @@ elasticsearch_exporter
*-stamp
.tarballs
/vendor
vendor/

@@ -0,0 +1,29 @@
# Elasticsearch Exporter Mixin

This is a mixin for the elasticsearch_exporter that defines dashboards, alerts, and monitoring queries for use with this exporter.

A good example of an upstream mixin for reference: https://github.com/kubernetes-monitoring/kubernetes-mixin

## Development

### JSONNET

https://jsonnet.org/

```bash
go install github.com/google/go-jsonnet/cmd/jsonnet@latest
```

### JSONNET BUNDLER

Jsonnet bundler is a package manager for jsonnet: https://github.com/jsonnet-bundler/jsonnet-bundler

```bash
go install -a github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest
```

### Grafonnet

Grafana libraries for jsonnet: https://grafana.github.io/grafonnet/

```bash
jb install github.com/grafana/grafonnet/gen/grafonnet-latest@main
```

### Run the build

```bash
./scripts/compile-mixin.sh
```

@@ -0,0 +1 @@
{}

@@ -0,0 +1,687 @@
{
"graphTooltip": 1,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"panels": [ ],
"title": "Overview",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 8,
"x": 0,
"y": 1
},
"id": 2,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_number_of_nodes{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Nodes",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 8,
"x": 8,
"y": 1
},
"id": 3,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_number_of_data_nodes{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Data Nodes",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 8,
"x": 16,
"y": 1
},
"id": 4,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_number_of_pending_tasks{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Pending Tasks",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"id": 5,
"panels": [ ],
"title": "Shards",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 6
},
"id": 6,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_active_shards{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Active",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 4,
"y": 6
},
"id": 7,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_active_primary_shards{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Active Primary",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 8,
"y": 6
},
"id": 8,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_initializing_shards{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Initializing",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 12,
"y": 6
},
"id": 9,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
               },
               "expr": "sum(\n  elasticsearch_cluster_health_relocating_shards{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Relocating",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 16,
"y": 6
},
"id": 10,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_unassigned_shards{cluster=~\"$cluster\"}\n)\n"
}
],
"title": "Unassigned",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 20,
"y": 6
},
"id": 11,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~\"$cluster\"}\n)\n"
}
         ],
         "title": "Delayed Unassigned",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 12,
"panels": [ ],
"title": "Documents",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 11
},
"id": 13,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "elasticsearch_indices_docs{cluster=~\"$cluster\"}\n"
}
],
"title": "Indexed Documents",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"gridPos": {
"h": 4,
"w": 4,
"x": 4,
"y": 11
},
"id": 14,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "elasticsearch_indices_store_size_bytes{cluster=~\"$cluster\"}\n"
}
],
"title": "Index Size",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 8,
"y": 11
},
"id": 15,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "rate(elasticsearch_indices_indexing_index_total{cluster=~\"$cluster\"}[$__rate_interval])\n",
"legendFormat": "{{name}}"
}
],
"title": "Index Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 12,
"y": 11
},
"id": 16,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "rate(elasticsearch_indices_search_query_total{cluster=~\"$cluster\"}[$__rate_interval])\n",
"legendFormat": "{{name}}"
}
],
"title": "Query Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 4,
"x": 16,
"y": 11
},
"id": 17,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(elasticsearch_thread_pool_queue_count{cluster=~\"$cluster\",type!=\"management\"}) by (type)\n",
"legendFormat": "{{type}}"
}
],
"title": "Queue Count",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 18,
"panels": [ ],
"title": "Memory",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 16
},
"id": 19,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "elasticsearch_jvm_memory_used_bytes{cluster=~\"$cluster\"}\n",
"legendFormat": "{{name}} {{area}}"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"max": 1,
"min": 0,
"unit": "percentunit"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 16
},
"id": 20,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "avg_over_time(\n elasticsearch_jvm_memory_used_bytes{cluster=~\"$cluster\"}[15m]\n) /\nelasticsearch_jvm_memory_max_bytes{cluster=~\"$cluster\"}\n",
"legendFormat": "{{name}} {{area}}"
}
],
"title": "Memory 15m Avg",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 16
},
"id": 21,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "elasticsearch_jvm_memory_max_bytes{cluster=~\"$cluster\"}\n",
"legendFormat": "{{name}} {{area}}"
}
],
"title": "Memory Max",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 16
},
"id": 22,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "rate(\n elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\"}[$__rate_interval]\n)\n",
"legendFormat": "{{name}} {{gc}}"
}
],
"title": "GC Rate",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 20
},
"id": 23,
"panels": [ ],
"title": "Threads",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 12,
"x": 0,
"y": 21
},
"id": 24,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "elasticsearch_thread_pool_active_count{cluster=~\"$cluster\"}\n",
"legendFormat": "{{type}}"
}
],
"title": "Thread Pools",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 4,
"w": 12,
"x": 12,
"y": 21
},
"id": 25,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "elasticsearch_thread_pool_rejected_count{cluster=~\"$cluster\"}\n",
"legendFormat": "{{name}} {{type}}"
}
],
"title": "Thread Pool Rejections",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 25
},
"id": 26,
"panels": [ ],
"title": "Network",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 26
},
"id": 27,
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
               },
               "expr": "rate(\n  elasticsearch_transport_tx_size_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]\n)\n",
"legendFormat": "{{name}} TX"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
               },
               "expr": "rate(\n  elasticsearch_transport_rx_size_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]\n)\n",
"legendFormat": "{{name}} RX"
}
],
"title": "Transport Rate",
"type": "timeseries"
}
],
"refresh": "1m",
"schemaVersion": 36,
"tags": [
"elasticsearch-exporter-mixin"
],
"templating": {
"list": [
{
"name": "datasource",
"query": "prometheus",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"name": "cluster",
"query": "label_values(elasticsearch_cluster_health_status, cluster)",
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timezone": "utc",
"title": "Elasticsearch Exporter / Cluster"
}

@@ -0,0 +1 @@
{}

@@ -0,0 +1,6 @@
{
_config+:: {
dashboardNamePrefix: 'Elasticsearch Exporter / ',
dashboardTags: ['elasticsearch-exporter-mixin'],
},
}

@@ -0,0 +1,3 @@
local dashboards = (import 'mixin.libsonnet').grafanaDashboards;
{ [name]: dashboards[name] for name in std.objectFields(dashboards) }

@@ -0,0 +1,67 @@
local g = import 'g.libsonnet';
local dashboard = g.dashboard;
local row = g.panel.row;
local panels = import './panels.libsonnet';
local queries = import './queries.libsonnet';
local variables = import './variables.libsonnet';
local util = import './util.libsonnet';
{
grafanaDashboards+:: {
'cluster.json':
dashboard.new('%s Cluster' % $._config.dashboardNamePrefix)
+ dashboard.withTags($._config.dashboardTags)
+ dashboard.withRefresh('1m')
+ dashboard.time.withFrom(value='now-1h')
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withVariables([
variables.datasource,
variables.cluster,
])
+ dashboard.withPanels(
util.makeGrid([
row.new('Overview')
+ row.withPanels([
panels.stat.nodes('Nodes', queries.runningNodes),
panels.stat.nodes('Data Nodes', queries.dataNodes),
panels.stat.nodes('Pending Tasks', queries.pendingTasks),
]),
row.new('Shards')
+ row.withPanels([
panels.stat.nodes('Active', queries.activeShards),
panels.stat.nodes('Active Primary', queries.activePrimaryShards),
panels.stat.nodes('Initializing', queries.initializingShards),
            panels.stat.nodes('Relocating', queries.relocatingShards),
panels.stat.nodes('Unassigned', queries.unassignedShards),
            panels.stat.nodes('Delayed Unassigned', queries.delayedUnassignedShards),
]),
row.new('Documents')
+ row.withPanels([
panels.timeSeries.base('Indexed Documents', queries.indexedDocuments),
panels.timeSeries.bytes('Index Size', queries.indexSize),
panels.timeSeries.base('Index Rate', queries.indexRate),
panels.timeSeries.base('Query Rate', queries.queryRate),
panels.timeSeries.base('Queue Count', queries.queueCount),
]),
row.new('Memory')
+ row.withPanels([
panels.timeSeries.bytes('Memory Usage', queries.memoryUsage),
panels.timeSeries.ratioMax1('Memory 15m Avg', queries.memoryUsageAverage15),
panels.timeSeries.bytes('Memory Max', queries.memoryMax),
panels.timeSeries.seconds('GC Rate', queries.gcSeconds),
]),
row.new('Threads')
+ row.withPanels([
panels.timeSeries.base('Thread Pools', queries.threadPoolActive),
panels.timeSeries.base('Thread Pool Rejections', queries.threadPoolRejections),
]),
row.new('Network')
+ row.withPanels([
panels.timeSeries.bytes('Transport Rate', [queries.transportTXRate, queries.transportRXRate]),
]),
]),
),
},
}

@@ -0,0 +1 @@
(import 'cluster.libsonnet')

@@ -0,0 +1 @@
import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'

@@ -0,0 +1,38 @@
local g = import 'g.libsonnet';
{
stat: {
local stat = g.panel.stat,
base(title, targets):
stat.new(title)
+ stat.queryOptions.withTargets(targets),
nodes: self.base,
},
timeSeries: {
local timeSeries = g.panel.timeSeries,
base(title, targets):
timeSeries.new(title)
+ timeSeries.queryOptions.withTargets(targets),
ratio(title, targets):
self.base(title, targets)
+ timeSeries.standardOptions.withUnit('percentunit'),
ratioMax1(title, targets):
self.ratio(title, targets)
+ timeSeries.standardOptions.withMax(1)
+ timeSeries.standardOptions.withMin(0),
bytes(title, targets):
self.base(title, targets)
+ timeSeries.standardOptions.withUnit('bytes'),
seconds(title, targets):
self.base(title, targets)
+ timeSeries.standardOptions.withUnit('s'),
},
}

@@ -0,0 +1,11 @@
local g = import './g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import './variables.libsonnet';
(import './queries/general.libsonnet') +
(import './queries/shard.libsonnet') +
(import './queries/document.libsonnet') +
(import './queries/memory.libsonnet') +
(import './queries/threads.libsonnet') +
(import './queries/network.libsonnet')

@@ -0,0 +1,50 @@
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import '../variables.libsonnet';
{
indexedDocuments:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
elasticsearch_indices_docs{cluster=~"$cluster"}
|||
),
indexSize:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
elasticsearch_indices_store_size_bytes{cluster=~"$cluster"}
|||
),
indexRate:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
rate(elasticsearch_indices_indexing_index_total{cluster=~"$cluster"}[$__rate_interval])
|||
)
+ prometheusQuery.withLegendFormat('{{name}}'),
queryRate:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
rate(elasticsearch_indices_search_query_total{cluster=~"$cluster"}[$__rate_interval])
|||
)
+ prometheusQuery.withLegendFormat('{{name}}'),
queueCount:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(elasticsearch_thread_pool_queue_count{cluster=~"$cluster",type!="management"}) by (type)
|||
)
+ prometheusQuery.withLegendFormat('{{type}}'),
}

@@ -0,0 +1,35 @@
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import '../variables.libsonnet';
{
runningNodes:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"}
)
|||
),
dataNodes:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_number_of_data_nodes{cluster=~"$cluster"}
)
|||
),
pendingTasks:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_number_of_pending_tasks{cluster=~"$cluster"}
)
|||
),
}

@@ -0,0 +1,47 @@
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import '../variables.libsonnet';
{
memoryUsage:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
elasticsearch_jvm_memory_used_bytes{cluster=~"$cluster"}
|||
)
+ prometheusQuery.withLegendFormat('{{name}} {{area}}'),
memoryUsageAverage15:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
avg_over_time(
elasticsearch_jvm_memory_used_bytes{cluster=~"$cluster"}[15m]
) /
elasticsearch_jvm_memory_max_bytes{cluster=~"$cluster"}
|||
)
+ prometheusQuery.withLegendFormat('{{name}} {{area}}'),
memoryMax:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
elasticsearch_jvm_memory_max_bytes{cluster=~"$cluster"}
|||
)
+ prometheusQuery.withLegendFormat('{{name}} {{area}}'),
gcSeconds:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
rate(
elasticsearch_jvm_gc_collection_seconds_sum{cluster=~"$cluster"}[$__rate_interval]
)
|||
)
+ prometheusQuery.withLegendFormat('{{name}} {{gc}}'),
}

@@ -0,0 +1,28 @@
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import '../variables.libsonnet';
{
transportTXRate:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
rate(
          elasticsearch_transport_tx_size_bytes_total{cluster=~"$cluster"}[$__rate_interval]
)
|||
)
+ prometheusQuery.withLegendFormat('{{name}} TX'),
transportRXRate:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
rate(
          elasticsearch_transport_rx_size_bytes_total{cluster=~"$cluster"}[$__rate_interval]
)
|||
)
+ prometheusQuery.withLegendFormat('{{name}} RX'),
}

@@ -0,0 +1,66 @@
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import '../variables.libsonnet';
{
activeShards:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_active_shards{cluster=~"$cluster"}
)
|||
),
activePrimaryShards:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_active_primary_shards{cluster=~"$cluster"}
)
|||
),
initializingShards:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_initializing_shards{cluster=~"$cluster"}
)
|||
),
  relocatingShards:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
          elasticsearch_cluster_health_relocating_shards{cluster=~"$cluster"}
)
|||
),
unassignedShards:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_unassigned_shards{cluster=~"$cluster"}
)
|||
),
delayedUnassignedShards:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
sum(
elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~"$cluster"}
)
|||
),
}

@@ -0,0 +1,24 @@
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;
local variables = import '../variables.libsonnet';
{
threadPoolActive:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
elasticsearch_thread_pool_active_count{cluster=~"$cluster"}
|||
)
+ prometheusQuery.withLegendFormat('{{type}}'),
threadPoolRejections:
prometheusQuery.new(
'$' + variables.datasource.name,
|||
elasticsearch_thread_pool_rejected_count{cluster=~"$cluster"}
|||
)
+ prometheusQuery.withLegendFormat('{{name}} {{type}}'),
}

@@ -0,0 +1,66 @@
local g = import 'g.libsonnet';
local panelUtil = g.util.panel;
{
local gridWidth = 24,
// makeGrid returns an array of panels organized into a grid layout.
  // This is a modified version of the grafonnet makeGrid function that
  // calculates the width of each panel from the number of panels in its row.
makeGrid(panels, panelHeight=4, startY=0):
local sanitizePanels(ps) =
// Figure out the number of panels and the width of each panel
local numPanels = std.length(ps);
local panelWidth = std.floor(gridWidth / numPanels);
      // Sanitize the panels; this ensures that each panel has a valid gridPos.
std.map(
function(p)
local sanePanel = panelUtil.sanitizePanel(p, defaultHeight=panelHeight);
(
if p.type == 'row'
then sanePanel {
panels: sanitizePanels(sanePanel.panels),
}
else sanePanel {
gridPos+: {
w: panelWidth,
},
}
),
ps
);
local sanitizedPanels = sanitizePanels(panels);
local grouped = panelUtil.groupPanelsInRows(sanitizedPanels);
local panelsBeforeRows = panelUtil.getPanelsBeforeNextRow(grouped);
local rowPanels =
std.filter(
function(p) p.type == 'row',
grouped
);
local CalculateXforPanel(index, panel) =
local panelsPerRow = std.floor(gridWidth / panel.gridPos.w);
local col = std.mod(index, panelsPerRow);
panel { gridPos+: { x: panel.gridPos.w * col } };
local panelsBeforeRowsWithX = std.mapWithIndex(CalculateXforPanel, panelsBeforeRows);
local rowPanelsWithX =
std.map(
function(row)
row { panels: std.mapWithIndex(CalculateXforPanel, row.panels) },
rowPanels
);
local uncollapsed = panelUtil.resolveCollapsedFlagOnRows(panelsBeforeRowsWithX + rowPanelsWithX);
local normalized = panelUtil.normalizeY(uncollapsed);
std.map(function(p) p { gridPos+: { y+: startY } }, normalized),
}
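
For a sense of the layout this helper produces, a small illustrative sketch (placeholder row and panel titles, assuming it is evaluated next to g.libsonnet and util.libsonnet, as cluster.libsonnet is); the expected gridPos values match the compiled cluster dashboard above:

```jsonnet
// Sketch only: a single row with three stat panels. makeGrid gives each panel
// w = floor(24 / 3) = 8 and x = 0, 8, 16, while the row header itself keeps
// the full 24-column width.
local g = import 'g.libsonnet';
local util = import 'util.libsonnet';
local row = g.panel.row;
local stat = g.panel.stat;

util.makeGrid([
  row.new('Example row')
  + row.withPanels([
    stat.new('A'),
    stat.new('B'),
    stat.new('C'),
  ]),
])
// Expected gridPos values in the result:
//   row 'Example row': { h: 1, w: 24, x: 0, y: 0 }
//   'A': { h: 4, w: 8, x: 0,  y: 1 }
//   'B': { h: 4, w: 8, x: 8,  y: 1 }
//   'C': { h: 4, w: 8, x: 16, y: 1 }
```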

@@ -0,0 +1,15 @@
local g = import './g.libsonnet';
local var = g.dashboard.variable;
{
datasource:
var.datasource.new('datasource', 'prometheus'),
cluster:
var.query.new('cluster')
+ var.query.withDatasourceFromVariable(self.datasource)
+ var.query.queryTypes.withLabelValues(
'cluster',
'elasticsearch_cluster_health_status',
),
}

@@ -0,0 +1,15 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "main"
}
],
"legacyImports": true
}

@@ -0,0 +1,46 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "1c56af39815c4903e47c27194444456f005f65df",
"sum": "GxEO83uxgsDclLp/fmlUJZDbSGpeUZY6Ap3G2cgdL1g="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.4.0"
}
},
"version": "1c56af39815c4903e47c27194444456f005f65df",
"sum": "DKj+Sn+rlI48g/aoJpzkfPge46ya0jLk5kcZoiZ2X/I="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
"subdir": "doc-util"
}
},
"version": "6ac6c69685b8c29c54515448eaca583da2d88150",
"sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/xtd.git",
"subdir": ""
}
},
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
}
],
"legacyImports": false
}

@@ -0,0 +1,3 @@
// (import 'alerts/alerts.libsonnet') +
(import 'dashboards/dashboards.libsonnet') +
(import 'config.libsonnet')
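
Alerts are not part of this iteration, hence the commented-out import above. For orientation only, this is the conventional monitoring-mixins shape such an alerts file could take once alerting is added; the alert name, expression, duration, and severity are illustrative assumptions, not part of this change:

```jsonnet
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'elasticsearch',
        rules: [
          {
            // Illustrative alert: fires when any cluster reports red health.
            alert: 'ElasticsearchClusterHealthRed',
            expr: 'elasticsearch_cluster_health_status{color="red"} == 1',
            'for': '5m',
            labels: { severity: 'critical' },
            annotations: {
              summary: 'Elasticsearch cluster {{ $labels.cluster }} health is red.',
            },
          },
        ],
      },
    ],
  },
}
```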

File diff suppressed because it is too large.

scripts/compile-mixin.sh (new executable file)

@@ -0,0 +1,8 @@
#!/bin/bash
MIXIN_PATH=./elasticsearch-mixin
MIXIN_OUT_PATH=./elasticsearch-mixin/compiled
rm -rf ${MIXIN_OUT_PATH} && mkdir ${MIXIN_OUT_PATH}
pushd ${MIXIN_PATH} && jb install && popd
mixtool generate all --output-alerts ${MIXIN_OUT_PATH}/alerts.yaml --output-rules ${MIXIN_OUT_PATH}/rules.yaml --directory ${MIXIN_OUT_PATH}/dashboards ${MIXIN_PATH}/mixin.libsonnet

scripts/lint-jsonnet.sh (new executable file)

@@ -0,0 +1,13 @@
#!/bin/bash
# Run lint on all jsonnet files in the repository
RESULT=0;
for f in $(find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print); do
  # To fix formatting in place instead of only checking, run: jsonnetfmt -i "${f}"
echo "Linting ${f}"
jsonnetfmt -- "${f}" | diff -u "${f}" -
RESULT=$((RESULT+$?))
done
echo "Linting complete"
exit $RESULT