mirror of
https://github.com/prometheus-community/elasticsearch_exporter.git
synced 2026-02-06 10:58:13 +00:00
Compare commits
258 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92b25a1eb0 | ||
|
|
5233bc8957 | ||
|
|
5dfce351ec | ||
|
|
84ca898d8e | ||
|
|
348b806bda | ||
|
|
f57d9b6b5b | ||
|
|
9469a6d617 | ||
|
|
fa338d24ce | ||
|
|
1b970de07b | ||
|
|
1ea8dddf33 | ||
|
|
de34ae3d01 | ||
|
|
02c5a3e6d9 | ||
|
|
cdc4b01705 | ||
|
|
1ec2242bca | ||
|
|
eee51ae24a | ||
|
|
d74274fa19 | ||
|
|
d20e394771 | ||
|
|
f1fda6818f | ||
|
|
df28d5921f | ||
|
|
279ba21db4 | ||
|
|
110a885c66 | ||
|
|
aca930544c | ||
|
|
c9fe3d448c | ||
|
|
36aba75c33 | ||
|
|
8b66ebdc44 | ||
|
|
a4f9120aff | ||
|
|
1bb240e13e | ||
|
|
56914a1d8b | ||
|
|
bdfeeffc09 | ||
|
|
325e7dd61f | ||
|
|
7131333ce7 | ||
|
|
91008f4902 | ||
|
|
88c8fb6d31 | ||
|
|
1cd5ffe190 | ||
|
|
a04a789c0d | ||
|
|
078972dba4 | ||
|
|
1f8b5ad641 | ||
|
|
dbe29fedec | ||
|
|
90bc1111dd | ||
|
|
6d25270625 | ||
|
|
1bb4b5823e | ||
|
|
ca4c3133e5 | ||
|
|
5ceff33669 | ||
|
|
2c91a658f4 | ||
|
|
c463acc8a6 | ||
|
|
b3e2aaa8bc | ||
|
|
33661b982b | ||
|
|
60f9e0c8e5 | ||
|
|
00dfe058f4 | ||
|
|
8a1d851bdb | ||
|
|
abac1b96e7 | ||
|
|
46721e114b | ||
|
|
4a54705614 | ||
|
|
25c76faf3a | ||
|
|
80a0be6a4b | ||
|
|
2645c58e90 | ||
|
|
8f67121f0b | ||
|
|
78c2672b04 | ||
|
|
08636845fe | ||
|
|
ff2a9185ed | ||
|
|
3f41e5f9c0 | ||
|
|
fee0e4dc40 | ||
|
|
1799ebe922 | ||
|
|
b6fd4be0cf | ||
|
|
9dc402e2c1 | ||
|
|
36754ef9a3 | ||
|
|
8afbc87e07 | ||
|
|
46f304de22 | ||
|
|
d6b75359fc | ||
|
|
f0ae1957f4 | ||
|
|
fc9f42c4c8 | ||
|
|
4ab0f07290 | ||
|
|
e0de42975b | ||
|
|
4301b8d655 | ||
|
|
17f9f07e3d | ||
|
|
ea5a657777 | ||
|
|
043dbe314a | ||
|
|
f84d8b5d48 | ||
|
|
5e8e0c0baa | ||
|
|
6799cb5f84 | ||
|
|
e998563ec0 | ||
|
|
73b5e4615a | ||
|
|
6f79acc690 | ||
|
|
06975c8507 | ||
|
|
6700e15bee | ||
|
|
98c791f3b1 | ||
|
|
e4eefa0627 | ||
|
|
7b794ff7b2 | ||
|
|
ea1dd11d4f | ||
|
|
05609ff593 | ||
|
|
6c221f72d9 | ||
|
|
41561072de | ||
|
|
ff87990e2a | ||
|
|
2b71755dbf | ||
|
|
5bc975f8d5 | ||
|
|
23b43a85e4 | ||
|
|
000290352e | ||
|
|
e7fe7a32cc | ||
|
|
b550e54b4e | ||
|
|
c89f8131bd | ||
|
|
0355d873cf | ||
|
|
dbcf459aeb | ||
|
|
41797efe66 | ||
|
|
7408f21caf | ||
|
|
3774123827 | ||
|
|
bb6320875c | ||
|
|
34c4936c53 | ||
|
|
b846254d5c | ||
|
|
0a6cf82d14 | ||
|
|
811cee2182 | ||
|
|
51b1b9a882 | ||
|
|
c6f86ac844 | ||
|
|
c56248ac02 | ||
|
|
7ec6712655 | ||
|
|
7f3eeafd7e | ||
|
|
66b82c8997 | ||
|
|
3a591ed02e | ||
|
|
8b44de5a95 | ||
|
|
ca263b9d2e | ||
|
|
7aa2664f17 | ||
|
|
7a09a847ab | ||
|
|
ccd458b065 | ||
|
|
33f42c6eab | ||
|
|
79b0e15fab | ||
|
|
77836071b2 | ||
|
|
3b3879a2ea | ||
|
|
b826b14a31 | ||
|
|
08a1ceea6a | ||
|
|
22d4a8a633 | ||
|
|
da4a406b39 | ||
|
|
0fc81859b9 | ||
|
|
a4f94e585b | ||
|
|
8103328051 | ||
|
|
5c8fca8769 | ||
|
|
fd25030ff5 | ||
|
|
d98d2f6185 | ||
|
|
711a6ce467 | ||
|
|
d13c5552b7 | ||
|
|
bf89cef4de | ||
|
|
0d92bd3d10 | ||
|
|
fed8a6b740 | ||
|
|
42c30156ea | ||
|
|
2f3360592d | ||
|
|
8ebe0a5056 | ||
|
|
ea789710ce | ||
|
|
08915a5742 | ||
|
|
51401e4b82 | ||
|
|
00814928b4 | ||
|
|
08d9748697 | ||
|
|
e1ddc12424 | ||
|
|
1d5d44be41 | ||
|
|
1810f7c30b | ||
|
|
c2c33b1681 | ||
|
|
c276c3e0ea | ||
|
|
e730d38034 | ||
|
|
6010fd106a | ||
|
|
066b7eac3d | ||
|
|
3588461cf6 | ||
|
|
7b0ebe5370 | ||
|
|
56dc80b394 | ||
|
|
fcf5f83d90 | ||
|
|
b729189b61 | ||
|
|
083bfe2418 | ||
|
|
b24d0ace72 | ||
|
|
a20eec030c | ||
|
|
1a82b986e6 | ||
|
|
d24c8349fe | ||
|
|
bf276d4e17 | ||
|
|
b47c56e9ec | ||
|
|
9fd3634cf5 | ||
|
|
e9ad2f22fb | ||
|
|
9df3161384 | ||
|
|
763c5f8fe2 | ||
|
|
af205b4efe | ||
|
|
3c7b5c7bf8 | ||
|
|
dc2704a1e4 | ||
|
|
8ff7b1a8af | ||
|
|
8393bfcc18 | ||
|
|
d54e8ec48e | ||
|
|
cc1a83fd06 | ||
|
|
a6e3814f5d | ||
|
|
4d262b16eb | ||
|
|
861e7bafc2 | ||
|
|
6b8b33f9d8 | ||
|
|
5a51b7ff72 | ||
|
|
74d8ca45b0 | ||
|
|
989d4d608d | ||
|
|
8e7aebfadd | ||
|
|
391c7df5b4 | ||
|
|
acddc580d6 | ||
|
|
0669e2dca1 | ||
|
|
b0f63dfe2a | ||
|
|
620c886d8f | ||
|
|
c3b7d97797 | ||
|
|
c9e2f4edfb | ||
|
|
1cc6f17b78 | ||
|
|
c8e5b3bf95 | ||
|
|
a09cf6dfff | ||
|
|
9bb0ad5544 | ||
|
|
157d0e4a4d | ||
|
|
5c142bf058 | ||
|
|
cf37e02b1b | ||
|
|
bf9c0fd2c7 | ||
|
|
bd915838cd | ||
|
|
fc7cdd7048 | ||
|
|
7e388a8600 | ||
|
|
2c39d17c5a | ||
|
|
bc37d3664a | ||
|
|
530934804b | ||
|
|
397fcbc5f6 | ||
|
|
f15ac981ef | ||
|
|
7aab9dab92 | ||
|
|
efd6062a04 | ||
|
|
d889c8bcba | ||
|
|
a5f42797a1 | ||
|
|
831c5642ba | ||
|
|
d103c1e52d | ||
|
|
a399d71c3d | ||
|
|
c52fc4235f | ||
|
|
e92e0ce5a4 | ||
|
|
168002a0bd | ||
|
|
1ce43d748b | ||
|
|
d0d64e9c5e | ||
|
|
c51cf41a09 | ||
|
|
fda1b57ea8 | ||
|
|
ed8a758707 | ||
|
|
fe69494fe6 | ||
|
|
6887a82891 | ||
|
|
ad3ee502c2 | ||
|
|
790bd21184 | ||
|
|
fa851f6565 | ||
|
|
0008ee9768 | ||
|
|
33b6077bfb | ||
|
|
7a7eb85ed6 | ||
|
|
687e75ef04 | ||
|
|
bc05ee5537 | ||
|
|
25c6a905d1 | ||
|
|
b87a0ea31d | ||
|
|
96c88be307 | ||
|
|
3bf8a36403 | ||
|
|
d55d6d2b4d | ||
|
|
c2b9c71110 | ||
|
|
6934773e65 | ||
|
|
a5f6de38ac | ||
|
|
05090514bd | ||
|
|
416fa22e41 | ||
|
|
70152feb08 | ||
|
|
b0357ee8a7 | ||
|
|
92c4ce89e4 | ||
|
|
f6480d1337 | ||
|
|
3d70f1e6e7 | ||
|
|
be107e0e7b | ||
|
|
620fd0c66d | ||
|
|
ca4b237171 | ||
|
|
73d75a3315 | ||
|
|
102a9d3810 | ||
|
|
d9b9ecd449 | ||
|
|
a0993ec436 |
@ -1,52 +0,0 @@
|
||||
---
|
||||
version: 2.1
|
||||
orbs:
|
||||
prometheus: prometheus/prometheus@0.16.0
|
||||
executors:
|
||||
# This must match .promu.yml.
|
||||
golang:
|
||||
docker:
|
||||
- image: cimg/go:1.18
|
||||
jobs:
|
||||
test:
|
||||
executor: golang
|
||||
steps:
|
||||
- prometheus/setup_environment
|
||||
- run: make
|
||||
- prometheus/store_artifact:
|
||||
file: elasticsearch_exporter
|
||||
workflows:
|
||||
version: 2
|
||||
elasticsearch_exporter:
|
||||
jobs:
|
||||
- test:
|
||||
filters:
|
||||
tags:
|
||||
only: /.*/
|
||||
- prometheus/build:
|
||||
name: build
|
||||
filters:
|
||||
tags:
|
||||
only: /.*/
|
||||
- prometheus/publish_master:
|
||||
context: org-context
|
||||
docker_hub_organization: prometheuscommunity
|
||||
quay_io_organization: prometheuscommunity
|
||||
requires:
|
||||
- test
|
||||
- build
|
||||
filters:
|
||||
branches:
|
||||
only: master
|
||||
- prometheus/publish_release:
|
||||
context: org-context
|
||||
docker_hub_organization: prometheuscommunity
|
||||
quay_io_organization: prometheuscommunity
|
||||
requires:
|
||||
- test
|
||||
- build
|
||||
filters:
|
||||
tags:
|
||||
only: /^v.*/
|
||||
branches:
|
||||
ignore: /.*/
|
||||
8
.github/dependabot.yml
vendored
8
.github/dependabot.yml
vendored
@ -4,3 +4,11 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
groups:
|
||||
aws:
|
||||
patterns:
|
||||
- "github.com/aws/*"
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
|
||||
107
.github/workflows/ci.yml
vendored
Normal file
107
.github/workflows/ci.yml
vendored
Normal file
@ -0,0 +1,107 @@
|
||||
---
|
||||
name: CI
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
|
||||
jobs:
|
||||
test_go:
|
||||
name: Go tests
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Whenever the Go version is updated here, .promu.yml
|
||||
# should also be updated.
|
||||
image: quay.io/prometheus/golang-builder:1.25-base
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3
|
||||
- uses: ./.github/promci/actions/setup_environment
|
||||
- run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1
|
||||
|
||||
build:
|
||||
name: Build Prometheus for common architectures
|
||||
runs-on: ubuntu-latest
|
||||
if: |
|
||||
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v'))
|
||||
&&
|
||||
!(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
|
||||
&&
|
||||
!(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
|
||||
&&
|
||||
!(github.event_name == 'push' && github.event.ref == 'refs/heads/master')
|
||||
strategy:
|
||||
matrix:
|
||||
thread: [ 0, 1, 2 ]
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3
|
||||
- uses: ./.github/promci/actions/build
|
||||
with:
|
||||
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
|
||||
parallelism: 3
|
||||
thread: ${{ matrix.thread }}
|
||||
|
||||
build_all:
|
||||
name: Build Prometheus for all architectures
|
||||
runs-on: ubuntu-latest
|
||||
if: |
|
||||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v'))
|
||||
||
|
||||
(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
|
||||
||
|
||||
(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
|
||||
||
|
||||
(github.event_name == 'push' && github.event.ref == 'refs/heads/master')
|
||||
strategy:
|
||||
matrix:
|
||||
thread: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ]
|
||||
|
||||
# Whenever the Go version is updated here, .promu.yml
|
||||
# should also be updated.
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3
|
||||
- uses: ./.github/promci/actions/build
|
||||
with:
|
||||
parallelism: 12
|
||||
thread: ${{ matrix.thread }}
|
||||
|
||||
publish_main:
|
||||
# https://github.com/prometheus/promci/blob/52c7012f5f0070d7281b8db4a119e21341d43c91/actions/publish_main/action.yml
|
||||
name: Publish main branch artifacts
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test_go, build_all]
|
||||
if: |
|
||||
(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
|
||||
||
|
||||
(github.event_name == 'push' && github.event.ref == 'refs/heads/master')
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3
|
||||
- uses: ./.github/promci/actions/publish_main
|
||||
with:
|
||||
docker_hub_organization: prometheuscommunity
|
||||
docker_hub_login: ${{ secrets.docker_hub_login }}
|
||||
docker_hub_password: ${{ secrets.docker_hub_password }}
|
||||
quay_io_organization: prometheuscommunity
|
||||
quay_io_login: ${{ secrets.quay_io_login }}
|
||||
quay_io_password: ${{ secrets.quay_io_password }}
|
||||
|
||||
publish_release:
|
||||
name: Publish release artefacts
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test_go, build_all]
|
||||
if: |
|
||||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v'))
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: prometheus/promci@c0916f0a41f13444612a8f0f5e700ea34edd7c19 # v0.5.3
|
||||
- uses: ./.github/promci/actions/publish_release
|
||||
with:
|
||||
docker_hub_organization: prometheuscommunity
|
||||
docker_hub_login: ${{ secrets.docker_hub_login }}
|
||||
docker_hub_password: ${{ secrets.docker_hub_password }}
|
||||
quay_io_organization: prometheuscommunity
|
||||
quay_io_login: ${{ secrets.quay_io_login }}
|
||||
quay_io_password: ${{ secrets.quay_io_password }}
|
||||
github_token: ${{ secrets.PROMBOT_GITHUB_TOKEN }}
|
||||
61
.github/workflows/container_description.yml
vendored
Normal file
61
.github/workflows/container_description.yml
vendored
Normal file
@ -0,0 +1,61 @@
|
||||
---
|
||||
name: Push README to Docker Hub
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- "README.md"
|
||||
- "README-containers.md"
|
||||
- ".github/workflows/container_description.yml"
|
||||
branches: [ main, master ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
PushDockerHubReadme:
|
||||
runs-on: ubuntu-latest
|
||||
name: Push README to Docker Hub
|
||||
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
|
||||
steps:
|
||||
- name: git checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Set docker hub repo name
|
||||
run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV
|
||||
- name: Push README to Dockerhub
|
||||
uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1
|
||||
env:
|
||||
DOCKER_USER: ${{ secrets.DOCKER_HUB_LOGIN }}
|
||||
DOCKER_PASS: ${{ secrets.DOCKER_HUB_PASSWORD }}
|
||||
with:
|
||||
destination_container_repo: ${{ env.DOCKER_REPO_NAME }}
|
||||
provider: dockerhub
|
||||
short_description: ${{ env.DOCKER_REPO_NAME }}
|
||||
# Empty string results in README-containers.md being pushed if it
|
||||
# exists. Otherwise, README.md is pushed.
|
||||
readme_file: ''
|
||||
|
||||
PushQuayIoReadme:
|
||||
runs-on: ubuntu-latest
|
||||
name: Push README to quay.io
|
||||
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
|
||||
steps:
|
||||
- name: git checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Set quay.io org name
|
||||
run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV
|
||||
- name: Set quay.io repo name
|
||||
run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV
|
||||
- name: Push README to quay.io
|
||||
uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1
|
||||
env:
|
||||
DOCKER_APIKEY: ${{ secrets.QUAY_IO_API_TOKEN }}
|
||||
with:
|
||||
destination_container_repo: ${{ env.DOCKER_REPO_NAME }}
|
||||
provider: quay
|
||||
# Empty string results in README-containers.md being pushed if it
|
||||
# exists. Otherwise, README.md is pushed.
|
||||
readme_file: ''
|
||||
26
.github/workflows/golangci-lint.yml
vendored
26
.github/workflows/golangci-lint.yml
vendored
@ -1,3 +1,5 @@
|
||||
---
|
||||
# This action is synced from https://github.com/prometheus/prometheus
|
||||
name: golangci-lint
|
||||
on:
|
||||
push:
|
||||
@ -10,21 +12,33 @@ on:
|
||||
- ".golangci.yml"
|
||||
pull_request:
|
||||
|
||||
permissions: # added using https://github.com/step-security/secure-repo
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
golangci:
|
||||
permissions:
|
||||
contents: read # for actions/checkout to fetch code
|
||||
pull-requests: read # for golangci/golangci-lint-action to fetch pull requests
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
- name: install Go
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
with:
|
||||
go-version: 1.18.x
|
||||
persist-credentials: false
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
|
||||
with:
|
||||
go-version: 1.25.x
|
||||
- name: Install snmp_exporter/generator dependencies
|
||||
run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
|
||||
if: github.repository == 'prometheus/snmp_exporter'
|
||||
- name: Get golangci-lint version
|
||||
id: golangci-lint-version
|
||||
run: echo "version=$(make print-golangci-lint-version)" >> $GITHUB_OUTPUT
|
||||
- name: Lint
|
||||
uses: golangci/golangci-lint-action@v3.2.0
|
||||
uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
|
||||
with:
|
||||
version: v1.45.2
|
||||
args: --verbose
|
||||
version: ${{ steps.golangci-lint-version.outputs.version }}
|
||||
|
||||
34
.github/workflows/mixin.yml
vendored
Normal file
34
.github/workflows/mixin.yml
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
---
|
||||
name: mixin
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "elasticsearch-mixin/**"
|
||||
|
||||
jobs:
|
||||
check-mixin:
|
||||
name: check
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
|
||||
with:
|
||||
go-version: 1.25.x
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
go install github.com/google/go-jsonnet/cmd/jsonnet@v0.20.0
|
||||
go install github.com/google/go-jsonnet/cmd/jsonnetfmt@v0.20.0
|
||||
go install github.com/google/go-jsonnet/cmd/jsonnet-lint@v0.20.0
|
||||
go install github.com/monitoring-mixins/mixtool/cmd/mixtool@16dc166166d91e93475b86b9355a4faed2400c18
|
||||
go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@v0.5.1
|
||||
- name: Lint
|
||||
run: bash ./scripts/lint-jsonnet.sh
|
||||
- name: Compile mixin
|
||||
run: bash ./scripts/compile-mixin.sh
|
||||
- name: Verify compiled mixin matches repo
|
||||
run: |
|
||||
git diff --exit-code -- ./elasticsearch-mixin || (echo "Compiled mixin does not match repo" && exit 1)
|
||||
# Check if there are any new untracked files
|
||||
test -z "$(git status --porcelain)" || (echo "Untracked files found, please run ./scripts/compile-mixin.sh" && exit 1)
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,3 +4,4 @@ elasticsearch_exporter
|
||||
*-stamp
|
||||
.tarballs
|
||||
/vendor
|
||||
vendor/
|
||||
|
||||
122
.golangci.yml
122
.golangci.yml
@ -1,10 +1,124 @@
|
||||
---
|
||||
version: "2"
|
||||
|
||||
formatters:
|
||||
enable:
|
||||
- gci
|
||||
- gofumpt
|
||||
settings:
|
||||
gci:
|
||||
sections:
|
||||
- standard
|
||||
- prefix(github.com/prometheus-community/elasticsearch_exporter)
|
||||
- default
|
||||
|
||||
linters:
|
||||
enable:
|
||||
- depguard
|
||||
# TODO(@sysadmind): Enable and fix the issues.
|
||||
# - errorlint
|
||||
- exptostd
|
||||
# TODO(@sysadmind): Enable and fix the issues.
|
||||
# - gocritic
|
||||
# - godot
|
||||
- loggercheck
|
||||
# TODO(@sysadmind): Enable and fix the issues.
|
||||
# - misspell
|
||||
- nilnesserr
|
||||
- nolintlint
|
||||
# TODO(@sysadmind): Enable and fix the issues.
|
||||
# - perfsprint
|
||||
- predeclared
|
||||
- revive
|
||||
- sloglint
|
||||
- testifylint
|
||||
- unconvert
|
||||
- unused
|
||||
- usestdlibvars
|
||||
- whitespace
|
||||
exclusions:
|
||||
rules:
|
||||
# Disable errcheck for test files.
|
||||
- linters:
|
||||
- errcheck
|
||||
path: _test.go
|
||||
|
||||
# Disable errcheck rule for some specific functions.
|
||||
- linters:
|
||||
- errcheck
|
||||
# Taken from the default exclusions in v1.
|
||||
text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked
|
||||
|
||||
settings:
|
||||
revive:
|
||||
rules:
|
||||
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
|
||||
- name: blank-imports
|
||||
- name: comment-spacings
|
||||
- name: context-as-argument
|
||||
arguments:
|
||||
# Allow functions with test or bench signatures.
|
||||
- allowTypesBefore: '*testing.T,testing.TB'
|
||||
- name: context-keys-type
|
||||
- name: dot-imports
|
||||
- name: early-return
|
||||
arguments:
|
||||
- "preserveScope"
|
||||
# A lot of false positives: incorrectly identifies channel draining as "empty code block".
|
||||
# See https://github.com/mgechev/revive/issues/386
|
||||
- name: empty-block
|
||||
disabled: true
|
||||
- name: error-naming
|
||||
- name: error-return
|
||||
- name: error-strings
|
||||
- name: errorf
|
||||
# TODO(@sysadmind): Enable and fix the issues.
|
||||
# - name: exported
|
||||
- name: increment-decrement
|
||||
- name: indent-error-flow
|
||||
arguments:
|
||||
- "preserveScope"
|
||||
- name: package-comments
|
||||
# TODO(beorn7/sysadmind): Currently, we have a lot of missing package doc comments. Maybe we should have them.
|
||||
disabled: true
|
||||
- name: range
|
||||
- name: receiver-naming
|
||||
- name: redefines-builtin-id
|
||||
- name: superfluous-else
|
||||
arguments:
|
||||
- "preserveScope"
|
||||
- name: time-naming
|
||||
# TODO(@sysadmind): Enable and fix the issues.
|
||||
# - name: unexported-return
|
||||
- name: unreachable-code
|
||||
- name: unused-parameter
|
||||
severity: warning
|
||||
disabled: true
|
||||
- name: var-declaration
|
||||
- name: var-naming
|
||||
|
||||
depguard:
|
||||
rules:
|
||||
main:
|
||||
deny:
|
||||
- pkg: "sync/atomic"
|
||||
desc: "Use go.uber.org/atomic instead of sync/atomic"
|
||||
- pkg: "github.com/stretchr/testify/assert"
|
||||
desc: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert"
|
||||
- pkg: "github.com/go-kit/kit/log"
|
||||
desc: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log"
|
||||
- pkg: "io/ioutil"
|
||||
desc: "Use corresponding 'os' or 'io' functions instead."
|
||||
- pkg: "regexp"
|
||||
desc: "Use github.com/grafana/regexp instead of regexp"
|
||||
- pkg: "github.com/pkg/errors"
|
||||
desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors"
|
||||
- pkg: "gzip"
|
||||
desc: "Use github.com/klauspost/compress instead of gzip"
|
||||
- pkg: "zlib"
|
||||
desc: "Use github.com/klauspost/compress instead of zlib"
|
||||
- pkg: "golang.org/x/exp/slices"
|
||||
desc: "Use 'slices' instead."
|
||||
issues:
|
||||
exclude-rules:
|
||||
- path: _test.go
|
||||
linters:
|
||||
- errcheck
|
||||
max-issues-per-linter: 0
|
||||
max-same-issues: 0
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
go:
|
||||
# This must match .circle/config.yml.
|
||||
version: 1.18
|
||||
# Whenever the Go version is updated here,
|
||||
# .github/workflows should also be updated.
|
||||
version: 1.25
|
||||
repository:
|
||||
path: github.com/prometheus-community/elasticsearch_exporter
|
||||
build:
|
||||
binaries:
|
||||
- name: elasticsearch_exporter
|
||||
flags: -a -tags netgo
|
||||
ldflags: |
|
||||
-s
|
||||
-X github.com/prometheus/common/version.Version={{.Version}}
|
||||
|
||||
10
.yamllint
10
.yamllint
@ -1,5 +1,8 @@
|
||||
---
|
||||
extends: default
|
||||
ignore: |
|
||||
**/node_modules
|
||||
web/api/v1/testdata/openapi_*_golden.yaml
|
||||
|
||||
rules:
|
||||
braces:
|
||||
@ -20,9 +23,4 @@ rules:
|
||||
config/testdata/section_key_dup.bad.yml
|
||||
line-length: disable
|
||||
truthy:
|
||||
ignore: |
|
||||
.github/workflows/codeql-analysis.yml
|
||||
.github/workflows/funcbench.yml
|
||||
.github/workflows/fuzzing.yml
|
||||
.github/workflows/prombench.yml
|
||||
.github/workflows/golangci-lint.yml
|
||||
check-keys: false
|
||||
|
||||
72
CHANGELOG.md
72
CHANGELOG.md
@ -1,3 +1,75 @@
|
||||
## master / unreleased
|
||||
|
||||
|
||||
## 1.10.0 / 2025-12-02
|
||||
|
||||
### BREAKING CHANGES
|
||||
|
||||
* `--es.uri` now defaults to empty string #1063
|
||||
* The flag `--es.data_stream` has been renamed to `--collector.data-stream`.
|
||||
* The flag `--es.ilm` has been renamed to `--collector.ilm`.
|
||||
|
||||
### Changelog
|
||||
|
||||
* [SECURITY] Remove logging unsanitized URL when HTTP request fails #1051
|
||||
* [CHANGE] Rename --es.data_stream to --collector.data-stream #983
|
||||
* [CHANGE] Rename --es.ilm to --collector.ilm #999
|
||||
* [FEATURE] Add multi-target scraping via /probe endpoint #1063
|
||||
* [FEATURE] Add health-report collector #1002
|
||||
* [FEATURE] Add pprof profiling #1033
|
||||
* [ENHANCEMENT] Ensure time.Ticker is released #1049
|
||||
* [BUGFIX] Fix disk watermark values json parsing #1055
|
||||
* [BUGFIX] Change collector failure log level to warning #1050
|
||||
|
||||
## 1.9.0 / 2025-02-27
|
||||
|
||||
BREAKING CHANGES:
|
||||
|
||||
The flag `--es.slm` has been renamed to `--collector.slm`.
|
||||
|
||||
The logging system has been replaced with log/slog from the stdlib. This change is being made across the prometheus ecosystem. The logging output has changed, but the messages and levels remain the same. The `ts` label for the timestamp has bewen replaced with `time`, the accuracy is less, and the timezone is not forced to UTC. The `caller` field has been replaced by the `source` field, which now includes the full path to the source file. The `level` field now exposes the log level in capital letters.
|
||||
|
||||
* [CHANGE] Rename --es.slm to --collector.slm #932
|
||||
* [CHANGE] Replace logging system #942
|
||||
* [ENHANCEMENT] Add external refresh stats #933
|
||||
|
||||
## 1.8.0 / 2024-09-14
|
||||
|
||||
* [FEATURE] Add tasks action collector. Enable using `--collector.tasks.actions`. #778
|
||||
* [FEATURE] Add additional nodes metrics for indexing pressure monitoring. #904
|
||||
|
||||
## 1.7.0 / 2023-12-02
|
||||
|
||||
BREAKING CHANGES:
|
||||
|
||||
The flag `--es.snapshots` has been renamed to `--collector.snapshots`.
|
||||
|
||||
* [CHANGE] Rename --es.snapshots to --collector.snapshots #789
|
||||
* [CHANGE] Add cluster label to `elasticsearch_node_shards_total` metric #639
|
||||
* [FEATURE] Add watermark metrics #611
|
||||
* [FEATURE] Add `elasticsearch_indices_settings_creation_timestamp_seconds` metric #816
|
||||
|
||||
## 1.6.0 / 2023-06-22
|
||||
|
||||
BREAKING CHANGES:
|
||||
|
||||
The flag `--es.cluster_settings` has been renamed to `--collector.clustersettings`.
|
||||
|
||||
* [CHANGE] Rename --es.cluster_settings to --collector.clustersettings
|
||||
* [FEATURE] Add ILM metrics #513
|
||||
* [ENHANCEMENT] Add ElasticCloud node roles to role label #652
|
||||
* [ENHANCEMENT] Add ability to use AWS IAM role for authentication #653
|
||||
* [ENHANCEMENT] Add metric for index replica count #483
|
||||
* [BUGFIX] Set elasticsearch_clusterinfo_version_info guage to 1 #728
|
||||
* [BUGFIX] Fix index field counts with nested fields #675
|
||||
|
||||
|
||||
## 1.5.0 / 2022-07-28
|
||||
|
||||
* [FEATURE] Add metrics collection for data stream statistics #592
|
||||
* [FEATURE] Support for AWS Elasticsearch using AWS SDK v2 #597
|
||||
* [BUGFIX] Fix cluster settings collection when max_shards_per_node is manually set. #603
|
||||
|
||||
## 1.4.0 / 2022-06-29
|
||||
|
||||
* [BREAKING] Remove ENV var support for most non-sensitive options. #518
|
||||
|
||||
193
Makefile.common
193
Makefile.common
@ -1,4 +1,4 @@
|
||||
# Copyright 2018 The Prometheus Authors
|
||||
# Copyright The Prometheus Authors
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
@ -49,25 +49,29 @@ endif
|
||||
GOTEST := $(GO) test
|
||||
GOTEST_DIR :=
|
||||
ifneq ($(CIRCLE_JOB),)
|
||||
ifneq ($(shell which gotestsum),)
|
||||
ifneq ($(shell command -v gotestsum 2> /dev/null),)
|
||||
GOTEST_DIR := test-results
|
||||
GOTEST := gotestsum --junitfile $(GOTEST_DIR)/unit-tests.xml --
|
||||
endif
|
||||
endif
|
||||
|
||||
PROMU_VERSION ?= 0.13.0
|
||||
PROMU_VERSION ?= 0.17.0
|
||||
PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz
|
||||
|
||||
SKIP_GOLANGCI_LINT :=
|
||||
GOLANGCI_LINT :=
|
||||
GOLANGCI_LINT_OPTS ?=
|
||||
GOLANGCI_LINT_VERSION ?= v1.45.2
|
||||
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
|
||||
GOLANGCI_LINT_VERSION ?= v2.7.2
|
||||
GOLANGCI_FMT_OPTS ?=
|
||||
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
|
||||
# windows isn't included here because of the path separator being different.
|
||||
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
|
||||
ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386))
|
||||
ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386 arm64))
|
||||
# If we're in CI and there is an Actions file, that means the linter
|
||||
# is being run in Actions, so we don't need to run it here.
|
||||
ifeq (,$(CIRCLE_JOB))
|
||||
ifneq (,$(SKIP_GOLANGCI_LINT))
|
||||
GOLANGCI_LINT :=
|
||||
else ifeq (,$(CIRCLE_JOB))
|
||||
GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint
|
||||
else ifeq (,$(wildcard .github/workflows/golangci-lint.yml))
|
||||
GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint
|
||||
@ -78,16 +82,39 @@ endif
|
||||
PREFIX ?= $(shell pwd)
|
||||
BIN_DIR ?= $(shell pwd)
|
||||
DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))
|
||||
DOCKERFILE_PATH ?= ./Dockerfile
|
||||
DOCKERBUILD_CONTEXT ?= ./
|
||||
DOCKER_REPO ?= prom
|
||||
|
||||
# Check if deprecated DOCKERFILE_PATH is set
|
||||
ifdef DOCKERFILE_PATH
|
||||
$(error DOCKERFILE_PATH is deprecated. Use DOCKERFILE_VARIANTS ?= $(DOCKERFILE_PATH) in the Makefile)
|
||||
endif
|
||||
|
||||
DOCKER_ARCHS ?= amd64
|
||||
DOCKERFILE_VARIANTS ?= Dockerfile $(wildcard Dockerfile.*)
|
||||
|
||||
# Function to extract variant from Dockerfile label.
|
||||
# Returns the variant name from io.prometheus.image.variant label, or "default" if not found.
|
||||
define dockerfile_variant
|
||||
$(strip $(or $(shell sed -n 's/.*io\.prometheus\.image\.variant="\([^"]*\)".*/\1/p' $(1)),default))
|
||||
endef
|
||||
|
||||
# Check for duplicate variant names (including default for Dockerfiles without labels).
|
||||
DOCKERFILE_VARIANT_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)))
|
||||
DOCKERFILE_VARIANT_NAMES_SORTED := $(sort $(DOCKERFILE_VARIANT_NAMES))
|
||||
ifneq ($(words $(DOCKERFILE_VARIANT_NAMES)),$(words $(DOCKERFILE_VARIANT_NAMES_SORTED)))
|
||||
$(error Duplicate variant names found. Each Dockerfile must have a unique io.prometheus.image.variant label, and only one can be without a label (default))
|
||||
endif
|
||||
|
||||
# Build variant:dockerfile pairs for shell iteration.
|
||||
DOCKERFILE_VARIANTS_WITH_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)):$(df))
|
||||
|
||||
BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS))
|
||||
PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS))
|
||||
TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS))
|
||||
|
||||
SANITIZED_DOCKER_IMAGE_TAG := $(subst +,-,$(DOCKER_IMAGE_TAG))
|
||||
|
||||
ifeq ($(GOHOSTARCH),amd64)
|
||||
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows))
|
||||
# Only supported on amd64
|
||||
@ -106,7 +133,7 @@ common-all: precheck style check_license lint yamllint unused build test
|
||||
.PHONY: common-style
|
||||
common-style:
|
||||
@echo ">> checking code style"
|
||||
@fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \
|
||||
@fmtRes=$$($(GOFMT) -d $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -name '*.go' -print)); \
|
||||
if [ -n "$${fmtRes}" ]; then \
|
||||
echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \
|
||||
echo "Please ensure you are using $$($(GO) version) for formatting code."; \
|
||||
@ -116,13 +143,19 @@ common-style:
|
||||
.PHONY: common-check_license
|
||||
common-check_license:
|
||||
@echo ">> checking license header"
|
||||
@licRes=$$(for file in $$(find . -type f -iname '*.go' ! -path './vendor/*') ; do \
|
||||
@licRes=$$(for file in $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -type f -iname '*.go' -print) ; do \
|
||||
awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \
|
||||
done); \
|
||||
if [ -n "$${licRes}" ]; then \
|
||||
echo "license header checking failed:"; echo "$${licRes}"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@echo ">> checking for copyright years 2026 or later"
|
||||
@futureYearRes=$$(git grep -E 'Copyright (202[6-9]|20[3-9][0-9])' -- '*.go' ':!:vendor/*' || true); \
|
||||
if [ -n "$${futureYearRes}" ]; then \
|
||||
echo "Files with copyright year 2026 or later found (should use 'Copyright The Prometheus Authors'):"; echo "$${futureYearRes}"; \
|
||||
exit 1; \
|
||||
fi
|
||||
|
||||
.PHONY: common-deps
|
||||
common-deps:
|
||||
@ -133,7 +166,7 @@ common-deps:
|
||||
update-go-deps:
|
||||
@echo ">> updating Go dependencies"
|
||||
@for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
|
||||
$(GO) get -d $$m; \
|
||||
$(GO) get $$m; \
|
||||
done
|
||||
$(GO) mod tidy
|
||||
|
||||
@ -151,9 +184,13 @@ $(GOTEST_DIR):
|
||||
@mkdir -p $@
|
||||
|
||||
.PHONY: common-format
|
||||
common-format:
|
||||
common-format: $(GOLANGCI_LINT)
|
||||
@echo ">> formatting code"
|
||||
$(GO) fmt $(pkgs)
|
||||
ifdef GOLANGCI_LINT
|
||||
@echo ">> formatting code with golangci-lint"
|
||||
$(GOLANGCI_LINT) fmt $(GOLANGCI_FMT_OPTS)
|
||||
endif
|
||||
|
||||
.PHONY: common-vet
|
||||
common-vet:
|
||||
@ -164,16 +201,20 @@ common-vet:
|
||||
common-lint: $(GOLANGCI_LINT)
|
||||
ifdef GOLANGCI_LINT
|
||||
@echo ">> running golangci-lint"
|
||||
# 'go list' needs to be executed before staticcheck to prepopulate the modules cache.
|
||||
# Otherwise staticcheck might fail randomly for some reason not yet explained.
|
||||
$(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null
|
||||
$(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs)
|
||||
endif
|
||||
|
||||
.PHONY: common-lint-fix
|
||||
common-lint-fix: $(GOLANGCI_LINT)
|
||||
ifdef GOLANGCI_LINT
|
||||
@echo ">> running golangci-lint fix"
|
||||
$(GOLANGCI_LINT) run --fix $(GOLANGCI_LINT_OPTS) $(pkgs)
|
||||
endif
|
||||
|
||||
.PHONY: common-yamllint
|
||||
common-yamllint:
|
||||
@echo ">> running yamllint on all YAML files in the repository"
|
||||
ifeq (, $(shell which yamllint))
|
||||
ifeq (, $(shell command -v yamllint 2> /dev/null))
|
||||
@echo "yamllint not installed so skipping"
|
||||
else
|
||||
yamllint .
|
||||
@ -199,31 +240,117 @@ common-tarball: promu
|
||||
@echo ">> building release tarball"
|
||||
$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR)
|
||||
|
||||
.PHONY: common-docker-repo-name
|
||||
common-docker-repo-name:
|
||||
@echo "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)"
|
||||
|
||||
.PHONY: common-docker $(BUILD_DOCKER_ARCHS)
|
||||
common-docker: $(BUILD_DOCKER_ARCHS)
|
||||
$(BUILD_DOCKER_ARCHS): common-docker-%:
|
||||
docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" \
|
||||
-f $(DOCKERFILE_PATH) \
|
||||
--build-arg ARCH="$*" \
|
||||
--build-arg OS="linux" \
|
||||
$(DOCKERBUILD_CONTEXT)
|
||||
@for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
|
||||
dockerfile=$${variant#*:}; \
|
||||
variant_name=$${variant%%:*}; \
|
||||
distroless_arch="$*"; \
|
||||
if [ "$*" = "armv7" ]; then \
|
||||
distroless_arch="arm"; \
|
||||
fi; \
|
||||
if [ "$$dockerfile" = "Dockerfile" ]; then \
|
||||
echo "Building default variant ($$variant_name) for linux-$* using $$dockerfile"; \
|
||||
docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \
|
||||
-f $$dockerfile \
|
||||
--build-arg ARCH="$*" \
|
||||
--build-arg OS="linux" \
|
||||
--build-arg DISTROLESS_ARCH="$$distroless_arch" \
|
||||
$(DOCKERBUILD_CONTEXT); \
|
||||
if [ "$$variant_name" != "default" ]; then \
|
||||
echo "Tagging default variant with $$variant_name suffix"; \
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \
|
||||
"$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \
|
||||
fi; \
|
||||
else \
|
||||
echo "Building $$variant_name variant for linux-$* using $$dockerfile"; \
|
||||
docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" \
|
||||
-f $$dockerfile \
|
||||
--build-arg ARCH="$*" \
|
||||
--build-arg OS="linux" \
|
||||
--build-arg DISTROLESS_ARCH="$$distroless_arch" \
|
||||
$(DOCKERBUILD_CONTEXT); \
|
||||
fi; \
|
||||
done
|
||||
|
||||
.PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS)
|
||||
common-docker-publish: $(PUBLISH_DOCKER_ARCHS)
|
||||
$(PUBLISH_DOCKER_ARCHS): common-docker-publish-%:
|
||||
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)"
|
||||
@for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
|
||||
dockerfile=$${variant#*:}; \
|
||||
variant_name=$${variant%%:*}; \
|
||||
if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
|
||||
echo "Pushing $$variant_name variant for linux-$*"; \
|
||||
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \
|
||||
fi; \
|
||||
if [ "$$dockerfile" = "Dockerfile" ]; then \
|
||||
echo "Pushing default variant ($$variant_name) for linux-$*"; \
|
||||
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"; \
|
||||
fi; \
|
||||
if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \
|
||||
if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
|
||||
echo "Pushing $$variant_name variant version tags for linux-$*"; \
|
||||
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \
|
||||
fi; \
|
||||
if [ "$$dockerfile" = "Dockerfile" ]; then \
|
||||
echo "Pushing default variant version tag for linux-$*"; \
|
||||
docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \
|
||||
fi; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION)))
|
||||
.PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS)
|
||||
common-docker-tag-latest: $(TAG_DOCKER_ARCHS)
|
||||
$(TAG_DOCKER_ARCHS): common-docker-tag-latest-%:
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"
|
||||
@for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
|
||||
dockerfile=$${variant#*:}; \
|
||||
variant_name=$${variant%%:*}; \
|
||||
if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
|
||||
echo "Tagging $$variant_name variant for linux-$* as latest"; \
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest-$$variant_name"; \
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \
|
||||
fi; \
|
||||
if [ "$$dockerfile" = "Dockerfile" ]; then \
|
||||
echo "Tagging default variant ($$variant_name) for linux-$* as latest"; \
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"; \
|
||||
docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
.PHONY: common-docker-manifest
|
||||
common-docker-manifest:
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(DOCKER_IMAGE_TAG))
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)"
|
||||
@for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \
|
||||
dockerfile=$${variant#*:}; \
|
||||
variant_name=$${variant%%:*}; \
|
||||
if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
|
||||
echo "Creating manifest for $$variant_name variant"; \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name); \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \
|
||||
fi; \
|
||||
if [ "$$dockerfile" = "Dockerfile" ]; then \
|
||||
echo "Creating default variant ($$variant_name) manifest"; \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)); \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"; \
|
||||
fi; \
|
||||
if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \
|
||||
if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \
|
||||
echo "Creating manifest for $$variant_name variant version tag"; \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name); \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \
|
||||
fi; \
|
||||
if [ "$$dockerfile" = "Dockerfile" ]; then \
|
||||
echo "Creating default variant version tag manifest"; \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):v$(DOCKER_MAJOR_VERSION_TAG)); \
|
||||
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)"; \
|
||||
fi; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
.PHONY: promu
|
||||
promu: $(PROMU)
|
||||
@ -235,8 +362,8 @@ $(PROMU):
|
||||
cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu
|
||||
rm -r $(PROMU_TMP)
|
||||
|
||||
.PHONY: proto
|
||||
proto:
|
||||
.PHONY: common-proto
|
||||
common-proto:
|
||||
@echo ">> generating code from proto files"
|
||||
@./scripts/genproto.sh
|
||||
|
||||
@ -248,6 +375,10 @@ $(GOLANGCI_LINT):
|
||||
| sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION)
|
||||
endif
|
||||
|
||||
.PHONY: common-print-golangci-lint-version
|
||||
common-print-golangci-lint-version:
|
||||
@echo $(GOLANGCI_LINT_VERSION)
|
||||
|
||||
.PHONY: precheck
|
||||
precheck::
|
||||
|
||||
@ -262,3 +393,9 @@ $(1)_precheck:
|
||||
exit 1; \
|
||||
fi
|
||||
endef
|
||||
|
||||
govulncheck: install-govulncheck
|
||||
govulncheck ./...
|
||||
|
||||
install-govulncheck:
|
||||
command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest
|
||||
|
||||
285
README.md
285
README.md
@ -1,13 +1,18 @@
|
||||
# Elasticsearch Exporter
|
||||
|
||||
[](https://circleci.com/gh/prometheus-community/elasticsearch_exporter)
|
||||
[](https://goreportcard.com/report/github.com/prometheus-community/elasticsearch_exporter)
|
||||
|
||||
Prometheus exporter for various metrics about Elasticsearch, written in Go.
|
||||
Prometheus exporter for various metrics about Elasticsearch and OpenSearch, written in Go.
|
||||
|
||||
## Supported Versions
|
||||
|
||||
We support all currently supported versions of Elasticsearch and OpenSearch. This project will make reasonable attempts to maintain compatibility with previous versions but considerations will be made for code maintainability and favoring supported versions. Where Elasticsearch and OpenSearch diverge, this project will make reasonable attempts to maintain compatibility with both. Some collectors may only be compatible with one or the other.
|
||||
|
||||
### Installation
|
||||
|
||||
For pre-built binaries please take a look at the releases.
|
||||
https://github.com/prometheus-community/elasticsearch_exporter/releases
|
||||
<https://github.com/prometheus-community/elasticsearch_exporter/releases>
|
||||
|
||||
#### Docker
|
||||
|
||||
@ -30,7 +35,7 @@ elasticsearch_exporter:
|
||||
|
||||
#### Kubernetes
|
||||
|
||||
You can find a helm chart in the prometheus-community charts repository at https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-elasticsearch-exporter
|
||||
You can find a helm chart in the prometheus-community charts repository at <https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-elasticsearch-exporter>
|
||||
|
||||
```bash
|
||||
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
||||
@ -42,37 +47,48 @@ helm install [RELEASE_NAME] prometheus-community/prometheus-elasticsearch-export
|
||||
**NOTE:** The exporter fetches information from an Elasticsearch cluster on every scrape, therefore having a too short scrape interval can impose load on ES master nodes, particularly if you run with `--es.all` and `--es.indices`. We suggest you measure how long fetching `/_nodes/stats` and `/_all/_stats` takes for your ES cluster to determine whether your scraping interval is too short. As a last resort, you can scrape this exporter using a dedicated job with its own scraping interval.
|
||||
|
||||
Below is the command line options summary:
|
||||
|
||||
```bash
|
||||
elasticsearch_exporter --help
|
||||
```
|
||||
|
||||
| Argument | Introduced in Version | Description | Default |
|
||||
| -------- | --------------------- | ----------- | ----------- |
|
||||
| es.uri | 1.0.2 | Address (host and port) of the Elasticsearch node we should connect to. This could be a local node (`localhost:9200`, for instance), or the address of a remote Elasticsearch server. When basic auth is needed, specify as: `<proto>://<user>:<password>@<host>:<port>`. E.G., `http://admin:pass@localhost:9200`. Special characters in the user credentials need to be URL-encoded. | http://localhost:9200 |
|
||||
| es.all | 1.0.2 | If true, query stats for all nodes in the cluster, rather than just the node we connect to. | false |
|
||||
| es.cluster_settings | 1.1.0rc1 | If true, query stats for cluster settings. | false |
|
||||
| es.indices | 1.0.2 | If true, query stats for all indices in the cluster. | false |
|
||||
| es.indices_settings | 1.0.4rc1 | If true, query settings stats for all indices in the cluster. | false |
|
||||
| es.indices_mappings | 1.2.0 | If true, query stats for mappings of all indices of the cluster. | false |
|
||||
| es.aliases | 1.0.4rc1 | If true, include informational aliases metrics. | true |
|
||||
| es.shards | 1.0.3rc1 | If true, query stats for all indices in the cluster, including shard-level stats (implies `es.indices=true`). | false |
|
||||
| es.snapshots | 1.0.4rc1 | If true, query stats for the cluster snapshots. | false |
|
||||
| es.slm | | If true, query stats for SLM. | false |
|
||||
| es.timeout | 1.0.2 | Timeout for trying to get stats from Elasticsearch. (ex: 20s) | 5s |
|
||||
| es.ca | 1.0.2 | Path to PEM file that contains trusted Certificate Authorities for the Elasticsearch connection. | |
|
||||
| es.client-private-key | 1.0.2 | Path to PEM file that contains the private key for client auth when connecting to Elasticsearch. | |
|
||||
| es.client-cert | 1.0.2 | Path to PEM file that contains the corresponding cert for the private key to connect to Elasticsearch. | |
|
||||
| es.clusterinfo.interval | 1.1.0rc1 | Cluster info update interval for the cluster label | 5m |
|
||||
| es.ssl-skip-verify | 1.0.4rc1 | Skip SSL verification when connecting to Elasticsearch. | false |
|
||||
| web.listen-address | 1.0.2 | Address to listen on for web interface and telemetry. | :9114 |
|
||||
| web.telemetry-path | 1.0.2 | Path under which to expose metrics. | /metrics |
|
||||
| version | 1.0.2 | Show version info on stdout and exit. | |
|
||||
| Argument | Introduced in Version | Description | Default |
|
||||
| ----------------------- | --------------------- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ----------- |
|
||||
| collector.clustersettings| 1.6.0 | If true, query stats for cluster settings (As of v1.6.0, this flag has replaced "es.cluster_settings"). | false |
|
||||
| es.uri | 1.0.2 | Address (host and port) of the Elasticsearch node we should connect to **when running in single-target mode**. Leave empty (the default) when you want to run the exporter only as a multi-target `/probe` endpoint. When basic auth is needed, specify as: `<proto>://<user>:<password>@<host>:<port>`. E.G., `http://admin:pass@localhost:9200`. Special characters in the user credentials need to be URL-encoded. | "" |
|
||||
| es.all | 1.0.2 | If true, query stats for all nodes in the cluster, rather than just the node we connect to. | false |
|
||||
| es.indices | 1.0.2 | If true, query stats for all indices in the cluster. | false |
|
||||
| es.indices_settings | 1.0.4rc1 | If true, query settings stats for all indices in the cluster. | false |
|
||||
| es.indices_mappings | 1.2.0 | If true, query stats for mappings of all indices of the cluster. | false |
|
||||
| es.aliases | 1.0.4rc1 | If true, include informational aliases metrics. | true |
|
||||
| es.ilm | 1.6.0 | If true, query index lifecycle policies for indices in the cluster.
|
||||
| es.shards | 1.0.3rc1 | If true, query stats for all indices in the cluster, including shard-level stats (implies `es.indices=true`). | false |
|
||||
| collector.snapshots | 1.0.4rc1 | If true, query stats for the cluster snapshots. (As of v1.7.0, this flag has replaced "es.snapshots"). | false |
|
||||
| collector.health-report | 1.10.0 | If true, query the health report (requires elasticsearch 8.7.0 or later) | false |
|
||||
| es.slm | | If true, query stats for SLM. | false |
|
||||
| es.data_stream | | If true, query state for Data Steams. | false |
|
||||
| es.timeout | 1.0.2 | Timeout for trying to get stats from Elasticsearch. (ex: 20s) | 5s |
|
||||
| es.ca | 1.0.2 | Path to PEM file that contains trusted Certificate Authorities for the Elasticsearch connection. | |
|
||||
| es.client-private-key | 1.0.2 | Path to PEM file that contains the private key for client auth when connecting to Elasticsearch. | |
|
||||
| es.client-cert | 1.0.2 | Path to PEM file that contains the corresponding cert for the private key to connect to Elasticsearch. | |
|
||||
| es.clusterinfo.interval | 1.1.0rc1 | Cluster info update interval for the cluster label | 5m |
|
||||
| es.ssl-skip-verify | 1.0.4rc1 | Skip SSL verification when connecting to Elasticsearch. | false |
|
||||
| web.listen-address | 1.0.2 | Address to listen on for web interface and telemetry. | :9114 |
|
||||
| web.telemetry-path | 1.0.2 | Path under which to expose metrics. | /metrics |
|
||||
| aws.region | 1.5.0 | Region for AWS elasticsearch | |
|
||||
| aws.role-arn | 1.6.0 | Role ARN of an IAM role to assume. | |
|
||||
| config.file | 1.10.0 | Path to a YAML configuration file that defines `auth_modules:` used by the `/probe` multi-target endpoint. Leave unset when not using multi-target mode. | |
|
||||
| version | 1.0.2 | Show version info on stdout and exit. | |
|
||||
|
||||
Commandline parameters start with a single `-` for versions less than `1.1.0rc1`.
|
||||
For versions greater than `1.1.0rc1`, commandline parameters are specified with `--`.
|
||||
|
||||
The API key used to connect can be set with the `ES_API_KEY` environment variable.
|
||||
|
||||
#### Logging
|
||||
|
||||
Logging by the exporter is handled by the `log/slog` package. The output format can be customized with the `--log.format` flag which defaults to logfmt. The log level can be set with the `--log.level` flag which defaults to info. The output can be set to either stdout (default) or stderr with the `--log.output` flag.
|
||||
|
||||
#### Elasticsearch 7.x security privileges
|
||||
|
||||
Username and password can be passed either directly in the URI or through the `ES_USERNAME` and `ES_PASSWORD` environment variables.
|
||||
@ -82,165 +98,86 @@ ES 7.x supports RBACs. The following security privileges are required for the el
|
||||
|
||||
Setting | Privilege Required | Description
|
||||
:---- | :---- | :----
|
||||
collector.clustersettings| `cluster` `monitor` |
|
||||
exporter defaults | `cluster` `monitor` | All cluster read-only operations, like cluster health and state, hot threads, node info, node and cluster stats, and pending cluster tasks. |
|
||||
es.cluster_settings | `cluster` `monitor` |
|
||||
es.indices | `indices` `monitor` (per index or `*`) | All actions that are required for monitoring (recovery, segments info, index stats and status)
|
||||
es.indices_settings | `indices` `monitor` (per index or `*`) |
|
||||
es.indices_mappings | `indices` `view_index_metadata` (per index or `*`) |
|
||||
es.shards | not sure if `indices` or `cluster` `monitor` or both |
|
||||
es.snapshots | `cluster:admin/snapshot/status` and `cluster:admin/repository/get` | [ES Forum Post](https://discuss.elastic.co/t/permissions-for-backup-user-with-x-pack/88057)
|
||||
es.slm | `read_slm`
|
||||
collector.snapshots | `cluster:admin/snapshot/status` and `cluster:admin/repository/get` | [ES Forum Post](https://discuss.elastic.co/t/permissions-for-backup-user-with-x-pack/88057)
|
||||
es.slm | `manage_slm`
|
||||
es.data_stream | `monitor` or `manage` (per index or `*`) |
|
||||
|
||||
Further Information
|
||||
- [Build in Users](https://www.elastic.co/guide/en/elastic-stack-overview/7.3/built-in-users.html)
|
||||
|
||||
- [Built in Users](https://www.elastic.co/guide/en/elastic-stack-overview/7.3/built-in-users.html)
|
||||
- [Defining Roles](https://www.elastic.co/guide/en/elastic-stack-overview/7.3/defining-roles.html)
|
||||
- [Privileges](https://www.elastic.co/guide/en/elastic-stack-overview/7.3/security-privileges.html)
|
||||
|
||||
### Multi-Target Scraping (beta)
|
||||
|
||||
From v2.X the exporter exposes `/probe` allowing one running instance to scrape many clusters.
|
||||
|
||||
Supported `auth_module` types:
|
||||
|
||||
| type | YAML fields | Injected into request |
|
||||
| ---------- | ----------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
|
||||
| `userpass` | `userpass.username`, `userpass.password`, optional `options:` map | Sets HTTP basic-auth header, appends `options` as query parameters |
|
||||
| `apikey` | `apikey:` Base64 API-Key string, optional `options:` map | Adds `Authorization: ApiKey …` header, appends `options` |
|
||||
| `aws` | `aws.region`, optional `aws.role_arn`, optional `options:` map | Uses AWS SigV4 signing transport for HTTP(S) requests, appends `options` |
|
||||
| `tls` | `tls.ca_file`, `tls.cert_file`, `tls.key_file` | Uses client certificate authentication via TLS; cannot be mixed with other auth types |
|
||||
|
||||
Example config:
|
||||
|
||||
```yaml
|
||||
# exporter-config.yml
|
||||
auth_modules:
|
||||
prod_basic:
|
||||
type: userpass
|
||||
userpass:
|
||||
username: metrics
|
||||
password: s3cr3t
|
||||
|
||||
staging_key:
|
||||
type: apikey
|
||||
apikey: "bXk6YXBpa2V5Ig==" # base64 id:key
|
||||
options:
|
||||
sslmode: disable
|
||||
```
|
||||
|
||||
Run exporter:
|
||||
|
||||
```bash
|
||||
./elasticsearch_exporter --config.file=exporter-config.yml
|
||||
```
|
||||
|
||||
Prometheus scrape_config:
|
||||
|
||||
```yaml
|
||||
- job_name: es
|
||||
metrics_path: /probe
|
||||
params:
|
||||
auth_module: [staging_key]
|
||||
static_configs:
|
||||
- targets: ["https://es-stage:9200"]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: exporter:9114
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `/metrics` serves a single, process-wide registry and is intended for single-target mode.
|
||||
- `/probe` creates a fresh registry per scrape for the given `target` allowing multi-target scraping.
|
||||
- Any `options:` under an auth module will be appended as URL query parameters to the target URL.
|
||||
- The `tls` auth module (client certificate authentication) is intended for self‑managed Elasticsearch/OpenSearch deployments. Amazon OpenSearch Service typically authenticates at the domain edge with IAM/SigV4 and does not support client certificate authentication; use the `aws` auth module instead when scraping Amazon OpenSearch Service domains.
|
||||
|
||||
### Metrics
|
||||
|
||||
|Name |Type |Cardinality |Help
|
||||
|---- |---- |----------- |----
|
||||
| elasticsearch_breakers_estimated_size_bytes | gauge | 4 | Estimated size in bytes of breaker
|
||||
| elasticsearch_breakers_limit_size_bytes | gauge | 4 | Limit size in bytes for breaker
|
||||
| elasticsearch_breakers_tripped | counter | 4 | tripped for breaker
|
||||
| elasticsearch_cluster_health_active_primary_shards | gauge | 1 | The number of primary shards in your cluster. This is an aggregate total across all indices.
|
||||
| elasticsearch_cluster_health_active_shards | gauge | 1 | Aggregate total of all shards across all indices, which includes replica shards.
|
||||
| elasticsearch_cluster_health_delayed_unassigned_shards | gauge | 1 | Shards delayed to reduce reallocation overhead
|
||||
| elasticsearch_cluster_health_initializing_shards | gauge | 1 | Count of shards that are being freshly created.
|
||||
| elasticsearch_cluster_health_number_of_data_nodes | gauge | 1 | Number of data nodes in the cluster.
|
||||
| elasticsearch_cluster_health_number_of_in_flight_fetch | gauge | 1 | The number of ongoing shard info requests.
|
||||
| elasticsearch_cluster_health_number_of_nodes | gauge | 1 | Number of nodes in the cluster.
|
||||
| elasticsearch_cluster_health_number_of_pending_tasks | gauge | 1 | Cluster level changes which have not yet been executed
|
||||
| elasticsearch_cluster_health_task_max_waiting_in_queue_millis | gauge | 1 | Max time in millis that a task is waiting in queue.
|
||||
| elasticsearch_cluster_health_relocating_shards | gauge | 1 | The number of shards that are currently moving from one node to another node.
|
||||
| elasticsearch_cluster_health_status | gauge | 3 | Whether all primary and replica shards are allocated.
|
||||
| elasticsearch_cluster_health_timed_out | gauge | 1 | Number of cluster health checks timed out
|
||||
| elasticsearch_cluster_health_unassigned_shards | gauge | 1 | The number of shards that exist in the cluster state, but cannot be found in the cluster itself.
|
||||
| elasticsearch_clustersettings_stats_max_shards_per_node | gauge | 0 | Current maximum number of shards per node setting.
|
||||
| elasticsearch_filesystem_data_available_bytes | gauge | 1 | Available space on block device in bytes
|
||||
| elasticsearch_filesystem_data_free_bytes | gauge | 1 | Free space on block device in bytes
|
||||
| elasticsearch_filesystem_data_size_bytes | gauge | 1 | Size of block device in bytes
|
||||
| elasticsearch_filesystem_io_stats_device_operations_count | gauge | 1 | Count of disk operations
|
||||
| elasticsearch_filesystem_io_stats_device_read_operations_count | gauge | 1 | Count of disk read operations
|
||||
| elasticsearch_filesystem_io_stats_device_write_operations_count | gauge | 1 | Count of disk write operations
|
||||
| elasticsearch_filesystem_io_stats_device_read_size_kilobytes_sum | gauge | 1 | Total kilobytes read from disk
|
||||
| elasticsearch_filesystem_io_stats_device_write_size_kilobytes_sum | gauge | 1 | Total kilobytes written to disk
|
||||
| elasticsearch_indices_active_queries | gauge | 1 | The number of currently active queries
|
||||
| elasticsearch_indices_docs | gauge | 1 | Count of documents on this node
|
||||
| elasticsearch_indices_docs_deleted | gauge | 1 | Count of deleted documents on this node
|
||||
| elasticsearch_indices_docs_primary | gauge | | Count of documents with only primary shards on all nodes
|
||||
| elasticsearch_indices_fielddata_evictions | counter | 1 | Evictions from field data
|
||||
| elasticsearch_indices_fielddata_memory_size_bytes | gauge | 1 | Field data cache memory usage in bytes
|
||||
| elasticsearch_indices_filter_cache_evictions | counter | 1 | Evictions from filter cache
|
||||
| elasticsearch_indices_filter_cache_memory_size_bytes | gauge | 1 | Filter cache memory usage in bytes
|
||||
| elasticsearch_indices_flush_time_seconds | counter | 1 | Cumulative flush time in seconds
|
||||
| elasticsearch_indices_flush_total | counter | 1 | Total flushes
|
||||
| elasticsearch_indices_get_exists_time_seconds | counter | 1 | Total time get exists in seconds
|
||||
| elasticsearch_indices_get_exists_total | counter | 1 | Total get exists operations
|
||||
| elasticsearch_indices_get_missing_time_seconds | counter | 1 | Total time of get missing in seconds
|
||||
| elasticsearch_indices_get_missing_total | counter | 1 | Total get missing
|
||||
| elasticsearch_indices_get_time_seconds | counter | 1 | Total get time in seconds
|
||||
| elasticsearch_indices_get_total | counter | 1 | Total get
|
||||
| elasticsearch_indices_indexing_delete_time_seconds_total | counter | 1 | Total time indexing delete in seconds
|
||||
| elasticsearch_indices_indexing_delete_total | counter | 1 | Total indexing deletes
|
||||
| elasticsearch_indices_index_current | gauge | 1 | The number of documents currently being indexed to an index
|
||||
| elasticsearch_indices_indexing_index_time_seconds_total | counter | 1 | Cumulative index time in seconds
|
||||
| elasticsearch_indices_indexing_index_total | counter | 1 | Total index calls
|
||||
| elasticsearch_indices_mappings_stats_fields | gauge | 1 | Count of fields currently mapped by index
|
||||
| elasticsearch_indices_mappings_stats_json_parse_failures_total | counter | 0 | Number of errors while parsing JSON
|
||||
| elasticsearch_indices_mappings_stats_scrapes_total | counter | 0 | Current total Elasticsearch Indices Mappings scrapes
|
||||
| elasticsearch_indices_mappings_stats_up | gauge | 0 | Was the last scrape of the Elasticsearch Indices Mappings endpoint successful
|
||||
| elasticsearch_indices_merges_docs_total | counter | 1 | Cumulative docs merged
|
||||
| elasticsearch_indices_merges_total | counter | 1 | Total merges
|
||||
| elasticsearch_indices_merges_total_size_bytes_total | counter | 1 | Total merge size in bytes
|
||||
| elasticsearch_indices_merges_total_time_seconds_total | counter | 1 | Total time spent merging in seconds
|
||||
| elasticsearch_indices_query_cache_cache_total | counter | 1 | Count of query cache
|
||||
| elasticsearch_indices_query_cache_cache_size | gauge | 1 | Size of query cache
|
||||
| elasticsearch_indices_query_cache_count | counter | 2 | Count of query cache hit/miss
|
||||
| elasticsearch_indices_query_cache_evictions | counter | 1 | Evictions from query cache
|
||||
| elasticsearch_indices_query_cache_memory_size_bytes | gauge | 1 | Query cache memory usage in bytes
|
||||
| elasticsearch_indices_query_cache_total | counter | 1 | Size of query cache total
|
||||
| elasticsearch_indices_refresh_time_seconds_total | counter | 1 | Total time spent refreshing in seconds
|
||||
| elasticsearch_indices_refresh_total | counter | 1 | Total refreshes
|
||||
| elasticsearch_indices_request_cache_count | counter | 2 | Count of request cache hit/miss
|
||||
| elasticsearch_indices_request_cache_evictions | counter | 1 | Evictions from request cache
|
||||
| elasticsearch_indices_request_cache_memory_size_bytes | gauge | 1 | Request cache memory usage in bytes
|
||||
| elasticsearch_indices_search_fetch_time_seconds | counter | 1 | Total search fetch time in seconds
|
||||
| elasticsearch_indices_search_fetch_total | counter | 1 | Total number of fetches
|
||||
| elasticsearch_indices_search_query_time_seconds | counter | 1 | Total search query time in seconds
|
||||
| elasticsearch_indices_search_query_total | counter | 1 | Total number of queries
|
||||
| elasticsearch_indices_segments_count | gauge | 1 | Count of index segments on this node
|
||||
| elasticsearch_indices_segments_memory_bytes | gauge | 1 | Current memory size of segments in bytes
|
||||
| elasticsearch_indices_settings_stats_read_only_indices | gauge | 1 | Count of indices that have read_only_allow_delete=true
|
||||
| elasticsearch_indices_settings_total_fields | gauge | | Index setting value for index.mapping.total_fields.limit (total allowable mapped fields in an index)
|
||||
| elasticsearch_indices_shards_docs | gauge | 3 | Count of documents on this shard
|
||||
| elasticsearch_indices_shards_docs_deleted | gauge | 3 | Count of deleted documents on each shard
|
||||
| elasticsearch_indices_store_size_bytes | gauge | 1 | Current size of stored index data in bytes
|
||||
| elasticsearch_indices_store_size_bytes_primary | gauge | | Current size of stored index data in bytes with only primary shards on all nodes
|
||||
| elasticsearch_indices_store_size_bytes_total | gauge | | Current size of stored index data in bytes with all shards on all nodes
|
||||
| elasticsearch_indices_store_throttle_time_seconds_total | counter | 1 | Throttle time for index store in seconds
|
||||
| elasticsearch_indices_translog_operations | counter | 1 | Total translog operations
|
||||
| elasticsearch_indices_translog_size_in_bytes | counter | 1 | Total translog size in bytes
|
||||
| elasticsearch_indices_warmer_time_seconds_total | counter | 1 | Total warmer time in seconds
|
||||
| elasticsearch_indices_warmer_total | counter | 1 | Total warmer count
|
||||
| elasticsearch_jvm_gc_collection_seconds_count | counter | 2 | Count of JVM GC runs
|
||||
| elasticsearch_jvm_gc_collection_seconds_sum | counter | 2 | GC run time in seconds
|
||||
| elasticsearch_jvm_memory_committed_bytes | gauge | 2 | JVM memory currently committed by area
|
||||
| elasticsearch_jvm_memory_max_bytes | gauge | 1 | JVM memory max
|
||||
| elasticsearch_jvm_memory_used_bytes | gauge | 2 | JVM memory currently used by area
|
||||
| elasticsearch_jvm_memory_pool_used_bytes | gauge | 3 | JVM memory currently used by pool
|
||||
| elasticsearch_jvm_memory_pool_max_bytes | counter | 3 | JVM memory max by pool
|
||||
| elasticsearch_jvm_memory_pool_peak_used_bytes | counter | 3 | JVM memory peak used by pool
|
||||
| elasticsearch_jvm_memory_pool_peak_max_bytes | counter | 3 | JVM memory peak max by pool
|
||||
| elasticsearch_os_cpu_percent | gauge | 1 | Percent CPU used by the OS
|
||||
| elasticsearch_os_load1 | gauge | 1 | Short-term load average
|
||||
| elasticsearch_os_load5 | gauge | 1 | Mid-term load average
|
||||
| elasticsearch_os_load15 | gauge | 1 | Long-term load average
|
||||
| elasticsearch_process_cpu_percent | gauge | 1 | Percent CPU used by process
|
||||
| elasticsearch_process_cpu_seconds_total | counter | 1 | Process CPU time in seconds
|
||||
| elasticsearch_process_mem_resident_size_bytes | gauge | 1 | Resident memory in use by process in bytes
|
||||
| elasticsearch_process_mem_share_size_bytes | gauge | 1 | Shared memory in use by process in bytes
|
||||
| elasticsearch_process_mem_virtual_size_bytes | gauge | 1 | Total virtual memory used in bytes
|
||||
| elasticsearch_process_open_files_count | gauge | 1 | Open file descriptors
|
||||
| elasticsearch_snapshot_stats_number_of_snapshots | gauge | 1 | Total number of snapshots
|
||||
| elasticsearch_snapshot_stats_oldest_snapshot_timestamp | gauge | 1 | Oldest snapshot timestamp
|
||||
| elasticsearch_snapshot_stats_snapshot_start_time_timestamp | gauge | 1 | Last snapshot start timestamp
|
||||
| elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds | gauge | 1 | Timestamp of the latest SUCCESS or PARTIAL snapshot
|
||||
| elasticsearch_snapshot_stats_snapshot_end_time_timestamp | gauge | 1 | Last snapshot end timestamp
|
||||
| elasticsearch_snapshot_stats_snapshot_number_of_failures | gauge | 1 | Last snapshot number of failures
|
||||
| elasticsearch_snapshot_stats_snapshot_number_of_indices | gauge | 1 | Last snapshot number of indices
|
||||
| elasticsearch_snapshot_stats_snapshot_failed_shards | gauge | 1 | Last snapshot failed shards
|
||||
| elasticsearch_snapshot_stats_snapshot_successful_shards | gauge | 1 | Last snapshot successful shards
|
||||
| elasticsearch_snapshot_stats_snapshot_total_shards | gauge | 1 | Last snapshot total shards
|
||||
| elasticsearch_thread_pool_active_count | gauge | 14 | Thread Pool threads active
|
||||
| elasticsearch_thread_pool_completed_count | counter | 14 | Thread Pool operations completed
|
||||
| elasticsearch_thread_pool_largest_count | gauge | 14 | Thread Pool largest threads count
|
||||
| elasticsearch_thread_pool_queue_count | gauge | 14 | Thread Pool operations queued
|
||||
| elasticsearch_thread_pool_rejected_count | counter | 14 | Thread Pool operations rejected
|
||||
| elasticsearch_thread_pool_threads_count | gauge | 14 | Thread Pool current threads count
|
||||
| elasticsearch_transport_rx_packets_total | counter | 1 | Count of packets received
|
||||
| elasticsearch_transport_rx_size_bytes_total | counter | 1 | Total number of bytes received
|
||||
| elasticsearch_transport_tx_packets_total | counter | 1 | Count of packets sent
|
||||
| elasticsearch_transport_tx_size_bytes_total | counter | 1 | Total number of bytes sent
|
||||
| elasticsearch_clusterinfo_last_retrieval_success_ts | gauge | 1 | Timestamp of the last successful cluster info retrieval
|
||||
| elasticsearch_clusterinfo_up | gauge | 1 | Up metric for the cluster info collector
|
||||
| elasticsearch_clusterinfo_version_info | gauge | 6 | Constant metric with ES version information as labels
|
||||
| elasticsearch_slm_stats_up | gauge | 0 | Up metric for SLM collector
|
||||
| elasticsearch_slm_stats_total_scrapes | counter | 0 | Number of scrapes for SLM collector
|
||||
| elasticsearch_slm_stats_json_parse_failures | counter | 0 | JSON parse failures for SLM collector
|
||||
| elasticsearch_slm_stats_retention_runs_total | counter | 0 | Total retention runs
|
||||
| elasticsearch_slm_stats_retention_failed_total | counter | 0 | Total failed retention runs
|
||||
| elasticsearch_slm_stats_retention_timed_out_total | counter | 0 | Total retention run timeouts
|
||||
| elasticsearch_slm_stats_retention_deletion_time_seconds | gauge | 0 | Retention run deletion time
|
||||
| elasticsearch_slm_stats_total_snapshots_taken_total | counter | 0 | Total snapshots taken
|
||||
| elasticsearch_slm_stats_total_snapshots_failed_total | counter | 0 | Total snapshots failed
|
||||
| elasticsearch_slm_stats_total_snapshots_deleted_total | counter | 0 | Total snapshots deleted
|
||||
| elasticsearch_slm_stats_snapshots_taken_total | counter | 1 | Snapshots taken by policy
|
||||
| elasticsearch_slm_stats_snapshots_failed_total | counter | 1 | Snapshots failed by policy
|
||||
| elasticsearch_slm_stats_snapshots_deleted_total | counter | 1 | Snapshots deleted by policy
|
||||
| elasticsearch_slm_stats_snapshot_deletion_failures_total | counter | 1 | Snapshot deletion failures by policy
|
||||
| elasticsearch_slm_stats_operation_mode | gauge | 1 | SLM operation mode (Running, stopping, stopped)
|
||||
|
||||
See the [metrics documentation](metrics.md)
|
||||
|
||||
### Alerts & Recording Rules
|
||||
|
||||
@ -272,10 +209,6 @@ Then transferred this repository to the Prometheus Community in May 2021.
|
||||
This package was originally created and maintained by [Eric Richardson](https://github.com/ewr),
|
||||
who transferred this repository to us in January 2017.
|
||||
|
||||
Maintainers of this repository:
|
||||
|
||||
* Christoph Oelmüller <christoph.oelmueller@justwatch.com> @zwopir
|
||||
|
||||
Please refer to the Git commit log for a complete list of contributors.
|
||||
|
||||
## Contributing
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -16,13 +16,12 @@ package collector
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
@ -46,19 +45,16 @@ type clusterHealthStatusMetric struct {
|
||||
|
||||
// ClusterHealth type defines the collector struct
|
||||
type ClusterHealth struct {
|
||||
logger log.Logger
|
||||
logger *slog.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
|
||||
up prometheus.Gauge
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
|
||||
metrics []*clusterHealthMetric
|
||||
statusMetric *clusterHealthStatusMetric
|
||||
}
|
||||
|
||||
// NewClusterHealth returns a new Collector exposing ClusterHealth stats.
|
||||
func NewClusterHealth(logger log.Logger, client *http.Client, url *url.URL) *ClusterHealth {
|
||||
func NewClusterHealth(logger *slog.Logger, client *http.Client, url *url.URL) *ClusterHealth {
|
||||
subsystem := "cluster_health"
|
||||
|
||||
return &ClusterHealth{
|
||||
@ -66,19 +62,6 @@ func NewClusterHealth(logger log.Logger, client *http.Client, url *url.URL) *Clu
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch cluster health endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, subsystem, "total_scrapes"),
|
||||
Help: "Current total Elasticsearch cluster health scrapes.",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
|
||||
metrics: []*clusterHealthMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
@ -225,10 +208,6 @@ func (c *ClusterHealth) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
ch <- c.statusMetric.Desc
|
||||
|
||||
ch <- c.up.Desc()
|
||||
ch <- c.totalScrapes.Desc()
|
||||
ch <- c.jsonParseFailures.Desc()
|
||||
}
|
||||
|
||||
func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, error) {
|
||||
@ -245,8 +224,8 @@ func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, er
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(c.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
c.logger.Warn(
|
||||
"failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
@ -256,14 +235,12 @@ func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, er
|
||||
return chr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
bts, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
c.jsonParseFailures.Inc()
|
||||
return chr, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &chr); err != nil {
|
||||
c.jsonParseFailures.Inc()
|
||||
return chr, err
|
||||
}
|
||||
|
||||
@ -272,24 +249,14 @@ func (c *ClusterHealth) fetchAndDecodeClusterHealth() (clusterHealthResponse, er
|
||||
|
||||
// Collect collects ClusterHealth metrics.
|
||||
func (c *ClusterHealth) Collect(ch chan<- prometheus.Metric) {
|
||||
var err error
|
||||
c.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- c.up
|
||||
ch <- c.totalScrapes
|
||||
ch <- c.jsonParseFailures
|
||||
}()
|
||||
|
||||
clusterHealthResp, err := c.fetchAndDecodeClusterHealth()
|
||||
if err != nil {
|
||||
c.up.Set(0)
|
||||
_ = level.Warn(c.logger).Log(
|
||||
"msg", "failed to fetch and decode cluster health",
|
||||
c.logger.Warn(
|
||||
"failed to fetch and decode cluster health",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
c.up.Set(1)
|
||||
|
||||
for _, metric := range c.metrics {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,13 +14,16 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestClusterHealth(t *testing.T) {
|
||||
@ -28,46 +31,172 @@ func TestClusterHealth(t *testing.T) {
|
||||
// docker run -d -p 9200:9200 elasticsearch:VERSION-alpine
|
||||
// curl -XPUT http://localhost:9200/twitter
|
||||
// curl http://localhost:9200/_cluster/health
|
||||
tcs := map[string]string{
|
||||
"1.7.6": `{"cluster_name":"elasticsearch","status":"yellow","timed_out":false,"number_of_nodes":1,"number_of_data_nodes":1,"active_primary_shards":5,"active_shards":5,"relocating_shards":0,"initializing_shards":0,"unassigned_shards":5,"delayed_unassigned_shards":0,"number_of_pending_tasks":0,"number_of_in_flight_fetch":0}`,
|
||||
"2.4.5": `{"cluster_name":"elasticsearch","status":"yellow","timed_out":false,"number_of_nodes":1,"number_of_data_nodes":1,"active_primary_shards":5,"active_shards":5,"relocating_shards":0,"initializing_shards":0,"unassigned_shards":5,"delayed_unassigned_shards":0,"number_of_pending_tasks":0,"number_of_in_flight_fetch":0,"task_max_waiting_in_queue_millis":12,"active_shards_percent_as_number":50.0}`,
|
||||
"5.4.2": `{"cluster_name":"elasticsearch","status":"yellow","timed_out":false,"number_of_nodes":1,"number_of_data_nodes":1,"active_primary_shards":5,"active_shards":5,"relocating_shards":0,"initializing_shards":0,"unassigned_shards":5,"delayed_unassigned_shards":0,"number_of_pending_tasks":0,"number_of_in_flight_fetch":0,"task_max_waiting_in_queue_millis":12,"active_shards_percent_as_number":50.0}`,
|
||||
}
|
||||
for ver, out := range tcs {
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, out)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
}
|
||||
c := NewClusterHealth(log.NewNopLogger(), http.DefaultClient, u)
|
||||
chr, err := c.fetchAndDecodeClusterHealth()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode cluster health: %s", err)
|
||||
}
|
||||
t.Logf("[%s] Cluster Health Response: %+v", ver, chr)
|
||||
if chr.ClusterName != "elasticsearch" {
|
||||
t.Errorf("Invalid cluster health response")
|
||||
}
|
||||
if chr.Status != "yellow" {
|
||||
t.Errorf("Invalid cluster status")
|
||||
}
|
||||
if chr.TimedOut {
|
||||
t.Errorf("Check didn't time out")
|
||||
}
|
||||
if chr.NumberOfNodes != 1 {
|
||||
t.Errorf("Wrong number of nodes")
|
||||
}
|
||||
if chr.NumberOfDataNodes != 1 {
|
||||
t.Errorf("Wrong number of data nodes")
|
||||
}
|
||||
if ver != "1.7.6" {
|
||||
if chr.TaskMaxWaitingInQueueMillis != 12 {
|
||||
t.Errorf("Wrong task max waiting time in millis")
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "1.7.6",
|
||||
file: "../fixtures/clusterhealth/1.7.6.json",
|
||||
want: `
|
||||
# HELP elasticsearch_cluster_health_active_primary_shards The number of primary shards in your cluster. This is an aggregate total across all indices.
|
||||
# TYPE elasticsearch_cluster_health_active_primary_shards gauge
|
||||
elasticsearch_cluster_health_active_primary_shards{cluster="elasticsearch"} 5
|
||||
# HELP elasticsearch_cluster_health_active_shards Aggregate total of all shards across all indices, which includes replica shards.
|
||||
# TYPE elasticsearch_cluster_health_active_shards gauge
|
||||
elasticsearch_cluster_health_active_shards{cluster="elasticsearch"} 5
|
||||
# HELP elasticsearch_cluster_health_delayed_unassigned_shards Shards delayed to reduce reallocation overhead
|
||||
# TYPE elasticsearch_cluster_health_delayed_unassigned_shards gauge
|
||||
elasticsearch_cluster_health_delayed_unassigned_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_initializing_shards Count of shards that are being freshly created.
|
||||
# TYPE elasticsearch_cluster_health_initializing_shards gauge
|
||||
elasticsearch_cluster_health_initializing_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_number_of_data_nodes Number of data nodes in the cluster.
|
||||
# TYPE elasticsearch_cluster_health_number_of_data_nodes gauge
|
||||
elasticsearch_cluster_health_number_of_data_nodes{cluster="elasticsearch"} 1
|
||||
# HELP elasticsearch_cluster_health_number_of_in_flight_fetch The number of ongoing shard info requests.
|
||||
# TYPE elasticsearch_cluster_health_number_of_in_flight_fetch gauge
|
||||
elasticsearch_cluster_health_number_of_in_flight_fetch{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_number_of_nodes Number of nodes in the cluster.
|
||||
# TYPE elasticsearch_cluster_health_number_of_nodes gauge
|
||||
elasticsearch_cluster_health_number_of_nodes{cluster="elasticsearch"} 1
|
||||
# HELP elasticsearch_cluster_health_number_of_pending_tasks Cluster level changes which have not yet been executed
|
||||
# TYPE elasticsearch_cluster_health_number_of_pending_tasks gauge
|
||||
elasticsearch_cluster_health_number_of_pending_tasks{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_relocating_shards The number of shards that are currently moving from one node to another node.
|
||||
# TYPE elasticsearch_cluster_health_relocating_shards gauge
|
||||
elasticsearch_cluster_health_relocating_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_status Whether all primary and replica shards are allocated.
|
||||
# TYPE elasticsearch_cluster_health_status gauge
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="green"} 0
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="red"} 0
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="yellow"} 1
|
||||
# HELP elasticsearch_cluster_health_task_max_waiting_in_queue_millis Tasks max time waiting in queue.
|
||||
# TYPE elasticsearch_cluster_health_task_max_waiting_in_queue_millis gauge
|
||||
elasticsearch_cluster_health_task_max_waiting_in_queue_millis{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_unassigned_shards The number of shards that exist in the cluster state, but cannot be found in the cluster itself.
|
||||
# TYPE elasticsearch_cluster_health_unassigned_shards gauge
|
||||
elasticsearch_cluster_health_unassigned_shards{cluster="elasticsearch"} 5
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "2.4.5",
|
||||
file: "../fixtures/clusterhealth/2.4.5.json",
|
||||
want: `
|
||||
# HELP elasticsearch_cluster_health_active_primary_shards The number of primary shards in your cluster. This is an aggregate total across all indices.
|
||||
# TYPE elasticsearch_cluster_health_active_primary_shards gauge
|
||||
elasticsearch_cluster_health_active_primary_shards{cluster="elasticsearch"} 5
|
||||
# HELP elasticsearch_cluster_health_active_shards Aggregate total of all shards across all indices, which includes replica shards.
|
||||
# TYPE elasticsearch_cluster_health_active_shards gauge
|
||||
elasticsearch_cluster_health_active_shards{cluster="elasticsearch"} 5
|
||||
# HELP elasticsearch_cluster_health_delayed_unassigned_shards Shards delayed to reduce reallocation overhead
|
||||
# TYPE elasticsearch_cluster_health_delayed_unassigned_shards gauge
|
||||
elasticsearch_cluster_health_delayed_unassigned_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_initializing_shards Count of shards that are being freshly created.
|
||||
# TYPE elasticsearch_cluster_health_initializing_shards gauge
|
||||
elasticsearch_cluster_health_initializing_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_number_of_data_nodes Number of data nodes in the cluster.
|
||||
# TYPE elasticsearch_cluster_health_number_of_data_nodes gauge
|
||||
elasticsearch_cluster_health_number_of_data_nodes{cluster="elasticsearch"} 1
|
||||
# HELP elasticsearch_cluster_health_number_of_in_flight_fetch The number of ongoing shard info requests.
|
||||
# TYPE elasticsearch_cluster_health_number_of_in_flight_fetch gauge
|
||||
elasticsearch_cluster_health_number_of_in_flight_fetch{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_number_of_nodes Number of nodes in the cluster.
|
||||
# TYPE elasticsearch_cluster_health_number_of_nodes gauge
|
||||
elasticsearch_cluster_health_number_of_nodes{cluster="elasticsearch"} 1
|
||||
# HELP elasticsearch_cluster_health_number_of_pending_tasks Cluster level changes which have not yet been executed
|
||||
# TYPE elasticsearch_cluster_health_number_of_pending_tasks gauge
|
||||
elasticsearch_cluster_health_number_of_pending_tasks{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_relocating_shards The number of shards that are currently moving from one node to another node.
|
||||
# TYPE elasticsearch_cluster_health_relocating_shards gauge
|
||||
elasticsearch_cluster_health_relocating_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_status Whether all primary and replica shards are allocated.
|
||||
# TYPE elasticsearch_cluster_health_status gauge
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="green"} 0
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="red"} 0
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="yellow"} 1
|
||||
# HELP elasticsearch_cluster_health_task_max_waiting_in_queue_millis Tasks max time waiting in queue.
|
||||
# TYPE elasticsearch_cluster_health_task_max_waiting_in_queue_millis gauge
|
||||
elasticsearch_cluster_health_task_max_waiting_in_queue_millis{cluster="elasticsearch"} 12
|
||||
# HELP elasticsearch_cluster_health_unassigned_shards The number of shards that exist in the cluster state, but cannot be found in the cluster itself.
|
||||
# TYPE elasticsearch_cluster_health_unassigned_shards gauge
|
||||
elasticsearch_cluster_health_unassigned_shards{cluster="elasticsearch"} 5
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "5.4.2",
|
||||
file: "../fixtures/clusterhealth/5.4.2.json",
|
||||
want: `
|
||||
# HELP elasticsearch_cluster_health_active_primary_shards The number of primary shards in your cluster. This is an aggregate total across all indices.
|
||||
# TYPE elasticsearch_cluster_health_active_primary_shards gauge
|
||||
elasticsearch_cluster_health_active_primary_shards{cluster="elasticsearch"} 5
|
||||
# HELP elasticsearch_cluster_health_active_shards Aggregate total of all shards across all indices, which includes replica shards.
|
||||
# TYPE elasticsearch_cluster_health_active_shards gauge
|
||||
elasticsearch_cluster_health_active_shards{cluster="elasticsearch"} 5
|
||||
# HELP elasticsearch_cluster_health_delayed_unassigned_shards Shards delayed to reduce reallocation overhead
|
||||
# TYPE elasticsearch_cluster_health_delayed_unassigned_shards gauge
|
||||
elasticsearch_cluster_health_delayed_unassigned_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_initializing_shards Count of shards that are being freshly created.
|
||||
# TYPE elasticsearch_cluster_health_initializing_shards gauge
|
||||
elasticsearch_cluster_health_initializing_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_number_of_data_nodes Number of data nodes in the cluster.
|
||||
# TYPE elasticsearch_cluster_health_number_of_data_nodes gauge
|
||||
elasticsearch_cluster_health_number_of_data_nodes{cluster="elasticsearch"} 1
|
||||
# HELP elasticsearch_cluster_health_number_of_in_flight_fetch The number of ongoing shard info requests.
|
||||
# TYPE elasticsearch_cluster_health_number_of_in_flight_fetch gauge
|
||||
elasticsearch_cluster_health_number_of_in_flight_fetch{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_number_of_nodes Number of nodes in the cluster.
|
||||
# TYPE elasticsearch_cluster_health_number_of_nodes gauge
|
||||
elasticsearch_cluster_health_number_of_nodes{cluster="elasticsearch"} 1
|
||||
# HELP elasticsearch_cluster_health_number_of_pending_tasks Cluster level changes which have not yet been executed
|
||||
# TYPE elasticsearch_cluster_health_number_of_pending_tasks gauge
|
||||
elasticsearch_cluster_health_number_of_pending_tasks{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_relocating_shards The number of shards that are currently moving from one node to another node.
|
||||
# TYPE elasticsearch_cluster_health_relocating_shards gauge
|
||||
elasticsearch_cluster_health_relocating_shards{cluster="elasticsearch"} 0
|
||||
# HELP elasticsearch_cluster_health_status Whether all primary and replica shards are allocated.
|
||||
# TYPE elasticsearch_cluster_health_status gauge
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="green"} 0
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="red"} 0
|
||||
elasticsearch_cluster_health_status{cluster="elasticsearch",color="yellow"} 1
|
||||
# HELP elasticsearch_cluster_health_task_max_waiting_in_queue_millis Tasks max time waiting in queue.
|
||||
# TYPE elasticsearch_cluster_health_task_max_waiting_in_queue_millis gauge
|
||||
elasticsearch_cluster_health_task_max_waiting_in_queue_millis{cluster="elasticsearch"} 12
|
||||
# HELP elasticsearch_cluster_health_unassigned_shards The number of shards that exist in the cluster state, but cannot be found in the cluster itself.
|
||||
# TYPE elasticsearch_cluster_health_unassigned_shards gauge
|
||||
elasticsearch_cluster_health_unassigned_shards{cluster="elasticsearch"} 5
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
c := NewClusterHealth(promslog.NewNopLogger(), http.DefaultClient, u)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(c, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -16,12 +16,12 @@ package collector
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/go-kit/log"
|
||||
"github.com/blang/semver/v4"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
@ -30,12 +30,12 @@ func init() {
|
||||
}
|
||||
|
||||
type ClusterInfoCollector struct {
|
||||
logger log.Logger
|
||||
logger *slog.Logger
|
||||
u *url.URL
|
||||
hc *http.Client
|
||||
}
|
||||
|
||||
func NewClusterInfo(logger log.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
func NewClusterInfo(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
return &ClusterInfoCollector{
|
||||
logger: logger,
|
||||
u: u,
|
||||
@ -77,13 +77,13 @@ type VersionInfo struct {
|
||||
LuceneVersion semver.Version `json:"lucene_version"`
|
||||
}
|
||||
|
||||
func (c *ClusterInfoCollector) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
func (c *ClusterInfoCollector) Update(_ context.Context, ch chan<- prometheus.Metric) error {
|
||||
resp, err := c.hc.Get(c.u.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
b, err := ioutil.ReadAll(resp.Body)
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
93
collector/cluster_info_test.go
Normal file
93
collector/cluster_info_test.go
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestClusterInfo(t *testing.T) {
|
||||
// Testcases created using:
|
||||
// docker run -p 9200:9200 -e "discovery.type=single-node" elasticsearch:${VERSION}
|
||||
// curl http://localhost:9200/ > fixtures/cluster_info/${VERSION}.json
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "2.4.5",
|
||||
file: "../fixtures/clusterinfo/2.4.5.json",
|
||||
want: `# HELP elasticsearch_version Elasticsearch version information.
|
||||
# TYPE elasticsearch_version gauge
|
||||
elasticsearch_version{build_date="",build_hash="c849dd13904f53e63e88efc33b2ceeda0b6a1276",cluster="elasticsearch",cluster_uuid="3qps7bcWTqyzV49ApmPVfw",lucene_version="5.5.4",version="2.4.5"} 1
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "5.4.2",
|
||||
file: "../fixtures/clusterinfo/5.4.2.json",
|
||||
want: `# HELP elasticsearch_version Elasticsearch version information.
|
||||
# TYPE elasticsearch_version gauge
|
||||
elasticsearch_version{build_date="2017-06-15T02:29:28.122Z",build_hash="929b078",cluster="elasticsearch",cluster_uuid="kbqi7yhQT-WlPdGL2m0xJg",lucene_version="6.5.1",version="5.4.2"} 1
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "7.13.1",
|
||||
file: "../fixtures/clusterinfo/7.13.1.json",
|
||||
want: `# HELP elasticsearch_version Elasticsearch version information.
|
||||
# TYPE elasticsearch_version gauge
|
||||
elasticsearch_version{build_date="2021-05-28T17:40:59.346932922Z",build_hash="9a7758028e4ea59bcab41c12004603c5a7dd84a9",cluster="docker-cluster",cluster_uuid="aCMrCY1VQpqJ6U4Sw_xdiw",lucene_version="8.8.2",version="7.13.1"} 1
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
c, err := NewClusterInfo(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,157 +14,188 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/imdario/mergo"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// ClusterSettings information struct
|
||||
type ClusterSettings struct {
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
|
||||
up prometheus.Gauge
|
||||
shardAllocationEnabled prometheus.Gauge
|
||||
maxShardsPerNode prometheus.Gauge
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
func init() {
|
||||
registerCollector("clustersettings", defaultDisabled, NewClusterSettings)
|
||||
}
|
||||
|
||||
// NewClusterSettings defines Cluster Settings Prometheus metrics
|
||||
func NewClusterSettings(logger log.Logger, client *http.Client, url *url.URL) *ClusterSettings {
|
||||
return &ClusterSettings{
|
||||
type ClusterSettingsCollector struct {
|
||||
logger *slog.Logger
|
||||
u *url.URL
|
||||
hc *http.Client
|
||||
}
|
||||
|
||||
func NewClusterSettings(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
return &ClusterSettingsCollector{
|
||||
logger: logger,
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "clustersettings_stats", "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch cluster settings endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "clustersettings_stats", "total_scrapes"),
|
||||
Help: "Current total Elasticsearch cluster settings scrapes.",
|
||||
}),
|
||||
shardAllocationEnabled: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "clustersettings_stats", "shard_allocation_enabled"),
|
||||
Help: "Current mode of cluster wide shard routing allocation settings.",
|
||||
}),
|
||||
maxShardsPerNode: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "clustersettings_stats", "max_shards_per_node"),
|
||||
Help: "Current maximum number of shards per node setting.",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "clustersettings_stats", "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
}
|
||||
u: u,
|
||||
hc: hc,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Describe add Snapshots metrics descriptions
|
||||
func (cs *ClusterSettings) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- cs.up.Desc()
|
||||
ch <- cs.totalScrapes.Desc()
|
||||
ch <- cs.shardAllocationEnabled.Desc()
|
||||
ch <- cs.maxShardsPerNode.Desc()
|
||||
ch <- cs.jsonParseFailures.Desc()
|
||||
var clusterSettingsDesc = map[string]*prometheus.Desc{
|
||||
"shardAllocationEnabled": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_stats", "shard_allocation_enabled"),
|
||||
"Current mode of cluster wide shard routing allocation settings.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"maxShardsPerNode": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_stats", "max_shards_per_node"),
|
||||
"Current maximum number of shards per node setting.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"thresholdEnabled": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation", "threshold_enabled"),
|
||||
"Is disk allocation decider enabled.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"floodStageRatio": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation_watermark", "flood_stage_ratio"),
|
||||
"Flood stage watermark as a ratio.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"highRatio": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation_watermark", "high_ratio"),
|
||||
"High watermark for disk usage as a ratio.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"lowRatio": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation_watermark", "low_ratio"),
|
||||
"Low watermark for disk usage as a ratio.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"floodStageBytes": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation_watermark", "flood_stage_bytes"),
|
||||
"Flood stage watermark as in bytes.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"highBytes": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation_watermark", "high_bytes"),
|
||||
"High watermark for disk usage in bytes.",
|
||||
nil, nil,
|
||||
),
|
||||
|
||||
"lowBytes": prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "clustersettings_allocation_watermark", "low_bytes"),
|
||||
"Low watermark for disk usage in bytes.",
|
||||
nil, nil,
|
||||
),
|
||||
}
|
||||
|
||||
func (cs *ClusterSettings) getAndParseURL(u *url.URL, data interface{}) error {
|
||||
res, err := cs.client.Get(u.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get from %s://%s:%s%s: %s",
|
||||
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(cs.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
cs.jsonParseFailures.Inc()
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, data); err != nil {
|
||||
cs.jsonParseFailures.Inc()
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
// clusterSettingsResponse is a representation of a Elasticsearch Cluster Settings
|
||||
type clusterSettingsResponse struct {
|
||||
Defaults clusterSettingsSection `json:"defaults"`
|
||||
Persistent clusterSettingsSection `json:"persistent"`
|
||||
Transient clusterSettingsSection `json:"transient"`
|
||||
}
|
||||
|
||||
func (cs *ClusterSettings) fetchAndDecodeClusterSettingsStats() (ClusterSettingsResponse, error) {
|
||||
// clusterSettingsSection is a representation of a Elasticsearch Cluster Settings
|
||||
type clusterSettingsSection struct {
|
||||
Cluster clusterSettingsCluster `json:"cluster"`
|
||||
}
|
||||
|
||||
u := *cs.url
|
||||
u.Path = path.Join(u.Path, "/_cluster/settings")
|
||||
// clusterSettingsCluster is a representation of a Elasticsearch clusterSettingsCluster Settings
|
||||
type clusterSettingsCluster struct {
|
||||
Routing clusterSettingsRouting `json:"routing"`
|
||||
// This can be either a JSON object (which does not contain the value we are interested in) or a string
|
||||
MaxShardsPerNode interface{} `json:"max_shards_per_node"`
|
||||
}
|
||||
|
||||
// clusterSettingsRouting is a representation of a Elasticsearch Cluster shard routing configuration
|
||||
type clusterSettingsRouting struct {
|
||||
Allocation clusterSettingsAllocation `json:"allocation"`
|
||||
}
|
||||
|
||||
// clusterSettingsAllocation is a representation of a Elasticsearch Cluster shard routing allocation settings
|
||||
type clusterSettingsAllocation struct {
|
||||
Enabled string `json:"enable"`
|
||||
Disk clusterSettingsDisk `json:"disk"`
|
||||
}
|
||||
|
||||
// clusterSettingsDisk is a representation of a Elasticsearch Cluster shard routing disk allocation settings
|
||||
type clusterSettingsDisk struct {
|
||||
ThresholdEnabled string `json:"threshold_enabled"`
|
||||
Watermark clusterSettingsWatermark `json:"watermark"`
|
||||
}
|
||||
|
||||
// clusterSettingsWatermark is representation of Elasticsearch Cluster shard routing disk allocation watermark settings
|
||||
type clusterSettingsWatermark struct {
|
||||
FloodStage interface{} `json:"flood_stage"`
|
||||
High interface{} `json:"high"`
|
||||
Low interface{} `json:"low"`
|
||||
}
|
||||
|
||||
func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
u := c.u.ResolveReference(&url.URL{Path: "_cluster/settings"})
|
||||
q := u.Query()
|
||||
q.Set("include_defaults", "true")
|
||||
u.RawQuery = q.Encode()
|
||||
u.RawPath = q.Encode()
|
||||
var csfr ClusterSettingsFullResponse
|
||||
var csr ClusterSettingsResponse
|
||||
err := cs.getAndParseURL(&u, &csfr)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
||||
if err != nil {
|
||||
return csr, err
|
||||
return err
|
||||
}
|
||||
err = mergo.Merge(&csr, csfr.Defaults, mergo.WithOverride)
|
||||
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return csr, err
|
||||
return err
|
||||
}
|
||||
err = mergo.Merge(&csr, csfr.Persistent, mergo.WithOverride)
|
||||
defer resp.Body.Close()
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return csr, err
|
||||
return err
|
||||
}
|
||||
err = mergo.Merge(&csr, csfr.Transient, mergo.WithOverride)
|
||||
|
||||
return csr, err
|
||||
}
|
||||
|
||||
// Collect gets cluster settings metric values
|
||||
func (cs *ClusterSettings) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
cs.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- cs.up
|
||||
ch <- cs.totalScrapes
|
||||
ch <- cs.jsonParseFailures
|
||||
ch <- cs.shardAllocationEnabled
|
||||
ch <- cs.maxShardsPerNode
|
||||
}()
|
||||
|
||||
csr, err := cs.fetchAndDecodeClusterSettingsStats()
|
||||
var data clusterSettingsResponse
|
||||
err = json.Unmarshal(b, &data)
|
||||
if err != nil {
|
||||
cs.shardAllocationEnabled.Set(0)
|
||||
cs.up.Set(0)
|
||||
_ = level.Warn(cs.logger).Log(
|
||||
"msg", "failed to fetch and decode cluster settings stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
return err
|
||||
}
|
||||
cs.up.Set(1)
|
||||
|
||||
// Merge all settings into one struct
|
||||
merged := data.Defaults
|
||||
|
||||
err = mergo.Merge(&merged, data.Persistent, mergo.WithOverride)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = mergo.Merge(&merged, data.Transient, mergo.WithOverride)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Max shards per node
|
||||
if maxShardsPerNodeString, ok := merged.Cluster.MaxShardsPerNode.(string); ok {
|
||||
maxShardsPerNode, err := strconv.ParseInt(maxShardsPerNodeString, 10, 64)
|
||||
if err == nil {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["maxShardsPerNode"],
|
||||
prometheus.GaugeValue,
|
||||
float64(maxShardsPerNode),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Shard allocation enabled
|
||||
shardAllocationMap := map[string]int{
|
||||
"all": 0,
|
||||
"primaries": 1,
|
||||
@ -172,10 +203,173 @@ func (cs *ClusterSettings) Collect(ch chan<- prometheus.Metric) {
|
||||
"none": 3,
|
||||
}
|
||||
|
||||
cs.shardAllocationEnabled.Set(float64(shardAllocationMap[csr.Cluster.Routing.Allocation.Enabled]))
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["shardAllocationEnabled"],
|
||||
prometheus.GaugeValue,
|
||||
float64(shardAllocationMap[merged.Cluster.Routing.Allocation.Enabled]),
|
||||
)
|
||||
|
||||
maxShardsPerNode, err := strconv.ParseInt(csr.Cluster.MaxShardsPerNode, 10, 64)
|
||||
if err == nil {
|
||||
cs.maxShardsPerNode.Set(float64(maxShardsPerNode))
|
||||
// Threshold enabled
|
||||
thresholdMap := map[string]int{
|
||||
"false": 0,
|
||||
"true": 1,
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["thresholdEnabled"],
|
||||
prometheus.GaugeValue,
|
||||
float64(thresholdMap[merged.Cluster.Routing.Allocation.Disk.ThresholdEnabled]),
|
||||
)
|
||||
|
||||
// Watermark bytes or ratio metrics
|
||||
watermarkFlood, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.FloodStage)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse flood stage watermark", "err", err)
|
||||
} else {
|
||||
if strings.HasSuffix(watermarkFlood, "b") {
|
||||
floodStageBytes, err := getValueInBytes(watermarkFlood)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse flood_stage bytes", "err", err)
|
||||
} else {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["floodStageBytes"],
|
||||
prometheus.GaugeValue,
|
||||
floodStageBytes,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
floodStageRatio, err := getValueAsRatio(watermarkFlood)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse flood_stage ratio", "err", err)
|
||||
} else {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["floodStageRatio"],
|
||||
prometheus.GaugeValue,
|
||||
floodStageRatio,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
watermarkHigh, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.High)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse high watermark", "err", err)
|
||||
} else {
|
||||
if strings.HasSuffix(watermarkHigh, "b") {
|
||||
highBytes, err := getValueInBytes(watermarkHigh)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse high bytes", "err", err)
|
||||
} else {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["highBytes"],
|
||||
prometheus.GaugeValue,
|
||||
highBytes,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
highRatio, err := getValueAsRatio(watermarkHigh)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse high ratio", "err", err)
|
||||
} else {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["highRatio"],
|
||||
prometheus.GaugeValue,
|
||||
highRatio,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
watermarkLow, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.Low)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse low watermark", "err", err)
|
||||
} else {
|
||||
if strings.HasSuffix(watermarkLow, "b") {
|
||||
lowBytes, err := getValueInBytes(watermarkLow)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse low bytes", "err", err)
|
||||
} else {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["lowBytes"],
|
||||
prometheus.GaugeValue,
|
||||
lowBytes,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
lowRatio, err := getValueAsRatio(watermarkLow)
|
||||
if err != nil {
|
||||
c.logger.Error("failed to parse low ratio", "err", err)
|
||||
} else {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
clusterSettingsDesc["lowRatio"],
|
||||
prometheus.GaugeValue,
|
||||
lowRatio,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseWatermarkValue(value interface{}) (string, error) {
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
return v, nil
|
||||
case map[string]interface{}:
|
||||
if val, ok := v["value"].(string); ok {
|
||||
return val, nil
|
||||
}
|
||||
return "", fmt.Errorf("unexpected structure in watermark value: %v", v)
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported type for watermark value: %T", v)
|
||||
}
|
||||
}
|
||||
|
||||
func getValueInBytes(value string) (float64, error) {
|
||||
type UnitValue struct {
|
||||
unit string
|
||||
val float64
|
||||
}
|
||||
|
||||
unitValues := []UnitValue{
|
||||
{"pb", 1024 * 1024 * 1024 * 1024 * 1024},
|
||||
{"tb", 1024 * 1024 * 1024 * 1024},
|
||||
{"gb", 1024 * 1024 * 1024},
|
||||
{"mb", 1024 * 1024},
|
||||
{"kb", 1024},
|
||||
{"b", 1},
|
||||
}
|
||||
|
||||
for _, uv := range unitValues {
|
||||
if strings.HasSuffix(value, uv.unit) {
|
||||
numberStr := strings.TrimSuffix(value, uv.unit)
|
||||
|
||||
number, err := strconv.ParseFloat(numberStr, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return number * uv.val, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("failed to convert unit %s to bytes", value)
|
||||
}
|
||||
|
||||
func getValueAsRatio(value string) (float64, error) {
|
||||
if strings.HasSuffix(value, "%") {
|
||||
percentValue, err := strconv.Atoi(strings.TrimSpace(strings.TrimSuffix(value, "%")))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return float64(percentValue) / 100, nil
|
||||
}
|
||||
|
||||
ratio, err := strconv.ParseFloat(value, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return ratio, nil
|
||||
}
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
// ClusterSettingsFullResponse is a representation of a Elasticsearch Cluster Settings
|
||||
type ClusterSettingsFullResponse struct {
|
||||
Defaults ClusterSettingsResponse `json:"defaults"`
|
||||
Persistent ClusterSettingsResponse `json:"persistent"`
|
||||
Transient ClusterSettingsResponse `json:"transient"`
|
||||
}
|
||||
|
||||
// ClusterSettingsResponse is a representation of a Elasticsearch Cluster Settings
|
||||
type ClusterSettingsResponse struct {
|
||||
Cluster Cluster `json:"cluster"`
|
||||
}
|
||||
|
||||
// Cluster is a representation of a Elasticsearch Cluster Settings
|
||||
type Cluster struct {
|
||||
Routing Routing `json:"routing"`
|
||||
MaxShardsPerNode string `json:"max_shards_per_node"`
|
||||
}
|
||||
|
||||
// Routing is a representation of a Elasticsearch Cluster shard routing configuration
|
||||
type Routing struct {
|
||||
Allocation Allocation `json:"allocation"`
|
||||
}
|
||||
|
||||
// Allocation is a representation of a Elasticsearch Cluster shard routing allocation settings
|
||||
type Allocation struct {
|
||||
Enabled string `json:"enable"`
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -19,75 +19,209 @@ import (
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestClusterSettingsStats(t *testing.T) {
|
||||
// Testcases created using:
|
||||
// docker run -d -p 9200:9200 elasticsearch:VERSION-alpine
|
||||
// curl http://localhost:9200/_cluster/settings/?include_defaults=true
|
||||
files := []string{"../fixtures/settings-5.4.2.json", "../fixtures/settings-merge-5.4.2.json"}
|
||||
for _, filename := range files {
|
||||
f, _ := os.Open(filename)
|
||||
defer f.Close()
|
||||
for hn, handler := range map[string]http.Handler{
|
||||
"plain": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
// MaxShardsPerNode is empty in older versions
|
||||
{
|
||||
name: "5.4.2",
|
||||
file: "../fixtures/settings-5.4.2.json",
|
||||
want: `
|
||||
# HELP elasticsearch_clustersettings_stats_shard_allocation_enabled Current mode of cluster wide shard routing allocation settings.
|
||||
# TYPE elasticsearch_clustersettings_stats_shard_allocation_enabled gauge
|
||||
elasticsearch_clustersettings_stats_shard_allocation_enabled 0
|
||||
# HELP elasticsearch_clustersettings_allocation_threshold_enabled Is disk allocation decider enabled.
|
||||
# TYPE elasticsearch_clustersettings_allocation_threshold_enabled gauge
|
||||
elasticsearch_clustersettings_allocation_threshold_enabled 1
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_high_ratio High watermark for disk usage as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_high_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_high_ratio 0.9
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_low_ratio Low watermark for disk usage as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_low_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_low_ratio 0.85
|
||||
`,
|
||||
},
|
||||
|
||||
{
|
||||
name: "5.4.2-merge",
|
||||
file: "../fixtures/settings-merge-5.4.2.json",
|
||||
want: `
|
||||
# HELP elasticsearch_clustersettings_stats_shard_allocation_enabled Current mode of cluster wide shard routing allocation settings.
|
||||
# TYPE elasticsearch_clustersettings_stats_shard_allocation_enabled gauge
|
||||
elasticsearch_clustersettings_stats_shard_allocation_enabled 0
|
||||
# HELP elasticsearch_clustersettings_allocation_threshold_enabled Is disk allocation decider enabled.
|
||||
# TYPE elasticsearch_clustersettings_allocation_threshold_enabled gauge
|
||||
elasticsearch_clustersettings_allocation_threshold_enabled 0
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "7.3.0",
|
||||
file: "../fixtures/settings-7.3.0.json",
|
||||
want: `
|
||||
# HELP elasticsearch_clustersettings_stats_max_shards_per_node Current maximum number of shards per node setting.
|
||||
# TYPE elasticsearch_clustersettings_stats_max_shards_per_node gauge
|
||||
elasticsearch_clustersettings_stats_max_shards_per_node 1000
|
||||
# HELP elasticsearch_clustersettings_stats_shard_allocation_enabled Current mode of cluster wide shard routing allocation settings.
|
||||
# TYPE elasticsearch_clustersettings_stats_shard_allocation_enabled gauge
|
||||
elasticsearch_clustersettings_stats_shard_allocation_enabled 0
|
||||
# HELP elasticsearch_clustersettings_allocation_threshold_enabled Is disk allocation decider enabled.
|
||||
# TYPE elasticsearch_clustersettings_allocation_threshold_enabled gauge
|
||||
elasticsearch_clustersettings_allocation_threshold_enabled 0
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio Flood stage watermark as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio 0.95
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_high_ratio High watermark for disk usage as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_high_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_high_ratio 0.9
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_low_ratio Low watermark for disk usage as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_low_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_low_ratio 0.85
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "7.17.5-persistent-clustermaxshardspernode",
|
||||
file: "../fixtures/settings-persistent-clustermaxshardspernode-7.17.5.json",
|
||||
want: `
|
||||
# HELP elasticsearch_clustersettings_stats_max_shards_per_node Current maximum number of shards per node setting.
|
||||
# TYPE elasticsearch_clustersettings_stats_max_shards_per_node gauge
|
||||
elasticsearch_clustersettings_stats_max_shards_per_node 1000
|
||||
# HELP elasticsearch_clustersettings_stats_shard_allocation_enabled Current mode of cluster wide shard routing allocation settings.
|
||||
# TYPE elasticsearch_clustersettings_stats_shard_allocation_enabled gauge
|
||||
elasticsearch_clustersettings_stats_shard_allocation_enabled 0
|
||||
# HELP elasticsearch_clustersettings_allocation_threshold_enabled Is disk allocation decider enabled.
|
||||
# TYPE elasticsearch_clustersettings_allocation_threshold_enabled gauge
|
||||
elasticsearch_clustersettings_allocation_threshold_enabled 1
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_flood_stage_bytes Flood stage watermark as in bytes.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_flood_stage_bytes gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_flood_stage_bytes 100
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_high_bytes High watermark for disk usage in bytes.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_high_bytes gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_high_bytes 2.147483648e+11
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_low_bytes Low watermark for disk usage in bytes.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_low_bytes gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_low_bytes 5.24288e+07
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "8.9.1-persistent-watermark-percent",
|
||||
file: "../fixtures/settings-8.9.1-watermark.json",
|
||||
want: `
|
||||
# HELP elasticsearch_clustersettings_stats_max_shards_per_node Current maximum number of shards per node setting.
|
||||
# TYPE elasticsearch_clustersettings_stats_max_shards_per_node gauge
|
||||
elasticsearch_clustersettings_stats_max_shards_per_node 1000
|
||||
# HELP elasticsearch_clustersettings_stats_shard_allocation_enabled Current mode of cluster wide shard routing allocation settings.
|
||||
# TYPE elasticsearch_clustersettings_stats_shard_allocation_enabled gauge
|
||||
elasticsearch_clustersettings_stats_shard_allocation_enabled 0
|
||||
# HELP elasticsearch_clustersettings_allocation_threshold_enabled Is disk allocation decider enabled.
|
||||
# TYPE elasticsearch_clustersettings_allocation_threshold_enabled gauge
|
||||
elasticsearch_clustersettings_allocation_threshold_enabled 1
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio Flood stage watermark as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio 0.96
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_high_ratio High watermark for disk usage as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_high_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_high_ratio 0.92
|
||||
# HELP elasticsearch_clustersettings_allocation_watermark_low_ratio Low watermark for disk usage as a ratio.
|
||||
# TYPE elasticsearch_clustersettings_allocation_watermark_low_ratio gauge
|
||||
elasticsearch_clustersettings_allocation_watermark_low_ratio 0.88
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}),
|
||||
} {
|
||||
ts := httptest.NewServer(handler)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
c := NewClusterSettings(log.NewNopLogger(), http.DefaultClient, u)
|
||||
nsr, err := c.fetchAndDecodeClusterSettingsStats()
|
||||
|
||||
c, err := NewClusterSettings(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode cluster settings stats: %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Logf("[%s/%s] Cluster Settings Stats Response: %+v", hn, filename, nsr)
|
||||
if nsr.Cluster.Routing.Allocation.Enabled != "ALL" {
|
||||
t.Errorf("Wrong setting for cluster routing allocation enabled")
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if nsr.Cluster.MaxShardsPerNode != "" {
|
||||
t.Errorf("MaxShardsPerNode should be empty on older releases")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClusterMaxShardsPerNode(t *testing.T) {
|
||||
// Testcases created using:
|
||||
// docker run -d -p 9200:9200 elasticsearch:VERSION-alpine
|
||||
// curl http://localhost:9200/_cluster/settings/?include_defaults=true
|
||||
files := []string{"../fixtures/settings-7.3.0.json"}
|
||||
for _, filename := range files {
|
||||
f, _ := os.Open(filename)
|
||||
defer f.Close()
|
||||
for hn, handler := range map[string]http.Handler{
|
||||
"plain": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}),
|
||||
} {
|
||||
ts := httptest.NewServer(handler)
|
||||
defer ts.Close()
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
func Test_getValueInBytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want float64
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "Bytes", input: "100b", want: 100},
|
||||
{name: "Kibibytes", input: "200kb", want: 204800},
|
||||
{name: "Mebibytes", input: "300mb", want: 314572800},
|
||||
{name: "Gibibytes", input: "400gb", want: 429496729600},
|
||||
{name: "Tebibytes", input: "500tb", want: 549755813888000},
|
||||
{name: "Pebibytes", input: "600pb", want: 675539944105574400},
|
||||
{name: "Unknown", input: "9ab", wantErr: true},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := getValueInBytes(tt.input)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("getValueInBytes() error = %v, wantErr %v", err, tt.wantErr)
|
||||
}
|
||||
c := NewClusterSettings(log.NewNopLogger(), http.DefaultClient, u)
|
||||
nsr, err := c.fetchAndDecodeClusterSettingsStats()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode cluster settings stats: %s", err)
|
||||
|
||||
if got != tt.want {
|
||||
t.Errorf("getValueInBytes() = %v, want %v", got, tt.want)
|
||||
}
|
||||
t.Logf("[%s/%s] Cluster Settings Stats Response: %+v", hn, filename, nsr)
|
||||
if nsr.Cluster.MaxShardsPerNode != "1000" {
|
||||
t.Errorf("Wrong value for MaxShardsPerNode")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_getValueAsRatio(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want float64
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "Ratio", input: "0.5", want: 0.5},
|
||||
{name: "Percentage", input: "50%", want: 0.5},
|
||||
{name: "Invalid", input: "500b", wantErr: true},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := getValueAsRatio(tt.input)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Fatalf("getValueAsRatio() error = %v, wantErr %v", err, tt.wantErr)
|
||||
}
|
||||
|
||||
if got != tt.want {
|
||||
t.Errorf("getValueAsRatio() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -18,26 +18,25 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/alecthomas/kingpin/v2"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"gopkg.in/alecthomas/kingpin.v2"
|
||||
)
|
||||
|
||||
const (
|
||||
// Namespace defines the common namespace to be used by all metrics.
|
||||
namespace = "elasticsearch"
|
||||
|
||||
defaultEnabled = true
|
||||
// defaultDisabled = false
|
||||
defaultEnabled = true
|
||||
defaultDisabled = false
|
||||
)
|
||||
|
||||
type factoryFunc func(logger log.Logger, u *url.URL, hc *http.Client) (Collector, error)
|
||||
type factoryFunc func(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error)
|
||||
|
||||
var (
|
||||
factories = make(map[string]factoryFunc)
|
||||
@ -90,7 +89,7 @@ func registerCollector(name string, isDefaultEnabled bool, createFunc factoryFun
|
||||
|
||||
type ElasticsearchCollector struct {
|
||||
Collectors map[string]Collector
|
||||
logger log.Logger
|
||||
logger *slog.Logger
|
||||
esURL *url.URL
|
||||
httpClient *http.Client
|
||||
}
|
||||
@ -98,7 +97,7 @@ type ElasticsearchCollector struct {
|
||||
type Option func(*ElasticsearchCollector) error
|
||||
|
||||
// NewElasticsearchCollector creates a new ElasticsearchCollector
|
||||
func NewElasticsearchCollector(logger log.Logger, filters []string, options ...Option) (*ElasticsearchCollector, error) {
|
||||
func NewElasticsearchCollector(logger *slog.Logger, filters []string, options ...Option) (*ElasticsearchCollector, error) {
|
||||
e := &ElasticsearchCollector{logger: logger}
|
||||
// Apply options to customize the collector
|
||||
for _, o := range options {
|
||||
@ -128,7 +127,7 @@ func NewElasticsearchCollector(logger log.Logger, filters []string, options ...O
|
||||
if collector, ok := initiatedCollectors[key]; ok {
|
||||
collectors[key] = collector
|
||||
} else {
|
||||
collector, err := factories[key](log.With(logger, "collector", key), e.esURL, e.httpClient)
|
||||
collector, err := factories[key](logger.With("collector", key), e.esURL, e.httpClient)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -176,7 +175,7 @@ func (e ElasticsearchCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func execute(ctx context.Context, name string, c Collector, ch chan<- prometheus.Metric, logger log.Logger) {
|
||||
func execute(ctx context.Context, name string, c Collector, ch chan<- prometheus.Metric, logger *slog.Logger) {
|
||||
begin := time.Now()
|
||||
err := c.Update(ctx, ch)
|
||||
duration := time.Since(begin)
|
||||
@ -184,13 +183,13 @@ func execute(ctx context.Context, name string, c Collector, ch chan<- prometheus
|
||||
|
||||
if err != nil {
|
||||
if IsNoDataError(err) {
|
||||
_ = level.Debug(logger).Log("msg", "collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
logger.Debug("collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
} else {
|
||||
_ = level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
logger.Warn("collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err)
|
||||
}
|
||||
success = 0
|
||||
} else {
|
||||
_ = level.Debug(logger).Log("msg", "collector succeeded", "name", name, "duration_seconds", duration.Seconds())
|
||||
logger.Debug("collector succeeded", "name", name, "duration_seconds", duration.Seconds())
|
||||
success = 1
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -13,12 +13,24 @@
|
||||
|
||||
package collector
|
||||
|
||||
var testElasticsearchVersions = []string{
|
||||
"5.4.2",
|
||||
"5.6.16",
|
||||
"6.5.4",
|
||||
"6.8.8",
|
||||
"7.3.0",
|
||||
"7.6.2",
|
||||
"7.13.1",
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// wrapCollector is a util to let you test your Collector implementation.
|
||||
//
|
||||
// Use this with prometheus/client_golang/prometheus/testutil to test metric output, for example:
|
||||
//
|
||||
// testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(want))
|
||||
type wrapCollector struct {
|
||||
c Collector
|
||||
}
|
||||
|
||||
func (w wrapCollector) Describe(_ chan<- *prometheus.Desc) {
|
||||
}
|
||||
|
||||
func (w wrapCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
w.c.Update(context.Background(), ch)
|
||||
}
|
||||
119
collector/data_stream.go
Normal file
119
collector/data_stream.go
Normal file
@ -0,0 +1,119 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
dataStreamBackingIndicesTotal = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "data_stream", "backing_indices_total"),
|
||||
"Number of backing indices",
|
||||
[]string{"data_stream"},
|
||||
nil,
|
||||
)
|
||||
dataStreamStoreSizeBytes = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "data_stream", "store_size_bytes"),
|
||||
"Store size of data stream",
|
||||
[]string{"data_stream"},
|
||||
nil,
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerCollector("data-stream", defaultDisabled, NewDataStream)
|
||||
}
|
||||
|
||||
// DataStream Information Struct
|
||||
type DataStream struct {
|
||||
logger *slog.Logger
|
||||
hc *http.Client
|
||||
u *url.URL
|
||||
}
|
||||
|
||||
// NewDataStream defines DataStream Prometheus metrics
|
||||
func NewDataStream(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
return &DataStream{
|
||||
logger: logger,
|
||||
hc: hc,
|
||||
u: u,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// DataStreamStatsResponse is a representation of the Data Stream stats
|
||||
type DataStreamStatsResponse struct {
|
||||
Shards DataStreamStatsShards `json:"_shards"`
|
||||
DataStreamCount int64 `json:"data_stream_count"`
|
||||
BackingIndices int64 `json:"backing_indices"`
|
||||
TotalStoreSizeBytes int64 `json:"total_store_size_bytes"`
|
||||
DataStreamStats []DataStreamStatsDataStream `json:"data_streams"`
|
||||
}
|
||||
|
||||
// DataStreamStatsShards defines data stream stats shards information structure
|
||||
type DataStreamStatsShards struct {
|
||||
Total int64 `json:"total"`
|
||||
Successful int64 `json:"successful"`
|
||||
Failed int64 `json:"failed"`
|
||||
}
|
||||
|
||||
// DataStreamStatsDataStream defines the structure of per data stream stats
|
||||
type DataStreamStatsDataStream struct {
|
||||
DataStream string `json:"data_stream"`
|
||||
BackingIndices int64 `json:"backing_indices"`
|
||||
StoreSizeBytes int64 `json:"store_size_bytes"`
|
||||
MaximumTimestamp int64 `json:"maximum_timestamp"`
|
||||
}
|
||||
|
||||
func (ds *DataStream) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
var dsr DataStreamStatsResponse
|
||||
|
||||
u := ds.u.ResolveReference(&url.URL{Path: "/_data_stream/*/_stats"})
|
||||
|
||||
resp, err := getURL(ctx, ds.hc, ds.logger, u.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(resp, &dsr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dataStream := range dsr.DataStreamStats {
|
||||
fmt.Printf("Metric: %+v", dataStream)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
dataStreamBackingIndicesTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(dataStream.BackingIndices),
|
||||
dataStream.DataStream,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
dataStreamStoreSizeBytes,
|
||||
prometheus.CounterValue,
|
||||
float64(dataStream.StoreSizeBytes),
|
||||
dataStream.DataStream,
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
77
collector/data_stream_test.go
Normal file
77
collector/data_stream_test.go
Normal file
@ -0,0 +1,77 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestDataStream(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "7.15.0",
|
||||
file: "../fixtures/datastream/7.15.0.json",
|
||||
want: `# HELP elasticsearch_data_stream_backing_indices_total Number of backing indices
|
||||
# TYPE elasticsearch_data_stream_backing_indices_total counter
|
||||
elasticsearch_data_stream_backing_indices_total{data_stream="bar"} 2
|
||||
elasticsearch_data_stream_backing_indices_total{data_stream="foo"} 5
|
||||
# HELP elasticsearch_data_stream_store_size_bytes Store size of data stream
|
||||
# TYPE elasticsearch_data_stream_store_size_bytes counter
|
||||
elasticsearch_data_stream_store_size_bytes{data_stream="bar"} 6.7382272e+08
|
||||
elasticsearch_data_stream_store_size_bytes{data_stream="foo"} 4.29205396e+08
|
||||
`,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
c, err := NewDataStream(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
472
collector/health_report.go
Normal file
472
collector/health_report.go
Normal file
@ -0,0 +1,472 @@
|
||||
// Copyright 2025 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
statusColors = []string{"green", "yellow", "red"}
|
||||
defaultHealthReportLabels = []string{"cluster"}
|
||||
)
|
||||
|
||||
var (
|
||||
healthReportTotalRepositories = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "total_repositories"),
|
||||
"The number of snapshot repositories",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportMaxShardsInClusterData = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "max_shards_in_cluster_data"),
|
||||
"The number of maximum shards in a cluster",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportMaxShardsInClusterFrozen = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "max_shards_in_cluster_frozen"),
|
||||
"The number of maximum frozen shards in a cluster",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportRestartingReplicas = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "restarting_replicas"),
|
||||
"The number of restarting replica shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportCreatingPrimaries = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "creating_primaries"),
|
||||
"The number of creating primary shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportInitializingReplicas = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "initializing_replicas"),
|
||||
"The number of initializing replica shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportUnassignedReplicas = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "unassigned_replicas"),
|
||||
"The number of unassigned replica shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportStartedPrimaries = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "started_primaries"),
|
||||
"The number of started primary shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportRestartingPrimaries = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "restarting_primaries"),
|
||||
"The number of restarting primary shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportInitializingPrimaries = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "initializing_primaries"),
|
||||
"The number of initializing primary shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportCreatingReplicas = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "creating_replicas"),
|
||||
"The number of creating replica shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportStartedReplicas = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "started_replicas"),
|
||||
"The number of started replica shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportUnassignedPrimaries = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "unassigned_primaries"),
|
||||
"The number of unassigned primary shards",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportSlmPolicies = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "slm_policies"),
|
||||
"The number of SLM policies",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportIlmPolicies = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "ilm_policies"),
|
||||
"The number of ILM Policies",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportIlmStagnatingIndices = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "ilm_stagnating_indices"),
|
||||
"The number of stagnating indices",
|
||||
defaultHealthReportLabels, nil,
|
||||
)
|
||||
healthReportStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "status"),
|
||||
"Overall cluster status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportMasterIsStableStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "master_is_stable_status"),
|
||||
"Master is stable status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportRepositoryIntegrityStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "repository_integrity_status"),
|
||||
"Repository integrity status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportDiskStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "disk_status"),
|
||||
"Disk status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportShardsCapacityStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "shards_capacity_status"),
|
||||
"Shards capacity status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportShardsAvailabiltystatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "shards_availabilty_status"),
|
||||
"Shards availabilty status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportDataStreamLifecycleStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "data_stream_lifecycle_status"),
|
||||
"Data stream lifecycle status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportSlmStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "slm_status"),
|
||||
"SLM status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
healthReportIlmStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "health_report", "ilm_status"),
|
||||
"ILM status",
|
||||
[]string{"cluster", "color"}, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerCollector("health-report", defaultDisabled, NewHealthReport)
|
||||
}
|
||||
|
||||
type HealthReport struct {
|
||||
logger *slog.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
}
|
||||
|
||||
func NewHealthReport(logger *slog.Logger, url *url.URL, client *http.Client) (Collector, error) {
|
||||
return &HealthReport{
|
||||
logger: logger,
|
||||
client: client,
|
||||
url: url,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type HealthReportResponse struct {
|
||||
ClusterName string `json:"cluster_name"`
|
||||
Status string `json:"status"`
|
||||
Indicators HealthReportIndicators `json:"indicators"`
|
||||
}
|
||||
|
||||
type HealthReportIndicators struct {
|
||||
MasterIsStable HealthReportMasterIsStable `json:"master_is_stable"`
|
||||
RepositoryIntegrity HealthReportRepositoryIntegrity `json:"repository_integrity"`
|
||||
Disk HealthReportDisk `json:"disk"`
|
||||
ShardsCapacity HealthReportShardsCapacity `json:"shards_capacity"`
|
||||
ShardsAvailability HealthReportShardsAvailability `json:"shards_availability"`
|
||||
DataStreamLifecycle HealthReportDataStreamLifecycle `json:"data_stream_lifecycle"`
|
||||
Slm HealthReportSlm `json:"slm"`
|
||||
Ilm HealthReportIlm `json:"ilm"`
|
||||
}
|
||||
|
||||
type HealthReportMasterIsStable struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportMasterIsStableDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportMasterIsStableDetails struct {
|
||||
CurrentMaster HealthReportMasterIsStableDetailsNode `json:"current_master"`
|
||||
RecentMasters []HealthReportMasterIsStableDetailsNode `json:"recent_masters"`
|
||||
}
|
||||
|
||||
type HealthReportMasterIsStableDetailsNode struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
type HealthReportRepositoryIntegrity struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportRepositoriyIntegrityDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportRepositoriyIntegrityDetails struct {
|
||||
TotalRepositories int `json:"total_repositories"`
|
||||
}
|
||||
|
||||
type HealthReportDisk struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportDiskDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportDiskDetails struct {
|
||||
IndicesWithReadonlyBlock int `json:"indices_with_readonly_block"`
|
||||
NodesWithEnoughDiskSpace int `json:"nodes_with_enough_disk_space"`
|
||||
NodesWithUnknownDiskStatus int `json:"nodes_with_unknown_disk_status"`
|
||||
NodesOverHighWatermark int `json:"nodes_over_high_watermark"`
|
||||
NodesOverFloodStageWatermark int `json:"nodes_over_flood_stage_watermark"`
|
||||
}
|
||||
|
||||
type HealthReportShardsCapacity struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportShardsCapacityDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportShardsCapacityDetails struct {
|
||||
Data HealthReportShardsCapacityDetailsMaxShards `json:"data"`
|
||||
Frozen HealthReportShardsCapacityDetailsMaxShards `json:"frozen"`
|
||||
}
|
||||
|
||||
type HealthReportShardsCapacityDetailsMaxShards struct {
|
||||
MaxShardsInCluster int `json:"max_shards_in_cluster"`
|
||||
}
|
||||
|
||||
type HealthReportShardsAvailability struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportShardsAvailabilityDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportShardsAvailabilityDetails struct {
|
||||
RestartingReplicas int `json:"restarting_replicas"`
|
||||
CreatingPrimaries int `json:"creating_primaries"`
|
||||
InitializingReplicas int `json:"initializing_replicas"`
|
||||
UnassignedReplicas int `json:"unassigned_replicas"`
|
||||
StartedPrimaries int `json:"started_primaries"`
|
||||
RestartingPrimaries int `json:"restarting_primaries"`
|
||||
InitializingPrimaries int `json:"initializing_primaries"`
|
||||
CreatingReplicas int `json:"creating_replicas"`
|
||||
StartedReplicas int `json:"started_replicas"`
|
||||
UnassignedPrimaries int `json:"unassigned_primaries"`
|
||||
}
|
||||
|
||||
type HealthReportDataStreamLifecycle struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
}
|
||||
|
||||
type HealthReportSlm struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportSlmDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportSlmDetails struct {
|
||||
SlmStatus string `json:"slm_status"`
|
||||
Policies int `json:"policies"`
|
||||
}
|
||||
|
||||
type HealthReportIlm struct {
|
||||
Status string `json:"status"`
|
||||
Symptom string `json:"symptom"`
|
||||
Details HealthReportIlmDetails `json:"details"`
|
||||
}
|
||||
|
||||
type HealthReportIlmDetails struct {
|
||||
Policies int `json:"policies"`
|
||||
StagnatingIndices int `json:"stagnating_indices"`
|
||||
IlmStatus string `json:"ilm_status"`
|
||||
}
|
||||
|
||||
func statusValue(value string, color string) float64 {
|
||||
if value == color {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *HealthReport) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
u := c.url.ResolveReference(&url.URL{Path: "/_health_report"})
|
||||
var healthReportResponse HealthReportResponse
|
||||
|
||||
resp, err := getURL(ctx, c.client, c.logger, u.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(resp, &healthReportResponse)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportTotalRepositories,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.RepositoryIntegrity.Details.TotalRepositories),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportMaxShardsInClusterData,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsCapacity.Details.Data.MaxShardsInCluster),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportMaxShardsInClusterFrozen,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsCapacity.Details.Frozen.MaxShardsInCluster),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportRestartingReplicas,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.RestartingReplicas),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportCreatingPrimaries,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.CreatingPrimaries),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportInitializingReplicas,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.InitializingReplicas),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportUnassignedReplicas,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.UnassignedReplicas),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportStartedPrimaries,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.StartedPrimaries),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportRestartingPrimaries,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.RestartingPrimaries),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportInitializingPrimaries,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.InitializingPrimaries),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportCreatingReplicas,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.CreatingReplicas),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportStartedReplicas,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.StartedReplicas),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportUnassignedPrimaries,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.ShardsAvailability.Details.UnassignedPrimaries),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportSlmPolicies,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.Slm.Details.Policies),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportIlmPolicies,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.Ilm.Details.Policies),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportIlmStagnatingIndices,
|
||||
prometheus.GaugeValue,
|
||||
float64(healthReportResponse.Indicators.Ilm.Details.StagnatingIndices),
|
||||
healthReportResponse.ClusterName,
|
||||
)
|
||||
|
||||
for _, color := range statusColors {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportMasterIsStableStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.MasterIsStable.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportRepositoryIntegrityStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.RepositoryIntegrity.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportDiskStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.Disk.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportShardsCapacityStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.ShardsCapacity.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportShardsAvailabiltystatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.ShardsAvailability.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportDataStreamLifecycleStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.DataStreamLifecycle.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportSlmStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.Slm.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
healthReportIlmStatus,
|
||||
prometheus.GaugeValue,
|
||||
statusValue(healthReportResponse.Indicators.Ilm.Status, color),
|
||||
healthReportResponse.ClusterName, color,
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
169
collector/health_report_test.go
Normal file
169
collector/health_report_test.go
Normal file
@ -0,0 +1,169 @@
|
||||
// Copyright 2025 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestHealthReport(t *testing.T) {
|
||||
// Testcases created using:
|
||||
// docker run -d -p 9200:9200 elasticsearch:VERSION
|
||||
// curl -XPUT http://localhost:9200/twitter
|
||||
// curl http://localhost:9200/_health_report
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "8.7.0",
|
||||
file: "../fixtures/healthreport/8.7.0.json",
|
||||
want: `
|
||||
# HELP elasticsearch_health_report_creating_primaries The number of creating primary shards
|
||||
# TYPE elasticsearch_health_report_creating_primaries gauge
|
||||
elasticsearch_health_report_creating_primaries{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_creating_replicas The number of creating replica shards
|
||||
# TYPE elasticsearch_health_report_creating_replicas gauge
|
||||
elasticsearch_health_report_creating_replicas{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_data_stream_lifecycle_status Data stream lifecycle status
|
||||
# TYPE elasticsearch_health_report_data_stream_lifecycle_status gauge
|
||||
elasticsearch_health_report_data_stream_lifecycle_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_data_stream_lifecycle_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_data_stream_lifecycle_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_disk_status Disk status
|
||||
# TYPE elasticsearch_health_report_disk_status gauge
|
||||
elasticsearch_health_report_disk_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_disk_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_disk_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_ilm_policies The number of ILM Policies
|
||||
# TYPE elasticsearch_health_report_ilm_policies gauge
|
||||
elasticsearch_health_report_ilm_policies{cluster="docker-cluster"} 17
|
||||
# HELP elasticsearch_health_report_ilm_stagnating_indices The number of stagnating indices
|
||||
# TYPE elasticsearch_health_report_ilm_stagnating_indices gauge
|
||||
elasticsearch_health_report_ilm_stagnating_indices{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_ilm_status ILM status
|
||||
# TYPE elasticsearch_health_report_ilm_status gauge
|
||||
elasticsearch_health_report_ilm_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_ilm_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_ilm_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_initializing_primaries The number of initializing primary shards
|
||||
# TYPE elasticsearch_health_report_initializing_primaries gauge
|
||||
elasticsearch_health_report_initializing_primaries{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_initializing_replicas The number of initializing replica shards
|
||||
# TYPE elasticsearch_health_report_initializing_replicas gauge
|
||||
elasticsearch_health_report_initializing_replicas{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_master_is_stable_status Master is stable status
|
||||
# TYPE elasticsearch_health_report_master_is_stable_status gauge
|
||||
elasticsearch_health_report_master_is_stable_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_master_is_stable_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_master_is_stable_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_max_shards_in_cluster_data The number of maximum shards in a cluster
|
||||
# TYPE elasticsearch_health_report_max_shards_in_cluster_data gauge
|
||||
elasticsearch_health_report_max_shards_in_cluster_data{cluster="docker-cluster"} 13500
|
||||
# HELP elasticsearch_health_report_max_shards_in_cluster_frozen The number of maximum frozen shards in a cluster
|
||||
# TYPE elasticsearch_health_report_max_shards_in_cluster_frozen gauge
|
||||
elasticsearch_health_report_max_shards_in_cluster_frozen{cluster="docker-cluster"} 9000
|
||||
# HELP elasticsearch_health_report_repository_integrity_status Repository integrity status
|
||||
# TYPE elasticsearch_health_report_repository_integrity_status gauge
|
||||
elasticsearch_health_report_repository_integrity_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_repository_integrity_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_repository_integrity_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_restarting_primaries The number of restarting primary shards
|
||||
# TYPE elasticsearch_health_report_restarting_primaries gauge
|
||||
elasticsearch_health_report_restarting_primaries{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_restarting_replicas The number of restarting replica shards
|
||||
# TYPE elasticsearch_health_report_restarting_replicas gauge
|
||||
elasticsearch_health_report_restarting_replicas{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_shards_availabilty_status Shards availabilty status
|
||||
# TYPE elasticsearch_health_report_shards_availabilty_status gauge
|
||||
elasticsearch_health_report_shards_availabilty_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_shards_availabilty_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_shards_availabilty_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_shards_capacity_status Shards capacity status
|
||||
# TYPE elasticsearch_health_report_shards_capacity_status gauge
|
||||
elasticsearch_health_report_shards_capacity_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_shards_capacity_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_shards_capacity_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_slm_policies The number of SLM policies
|
||||
# TYPE elasticsearch_health_report_slm_policies gauge
|
||||
elasticsearch_health_report_slm_policies{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_slm_status SLM status
|
||||
# TYPE elasticsearch_health_report_slm_status gauge
|
||||
elasticsearch_health_report_slm_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_slm_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_slm_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_started_primaries The number of started primary shards
|
||||
# TYPE elasticsearch_health_report_started_primaries gauge
|
||||
elasticsearch_health_report_started_primaries{cluster="docker-cluster"} 11703
|
||||
# HELP elasticsearch_health_report_started_replicas The number of started replica shards
|
||||
# TYPE elasticsearch_health_report_started_replicas gauge
|
||||
elasticsearch_health_report_started_replicas{cluster="docker-cluster"} 1701
|
||||
# HELP elasticsearch_health_report_status Overall cluster status
|
||||
# TYPE elasticsearch_health_report_status gauge
|
||||
elasticsearch_health_report_status{cluster="docker-cluster",color="green"} 1
|
||||
elasticsearch_health_report_status{cluster="docker-cluster",color="red"} 0
|
||||
elasticsearch_health_report_status{cluster="docker-cluster",color="yellow"} 0
|
||||
# HELP elasticsearch_health_report_total_repositories The number of snapshot repositories
|
||||
# TYPE elasticsearch_health_report_total_repositories gauge
|
||||
elasticsearch_health_report_total_repositories{cluster="docker-cluster"} 1
|
||||
# HELP elasticsearch_health_report_unassigned_primaries The number of unassigned primary shards
|
||||
# TYPE elasticsearch_health_report_unassigned_primaries gauge
|
||||
elasticsearch_health_report_unassigned_primaries{cluster="docker-cluster"} 0
|
||||
# HELP elasticsearch_health_report_unassigned_replicas The number of unassigned replica shards
|
||||
# TYPE elasticsearch_health_report_unassigned_replicas gauge
|
||||
elasticsearch_health_report_unassigned_replicas{cluster="docker-cluster"} 0
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
c, err := NewHealthReport(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
128
collector/ilm.go
Normal file
128
collector/ilm.go
Normal file
@ -0,0 +1,128 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
ilmStatusOptions = []string{"STOPPED", "RUNNING", "STOPPING"}
|
||||
|
||||
ilmIndexStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "ilm_index", "status"),
|
||||
"Status of ILM policy for index",
|
||||
[]string{"index", "phase", "action", "step"}, nil)
|
||||
|
||||
ilmStatus = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "ilm", "status"),
|
||||
"Current status of ILM. Status can be STOPPED, RUNNING, STOPPING.",
|
||||
[]string{"operation_mode"}, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerCollector("ilm", defaultDisabled, NewILM)
|
||||
}
|
||||
|
||||
type ILM struct {
|
||||
logger *slog.Logger
|
||||
hc *http.Client
|
||||
u *url.URL
|
||||
}
|
||||
|
||||
func NewILM(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
return &ILM{
|
||||
logger: logger,
|
||||
hc: hc,
|
||||
u: u,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type IlmResponse struct {
|
||||
Indices map[string]IlmIndexResponse `json:"indices"`
|
||||
}
|
||||
|
||||
type IlmIndexResponse struct {
|
||||
Index string `json:"index"`
|
||||
Managed bool `json:"managed"`
|
||||
Phase string `json:"phase"`
|
||||
Action string `json:"action"`
|
||||
Step string `json:"step"`
|
||||
StepTimeMillis float64 `json:"step_time_millis"`
|
||||
}
|
||||
|
||||
type IlmStatusResponse struct {
|
||||
OperationMode string `json:"operation_mode"`
|
||||
}
|
||||
|
||||
func (i *ILM) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
var ir IlmResponse
|
||||
|
||||
indexURL := i.u.ResolveReference(&url.URL{Path: "/_all/_ilm/explain"})
|
||||
|
||||
indexResp, err := getURL(ctx, i.hc, i.logger, indexURL.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load ILM url: %w", err)
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(indexResp, &ir); err != nil {
|
||||
return fmt.Errorf("failed to decode JSON body: %w", err)
|
||||
}
|
||||
|
||||
var isr IlmStatusResponse
|
||||
|
||||
indexStatusURL := i.u.ResolveReference(&url.URL{Path: "/_ilm/status"})
|
||||
|
||||
indexStatusResp, err := getURL(ctx, i.hc, i.logger, indexStatusURL.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load ILM url: %w", err)
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(indexStatusResp, &isr); err != nil {
|
||||
return fmt.Errorf("failed to decode JSON body: %w", err)
|
||||
}
|
||||
|
||||
for name, ilm := range ir.Indices {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
ilmIndexStatus,
|
||||
prometheus.GaugeValue,
|
||||
bool2Float(ilm.Managed),
|
||||
name, ilm.Phase, ilm.Action, ilm.Step,
|
||||
)
|
||||
}
|
||||
|
||||
for _, status := range ilmStatusOptions {
|
||||
statusActive := false
|
||||
if isr.OperationMode == status {
|
||||
statusActive = true
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
ilmStatus,
|
||||
prometheus.GaugeValue,
|
||||
bool2Float(statusActive),
|
||||
status,
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
94
collector/ilm_test.go
Normal file
94
collector/ilm_test.go
Normal file
@ -0,0 +1,94 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestILM(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "6.6.0",
|
||||
file: "6.6.0.json",
|
||||
want: `
|
||||
# HELP elasticsearch_ilm_index_status Status of ILM policy for index
|
||||
# TYPE elasticsearch_ilm_index_status gauge
|
||||
elasticsearch_ilm_index_status{action="",index="twitter",phase="",step=""} 0
|
||||
elasticsearch_ilm_index_status{action="complete",index="facebook",phase="new",step="complete"} 1
|
||||
# HELP elasticsearch_ilm_status Current status of ILM. Status can be STOPPED, RUNNING, STOPPING.
|
||||
# TYPE elasticsearch_ilm_status gauge
|
||||
elasticsearch_ilm_status{operation_mode="RUNNING"} 1
|
||||
elasticsearch_ilm_status{operation_mode="STOPPED"} 0
|
||||
elasticsearch_ilm_status{operation_mode="STOPPING"} 0
|
||||
`,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
indexF, err := os.Open(path.Join("../fixtures/ilm_indices", tt.file))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer indexF.Close()
|
||||
|
||||
statusF, err := os.Open(path.Join("../fixtures/ilm_status", tt.file))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer statusF.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
sm := http.NewServeMux()
|
||||
sm.HandleFunc("/_all/_ilm/explain", func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, indexF)
|
||||
})
|
||||
sm.HandleFunc("/_ilm/status", func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, statusF)
|
||||
})
|
||||
|
||||
sm.ServeHTTP(w, r)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
c, err := NewILM(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
2347
collector/indices.go
2347
collector/indices.go
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -16,19 +16,16 @@ package collector
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
defaultIndicesMappingsLabels = []string{"index"}
|
||||
)
|
||||
var defaultIndicesMappingsLabels = []string{"index"}
|
||||
|
||||
type indicesMappingsMetric struct {
|
||||
Type prometheus.ValueType
|
||||
@ -38,18 +35,15 @@ type indicesMappingsMetric struct {
|
||||
|
||||
// IndicesMappings information struct
|
||||
type IndicesMappings struct {
|
||||
logger log.Logger
|
||||
logger *slog.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
|
||||
up prometheus.Gauge
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
|
||||
metrics []*indicesMappingsMetric
|
||||
}
|
||||
|
||||
// NewIndicesMappings defines Indices IndexMappings Prometheus metrics
|
||||
func NewIndicesMappings(logger log.Logger, client *http.Client, url *url.URL) *IndicesMappings {
|
||||
func NewIndicesMappings(logger *slog.Logger, client *http.Client, url *url.URL) *IndicesMappings {
|
||||
subsystem := "indices_mappings_stats"
|
||||
|
||||
return &IndicesMappings{
|
||||
@ -57,18 +51,6 @@ func NewIndicesMappings(logger log.Logger, client *http.Client, url *url.URL) *I
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, subsystem, "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch Indices Mappings endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, subsystem, "scrapes_total"),
|
||||
Help: "Current total Elasticsearch Indices Mappings scrapes.",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, subsystem, "json_parse_failures_total"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
metrics: []*indicesMappingsMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
@ -88,8 +70,9 @@ func NewIndicesMappings(logger log.Logger, client *http.Client, url *url.URL) *I
|
||||
func countFieldsRecursive(properties IndexMappingProperties, fieldCounter float64) float64 {
|
||||
// iterate over all properties
|
||||
for _, property := range properties {
|
||||
if property.Type != nil {
|
||||
// property has a type set - counts as a field
|
||||
if property.Type != nil && *property.Type != "object" {
|
||||
// property has a type set - counts as a field unless the value is object
|
||||
// as the recursion below will handle counting that
|
||||
fieldCounter++
|
||||
|
||||
// iterate over all fields of that property
|
||||
@ -103,7 +86,7 @@ func countFieldsRecursive(properties IndexMappingProperties, fieldCounter float6
|
||||
|
||||
// count recursively in case the property has more properties
|
||||
if property.Properties != nil {
|
||||
fieldCounter = +countFieldsRecursive(property.Properties, fieldCounter)
|
||||
fieldCounter = 1 + countFieldsRecursive(property.Properties, fieldCounter)
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,10 +98,6 @@ func (im *IndicesMappings) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range im.metrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
|
||||
ch <- im.up.Desc()
|
||||
ch <- im.totalScrapes.Desc()
|
||||
ch <- im.jsonParseFailures.Desc()
|
||||
}
|
||||
|
||||
func (im *IndicesMappings) getAndParseURL(u *url.URL) (*IndicesMappingsResponse, error) {
|
||||
@ -132,21 +111,20 @@ func (im *IndicesMappings) getAndParseURL(u *url.URL) (*IndicesMappingsResponse,
|
||||
return nil, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(res.Body)
|
||||
body, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
_ = level.Warn(im.logger).Log("msg", "failed to read response body", "err", err)
|
||||
im.logger.Warn("failed to read response body", "err", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(im.logger).Log("msg", "failed to close response body", "err", err)
|
||||
im.logger.Warn("failed to close response body", "err", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var imr IndicesMappingsResponse
|
||||
if err := json.Unmarshal(body, &imr); err != nil {
|
||||
im.jsonParseFailures.Inc()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -161,24 +139,14 @@ func (im *IndicesMappings) fetchAndDecodeIndicesMappings() (*IndicesMappingsResp
|
||||
|
||||
// Collect gets all indices mappings metric values
|
||||
func (im *IndicesMappings) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
im.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- im.up
|
||||
ch <- im.totalScrapes
|
||||
ch <- im.jsonParseFailures
|
||||
}()
|
||||
|
||||
indicesMappingsResponse, err := im.fetchAndDecodeIndicesMappings()
|
||||
if err != nil {
|
||||
im.up.Set(0)
|
||||
_ = level.Warn(im.logger).Log(
|
||||
"msg", "failed to fetch and decode cluster mappings stats",
|
||||
im.logger.Warn(
|
||||
"failed to fetch and decode cluster mappings stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
im.up.Set(1)
|
||||
|
||||
for _, metric := range im.metrics {
|
||||
for indexName, mappings := range *indicesMappingsResponse {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,13 +14,16 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestMapping(t *testing.T) {
|
||||
@ -66,96 +69,58 @@ func TestMapping(t *testing.T) {
|
||||
}
|
||||
}'*/
|
||||
// curl http://localhost:9200/_all/_mapping
|
||||
tcs := map[string]string{
|
||||
"7.8.0": `{
|
||||
"facebook": {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"contact": {
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"raw": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"phone": {
|
||||
"type": "text"
|
||||
}
|
||||
}
|
||||
},
|
||||
"name": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"raw": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"twitter": {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"phone": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`,
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "7.8.0",
|
||||
file: "../fixtures/indices_mappings/7.8.0.json",
|
||||
want: `
|
||||
# HELP elasticsearch_indices_mappings_stats_fields Current number fields within cluster.
|
||||
# TYPE elasticsearch_indices_mappings_stats_fields gauge
|
||||
elasticsearch_indices_mappings_stats_fields{index="facebook"} 6
|
||||
elasticsearch_indices_mappings_stats_fields{index="twitter"} 2
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "counts",
|
||||
file: "../fixtures/indices_mappings/counts.json",
|
||||
want: `
|
||||
# HELP elasticsearch_indices_mappings_stats_fields Current number fields within cluster.
|
||||
# TYPE elasticsearch_indices_mappings_stats_fields gauge
|
||||
elasticsearch_indices_mappings_stats_fields{index="test-data-2023.01.20"} 40
|
||||
`,
|
||||
},
|
||||
}
|
||||
for ver, out := range tcs {
|
||||
for hn, handler := range map[string]http.Handler{
|
||||
"plain": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, out)
|
||||
}),
|
||||
} {
|
||||
ts := httptest.NewServer(handler)
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
c := NewIndicesMappings(log.NewNopLogger(), http.DefaultClient, u)
|
||||
imr, err := c.fetchAndDecodeIndicesMappings()
|
||||
|
||||
c := NewIndicesMappings(promslog.NewNopLogger(), http.DefaultClient, u)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode indices mappings: %s", err)
|
||||
}
|
||||
t.Logf("[%s/%s] All Indices Mappings Response: %+v", hn, ver, imr)
|
||||
|
||||
response := *imr
|
||||
if *response["facebook"].Mappings.Properties["contact"].Properties["phone"].Type != "text" {
|
||||
t.Errorf("Marshalling error at facebook.contact.phone")
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if *response["facebook"].Mappings.Properties["contact"].Properties["email"].Fields["raw"].Type != "keyword" {
|
||||
t.Errorf("Marshalling error at facebook.contact.email.raw")
|
||||
if err := testutil.CollectAndCompare(c, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if *response["facebook"].Mappings.Properties["name"].Type != "text" {
|
||||
t.Errorf("Marshalling error at facebook.name")
|
||||
}
|
||||
|
||||
if *response["facebook"].Mappings.Properties["name"].Fields["raw"].Type != "keyword" {
|
||||
t.Errorf("Marshalling error at facebook.name.raw")
|
||||
}
|
||||
|
||||
if *response["twitter"].Mappings.Properties["email"].Type != "keyword" {
|
||||
t.Errorf("Marshalling error at twitter.email")
|
||||
}
|
||||
|
||||
if *response["twitter"].Mappings.Properties["phone"].Type != "keyword" {
|
||||
t.Errorf("Marshalling error at twitter.phone")
|
||||
}
|
||||
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -89,16 +89,17 @@ type IndexStatsIndexStoreResponse struct {
|
||||
|
||||
// IndexStatsIndexIndexingResponse defines index stats index indexing information structure
|
||||
type IndexStatsIndexIndexingResponse struct {
|
||||
IndexTotal int64 `json:"index_total"`
|
||||
IndexTimeInMillis int64 `json:"index_time_in_millis"`
|
||||
IndexCurrent int64 `json:"index_current"`
|
||||
IndexFailed int64 `json:"index_failed"`
|
||||
DeleteTotal int64 `json:"delete_total"`
|
||||
DeleteTimeInMillis int64 `json:"delete_time_in_millis"`
|
||||
DeleteCurrent int64 `json:"delete_current"`
|
||||
NoopUpdateTotal int64 `json:"noop_update_total"`
|
||||
IsThrottled bool `json:"is_throttled"`
|
||||
ThrottleTimeInMillis int64 `json:"throttle_time_in_millis"`
|
||||
IndexTotal int64 `json:"index_total"`
|
||||
IndexTimeInMillis int64 `json:"index_time_in_millis"`
|
||||
IndexCurrent int64 `json:"index_current"`
|
||||
IndexFailed *int64 `json:"index_failed,omitempty"`
|
||||
DeleteTotal int64 `json:"delete_total"`
|
||||
DeleteTimeInMillis int64 `json:"delete_time_in_millis"`
|
||||
DeleteCurrent int64 `json:"delete_current"`
|
||||
NoopUpdateTotal int64 `json:"noop_update_total"`
|
||||
IsThrottled bool `json:"is_throttled"`
|
||||
ThrottleTimeInMillis int64 `json:"throttle_time_in_millis"`
|
||||
WriteLoad *float64 `json:"write_load,omitempty"`
|
||||
}
|
||||
|
||||
// IndexStatsIndexGetResponse defines index stats index get information structure
|
||||
@ -145,9 +146,11 @@ type IndexStatsIndexMergesResponse struct {
|
||||
|
||||
// IndexStatsIndexRefreshResponse defines index stats index refresh information structure
|
||||
type IndexStatsIndexRefreshResponse struct {
|
||||
Total int64 `json:"total"`
|
||||
TotalTimeInMillis int64 `json:"total_time_in_millis"`
|
||||
Listeners int64 `json:"listeners"`
|
||||
Total int64 `json:"total"`
|
||||
TotalTimeInMillis int64 `json:"total_time_in_millis"`
|
||||
ExternalTotal int64 `json:"external_total"`
|
||||
ExternalTotalTimeInMillis int64 `json:"external_total_time_in_millis"`
|
||||
Listeners int64 `json:"listeners"`
|
||||
}
|
||||
|
||||
// IndexStatsIndexFlushResponse defines index stats index flush information structure
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -16,33 +16,31 @@ package collector
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"strconv"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// IndicesSettings information struct
|
||||
type IndicesSettings struct {
|
||||
logger log.Logger
|
||||
logger *slog.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
|
||||
up prometheus.Gauge
|
||||
readOnlyIndices prometheus.Gauge
|
||||
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
metrics []*indicesSettingsMetric
|
||||
metrics []*indicesSettingsMetric
|
||||
}
|
||||
|
||||
var (
|
||||
defaultIndicesTotalFieldsLabels = []string{"index"}
|
||||
defaultTotalFieldsValue = 1000 //es default configuration for total fields
|
||||
defaultTotalFieldsValue = 1000 // es default configuration for total fields
|
||||
defaultDateCreation = 0 // es index default creation date
|
||||
)
|
||||
|
||||
type indicesSettingsMetric struct {
|
||||
@ -52,28 +50,17 @@ type indicesSettingsMetric struct {
|
||||
}
|
||||
|
||||
// NewIndicesSettings defines Indices Settings Prometheus metrics
|
||||
func NewIndicesSettings(logger log.Logger, client *http.Client, url *url.URL) *IndicesSettings {
|
||||
func NewIndicesSettings(logger *slog.Logger, client *http.Client, url *url.URL) *IndicesSettings {
|
||||
return &IndicesSettings{
|
||||
logger: logger,
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "indices_settings_stats", "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch Indices Settings endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "indices_settings_stats", "total_scrapes"),
|
||||
Help: "Current total Elasticsearch Indices Settings scrapes.",
|
||||
}),
|
||||
readOnlyIndices: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "indices_settings_stats", "read_only_indices"),
|
||||
Help: "Current number of read only indices within cluster",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "indices_settings_stats", "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
|
||||
metrics: []*indicesSettingsMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
@ -90,16 +77,47 @@ func NewIndicesSettings(logger log.Logger, client *http.Client, url *url.URL) *I
|
||||
return val
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indices_settings", "replicas"),
|
||||
"index setting number_of_replicas",
|
||||
defaultIndicesTotalFieldsLabels, nil,
|
||||
),
|
||||
Value: func(indexSettings Settings) float64 {
|
||||
val, err := strconv.ParseFloat(indexSettings.IndexInfo.NumberOfReplicas, 64)
|
||||
if err != nil {
|
||||
return float64(defaultTotalFieldsValue)
|
||||
}
|
||||
return val
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indices_settings", "creation_timestamp_seconds"),
|
||||
"index setting creation_date",
|
||||
defaultIndicesTotalFieldsLabels, nil,
|
||||
),
|
||||
Value: func(indexSettings Settings) float64 {
|
||||
val, err := strconv.ParseFloat(indexSettings.IndexInfo.CreationDate, 64)
|
||||
if err != nil {
|
||||
return float64(defaultDateCreation)
|
||||
}
|
||||
return val / 1000.0
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Describe add Snapshots metrics descriptions
|
||||
func (cs *IndicesSettings) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- cs.up.Desc()
|
||||
ch <- cs.totalScrapes.Desc()
|
||||
ch <- cs.readOnlyIndices.Desc()
|
||||
ch <- cs.jsonParseFailures.Desc()
|
||||
|
||||
for _, metric := range cs.metrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
}
|
||||
|
||||
func (cs *IndicesSettings) getAndParseURL(u *url.URL, data interface{}) error {
|
||||
@ -112,8 +130,8 @@ func (cs *IndicesSettings) getAndParseURL(u *url.URL, data interface{}) error {
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(cs.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
cs.logger.Warn(
|
||||
"failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
@ -123,21 +141,18 @@ func (cs *IndicesSettings) getAndParseURL(u *url.URL, data interface{}) error {
|
||||
return fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
bts, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
cs.jsonParseFailures.Inc()
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, data); err != nil {
|
||||
cs.jsonParseFailures.Inc()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cs *IndicesSettings) fetchAndDecodeIndicesSettings() (IndicesSettingsResponse, error) {
|
||||
|
||||
u := *cs.url
|
||||
u.Path = path.Join(u.Path, "/_all/_settings")
|
||||
var asr IndicesSettingsResponse
|
||||
@ -151,26 +166,15 @@ func (cs *IndicesSettings) fetchAndDecodeIndicesSettings() (IndicesSettingsRespo
|
||||
|
||||
// Collect gets all indices settings metric values
|
||||
func (cs *IndicesSettings) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
cs.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- cs.up
|
||||
ch <- cs.totalScrapes
|
||||
ch <- cs.jsonParseFailures
|
||||
ch <- cs.readOnlyIndices
|
||||
}()
|
||||
|
||||
asr, err := cs.fetchAndDecodeIndicesSettings()
|
||||
if err != nil {
|
||||
cs.readOnlyIndices.Set(0)
|
||||
cs.up.Set(0)
|
||||
_ = level.Warn(cs.logger).Log(
|
||||
"msg", "failed to fetch and decode cluster settings stats",
|
||||
cs.logger.Warn(
|
||||
"failed to fetch and decode cluster settings stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
cs.up.Set(1)
|
||||
|
||||
var c int
|
||||
for indexName, value := range asr {
|
||||
@ -187,4 +191,6 @@ func (cs *IndicesSettings) Collect(ch chan<- prometheus.Metric) {
|
||||
}
|
||||
}
|
||||
cs.readOnlyIndices.Set(float64(c))
|
||||
|
||||
ch <- cs.readOnlyIndices
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -28,8 +28,10 @@ type Settings struct {
|
||||
|
||||
// IndexInfo defines the blocks of the current index
|
||||
type IndexInfo struct {
|
||||
Blocks Blocks `json:"blocks"`
|
||||
Mapping Mapping `json:"mapping"`
|
||||
Blocks Blocks `json:"blocks"`
|
||||
Mapping Mapping `json:"mapping"`
|
||||
NumberOfReplicas string `json:"number_of_replicas"`
|
||||
CreationDate string `json:"creation_date"`
|
||||
}
|
||||
|
||||
// Blocks defines whether current index has read_only_allow_delete enabled
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,13 +14,17 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestIndicesSettings(t *testing.T) {
|
||||
@ -54,53 +58,65 @@ func TestIndicesSettings(t *testing.T) {
|
||||
|
||||
// curl http://localhost:9200/_all/_settings
|
||||
|
||||
tcs := map[string]string{
|
||||
"6.5.4": `{"viber":{"settings":{"index":{"creation_date":"1618593207186","number_of_shards":"5","number_of_replicas":"1","uuid":"lWg86KTARzO3r7lELytT1Q","version":{"created":"6050499"},"provided_name":"viber"}}},"instagram":{"settings":{"index":{"mapping":{"total_fields":{"limit":"10000"}},"number_of_shards":"5","blocks":{"read_only_allow_delete":"true"},"provided_name":"instagram","creation_date":"1618593203353","number_of_replicas":"1","uuid":"msb6eG7aT8GmNe-a4oyVtQ","version":{"created":"6050499"}}}},"twitter":{"settings":{"index":{"number_of_shards":"5","blocks":{"read_only_allow_delete":"true"},"provided_name":"twitter","creation_date":"1618593193641","number_of_replicas":"1","uuid":"YRUT8t4aSkKsNmGl7K3y4Q","version":{"created":"6050499"}}}},"facebook":{"settings":{"index":{"creation_date":"1618593199101","number_of_shards":"5","number_of_replicas":"1","uuid":"trZhb_YOTV-RWKitTYw81A","version":{"created":"6050499"},"provided_name":"facebook"}}}}`,
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "6.5.4",
|
||||
file: "6.5.4.json",
|
||||
want: `# HELP elasticsearch_indices_settings_creation_timestamp_seconds index setting creation_date
|
||||
# TYPE elasticsearch_indices_settings_creation_timestamp_seconds gauge
|
||||
elasticsearch_indices_settings_creation_timestamp_seconds{index="facebook"} 1.618593199101e+09
|
||||
elasticsearch_indices_settings_creation_timestamp_seconds{index="instagram"} 1.618593203353e+09
|
||||
elasticsearch_indices_settings_creation_timestamp_seconds{index="twitter"} 1.618593193641e+09
|
||||
elasticsearch_indices_settings_creation_timestamp_seconds{index="viber"} 1.618593207186e+09
|
||||
# HELP elasticsearch_indices_settings_replicas index setting number_of_replicas
|
||||
# TYPE elasticsearch_indices_settings_replicas gauge
|
||||
elasticsearch_indices_settings_replicas{index="facebook"} 1
|
||||
elasticsearch_indices_settings_replicas{index="instagram"} 1
|
||||
elasticsearch_indices_settings_replicas{index="twitter"} 1
|
||||
elasticsearch_indices_settings_replicas{index="viber"} 1
|
||||
# HELP elasticsearch_indices_settings_stats_read_only_indices Current number of read only indices within cluster
|
||||
# TYPE elasticsearch_indices_settings_stats_read_only_indices gauge
|
||||
elasticsearch_indices_settings_stats_read_only_indices 2
|
||||
# HELP elasticsearch_indices_settings_total_fields index mapping setting for total_fields
|
||||
# TYPE elasticsearch_indices_settings_total_fields gauge
|
||||
elasticsearch_indices_settings_total_fields{index="facebook"} 1000
|
||||
elasticsearch_indices_settings_total_fields{index="instagram"} 10000
|
||||
elasticsearch_indices_settings_total_fields{index="twitter"} 1000
|
||||
elasticsearch_indices_settings_total_fields{index="viber"} 1000
|
||||
`,
|
||||
},
|
||||
}
|
||||
for ver, out := range tcs {
|
||||
for hn, handler := range map[string]http.Handler{
|
||||
"plain": http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, out)
|
||||
}),
|
||||
} {
|
||||
ts := httptest.NewServer(handler)
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(path.Join("../fixtures/indices_settings", tt.file))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
c := NewIndicesSettings(log.NewNopLogger(), http.DefaultClient, u)
|
||||
nsr, err := c.fetchAndDecodeIndicesSettings()
|
||||
|
||||
c := NewIndicesSettings(promslog.NewNopLogger(), http.DefaultClient, u)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode indices settings: %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Logf("[%s/%s] All Indices Settings Response: %+v", hn, ver, nsr)
|
||||
// if nsr.Cluster.Routing.Allocation.Enabled != "ALL" {
|
||||
// t.Errorf("Wrong setting for cluster routing allocation enabled")
|
||||
// }
|
||||
var counter int
|
||||
var totalFields int
|
||||
for key, value := range nsr {
|
||||
if value.Settings.IndexInfo.Blocks.ReadOnly == "true" {
|
||||
counter++
|
||||
if key != "instagram" && key != "twitter" {
|
||||
t.Errorf("Wrong read_only index")
|
||||
}
|
||||
}
|
||||
if value.Settings.IndexInfo.Mapping.TotalFields.Limit == "10000" {
|
||||
totalFields++
|
||||
if key != "instagram" {
|
||||
t.Errorf("Expected 10000 total_fields only for instagram")
|
||||
}
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(c, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if counter != 2 {
|
||||
t.Errorf("Wrong number of read_only indexes")
|
||||
}
|
||||
if totalFields != 1 {
|
||||
t.Errorf(("Wrong number of total_fields found"))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -16,23 +16,30 @@ package collector
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func getRoles(node NodeStatsNodeResponse) map[string]bool {
|
||||
// default settings (2.x) and map, which roles to consider
|
||||
roles := map[string]bool{
|
||||
"master": false,
|
||||
"data": false,
|
||||
"ingest": false,
|
||||
"client": true,
|
||||
"master": false,
|
||||
"data": false,
|
||||
"data_hot": false,
|
||||
"data_warm": false,
|
||||
"data_cold": false,
|
||||
"data_frozen": false,
|
||||
"data_content": false,
|
||||
"ml": false,
|
||||
"remote_cluster_client": false,
|
||||
"transform": false,
|
||||
"ingest": false,
|
||||
"client": true,
|
||||
}
|
||||
// assumption: a 5.x node has at least one role, otherwise it's a 1.7 or 2.x node
|
||||
if len(node.Roles) > 0 {
|
||||
@ -62,32 +69,18 @@ func getRoles(node NodeStatsNodeResponse) map[string]bool {
|
||||
return roles
|
||||
}
|
||||
|
||||
func createRoleMetric(role string) *nodeMetric {
|
||||
return &nodeMetric{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "nodes", "roles"),
|
||||
"Node roles",
|
||||
defaultRoleLabels, prometheus.Labels{"role": role},
|
||||
),
|
||||
Value: func(node NodeStatsNodeResponse) float64 {
|
||||
return 1.0
|
||||
},
|
||||
Labels: func(cluster string, node NodeStatsNodeResponse) []string {
|
||||
return []string{
|
||||
cluster,
|
||||
node.Host,
|
||||
node.Name,
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
var nodesRolesMetric = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "nodes", "roles"),
|
||||
"Node roles",
|
||||
append(defaultRoleLabels, "role"), nil,
|
||||
)
|
||||
|
||||
var (
|
||||
defaultNodeLabels = []string{"cluster", "host", "name", "es_master_node", "es_data_node", "es_ingest_node", "es_client_node"}
|
||||
defaultRoleLabels = []string{"cluster", "host", "name"}
|
||||
defaultRoleLabels = []string{"cluster", "host", "name", "node"}
|
||||
defaultThreadPoolLabels = append(defaultNodeLabels, "type")
|
||||
defaultBreakerLabels = append(defaultNodeLabels, "breaker")
|
||||
defaultIndexingPressureLabels = []string{"cluster", "host", "name", "indexing_pressure"}
|
||||
defaultFilesystemDataLabels = append(defaultNodeLabels, "mount", "path")
|
||||
defaultFilesystemIODeviceLabels = append(defaultNodeLabels, "device")
|
||||
defaultCacheLabels = append(defaultNodeLabels, "cache")
|
||||
@ -142,6 +135,13 @@ type breakerMetric struct {
|
||||
Labels func(cluster string, node NodeStatsNodeResponse, breaker string) []string
|
||||
}
|
||||
|
||||
type indexingPressureMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(indexingPressureStats NodeStatsIndexingPressureResponse) float64
|
||||
Labels func(cluster string, node NodeStatsNodeResponse, indexingPressure string) []string
|
||||
}
|
||||
|
||||
type threadPoolMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
@ -165,25 +165,23 @@ type filesystemIODeviceMetric struct {
|
||||
|
||||
// Nodes information struct
|
||||
type Nodes struct {
|
||||
logger log.Logger
|
||||
logger *slog.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
all bool
|
||||
node string
|
||||
|
||||
up prometheus.Gauge
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
|
||||
nodeMetrics []*nodeMetric
|
||||
gcCollectionMetrics []*gcCollectionMetric
|
||||
breakerMetrics []*breakerMetric
|
||||
indexingPressureMetrics []*indexingPressureMetric
|
||||
threadPoolMetrics []*threadPoolMetric
|
||||
filesystemDataMetrics []*filesystemDataMetric
|
||||
filesystemIODeviceMetrics []*filesystemIODeviceMetric
|
||||
}
|
||||
|
||||
// NewNodes defines Nodes Prometheus metrics
|
||||
func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, node string) *Nodes {
|
||||
func NewNodes(logger *slog.Logger, client *http.Client, url *url.URL, all bool, node string) *Nodes {
|
||||
return &Nodes{
|
||||
logger: logger,
|
||||
client: client,
|
||||
@ -191,19 +189,6 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no
|
||||
all: all,
|
||||
node: node,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "node_stats", "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch nodes endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "node_stats", "total_scrapes"),
|
||||
Help: "Current total Elasticsearch node scrapes.",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "node_stats", "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
|
||||
nodeMetrics: []*nodeMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
@ -506,7 +491,7 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no
|
||||
Labels: defaultNodeLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indices", "translog_size_in_bytes"),
|
||||
"Total translog size in bytes",
|
||||
@ -613,6 +598,30 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no
|
||||
},
|
||||
Labels: defaultNodeLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indices_refresh", "external_total"),
|
||||
"Total external refreshes",
|
||||
defaultNodeLabels, nil,
|
||||
),
|
||||
Value: func(node NodeStatsNodeResponse) float64 {
|
||||
return float64(node.Indices.Refresh.ExternalTotal)
|
||||
},
|
||||
Labels: defaultNodeLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indices_refresh", "external_time_seconds_total"),
|
||||
"Total time spent external refreshing in seconds",
|
||||
defaultNodeLabels, nil,
|
||||
),
|
||||
Value: func(node NodeStatsNodeResponse) float64 {
|
||||
return float64(node.Indices.Refresh.ExternalTotalTimeInMillis) / 1000
|
||||
},
|
||||
Labels: defaultNodeLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
@ -1599,6 +1608,46 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no
|
||||
},
|
||||
},
|
||||
},
|
||||
indexingPressureMetrics: []*indexingPressureMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indexing_pressure", "current_all_in_bytes"),
|
||||
"Memory consumed, in bytes, by indexing requests in the coordinating, primary, or replica stage.",
|
||||
defaultIndexingPressureLabels, nil,
|
||||
),
|
||||
Value: func(indexingPressureMem NodeStatsIndexingPressureResponse) float64 {
|
||||
return float64(indexingPressureMem.Current.AllInBytes)
|
||||
},
|
||||
Labels: func(cluster string, node NodeStatsNodeResponse, indexingPressure string) []string {
|
||||
return []string{
|
||||
cluster,
|
||||
node.Host,
|
||||
node.Name,
|
||||
indexingPressure,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "indexing_pressure", "limit_in_bytes"),
|
||||
"Configured memory limit, in bytes, for the indexing requests",
|
||||
defaultIndexingPressureLabels, nil,
|
||||
),
|
||||
Value: func(indexingPressureStats NodeStatsIndexingPressureResponse) float64 {
|
||||
return float64(indexingPressureStats.LimitInBytes)
|
||||
},
|
||||
Labels: func(cluster string, node NodeStatsNodeResponse, indexingPressure string) []string {
|
||||
return []string{
|
||||
cluster,
|
||||
node.Host,
|
||||
node.Name,
|
||||
indexingPressure,
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
threadPoolMetrics: []*threadPoolMetric{
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
@ -1778,12 +1827,20 @@ func NewNodes(logger log.Logger, client *http.Client, url *url.URL, all bool, no
|
||||
|
||||
// Describe add metrics descriptions
|
||||
func (c *Nodes) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- nodesRolesMetric
|
||||
|
||||
for _, metric := range c.nodeMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
for _, metric := range c.gcCollectionMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
for _, metric := range c.breakerMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
for _, metric := range c.indexingPressureMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
for _, metric := range c.threadPoolMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
@ -1793,9 +1850,6 @@ func (c *Nodes) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range c.filesystemIODeviceMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
ch <- c.up.Desc()
|
||||
ch <- c.totalScrapes.Desc()
|
||||
ch <- c.jsonParseFailures.Desc()
|
||||
}
|
||||
|
||||
func (c *Nodes) fetchAndDecodeNodeStats() (nodeStatsResponse, error) {
|
||||
@ -1818,8 +1872,8 @@ func (c *Nodes) fetchAndDecodeNodeStats() (nodeStatsResponse, error) {
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(c.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
c.logger.Warn(
|
||||
"failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
@ -1829,14 +1883,12 @@ func (c *Nodes) fetchAndDecodeNodeStats() (nodeStatsResponse, error) {
|
||||
return nsr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
bts, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
c.jsonParseFailures.Inc()
|
||||
return nsr, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &nsr); err != nil {
|
||||
c.jsonParseFailures.Inc()
|
||||
return nsr, err
|
||||
}
|
||||
return nsr, nil
|
||||
@ -1844,38 +1896,39 @@ func (c *Nodes) fetchAndDecodeNodeStats() (nodeStatsResponse, error) {
|
||||
|
||||
// Collect gets nodes metric values
|
||||
func (c *Nodes) Collect(ch chan<- prometheus.Metric) {
|
||||
c.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- c.up
|
||||
ch <- c.totalScrapes
|
||||
ch <- c.jsonParseFailures
|
||||
}()
|
||||
|
||||
nodeStatsResp, err := c.fetchAndDecodeNodeStats()
|
||||
if err != nil {
|
||||
c.up.Set(0)
|
||||
_ = level.Warn(c.logger).Log(
|
||||
"msg", "failed to fetch and decode node stats",
|
||||
c.logger.Warn(
|
||||
"failed to fetch and decode node stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
c.up.Set(1)
|
||||
|
||||
for _, node := range nodeStatsResp.Nodes {
|
||||
for nodeID, node := range nodeStatsResp.Nodes {
|
||||
// Handle the node labels metric
|
||||
roles := getRoles(node)
|
||||
|
||||
for _, role := range []string{"master", "data", "client", "ingest"} {
|
||||
if roles[role] {
|
||||
metric := createRoleMetric(role)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(node),
|
||||
metric.Labels(nodeStatsResp.ClusterName, node)...,
|
||||
)
|
||||
for role, roleEnabled := range roles {
|
||||
val := 0.0
|
||||
if roleEnabled {
|
||||
val = 1.0
|
||||
}
|
||||
|
||||
labels := []string{
|
||||
nodeStatsResp.ClusterName,
|
||||
node.Host,
|
||||
node.Name,
|
||||
nodeID,
|
||||
role,
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
nodesRolesMetric,
|
||||
prometheus.GaugeValue,
|
||||
val,
|
||||
labels...,
|
||||
)
|
||||
}
|
||||
|
||||
for _, metric := range c.nodeMetrics {
|
||||
@ -1911,6 +1964,18 @@ func (c *Nodes) Collect(ch chan<- prometheus.Metric) {
|
||||
}
|
||||
}
|
||||
|
||||
// Indexing Pressure stats
|
||||
for indexingPressure, ipstats := range node.IndexingPressure {
|
||||
for _, metric := range c.indexingPressureMetrics {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(ipstats),
|
||||
metric.Labels(nodeStatsResp.ClusterName, node, indexingPressure)...,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Thread Pool stats
|
||||
for pool, pstats := range node.ThreadPool {
|
||||
for _, metric := range c.threadPoolMetrics {
|
||||
@ -1946,6 +2011,5 @@ func (c *Nodes) Collect(ch chan<- prometheus.Metric) {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -23,23 +23,24 @@ type nodeStatsResponse struct {
|
||||
|
||||
// NodeStatsNodeResponse defines node stats information structure for nodes
|
||||
type NodeStatsNodeResponse struct {
|
||||
Name string `json:"name"`
|
||||
Host string `json:"host"`
|
||||
Timestamp int64 `json:"timestamp"`
|
||||
TransportAddress string `json:"transport_address"`
|
||||
Hostname string `json:"hostname"`
|
||||
Roles []string `json:"roles"`
|
||||
Attributes map[string]string `json:"attributes"`
|
||||
Indices NodeStatsIndicesResponse `json:"indices"`
|
||||
OS NodeStatsOSResponse `json:"os"`
|
||||
Network NodeStatsNetworkResponse `json:"network"`
|
||||
FS NodeStatsFSResponse `json:"fs"`
|
||||
ThreadPool map[string]NodeStatsThreadPoolPoolResponse `json:"thread_pool"`
|
||||
JVM NodeStatsJVMResponse `json:"jvm"`
|
||||
Breakers map[string]NodeStatsBreakersResponse `json:"breakers"`
|
||||
HTTP map[string]interface{} `json:"http"`
|
||||
Transport NodeStatsTransportResponse `json:"transport"`
|
||||
Process NodeStatsProcessResponse `json:"process"`
|
||||
Name string `json:"name"`
|
||||
Host string `json:"host"`
|
||||
Timestamp int64 `json:"timestamp"`
|
||||
TransportAddress string `json:"transport_address"`
|
||||
Hostname string `json:"hostname"`
|
||||
Roles []string `json:"roles"`
|
||||
Attributes map[string]string `json:"attributes"`
|
||||
Indices NodeStatsIndicesResponse `json:"indices"`
|
||||
OS NodeStatsOSResponse `json:"os"`
|
||||
Network NodeStatsNetworkResponse `json:"network"`
|
||||
FS NodeStatsFSResponse `json:"fs"`
|
||||
ThreadPool map[string]NodeStatsThreadPoolPoolResponse `json:"thread_pool"`
|
||||
JVM NodeStatsJVMResponse `json:"jvm"`
|
||||
Breakers map[string]NodeStatsBreakersResponse `json:"breakers"`
|
||||
HTTP map[string]interface{} `json:"http"`
|
||||
Transport NodeStatsTransportResponse `json:"transport"`
|
||||
Process NodeStatsProcessResponse `json:"process"`
|
||||
IndexingPressure map[string]NodeStatsIndexingPressureResponse `json:"indexing_pressure"`
|
||||
}
|
||||
|
||||
// NodeStatsBreakersResponse is a representation of a statistics about the field data circuit breaker
|
||||
@ -50,6 +51,17 @@ type NodeStatsBreakersResponse struct {
|
||||
Tripped int64 `json:"tripped"`
|
||||
}
|
||||
|
||||
// NodeStatsIndexingPressureResponse is a representation of a elasticsearch indexing pressure
|
||||
type NodeStatsIndexingPressureResponse struct {
|
||||
Current NodeStatsIndexingPressureCurrentResponse `json:"current"`
|
||||
LimitInBytes int64 `json:"limit_in_bytes"`
|
||||
}
|
||||
|
||||
// NodeStatsIndexingPressureMemoryCurrentResponse is a representation of a elasticsearch indexing pressure current memory usage
|
||||
type NodeStatsIndexingPressureCurrentResponse struct {
|
||||
AllInBytes int64 `json:"all_in_bytes"`
|
||||
}
|
||||
|
||||
// NodeStatsJVMResponse is a representation of a JVM stats, memory pool information, garbage collection, buffer pools, number of loaded/unloaded classes
|
||||
type NodeStatsJVMResponse struct {
|
||||
BufferPools map[string]NodeStatsJVMBufferPoolResponse `json:"buffer_pools"`
|
||||
@ -160,8 +172,10 @@ type NodeStatsIndicesDocsResponse struct {
|
||||
|
||||
// NodeStatsIndicesRefreshResponse defines node stats refresh information structure for indices
|
||||
type NodeStatsIndicesRefreshResponse struct {
|
||||
Total int64 `json:"total"`
|
||||
TotalTime int64 `json:"total_time_in_millis"`
|
||||
Total int64 `json:"total"`
|
||||
TotalTime int64 `json:"total_time_in_millis"`
|
||||
ExternalTotal int64 `json:"external_total"`
|
||||
ExternalTotalTimeInMillis int64 `json:"external_total_time_in_millis"`
|
||||
}
|
||||
|
||||
// NodeStatsIndicesTranslogResponse defines node stats translog information structure for indices
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -10,80 +10,148 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
defaultNodeShardLabels = []string{"node"}
|
||||
|
||||
defaultNodeShardLabelValues = func(node string) []string {
|
||||
return []string{
|
||||
node,
|
||||
}
|
||||
}
|
||||
"github.com/prometheus-community/elasticsearch_exporter/pkg/clusterinfo"
|
||||
)
|
||||
|
||||
// ShardResponse has shard's node and index info
|
||||
type ShardResponse struct {
|
||||
Index string `json:"index"`
|
||||
Shard string `json:"shard"`
|
||||
State string `json:"state"`
|
||||
Node string `json:"node"`
|
||||
}
|
||||
|
||||
// Shards information struct
|
||||
type Shards struct {
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
logger *slog.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
clusterInfoCh chan *clusterinfo.Response
|
||||
lastClusterInfo *clusterinfo.Response
|
||||
|
||||
nodeShardMetrics []*nodeShardMetric
|
||||
jsonParseFailures prometheus.Counter
|
||||
}
|
||||
|
||||
// ClusterLabelUpdates returns a pointer to a channel to receive cluster info updates. It implements the
|
||||
// (not exported) clusterinfo.consumer interface
|
||||
func (s *Shards) ClusterLabelUpdates() *chan *clusterinfo.Response {
|
||||
return &s.clusterInfoCh
|
||||
}
|
||||
|
||||
// String implements the stringer interface. It is part of the clusterinfo.consumer interface
|
||||
func (s *Shards) String() string {
|
||||
return namespace + "shards"
|
||||
}
|
||||
|
||||
type nodeShardMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(shards float64) float64
|
||||
Labels func(node string) []string
|
||||
Labels labels
|
||||
}
|
||||
|
||||
// fetchClusterNameOnce performs a single request to the root endpoint to obtain the cluster name.
|
||||
func fetchClusterNameOnce(s *Shards) string {
|
||||
if s.lastClusterInfo != nil && s.lastClusterInfo.ClusterName != "unknown_cluster" {
|
||||
return s.lastClusterInfo.ClusterName
|
||||
}
|
||||
u := *s.url
|
||||
u.Path = path.Join(u.Path, "/")
|
||||
resp, err := s.client.Get(u.String())
|
||||
if err == nil {
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
var root struct {
|
||||
ClusterName string `json:"cluster_name"`
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&root); err == nil && root.ClusterName != "" {
|
||||
s.lastClusterInfo = &clusterinfo.Response{ClusterName: root.ClusterName}
|
||||
return root.ClusterName
|
||||
}
|
||||
}
|
||||
}
|
||||
return "unknown_cluster"
|
||||
}
|
||||
|
||||
// NewShards defines Shards Prometheus metrics
|
||||
func NewShards(logger log.Logger, client *http.Client, url *url.URL) *Shards {
|
||||
return &Shards{
|
||||
func NewShards(logger *slog.Logger, client *http.Client, url *url.URL) *Shards {
|
||||
var shardPtr *Shards
|
||||
nodeLabels := labels{
|
||||
keys: func(...string) []string {
|
||||
return []string{"node", "cluster"}
|
||||
},
|
||||
values: func(lastClusterinfo *clusterinfo.Response, base ...string) []string {
|
||||
if lastClusterinfo != nil {
|
||||
return append(base, lastClusterinfo.ClusterName)
|
||||
}
|
||||
if shardPtr != nil {
|
||||
return append(base, fetchClusterNameOnce(shardPtr))
|
||||
}
|
||||
return append(base, "unknown_cluster")
|
||||
},
|
||||
}
|
||||
|
||||
shards := &Shards{
|
||||
// will assign later
|
||||
|
||||
logger: logger,
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
clusterInfoCh: make(chan *clusterinfo.Response),
|
||||
lastClusterInfo: &clusterinfo.Response{
|
||||
ClusterName: "unknown_cluster",
|
||||
},
|
||||
|
||||
nodeShardMetrics: []*nodeShardMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "node_shards", "total"),
|
||||
"Total shards per node",
|
||||
defaultNodeShardLabels, nil,
|
||||
nodeLabels.keys(), nil,
|
||||
),
|
||||
Value: func(shards float64) float64 {
|
||||
return shards
|
||||
},
|
||||
Labels: defaultNodeShardLabelValues,
|
||||
}},
|
||||
Labels: nodeLabels,
|
||||
},
|
||||
},
|
||||
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "node_shards", "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
}
|
||||
|
||||
// start go routine to fetch clusterinfo updates and save them to lastClusterinfo
|
||||
go func() {
|
||||
logger.Debug("starting cluster info receive loop")
|
||||
for ci := range shards.clusterInfoCh {
|
||||
if ci != nil {
|
||||
logger.Debug("received cluster info update", "cluster", ci.ClusterName)
|
||||
shards.lastClusterInfo = ci
|
||||
}
|
||||
}
|
||||
logger.Debug("exiting cluster info receive loop")
|
||||
}()
|
||||
|
||||
shardPtr = shards
|
||||
return shards
|
||||
}
|
||||
|
||||
// Describe Shards
|
||||
@ -105,8 +173,8 @@ func (s *Shards) getAndParseURL(u *url.URL) ([]ShardResponse, error) {
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
s.logger.Warn(
|
||||
"failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
@ -124,7 +192,6 @@ func (s *Shards) getAndParseURL(u *url.URL) ([]ShardResponse, error) {
|
||||
}
|
||||
|
||||
func (s *Shards) fetchAndDecodeShards() ([]ShardResponse, error) {
|
||||
|
||||
u := *s.url
|
||||
u.Path = path.Join(u.Path, "/_cat/shards")
|
||||
q := u.Query()
|
||||
@ -137,17 +204,16 @@ func (s *Shards) fetchAndDecodeShards() ([]ShardResponse, error) {
|
||||
return sfr, err
|
||||
}
|
||||
|
||||
// Collect number of shards on each nodes
|
||||
// Collect number of shards on each node
|
||||
func (s *Shards) Collect(ch chan<- prometheus.Metric) {
|
||||
|
||||
defer func() {
|
||||
ch <- s.jsonParseFailures
|
||||
}()
|
||||
|
||||
sr, err := s.fetchAndDecodeShards()
|
||||
if err != nil {
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to fetch and decode node shards stats",
|
||||
s.logger.Warn(
|
||||
"failed to fetch and decode node shards stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
@ -156,10 +222,8 @@ func (s *Shards) Collect(ch chan<- prometheus.Metric) {
|
||||
nodeShards := make(map[string]float64)
|
||||
|
||||
for _, shard := range sr {
|
||||
if val, ok := nodeShards[shard.Node]; ok {
|
||||
nodeShards[shard.Node] = val + 1
|
||||
} else {
|
||||
nodeShards[shard.Node] = 1
|
||||
if shard.State == "STARTED" {
|
||||
nodeShards[shard.Node]++
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,7 +233,7 @@ func (s *Shards) Collect(ch chan<- prometheus.Metric) {
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(shards),
|
||||
metric.Labels(node)...,
|
||||
metric.Labels.values(s.lastClusterInfo, node)...,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
83
collector/shards_test.go
Normal file
83
collector/shards_test.go
Normal file
@ -0,0 +1,83 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestShards(t *testing.T) {
|
||||
// Testcases created using:
|
||||
// docker run --rm -d -p 9200:9200 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:$VERSION
|
||||
// curl -XPUT http://localhost:9200/testindex
|
||||
// curl -XPUT http://localhost:9200/otherindex
|
||||
// curl http://localhost:9200/_cat/shards?format=json > fixtures/shards/$VERSION.json
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "7.15.0",
|
||||
file: "7.15.0.json",
|
||||
want: `# HELP elasticsearch_node_shards_json_parse_failures Number of errors while parsing JSON.
|
||||
# TYPE elasticsearch_node_shards_json_parse_failures counter
|
||||
elasticsearch_node_shards_json_parse_failures 0
|
||||
# HELP elasticsearch_node_shards_total Total shards per node
|
||||
# TYPE elasticsearch_node_shards_total gauge
|
||||
elasticsearch_node_shards_total{cluster="unknown_cluster",node="35dfca79831a"} 3
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(path.Join("../fixtures/shards/", tt.file))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
}
|
||||
|
||||
s := NewShards(promslog.NewNopLogger(), http.DefaultClient, u)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(s, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
536
collector/slm.go
536
collector/slm.go
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,388 +14,244 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type policyMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(policyStats PolicyStats) float64
|
||||
Labels func(policyStats PolicyStats) []string
|
||||
}
|
||||
|
||||
type slmMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(slmStats SLMStatsResponse) float64
|
||||
}
|
||||
|
||||
type slmStatusMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(slmStatus SLMStatusResponse, operationMode string) float64
|
||||
Labels func(operationMode string) []string
|
||||
}
|
||||
var statuses = []string{"RUNNING", "STOPPING", "STOPPED"}
|
||||
|
||||
var (
|
||||
defaultPolicyLabels = []string{"policy"}
|
||||
defaultPolicyLabelValues = func(policyStats PolicyStats) []string {
|
||||
return []string{policyStats.Policy}
|
||||
}
|
||||
slmRetentionRunsTotal = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_runs_total"),
|
||||
"Total retention runs",
|
||||
nil, nil,
|
||||
)
|
||||
slmRetentionFailedTotal = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_failed_total"),
|
||||
"Total failed retention runs",
|
||||
nil, nil,
|
||||
)
|
||||
slmRetentionTimedOutTotal = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_timed_out_total"),
|
||||
"Total timed out retention runs",
|
||||
nil, nil,
|
||||
)
|
||||
slmRetentionDeletionTimeSeconds = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_deletion_time_seconds"),
|
||||
"Retention run deletion time",
|
||||
nil, nil,
|
||||
)
|
||||
slmTotalSnapshotsTaken = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_taken_total"),
|
||||
"Total snapshots taken",
|
||||
nil, nil,
|
||||
)
|
||||
slmTotalSnapshotsFailed = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_failed_total"),
|
||||
"Total snapshots failed",
|
||||
nil, nil,
|
||||
)
|
||||
slmTotalSnapshotsDeleted = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_deleted_total"),
|
||||
"Total snapshots deleted",
|
||||
nil, nil,
|
||||
)
|
||||
slmTotalSnapshotsDeleteFailed = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshot_deletion_failures_total"),
|
||||
"Total snapshot deletion failures",
|
||||
nil, nil,
|
||||
)
|
||||
|
||||
statuses = []string{"RUNNING", "STOPPING", "STOPPED"}
|
||||
slmOperationMode = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "operation_mode"),
|
||||
"Operating status of SLM",
|
||||
[]string{"operation_mode"}, nil,
|
||||
)
|
||||
|
||||
slmSnapshotsTaken = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshots_taken_total"),
|
||||
"Total snapshots taken",
|
||||
[]string{"policy"}, nil,
|
||||
)
|
||||
slmSnapshotsFailed = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshots_failed_total"),
|
||||
"Total snapshots failed",
|
||||
[]string{"policy"}, nil,
|
||||
)
|
||||
slmSnapshotsDeleted = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshots_deleted_total"),
|
||||
"Total snapshots deleted",
|
||||
[]string{"policy"}, nil,
|
||||
)
|
||||
slmSnapshotsDeletionFailure = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshot_deletion_failures_total"),
|
||||
"Total snapshot deletion failures",
|
||||
[]string{"policy"}, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerCollector("slm", defaultDisabled, NewSLM)
|
||||
}
|
||||
|
||||
// SLM information struct
|
||||
type SLM struct {
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
|
||||
up prometheus.Gauge
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
|
||||
slmMetrics []*slmMetric
|
||||
policyMetrics []*policyMetric
|
||||
slmStatusMetric *slmStatusMetric
|
||||
logger *slog.Logger
|
||||
hc *http.Client
|
||||
u *url.URL
|
||||
}
|
||||
|
||||
// NewSLM defines SLM Prometheus metrics
|
||||
func NewSLM(logger log.Logger, client *http.Client, url *url.URL) *SLM {
|
||||
func NewSLM(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
return &SLM{
|
||||
logger: logger,
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "slm_stats", "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch SLM endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "slm_stats", "total_scrapes"),
|
||||
Help: "Current total Elasticsearch SLM scrapes.",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "slm_stats", "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
slmMetrics: []*slmMetric{
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_runs_total"),
|
||||
"Total retention runs",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.RetentionRuns)
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_failed_total"),
|
||||
"Total failed retention runs",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.RetentionFailed)
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_timed_out_total"),
|
||||
"Total timed out retention runs",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.RetentionTimedOut)
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "retention_deletion_time_seconds"),
|
||||
"Retention run deletion time",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.RetentionDeletionTimeMillis) / 1000
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_taken_total"),
|
||||
"Total snapshots taken",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.TotalSnapshotsTaken)
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_failed_total"),
|
||||
"Total snapshots failed",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.TotalSnapshotsFailed)
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshots_deleted_total"),
|
||||
"Total snapshots deleted",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.TotalSnapshotsDeleted)
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "total_snapshot_deletion_failures_total"),
|
||||
"Total snapshot deletion failures",
|
||||
nil, nil,
|
||||
),
|
||||
Value: func(slmStats SLMStatsResponse) float64 {
|
||||
return float64(slmStats.TotalSnapshotDeletionFailures)
|
||||
},
|
||||
},
|
||||
},
|
||||
policyMetrics: []*policyMetric{
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshots_taken_total"),
|
||||
"Total snapshots taken",
|
||||
defaultPolicyLabels, nil,
|
||||
),
|
||||
Value: func(policyStats PolicyStats) float64 {
|
||||
return float64(policyStats.SnapshotsTaken)
|
||||
},
|
||||
Labels: defaultPolicyLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshots_failed_total"),
|
||||
"Total snapshots failed",
|
||||
defaultPolicyLabels, nil,
|
||||
),
|
||||
Value: func(policyStats PolicyStats) float64 {
|
||||
return float64(policyStats.SnapshotsFailed)
|
||||
},
|
||||
Labels: defaultPolicyLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshots_deleted_total"),
|
||||
"Total snapshots deleted",
|
||||
defaultPolicyLabels, nil,
|
||||
),
|
||||
Value: func(policyStats PolicyStats) float64 {
|
||||
return float64(policyStats.SnapshotsDeleted)
|
||||
},
|
||||
Labels: defaultPolicyLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.CounterValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "snapshot_deletion_failures_total"),
|
||||
"Total snapshot deletion failures",
|
||||
defaultPolicyLabels, nil,
|
||||
),
|
||||
Value: func(policyStats PolicyStats) float64 {
|
||||
return float64(policyStats.SnapshotDeletionFailures)
|
||||
},
|
||||
Labels: defaultPolicyLabelValues,
|
||||
},
|
||||
},
|
||||
slmStatusMetric: &slmStatusMetric{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "slm_stats", "operation_mode"),
|
||||
"Operating status of SLM",
|
||||
[]string{"operation_mode"}, nil,
|
||||
),
|
||||
Value: func(slmStatus SLMStatusResponse, operationMode string) float64 {
|
||||
if slmStatus.OperationMode == operationMode {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
},
|
||||
},
|
||||
}
|
||||
hc: hc,
|
||||
u: u,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Describe adds SLM metrics descriptions
|
||||
func (s *SLM) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- s.slmStatusMetric.Desc
|
||||
|
||||
for _, metric := range s.slmMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
|
||||
for _, metric := range s.policyMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
|
||||
ch <- s.up.Desc()
|
||||
ch <- s.totalScrapes.Desc()
|
||||
ch <- s.jsonParseFailures.Desc()
|
||||
// SLMStatsResponse is a representation of the SLM stats
|
||||
type SLMStatsResponse struct {
|
||||
RetentionRuns int64 `json:"retention_runs"`
|
||||
RetentionFailed int64 `json:"retention_failed"`
|
||||
RetentionTimedOut int64 `json:"retention_timed_out"`
|
||||
RetentionDeletionTime string `json:"retention_deletion_time"`
|
||||
RetentionDeletionTimeMillis int64 `json:"retention_deletion_time_millis"`
|
||||
TotalSnapshotsTaken int64 `json:"total_snapshots_taken"`
|
||||
TotalSnapshotsFailed int64 `json:"total_snapshots_failed"`
|
||||
TotalSnapshotsDeleted int64 `json:"total_snapshots_deleted"`
|
||||
TotalSnapshotDeletionFailures int64 `json:"total_snapshot_deletion_failures"`
|
||||
PolicyStats []PolicyStats `json:"policy_stats"`
|
||||
}
|
||||
|
||||
func (s *SLM) fetchAndDecodeSLMStats() (SLMStatsResponse, error) {
|
||||
var ssr SLMStatsResponse
|
||||
|
||||
u := *s.url
|
||||
u.Path = path.Join(u.Path, "/_slm/stats")
|
||||
res, err := s.client.Get(u.String())
|
||||
if err != nil {
|
||||
return ssr, fmt.Errorf("failed to get slm stats health from %s://%s:%s%s: %s",
|
||||
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return ssr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
s.jsonParseFailures.Inc()
|
||||
return ssr, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &ssr); err != nil {
|
||||
s.jsonParseFailures.Inc()
|
||||
return ssr, err
|
||||
}
|
||||
|
||||
return ssr, nil
|
||||
// PolicyStats is a representation of SLM stats for specific policies
|
||||
type PolicyStats struct {
|
||||
Policy string `json:"policy"`
|
||||
SnapshotsTaken int64 `json:"snapshots_taken"`
|
||||
SnapshotsFailed int64 `json:"snapshots_failed"`
|
||||
SnapshotsDeleted int64 `json:"snapshots_deleted"`
|
||||
SnapshotDeletionFailures int64 `json:"snapshot_deletion_failures"`
|
||||
}
|
||||
|
||||
func (s *SLM) fetchAndDecodeSLMStatus() (SLMStatusResponse, error) {
|
||||
var ssr SLMStatusResponse
|
||||
|
||||
u := *s.url
|
||||
u.Path = path.Join(u.Path, "/_slm/status")
|
||||
res, err := s.client.Get(u.String())
|
||||
if err != nil {
|
||||
return ssr, fmt.Errorf("failed to get slm status from %s://%s:%s%s: %s",
|
||||
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return ssr, fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
s.jsonParseFailures.Inc()
|
||||
return ssr, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, &ssr); err != nil {
|
||||
s.jsonParseFailures.Inc()
|
||||
return ssr, err
|
||||
}
|
||||
|
||||
return ssr, nil
|
||||
// SLMStatusResponse is a representation of the SLM status
|
||||
type SLMStatusResponse struct {
|
||||
OperationMode string `json:"operation_mode"`
|
||||
}
|
||||
|
||||
// Collect gets SLM metric values
|
||||
func (s *SLM) Collect(ch chan<- prometheus.Metric) {
|
||||
s.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- s.up
|
||||
ch <- s.totalScrapes
|
||||
ch <- s.jsonParseFailures
|
||||
}()
|
||||
func (s *SLM) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
u := s.u.ResolveReference(&url.URL{Path: "/_slm/status"})
|
||||
var slmStatusResp SLMStatusResponse
|
||||
|
||||
slmStatusResp, err := s.fetchAndDecodeSLMStatus()
|
||||
resp, err := getURL(ctx, s.hc, s.logger, u.String())
|
||||
if err != nil {
|
||||
s.up.Set(0)
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to fetch and decode slm status",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
return err
|
||||
}
|
||||
|
||||
slmStatsResp, err := s.fetchAndDecodeSLMStats()
|
||||
err = json.Unmarshal(resp, &slmStatusResp)
|
||||
if err != nil {
|
||||
s.up.Set(0)
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to fetch and decode slm stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
return err
|
||||
}
|
||||
|
||||
s.up.Set(1)
|
||||
u = s.u.ResolveReference(&url.URL{Path: "/_slm/stats"})
|
||||
var slmStatsResp SLMStatsResponse
|
||||
|
||||
resp, err = getURL(ctx, s.hc, s.logger, u.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(resp, &slmStatsResp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, status := range statuses {
|
||||
var value float64
|
||||
if slmStatusResp.OperationMode == status {
|
||||
value = 1
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
s.slmStatusMetric.Desc,
|
||||
s.slmStatusMetric.Type,
|
||||
s.slmStatusMetric.Value(slmStatusResp, status),
|
||||
slmOperationMode,
|
||||
prometheus.GaugeValue,
|
||||
value,
|
||||
status,
|
||||
)
|
||||
}
|
||||
|
||||
for _, metric := range s.slmMetrics {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmRetentionRunsTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.RetentionRuns),
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmRetentionFailedTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.RetentionFailed),
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmRetentionTimedOutTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.RetentionTimedOut),
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmRetentionDeletionTimeSeconds,
|
||||
prometheus.GaugeValue,
|
||||
float64(slmStatsResp.RetentionDeletionTimeMillis)/1000,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmTotalSnapshotsTaken,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.TotalSnapshotsTaken),
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmTotalSnapshotsFailed,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.TotalSnapshotsFailed),
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmTotalSnapshotsDeleted,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.TotalSnapshotsDeleted),
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmTotalSnapshotsDeleteFailed,
|
||||
prometheus.CounterValue,
|
||||
float64(slmStatsResp.TotalSnapshotDeletionFailures),
|
||||
)
|
||||
|
||||
for _, policy := range slmStatsResp.PolicyStats {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(slmStatsResp),
|
||||
slmSnapshotsTaken,
|
||||
prometheus.CounterValue,
|
||||
float64(policy.SnapshotsTaken),
|
||||
policy.Policy,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmSnapshotsFailed,
|
||||
prometheus.CounterValue,
|
||||
float64(policy.SnapshotsFailed),
|
||||
policy.Policy,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmSnapshotsDeleted,
|
||||
prometheus.CounterValue,
|
||||
float64(policy.SnapshotsDeleted),
|
||||
policy.Policy,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
slmSnapshotsDeletionFailure,
|
||||
prometheus.CounterValue,
|
||||
float64(policy.SnapshotDeletionFailures),
|
||||
policy.Policy,
|
||||
)
|
||||
}
|
||||
|
||||
for _, metric := range s.policyMetrics {
|
||||
for _, policy := range slmStatsResp.PolicyStats {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(policy),
|
||||
metric.Labels(policy)...,
|
||||
)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
// Copyright 2022 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
// SLMStatsResponse is a representation of the SLM stats
|
||||
type SLMStatsResponse struct {
|
||||
RetentionRuns int64 `json:"retention_runs"`
|
||||
RetentionFailed int64 `json:"retention_failed"`
|
||||
RetentionTimedOut int64 `json:"retention_timed_out"`
|
||||
RetentionDeletionTime string `json:"retention_deletion_time"`
|
||||
RetentionDeletionTimeMillis int64 `json:"retention_deletion_time_millis"`
|
||||
TotalSnapshotsTaken int64 `json:"total_snapshots_taken"`
|
||||
TotalSnapshotsFailed int64 `json:"total_snapshots_failed"`
|
||||
TotalSnapshotsDeleted int64 `json:"total_snapshots_deleted"`
|
||||
TotalSnapshotDeletionFailures int64 `json:"total_snapshot_deletion_failures"`
|
||||
PolicyStats []PolicyStats `json:"policy_stats"`
|
||||
}
|
||||
|
||||
// PolicyStats is a representation of SLM stats for specific policies
|
||||
type PolicyStats struct {
|
||||
Policy string `json:"policy"`
|
||||
SnapshotsTaken int64 `json:"snapshots_taken"`
|
||||
SnapshotsFailed int64 `json:"snapshots_failed"`
|
||||
SnapshotsDeleted int64 `json:"snapshots_deleted"`
|
||||
SnapshotDeletionFailures int64 `json:"snapshot_deletion_failures"`
|
||||
}
|
||||
|
||||
// SLMStatusResponse is a representation of the SLM status
|
||||
type SLMStatusResponse struct {
|
||||
OperationMode string `json:"operation_mode"`
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,13 +14,17 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestSLM(t *testing.T) {
|
||||
@ -31,35 +35,102 @@ func TestSLM(t *testing.T) {
|
||||
// curl -XPUT http://127.0.0.1:9200/_slm/policy/everything -H 'Content-Type: application/json' -d '{"schedule":"0 */15 * * * ?","name":"<everything-{now/d}>","repository":"my_repository","config":{"indices":".*","include_global_state":true,"ignore_unavailable":true},"retention":{"expire_after":"7d"}}'
|
||||
// curl http://127.0.0.1:9200/_slm/stats (Numbers manually tweaked)
|
||||
|
||||
tcs := map[string]string{
|
||||
"7.15.0": `{"retention_runs":9,"retention_failed":0,"retention_timed_out":0,"retention_deletion_time":"1.2m","retention_deletion_time_millis":72491,"total_snapshots_taken":103,"total_snapshots_failed":2,"total_snapshots_deleted":20,"total_snapshot_deletion_failures":0,"policy_stats":[{"policy":"everything","snapshots_taken":50,"snapshots_failed":2,"snapshots_deleted":20,"snapshot_deletion_failures":0}]}`,
|
||||
}
|
||||
for ver, out := range tcs {
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fmt.Fprintln(w, out)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
}
|
||||
s := NewSLM(log.NewNopLogger(), http.DefaultClient, u)
|
||||
stats, err := s.fetchAndDecodeSLMStats()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode snapshots stats: %s", err)
|
||||
}
|
||||
t.Logf("[%s] SLM Response: %+v", ver, stats)
|
||||
slmStats := stats
|
||||
policyStats := stats.PolicyStats[0]
|
||||
|
||||
if slmStats.TotalSnapshotsTaken != 103 {
|
||||
t.Errorf("Bad number of total snapshots taken")
|
||||
}
|
||||
|
||||
if policyStats.SnapshotsTaken != 50 {
|
||||
t.Errorf("Bad number of policy snapshots taken")
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "7.15.0",
|
||||
file: "7.15.0.json",
|
||||
want: `# HELP elasticsearch_slm_stats_operation_mode Operating status of SLM
|
||||
# TYPE elasticsearch_slm_stats_operation_mode gauge
|
||||
elasticsearch_slm_stats_operation_mode{operation_mode="RUNNING"} 0
|
||||
elasticsearch_slm_stats_operation_mode{operation_mode="STOPPED"} 0
|
||||
elasticsearch_slm_stats_operation_mode{operation_mode="STOPPING"} 0
|
||||
# HELP elasticsearch_slm_stats_retention_deletion_time_seconds Retention run deletion time
|
||||
# TYPE elasticsearch_slm_stats_retention_deletion_time_seconds gauge
|
||||
elasticsearch_slm_stats_retention_deletion_time_seconds 72.491
|
||||
# HELP elasticsearch_slm_stats_retention_failed_total Total failed retention runs
|
||||
# TYPE elasticsearch_slm_stats_retention_failed_total counter
|
||||
elasticsearch_slm_stats_retention_failed_total 0
|
||||
# HELP elasticsearch_slm_stats_retention_runs_total Total retention runs
|
||||
# TYPE elasticsearch_slm_stats_retention_runs_total counter
|
||||
elasticsearch_slm_stats_retention_runs_total 9
|
||||
# HELP elasticsearch_slm_stats_retention_timed_out_total Total timed out retention runs
|
||||
# TYPE elasticsearch_slm_stats_retention_timed_out_total counter
|
||||
elasticsearch_slm_stats_retention_timed_out_total 0
|
||||
# HELP elasticsearch_slm_stats_snapshot_deletion_failures_total Total snapshot deletion failures
|
||||
# TYPE elasticsearch_slm_stats_snapshot_deletion_failures_total counter
|
||||
elasticsearch_slm_stats_snapshot_deletion_failures_total{policy="everything"} 0
|
||||
# HELP elasticsearch_slm_stats_snapshots_deleted_total Total snapshots deleted
|
||||
# TYPE elasticsearch_slm_stats_snapshots_deleted_total counter
|
||||
elasticsearch_slm_stats_snapshots_deleted_total{policy="everything"} 20
|
||||
# HELP elasticsearch_slm_stats_snapshots_failed_total Total snapshots failed
|
||||
# TYPE elasticsearch_slm_stats_snapshots_failed_total counter
|
||||
elasticsearch_slm_stats_snapshots_failed_total{policy="everything"} 2
|
||||
# HELP elasticsearch_slm_stats_snapshots_taken_total Total snapshots taken
|
||||
# TYPE elasticsearch_slm_stats_snapshots_taken_total counter
|
||||
elasticsearch_slm_stats_snapshots_taken_total{policy="everything"} 50
|
||||
# HELP elasticsearch_slm_stats_total_snapshot_deletion_failures_total Total snapshot deletion failures
|
||||
# TYPE elasticsearch_slm_stats_total_snapshot_deletion_failures_total counter
|
||||
elasticsearch_slm_stats_total_snapshot_deletion_failures_total 0
|
||||
# HELP elasticsearch_slm_stats_total_snapshots_deleted_total Total snapshots deleted
|
||||
# TYPE elasticsearch_slm_stats_total_snapshots_deleted_total counter
|
||||
elasticsearch_slm_stats_total_snapshots_deleted_total 20
|
||||
# HELP elasticsearch_slm_stats_total_snapshots_failed_total Total snapshots failed
|
||||
# TYPE elasticsearch_slm_stats_total_snapshots_failed_total counter
|
||||
elasticsearch_slm_stats_total_snapshots_failed_total 2
|
||||
# HELP elasticsearch_slm_stats_total_snapshots_taken_total Total snapshots taken
|
||||
# TYPE elasticsearch_slm_stats_total_snapshots_taken_total counter
|
||||
elasticsearch_slm_stats_total_snapshots_taken_total 103
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
fStatsPath := path.Join("../fixtures/slm/stats/", tt.file)
|
||||
fStats, err := os.Open(fStatsPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fStats.Close()
|
||||
|
||||
fStatusPath := path.Join("../fixtures/slm/status/", tt.file)
|
||||
fStatus, err := os.Open(fStatusPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fStatus.Close()
|
||||
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.RequestURI {
|
||||
case "/_slm/stats":
|
||||
io.Copy(w, fStats)
|
||||
return
|
||||
case "/_slm/status":
|
||||
io.Copy(w, fStatus)
|
||||
return
|
||||
}
|
||||
|
||||
http.Error(w, "Not Found", http.StatusNotFound)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
}
|
||||
|
||||
s, err := NewSLM(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{s}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -14,32 +14,17 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/go-kit/log/level"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type snapshotMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(snapshotStats SnapshotStatDataResponse) float64
|
||||
Labels func(repositoryName string, snapshotStats SnapshotStatDataResponse) []string
|
||||
}
|
||||
|
||||
type repositoryMetric struct {
|
||||
Type prometheus.ValueType
|
||||
Desc *prometheus.Desc
|
||||
Value func(snapshotsStats SnapshotStatsResponse) float64
|
||||
Labels func(repositoryName string) []string
|
||||
}
|
||||
|
||||
var (
|
||||
defaultSnapshotLabels = []string{"repository", "state", "version"}
|
||||
defaultSnapshotLabelValues = func(repositoryName string, snapshotStats SnapshotStatDataResponse) []string {
|
||||
@ -51,285 +36,194 @@ var (
|
||||
}
|
||||
)
|
||||
|
||||
var (
|
||||
numIndices = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_number_of_indices"),
|
||||
"Number of indices in the last snapshot",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
snapshotStartTimestamp = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_start_time_timestamp"),
|
||||
"Last snapshot start timestamp",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
snapshotEndTimestamp = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_end_time_timestamp"),
|
||||
"Last snapshot end timestamp",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
snapshotNumFailures = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_number_of_failures"),
|
||||
"Last snapshot number of failures",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
snapshotNumShards = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_total_shards"),
|
||||
"Last snapshot total shards",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
snapshotFailedShards = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_failed_shards"),
|
||||
"Last snapshot failed shards",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
snapshotSuccessfulShards = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_successful_shards"),
|
||||
"Last snapshot successful shards",
|
||||
defaultSnapshotLabels, nil,
|
||||
)
|
||||
|
||||
numSnapshots = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "number_of_snapshots"),
|
||||
"Number of snapshots in a repository",
|
||||
defaultSnapshotRepositoryLabels, nil,
|
||||
)
|
||||
oldestSnapshotTimestamp = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "oldest_snapshot_timestamp"),
|
||||
"Timestamp of the oldest snapshot",
|
||||
defaultSnapshotRepositoryLabels, nil,
|
||||
)
|
||||
latestSnapshotTimestamp = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "latest_snapshot_timestamp_seconds"),
|
||||
"Timestamp of the latest SUCCESS or PARTIAL snapshot",
|
||||
defaultSnapshotRepositoryLabels, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerCollector("snapshots", defaultDisabled, NewSnapshots)
|
||||
}
|
||||
|
||||
// Snapshots information struct
|
||||
type Snapshots struct {
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
url *url.URL
|
||||
|
||||
up prometheus.Gauge
|
||||
totalScrapes, jsonParseFailures prometheus.Counter
|
||||
|
||||
snapshotMetrics []*snapshotMetric
|
||||
repositoryMetrics []*repositoryMetric
|
||||
logger *slog.Logger
|
||||
hc *http.Client
|
||||
u *url.URL
|
||||
}
|
||||
|
||||
// NewSnapshots defines Snapshots Prometheus metrics
|
||||
func NewSnapshots(logger log.Logger, client *http.Client, url *url.URL) *Snapshots {
|
||||
func NewSnapshots(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
return &Snapshots{
|
||||
logger: logger,
|
||||
client: client,
|
||||
url: url,
|
||||
|
||||
up: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "snapshot_stats", "up"),
|
||||
Help: "Was the last scrape of the Elasticsearch snapshots endpoint successful.",
|
||||
}),
|
||||
totalScrapes: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "snapshot_stats", "total_scrapes"),
|
||||
Help: "Current total Elasticsearch snapshots scrapes.",
|
||||
}),
|
||||
jsonParseFailures: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: prometheus.BuildFQName(namespace, "snapshot_stats", "json_parse_failures"),
|
||||
Help: "Number of errors while parsing JSON.",
|
||||
}),
|
||||
snapshotMetrics: []*snapshotMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_number_of_indices"),
|
||||
"Number of indices in the last snapshot",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(len(snapshotStats.Indices))
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_start_time_timestamp"),
|
||||
"Last snapshot start timestamp",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(snapshotStats.StartTimeInMillis / 1000)
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_end_time_timestamp"),
|
||||
"Last snapshot end timestamp",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(snapshotStats.EndTimeInMillis / 1000)
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_number_of_failures"),
|
||||
"Last snapshot number of failures",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(len(snapshotStats.Failures))
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_total_shards"),
|
||||
"Last snapshot total shards",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(snapshotStats.Shards.Total)
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_failed_shards"),
|
||||
"Last snapshot failed shards",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(snapshotStats.Shards.Failed)
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "snapshot_successful_shards"),
|
||||
"Last snapshot successful shards",
|
||||
defaultSnapshotLabels, nil,
|
||||
),
|
||||
Value: func(snapshotStats SnapshotStatDataResponse) float64 {
|
||||
return float64(snapshotStats.Shards.Successful)
|
||||
},
|
||||
Labels: defaultSnapshotLabelValues,
|
||||
},
|
||||
},
|
||||
repositoryMetrics: []*repositoryMetric{
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "number_of_snapshots"),
|
||||
"Number of snapshots in a repository",
|
||||
defaultSnapshotRepositoryLabels, nil,
|
||||
),
|
||||
Value: func(snapshotsStats SnapshotStatsResponse) float64 {
|
||||
return float64(len(snapshotsStats.Snapshots))
|
||||
},
|
||||
Labels: defaultSnapshotRepositoryLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "oldest_snapshot_timestamp"),
|
||||
"Timestamp of the oldest snapshot",
|
||||
defaultSnapshotRepositoryLabels, nil,
|
||||
),
|
||||
Value: func(snapshotsStats SnapshotStatsResponse) float64 {
|
||||
if len(snapshotsStats.Snapshots) == 0 {
|
||||
return 0
|
||||
}
|
||||
return float64(snapshotsStats.Snapshots[0].StartTimeInMillis / 1000)
|
||||
},
|
||||
Labels: defaultSnapshotRepositoryLabelValues,
|
||||
},
|
||||
{
|
||||
Type: prometheus.GaugeValue,
|
||||
Desc: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "snapshot_stats", "latest_snapshot_timestamp_seconds"),
|
||||
"Timestamp of the latest SUCCESS or PARTIAL snapshot",
|
||||
defaultSnapshotRepositoryLabels, nil,
|
||||
),
|
||||
Value: func(snapshotsStats SnapshotStatsResponse) float64 {
|
||||
for i := len(snapshotsStats.Snapshots) - 1; i >= 0; i-- {
|
||||
var snap = snapshotsStats.Snapshots[i]
|
||||
if snap.State == "SUCCESS" || snap.State == "PARTIAL" {
|
||||
return float64(snap.StartTimeInMillis / 1000)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
},
|
||||
Labels: defaultSnapshotRepositoryLabelValues,
|
||||
},
|
||||
},
|
||||
}
|
||||
u: u,
|
||||
hc: hc,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Describe add Snapshots metrics descriptions
|
||||
func (s *Snapshots) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, metric := range s.snapshotMetrics {
|
||||
ch <- metric.Desc
|
||||
}
|
||||
ch <- s.up.Desc()
|
||||
ch <- s.totalScrapes.Desc()
|
||||
ch <- s.jsonParseFailures.Desc()
|
||||
}
|
||||
func (c *Snapshots) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
// indices
|
||||
snapshotsStatsResp := make(map[string]SnapshotStatsResponse)
|
||||
u := c.u.ResolveReference(&url.URL{Path: "/_snapshot"})
|
||||
|
||||
func (s *Snapshots) getAndParseURL(u *url.URL, data interface{}) error {
|
||||
res, err := s.client.Get(u.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get from %s://%s:%s%s: %s",
|
||||
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("HTTP Request failed with code %d", res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
s.jsonParseFailures.Inc()
|
||||
return err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bts, data); err != nil {
|
||||
s.jsonParseFailures.Inc()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Snapshots) fetchAndDecodeSnapshotsStats() (map[string]SnapshotStatsResponse, error) {
|
||||
mssr := make(map[string]SnapshotStatsResponse)
|
||||
|
||||
u := *s.url
|
||||
u.Path = path.Join(u.Path, "/_snapshot")
|
||||
var srr SnapshotRepositoriesResponse
|
||||
err := s.getAndParseURL(&u, &srr)
|
||||
resp, err := getURL(ctx, c.hc, c.logger, u.String())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(resp, &srr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to unmarshal JSON: %v", err)
|
||||
}
|
||||
|
||||
for repository := range srr {
|
||||
u := *s.url
|
||||
u.Path = path.Join(u.Path, "/_snapshot", repository, "/_all")
|
||||
pathPart := path.Join("/_snapshot", repository, "/_all")
|
||||
u := c.u.ResolveReference(&url.URL{Path: pathPart})
|
||||
var ssr SnapshotStatsResponse
|
||||
err := s.getAndParseURL(&u, &ssr)
|
||||
resp, err := getURL(ctx, c.hc, c.logger, u.String())
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
mssr[repository] = ssr
|
||||
err = json.Unmarshal(resp, &ssr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to unmarshal JSON: %v", err)
|
||||
}
|
||||
snapshotsStatsResp[repository] = ssr
|
||||
}
|
||||
|
||||
return mssr, nil
|
||||
}
|
||||
|
||||
// Collect gets Snapshots metric values
|
||||
func (s *Snapshots) Collect(ch chan<- prometheus.Metric) {
|
||||
s.totalScrapes.Inc()
|
||||
defer func() {
|
||||
ch <- s.up
|
||||
ch <- s.totalScrapes
|
||||
ch <- s.jsonParseFailures
|
||||
}()
|
||||
|
||||
// indices
|
||||
snapshotsStatsResp, err := s.fetchAndDecodeSnapshotsStats()
|
||||
if err != nil {
|
||||
s.up.Set(0)
|
||||
_ = level.Warn(s.logger).Log(
|
||||
"msg", "failed to fetch and decode snapshot stats",
|
||||
"err", err,
|
||||
)
|
||||
return
|
||||
}
|
||||
s.up.Set(1)
|
||||
|
||||
// Snapshots stats
|
||||
for repositoryName, snapshotStats := range snapshotsStatsResp {
|
||||
for _, metric := range s.repositoryMetrics {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(snapshotStats),
|
||||
metric.Labels(repositoryName)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
numSnapshots,
|
||||
prometheus.GaugeValue,
|
||||
float64(len(snapshotStats.Snapshots)),
|
||||
defaultSnapshotRepositoryLabelValues(repositoryName)...,
|
||||
)
|
||||
|
||||
oldest := float64(0)
|
||||
if len(snapshotStats.Snapshots) > 0 {
|
||||
oldest = float64(snapshotStats.Snapshots[0].StartTimeInMillis / 1000)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
oldestSnapshotTimestamp,
|
||||
prometheus.GaugeValue,
|
||||
oldest,
|
||||
defaultSnapshotRepositoryLabelValues(repositoryName)...,
|
||||
)
|
||||
|
||||
latest := float64(0)
|
||||
for i := len(snapshotStats.Snapshots) - 1; i >= 0; i-- {
|
||||
snap := snapshotStats.Snapshots[i]
|
||||
if snap.State == "SUCCESS" || snap.State == "PARTIAL" {
|
||||
latest = float64(snap.StartTimeInMillis / 1000)
|
||||
break
|
||||
}
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
latestSnapshotTimestamp,
|
||||
prometheus.GaugeValue,
|
||||
latest,
|
||||
defaultSnapshotRepositoryLabelValues(repositoryName)...,
|
||||
)
|
||||
|
||||
if len(snapshotStats.Snapshots) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
lastSnapshot := snapshotStats.Snapshots[len(snapshotStats.Snapshots)-1]
|
||||
for _, metric := range s.snapshotMetrics {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
metric.Desc,
|
||||
metric.Type,
|
||||
metric.Value(lastSnapshot),
|
||||
metric.Labels(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
numIndices,
|
||||
prometheus.GaugeValue,
|
||||
float64(len(lastSnapshot.Indices)),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotStartTimestamp,
|
||||
prometheus.GaugeValue,
|
||||
float64(lastSnapshot.StartTimeInMillis/1000),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotEndTimestamp,
|
||||
prometheus.GaugeValue,
|
||||
float64(lastSnapshot.EndTimeInMillis/1000),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotNumFailures,
|
||||
prometheus.GaugeValue,
|
||||
float64(len(lastSnapshot.Failures)),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotNumShards,
|
||||
prometheus.GaugeValue,
|
||||
float64(lastSnapshot.Shards.Total),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotFailedShards,
|
||||
prometheus.GaugeValue,
|
||||
float64(lastSnapshot.Shards.Failed),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotSuccessfulShards,
|
||||
prometheus.GaugeValue,
|
||||
float64(lastSnapshot.Shards.Successful),
|
||||
defaultSnapshotLabelValues(repositoryName, lastSnapshot)...,
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// Copyright 2021 The Prometheus Authors
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@ -15,12 +15,16 @@ package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestSnapshots(t *testing.T) {
|
||||
@ -37,50 +41,182 @@ func TestSnapshots(t *testing.T) {
|
||||
// curl http://localhost:9200/_snapshot/
|
||||
// curl http://localhost:9200/_snapshot/test1/_all
|
||||
|
||||
tcs := map[string][]string{
|
||||
"1.7.6": {`{"test1":{"type":"fs","settings":{"location":"/tmp/test1"}}}`, `{"snapshots":[{"snapshot":"snapshot_1","version_id":1070699,"version":"1.7.6","indices":["foo_1","foo_2"],"state":"SUCCESS","start_time":"2018-09-04T09:09:02.427Z","start_time_in_millis":1536052142427,"end_time":"2018-09-04T09:09:02.755Z","end_time_in_millis":1536052142755,"duration_in_millis":328,"failures":[],"shards":{"total":10,"failed":0,"successful":10}}]}`},
|
||||
"2.4.5": {`{"test1":{"type":"fs","settings":{"location":"/tmp/test1"}}}`, `{"snapshots":[{"snapshot":"snapshot_1","version_id":2040599,"version":"2.4.5","indices":["foo_2","foo_1"],"state":"SUCCESS","start_time":"2018-09-04T09:25:25.818Z","start_time_in_millis":1536053125818,"end_time":"2018-09-04T09:25:26.326Z","end_time_in_millis":1536053126326,"duration_in_millis":508,"failures":[],"shards":{"total":10,"failed":0,"successful":10}}]}`},
|
||||
"5.4.2": {`{"test1":{"type":"fs","settings":{"location":"/tmp/test1"}}}`, `{"snapshots":[{"snapshot":"snapshot_1","uuid":"VZ_c_kKISAW8rpcqiwSg0w","version_id":5040299,"version":"5.4.2","indices":["foo_2","foo_1"],"state":"SUCCESS","start_time":"2018-09-04T09:29:13.971Z","start_time_in_millis":1536053353971,"end_time":"2018-09-04T09:29:14.477Z","end_time_in_millis":1536053354477,"duration_in_millis":506,"failures":[],"shards":{"total":10,"failed":0,"successful":10}}]}`},
|
||||
"5.4.2-failed": {`{"test1":{"type":"fs","settings":{"location":"/tmp/test1"}}}`, `{"snapshots":[{"snapshot":"snapshot_1","uuid":"VZ_c_kKISAW8rpcqiwSg0w","version_id":5040299,"version":"5.4.2","indices":["foo_2","foo_1"],"state":"SUCCESS","start_time":"2018-09-04T09:29:13.971Z","start_time_in_millis":1536053353971,"end_time":"2018-09-04T09:29:14.477Z","end_time_in_millis":1536053354477,"duration_in_millis":506,"failures":[{"index" : "index_name","index_uuid" : "index_name","shard_id" : 52,"reason" : "IndexShardSnapshotFailedException[error deleting index file [pending-index-5] during cleanup]; nested: NoSuchFileException[Blob [pending-index-5] does not exist]; ","node_id" : "pPm9jafyTjyMk0T5A101xA","status" : "INTERNAL_SERVER_ERROR"}],"shards":{"total":10,"failed":1,"successful":10}}]}`},
|
||||
tests := []struct {
|
||||
name string
|
||||
file string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "1.7.6",
|
||||
file: "../fixtures/snapshots/1.7.6.json",
|
||||
want: `# HELP elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds Timestamp of the latest SUCCESS or PARTIAL snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds gauge
|
||||
elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds{repository="test1"} 1.536052142e+09
|
||||
# HELP elasticsearch_snapshot_stats_number_of_snapshots Number of snapshots in a repository
|
||||
# TYPE elasticsearch_snapshot_stats_number_of_snapshots gauge
|
||||
elasticsearch_snapshot_stats_number_of_snapshots{repository="test1"} 1
|
||||
# HELP elasticsearch_snapshot_stats_oldest_snapshot_timestamp Timestamp of the oldest snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_oldest_snapshot_timestamp gauge
|
||||
elasticsearch_snapshot_stats_oldest_snapshot_timestamp{repository="test1"} 1.536052142e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_end_time_timestamp Last snapshot end timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_end_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_end_time_timestamp{repository="test1",state="SUCCESS",version="1.7.6"} 1.536052142e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_failed_shards Last snapshot failed shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_failed_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_failed_shards{repository="test1",state="SUCCESS",version="1.7.6"} 0
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_failures Last snapshot number of failures
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_failures gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_failures{repository="test1",state="SUCCESS",version="1.7.6"} 0
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_indices Number of indices in the last snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_indices gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_indices{repository="test1",state="SUCCESS",version="1.7.6"} 2
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_start_time_timestamp Last snapshot start timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_start_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_start_time_timestamp{repository="test1",state="SUCCESS",version="1.7.6"} 1.536052142e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_successful_shards Last snapshot successful shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_successful_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_successful_shards{repository="test1",state="SUCCESS",version="1.7.6"} 10
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_total_shards Last snapshot total shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_total_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_total_shards{repository="test1",state="SUCCESS",version="1.7.6"} 10
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "2.4.5",
|
||||
file: "../fixtures/snapshots/2.4.5.json",
|
||||
want: `# HELP elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds Timestamp of the latest SUCCESS or PARTIAL snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds gauge
|
||||
elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds{repository="test1"} 1.536053125e+09
|
||||
# HELP elasticsearch_snapshot_stats_number_of_snapshots Number of snapshots in a repository
|
||||
# TYPE elasticsearch_snapshot_stats_number_of_snapshots gauge
|
||||
elasticsearch_snapshot_stats_number_of_snapshots{repository="test1"} 1
|
||||
# HELP elasticsearch_snapshot_stats_oldest_snapshot_timestamp Timestamp of the oldest snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_oldest_snapshot_timestamp gauge
|
||||
elasticsearch_snapshot_stats_oldest_snapshot_timestamp{repository="test1"} 1.536053125e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_end_time_timestamp Last snapshot end timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_end_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_end_time_timestamp{repository="test1",state="SUCCESS",version="2.4.5"} 1.536053126e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_failed_shards Last snapshot failed shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_failed_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_failed_shards{repository="test1",state="SUCCESS",version="2.4.5"} 0
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_failures Last snapshot number of failures
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_failures gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_failures{repository="test1",state="SUCCESS",version="2.4.5"} 0
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_indices Number of indices in the last snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_indices gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_indices{repository="test1",state="SUCCESS",version="2.4.5"} 2
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_start_time_timestamp Last snapshot start timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_start_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_start_time_timestamp{repository="test1",state="SUCCESS",version="2.4.5"} 1.536053125e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_successful_shards Last snapshot successful shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_successful_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_successful_shards{repository="test1",state="SUCCESS",version="2.4.5"} 10
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_total_shards Last snapshot total shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_total_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_total_shards{repository="test1",state="SUCCESS",version="2.4.5"} 10
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "5.4.2",
|
||||
file: "../fixtures/snapshots/5.4.2.json",
|
||||
want: `# HELP elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds Timestamp of the latest SUCCESS or PARTIAL snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds gauge
|
||||
elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds{repository="test1"} 1.536053353e+09
|
||||
# HELP elasticsearch_snapshot_stats_number_of_snapshots Number of snapshots in a repository
|
||||
# TYPE elasticsearch_snapshot_stats_number_of_snapshots gauge
|
||||
elasticsearch_snapshot_stats_number_of_snapshots{repository="test1"} 1
|
||||
# HELP elasticsearch_snapshot_stats_oldest_snapshot_timestamp Timestamp of the oldest snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_oldest_snapshot_timestamp gauge
|
||||
elasticsearch_snapshot_stats_oldest_snapshot_timestamp{repository="test1"} 1.536053353e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_end_time_timestamp Last snapshot end timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_end_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_end_time_timestamp{repository="test1",state="SUCCESS",version="5.4.2"} 1.536053354e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_failed_shards Last snapshot failed shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_failed_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_failed_shards{repository="test1",state="SUCCESS",version="5.4.2"} 0
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_failures Last snapshot number of failures
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_failures gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_failures{repository="test1",state="SUCCESS",version="5.4.2"} 0
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_indices Number of indices in the last snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_indices gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_indices{repository="test1",state="SUCCESS",version="5.4.2"} 2
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_start_time_timestamp Last snapshot start timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_start_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_start_time_timestamp{repository="test1",state="SUCCESS",version="5.4.2"} 1.536053353e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_successful_shards Last snapshot successful shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_successful_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_successful_shards{repository="test1",state="SUCCESS",version="5.4.2"} 10
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_total_shards Last snapshot total shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_total_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_total_shards{repository="test1",state="SUCCESS",version="5.4.2"} 10
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "5.4.2-failure",
|
||||
file: "../fixtures/snapshots/5.4.2-failed.json",
|
||||
want: `# HELP elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds Timestamp of the latest SUCCESS or PARTIAL snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds gauge
|
||||
elasticsearch_snapshot_stats_latest_snapshot_timestamp_seconds{repository="test1"} 1.536053353e+09
|
||||
# HELP elasticsearch_snapshot_stats_number_of_snapshots Number of snapshots in a repository
|
||||
# TYPE elasticsearch_snapshot_stats_number_of_snapshots gauge
|
||||
elasticsearch_snapshot_stats_number_of_snapshots{repository="test1"} 1
|
||||
# HELP elasticsearch_snapshot_stats_oldest_snapshot_timestamp Timestamp of the oldest snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_oldest_snapshot_timestamp gauge
|
||||
elasticsearch_snapshot_stats_oldest_snapshot_timestamp{repository="test1"} 1.536053353e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_end_time_timestamp Last snapshot end timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_end_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_end_time_timestamp{repository="test1",state="SUCCESS",version="5.4.2"} 1.536053354e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_failed_shards Last snapshot failed shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_failed_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_failed_shards{repository="test1",state="SUCCESS",version="5.4.2"} 1
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_failures Last snapshot number of failures
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_failures gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_failures{repository="test1",state="SUCCESS",version="5.4.2"} 1
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_number_of_indices Number of indices in the last snapshot
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_number_of_indices gauge
|
||||
elasticsearch_snapshot_stats_snapshot_number_of_indices{repository="test1",state="SUCCESS",version="5.4.2"} 2
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_start_time_timestamp Last snapshot start timestamp
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_start_time_timestamp gauge
|
||||
elasticsearch_snapshot_stats_snapshot_start_time_timestamp{repository="test1",state="SUCCESS",version="5.4.2"} 1.536053353e+09
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_successful_shards Last snapshot successful shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_successful_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_successful_shards{repository="test1",state="SUCCESS",version="5.4.2"} 10
|
||||
# HELP elasticsearch_snapshot_stats_snapshot_total_shards Last snapshot total shards
|
||||
# TYPE elasticsearch_snapshot_stats_snapshot_total_shards gauge
|
||||
elasticsearch_snapshot_stats_snapshot_total_shards{repository="test1",state="SUCCESS",version="5.4.2"} 10
|
||||
`,
|
||||
},
|
||||
}
|
||||
for ver, out := range tcs {
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.RequestURI == "/_snapshot" {
|
||||
fmt.Fprint(w, out[0])
|
||||
return
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
f, err := os.Open(tt.file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
fmt.Fprint(w, out[1])
|
||||
}))
|
||||
defer ts.Close()
|
||||
defer f.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
}
|
||||
s := NewSnapshots(log.NewNopLogger(), http.DefaultClient, u)
|
||||
stats, err := s.fetchAndDecodeSnapshotsStats()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to fetch or decode snapshots stats: %s", err)
|
||||
}
|
||||
t.Logf("[%s] Snapshots Response: %+v", ver, stats)
|
||||
repositoryStats := stats["test1"]
|
||||
snapshotStats := repositoryStats.Snapshots[0]
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.RequestURI == "/_snapshot" {
|
||||
fmt.Fprint(w, `{"test1":{"type":"fs","settings":{"location":"/tmp/test1"}}}`)
|
||||
return
|
||||
}
|
||||
io.Copy(w, f)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
if len(snapshotStats.Indices) != 2 {
|
||||
t.Errorf("Bad number of snapshot indices")
|
||||
}
|
||||
if len(snapshotStats.Failures) != int(snapshotStats.Shards.Failed) {
|
||||
t.Errorf("Bad number of snapshot failures")
|
||||
}
|
||||
if snapshotStats.Shards.Total != 10 {
|
||||
t.Errorf("Bad number of snapshot shards total")
|
||||
}
|
||||
if snapshotStats.Shards.Successful != 10 {
|
||||
t.Errorf("Bad number of snapshot shards successful")
|
||||
}
|
||||
if len(repositoryStats.Snapshots) != 1 {
|
||||
t.Errorf("Bad number of repository snapshots")
|
||||
}
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
c, err := NewSnapshots(promslog.NewNopLogger(), u, http.DefaultClient)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(tt.want)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
142
collector/tasks.go
Normal file
142
collector/tasks.go
Normal file
@ -0,0 +1,142 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/alecthomas/kingpin/v2"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// filterByTask global required because collector interface doesn't expose any way to take
|
||||
// constructor args.
|
||||
var actionFilter string
|
||||
|
||||
var taskActionDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "task_stats", "action"),
|
||||
"Number of tasks of a certain action",
|
||||
[]string{"action"}, nil)
|
||||
|
||||
func init() {
|
||||
kingpin.Flag("tasks.actions",
|
||||
"Filter on task actions. Used in same way as Task API actions param").
|
||||
Default("indices:*").StringVar(&actionFilter)
|
||||
registerCollector("tasks", defaultDisabled, NewTaskCollector)
|
||||
}
|
||||
|
||||
// Task Information Struct
|
||||
type TaskCollector struct {
|
||||
logger *slog.Logger
|
||||
hc *http.Client
|
||||
u *url.URL
|
||||
}
|
||||
|
||||
// NewTaskCollector defines Task Prometheus metrics
|
||||
func NewTaskCollector(logger *slog.Logger, u *url.URL, hc *http.Client) (Collector, error) {
|
||||
logger.Info("task collector created",
|
||||
"actionFilter", actionFilter,
|
||||
)
|
||||
|
||||
return &TaskCollector{
|
||||
logger: logger,
|
||||
hc: hc,
|
||||
u: u,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *TaskCollector) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
|
||||
tasks, err := t.fetchTasks(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch and decode task stats: %w", err)
|
||||
}
|
||||
|
||||
stats := AggregateTasks(tasks)
|
||||
for action, count := range stats.CountByAction {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
taskActionDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(count),
|
||||
action,
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *TaskCollector) fetchTasks(_ context.Context) (tasksResponse, error) {
|
||||
u := t.u.ResolveReference(&url.URL{Path: "_tasks"})
|
||||
q := u.Query()
|
||||
q.Set("group_by", "none")
|
||||
q.Set("actions", actionFilter)
|
||||
u.RawQuery = q.Encode()
|
||||
|
||||
var tr tasksResponse
|
||||
res, err := t.hc.Get(u.String())
|
||||
if err != nil {
|
||||
return tr, fmt.Errorf("failed to get data stream stats health from %s://%s:%s%s: %s",
|
||||
u.Scheme, u.Hostname(), u.Port(), u.Path, err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = res.Body.Close()
|
||||
if err != nil {
|
||||
t.logger.Warn(
|
||||
"failed to close http.Client",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return tr, fmt.Errorf("HTTP Request to %v failed with code %d", u.String(), res.StatusCode)
|
||||
}
|
||||
|
||||
bts, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return tr, err
|
||||
}
|
||||
|
||||
err = json.Unmarshal(bts, &tr)
|
||||
return tr, err
|
||||
}
|
||||
|
||||
// tasksResponse is a representation of the Task management API.
|
||||
type tasksResponse struct {
|
||||
Tasks []taskResponse `json:"tasks"`
|
||||
}
|
||||
|
||||
// taskResponse is a representation of the individual task item returned by task API endpoint.
|
||||
//
|
||||
// We only parse a very limited amount of this API for use in aggregation.
|
||||
type taskResponse struct {
|
||||
Action string `json:"action"`
|
||||
}
|
||||
|
||||
type aggregatedTaskStats struct {
|
||||
CountByAction map[string]int64
|
||||
}
|
||||
|
||||
func AggregateTasks(t tasksResponse) aggregatedTaskStats {
|
||||
actions := map[string]int64{}
|
||||
for _, task := range t.Tasks {
|
||||
actions[task.Action]++
|
||||
}
|
||||
return aggregatedTaskStats{CountByAction: actions}
|
||||
}
|
||||
78
collector/tasks_test.go
Normal file
78
collector/tasks_test.go
Normal file
@ -0,0 +1,78 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/promslog"
|
||||
)
|
||||
|
||||
func TestTasks(t *testing.T) {
|
||||
// Test data was collected by running the following:
|
||||
// # create container
|
||||
// docker run -d --name elasticsearch -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" elasticsearch:7.17.11
|
||||
// sleep 15
|
||||
// # start some busy work in background
|
||||
// for i in $(seq 1 500)
|
||||
// do
|
||||
// curl -o /dev/null -sX POST "localhost:9200/a1/_doc" -H 'Content-Type: application/json' -d'{"a1": "'"$i"'"}'
|
||||
// sleep .01
|
||||
// curl -o /dev/null -sX POST "localhost:9200/a1/_doc" -H 'Content-Type: application/json' -d'{"a2": "'"$i"'"}'
|
||||
// sleep .01
|
||||
// curl -o /dev/null -sX POST "localhost:9200/a1/_doc" -H 'Content-Type: application/json' -d'{"a3": "'"$i"'"}'
|
||||
// sleep .01
|
||||
// done &
|
||||
// # try and collect a good sample
|
||||
// curl -X GET 'localhost:9200/_tasks?group_by=none&actions=indices:*'
|
||||
// # cleanup
|
||||
// docker rm --force elasticsearch
|
||||
tcs := map[string]string{
|
||||
"7.17": `{"tasks":[{"node":"9lWCm1y_QkujaAg75bVx7A","id":70,"type":"transport","action":"indices:admin/index_template/put","start_time_in_millis":1695900464655,"running_time_in_nanos":308640039,"cancellable":false,"headers":{}},{"node":"9lWCm1y_QkujaAg75bVx7A","id":73,"type":"transport","action":"indices:admin/index_template/put","start_time_in_millis":1695900464683,"running_time_in_nanos":280672000,"cancellable":false,"headers":{}},{"node":"9lWCm1y_QkujaAg75bVx7A","id":76,"type":"transport","action":"indices:admin/index_template/put","start_time_in_millis":1695900464711,"running_time_in_nanos":253247906,"cancellable":false,"headers":{}},{"node":"9lWCm1y_QkujaAg75bVx7A","id":93,"type":"transport","action":"indices:admin/index_template/put","start_time_in_millis":1695900464904,"running_time_in_nanos":60230460,"cancellable":false,"headers":{}},{"node":"9lWCm1y_QkujaAg75bVx7A","id":50,"type":"transport","action":"indices:data/write/index","start_time_in_millis":1695900464229,"running_time_in_nanos":734480468,"cancellable":false,"headers":{}},{"node":"9lWCm1y_QkujaAg75bVx7A","id":51,"type":"transport","action":"indices:admin/auto_create","start_time_in_millis":1695900464235,"running_time_in_nanos":729223933,"cancellable":false,"headers":{}}]}`,
|
||||
}
|
||||
want := `# HELP elasticsearch_task_stats_action Number of tasks of a certain action
|
||||
# TYPE elasticsearch_task_stats_action gauge
|
||||
elasticsearch_task_stats_action{action="indices:admin/auto_create"} 1
|
||||
elasticsearch_task_stats_action{action="indices:admin/index_template/put"} 4
|
||||
elasticsearch_task_stats_action{action="indices:data/write/index"} 1
|
||||
`
|
||||
for ver, out := range tcs {
|
||||
t.Run(ver, func(t *testing.T) {
|
||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
||||
fmt.Fprintln(w, out)
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse URL: %s", err)
|
||||
}
|
||||
|
||||
c, err := NewTaskCollector(promslog.NewNopLogger(), u, ts.Client())
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create collector: %v", err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(wrapCollector{c}, strings.NewReader(want)); err != nil {
|
||||
t.Fatalf("Metrics did not match: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
63
collector/util.go
Normal file
63
collector/util.go
Normal file
@ -0,0 +1,63 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func getURL(ctx context.Context, hc *http.Client, log *slog.Logger, u string) ([]byte, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
err = resp.Body.Close()
|
||||
if err != nil {
|
||||
log.Warn(
|
||||
"failed to close response body",
|
||||
"err", err,
|
||||
)
|
||||
}
|
||||
}()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("HTTP Request failed with code %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// bool2Float converts a bool to a float64. True is 1, false is 0.
|
||||
func bool2Float(managed bool) float64 {
|
||||
if managed {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
137
config/config.go
Normal file
137
config/config.go
Normal file
@ -0,0 +1,137 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"go.yaml.in/yaml/v3"
|
||||
)
|
||||
|
||||
// Config represents the YAML configuration file structure.
|
||||
type Config struct {
|
||||
AuthModules map[string]AuthModule `yaml:"auth_modules"`
|
||||
}
|
||||
|
||||
type AuthModule struct {
|
||||
Type string `yaml:"type"`
|
||||
UserPass *UserPassConfig `yaml:"userpass,omitempty"`
|
||||
APIKey string `yaml:"apikey,omitempty"`
|
||||
AWS *AWSConfig `yaml:"aws,omitempty"`
|
||||
TLS *TLSConfig `yaml:"tls,omitempty"`
|
||||
Options map[string]string `yaml:"options,omitempty"`
|
||||
}
|
||||
|
||||
// AWSConfig contains settings for SigV4 authentication.
|
||||
type AWSConfig struct {
|
||||
Region string `yaml:"region,omitempty"`
|
||||
RoleARN string `yaml:"role_arn,omitempty"`
|
||||
}
|
||||
|
||||
// TLSConfig allows per-target TLS options.
|
||||
type TLSConfig struct {
|
||||
CAFile string `yaml:"ca_file,omitempty"`
|
||||
CertFile string `yaml:"cert_file,omitempty"`
|
||||
KeyFile string `yaml:"key_file,omitempty"`
|
||||
InsecureSkipVerify bool `yaml:"insecure_skip_verify,omitempty"`
|
||||
}
|
||||
|
||||
type UserPassConfig struct {
|
||||
Username string `yaml:"username"`
|
||||
Password string `yaml:"password"`
|
||||
}
|
||||
|
||||
// validate ensures every auth module has the required fields according to its type.
|
||||
func (c *Config) validate() error {
|
||||
for name, am := range c.AuthModules {
|
||||
// Validate fields based on auth type
|
||||
switch strings.ToLower(am.Type) {
|
||||
case "userpass":
|
||||
if am.UserPass == nil || am.UserPass.Username == "" || am.UserPass.Password == "" {
|
||||
return fmt.Errorf("auth_module %s type userpass requires username and password", name)
|
||||
}
|
||||
case "apikey":
|
||||
if am.APIKey == "" {
|
||||
return fmt.Errorf("auth_module %s type apikey requires apikey", name)
|
||||
}
|
||||
case "aws":
|
||||
// No strict validation: region can come from environment/defaults; role_arn is optional.
|
||||
case "tls":
|
||||
// TLS auth type means client certificate authentication only (no other auth)
|
||||
if am.TLS == nil {
|
||||
return fmt.Errorf("auth_module %s type tls requires tls configuration section", name)
|
||||
}
|
||||
if am.TLS.CertFile == "" || am.TLS.KeyFile == "" {
|
||||
return fmt.Errorf("auth_module %s type tls requires cert_file and key_file for client certificate authentication", name)
|
||||
}
|
||||
// Validate that other auth fields are not set when using TLS auth type
|
||||
if am.UserPass != nil {
|
||||
return fmt.Errorf("auth_module %s type tls cannot have userpass configuration", name)
|
||||
}
|
||||
if am.APIKey != "" {
|
||||
return fmt.Errorf("auth_module %s type tls cannot have apikey", name)
|
||||
}
|
||||
if am.AWS != nil {
|
||||
return fmt.Errorf("auth_module %s type tls cannot have aws configuration", name)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("auth_module %s has unsupported type %s", name, am.Type)
|
||||
}
|
||||
|
||||
// Validate TLS configuration (optional for all auth types, provides transport security)
|
||||
if am.TLS != nil {
|
||||
// For cert-based auth (type: tls), cert and key are required
|
||||
// For other auth types, TLS config is optional and used for transport security
|
||||
if strings.ToLower(am.Type) != "tls" {
|
||||
// For non-TLS auth types, if cert/key are provided, both must be present
|
||||
if (am.TLS.CertFile != "") != (am.TLS.KeyFile != "") {
|
||||
return fmt.Errorf("auth_module %s: if providing client certificate, both cert_file and key_file must be specified", name)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate file accessibility
|
||||
for fileType, path := range map[string]string{
|
||||
"ca_file": am.TLS.CAFile,
|
||||
"cert_file": am.TLS.CertFile,
|
||||
"key_file": am.TLS.KeyFile,
|
||||
} {
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return fmt.Errorf("auth_module %s: %s '%s' not accessible: %w", name, fileType, path, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadConfig reads, parses, and validates the YAML config file.
|
||||
func LoadConfig(path string) (*Config, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var cfg Config
|
||||
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cfg.validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
183
config/config_test.go
Normal file
183
config/config_test.go
Normal file
@ -0,0 +1,183 @@
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// mustTempFile creates an empty temporary ".crt" file for the test and
// returns its path, failing the test immediately if creation fails. The file
// is registered for removal via t.Cleanup in addition to t.TempDir's own
// directory cleanup.
func mustTempFile(t *testing.T) string {
	file, err := os.CreateTemp(t.TempDir(), "pem-*.crt")
	if err != nil {
		t.Fatalf("temp file: %v", err)
	}
	file.Close()
	name := file.Name()
	// Ensure the file is removed even if TempDir cleanup semantics change.
	t.Cleanup(func() { _ = os.Remove(name) })
	return name
}
|
||||
|
||||
// ---------------------------- Positive cases ----------------------------
|
||||
// TestLoadConfigPositiveVariants exercises LoadConfig with configurations
// that must validate successfully: userpass/apikey/aws modules (with and
// without an optional tls transport section) and a pure client-cert module.
//
// NOTE(review): the leading indentation inside the YAML literals was lost in
// the reviewed copy and is reconstructed here — confirm against the original
// file before relying on exact bytes.
func TestLoadConfigPositiveVariants(t *testing.T) {
	// Real (empty) files so the os.Stat accessibility check in validate passes.
	ca := mustTempFile(t)
	cert := mustTempFile(t)
	key := mustTempFile(t)

	positive := []struct {
		name string
		yaml string
	}{{
		"userpass",
		`auth_modules:
  basic:
    type: userpass
    userpass:
      username: u
      password: p`,
	}, {
		"userpass-with-tls",
		`auth_modules:
  basic:
    type: userpass
    userpass:
      username: u
      password: p
    tls:
      ca_file: ` + ca + `
      insecure_skip_verify: true`,
	}, {
		"apikey",
		`auth_modules:
  key:
    type: apikey
    apikey: ZXhhbXBsZQ==`,
	}, {
		"apikey-with-tls",
		`auth_modules:
  key:
    type: apikey
    apikey: ZXhhbXBsZQ==
    tls:
      ca_file: ` + ca + `
      cert_file: ` + cert + `
      key_file: ` + key + ``,
	}, {
		"aws-with-tls",
		`auth_modules:
  awsmod:
    type: aws
    aws:
      region: us-east-1
    tls:
      insecure_skip_verify: true`,
	}, {
		"tls-only",
		`auth_modules:
  pki:
    type: tls
    tls:
      ca_file: ` + ca + `
      cert_file: ` + cert + `
      key_file: ` + key + ``,
	}}

	for _, c := range positive {
		// Write each candidate config to its own temp file, then load it.
		tmp, _ := os.CreateTemp(t.TempDir(), "cfg-*.yml")
		_, _ = tmp.WriteString(c.yaml)
		_ = tmp.Close()
		t.Cleanup(func() { _ = os.Remove(tmp.Name()) })
		if _, err := LoadConfig(tmp.Name()); err != nil {
			t.Fatalf("%s: expected success, got %v", c.name, err)
		}
	}
}
|
||||
|
||||
// ---------------------------- Negative cases ----------------------------
|
||||
// TestLoadConfigNegativeVariants exercises LoadConfig with configurations
// that must be rejected by validation: missing required fields, an incomplete
// client-certificate pair, conflicting auth mechanisms on type tls, and an
// unsupported type.
//
// NOTE(review): YAML literal indentation reconstructed (lost in the reviewed
// copy) — confirm against the original file.
func TestLoadConfigNegativeVariants(t *testing.T) {
	// Real (empty) files so failures come from validation rules, not os.Stat.
	cert := mustTempFile(t)
	key := mustTempFile(t)

	negative := []struct {
		name string
		yaml string
	}{{
		"userpassMissingPassword",
		`auth_modules:
  bad:
    type: userpass
    userpass: {username: u}`,
	}, {
		"tlsMissingCert",
		`auth_modules:
  bad:
    type: tls
    tls: {key_file: ` + key + `}`,
	}, {
		"tlsMissingKey",
		`auth_modules:
  bad:
    type: tls
    tls: {cert_file: ` + cert + `}`,
	}, {
		"tlsMissingConfig",
		`auth_modules:
  bad:
    type: tls`,
	}, {
		"tlsWithUserpass",
		`auth_modules:
  bad:
    type: tls
    tls: {cert_file: ` + cert + `, key_file: ` + key + `}
    userpass: {username: u, password: p}`,
	}, {
		"tlsWithAPIKey",
		`auth_modules:
  bad:
    type: tls
    tls: {cert_file: ` + cert + `, key_file: ` + key + `}
    apikey: ZXhhbXBsZQ==`,
	}, {
		"tlsWithAWS",
		`auth_modules:
  bad:
    type: tls
    tls: {cert_file: ` + cert + `, key_file: ` + key + `}
    aws: {region: us-east-1}`,
	}, {
		"tlsIncompleteCert",
		`auth_modules:
  bad:
    type: apikey
    apikey: ZXhhbXBsZQ==
    tls: {cert_file: ` + cert + `}`,
	}, {
		"unsupportedType",
		`auth_modules:
  bad:
    type: foobar`,
	}}

	for _, c := range negative {
		tmp, _ := os.CreateTemp(t.TempDir(), "cfg-*.yml")
		_, _ = tmp.WriteString(c.yaml)
		_ = tmp.Close()
		t.Cleanup(func() { _ = os.Remove(tmp.Name()) })
		if _, err := LoadConfig(tmp.Name()); err == nil {
			t.Fatalf("%s: expected validation error, got none", c.name)
		}
	}
}
|
||||
29
elasticsearch-mixin/README.md
Normal file
29
elasticsearch-mixin/README.md
Normal file
@ -0,0 +1,29 @@
|
||||
# Elasticsearch Exporter Mixin
|
||||
|
||||
This is a mixin for the elasticsearch_exporter to define dashboards, alerts, and monitoring queries for use with this exporter.
|
||||
|
||||
Good example of upstream mixin for reference: https://github.com/kubernetes-monitoring/kubernetes-mixin
|
||||
|
||||
## Development
|
||||
|
||||
### JSONNET
|
||||
https://jsonnet.org/
|
||||
|
||||
```go install github.com/google/go-jsonnet/cmd/jsonnet@latest```
|
||||
|
||||
### JSONNET BUNDLER
|
||||
Jsonnet Bundler (`jb`) is a package manager for jsonnet libraries.
|
||||
|
||||
https://github.com/jsonnet-bundler/jsonnet-bundler
|
||||
|
||||
```go install -a github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest```
|
||||
|
||||
### Grafonnet
|
||||
Grafana libraries for jsonnet: https://grafana.github.io/grafonnet/
|
||||
|
||||
```jb install github.com/grafana/grafonnet/gen/grafonnet-latest@main```
|
||||
|
||||
### Run the build
|
||||
```bash
|
||||
./scripts/compile-mixin.sh
|
||||
```
|
||||
1
elasticsearch-mixin/compiled/alerts.yaml
Normal file
1
elasticsearch-mixin/compiled/alerts.yaml
Normal file
@ -0,0 +1 @@
|
||||
{}
|
||||
687
elasticsearch-mixin/compiled/dashboards/cluster.json
Normal file
687
elasticsearch-mixin/compiled/dashboards/cluster.json
Normal file
@ -0,0 +1,687 @@
|
||||
{
|
||||
"graphTooltip": 1,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"panels": [ ],
|
||||
"title": "Overview",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 2,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_number_of_nodes{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Nodes",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 1
|
||||
},
|
||||
"id": 3,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_number_of_data_nodes{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Data Nodes",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 1
|
||||
},
|
||||
"id": 4,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_number_of_pending_tasks{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Pending Tasks",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 5
|
||||
},
|
||||
"id": 5,
|
||||
"panels": [ ],
|
||||
"title": "Shards",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 6
|
||||
},
|
||||
"id": 6,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_active_shards{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Active",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 6
|
||||
},
|
||||
"id": 7,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_active_primary_shards{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Active Primary",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 6
|
||||
},
|
||||
"id": 8,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_initializing_shards{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Initializing",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 12,
|
||||
"y": 6
|
||||
},
|
||||
"id": 9,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_reloacting_shards{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Relocating",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 16,
|
||||
"y": 6
|
||||
},
|
||||
"id": 10,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_unassigned_shards{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Unassigned",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 6
|
||||
},
|
||||
"id": 11,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~\"$cluster\"}\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "DelayedUnassigned",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 12,
|
||||
"panels": [ ],
|
||||
"title": "Documents",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 13,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "elasticsearch_indices_docs{cluster=~\"$cluster\"}\n"
|
||||
}
|
||||
],
|
||||
"title": "Indexed Documents",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 11
|
||||
},
|
||||
"id": 14,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "elasticsearch_indices_store_size_bytes{cluster=~\"$cluster\"}\n"
|
||||
}
|
||||
],
|
||||
"title": "Index Size",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 11
|
||||
},
|
||||
"id": 15,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "rate(elasticsearch_indices_indexing_index_total{cluster=~\"$cluster\"}[$__rate_interval])\n",
|
||||
"legendFormat": "{{name}}"
|
||||
}
|
||||
],
|
||||
"title": "Index Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
},
|
||||
"id": 16,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "rate(elasticsearch_indices_search_query_total{cluster=~\"$cluster\"}[$__rate_interval])\n",
|
||||
"legendFormat": "{{name}}"
|
||||
}
|
||||
],
|
||||
"title": "Query Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 16,
|
||||
"y": 11
|
||||
},
|
||||
"id": 17,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(elasticsearch_thread_pool_queue_count{cluster=~\"$cluster\",type!=\"management\"}) by (type)\n",
|
||||
"legendFormat": "{{type}}"
|
||||
}
|
||||
],
|
||||
"title": "Queue Count",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 18,
|
||||
"panels": [ ],
|
||||
"title": "Memory",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 19,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "elasticsearch_jvm_memory_used_bytes{cluster=~\"$cluster\"}\n",
|
||||
"legendFormat": "{{name}} {{area}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 16
|
||||
},
|
||||
"id": 20,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "avg_over_time(\n elasticsearch_jvm_memory_used_bytes{cluster=~\"$cluster\"}[15m]\n) /\nelasticsearch_jvm_memory_max_bytes{cluster=~\"$cluster\"}\n",
|
||||
"legendFormat": "{{name}} {{area}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory 15m Avg",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 21,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "elasticsearch_jvm_memory_max_bytes{cluster=~\"$cluster\"}\n",
|
||||
"legendFormat": "{{name}} {{area}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory Max",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 16
|
||||
},
|
||||
"id": 22,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "rate(\n elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\"}[$__rate_interval]\n)\n",
|
||||
"legendFormat": "{{name}} {{gc}}"
|
||||
}
|
||||
],
|
||||
"title": "GC Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 20
|
||||
},
|
||||
"id": 23,
|
||||
"panels": [ ],
|
||||
"title": "Threads",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 24,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "elasticsearch_thread_pool_active_count{cluster=~\"$cluster\"}\n",
|
||||
"legendFormat": "{{type}}"
|
||||
}
|
||||
],
|
||||
"title": "Thread Pools",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 21
|
||||
},
|
||||
"id": 25,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "elasticsearch_thread_pool_rejected_count{cluster=~\"$cluster\"}\n",
|
||||
"legendFormat": "{{name}} {{type}}"
|
||||
}
|
||||
],
|
||||
"title": "Thread Pool Rejections",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 25
|
||||
},
|
||||
"id": 26,
|
||||
"panels": [ ],
|
||||
"title": "Network",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 26
|
||||
},
|
||||
"id": 27,
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "rate(\n elasticsearch_transport_rx_size_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]\n)\n",
|
||||
"legendFormat": "{{name}} TX"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "rate(\n elasticsearch_transport_tx_size_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]\n)\n",
|
||||
"legendFormat": "{{name}} RX"
|
||||
}
|
||||
],
|
||||
"title": "Transport Rate",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"elasticsearch-exporter-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"name": "cluster",
|
||||
"query": "label_values(elasticsearch_cluster_health_status, cluster)",
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Elasticsearch Exporter / Cluster"
|
||||
}
|
||||
1
elasticsearch-mixin/compiled/rules.yaml
Normal file
1
elasticsearch-mixin/compiled/rules.yaml
Normal file
@ -0,0 +1 @@
|
||||
{}
|
||||
6
elasticsearch-mixin/config.libsonnet
Normal file
6
elasticsearch-mixin/config.libsonnet
Normal file
@ -0,0 +1,6 @@
|
||||
// Mixin configuration knobs consumed by the dashboard sources.
{
  _config+:: {
    // Prefix prepended to every generated dashboard title.
    dashboardNamePrefix: 'Elasticsearch Exporter / ',
    // Tags attached to every generated dashboard.
    dashboardTags: ['elasticsearch-exporter-mixin'],
  },
}
|
||||
3
elasticsearch-mixin/dashboards.jsonnet
Normal file
3
elasticsearch-mixin/dashboards.jsonnet
Normal file
@ -0,0 +1,3 @@
|
||||
// Compilation entry point: emits one top-level field per dashboard JSON file
// defined under grafanaDashboards in the mixin.
local dashboards = (import 'mixin.libsonnet').grafanaDashboards;

{ [name]: dashboards[name] for name in std.objectFields(dashboards) }
|
||||
67
elasticsearch-mixin/dashboards/cluster.libsonnet
Normal file
67
elasticsearch-mixin/dashboards/cluster.libsonnet
Normal file
@ -0,0 +1,67 @@
|
||||
// Grafana dashboard source for the "Cluster" overview, assembled with
// grafonnet from the shared panels/queries/variables libraries.
local g = import 'g.libsonnet';

local dashboard = g.dashboard;
local row = g.panel.row;

local panels = import './panels.libsonnet';
local queries = import './queries.libsonnet';
local variables = import './variables.libsonnet';
local util = import './util.libsonnet';

{
  grafanaDashboards+:: {
    'cluster.json':
      // Title prefix and tags come from config.libsonnet ($._config).
      dashboard.new('%s Cluster' % $._config.dashboardNamePrefix)
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withRefresh('1m')
      + dashboard.time.withFrom(value='now-1h')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withVariables([
        variables.datasource,
        variables.cluster,
      ])
      + dashboard.withPanels(
        // util.makeGrid lays the rows and their panels out on the grid.
        util.makeGrid([
          row.new('Overview')
          + row.withPanels([
            panels.stat.nodes('Nodes', queries.runningNodes),
            panels.stat.nodes('Data Nodes', queries.dataNodes),
            panels.stat.nodes('Pending Tasks', queries.pendingTasks),
          ]),
          row.new('Shards')
          + row.withPanels([
            panels.stat.nodes('Active', queries.activeShards),
            panels.stat.nodes('Active Primary', queries.activePrimaryShards),
            panels.stat.nodes('Initializing', queries.initializingShards),
            // NOTE(review): "reloacting" looks like a typo for "relocating";
            // the field name is defined in queries/shard.libsonnet and must
            // be renamed there in the same change — confirm before fixing.
            panels.stat.nodes('Relocating', queries.reloactingShards),
            panels.stat.nodes('Unassigned', queries.unassignedShards),
            panels.stat.nodes('DelayedUnassigned', queries.delayedUnassignedShards),
          ]),
          row.new('Documents')
          + row.withPanels([
            panels.timeSeries.base('Indexed Documents', queries.indexedDocuments),
            panels.timeSeries.bytes('Index Size', queries.indexSize),
            panels.timeSeries.base('Index Rate', queries.indexRate),
            panels.timeSeries.base('Query Rate', queries.queryRate),
            panels.timeSeries.base('Queue Count', queries.queueCount),
          ]),
          row.new('Memory')
          + row.withPanels([
            panels.timeSeries.bytes('Memory Usage', queries.memoryUsage),
            panels.timeSeries.ratioMax1('Memory 15m Avg', queries.memoryUsageAverage15),
            panels.timeSeries.bytes('Memory Max', queries.memoryMax),
            panels.timeSeries.seconds('GC Rate', queries.gcSeconds),
          ]),
          row.new('Threads')
          + row.withPanels([
            panels.timeSeries.base('Thread Pools', queries.threadPoolActive),
            panels.timeSeries.base('Thread Pool Rejections', queries.threadPoolRejections),
          ]),
          row.new('Network')
          + row.withPanels([
            panels.timeSeries.bytes('Transport Rate', [queries.transportTXRate, queries.transportRXRate]),
          ]),
        ]),
      ),
  },
}
|
||||
1
elasticsearch-mixin/dashboards/dashboards.libsonnet
Normal file
1
elasticsearch-mixin/dashboards/dashboards.libsonnet
Normal file
@ -0,0 +1 @@
|
||||
// Aggregates all dashboard sources; extend this import chain for new dashboards.
(import 'cluster.libsonnet')
|
||||
1
elasticsearch-mixin/dashboards/g.libsonnet
Normal file
1
elasticsearch-mixin/dashboards/g.libsonnet
Normal file
@ -0,0 +1 @@
|
||||
// Convenience re-export of the vendored grafonnet entry point.
import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'
|
||||
38
elasticsearch-mixin/dashboards/panels.libsonnet
Normal file
38
elasticsearch-mixin/dashboards/panels.libsonnet
Normal file
@ -0,0 +1,38 @@
|
||||
// Panel constructors shared by the mixin dashboards; thin wrappers around
// grafonnet's stat and timeSeries panels.
local g = import 'g.libsonnet';

{
  stat: {
    local stat = g.panel.stat,

    // Stat panel with the given title and Prometheus targets.
    base(title, targets):
      stat.new(title)
      + stat.queryOptions.withTargets(targets),

    // Alias of base, kept for readability at node/shard count call sites.
    nodes: self.base,
  },

  timeSeries: {
    local timeSeries = g.panel.timeSeries,

    // Time-series panel with the given title and Prometheus targets.
    base(title, targets):
      timeSeries.new(title)
      + timeSeries.queryOptions.withTargets(targets),

    // base + percent (0..1) unit.
    ratio(title, targets):
      self.base(title, targets)
      + timeSeries.standardOptions.withUnit('percentunit'),

    // ratio with the axis clamped to [0, 1].
    ratioMax1(title, targets):
      self.ratio(title, targets)
      + timeSeries.standardOptions.withMax(1)
      + timeSeries.standardOptions.withMin(0),

    // base + bytes unit.
    bytes(title, targets):
      self.base(title, targets)
      + timeSeries.standardOptions.withUnit('bytes'),

    // base + seconds unit.
    seconds(title, targets):
      self.base(title, targets)
      + timeSeries.standardOptions.withUnit('s'),
  },
}
|
||||
11
elasticsearch-mixin/dashboards/queries.libsonnet
Normal file
11
elasticsearch-mixin/dashboards/queries.libsonnet
Normal file
@ -0,0 +1,11 @@
|
||||
// Aggregates every query library into one object for the dashboard sources.
// NOTE(review): the g/prometheusQuery/variables locals below are not used in
// this file — confirm whether they can be dropped.
local g = import './g.libsonnet';
local prometheusQuery = g.query.prometheus;

local variables = import './variables.libsonnet';

(import './queries/general.libsonnet') +
(import './queries/shard.libsonnet') +
(import './queries/document.libsonnet') +
(import './queries/memory.libsonnet') +
(import './queries/threads.libsonnet') +
(import './queries/network.libsonnet')
|
||||
50
elasticsearch-mixin/dashboards/queries/document.libsonnet
Normal file
50
elasticsearch-mixin/dashboards/queries/document.libsonnet
Normal file
@ -0,0 +1,50 @@
|
||||
// Document/indexing-related Prometheus queries for the cluster dashboard.
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;

local variables = import '../variables.libsonnet';

{
  // Per-index document counts.
  indexedDocuments:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        elasticsearch_indices_docs{cluster=~"$cluster"}
      |||
    ),

  // On-disk store size per index, in bytes.
  indexSize:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        elasticsearch_indices_store_size_bytes{cluster=~"$cluster"}
      |||
    ),

  // Indexing operations per second.
  indexRate:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        rate(elasticsearch_indices_indexing_index_total{cluster=~"$cluster"}[$__rate_interval])
      |||
    )
    + prometheusQuery.withLegendFormat('{{name}}'),

  // Search queries per second.
  queryRate:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        rate(elasticsearch_indices_search_query_total{cluster=~"$cluster"}[$__rate_interval])
      |||
    )
    + prometheusQuery.withLegendFormat('{{name}}'),

  // Thread-pool queue depth by pool type (management pool excluded).
  queueCount:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        sum(elasticsearch_thread_pool_queue_count{cluster=~"$cluster",type!="management"}) by (type)
      |||
    )
    + prometheusQuery.withLegendFormat('{{type}}'),

}
|
||||
35
elasticsearch-mixin/dashboards/queries/general.libsonnet
Normal file
35
elasticsearch-mixin/dashboards/queries/general.libsonnet
Normal file
@ -0,0 +1,35 @@
|
||||
// Cluster-level overview Prometheus queries (node and pending-task counts).
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;

local variables = import '../variables.libsonnet';

{
  // Total nodes reporting cluster health, summed across instances.
  runningNodes:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        sum(
          elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"}
        )
      |||
    ),
  // Data-role nodes in the cluster.
  dataNodes:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        sum(
          elasticsearch_cluster_health_number_of_data_nodes{cluster=~"$cluster"}
        )
      |||
    ),

  // Cluster-level pending task count.
  pendingTasks:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        sum(
          elasticsearch_cluster_health_number_of_pending_tasks{cluster=~"$cluster"}
        )
      |||
    ),
}
|
||||
47
elasticsearch-mixin/dashboards/queries/memory.libsonnet
Normal file
47
elasticsearch-mixin/dashboards/queries/memory.libsonnet
Normal file
@ -0,0 +1,47 @@
|
||||
// JVM memory and garbage-collection Prometheus queries.
local g = import '../g.libsonnet';
local prometheusQuery = g.query.prometheus;

local variables = import '../variables.libsonnet';

{
  // Current JVM memory usage in bytes, labelled per node/area.
  memoryUsage:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        elasticsearch_jvm_memory_used_bytes{cluster=~"$cluster"}
      |||
    )
    + prometheusQuery.withLegendFormat('{{name}} {{area}}'),

  // 15-minute average usage as a fraction of the JVM memory limit.
  memoryUsageAverage15:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        avg_over_time(
          elasticsearch_jvm_memory_used_bytes{cluster=~"$cluster"}[15m]
        ) /
        elasticsearch_jvm_memory_max_bytes{cluster=~"$cluster"}
      |||
    )
    + prometheusQuery.withLegendFormat('{{name}} {{area}}'),

  // Configured JVM memory ceiling in bytes.
  memoryMax:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        elasticsearch_jvm_memory_max_bytes{cluster=~"$cluster"}
      |||
    )
    + prometheusQuery.withLegendFormat('{{name}} {{area}}'),

  // GC time spent per second of wall clock, per node and collector.
  gcSeconds:
    prometheusQuery.new(
      '$' + variables.datasource.name,
      |||
        rate(
          elasticsearch_jvm_gc_collection_seconds_sum{cluster=~"$cluster"}[$__rate_interval]
        )
      |||
    )
    + prometheusQuery.withLegendFormat('{{name}} {{gc}}'),
}
|
||||
28
elasticsearch-mixin/dashboards/queries/network.libsonnet
Normal file
28
elasticsearch-mixin/dashboards/queries/network.libsonnet
Normal file
@ -0,0 +1,28 @@
|
||||
local g = import '../g.libsonnet';
|
||||
local prometheusQuery = g.query.prometheus;
|
||||
|
||||
local variables = import '../variables.libsonnet';
|
||||
|
||||
{
|
||||
transportTXRate:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
rate(
|
||||
elasticsearch_transport_rx_size_bytes_total{cluster=~"$cluster"}[$__rate_interval]
|
||||
)
|
||||
|||
|
||||
)
|
||||
+ prometheusQuery.withLegendFormat('{{name}} TX'),
|
||||
|
||||
transportRXRate:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
rate(
|
||||
elasticsearch_transport_tx_size_bytes_total{cluster=~"$cluster"}[$__rate_interval]
|
||||
)
|
||||
|||
|
||||
)
|
||||
+ prometheusQuery.withLegendFormat('{{name}} RX'),
|
||||
}
|
||||
66
elasticsearch-mixin/dashboards/queries/shard.libsonnet
Normal file
66
elasticsearch-mixin/dashboards/queries/shard.libsonnet
Normal file
@ -0,0 +1,66 @@
|
||||
local g = import '../g.libsonnet';
|
||||
local prometheusQuery = g.query.prometheus;
|
||||
|
||||
local variables = import '../variables.libsonnet';
|
||||
|
||||
{
|
||||
activeShards:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
sum(
|
||||
elasticsearch_cluster_health_active_shards{cluster=~"$cluster"}
|
||||
)
|
||||
|||
|
||||
),
|
||||
|
||||
activePrimaryShards:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
sum(
|
||||
elasticsearch_cluster_health_active_primary_shards{cluster=~"$cluster"}
|
||||
)
|
||||
|||
|
||||
),
|
||||
|
||||
initializingShards:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
sum(
|
||||
elasticsearch_cluster_health_initializing_shards{cluster=~"$cluster"}
|
||||
)
|
||||
|||
|
||||
),
|
||||
|
||||
reloactingShards:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
sum(
|
||||
elasticsearch_cluster_health_reloacting_shards{cluster=~"$cluster"}
|
||||
)
|
||||
|||
|
||||
),
|
||||
|
||||
unassignedShards:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
sum(
|
||||
elasticsearch_cluster_health_unassigned_shards{cluster=~"$cluster"}
|
||||
)
|
||||
|||
|
||||
),
|
||||
|
||||
delayedUnassignedShards:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
sum(
|
||||
elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~"$cluster"}
|
||||
)
|
||||
|||
|
||||
),
|
||||
}
|
||||
24
elasticsearch-mixin/dashboards/queries/threads.libsonnet
Normal file
24
elasticsearch-mixin/dashboards/queries/threads.libsonnet
Normal file
@ -0,0 +1,24 @@
|
||||
local g = import '../g.libsonnet';
|
||||
local prometheusQuery = g.query.prometheus;
|
||||
|
||||
local variables = import '../variables.libsonnet';
|
||||
|
||||
{
|
||||
threadPoolActive:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
elasticsearch_thread_pool_active_count{cluster=~"$cluster"}
|
||||
|||
|
||||
)
|
||||
+ prometheusQuery.withLegendFormat('{{type}}'),
|
||||
|
||||
threadPoolRejections:
|
||||
prometheusQuery.new(
|
||||
'$' + variables.datasource.name,
|
||||
|||
|
||||
elasticsearch_thread_pool_rejected_count{cluster=~"$cluster"}
|
||||
|||
|
||||
)
|
||||
+ prometheusQuery.withLegendFormat('{{name}} {{type}}'),
|
||||
}
|
||||
66
elasticsearch-mixin/dashboards/util.libsonnet
Normal file
66
elasticsearch-mixin/dashboards/util.libsonnet
Normal file
@ -0,0 +1,66 @@
|
||||
local g = import 'g.libsonnet';
|
||||
local panelUtil = g.util.panel;
|
||||
|
||||
{
|
||||
local gridWidth = 24,
|
||||
|
||||
// makeGrid returns an array of panels organized into a grid layout.
|
||||
// This is a modified version of the grafonnet makeGrid function to
|
||||
// calculate the width of each panel based on the number of panels.
|
||||
makeGrid(panels, panelHeight=4, startY=0):
|
||||
local sanitizePanels(ps) =
|
||||
// Figure out the number of panels and the width of each panel
|
||||
local numPanels = std.length(ps);
|
||||
local panelWidth = std.floor(gridWidth / numPanels);
|
||||
|
||||
// Sanitize the panels, this ensures tht the panels have the valid gridPos
|
||||
std.map(
|
||||
function(p)
|
||||
local sanePanel = panelUtil.sanitizePanel(p, defaultHeight=panelHeight);
|
||||
(
|
||||
if p.type == 'row'
|
||||
then sanePanel {
|
||||
panels: sanitizePanels(sanePanel.panels),
|
||||
}
|
||||
else sanePanel {
|
||||
gridPos+: {
|
||||
w: panelWidth,
|
||||
},
|
||||
}
|
||||
),
|
||||
ps
|
||||
);
|
||||
|
||||
local sanitizedPanels = sanitizePanels(panels);
|
||||
|
||||
local grouped = panelUtil.groupPanelsInRows(sanitizedPanels);
|
||||
|
||||
local panelsBeforeRows = panelUtil.getPanelsBeforeNextRow(grouped);
|
||||
local rowPanels =
|
||||
std.filter(
|
||||
function(p) p.type == 'row',
|
||||
grouped
|
||||
);
|
||||
|
||||
|
||||
local CalculateXforPanel(index, panel) =
|
||||
local panelsPerRow = std.floor(gridWidth / panel.gridPos.w);
|
||||
local col = std.mod(index, panelsPerRow);
|
||||
panel { gridPos+: { x: panel.gridPos.w * col } };
|
||||
|
||||
|
||||
local panelsBeforeRowsWithX = std.mapWithIndex(CalculateXforPanel, panelsBeforeRows);
|
||||
|
||||
local rowPanelsWithX =
|
||||
std.map(
|
||||
function(row)
|
||||
row { panels: std.mapWithIndex(CalculateXforPanel, row.panels) },
|
||||
rowPanels
|
||||
);
|
||||
|
||||
local uncollapsed = panelUtil.resolveCollapsedFlagOnRows(panelsBeforeRowsWithX + rowPanelsWithX);
|
||||
|
||||
local normalized = panelUtil.normalizeY(uncollapsed);
|
||||
|
||||
std.map(function(p) p { gridPos+: { y+: startY } }, normalized),
|
||||
}
|
||||
15
elasticsearch-mixin/dashboards/variables.libsonnet
Normal file
15
elasticsearch-mixin/dashboards/variables.libsonnet
Normal file
@ -0,0 +1,15 @@
|
||||
local g = import './g.libsonnet';
|
||||
local var = g.dashboard.variable;
|
||||
|
||||
{
|
||||
datasource:
|
||||
var.datasource.new('datasource', 'prometheus'),
|
||||
|
||||
cluster:
|
||||
var.query.new('cluster')
|
||||
+ var.query.withDatasourceFromVariable(self.datasource)
|
||||
+ var.query.queryTypes.withLabelValues(
|
||||
'cluster',
|
||||
'elasticsearch_cluster_health_status',
|
||||
),
|
||||
}
|
||||
15
elasticsearch-mixin/jsonnetfile.json
Normal file
15
elasticsearch-mixin/jsonnetfile.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-latest"
|
||||
}
|
||||
},
|
||||
"version": "main"
|
||||
}
|
||||
],
|
||||
"legacyImports": true
|
||||
}
|
||||
46
elasticsearch-mixin/jsonnetfile.lock.json
Normal file
46
elasticsearch-mixin/jsonnetfile.lock.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"version": 1,
|
||||
"dependencies": [
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-latest"
|
||||
}
|
||||
},
|
||||
"version": "1c56af39815c4903e47c27194444456f005f65df",
|
||||
"sum": "GxEO83uxgsDclLp/fmlUJZDbSGpeUZY6Ap3G2cgdL1g="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-v10.4.0"
|
||||
}
|
||||
},
|
||||
"version": "1c56af39815c4903e47c27194444456f005f65df",
|
||||
"sum": "DKj+Sn+rlI48g/aoJpzkfPge46ya0jLk5kcZoiZ2X/I="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
|
||||
"subdir": "doc-util"
|
||||
}
|
||||
},
|
||||
"version": "6ac6c69685b8c29c54515448eaca583da2d88150",
|
||||
"sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/jsonnet-libs/xtd.git",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
|
||||
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
|
||||
}
|
||||
],
|
||||
"legacyImports": false
|
||||
}
|
||||
3
elasticsearch-mixin/mixin.libsonnet
Normal file
3
elasticsearch-mixin/mixin.libsonnet
Normal file
@ -0,0 +1,3 @@
|
||||
// (import 'alerts/alerts.libsonnet') +
|
||||
(import 'dashboards/dashboards.libsonnet') +
|
||||
(import 'config.libsonnet')
|
||||
55
examples/auth_modules.yml
Normal file
55
examples/auth_modules.yml
Normal file
@ -0,0 +1,55 @@
|
||||
# Example exporter-config.yml demonstrating multiple auth modules
|
||||
# Each module can be referenced with ?auth_module=<name> in /probe requests.
|
||||
|
||||
auth_modules:
|
||||
###########################################################################
|
||||
# 1. Simple basic-auth over HTTPS #
|
||||
###########################################################################
|
||||
prod_basic:
|
||||
type: userpass
|
||||
userpass:
|
||||
username: metrics
|
||||
password: s3cr3t
|
||||
# extra URL query parameters are appended to the target DSN
|
||||
options:
|
||||
sslmode: disable # becomes ?sslmode=disable
|
||||
|
||||
###########################################################################
|
||||
# 2. Read-only account for staging cluster #
|
||||
###########################################################################
|
||||
staging_ro:
|
||||
type: userpass
|
||||
userpass:
|
||||
username: readonly
|
||||
password: changeme
|
||||
|
||||
###########################################################################
|
||||
# 3. API-Key authentication #
|
||||
###########################################################################
|
||||
prod_key:
|
||||
type: apikey
|
||||
apikey: BASE64-ENCODED-KEY==
|
||||
|
||||
###########################################################################
|
||||
# 5. AWS SigV4 signing with optional TLS settings #
|
||||
###########################################################################
|
||||
aws_sigv4:
|
||||
type: aws
|
||||
aws:
|
||||
region: us-east-1
|
||||
# role_arn is optional
|
||||
# Optional TLS configuration for transport security
|
||||
tls:
|
||||
ca_file: /etc/ssl/ca.pem
|
||||
insecure_skip_verify: false
|
||||
|
||||
###########################################################################
|
||||
# 6. Client certificate authentication only (no username/password) #
|
||||
###########################################################################
|
||||
pki_mtls:
|
||||
type: tls # This auth type uses ONLY client certificates for authentication
|
||||
tls:
|
||||
ca_file: /etc/ssl/pki/ca.pem # Optional: CA for server verification
|
||||
cert_file: /etc/ssl/pki/client.pem # Required: Client certificate for auth
|
||||
key_file: /etc/ssl/pki/client-key.pem # Required: Client private key for auth
|
||||
insecure_skip_verify: false # Optional: Skip server cert validation
|
||||
33
examples/example-prometheus.yml
Normal file
33
examples/example-prometheus.yml
Normal file
@ -0,0 +1,33 @@
|
||||
scrape_configs:
|
||||
- job_name: es-multi
|
||||
metrics_path: /probe
|
||||
# Default parameters for all scrapes in this job.
|
||||
# Can be overridden by labels on a per-target basis.
|
||||
params:
|
||||
auth_module: [prod_key]
|
||||
static_configs:
|
||||
# This is a target group. All targets here will use the default 'prod_key' auth_module.
|
||||
- targets:
|
||||
- https://es-prod-1:9200
|
||||
- https://es-prod-2:9200
|
||||
# This is another target group.
|
||||
- targets:
|
||||
- https://es-stage:9200
|
||||
# The __param_ prefix on a label causes it to be added as a URL parameter.
|
||||
# This will override the default auth_module for this target.
|
||||
labels:
|
||||
__param_auth_module: staging_basic
|
||||
relabel_configs:
|
||||
# The following relabeling rules are applied to every target.
|
||||
|
||||
# 1. The special label __address__ (the target address) is saved as the 'target' URL parameter.
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
|
||||
# 2. The 'target' parameter is used as the 'instance' label for the scraped metrics.
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
|
||||
# 3. The scrape address is rewritten to point to the exporter.
|
||||
- target_label: __address__
|
||||
replacement: exporter:9114 # host:port of the single exporter
|
||||
File diff suppressed because it is too large
Load Diff
15
fixtures/clusterhealth/1.7.6.json
Normal file
15
fixtures/clusterhealth/1.7.6.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"cluster_name": "elasticsearch",
|
||||
"status": "yellow",
|
||||
"timed_out": false,
|
||||
"number_of_nodes": 1,
|
||||
"number_of_data_nodes": 1,
|
||||
"active_primary_shards": 5,
|
||||
"active_shards": 5,
|
||||
"relocating_shards": 0,
|
||||
"initializing_shards": 0,
|
||||
"unassigned_shards": 5,
|
||||
"delayed_unassigned_shards": 0,
|
||||
"number_of_pending_tasks": 0,
|
||||
"number_of_in_flight_fetch": 0
|
||||
}
|
||||
17
fixtures/clusterhealth/2.4.5.json
Normal file
17
fixtures/clusterhealth/2.4.5.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"cluster_name": "elasticsearch",
|
||||
"status": "yellow",
|
||||
"timed_out": false,
|
||||
"number_of_nodes": 1,
|
||||
"number_of_data_nodes": 1,
|
||||
"active_primary_shards": 5,
|
||||
"active_shards": 5,
|
||||
"relocating_shards": 0,
|
||||
"initializing_shards": 0,
|
||||
"unassigned_shards": 5,
|
||||
"delayed_unassigned_shards": 0,
|
||||
"number_of_pending_tasks": 0,
|
||||
"number_of_in_flight_fetch": 0,
|
||||
"task_max_waiting_in_queue_millis": 12,
|
||||
"active_shards_percent_as_number": 50.0
|
||||
}
|
||||
17
fixtures/clusterhealth/5.4.2.json
Normal file
17
fixtures/clusterhealth/5.4.2.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"cluster_name": "elasticsearch",
|
||||
"status": "yellow",
|
||||
"timed_out": false,
|
||||
"number_of_nodes": 1,
|
||||
"number_of_data_nodes": 1,
|
||||
"active_primary_shards": 5,
|
||||
"active_shards": 5,
|
||||
"relocating_shards": 0,
|
||||
"initializing_shards": 0,
|
||||
"unassigned_shards": 5,
|
||||
"delayed_unassigned_shards": 0,
|
||||
"number_of_pending_tasks": 0,
|
||||
"number_of_in_flight_fetch": 0,
|
||||
"task_max_waiting_in_queue_millis": 12,
|
||||
"active_shards_percent_as_number": 50.0
|
||||
}
|
||||
13
fixtures/clusterinfo/2.4.5.json
Normal file
13
fixtures/clusterinfo/2.4.5.json
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"name" : "Mys-Tech",
|
||||
"cluster_name" : "elasticsearch",
|
||||
"cluster_uuid" : "3qps7bcWTqyzV49ApmPVfw",
|
||||
"version" : {
|
||||
"number" : "2.4.5",
|
||||
"build_hash" : "c849dd13904f53e63e88efc33b2ceeda0b6a1276",
|
||||
"build_timestamp" : "2017-04-24T16:18:17Z",
|
||||
"build_snapshot" : false,
|
||||
"lucene_version" : "5.5.4"
|
||||
},
|
||||
"tagline" : "You Know, for Search"
|
||||
}
|
||||
13
fixtures/clusterinfo/5.4.2.json
Normal file
13
fixtures/clusterinfo/5.4.2.json
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"name" : "gOHPUga",
|
||||
"cluster_name" : "elasticsearch",
|
||||
"cluster_uuid" : "kbqi7yhQT-WlPdGL2m0xJg",
|
||||
"version" : {
|
||||
"number" : "5.4.2",
|
||||
"build_hash" : "929b078",
|
||||
"build_date" : "2017-06-15T02:29:28.122Z",
|
||||
"build_snapshot" : false,
|
||||
"lucene_version" : "6.5.1"
|
||||
},
|
||||
"tagline" : "You Know, for Search"
|
||||
}
|
||||
17
fixtures/clusterinfo/7.13.1.json
Normal file
17
fixtures/clusterinfo/7.13.1.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"name" : "e0630cfd8e1e",
|
||||
"cluster_name" : "docker-cluster",
|
||||
"cluster_uuid" : "aCMrCY1VQpqJ6U4Sw_xdiw",
|
||||
"version" : {
|
||||
"number" : "7.13.1",
|
||||
"build_flavor" : "default",
|
||||
"build_type" : "docker",
|
||||
"build_hash" : "9a7758028e4ea59bcab41c12004603c5a7dd84a9",
|
||||
"build_date" : "2021-05-28T17:40:59.346932922Z",
|
||||
"build_snapshot" : false,
|
||||
"lucene_version" : "8.8.2",
|
||||
"minimum_wire_compatibility_version" : "6.8.0",
|
||||
"minimum_index_compatibility_version" : "6.0.0-beta1"
|
||||
},
|
||||
"tagline" : "You Know, for Search"
|
||||
}
|
||||
24
fixtures/datastream/7.15.0.json
Normal file
24
fixtures/datastream/7.15.0.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"_shards": {
|
||||
"total": 30,
|
||||
"successful": 30,
|
||||
"failed": 0
|
||||
},
|
||||
"data_stream_count": 2,
|
||||
"backing_indices": 7,
|
||||
"total_store_size_bytes": 1103028116,
|
||||
"data_streams": [
|
||||
{
|
||||
"data_stream": "foo",
|
||||
"backing_indices": 5,
|
||||
"store_size_bytes": 429205396,
|
||||
"maximum_timestamp": 1656079894000
|
||||
},
|
||||
{
|
||||
"data_stream": "bar",
|
||||
"backing_indices": 2,
|
||||
"store_size_bytes": 673822720,
|
||||
"maximum_timestamp": 1656028796000
|
||||
}
|
||||
]
|
||||
}
|
||||
111
fixtures/healthreport/8.7.0.json
Normal file
111
fixtures/healthreport/8.7.0.json
Normal file
@ -0,0 +1,111 @@
|
||||
{
|
||||
"status": "green",
|
||||
"cluster_name": "docker-cluster",
|
||||
"indicators": {
|
||||
"master_is_stable": {
|
||||
"status": "green",
|
||||
"symptom": "The cluster has a stable master node",
|
||||
"details": {
|
||||
"current_master": {
|
||||
"node_id": "X8BAj1mfQ3qgcSoAlG3HHw",
|
||||
"name": "5da1610e99a7"
|
||||
},
|
||||
"recent_masters": [
|
||||
{
|
||||
"node_id": "X8BAj1mfQ3qgcSoAlG3HHw",
|
||||
"name": "5da1610e99a7"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"repository_integrity": {
|
||||
"status": "green",
|
||||
"symptom": "All repositories are healthy.",
|
||||
"details": {
|
||||
"total_repositories": 1
|
||||
}
|
||||
},
|
||||
"shards_capacity": {
|
||||
"status": "green",
|
||||
"symptom": "The cluster has enough room to add new shards.",
|
||||
"details": {
|
||||
"data": {
|
||||
"max_shards_in_cluster": 13500
|
||||
},
|
||||
"frozen": {
|
||||
"max_shards_in_cluster": 9000
|
||||
}
|
||||
}
|
||||
},
|
||||
"shards_availability": {
|
||||
"status": "green",
|
||||
"symptom": "This cluster has all shards available.",
|
||||
"details": {
|
||||
"restarting_replicas": 0,
|
||||
"creating_primaries": 0,
|
||||
"initializing_replicas": 0,
|
||||
"unassigned_replicas": 0,
|
||||
"started_primaries": 11703,
|
||||
"restarting_primaries": 0,
|
||||
"initializing_primaries": 0,
|
||||
"creating_replicas": 0,
|
||||
"started_replicas": 1701,
|
||||
"unassigned_primaries": 0
|
||||
},
|
||||
"impacts": [
|
||||
{
|
||||
"id": "elasticsearch:health:shards_availability:impact:replica_unassigned",
|
||||
"severity": 2,
|
||||
"description": "Searches might be slower than usual. Fewer redundant copies of the data exist on 1 index [twitter].",
|
||||
"impact_areas": [
|
||||
"search"
|
||||
]
|
||||
}
|
||||
],
|
||||
"diagnosis": [
|
||||
{
|
||||
"id": "elasticsearch:health:shards_availability:diagnosis:increase_tier_capacity_for_allocations:tier:data_content",
|
||||
"cause": "Elasticsearch isn't allowed to allocate some shards from these indices to any of the nodes in the desired data tier because there are not enough nodes in the [data_content] tier to allocate each shard copy on a different node.",
|
||||
"action": "Increase the number of nodes in this tier or decrease the number of replica shards in the affected indices.",
|
||||
"help_url": "https://ela.st/tier-capacity",
|
||||
"affected_resources": {
|
||||
"indices": [
|
||||
"twitter"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"disk": {
|
||||
"status": "green",
|
||||
"symptom": "The cluster has enough available disk space.",
|
||||
"details": {
|
||||
"indices_with_readonly_block": 0,
|
||||
"nodes_with_enough_disk_space": 1,
|
||||
"nodes_with_unknown_disk_status": 0,
|
||||
"nodes_over_high_watermark": 0,
|
||||
"nodes_over_flood_stage_watermark": 0
|
||||
}
|
||||
},
|
||||
"data_stream_lifecycle": {
|
||||
"status": "green",
|
||||
"symptom": "No data stream lifecycle health data available yet. Health information will be reported after the first run."
|
||||
},
|
||||
"ilm": {
|
||||
"status": "green",
|
||||
"symptom": "Index Lifecycle Management is running",
|
||||
"details": {
|
||||
"policies": 17,
|
||||
"ilm_status": "RUNNING"
|
||||
}
|
||||
},
|
||||
"slm": {
|
||||
"status": "green",
|
||||
"symptom": "No Snapshot Lifecycle Management policies configured",
|
||||
"details": {
|
||||
"slm_status": "RUNNING",
|
||||
"policies": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
20
fixtures/ilm_indices/6.6.0.json
Normal file
20
fixtures/ilm_indices/6.6.0.json
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"indices": {
|
||||
"twitter": {
|
||||
"index": "twitter",
|
||||
"managed": false
|
||||
},
|
||||
"facebook": {
|
||||
"index": "facebook",
|
||||
"managed": true,
|
||||
"policy": "my_policy",
|
||||
"lifecycle_date_millis": 1660799138565,
|
||||
"phase": "new",
|
||||
"phase_time_millis": 1660799138651,
|
||||
"action": "complete",
|
||||
"action_time_millis": 1660799138651,
|
||||
"step": "complete",
|
||||
"step_time_millis": 1660799138651
|
||||
}
|
||||
}
|
||||
}
|
||||
3
fixtures/ilm_status/6.6.0.json
Normal file
3
fixtures/ilm_status/6.6.0.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"operation_mode": "RUNNING"
|
||||
}
|
||||
681
fixtures/indices/1.7.6.json
Normal file
681
fixtures/indices/1.7.6.json
Normal file
@ -0,0 +1,681 @@
|
||||
{
|
||||
"_shards": {
|
||||
"total": 20,
|
||||
"successful": 10,
|
||||
"failed": 0
|
||||
},
|
||||
"_all": {
|
||||
"primaries": {
|
||||
"docs": {
|
||||
"count": 5,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 13798,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 5,
|
||||
"index_time_in_millis": 52,
|
||||
"index_current": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0
|
||||
},
|
||||
"refresh": {
|
||||
"total": 5,
|
||||
"total_time_in_millis": 163
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 30,
|
||||
"total_time_in_millis": 42
|
||||
},
|
||||
"filter_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"id_cache": {
|
||||
"memory_size_in_bytes": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 5,
|
||||
"memory_in_bytes": 18410,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 671088640,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 5,
|
||||
"size_in_bytes": 102
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"docs": {
|
||||
"count": 5,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 13798,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 5,
|
||||
"index_time_in_millis": 52,
|
||||
"index_current": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0
|
||||
},
|
||||
"refresh": {
|
||||
"total": 5,
|
||||
"total_time_in_millis": 163
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 30,
|
||||
"total_time_in_millis": 42
|
||||
},
|
||||
"filter_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"id_cache": {
|
||||
"memory_size_in_bytes": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 5,
|
||||
"memory_in_bytes": 18410,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 671088640,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 5,
|
||||
"size_in_bytes": 102
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"indices": {
|
||||
"foo_2": {
|
||||
"primaries": {
|
||||
"docs": {
|
||||
"count": 3,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 8207,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 3,
|
||||
"index_time_in_millis": 6,
|
||||
"index_current": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0
|
||||
},
|
||||
"refresh": {
|
||||
"total": 3,
|
||||
"total_time_in_millis": 38
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 16,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"filter_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"id_cache": {
|
||||
"memory_size_in_bytes": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 3,
|
||||
"memory_in_bytes": 11046,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 335544320,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 3,
|
||||
"size_in_bytes": 102
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"docs": {
|
||||
"count": 3,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 8207,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 3,
|
||||
"index_time_in_millis": 6,
|
||||
"index_current": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0
|
||||
},
|
||||
"refresh": {
|
||||
"total": 3,
|
||||
"total_time_in_millis": 38
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 16,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"filter_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"id_cache": {
|
||||
"memory_size_in_bytes": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 3,
|
||||
"memory_in_bytes": 11046,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 335544320,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 3,
|
||||
"size_in_bytes": 102
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"foo_1": {
|
||||
"primaries": {
|
||||
"docs": {
|
||||
"count": 2,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 5591,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 2,
|
||||
"index_time_in_millis": 46,
|
||||
"index_current": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0
|
||||
},
|
||||
"refresh": {
|
||||
"total": 2,
|
||||
"total_time_in_millis": 125
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 14,
|
||||
"total_time_in_millis": 42
|
||||
},
|
||||
"filter_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"id_cache": {
|
||||
"memory_size_in_bytes": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 2,
|
||||
"memory_in_bytes": 7364,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 335544320,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 2,
|
||||
"size_in_bytes": 17
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"docs": {
|
||||
"count": 2,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 5591,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 2,
|
||||
"index_time_in_millis": 46,
|
||||
"index_current": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0
|
||||
},
|
||||
"refresh": {
|
||||
"total": 2,
|
||||
"total_time_in_millis": 125
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 14,
|
||||
"total_time_in_millis": 42
|
||||
},
|
||||
"filter_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"id_cache": {
|
||||
"memory_size_in_bytes": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 2,
|
||||
"memory_in_bytes": 7364,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 335544320,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 2,
|
||||
"size_in_bytes": 17
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
765
fixtures/indices/2.4.5.json
Normal file
765
fixtures/indices/2.4.5.json
Normal file
@ -0,0 +1,765 @@
|
||||
{
|
||||
"_shards": {
|
||||
"total": 20,
|
||||
"successful": 10,
|
||||
"failed": 0
|
||||
},
|
||||
"_all": {
|
||||
"primaries": {
|
||||
"docs": {
|
||||
"count": 5,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 3610,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 5,
|
||||
"index_time_in_millis": 40,
|
||||
"index_current": 0,
|
||||
"index_failed": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0,
|
||||
"scroll_total": 0,
|
||||
"scroll_time_in_millis": 0,
|
||||
"scroll_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0,
|
||||
"total_stopped_time_in_millis": 0,
|
||||
"total_throttled_time_in_millis": 0,
|
||||
"total_auto_throttle_in_bytes": 209715200
|
||||
},
|
||||
"refresh": {
|
||||
"total": 5,
|
||||
"total_time_in_millis": 171
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 30,
|
||||
"total_time_in_millis": 12
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"total_count": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0,
|
||||
"cache_size": 0,
|
||||
"cache_count": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 5,
|
||||
"memory_in_bytes": 10530,
|
||||
"terms_memory_in_bytes": 7550,
|
||||
"stored_fields_memory_in_bytes": 1560,
|
||||
"term_vectors_memory_in_bytes": 0,
|
||||
"norms_memory_in_bytes": 960,
|
||||
"doc_values_memory_in_bytes": 460,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 103887660,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 5,
|
||||
"size_in_bytes": 843
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"request_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"docs": {
|
||||
"count": 5,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 3610,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 5,
|
||||
"index_time_in_millis": 40,
|
||||
"index_current": 0,
|
||||
"index_failed": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0,
|
||||
"scroll_total": 0,
|
||||
"scroll_time_in_millis": 0,
|
||||
"scroll_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0,
|
||||
"total_stopped_time_in_millis": 0,
|
||||
"total_throttled_time_in_millis": 0,
|
||||
"total_auto_throttle_in_bytes": 209715200
|
||||
},
|
||||
"refresh": {
|
||||
"total": 5,
|
||||
"total_time_in_millis": 171
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 30,
|
||||
"total_time_in_millis": 12
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"total_count": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0,
|
||||
"cache_size": 0,
|
||||
"cache_count": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 5,
|
||||
"memory_in_bytes": 10530,
|
||||
"terms_memory_in_bytes": 7550,
|
||||
"stored_fields_memory_in_bytes": 1560,
|
||||
"term_vectors_memory_in_bytes": 0,
|
||||
"norms_memory_in_bytes": 960,
|
||||
"doc_values_memory_in_bytes": 460,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 103887660,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 5,
|
||||
"size_in_bytes": 843
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"request_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"indices": {
|
||||
"foo_2": {
|
||||
"primaries": {
|
||||
"docs": {
|
||||
"count": 3,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 3350,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 3,
|
||||
"index_time_in_millis": 6,
|
||||
"index_current": 0,
|
||||
"index_failed": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0,
|
||||
"scroll_total": 0,
|
||||
"scroll_time_in_millis": 0,
|
||||
"scroll_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0,
|
||||
"total_stopped_time_in_millis": 0,
|
||||
"total_throttled_time_in_millis": 0,
|
||||
"total_auto_throttle_in_bytes": 104857600
|
||||
},
|
||||
"refresh": {
|
||||
"total": 3,
|
||||
"total_time_in_millis": 34
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 16,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"total_count": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0,
|
||||
"cache_size": 0,
|
||||
"cache_count": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 3,
|
||||
"memory_in_bytes": 6318,
|
||||
"terms_memory_in_bytes": 4530,
|
||||
"stored_fields_memory_in_bytes": 936,
|
||||
"term_vectors_memory_in_bytes": 0,
|
||||
"norms_memory_in_bytes": 576,
|
||||
"doc_values_memory_in_bytes": 276,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 51943830,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 3,
|
||||
"size_in_bytes": 470
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"request_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"docs": {
|
||||
"count": 3,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 3350,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 3,
|
||||
"index_time_in_millis": 6,
|
||||
"index_current": 0,
|
||||
"index_failed": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0,
|
||||
"scroll_total": 0,
|
||||
"scroll_time_in_millis": 0,
|
||||
"scroll_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0,
|
||||
"total_stopped_time_in_millis": 0,
|
||||
"total_throttled_time_in_millis": 0,
|
||||
"total_auto_throttle_in_bytes": 104857600
|
||||
},
|
||||
"refresh": {
|
||||
"total": 3,
|
||||
"total_time_in_millis": 34
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 16,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"total_count": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0,
|
||||
"cache_size": 0,
|
||||
"cache_count": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 3,
|
||||
"memory_in_bytes": 6318,
|
||||
"terms_memory_in_bytes": 4530,
|
||||
"stored_fields_memory_in_bytes": 936,
|
||||
"term_vectors_memory_in_bytes": 0,
|
||||
"norms_memory_in_bytes": 576,
|
||||
"doc_values_memory_in_bytes": 276,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 51943830,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 3,
|
||||
"size_in_bytes": 470
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"request_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"foo_1": {
|
||||
"primaries": {
|
||||
"docs": {
|
||||
"count": 2,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 260,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 2,
|
||||
"index_time_in_millis": 34,
|
||||
"index_current": 0,
|
||||
"index_failed": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0,
|
||||
"scroll_total": 0,
|
||||
"scroll_time_in_millis": 0,
|
||||
"scroll_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0,
|
||||
"total_stopped_time_in_millis": 0,
|
||||
"total_throttled_time_in_millis": 0,
|
||||
"total_auto_throttle_in_bytes": 104857600
|
||||
},
|
||||
"refresh": {
|
||||
"total": 2,
|
||||
"total_time_in_millis": 137
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 14,
|
||||
"total_time_in_millis": 12
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"total_count": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0,
|
||||
"cache_size": 0,
|
||||
"cache_count": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 2,
|
||||
"memory_in_bytes": 4212,
|
||||
"terms_memory_in_bytes": 3020,
|
||||
"stored_fields_memory_in_bytes": 624,
|
||||
"term_vectors_memory_in_bytes": 0,
|
||||
"norms_memory_in_bytes": 384,
|
||||
"doc_values_memory_in_bytes": 184,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 51943830,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 2,
|
||||
"size_in_bytes": 373
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"request_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
},
|
||||
"total": {
|
||||
"docs": {
|
||||
"count": 2,
|
||||
"deleted": 0
|
||||
},
|
||||
"store": {
|
||||
"size_in_bytes": 260,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"indexing": {
|
||||
"index_total": 2,
|
||||
"index_time_in_millis": 34,
|
||||
"index_current": 0,
|
||||
"index_failed": 0,
|
||||
"delete_total": 0,
|
||||
"delete_time_in_millis": 0,
|
||||
"delete_current": 0,
|
||||
"noop_update_total": 0,
|
||||
"is_throttled": false,
|
||||
"throttle_time_in_millis": 0
|
||||
},
|
||||
"get": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"exists_total": 0,
|
||||
"exists_time_in_millis": 0,
|
||||
"missing_total": 0,
|
||||
"missing_time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"search": {
|
||||
"open_contexts": 0,
|
||||
"query_total": 0,
|
||||
"query_time_in_millis": 0,
|
||||
"query_current": 0,
|
||||
"fetch_total": 0,
|
||||
"fetch_time_in_millis": 0,
|
||||
"fetch_current": 0,
|
||||
"scroll_total": 0,
|
||||
"scroll_time_in_millis": 0,
|
||||
"scroll_current": 0
|
||||
},
|
||||
"merges": {
|
||||
"current": 0,
|
||||
"current_docs": 0,
|
||||
"current_size_in_bytes": 0,
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0,
|
||||
"total_docs": 0,
|
||||
"total_size_in_bytes": 0,
|
||||
"total_stopped_time_in_millis": 0,
|
||||
"total_throttled_time_in_millis": 0,
|
||||
"total_auto_throttle_in_bytes": 104857600
|
||||
},
|
||||
"refresh": {
|
||||
"total": 2,
|
||||
"total_time_in_millis": 137
|
||||
},
|
||||
"flush": {
|
||||
"total": 0,
|
||||
"total_time_in_millis": 0
|
||||
},
|
||||
"warmer": {
|
||||
"current": 0,
|
||||
"total": 14,
|
||||
"total_time_in_millis": 12
|
||||
},
|
||||
"query_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"total_count": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0,
|
||||
"cache_size": 0,
|
||||
"cache_count": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"fielddata": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0
|
||||
},
|
||||
"percolate": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0,
|
||||
"memory_size_in_bytes": -1,
|
||||
"memory_size": "-1b",
|
||||
"queries": 0
|
||||
},
|
||||
"completion": {
|
||||
"size_in_bytes": 0
|
||||
},
|
||||
"segments": {
|
||||
"count": 2,
|
||||
"memory_in_bytes": 4212,
|
||||
"terms_memory_in_bytes": 3020,
|
||||
"stored_fields_memory_in_bytes": 624,
|
||||
"term_vectors_memory_in_bytes": 0,
|
||||
"norms_memory_in_bytes": 384,
|
||||
"doc_values_memory_in_bytes": 184,
|
||||
"index_writer_memory_in_bytes": 0,
|
||||
"index_writer_max_memory_in_bytes": 51943830,
|
||||
"version_map_memory_in_bytes": 0,
|
||||
"fixed_bit_set_memory_in_bytes": 0
|
||||
},
|
||||
"translog": {
|
||||
"operations": 2,
|
||||
"size_in_bytes": 373
|
||||
},
|
||||
"suggest": {
|
||||
"total": 0,
|
||||
"time_in_millis": 0,
|
||||
"current": 0
|
||||
},
|
||||
"request_cache": {
|
||||
"memory_size_in_bytes": 0,
|
||||
"evictions": 0,
|
||||
"hit_count": 0,
|
||||
"miss_count": 0
|
||||
},
|
||||
"recovery": {
|
||||
"current_as_source": 0,
|
||||
"current_as_target": 0,
|
||||
"throttle_time_in_millis": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1437
fixtures/indices/5.4.2.json
Normal file
1437
fixtures/indices/5.4.2.json
Normal file
File diff suppressed because it is too large
Load Diff
1303
fixtures/indices/7.17.3.json
Normal file
1303
fixtures/indices/7.17.3.json
Normal file
File diff suppressed because it is too large
Load Diff
20
fixtures/indices/alias/1.7.6.json
Normal file
20
fixtures/indices/alias/1.7.6.json
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"foo_1": {
|
||||
"aliases": {}
|
||||
},
|
||||
"foo_2": {
|
||||
"aliases": {
|
||||
"foo_alias_2_1": {}
|
||||
}
|
||||
},
|
||||
"foo_3": {
|
||||
"aliases": {
|
||||
"foo_alias_3_1": {
|
||||
"index_routing": "title",
|
||||
"search_routing": "title",
|
||||
"is_write_index": true
|
||||
},
|
||||
"foo_alias_3_2": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
20
fixtures/indices/alias/2.4.5.json
Normal file
20
fixtures/indices/alias/2.4.5.json
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"foo_1": {
|
||||
"aliases": {}
|
||||
},
|
||||
"foo_2": {
|
||||
"aliases": {
|
||||
"foo_alias_2_1": {}
|
||||
}
|
||||
},
|
||||
"foo_3": {
|
||||
"aliases": {
|
||||
"foo_alias_3_1": {
|
||||
"index_routing": "title",
|
||||
"search_routing": "title",
|
||||
"is_write_index": true
|
||||
},
|
||||
"foo_alias_3_2": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
20
fixtures/indices/alias/5.4.2.json
Normal file
20
fixtures/indices/alias/5.4.2.json
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"foo_1": {
|
||||
"aliases": {}
|
||||
},
|
||||
"foo_2": {
|
||||
"aliases": {
|
||||
"foo_alias_2_1": {}
|
||||
}
|
||||
},
|
||||
"foo_3": {
|
||||
"aliases": {
|
||||
"foo_alias_3_1": {
|
||||
"index_routing": "title",
|
||||
"search_routing": "title",
|
||||
"is_write_index": true
|
||||
},
|
||||
"foo_alias_3_2": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
20
fixtures/indices/alias/7.17.3.json
Normal file
20
fixtures/indices/alias/7.17.3.json
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"foo_1": {
|
||||
"aliases": {}
|
||||
},
|
||||
"foo_2": {
|
||||
"aliases": {
|
||||
"foo_alias_2_1": {}
|
||||
}
|
||||
},
|
||||
"foo_3": {
|
||||
"aliases": {
|
||||
"foo_alias_3_1": {
|
||||
"index_routing": "title",
|
||||
"search_routing": "title",
|
||||
"is_write_index": true
|
||||
},
|
||||
"foo_alias_3_2": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
0
fixtures/indices/shards/1.7.6.json
Normal file
0
fixtures/indices/shards/1.7.6.json
Normal file
0
fixtures/indices/shards/2.4.5.json
Normal file
0
fixtures/indices/shards/2.4.5.json
Normal file
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user