Fix mixed value types for disk watermark (#1055)

* Fix mixed value types for disk watermark

The disk watermark values can be a ratio or a percentage according to the docs[1]. However, when a watermark is set to a percentage, the defaults are returned as an object rather than a string, so parsing fails. In that case we really only care about the value the user set. Adds a test to confirm the fix for #1044.

Fixes #1044

[1] https://www.elastic.co/docs/reference/elasticsearch/configuration-reference/cluster-level-shard-allocation-routing-settings#disk-based-shard-allocation
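
To illustrate the failure mode, here is a minimal, self-contained sketch (the structs and the sample JSON are illustrative only; parseWatermarkValue mirrors the helper added in the diff below). With string-typed fields the whole settings response fails to unmarshal as soon as a watermark arrives as a JSON object, while interface{} fields defer the decision to a type switch that accepts both shapes.

package main

import (
    "encoding/json"
    "fmt"
)

// oldWatermark mirrors the previous struct: string-only fields.
type oldWatermark struct {
    FloodStage string `json:"flood_stage"`
}

// newWatermark mirrors the fix: the raw JSON value is kept and interpreted later.
type newWatermark struct {
    FloodStage interface{} `json:"flood_stage"`
}

// parseWatermarkValue accepts either a plain string or an object that carries
// the string under a "value" key (same logic as the helper added below).
func parseWatermarkValue(value interface{}) (string, error) {
    switch v := value.(type) {
    case string:
        return v, nil
    case map[string]interface{}:
        if val, ok := v["value"].(string); ok {
            return val, nil
        }
        return "", fmt.Errorf("unexpected structure in watermark value: %v", v)
    default:
        return "", fmt.Errorf("unsupported type for watermark value: %T", v)
    }
}

func main() {
    // Illustrative payload: the watermark comes back as an object, not a string.
    doc := []byte(`{"flood_stage": {"value": "95%"}}`)

    var o oldWatermark
    fmt.Println("old struct:", json.Unmarshal(doc, &o)) // json: cannot unmarshal object into ... of type string

    var n newWatermark
    fmt.Println("new struct:", json.Unmarshal(doc, &n)) // <nil>
    fmt.Println(parseWatermarkValue(n.FloodStage))      // 95% <nil>
}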

Signed-off-by: Joe Adams <github@joeadams.io>

* Add missing test fixture

Signed-off-by: Joe Adams <github@joeadams.io>

---------

Signed-off-by: Joe Adams <github@joeadams.io>
Joe Adams 2025-10-27 21:12:26 -04:00 committed by GitHub
parent 91008f4902
commit 325e7dd61f
3 changed files with 1678 additions and 62 deletions

View File

@@ -140,9 +140,9 @@ type clusterSettingsDisk struct {
 // clusterSettingsWatermark is representation of Elasticsearch Cluster shard routing disk allocation watermark settings
 type clusterSettingsWatermark struct {
-    FloodStage string `json:"flood_stage"`
-    High       string `json:"high"`
-    Low        string `json:"low"`
+    FloodStage interface{} `json:"flood_stage"`
+    High       interface{} `json:"high"`
+    Low        interface{} `json:"low"`
 }
 func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometheus.Metric) error {
@@ -222,80 +222,110 @@ func (c *ClusterSettingsCollector) Update(ctx context.Context, ch chan<- prometh
     )
     // Watermark bytes or ratio metrics
-    if strings.HasSuffix(merged.Cluster.Routing.Allocation.Disk.Watermark.High, "b") {
-        flooodStageBytes, err := getValueInBytes(merged.Cluster.Routing.Allocation.Disk.Watermark.FloodStage)
-        if err != nil {
-            c.logger.Error("failed to parse flood_stage bytes", "err", err)
-        } else {
-            ch <- prometheus.MustNewConstMetric(
-                clusterSettingsDesc["floodStageBytes"],
-                prometheus.GaugeValue,
-                flooodStageBytes,
-            )
-        }
-        highBytes, err := getValueInBytes(merged.Cluster.Routing.Allocation.Disk.Watermark.High)
-        if err != nil {
-            c.logger.Error("failed to parse high bytes", "err", err)
-        } else {
-            ch <- prometheus.MustNewConstMetric(
-                clusterSettingsDesc["highBytes"],
-                prometheus.GaugeValue,
-                highBytes,
-            )
-        }
-        lowBytes, err := getValueInBytes(merged.Cluster.Routing.Allocation.Disk.Watermark.Low)
-        if err != nil {
-            c.logger.Error("failed to parse low bytes", "err", err)
-        } else {
-            ch <- prometheus.MustNewConstMetric(
-                clusterSettingsDesc["lowBytes"],
-                prometheus.GaugeValue,
-                lowBytes,
-            )
-        }
-        return nil
-    }
-    // Watermark ratio metrics
-    floodRatio, err := getValueAsRatio(merged.Cluster.Routing.Allocation.Disk.Watermark.FloodStage)
-    if err != nil {
-        c.logger.Error("failed to parse flood_stage ratio", "err", err)
-    } else {
-        ch <- prometheus.MustNewConstMetric(
-            clusterSettingsDesc["floodStageRatio"],
-            prometheus.GaugeValue,
-            floodRatio,
-        )
-    }
-    highRatio, err := getValueAsRatio(merged.Cluster.Routing.Allocation.Disk.Watermark.High)
-    if err != nil {
-        c.logger.Error("failed to parse high ratio", "err", err)
-    } else {
-        ch <- prometheus.MustNewConstMetric(
-            clusterSettingsDesc["highRatio"],
-            prometheus.GaugeValue,
-            highRatio,
-        )
-    }
-    lowRatio, err := getValueAsRatio(merged.Cluster.Routing.Allocation.Disk.Watermark.Low)
-    if err != nil {
-        c.logger.Error("failed to parse low ratio", "err", err)
-    } else {
-        ch <- prometheus.MustNewConstMetric(
-            clusterSettingsDesc["lowRatio"],
-            prometheus.GaugeValue,
-            lowRatio,
-        )
-    }
+    watermarkFlood, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.FloodStage)
+    if err != nil {
+        c.logger.Error("failed to parse flood stage watermark", "err", err)
+    } else {
+        if strings.HasSuffix(watermarkFlood, "b") {
+            floodStageBytes, err := getValueInBytes(watermarkFlood)
+            if err != nil {
+                c.logger.Error("failed to parse flood_stage bytes", "err", err)
+            } else {
+                ch <- prometheus.MustNewConstMetric(
+                    clusterSettingsDesc["floodStageBytes"],
+                    prometheus.GaugeValue,
+                    floodStageBytes,
+                )
+            }
+        } else {
+            floodStageRatio, err := getValueAsRatio(watermarkFlood)
+            if err != nil {
+                c.logger.Error("failed to parse flood_stage ratio", "err", err)
+            } else {
+                ch <- prometheus.MustNewConstMetric(
+                    clusterSettingsDesc["floodStageRatio"],
+                    prometheus.GaugeValue,
+                    floodStageRatio,
+                )
+            }
+        }
+    }
+    watermarkHigh, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.High)
+    if err != nil {
+        c.logger.Error("failed to parse high watermark", "err", err)
+    } else {
+        if strings.HasSuffix(watermarkHigh, "b") {
+            highBytes, err := getValueInBytes(watermarkHigh)
+            if err != nil {
+                c.logger.Error("failed to parse high bytes", "err", err)
+            } else {
+                ch <- prometheus.MustNewConstMetric(
+                    clusterSettingsDesc["highBytes"],
+                    prometheus.GaugeValue,
+                    highBytes,
+                )
+            }
+        } else {
+            highRatio, err := getValueAsRatio(watermarkHigh)
+            if err != nil {
+                c.logger.Error("failed to parse high ratio", "err", err)
+            } else {
+                ch <- prometheus.MustNewConstMetric(
+                    clusterSettingsDesc["highRatio"],
+                    prometheus.GaugeValue,
+                    highRatio,
+                )
+            }
+        }
+    }
+    watermarkLow, err := parseWatermarkValue(merged.Cluster.Routing.Allocation.Disk.Watermark.Low)
+    if err != nil {
+        c.logger.Error("failed to parse low watermark", "err", err)
+    } else {
+        if strings.HasSuffix(watermarkLow, "b") {
+            lowBytes, err := getValueInBytes(watermarkLow)
+            if err != nil {
+                c.logger.Error("failed to parse low bytes", "err", err)
+            } else {
+                ch <- prometheus.MustNewConstMetric(
+                    clusterSettingsDesc["lowBytes"],
+                    prometheus.GaugeValue,
+                    lowBytes,
+                )
+            }
+        } else {
+            lowRatio, err := getValueAsRatio(watermarkLow)
+            if err != nil {
+                c.logger.Error("failed to parse low ratio", "err", err)
+            } else {
+                ch <- prometheus.MustNewConstMetric(
+                    clusterSettingsDesc["lowRatio"],
+                    prometheus.GaugeValue,
+                    lowRatio,
+                )
+            }
+        }
+    }
     return nil
 }
+func parseWatermarkValue(value interface{}) (string, error) {
+    switch v := value.(type) {
+    case string:
+        return v, nil
+    case map[string]interface{}:
+        if val, ok := v["value"].(string); ok {
+            return val, nil
+        }
+        return "", fmt.Errorf("unexpected structure in watermark value: %v", v)
+    default:
+        return "", fmt.Errorf("unsupported type for watermark value: %T", v)
+    }
+}
 func getValueInBytes(value string) (float64, error) {
     type UnitValue struct {
         unit string
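
The change is exercised end to end through the fixture-driven collector test in the next file. If a focused unit test for the new helper were wanted, a minimal sketch could look like the following (the test name, the case values, and the assumption that it lives next to the other collector tests in package collector are all hypothetical):

package collector

import "testing"

func TestParseWatermarkValue(t *testing.T) {
    cases := []struct {
        name    string
        input   interface{}
        want    string
        wantErr bool
    }{
        {name: "plain string", input: "85%", want: "85%"},
        {name: "object with value key", input: map[string]interface{}{"value": "85%"}, want: "85%"},
        {name: "object without value key", input: map[string]interface{}{"max_headroom": "100gb"}, wantErr: true},
        {name: "unsupported type", input: 42, wantErr: true},
    }
    for _, tc := range cases {
        t.Run(tc.name, func(t *testing.T) {
            got, err := parseWatermarkValue(tc.input)
            if (err != nil) != tc.wantErr {
                t.Fatalf("parseWatermarkValue(%v) error = %v, wantErr %v", tc.input, err, tc.wantErr)
            }
            if got != tc.want {
                t.Fatalf("parseWatermarkValue(%v) = %q, want %q", tc.input, got, tc.want)
            }
        })
    }
}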

View File

@@ -114,6 +114,30 @@ elasticsearch_clustersettings_allocation_watermark_high_bytes 2.147483648e+11
 # HELP elasticsearch_clustersettings_allocation_watermark_low_bytes Low watermark for disk usage in bytes.
 # TYPE elasticsearch_clustersettings_allocation_watermark_low_bytes gauge
 elasticsearch_clustersettings_allocation_watermark_low_bytes 5.24288e+07
 `,
         },
+        {
+            name: "8.9.1-persistent-watermark-percent",
+            file: "../fixtures/settings-8.9.1-watermark.json",
+            want: `
+# HELP elasticsearch_clustersettings_stats_max_shards_per_node Current maximum number of shards per node setting.
+# TYPE elasticsearch_clustersettings_stats_max_shards_per_node gauge
+elasticsearch_clustersettings_stats_max_shards_per_node 1000
+# HELP elasticsearch_clustersettings_stats_shard_allocation_enabled Current mode of cluster wide shard routing allocation settings.
+# TYPE elasticsearch_clustersettings_stats_shard_allocation_enabled gauge
+elasticsearch_clustersettings_stats_shard_allocation_enabled 0
+# HELP elasticsearch_clustersettings_allocation_threshold_enabled Is disk allocation decider enabled.
+# TYPE elasticsearch_clustersettings_allocation_threshold_enabled gauge
+elasticsearch_clustersettings_allocation_threshold_enabled 1
+# HELP elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio Flood stage watermark as a ratio.
+# TYPE elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio gauge
+elasticsearch_clustersettings_allocation_watermark_flood_stage_ratio 0.96
+# HELP elasticsearch_clustersettings_allocation_watermark_high_ratio High watermark for disk usage as a ratio.
+# TYPE elasticsearch_clustersettings_allocation_watermark_high_ratio gauge
+elasticsearch_clustersettings_allocation_watermark_high_ratio 0.92
+# HELP elasticsearch_clustersettings_allocation_watermark_low_ratio Low watermark for disk usage as a ratio.
+# TYPE elasticsearch_clustersettings_allocation_watermark_low_ratio gauge
+elasticsearch_clustersettings_allocation_watermark_low_ratio 0.88
+`,
+        },
     }

File diff suppressed because it is too large
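
The suppressed file is the new fixture referenced by the test above, fixtures/settings-8.9.1-watermark.json, which presumably accounts for most of the 1678 added lines. Its exact contents are not shown here; a heavily trimmed sketch of the portion the test depends on, inferred from the test name and the expected ratios (0.88, 0.92, 0.96), might look like the following, with the full defaults section (presumably captured via include_defaults=true) omitted:

// Hypothetical, trimmed reconstruction of the fixture's persistent settings;
// the real file is a complete cluster settings response from an 8.9.1 cluster.
const trimmedWatermarkFixture = `{
    "persistent": {
        "cluster": {
            "routing": {
                "allocation": {
                    "disk": {
                        "watermark": {
                            "low": "88%",
                            "high": "92%",
                            "flood_stage": "96%"
                        }
                    }
                }
            }
        }
    },
    "transient": {},
    "defaults": {}
}`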