From 4dce8ece9cd459816a49ee91b7d8e2cbe772f8fd Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Thu, 30 Oct 2025 13:28:19 -0400 Subject: [PATCH 01/13] Modernize opensearch-mixin to grafonnet v11 and signals architecture --- .gitignore | 1 + .../{alerts => }/alerts.libsonnet | 52 +- opensearch-mixin/config.libsonnet | 71 +- opensearch-mixin/dashboards.libsonnet | 125 ++ .../dashboards/dashboards.libsonnet | 3 - .../opensearch-cluster-overview.libsonnet | 1717 --------------- .../opensearch-node-overview.libsonnet | 1201 ----------- ...search-search-and-index-overview.libsonnet | 1898 ----------------- .../dashboards_out/node-overview.json | 1706 --------------- .../opensearch-cluster-overview.json | 1771 +++++---------- .../dashboards_out/opensearch-logs.json | 297 +++ .../opensearch-node-overview.json | 1288 +++++++++++ ...opensearch-search-and-index-overview.json} | 1340 +++--------- opensearch-mixin/g.libsonnet | 2 +- opensearch-mixin/jsonnetfile.json | 66 +- opensearch-mixin/links.libsonnet | 33 + opensearch-mixin/main.libsonnet | 48 + opensearch-mixin/mixin.libsonnet | 34 +- opensearch-mixin/panels.libsonnet | 1237 +++++++++-- opensearch-mixin/rows.libsonnet | 156 ++ opensearch-mixin/signals/cluster.libsonnet | 152 ++ opensearch-mixin/signals/indexing.libsonnet | 265 +++ opensearch-mixin/signals/node.libsonnet | 270 +++ opensearch-mixin/signals/roles.libsonnet | 81 + opensearch-mixin/signals/search.libsonnet | 195 ++ opensearch-mixin/signals/topk.libsonnet | 163 ++ opensearch-mixin/variables.libsonnet | 76 - 27 files changed, 5082 insertions(+), 9166 deletions(-) rename opensearch-mixin/{alerts => }/alerts.libsonnet (91%) create mode 100644 opensearch-mixin/dashboards.libsonnet delete mode 100644 opensearch-mixin/dashboards/dashboards.libsonnet delete mode 100644 opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet delete mode 100644 opensearch-mixin/dashboards/opensearch-node-overview.libsonnet delete mode 100644 
opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet delete mode 100644 opensearch-mixin/dashboards_out/node-overview.json create mode 100644 opensearch-mixin/dashboards_out/opensearch-logs.json create mode 100644 opensearch-mixin/dashboards_out/opensearch-node-overview.json rename opensearch-mixin/dashboards_out/{search-and-index-overview.json => opensearch-search-and-index-overview.json} (52%) create mode 100644 opensearch-mixin/links.libsonnet create mode 100644 opensearch-mixin/main.libsonnet create mode 100644 opensearch-mixin/rows.libsonnet create mode 100644 opensearch-mixin/signals/cluster.libsonnet create mode 100644 opensearch-mixin/signals/indexing.libsonnet create mode 100644 opensearch-mixin/signals/node.libsonnet create mode 100644 opensearch-mixin/signals/roles.libsonnet create mode 100644 opensearch-mixin/signals/search.libsonnet create mode 100644 opensearch-mixin/signals/topk.libsonnet delete mode 100644 opensearch-mixin/variables.libsonnet diff --git a/.gitignore b/.gitignore index d68c86c04..1a57d51f0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ vendor jsonnetfile.lock.json *.zip +.worktrees diff --git a/opensearch-mixin/alerts/alerts.libsonnet b/opensearch-mixin/alerts.libsonnet similarity index 91% rename from opensearch-mixin/alerts/alerts.libsonnet rename to opensearch-mixin/alerts.libsonnet index 91f0d5bb0..27ae4dc18 100644 --- a/opensearch-mixin/alerts/alerts.libsonnet +++ b/opensearch-mixin/alerts.libsonnet @@ -1,14 +1,14 @@ { - prometheusAlerts+:: { + new(this): { groups+: [ { - name: $._config.uid + '-alerts', + name: this.config.uid + '-alerts', rules: [ { alert: 'OpenSearchYellowCluster', expr: ||| opensearch_cluster_status{%(filteringSelector)s} == 1 - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -18,14 +18,14 @@ description: ( '{{$labels.cluster}} health status is yellow over the last 5 minutes' - ) % $._config, + ) % this.config, }, }, { alert: 
'OpenSearchRedCluster', expr: ||| opensearch_cluster_status{%(filteringSelector)s} == 2 - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -35,14 +35,14 @@ description: ( '{{$labels.cluster}} health status is red over the last 5 minutes' - ) % $._config, + ) % this.config, }, }, { alert: 'OpenSearchUnstableShardReallocation', expr: ||| sum without(type) (opensearch_cluster_shards_number{%(filteringSelector)s, type="relocating"}) > %(alertsWarningShardReallocations)s - ||| % $._config, + ||| % this.config, 'for': '1m', labels: { severity: 'warning', @@ -51,14 +51,14 @@ summary: 'A node has gone offline or has been disconnected triggering shard reallocation.', description: ||| {{$labels.cluster}} has had {{ printf "%%.0f" $value }} shard reallocation over the last 1m which is above the threshold of %(alertsWarningShardReallocations)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchUnstableShardUnassigned', expr: ||| sum without(type) (opensearch_cluster_shards_number{%(filteringSelector)s, type="unassigned"}) > %(alertsWarningShardUnassigned)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -67,14 +67,14 @@ summary: 'There are shards that have been detected as unassigned.', description: ||| {{$labels.cluster}} has had {{ printf "%%.0f" $value }} shard unassigned over the last 5m which is above the threshold of %(alertsWarningShardUnassigned)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeDiskUsage', expr: ||| 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{%(filteringSelector)s} - opensearch_fs_path_free_bytes{%(filteringSelector)s}) / opensearch_fs_path_total_bytes{%(filteringSelector)s}) > %(alertsWarningDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -83,14 +83,14 @@ summary: 'The node disk usage has exceeded the warning threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }} disk usage over the last 5m which is above the threshold of %(alertsWarningDiskUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeDiskUsage', expr: ||| 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{%(filteringSelector)s} - opensearch_fs_path_free_bytes{%(filteringSelector)s}) / opensearch_fs_path_total_bytes{%(filteringSelector)s}) > %(alertsCriticalDiskUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -99,14 +99,14 @@ summary: 'The node disk usage has exceeded the critical threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% disk usage over the last 5m which is above the threshold of %(alertsCriticalDiskUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeCpuUsage', expr: ||| sum without(nodeid) (opensearch_os_cpu_percent{%(filteringSelector)s}) > %(alertsWarningCPUUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -115,14 +115,14 @@ summary: 'The node CPU usage has exceeded the warning threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% CPU usage over the last 5m which is above the threshold of %(alertsWarningCPUUsage)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeCpuUsage', expr: ||| sum without(nodeid) (opensearch_os_cpu_percent{%(filteringSelector)s}) > %(alertsCriticalCPUUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -131,14 +131,14 @@ summary: 'The node CPU usage has exceeded the critical threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% CPU usage over the last 5m which is above the threshold of %(alertsCriticalCPUUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeMemoryUsage', expr: ||| sum without(nodeid) (opensearch_os_mem_used_percent{%(filteringSelector)s}) > %(alertsWarningMemoryUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -147,14 +147,14 @@ summary: 'The node memory usage has exceeded the warning threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% memory usage over the last 5m which is above the threshold of %(alertsWarningMemoryUsage)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchHighNodeMemoryUsage', expr: ||| sum without(nodeid) (opensearch_os_mem_used_percent{%(filteringSelector)s}) > %(alertsCriticalMemoryUsage)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'critical', @@ -163,14 +163,14 @@ summary: 'The node memory usage has exceeded the critical threshold.', description: ||| {{$labels.node}} has had {{ printf "%%.0f" $value }}%% memory usage over the last 5m which is above the threshold of %(alertsCriticalMemoryUsage)s. 
- ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchModerateRequestLatency', expr: ||| sum without(context) ((increase(opensearch_index_search_fetch_time_seconds{%(filteringSelector)s, context="total"}[5m])+increase(opensearch_index_search_query_time_seconds{context="total"}[5m])+increase(opensearch_index_search_scroll_time_seconds{context="total"}[5m])) / clamp_min(increase(opensearch_index_search_fetch_count{context="total"}[5m])+increase(opensearch_index_search_query_count{context="total"}[5m])+increase(opensearch_index_search_scroll_count{context="total"}[5m]), 1)) > %(alertsWarningRequestLatency)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -179,14 +179,14 @@ summary: 'The request latency has exceeded the warning threshold.', description: ||| {{$labels.index}} has had {{ printf "%%.0f" $value }}s of request latency over the last 5m which is above the threshold of %(alertsWarningRequestLatency)s. - ||| % $._config, + ||| % this.config, }, }, { alert: 'OpenSearchModerateIndexLatency', expr: ||| sum without(context) (increase(opensearch_index_indexing_index_time_seconds{%(filteringSelector)s, context="total"}[5m]) / clamp_min(increase(opensearch_index_indexing_index_count{context="total"}[5m]), 1)) > %(alertsWarningIndexLatency)s - ||| % $._config, + ||| % this.config, 'for': '5m', labels: { severity: 'warning', @@ -195,7 +195,7 @@ summary: 'The index latency has exceeded the warning threshold.', description: ||| {{$labels.index}} has had {{ printf "%%.0f" $value }}s of index latency over the last 5m which is above the threshold of %(alertsWarningIndexLatency)s. 
- ||| % $._config, + ||| % this.config, }, }, ], diff --git a/opensearch-mixin/config.libsonnet b/opensearch-mixin/config.libsonnet index 99bac4d6a..25b3d6868 100644 --- a/opensearch-mixin/config.libsonnet +++ b/opensearch-mixin/config.libsonnet @@ -1,31 +1,52 @@ { - _config+:: { - enableMultiCluster: false, - // extra static selector to apply to all templated variables and alerts - filteringSelector: if self.enableMultiCluster then 'cluster!="",opensearch_cluster!=""' else 'opensearch_cluster!=""', - groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'opensearch_cluster'] else ['job', 'opensearch_cluster'], - instanceLabels: ['node'], - dashboardTags: ['opensearch-mixin'], - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - dashboardNamePrefix: '', + local this = self, + filteringSelector: if self.enableMultiCluster then 'cluster!="",opensearch_cluster!=""' else 'opensearch_cluster!=""', + groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'opensearch_cluster'] else ['job', 'opensearch_cluster'], + logLabels: ['job', 'cluster', 'node'], + instanceLabels: ['node'], - // prefix dashboards uids - uid: 'opensearch', + dashboardTags: [self.uid], + uid: 'opensearch', + dashboardNamePrefix: 'OpenSearch', + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + metricsSource: 'prometheus', // metrics source for signals - // alerts thresholds - alertsWarningShardReallocations: 0, - alertsWarningShardUnassigned: 0, - alertsWarningDiskUsage: 60, - alertsCriticalDiskUsage: 80, - alertsWarningCPUUsage: 70, - alertsCriticalCPUUsage: 85, - alertsWarningMemoryUsage: 70, - alertsCriticalMemoryUsage: 85, - alertsWarningRequestLatency: 0.5, // seconds - alertsWarningIndexLatency: 0.5, // seconds + // Agg Lists + groupAggList: std.join(',', this.groupLabels), + groupAggListWithInstance: std.join(',', this.groupLabels + this.instanceLabels), + + // Multi-cluster support + enableMultiCluster: 
false, + opensearchSelector: if self.enableMultiCluster then 'job=~"$job", instance=~"$instance", cluster=~"$cluster"' else 'job=~"$job", instance=~"$instance"', - enableLokiLogs: true, + // Logging configuration + enableLokiLogs: true, + extraLogLabels: ['level', 'severity'], // Required by logs-lib + logsVolumeGroupBy: 'level', + showLogsVolume: true, + logExpression: '{job=~"$job", cluster=~"$cluster", instance=~"$instance", exception_class=~".+"} | json | line_format "{{.severity}} {{.exception_class}} - {{.exception_message}}" | drop time_extracted, severity_extracted, exception_class_extracted, correlation_id_extracted', + + // Alerts configuration + alertsWarningShardReallocations: 0, // count + alertsWarningShardUnassigned: 0, // count + alertsWarningDiskUsage: 60, // % + alertsCriticalDiskUsage: 80, // % + alertsWarningCPUUsage: 70, // % + alertsCriticalCPUUsage: 85, // % + alertsWarningMemoryUsage: 70, // % + alertsCriticalMemoryUsage: 85, // % + alertsWarningRequestLatency: 0.5, // seconds + alertsWarningIndexLatency: 0.5, // seconds + + // Signals configuration + signals+: { + cluster: (import './signals/cluster.libsonnet')(this), + node: (import './signals/node.libsonnet')(this), + topk: (import './signals/topk.libsonnet')(this), + roles: (import './signals/roles.libsonnet')(this), + search: (import './signals/search.libsonnet')(this), + indexing: (import './signals/indexing.libsonnet')(this), }, } diff --git a/opensearch-mixin/dashboards.libsonnet b/opensearch-mixin/dashboards.libsonnet new file mode 100644 index 000000000..4f541e816 --- /dev/null +++ b/opensearch-mixin/dashboards.libsonnet @@ -0,0 +1,125 @@ +local g = import '../g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; +local logslib = import 'logs-lib/logs/main.libsonnet'; +{ + local root = self, + new(this):: + + local links = this.grafana.links; + local tags = this.config.dashboardTags; + local uid = g.util.string.slugify(this.config.uid); + local vars = 
this.grafana.variables; + local annotations = this.grafana.annotations; + local refresh = this.config.dashboardRefresh; + local period = this.config.dashboardPeriod; + local timezone = this.config.dashboardTimezone; + { + + 'opensearch-cluster-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' Cluster Overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.clusterOverviewRow, + this.grafana.rows.rolesRow, + this.grafana.rows.resourceUsageRow, + this.grafana.rows.storageAndTasksRow, + this.grafana.rows.searchPerformanceRow, + this.grafana.rows.ingestPerformanceRow, + this.grafana.rows.indexingPerformanceRow, + ]), + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-cluster-overview', + tags, + links { opensearchClusterOverview+:: {} }, + annotations, + timezone, + refresh, + period, + ), + 'opensearch-node-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' Node Overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.nodeRolesRow, + this.grafana.rows.nodeHealthRow, + this.grafana.rows.nodeJVMRow, + this.grafana.rows.threadPoolsRow, + ]) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-node-overview', + tags, + links { opensearchNodeOverview+:: {} }, + annotations, + timezone, + refresh, + period, + ), + 'opensearch-search-and-index-overview.json': + g.dashboard.new(this.config.dashboardNamePrefix + ' Search and Index Overview') + + g.dashboard.withPanels( + g.util.panel.resolveCollapsedFlagOnRows( + g.util.grid.wrapPanels([ + this.grafana.rows.searchAndIndexSearchPerformanceRow, + this.grafana.rows.searchAndIndexIndexingPerformanceRow, + this.grafana.rows.searchAndIndexCapacityRow, + ]) + ) + ) + root.applyCommon( + vars.multiInstance, + uid + '-search-and-index-overview', + tags, + links { opensearchSearchAndIndexOverview+:: {} }, + annotations, + timezone, + 
refresh, + period, + ), + + } + if this.config.enableLokiLogs then { + 'opensearch-logs.json': + logslib.new( + this.config.dashboardNamePrefix + ' Logs', + datasourceName=this.grafana.variables.datasources.loki.name, + datasourceRegex=this.grafana.variables.datasources.loki.regex, + filterSelector=this.config.filteringSelector, + labels=this.config.groupLabels + this.config.extraLogLabels, + formatParser=null, + showLogsVolume=this.config.showLogsVolume, + ) + { + dashboards+: + { + logs+: + root.applyCommon(vars.multiInstance, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + }, + panels+: + { + logs+: + g.panel.logs.options.withEnableLogDetails(true) + + g.panel.logs.options.withShowTime(false) + + g.panel.logs.options.withWrapLogMessage(false), + }, + variables+: { + toArray+: [ + this.grafana.variables.datasources.prometheus { hide: 2 }, + ], + }, + }.dashboards.logs, + } else {}, + + applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period): + g.dashboard.withTags(tags) + + g.dashboard.withUid(uid) + + g.dashboard.withLinks(std.objectValues(links)) + + g.dashboard.withTimezone(timezone) + + g.dashboard.withRefresh(refresh) + + g.dashboard.time.withFrom(period) + + g.dashboard.withVariables(vars) + + g.dashboard.withAnnotations(std.objectValues(annotations)), +} diff --git a/opensearch-mixin/dashboards/dashboards.libsonnet b/opensearch-mixin/dashboards/dashboards.libsonnet deleted file mode 100644 index 65bd82d29..000000000 --- a/opensearch-mixin/dashboards/dashboards.libsonnet +++ /dev/null @@ -1,3 +0,0 @@ -(import 'opensearch-cluster-overview.libsonnet') + -(import 'opensearch-node-overview.libsonnet') + -(import 'opensearch-search-and-index-overview.libsonnet') diff --git a/opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet b/opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet deleted file mode 100644 index 
c8d18c418..000000000 --- a/opensearch-mixin/dashboards/opensearch-cluster-overview.libsonnet +++ /dev/null @@ -1,1717 +0,0 @@ -local g = import '../g.libsonnet'; -local grafana = import 'grafonnet/grafana.libsonnet'; -local prometheus = grafana.prometheus; -local commonlib = import 'common-lib/common/main.libsonnet'; -local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet'; -local utils = commonlib.utils; - -local dashboardUidSuffix = '-cluster-overview'; - -{ - // variables - local variables = (import '../variables.libsonnet').new( - filteringSelector=$._config.filteringSelector, - groupLabels=$._config.groupLabels, - instanceLabels=[], - varMetric='opensearch_cluster_status', - ), - - local legendGroupLabels = xtd.array.slice($._config.groupLabels, -1), - - local panels = (import '../panels.libsonnet').new( - $._config.groupLabels, - $._config.instanceLabels, - variables, - ), - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - // panels - local clusterStatusPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_status{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Cluster status', - description: 'The overall health and availability of the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [ - { - options: { - '0': { - index: 0, - text: 'Green', - }, - '1': { - index: 1, - text: 'Yellow', - }, - '2': { - index: 2, - text: 'Red', - }, - }, - type: 'value', - }, - ], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'green', - value: 0, - }, - { - color: 'yellow', - value: 1, - }, - { - color: 'red', - value: 2, - }, - ], - }, - }, - overrides: [], - }, - 
options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local nodeCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_nodes_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Node count', - description: 'The number of running nodes across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local dataNodeCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_datanodes_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Data node count', - description: 'The number of data nodes in the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - 
}, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local shardCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum(max by (type) (opensearch_cluster_shards_number{%(queriesSelector)s}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - ], - type: 'stat', - title: 'Shard count', - description: 'The number of shards in the OpenSearch cluster across all indices.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 0, - }, - { - color: 'green', - value: 1, - }, - ], - }, - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local activeShardsPercentagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(%(agg)s) (opensearch_cluster_shards_active_percent{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels) - ), - - ], - type: 'stat', - title: 'Active shards %', - description: 'Percent of active shards across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - 
}, - { - color: 'red', - value: 0, - }, - { - color: 'yellow', - value: 1, - }, - { - color: 'green', - value: 100, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - colorMode: 'value', - graphMode: 'none', - justifyMode: 'auto', - orientation: 'auto', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - textMode: 'auto', - }, - pluginVersion: '9.4.3', - }, - - local topNodesByCPUUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sort_desc(sum by(node, %(agg)s) (opensearch_os_cpu_percent{%(queriesSelector)s})))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - ), - ], - type: 'bargauge', - title: 'Top nodes by CPU usage', - description: 'Top nodes by OS CPU usage across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local breakersTrippedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(%(agg)s, node) (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - interval='1m', - ), - ], - type: 'bargauge', - title: 'Breakers tripped', - description: 'The total count of circuit breakers tripped across the OpenSearch 
cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'trips', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local shardStatusPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'min by(type, %(agg)s) (opensearch_cluster_shards_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{type}}', - ), - ], - type: 'bargauge', - title: 'Shard status', - description: 'Shard status counts across the Opensearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'shards', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local topNodesByDiskUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sort_desc((100 * (sum by(node, %(agg)s) (opensearch_fs_path_total_bytes{%(queriesSelector)s})- sum by(node, %(agg)s) (opensearch_fs_path_free_bytes{%(queriesSelector)s})) / sum by(node, %(agg)s) (opensearch_fs_path_total_bytes{%(queriesSelector)s}))))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - 
datasource=promDatasource, - legendFormat='{{node}}', - ), - ], - type: 'bargauge', - title: 'Top nodes by disk usage', - description: 'Top nodes by disk usage across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'thresholds', - }, - mappings: [], - max: 100, - min: 0, - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - displayMode: 'gradient', - minVizHeight: 10, - minVizWidth: 0, - orientation: 'horizontal', - reduceOptions: { - calcs: [ - 'lastNotNull', - ], - fields: '', - values: false, - }, - showUnfilled: true, - }, - pluginVersion: '9.4.3', - }, - - local totalDocumentsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_indices_indexing_index_count{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Total documents', - description: 'The total count of documents indexed across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'documents', - }, - overrides: [], - }, - 
options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local pendingTasksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_cluster_pending_tasks_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Pending tasks', - description: 'The number of tasks waiting to be executed across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'tasks', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local storeSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_indices_store_size_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - 
title: 'Store size', - description: 'The total size of the store across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local maxTaskWaitTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'max by(%(agg)s) (opensearch_cluster_task_max_waiting_time_seconds{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendGroupLabels), - ), - ], - type: 'timeseries', - title: 'Max task wait time', - description: 'The max wait time for tasks to be executed across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 
'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local clusterSearchAndIndexSummaryRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Cluster search and index summary', - collapsed: false, - }, - - local topIndicesByRequestRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sort_desc(avg by(index, %(agg)s) ( - opensearch_index_search_fetch_current_number{%(queriesSelector)s, context="total"} + - opensearch_index_search_query_current_number{%(queriesSelector)s, context="total"} + - opensearch_index_search_scroll_current_number{%(queriesSelector)s, context="total"} - ))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - ), - ], - type: 'timeseries', - title: 'Top indices by request rate', - description: 'Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - 
thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByRequestLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sort_desc(sum by(index, %(agg)s) ((increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) - / clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:]) - +increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:]), 1)))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top indices by request latency', - description: 'Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: 
false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByCombinedCacheHitRatioPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, sort_desc(avg by(index, %(agg)s) ( - 100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + - opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / - clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + - opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + - opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"} + - opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}), 1 - )))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - ), - ], - type: 'timeseries', - title: 'Top indices by combined cache hit ratio', - description: 'Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - 
mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topNodesByIngestRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sum by(node, %(agg)s) (rate(opensearch_ingest_total_count{%(queriesSelector)s}[$__rate_interval])))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - ), - ], - type: 'timeseries', - title: 'Top nodes by ingest rate', - description: 'Top nodes by rate of ingest across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'Bps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topNodesByIngestLatencyPanel = { - datasource: promDatasource, - targets: [ - 
prometheus.target( - ||| - topk(10, sum by(%(agg)s, node) ( - increase(opensearch_ingest_total_time_seconds{%(queriesSelector)s}[$__interval:]) / - clamp_min(increase(opensearch_ingest_total_count{%(queriesSelector)s}[$__interval:]), 1))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top nodes by ingest latency', - description: 'Top nodes by ingestion latency across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topNodesByIngestErrorsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, sum by(%(agg)s, node) (increase(opensearch_ingest_total_failed_count{%(queriesSelector)s}[$__interval:])))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{node}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top nodes by ingest errors', - description: 
'Top nodes by ingestion failures across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'errors', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByIndexRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, avg by(%(agg)s, index) (opensearch_index_indexing_index_current_number{%(queriesSelector)s}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - ), - ], - type: 'timeseries', - title: 'Top indices by index rate', - description: 'Top indices by rate of document indexing across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { 
- group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'documents/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local topIndicesByIndexLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - ||| - topk(10, avg by(%(agg)s, index) - (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / - clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))) - ||| - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top indices by index latency', - description: 'Top indices by indexing latency across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 
'single', - sort: 'none', - }, - }, - }, - - local topIndicesByIndexFailuresPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'topk(10, avg by(%(agg)s, index) (increase(opensearch_index_indexing_index_failed_count{%(queriesSelector)s}[$__interval:])))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels), - }, - datasource=promDatasource, - legendFormat='{{index}}', - interval='1m', - ), - ], - type: 'timeseries', - title: 'Top indices by index failures', - description: 'Top indices by index document failures across the OpenSearch cluster.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'failures', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - grafanaDashboards+:: { - 'opensearch-cluster-overview.json': - g.dashboard.new($._config.dashboardNamePrefix + 'OpenSearch cluster overview') - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.time.withFrom($._config.dashboardPeriod) - + g.dashboard.withTimezone($._config.dashboardTimezone) - + g.dashboard.withRefresh($._config.dashboardRefresh) - + g.dashboard.withUid($._config.uid + dashboardUidSuffix) - + g.dashboard.withLinks( - 
g.dashboard.link.dashboards.new( - 'Other Opensearch dashboards', - $._config.dashboardTags - ) - + g.dashboard.link.dashboards.options.withIncludeVars(true) - + g.dashboard.link.dashboards.options.withKeepTime(true) - + g.dashboard.link.dashboards.options.withAsDropdown(false) - ) - + g.dashboard.withPanels( - [ - panels.osRoles { gridPos: { h: 6, w: 24, x: 0, y: 0 } }, - clusterStatusPanel { gridPos: { h: 5, w: 3, x: 0, y: 2 } }, - nodeCountPanel { gridPos: { h: 5, w: 3, x: 3, y: 2 } }, - dataNodeCountPanel { gridPos: { h: 5, w: 3, x: 6, y: 2 } }, - shardCountPanel { gridPos: { h: 5, w: 3, x: 9, y: 2 } }, - activeShardsPercentagePanel { gridPos: { h: 5, w: 3, x: 12, y: 2 } }, - panels.osRolesTimeline { gridPos: { h: 5, w: 9, x: 15, y: 2 } }, - topNodesByCPUUsagePanel { gridPos: { h: 9, w: 8, x: 0, y: 4 } }, - breakersTrippedPanel { gridPos: { h: 9, w: 8, x: 8, y: 4 } }, - shardStatusPanel { gridPos: { h: 9, w: 8, x: 16, y: 4 } }, - topNodesByDiskUsagePanel { gridPos: { h: 10, w: 8, x: 0, y: 13 } }, - totalDocumentsPanel { gridPos: { h: 5, w: 8, x: 8, y: 13 } }, - pendingTasksPanel { gridPos: { h: 5, w: 8, x: 16, y: 13 } }, - storeSizePanel { gridPos: { h: 5, w: 8, x: 8, y: 18 } }, - maxTaskWaitTimePanel { gridPos: { h: 5, w: 8, x: 16, y: 18 } }, - clusterSearchAndIndexSummaryRow { gridPos: { h: 1, w: 24, x: 0, y: 23 } }, - topIndicesByRequestRatePanel { gridPos: { h: 8, w: 8, x: 0, y: 24 } }, - topIndicesByRequestLatencyPanel { gridPos: { h: 8, w: 8, x: 8, y: 24 } }, - topIndicesByCombinedCacheHitRatioPanel { gridPos: { h: 8, w: 8, x: 16, y: 24 } }, - topNodesByIngestRatePanel { gridPos: { h: 8, w: 8, x: 0, y: 32 } }, - topNodesByIngestLatencyPanel { gridPos: { h: 8, w: 8, x: 8, y: 32 } }, - topNodesByIngestErrorsPanel { gridPos: { h: 8, w: 8, x: 16, y: 32 } }, - topIndicesByIndexRatePanel { gridPos: { h: 8, w: 8, x: 0, y: 40 } }, - topIndicesByIndexLatencyPanel { gridPos: { h: 8, w: 8, x: 8, y: 40 } }, - topIndicesByIndexFailuresPanel { gridPos: { h: 8, w: 8, x: 
16, y: 40 } }, - ] - ) - + g.dashboard.withVariables(variables.singleInstance), - }, -} diff --git a/opensearch-mixin/dashboards/opensearch-node-overview.libsonnet b/opensearch-mixin/dashboards/opensearch-node-overview.libsonnet deleted file mode 100644 index 32b18e5ff..000000000 --- a/opensearch-mixin/dashboards/opensearch-node-overview.libsonnet +++ /dev/null @@ -1,1201 +0,0 @@ -local g = (import '../g.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; -local prometheus = grafana.prometheus; -local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet'; -local dashboardUidSuffix = '-node-overview'; - -{ - - // variables - local variables = (import '../variables.libsonnet').new( - filteringSelector=$._config.filteringSelector, - groupLabels=$._config.groupLabels, - instanceLabels=$._config.instanceLabels, - varMetric='opensearch_os_cpu_percent', - enableLokiLogs=$._config.enableLokiLogs, - ), - - local legendInstanceLabels = xtd.array.slice($._config.instanceLabels, -1), - - local panels = (import '../panels.libsonnet').new( - $._config.groupLabels, - $._config.instanceLabels, - variables, - ), - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - - local lokiDatasource = { - uid: '${%s}' % variables.datasources.loki.name, - }, - - local nodeHealthRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Node health', - collapsed: false, - }, - - local nodeCPUUsagePanel = - commonlib.panels.cpu.timeSeries.utilization.new( - 'Node CPU usage', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'opensearch_os_cpu_percent{%(queriesSelector)s}' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - - ], - 
description="CPU usage percentage of the node's Operating System.", - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local nodeMemoryUsagePanel = - commonlib.panels.memory.timeSeries.usagePercent.new( - 'Node memory usage', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'opensearch_os_mem_used_percent{%(queriesSelector)s}' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Memory usage percentage of the node for the Operating System and OpenSearch', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local nodeIOPanel = - commonlib.panels.disk.timeSeries.ioBytesPerSec.new( - 'Node I/O', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'sum by(%(agg)s) (rate(opensearch_fs_io_total_read_bytes{%(queriesSelector)s}[$__rate_interval]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat('%s - read' % utils.labelsToPanelLegend(legendInstanceLabels)), - g.query.prometheus.new( - promDatasource.uid, - 'sum by(%(agg)s) (rate(opensearch_fs_io_total_write_bytes{%(queriesSelector)s}[$__rate_interval]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat('%s - write' % utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Node file system read and write data.', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withStacking(value='normal'), - - local nodeOpenConnectionsPanel = - commonlib.panels.generic.timeSeries.base.new( - 'Node open connections', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - 'sum by (%(agg)s) 
(opensearch_transport_server_open_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Number of open connections for the selected node.', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withStacking(value='normal') - + g.panel.timeSeries.standardOptions.withUnit(''), - - local nodeDiskUsagePanel = - commonlib.panels.disk.timeSeries.usagePercent.new( - 'Node disk usage', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - '100 - (100 * opensearch_fs_path_free_bytes{%(queriesSelector)s} / clamp_min(opensearch_fs_path_total_bytes{%(queriesSelector)s}, 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Disk usage percentage of the selected node.', - ), - - local nodeMemorySwapPanel = - commonlib.panels.memory.timeSeries.usagePercent.new( - 'Node memory swap', - targets=[ - g.query.prometheus.new( - promDatasource.uid, - '100 * opensearch_os_swap_used_bytes{%(queriesSelector)s} / clamp_min((opensearch_os_swap_used_bytes{%(queriesSelector)s} + opensearch_os_swap_free_bytes{%(queriesSelector)s}), 1)' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Percentage of swap space used by OpenSearch and the Operating System on the selected node.', - ) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local nodeNetworkTrafficPanel = - commonlib.panels.network.timeSeries.traffic.new( - 'Node network traffic', - targets=[ - 
g.query.prometheus.new( - promDatasource.uid, - 'sum by (%(agg)s) (rate(opensearch_transport_tx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - ) - + g.query.prometheus.withLegendFormat('%s - sent' % utils.labelsToPanelLegend(legendInstanceLabels)), - g.query.prometheus.new( - promDatasource.uid, - 'sum by (%(agg)s) (rate(opensearch_transport_rx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8' - % - { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - } - - ) + g.query.prometheus.withLegendFormat('%s - received' % utils.labelsToPanelLegend(legendInstanceLabels)), - ], - description='Node network traffic sent and received.', - ) - + commonlib.panels.network.timeSeries.traffic.withNegateOutPackets('/sent/') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), - - local circuitBreakersPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(name, %(agg)s) (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - {{ name }}' % utils.labelsToPanelLegend(legendInstanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Circuit breakers', - description: 'Circuit breakers tripped on the selected node by type', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 
5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'trips', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local nodeJVMRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Node JVM', - collapsed: false, - }, - - local jvmHeapUsedVsCommittedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_heap_used_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - used' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_heap_committed_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - commited' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM heap used vs. 
committed', - description: 'The amount of heap memory used vs committed on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - }, - - local jvmNonheapUsedVsCommittedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_nonheap_used_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - used' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_mem_nonheap_committed_bytes{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - commited' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM non-heap used vs. 
committed', - description: 'The amount of non-heap memory used vs committed on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - }, - - local jvmThreadsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (opensearch_jvm_threads_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM threads', - description: 'The number of threads running in the JVM on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - 
spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'threads', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local jvmBufferPoolsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by( %(agg)s, bufferpool) (opensearch_jvm_bufferpool_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - {{bufferpool}}' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM buffer pools', - description: 'The number of buffer pools available on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'buffer pools', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, 
- }, - - local jvmUptimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(%(agg)s) (opensearch_jvm_uptime_seconds{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM uptime', - description: 'The uptime of the JVM in seconds on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - pluginVersion: '9.4.3', - }, - - local jvmGarbageCollectionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (increase(opensearch_jvm_gc_collection_count{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'JVM garbage 
collections', - description: 'The number of garbage collection operations on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'operations', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local jvmGarbageCollectionTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) (increase(opensearch_jvm_gc_collection_time_seconds{%(queriesSelector)s}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'JVM garbage collection time', - description: 'The amount of time spent on garbage collection on the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local jvmBufferPoolUsagePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - '100 * (sum by (%(agg)s, bufferpool) (opensearch_jvm_bufferpool_used_bytes{%(queriesSelector)s})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{%(queriesSelector)s})),1)' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - {{bufferpool}}' % utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'JVM buffer pool usage', - description: 'The percent used of JVM buffer pool memory.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - 
unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local threadPoolsRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Thread pools', - collapsed: false, - }, - - local threadPoolThreadsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by(%(agg)s) ((opensearch_threadpool_threads_number{%(queriesSelector)s}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'Thread pool threads', - description: 'The number of threads in the thread pool for the selected node', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'threads', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local threadPoolTasksPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (%(agg)s) 
(opensearch_threadpool_tasks_number{%(queriesSelector)s})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), - }, - datasource=promDatasource, - legendFormat=utils.labelsToPanelLegend(legendInstanceLabels), - ), - ], - type: 'timeseries', - title: 'Thread pool tasks', - description: 'The number of tasks in the thread pool for the selected node.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'tasks', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - }, - - local errorLogsPanelPanel = { - datasource: lokiDatasource, - targets: [ - { - datasource: lokiDatasource, - editorMode: 'code', - expr: '{%(queriesSelector)s} |~ ""' % { queriesSelector: variables.queriesSelector }, - queryType: 'range', - refId: 'A', - }, - ], - type: 'logs', - title: 'Error logs panel', - description: 'The recent error logs being reported by OpenSearch.', - options: { - dedupStrategy: 'none', - enableLogDetails: true, - prettifyLogMessage: false, - showCommonLabels: false, - showLabels: false, - showTime: false, - sortOrder: 'Descending', - wrapLogMessage: false, - }, - }, - - grafanaDashboards+:: { - 
'node-overview.json': - g.dashboard.new($._config.dashboardNamePrefix + 'OpenSearch node overview') - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.time.withFrom($._config.dashboardPeriod) - + g.dashboard.withTimezone($._config.dashboardTimezone) - + g.dashboard.withRefresh($._config.dashboardRefresh) - + g.dashboard.withUid($._config.uid + dashboardUidSuffix) - + g.dashboard.withLinks( - g.dashboard.link.dashboards.new( - 'Other Opensearch dashboards', - $._config.dashboardTags - ) - + g.dashboard.link.dashboards.options.withIncludeVars(true) - + g.dashboard.link.dashboards.options.withKeepTime(true) - + g.dashboard.link.dashboards.options.withAsDropdown(false) - ) - + g.dashboard.withPanels( - std.flattenArrays([ - [ - panels.osRolesTimeline { gridPos: { h: 5, w: 24, x: 0, y: 0 } }, - nodeHealthRow { gridPos: { h: 1, w: 24, x: 0, y: 1 } }, - nodeCPUUsagePanel { gridPos: { h: 7, w: 6, x: 0, y: 2 } }, - nodeMemoryUsagePanel { gridPos: { h: 7, w: 6, x: 6, y: 2 } }, - nodeIOPanel { gridPos: { h: 7, w: 6, x: 12, y: 2 } }, - nodeOpenConnectionsPanel { gridPos: { h: 7, w: 6, x: 18, y: 2 } }, - nodeDiskUsagePanel { gridPos: { h: 7, w: 6, x: 0, y: 8 } }, - nodeMemorySwapPanel { gridPos: { h: 7, w: 6, x: 6, y: 8 } }, - nodeNetworkTrafficPanel { gridPos: { h: 7, w: 6, x: 12, y: 8 } }, - circuitBreakersPanel { gridPos: { h: 7, w: 6, x: 18, y: 8 } }, - nodeJVMRow { gridPos: { h: 1, w: 24, x: 0, y: 15 } }, - jvmHeapUsedVsCommittedPanel { gridPos: { h: 6, w: 6, x: 0, y: 16 } }, - jvmNonheapUsedVsCommittedPanel { gridPos: { h: 6, w: 6, x: 6, y: 16 } }, - jvmThreadsPanel { gridPos: { h: 6, w: 6, x: 12, y: 16 } }, - jvmBufferPoolsPanel { gridPos: { h: 6, w: 6, x: 18, y: 16 } }, - jvmUptimePanel { gridPos: { h: 6, w: 6, x: 0, y: 22 } }, - jvmGarbageCollectionsPanel { gridPos: { h: 6, w: 6, x: 6, y: 22 } }, - jvmGarbageCollectionTimePanel { gridPos: { h: 6, w: 6, x: 12, y: 22 } }, - jvmBufferPoolUsagePanel { gridPos: { h: 6, w: 6, x: 18, y: 22 } }, - threadPoolsRow 
{ gridPos: { h: 1, w: 24, x: 0, y: 28 } }, - threadPoolThreadsPanel { gridPos: { h: 8, w: 12, x: 0, y: 29 } }, - threadPoolTasksPanel { gridPos: { h: 8, w: 12, x: 12, y: 29 } }, - ], - if $._config.enableLokiLogs then [ - errorLogsPanelPanel { gridPos: { h: 7, w: 24, x: 0, y: 37 } }, - ] else [], - [], - ]) - ) - + g.dashboard.withVariables(variables.multiInstance), - }, -} diff --git a/opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet b/opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet deleted file mode 100644 index c94f5f8d9..000000000 --- a/opensearch-mixin/dashboards/opensearch-search-and-index-overview.libsonnet +++ /dev/null @@ -1,1898 +0,0 @@ -local g = (import '../g.libsonnet'); -local grafana = (import 'grafonnet/grafana.libsonnet'); -local dashboard = grafana.dashboard; -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; -local prometheus = grafana.prometheus; - -local dashboardUidSuffix = '-search-and-index-overview'; - -local promDatasourceName = 'prometheus_datasource'; -local instanceLabels = ['index']; - -{ - - // override - local hideZeros = - { - matcher: { - id: 'byValue', - options: { - reducer: 'allIsZero', - op: 'gte', - value: 0, - }, - }, - properties: [ - { - id: 'custom.hideFrom', - value: { - tooltip: true, - viz: false, - legend: true, - }, - }, - ], - }, - // variables - local variables = (import '../variables.libsonnet').new( - filteringSelector=$._config.filteringSelector, - groupLabels=$._config.groupLabels, - instanceLabels=instanceLabels, - varMetric='opensearch_index_search_fetch_count', - ), - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - - local requestPerformanceRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Request performance', - collapsed: false, - }, - - local requestRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by 
(%(agg)s) (opensearch_index_search_query_current_number{%(queriesSelector)s, context=~"total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_search_fetch_current_number{%(queriesSelector)s, context=~"total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - fetch' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_search_scroll_current_number{%(queriesSelector)s, context=~"total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - scroll' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Request rate', - description: 'Rate of fetch, scroll, and query requests by selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'reqps', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - 
placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local requestLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by (%(agg)s) (increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by (%(agg)s) (increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - fetch' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by (%(agg)s) (increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:]), 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - scroll' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Request latency', - description: 'Latency of fetch, scroll, and query requests by selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: 
false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local cacheHitRatioPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"}) / clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"}, 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - request' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (100 * (opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / clamp_min(opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}, 1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % 
utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Cache hit ratio', - description: 'Ratio of query cache and request cache hits and misses.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'percent', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local evictionsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_querycache_evictions_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query cache' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_requestcache_evictions_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - request cache' % 
utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_fielddata_evictions_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - field data' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Evictions', - description: 'Total evictions count by cache type for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'evictions', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexPerformanceRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Index performance', - collapsed: false, - }, - - local indexRatePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_indexing_index_current_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: 
variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Index rate', - description: 'Rate of indexed documents for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'documents/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:]),1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Index latency', - description: 'Document indexing latency 
for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexFailuresPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_indexing_index_failed_count{%(queriesSelector)s, context="total"}[$__interval:]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - interval='1m', - ), - ], - type: 'timeseries', - title: 'Index failures', - description: 'Number of indexing failures for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - 
pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'failures', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local flushLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Flush latency', - description: 'Index flush latency for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 
'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local mergeTimePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - total' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_stopped_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - stopped' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_throttled_time_seconds{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - throttled' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Merge time', - description: 'Index merge time for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - 
lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local refreshLatencyPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Refresh latency', - description: 'Index refresh latency for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, 
- }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 's', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local translogOperationsPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_translog_operations_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Translog operations', - description: 'Current number of translog operations for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'operations', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + 
g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local docsDeletedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_indexing_delete_current_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Docs deleted', - description: 'Rate of documents deleted for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'documents/s', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local indexCapacityRow = { - datasource: promDatasource, - targets: [], - type: 'row', - title: 'Index capacity', - collapsed: false, - }, - - local documentsIndexedPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by (%(agg)s) (opensearch_index_indexing_index_count{%(queriesSelector)s, context="total"})' - % { - queriesSelector: 
variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Documents indexed', - description: 'Number of indexed documents for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'documents', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local segmentCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_segments_number{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Segment count', - description: 'Current number of segments for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 
'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'segments', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local mergeCountPanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (increase(opensearch_index_merges_total_docs_count{%(queriesSelector)s, context="total"}[$__interval:])) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Merge count', - description: 'Number of merge operations for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - 
mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'merges', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local cacheSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_querycache_memory_size_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - query' % utils.labelsToPanelLegend(instanceLabels), - ), - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_requestcache_memory_size_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s - request' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Cache size', - description: 'Size of query cache and request cache.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - 
}, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'multi', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local storeSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_store_size_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Store size', - description: 'Size of the store in bytes for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local 
segmentSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_segments_memory_bytes{%(queriesSelector)s, context="total"})' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Segment size', - description: 'Memory used by segments for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - local mergeSizePanel = { - datasource: promDatasource, - targets: [ - prometheus.target( - 'avg by(%(agg)s) (opensearch_index_merges_current_size_bytes{%(queriesSelector)s, context="total"}) > 0' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Merge 
size', - description: 'Size of merge operations in bytes for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: { - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'bytes', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - local shardCountPanel = - { - datasource: promDatasource, - targets: [ - prometheus.target( - 'sum by (index) (avg by(%(agg)s) (opensearch_index_shards_number{%(queriesSelector)s, type=~"active|active_primary"}))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', $._config.groupLabels + instanceLabels), - }, - datasource=promDatasource, - legendFormat='%s' % utils.labelsToPanelLegend(instanceLabels), - ), - ], - type: 'timeseries', - title: 'Shard count', - description: 'The number of index shards for the selected index.', - fieldConfig: { - defaults: { - color: { - mode: 'palette-classic', - }, - custom: { - axisCenteredZero: false, - axisColorMode: 'text', - axisLabel: '', - axisPlacement: 'auto', - barAlignment: 0, - drawStyle: 'line', - fillOpacity: 0, - gradientMode: 'none', - hideFrom: 
{ - legend: false, - tooltip: false, - viz: false, - }, - lineInterpolation: 'linear', - lineWidth: 1, - pointSize: 5, - scaleDistribution: { - type: 'linear', - }, - showPoints: 'auto', - spanNulls: false, - stacking: { - group: 'A', - mode: 'none', - }, - thresholdsStyle: { - mode: 'off', - }, - }, - mappings: [], - thresholds: { - mode: 'absolute', - steps: [ - { - color: 'green', - value: null, - }, - { - color: 'red', - value: 80, - }, - ], - }, - unit: 'shards', - }, - overrides: [], - }, - options: { - legend: { - calcs: [], - displayMode: 'list', - placement: 'bottom', - showLegend: true, - }, - tooltip: { - mode: 'single', - sort: 'none', - }, - }, - } - + g.panel.timeSeries.standardOptions.withOverridesMixin(hideZeros), - - grafanaDashboards+:: { - 'search-and-index-overview.json': - g.dashboard.new($._config.dashboardNamePrefix + 'OpenSearch search and index overview') - + g.dashboard.withTags($._config.dashboardTags) - + g.dashboard.time.withFrom($._config.dashboardPeriod) - + g.dashboard.withTimezone($._config.dashboardTimezone) - + g.dashboard.withRefresh($._config.dashboardRefresh) - + g.dashboard.withUid($._config.uid + dashboardUidSuffix) - + g.dashboard.withLinks( - g.dashboard.link.dashboards.new( - 'Other Opensearch dashboards', - $._config.dashboardTags - ) - + g.dashboard.link.dashboards.options.withIncludeVars(true) - + g.dashboard.link.dashboards.options.withKeepTime(true) - + g.dashboard.link.dashboards.options.withAsDropdown(false) - ) - + g.dashboard.withPanels( - [ - requestPerformanceRow { gridPos: { h: 1, w: 24, x: 0, y: 0 } }, - requestRatePanel { gridPos: { h: 8, w: 6, x: 0, y: 1 } }, - requestLatencyPanel { gridPos: { h: 8, w: 6, x: 6, y: 1 } }, - cacheHitRatioPanel { gridPos: { h: 8, w: 6, x: 12, y: 1 } }, - evictionsPanel { gridPos: { h: 8, w: 6, x: 18, y: 1 } }, - indexPerformanceRow { gridPos: { h: 1, w: 24, x: 0, y: 9 } }, - indexRatePanel { gridPos: { h: 8, w: 6, x: 0, y: 10 } }, - indexLatencyPanel { gridPos: { h: 8, w: 6, x: 
6, y: 10 } }, - indexFailuresPanel { gridPos: { h: 8, w: 6, x: 12, y: 10 } }, - flushLatencyPanel { gridPos: { h: 8, w: 6, x: 18, y: 10 } }, - mergeTimePanel { gridPos: { h: 8, w: 6, x: 0, y: 18 } }, - refreshLatencyPanel { gridPos: { h: 8, w: 6, x: 6, y: 18 } }, - translogOperationsPanel { gridPos: { h: 8, w: 6, x: 12, y: 18 } }, - docsDeletedPanel { gridPos: { h: 8, w: 6, x: 18, y: 18 } }, - indexCapacityRow { gridPos: { h: 1, w: 24, x: 0, y: 26 } }, - documentsIndexedPanel { gridPos: { h: 8, w: 6, x: 0, y: 27 } }, - segmentCountPanel { gridPos: { h: 8, w: 6, x: 6, y: 27 } }, - mergeCountPanel { gridPos: { h: 8, w: 6, x: 12, y: 27 } }, - cacheSizePanel { gridPos: { h: 8, w: 6, x: 18, y: 27 } }, - storeSizePanel { gridPos: { h: 8, w: 6, x: 0, y: 35 } }, - segmentSizePanel { gridPos: { h: 8, w: 6, x: 6, y: 35 } }, - mergeSizePanel { gridPos: { h: 8, w: 6, x: 12, y: 35 } }, - shardCountPanel { gridPos: { h: 8, w: 6, x: 18, y: 35 } }, - ] - ) - + g.dashboard.withVariables(variables.multiInstance), - }, -} diff --git a/opensearch-mixin/dashboards_out/node-overview.json b/opensearch-mixin/dashboards_out/node-overview.json deleted file mode 100644 index ac83ca553..000000000 --- a/opensearch-mixin/dashboards_out/node-overview.json +++ /dev/null @@ -1,1706 +0,0 @@ -{ - "links": [ - { - "asDropdown": false, - "includeVars": true, - "keepTime": true, - "tags": [ - "opensearch-mixin" - ], - "title": "Other Opensearch dashboards", - "type": "dashboards" - } - ], - "panels": [ - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "OpenSearch node roles over time.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "2": { - "color": "light-purple", - "index": 0, - "text": "data" - }, - "3": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "4": { - "color": "light-blue", - "index": 2, - "text": "ingest" - }, - "5": { - "color": "light-yellow", - "index": 3, - "text": "cluster_manager" - }, - "6": { - 
"color": "super-light-red", - "index": 4, - "text": "remote_cluster_client" - } - }, - "type": "value" - } - ] - } - }, - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 1, - "maxDataPoints": 100, - "options": { - "legend": false, - "showValue": "never" - }, - "pluginVersion": "v10.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1\n) * 2\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1\n) * 3\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1\n) * 4\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1\n) * 5\n", - "legendFormat": "{{node}}" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1\n) * 6\n", - "legendFormat": 
"{{node}}" - } - ], - "title": "Roles timeline", - "type": "status-history" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 2, - "targets": [ ], - "title": "Node health", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "CPU usage percentage of the node's Operating System.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 5, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 2 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", - "legendFormat": "{{node}}" - } - ], - "title": "Node CPU usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Memory usage percentage of the node for the Operating System and OpenSearch", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 5, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 2 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": 
"desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "opensearch_os_mem_used_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", - "legendFormat": "{{node}}" - } - ], - "title": "Node memory usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Node file system read and write data.", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 1, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never", - "stacking": "normal" - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/time|used|busy|util/" - }, - "properties": [ - { - "id": "custom.axisSoftMax", - "value": 100 - }, - { - "id": "custom.drawStyle", - "value": "points" - }, - { - "id": "unit", - "value": "percent" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 2 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_read_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", - "legendFormat": "{{node}} - read" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_write_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", - "legendFormat": "{{node}} - write" - } - ], - "title": "Node I/O", 
- "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Number of open connections for the selected node.", - "fieldConfig": { - "defaults": { - "custom": { - "fillOpacity": 30, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never", - "stacking": "normal" - }, - "unit": "" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 2 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_transport_server_open_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "legendFormat": "{{node}}" - } - ], - "title": "Node open connections", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Disk usage percentage of the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 1, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 8 - }, - "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "100 - (100 * opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / 
clamp_min(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}, 1))", - "legendFormat": "{{node}}" - } - ], - "title": "Node disk usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Percentage of swap space used by OpenSearch and the Operating System on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "fillOpacity": 5, - "gradientMode": "scheme", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - "decimals": 1, - "max": 100, - "min": 0, - "unit": "percent" - } - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 8 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "100 * opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} + opensearch_os_swap_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}), 1)", - "legendFormat": "{{node}}" - } - ], - "title": "Node memory swap", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "description": "Node network traffic sent and received.", - "fieldConfig": { - "defaults": { - "custom": { - "axisCenteredZero": false, - "axisLabel": "out(-) | in(+)", - "fillOpacity": 5, - "gradientMode": "opacity", - "lineInterpolation": "smooth", - "lineWidth": 2, - "showPoints": "never" - }, - 
"decimals": 1, - "noValue": "No traffic", - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/sent/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 8 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list" - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "v11.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_tx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", - "legendFormat": "{{node}} - sent" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_rx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", - "legendFormat": "{{node}} - received" - } - ], - "title": "Node network traffic", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "Circuit breakers tripped on the selected node by type", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "trips" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 8 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(name, job,opensearch_cluster,node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{node}} - {{ name }}" - } - ], - "title": "Circuit breakers", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 11, - "targets": [ ], - "title": "Node JVM", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The amount of heap memory used vs committed on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 16 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_heap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - used" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_heap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - commited" - } - ], - "title": "JVM heap used vs. 
committed", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The amount of non-heap memory used vs committed on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 16 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_nonheap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - used" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_mem_nonheap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - commited" - } - ], - "title": "JVM non-heap used vs. 
committed", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of threads running in the JVM on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "threads" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 16 - }, - "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_jvm_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM threads", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of buffer pools available on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "buffer pools" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 16 - }, - "id": 15, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by( job,opensearch_cluster,node, bufferpool) (opensearch_jvm_bufferpool_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - {{bufferpool}}" - } - ], - "title": "JVM buffer pools", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The uptime of the JVM in seconds on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - 
"showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 22 - }, - "id": 16, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.4.3", - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) (opensearch_jvm_uptime_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM uptime", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of garbage collection operations on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": 
"operations" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 22 - }, - "id": 17, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (increase(opensearch_jvm_gc_collection_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM garbage collections", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The amount of time spent on garbage collection on the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "s" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 22 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { 
- "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (increase(opensearch_jvm_gc_collection_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "JVM garbage collection time", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The percent used of JVM buffer pool memory.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 22 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "100 * (sum by (job,opensearch_cluster,node, bufferpool) (opensearch_jvm_bufferpool_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})) / clamp_min((sum by (job, bufferpool, cluster) 
(opensearch_jvm_bufferpool_total_capacity_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})),1)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}} - {{bufferpool}}" - } - ], - "title": "JVM buffer pool usage", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 20, - "targets": [ ], - "title": "Thread pools", - "type": "row" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of threads in the thread pool for the selected node", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "threads" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 29 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by(job,opensearch_cluster,node) 
((opensearch_threadpool_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "Thread pool threads", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "description": "The number of tasks in the thread pool for the selected node.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "tasks" - }, - "overrides": [ ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 29 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "${prometheus_datasource}" - }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_threadpool_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{node}}" - } - ], - "title": "Thread pool tasks", - "type": "timeseries" - }, - { - "datasource": { - "uid": "${loki_datasource}" - }, - 
"description": "The recent error logs being reported by OpenSearch.", - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 37 - }, - "id": 23, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "uid": "${loki_datasource}" - }, - "editorMode": "code", - "expr": "{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} |~ \"\"", - "queryType": "range", - "refId": "A" - } - ], - "title": "Error logs panel", - "type": "logs" - } - ], - "refresh": "1m", - "schemaVersion": 36, - "tags": [ - "opensearch-mixin" - ], - "templating": { - "list": [ - { - "label": "Prometheus data source", - "name": "prometheus_datasource", - "query": "prometheus", - "regex": "", - "type": "datasource" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Job", - "multi": true, - "name": "job", - "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\"}, job)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Opensearch_cluster", - "multi": true, - "name": "opensearch_cluster", - "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", - "refresh": 2, - "sort": 1, - "type": "query" - }, - { - "allValue": ".+", - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "includeAll": true, - "label": "Node", - "multi": true, - "name": "node", - "query": "label_values(opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", - "refresh": 2, - 
"sort": 1, - "type": "query" - }, - { - "hide": 2, - "label": "Loki data source", - "name": "loki_datasource", - "query": "loki", - "regex": "", - "type": "datasource" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timezone": "default", - "title": "OpenSearch node overview", - "uid": "opensearch-node-overview" - } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json index 891e95e38..89f7eebdd 100644 --- a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json @@ -1,149 +1,45 @@ { + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, "links": [ { - "asDropdown": false, + "keepTime": true, + "title": "Opensearch Logs", + "type": "link", + "url": "/d/opensearch-logs" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ - "opensearch-mixin" + "opensearch" ], - "title": "Other Opensearch dashboards", + "title": "All dashboards", "type": "dashboards" } ], "panels": [ { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "OpenSearch node roles.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "0": { - "color": "super-light-orange", - "index": 5, - "text": "False" - }, - "1": { - "color": "light-green", - "index": 3, - "text": "True" - }, - "Data": { - "color": "light-purple", - "index": 0, - "text": "data" - }, - "Ingest": { - "color": "light-blue", - "index": 2, - "text": "ingest" - }, - "Master": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "Remote cluster client": { - "color": "light-orange", - "index": 4, - "text": "remote_cluster_client" - } - }, - "type": "value" - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" - }, - "properties": [ - { - "id": 
"custom.cellOptions", - "value": { - "type": "color-text" - } - } - ] - } - ] - }, + "collapsed": false, "gridPos": { - "h": 6, - "w": 24, + "h": 1, + "w": 0, "x": 0, "y": 0 }, "id": 1, - "pluginVersion": "v10.0.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (job,opensearch_cluster,node,node,nodeid,role,primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[1d]))", - "instant": true, - "legendFormat": "{{node}}" - } - ], - "title": "Roles", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "role" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "cluster_manager": 108, - "data": 105, - "ingest": 106, - "job": 3, - "master": 104, - "node": 3, - "nodeid": 3, - "opensearch_cluster": 3, - "remote_cluster_client": 107 - }, - "renameByName": { - "Time": "", - "cluster": "Cluster", - "cluster_manager": "Cluster manager", - "data": "Data", - "ingest": "Ingest", - "master": "Master", - "node": "Node", - "nodeid": "Nodeid", - "remote_cluster_client": "Remote cluster client" - } - } - } - ], - "type": "table" + "panels": [ ], + "title": "Cluster Overview", + "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The overall health and availability of the OpenSearch cluster.", "fieldConfig": { @@ -171,7 +67,6 @@ } ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -191,40 +86,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, + "h": 6, + "w": 5, "x": 0, - "y": 2 + "y": 1 }, "id": 2, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ 
"lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Cluster status" } ], "title": "Cluster status", @@ -232,7 +122,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of running nodes across the OpenSearch cluster.", "fieldConfig": { @@ -240,9 +131,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -258,40 +147,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 3, - "y": 2 + "h": 6, + "w": 5, + "x": 5, + "y": 1 }, "id": 3, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_nodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n 
opensearch_cluster_nodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Node count" } ], "title": "Node count", @@ -299,7 +183,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of data nodes in the OpenSearch cluster.", "fieldConfig": { @@ -307,9 +192,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -325,40 +208,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 6, - "y": 2 + "h": 6, + "w": 5, + "x": 10, + "y": 1 }, "id": 4, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_datanodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_datanodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Data node count" } ], "title": "Data node count", @@ -366,7 +244,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of shards in the OpenSearch cluster across all indices.", 
"fieldConfig": { @@ -374,9 +253,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -392,40 +269,35 @@ } ] } - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 9, - "y": 2 + "h": 6, + "w": 5, + "x": 15, + "y": 1 }, "id": 5, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum(max by (type) (opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "expr": "sum(\n max by (job,opensearch_cluster,type) (\n opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Shard count" } ], "title": "Shard count", @@ -433,7 +305,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Percent of active shards across the OpenSearch cluster.", "fieldConfig": { @@ -441,9 +314,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -464,78 +335,91 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 3, - "x": 12, - "y": 2 + "h": 6, + "w": 4, + "x": 20, + "y": 1 }, "id": 6, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": 
"auto" + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(job,opensearch_cluster) (opensearch_cluster_shards_active_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_shards_active_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Active shards %%" } ], "title": "Active shards %", "type": "stat" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 7 + }, + "id": 7, + "panels": [ ], + "title": "Node Roles", + "type": "row" + }, { "datasource": { "type": "datasource", "uid": "-- Mixed --" }, - "description": "OpenSearch node roles over time.", + "description": "OpenSearch node roles.", "fieldConfig": { "defaults": { "mappings": [ { "options": { - "2": { + "0": { + "color": "super-light-orange", + "index": 5, + "text": "False" + }, + "1": { + "color": "light-green", + "index": 3, + "text": "True" + }, + "Data": { "color": "light-purple", "index": 0, "text": "data" }, - "3": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "4": { + "Ingest": { "color": "light-blue", "index": 2, "text": "ingest" }, - "5": { - "color": "light-yellow", - "index": 3, - "text": "cluster_manager" + "Master": { + "color": "light-green", + "index": 1, + "text": "master" }, - "6": { - "color": "super-light-red", + "Remote cluster client": { + "color": "light-orange", "index": 4, "text": "remote_cluster_client" } @@ -543,69 +427,224 @@ "type": "value" } ] - } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" + }, + "properties": [ + { + "id": 
"custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + } + ] }, "gridPos": { - "h": 5, - "w": 9, - "x": 15, - "y": 2 - }, - "id": 7, - "maxDataPoints": 100, - "options": { - "legend": false, - "showValue": "never" + "h": 8, + "w": 24, + "x": 0, + "y": 8 }, - "pluginVersion": "v10.0.0", + "id": 8, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"data\"}[1m]) == 1\n) * 2\n", - "legendFormat": "{{node}}" - }, + "expr": "max by (job,opensearch_cluster, nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[1d]))", + "format": "time_series", + "instant": true, + "legendFormat": "{{node}}: Node role bool last seen", + "refId": "Node role bool last seen" + } + ], + "title": "Roles", + "transformations": [ { - "datasource": { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "role" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "cluster_manager": 108, + "data": 105, + "ingest": 106, + "job": 3, + "master": 104, + "node": 3, + "nodeid": 3, + "opensearch_cluster": 3, + "remote_cluster_client": 107 + }, + "renameByName": { + "Time": "", + "cluster": "Cluster", + "cluster_manager": "Cluster manager", + "data": "Data", + "ingest": "Ingest", + "master": "Master", + "node": "Node", + "nodeid": "Nodeid", + "remote_cluster_client": "Remote cluster client" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + 
"options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 9, + "maxDataPoints": 100, + "options": { + "legend": false, + "showValue": "never" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1) * 2", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / data", + "refId": "Node role: data" + }, + { + "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"master\"}[1m]) == 1\n) * 3\n", - "legendFormat": "{{node}}" + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1) * 3", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / master", + "refId": "Node role: master" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"ingest\"}[1m]) == 1\n) * 4\n", - "legendFormat": "{{node}}" + 
"expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1) * 4", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / ingest", + "refId": "Node role: ingest" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"cluster_manager\"}[1m]) == 1\n) * 5\n", - "legendFormat": "{{node}}" + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1) * 5", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / cluster_manager", + "refId": "Node role: cluster_manager" }, { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node,role) (\n max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", role=\"remote_cluster_client\"}[1m]) == 1\n) * 6\n", - "legendFormat": "{{node}}" + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1) * 6", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / remote_client", + "refId": "Node role: remote_cluster_client" } ], "title": "Roles timeline", "type": "status-history" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 24 + }, + "id": 10, + "panels": [ ], + "title": "Resource Usage", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, 
"description": "Top nodes by OS CPU usage across the OpenSearch cluster.", "fieldConfig": { @@ -613,11 +652,9 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "max": 100, "min": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -630,40 +667,35 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 9, + "h": 8, "w": 8, "x": 0, - "y": 4 + "y": 25 }, - "id": 8, + "id": 11, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(sum by(node, job,opensearch_cluster) (opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})))", + "expr": "topk(10, sort_desc(\n topk(10, sort_desc(sum by(job,opensearch_cluster,node) (opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})))\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by CPU usage" } ], "title": "Top nodes by CPU usage", @@ -671,7 +703,8 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The total count of circuit breakers tripped across the OpenSearch cluster.", "fieldConfig": { @@ -679,9 +712,7 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -694,41 +725,36 @@ ] }, "unit": "trips" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 9, + "h": 8, "w": 8, "x": 8, - "y": 4 + "y": 25 }, - "id": 9, + "id": 12, "options": { - "displayMode": "gradient", - 
"minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by(job,opensearch_cluster, node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]))", + "expr": "sum by (job,opensearch_cluster) (\n increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Breakers tripped" } ], "title": "Breakers tripped", @@ -736,17 +762,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, - "description": "Shard status counts across the Opensearch cluster.", + "description": "Shard status counts across the OpenSearch cluster.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -759,48 +784,57 @@ ] }, "unit": "shards" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 9, + "h": 8, "w": 8, "x": 16, - "y": 4 + "y": 25 }, - "id": 10, + "id": 13, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by(type, 
job,opensearch_cluster) (opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "min by (job,opensearch_cluster,type) (\n opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{type}}" + "legendFormat": "{{type}}", + "refId": "Shard status" } ], "title": "Shard status", "type": "bargauge" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 33 + }, + "id": 14, + "panels": [ ], + "title": "Storage and Tasks", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by disk usage across the OpenSearch cluster.", "fieldConfig": { @@ -808,11 +842,9 @@ "color": { "mode": "thresholds" }, - "mappings": [ ], "max": 100, "min": 0, "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -825,40 +857,35 @@ ] }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 10, + "h": 8, "w": 8, "x": 0, - "y": 13 + "y": 34 }, - "id": 11, + "id": 15, "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + ] + } }, - "pluginVersion": "9.4.3", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc((100 * (sum by(node, job,opensearch_cluster) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})- sum by(node, job,opensearch_cluster) (opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})) / sum by(node, job,opensearch_cluster) 
(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))))", + "expr": "topk(10, sort_desc((100 * (sum by(job,opensearch_cluster,node) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})- sum by(job,opensearch_cluster,node) (opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})) / sum by(job,opensearch_cluster,node) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by disk usage" } ], "title": "Top nodes by disk usage", @@ -866,86 +893,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The total count of documents indexed across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "documents" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, - "w": 8, + "h": 8, + "w": 16, "x": 8, - "y": 13 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": 
"list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 34 }, + "id": 16, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (opensearch_indices_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_indices_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Total documents" } ], "title": "Total documents", @@ -953,86 +929,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of tasks waiting to be executed across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "tasks" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, + "h": 8, "w": 8, - "x": 16, - "y": 13 - }, - "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "x": 0, + "y": 42 }, + "id": 17, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (opensearch_cluster_pending_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_cluster_pending_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Pending tasks" } ], "title": "Pending tasks", @@ -1040,86 +965,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The total size of the store across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "bytes" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, + "h": 8, "w": 8, "x": 8, - "y": 18 - }, - "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - 
"showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 42 }, + "id": 18, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (opensearch_indices_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_indices_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "legendFormat": "{{opensearch_cluster}}", + "refId": "Store size" } ], "title": "Store size", @@ -1127,86 +1001,34 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The max wait time for tasks to be executed across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { - "h": 5, + "h": 8, "w": 8, "x": 16, - "y": 18 - }, - "id": 15, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - 
"mode": "single", - "sort": "none" - } + "y": 42 }, + "id": 19, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by(job,opensearch_cluster) (opensearch_cluster_task_max_waiting_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})", + "expr": "max by (job,opensearch_cluster) (\n opensearch_cluster_task_max_waiting_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{opensearch_cluster}}" + "instant": false, + "legendFormat": "{{opensearch_cluster}}", + "refId": "Max task wait time" } ], "title": "Max task wait time", @@ -1214,106 +1036,47 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, - "y": 23 + "y": 50 }, - "id": 16, - "targets": [ ], - "title": "Cluster search and index summary", + "id": 20, + "panels": [ ], + "title": "Search Performance", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 24 - }, - "id": 17, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 51 }, + "id": 21, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}\n)))\n", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{index}}" + "instant": false, + "legendFormat": "{{index}}", + "refId": "Top indices by request rate" } ], "title": "Top indices by request rate", @@ -1321,87 +1084,35 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 24 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 51 }, + "id": 22, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "topk(10, sort_desc(sum by(index, job,opensearch_cluster) ((increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1))))\n", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices 
by request latency" } ], "title": "Top indices by request latency", @@ -1409,181 +1120,84 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percent" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 24 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 51 }, + "id": 23, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n 100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n 
clamp_min((opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n ))))\n", + "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n 100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n clamp_min((opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n ))))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by combined cache hit ratio" } ], "title": "Top indices by combined cache hit ratio", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 59 + }, + "id": 24, + "panels": [ ], + "title": "Ingest 
Performance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by rate of ingest across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "Bps" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 32 - }, - "id": 20, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 60 }, + "id": 25, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(node, job,opensearch_cluster) (rate(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])))", + "expr": "topk(10, sum by(job,opensearch_cluster,node) (rate(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by ingest 
rate" } ], "title": "Top nodes by ingest rate", @@ -1591,91 +1205,36 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by ingestion latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 32 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 60 }, + "id": 26, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster, node) (\n increase(opensearch_ingest_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]) / \n clamp_min(increase(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]), 1)))\n", + "expr": "topk(10, sum by(job,opensearch_cluster,node) 
(increase(opensearch_ingest_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]) / clamp_min(increase(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]), 1)))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by ingest latency" } ], "title": "Top nodes by ingest latency", @@ -1683,174 +1242,85 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top nodes by ingestion failures across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "errors" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 32 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 60 }, + "id": 27, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster, node) 
(increase(opensearch_ingest_total_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "expr": "topk(10, sum by(job,opensearch_cluster,node) (increase(opensearch_ingest_total_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{node}}" + "legendFormat": "{{node}}", + "refId": "Top nodes by ingest errors" } ], "title": "Top nodes by ingest errors", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 68 + }, + "id": 28, + "panels": [ ], + "title": "Indexing Performance", + "type": "row" + }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by rate of document indexing across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "documents/s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 0, - "y": 40 - }, - "id": 23, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - 
"sort": "none" - } + "y": 69 }, + "id": 29, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(job,opensearch_cluster, index) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "expr": "topk(10, avg by(index, job,opensearch_cluster) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by index rate" } ], "title": "Top indices by index rate", @@ -1858,87 +1328,36 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by indexing latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "s" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 8, - "y": 40 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": 
"none" - } + "y": 69 }, + "id": 30, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(job,opensearch_cluster, index) \n(increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]) / \nclamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1)))\n", + "expr": "topk(10, avg by(index, job,opensearch_cluster) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1)))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by index latency" } ], "title": "Top indices by index latency", @@ -1946,97 +1365,46 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Top indices by index document failures across the OpenSearch cluster.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, "unit": "failures" - }, - "overrides": [ ] + } }, "gridPos": { "h": 8, "w": 8, "x": 16, - "y": 40 - }, - "id": 25, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "y": 69 }, + "id": 31, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(job,opensearch_cluster, index) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "expr": "topk(10, avg by(index, job,opensearch_cluster) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Top indices by index failures" } ], "title": "Top indices by index failures", "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 36, + "refresh": "30s", + "schemaVersion": 39, "tags": [ - "opensearch-mixin" + "opensearch" ], "templating": { "list": [ @@ -2044,7 +1412,7 @@ "label": "Prometheus data source", "name": "prometheus_datasource", "query": "prometheus", - "regex": "", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", "type": "datasource" }, { @@ -2076,14 +1444,37 @@ "refresh": 2, "sort": 1, "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": 
"label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" } ] }, "time": { - "from": "now-1h", + "from": "now-30m", "to": "now" }, "timezone": "default", - "title": "OpenSearch cluster overview", + "title": "OpenSearch Cluster Overview", "uid": "opensearch-cluster-overview" } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-logs.json b/opensearch-mixin/dashboards_out/opensearch-logs.json new file mode 100644 index 000000000..6ea00e8ea --- /dev/null +++ b/opensearch-mixin/dashboards_out/opensearch-logs.json @@ -0,0 +1,297 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Opensearch Cluster Overview", + "type": "link", + "url": "/d/opensearch-cluster-overview" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "Logs volume grouped by \"level\" label.", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 50, + "stacking": { + "mode": "normal" + } + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(E|e)(rr.*|RR.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, 
+ { + "matcher": { + "id": "byRegexp", + "options": "(W|w)(arn.*|ARN.*|rn|RN)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(T|t)(race|RACE)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "logs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "text", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "maxDataPoints": 100, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "sum by (level) (count_over_time({opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "legendFormat": "{{ level }}" + } + ], + "title": "Logs volume", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value", + "renamePattern": "logs" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "dedupStrategy": "exact", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showTime": false, + "wrapLogMessage": false + }, 
+ "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "expr": "{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"} \n|~ \"$regex_search\"\n\n\n" + } + ], + "title": "Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "opensearch" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": 
"default", + "title": "OpenSearch Logs", + "uid": "opensearch-logs" + } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-node-overview.json b/opensearch-mixin/dashboards_out/opensearch-node-overview.json new file mode 100644 index 000000000..845b8673d --- /dev/null +++ b/opensearch-mixin/dashboards_out/opensearch-node-overview.json @@ -0,0 +1,1288 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, + "links": [ + { + "keepTime": true, + "title": "Opensearch Logs", + "type": "link", + "url": "/d/opensearch-logs" + }, + { + "keepTime": true, + "title": "Opensearch Cluster Overview", + "type": "link", + "url": "/d/opensearch-cluster-overview" + }, + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "opensearch" + ], + "title": "All dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Node Roles", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "maxDataPoints": 100, + "options": { + "legend": false, + "showValue": "never" + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) 
(max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1) * 2", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / data", + "refId": "Node role: data" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1) * 3", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / master", + "refId": "Node role: master" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1) * 4", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / ingest", + "refId": "Node role: ingest" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1) * 5", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / cluster_manager", + "refId": "Node role: cluster_manager" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1) * 6", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / remote_client", + "refId": "Node role: 
remote_cluster_client" + } + ], + "title": "Roles timeline", + "type": "status-history" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "0": { + "color": "super-light-orange", + "index": 5, + "text": "False" + }, + "1": { + "color": "light-green", + "index": 3, + "text": "True" + }, + "Data": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "Ingest": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "Master": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "Remote cluster client": { + "color": "light-orange", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 3, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "max by (job,opensearch_cluster, nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[1d]))", + "format": "time_series", + "instant": true, + "legendFormat": "{{node}}: Node role bool last seen", + "refId": "Node role bool last seen" + } + ], + "title": "Roles", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "role" + } + }, + { + "id": "merge", + "options": { } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "cluster_manager": 108, + "data": 105, + "ingest": 106, + "job": 3, + "master": 104, + "node": 3, 
+ "nodeid": 3, + "opensearch_cluster": 3, + "remote_cluster_client": 107 + }, + "renameByName": { + "Time": "", + "cluster": "Cluster", + "cluster_manager": "Cluster manager", + "data": "Data", + "ingest": "Ingest", + "master": "Master", + "node": "Node", + "nodeid": "Nodeid", + "remote_cluster_client": "Remote cluster client" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 17 + }, + "id": 4, + "panels": [ ], + "title": "Node health", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "CPU usage percentage of the node's Operating System.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 5, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "CPU %%" + } + ], + "title": "Node CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Memory usage percentage of the node for the Operating System and OpenSearch", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + 
"unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 6, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "opensearch_os_mem_used_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "Memory used %%" + } + ], + "title": "Node memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Node file system read and write data.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 1, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/time|used|busy|util/" + }, + "properties": [ + { + "id": "custom.axisSoftMax", + "value": 100 + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "unit", + "value": "percent" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 7, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_fs_io_total_read_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - read", + "refId": "FS read bytes/s" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by 
(job,opensearch_cluster,node) (rate(opensearch_fs_io_total_write_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - write", + "refId": "FS write bytes/s" + } + ], + "title": "Node I/O", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Number of open connections for the selected node.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never", + "stacking": { + "mode": "normal" + } + } + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 8, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (opensearch_transport_server_open_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "Transport server open" + } + ], + "title": "Node open connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Disk usage percentage of the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 1, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 26 + }, + "id": 9, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": 
"v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 - (100 * opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}, 1))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "FS used %%" + } + ], + "title": "Node disk usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Percentage of swap space used by OpenSearch and the Operating System on the selected node.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "decimals": 1, + "max": 100, + "min": 0, + "unit": "percent" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 26 + }, + "id": 10, + "options": { + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 * opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} + opensearch_os_swap_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}), 1)", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}", + "refId": "Swap used %%" + } + ], + "title": "Node memory swap", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + 
"uid": "-- Mixed --" + }, + "description": "Network traffic on the node's Operating System.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 26 + }, + "id": 11, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_rx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - received", + "refId": "Transport RX bitrate" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_tx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}} - sent", + "refId": "Transport TX bitrate" + } + ], + "title": "Node network traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Circuit breakers tripped on the selected node by type", + "fieldConfig": { + "defaults": { + "unit": "trips" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 26 + }, + "id": 12, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (name, job,opensearch_cluster,node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": 
"time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{node}} - {{ name }}", + "refId": "Circuit breaker trips by name" + } + ], + "title": "Circuit breakers", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 34 + }, + "id": 13, + "panels": [ ], + "title": "Node JVM", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM heap memory usage vs committed.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 35 + }, + "id": 14, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_heap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM heap used", + "refId": "JVM heap used" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_heap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM heap committed", + "refId": "JVM heap committed" + } + ], + "title": "JVM heap used vs committed", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM non-heap memory usage vs committed.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + 
"showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 35 + }, + "id": 15, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_nonheap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM non-heap used", + "refId": "JVM non-heap used" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_nonheap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM non-heap committed", + "refId": "JVM non-heap committed" + } + ], + "title": "JVM non-heap used vs committed", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM thread count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "threads" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 35 + }, + "id": 16, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM threads", + "refId": "JVM threads" + } + ], + "title": "JVM threads", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + 
"uid": "-- Mixed --" + }, + "description": "JVM buffer pool usage.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 35 + }, + "id": 17, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{ bufferpool }}", + "refId": "JVM buffer pools" + } + ], + "title": "JVM buffer pools", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM uptime in seconds.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 43 + }, + "id": 18, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_uptime_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM uptime", + "refId": "JVM uptime" + } + ], + "title": "JVM uptime", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM garbage collection count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + 
"showPoints": "never" + }, + "unit": "collections" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 43 + }, + "id": 19, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (increase(opensearch_jvm_gc_collection_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM GC collections", + "refId": "JVM GC collections" + } + ], + "title": "JVM garbage collections", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM garbage collection time in milliseconds.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2 + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 43 + }, + "id": 20, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (increase(opensearch_jvm_gc_collection_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: JVM GC time", + "refId": "JVM GC time" + } + ], + "title": "JVM garbage collection time", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "JVM buffer pool usage by pool.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 43 + }, + "id": 
21, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "100 * (sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})) / clamp_min((sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_total_capacity_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})),1)", + "format": "time_series", + "instant": false, + "legendFormat": "{{ bufferpool }}", + "refId": "JVM bufferpool used %%" + } + ], + "title": "JVM buffer pool usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 51 + }, + "id": 22, + "panels": [ ], + "title": "Thread pools", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Thread pool thread count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "threads" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 23, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) ((opensearch_threadpool_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}))", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: Threadpool threads", + "refId": "Threadpool threads" + } + ], + "title": "Thread pool threads", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "Thread pool task count.", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 
5, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, + "unit": "tasks" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 24, + "pluginVersion": "v11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "sum by (job,opensearch_cluster) (opensearch_threadpool_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "format": "time_series", + "instant": false, + "legendFormat": "{{node}}: Threadpool tasks", + "refId": "Threadpool tasks" + } + ], + "title": "Thread pool tasks", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "opensearch" + ], + "templating": { + "list": [ + { + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "type": "datasource" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Opensearch_cluster", + "multi": true, + "name": "opensearch_cluster", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Node", + "multi": true, + "name": "node", + "query": 
"label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timezone": "default", + "title": "OpenSearch Node Overview", + "uid": "opensearch-node-overview" + } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/search-and-index-overview.json b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json similarity index 52% rename from opensearch-mixin/dashboards_out/search-and-index-overview.json rename to opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json index e52086683..4ead34a3c 100644 --- a/opensearch-mixin/dashboards_out/search-and-index-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json @@ -1,87 +1,55 @@ { + "annotations": { + "list": [ ] + }, + "editable": false, + "id": null, "links": [ { - "asDropdown": false, + "keepTime": true, + "title": "Opensearch Logs", + "type": "link", + "url": "/d/opensearch-logs" + }, + { + "keepTime": true, + "title": "Opensearch Cluster Overview", + "type": "link", + "url": "/d/opensearch-cluster-overview" + }, + { + "asDropdown": true, "includeVars": true, "keepTime": true, "tags": [ - "opensearch-mixin" + "opensearch" ], - "title": "Other Opensearch dashboards", + "title": "All dashboards", "type": "dashboards" } ], "panels": [ { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 0 }, "id": 1, - "targets": [ ], - "title": "Request performance", + "panels": [ ], + "title": "Search Performance", "type": "row" }, { "datasource": { - "uid": 
"${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of fetch, scroll, and query requests by selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "reqps" }, "overrides": [ @@ -115,44 +83,47 @@ }, "id": 2, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{index}} - query", + "refId": "Search queries in-flight" }, { "datasource": { + "type": "prometheus", 
"uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - fetch" + "legendFormat": "{{index}} - fetch", + "refId": "Search fetch in-flight" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - scroll" + "legendFormat": "{{index}} - scroll", + "refId": "Search scroll in-flight" } ], "title": "Request rate", @@ -160,58 +131,12 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Latency of fetch, scroll, and query requests by selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": 
"linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "s" }, "overrides": [ @@ -245,47 +170,50 @@ }, "id": 3, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{index}} - query", + "refId": "Search query latency (avg)" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", "format": "time_series", + "instant": false, "interval": "1m", 
"intervalFactor": 2, - "legendFormat": "{{index}} - fetch" + "legendFormat": "{{index}} - fetch", + "refId": "Search fetch latency (avg)" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - scroll" + "legendFormat": "{{index}} - scroll", + "refId": "Search scroll latency (avg)" } ], "title": "Request latency", @@ -293,58 +221,12 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Ratio of query cache and request cache hits and misses.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "percent" }, "overrides": [ @@ -378,35 +260,35 @@ }, "id": 4, "options": { - "legend": { - "calcs": [ 
], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - request" + "legendFormat": "{{index}} - request", + "refId": "Request cache hit rate %%" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{index}} - query", + "refId": "Query cache hit rate %%" } ], "title": "Cache hit ratio", @@ -414,47 +296,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, 
"description": "Total evictions count by cache type for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -499,47 +347,50 @@ }, "id": 5, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_querycache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_querycache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - query cache" + "legendFormat": "{{index}} - query cache", + "refId": "Query cache evictions" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_requestcache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_requestcache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - request cache" + "legendFormat": "{{index}} - request cache", + "refId": "Request cache evictions" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_fielddata_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_fielddata_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}} - field data" + "legendFormat": "{{index}} - field data", + "refId": "Fielddata evictions" } ], "title": "Evictions", @@ -547,74 +398,25 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 9 }, "id": 6, - "targets": [ ], - "title": "Index performance", + "panels": [ ], + "title": "Indexing Performance", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of indexed documents for the selected index.", "fieldConfig": { "defaults": { - 
"color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, "unit": "documents/s" }, "overrides": [ @@ -647,27 +449,19 @@ "y": 10 }, "id": 7, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\",context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing current" } ], "title": "Index rate", @@ -675,47 +469,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Document indexing latency for the selected index.", "fieldConfig": { "defaults": 
{ - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -728,29 +488,7 @@ ] }, "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -759,28 +497,19 @@ "y": 10 }, "id": 8, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]),1))", + "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}[$__interval:]),1))", "format": "time_series", + "instant": false, "interval": "1m", - "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing latency (avg)" } ], "title": "Index latency", @@ -788,47 +517,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Number of indexing failures for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -872,28 +567,20 @@ "y": 10 }, "id": 9, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\",context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", + "instant": false, "interval": "1m", "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing failed (avg)" } ], "title": "Index failures", @@ -901,47 +588,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Index flush latency for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -985,27 +638,19 @@ "y": 10 }, "id": 10, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_flush_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Flush latency (avg)" } ], "title": "Flush latency", @@ -1013,47 +658,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Index merge time for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "drawStyle": "points" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1098,44 +712,47 @@ }, "id": 11, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) 
(increase(opensearch_index_merges_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - total" + "legendFormat": "{{index}} - total", + "refId": "Merge time increase" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_stopped_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - stopped" + "legendFormat": "{{index}} - stopped", + "refId": "Merge stopped time increase" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_throttled_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - throttled" + "legendFormat": "{{index}} - throttled", + "refId": "Merge throttled time increase" } ], "title": "Merge time", @@ -1143,47 +760,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Index refresh latency for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, 
- "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1227,27 +810,19 @@ "y": 18 }, "id": 12, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_refresh_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Refresh latency (avg)" } ], "title": "Refresh latency", @@ -1255,47 +830,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of translog operations for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1339,27 +880,19 @@ "y": 18 }, "id": 13, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_translog_operations_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_translog_operations_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Translog operations" } ], "title": "Translog operations", @@ -1367,47 +900,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Rate of documents deleted for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1451,27 +950,19 @@ "y": 18 }, "id": 14, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_indexing_delete_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_delete_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing delete current" } ], "title": "Docs deleted", @@ -1479,63 +970,26 @@ }, { "collapsed": false, - "datasource": { - "uid": "${prometheus_datasource}" - }, "gridPos": { "h": 1, - "w": 24, + "w": 0, "x": 0, "y": 26 }, "id": 15, - "targets": [ ], - "title": "Index capacity", + "panels": [ ], + "title": "Index Capacity", "type": "row" }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Number of indexed documents for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", 
- "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1579,27 +1033,19 @@ "y": 27 }, "id": 16, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Indexing count (avg)" } ], "title": "Documents indexed", @@ -1607,47 +1053,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Current number of segments for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1691,27 +1103,19 @@ "y": 27 }, "id": 17, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_segments_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_segments_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Segments number" } ], "title": "Segment count", @@ -1719,47 +1123,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Number of merge operations for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "drawStyle": 
"points" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -1803,27 +1176,19 @@ "y": 27 }, "id": 18, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_docs_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "expr": "avg by(job,opensearch_cluster) (increase(opensearch_index_merges_total_docs_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}[$__interval:])) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{node}}: Merge docs increase", + "refId": "Merge docs increase" } ], "title": "Merge count", @@ -1831,47 +1196,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Size of query cache and request cache.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": 
"absolute", "steps": [ { "color": "green", @@ -1916,35 +1247,35 @@ }, "id": 19, "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, "tooltip": { - "mode": "multi", - "sort": "none" + "mode": "multi" } }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_querycache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_index_querycache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - query" + "legendFormat": "{{opensearch_cluster}}: Query cache memory bytes", + "refId": "Query cache memory bytes" }, { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_requestcache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster) (\n opensearch_index_requestcache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}} - request" + "legendFormat": "{{opensearch_cluster}}: Request cache memory bytes", + "refId": "Request cache memory bytes" } ], "title": "Cache size", @@ -1952,47 +1283,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Size of the store in bytes for the selected 
index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2036,27 +1333,19 @@ "y": 35 }, "id": 20, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Store size bytes" } ], "title": "Store size", @@ -2064,47 +1353,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Memory used by segments for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - 
"axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2148,27 +1403,19 @@ "y": 35 }, "id": 21, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_segments_memory_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"})", + "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_segments_memory_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Segments memory bytes" } ], "title": "Segment size", @@ -2176,47 +1423,16 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "Size of merge operations in bytes for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": 
"points", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "drawStyle": "points" }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2260,27 +1476,19 @@ "y": 35 }, "id": 22, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_merges_current_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) > 0", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{index}}", + "refId": "Merge current size bytes" } ], "title": "Merge size", @@ -2288,47 +1496,13 @@ }, { "datasource": { - "uid": "${prometheus_datasource}" + "type": "datasource", + "uid": "-- Mixed --" }, "description": "The number of index shards for the selected index.", "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [ ], "thresholds": { - "mode": "absolute", "steps": [ { "color": "green", @@ -2372,37 +1546,29 @@ "y": 35 }, "id": 23, - "options": { - "legend": { - "calcs": [ ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { + "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (index) (avg by(job,opensearch_cluster,index) (opensearch_index_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", type=~\"active|active_primary\"}))", + "expr": "sum by (index) (avg by(job,opensearch_cluster) (opensearch_index_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", type=~\"active|active_primary\"}))", "format": "time_series", + "instant": false, "intervalFactor": 2, - "legendFormat": "{{index}}" + "legendFormat": "{{ index }}", + "refId": "Active shards per index" } ], "title": "Shard count", "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 36, + "refresh": "30s", + "schemaVersion": 39, "tags": [ - "opensearch-mixin" + "opensearch" ], "templating": { "list": [ @@ -2410,7 +1576,7 @@ "label": "Prometheus data source", "name": "prometheus_datasource", "query": "prometheus", - "regex": "", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", "type": "datasource" }, { @@ -2423,7 +1589,7 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\"}, job)", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", "refresh": 2, "sort": 1, "type": "query" @@ -2438,7 +1604,7 @@ "label": "Opensearch_cluster", "multi": true, "name": "opensearch_cluster", - "query": 
"label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -2450,21 +1616,29 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Index", + "label": "Node", "multi": true, - "name": "index", - "query": "label_values(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, index)", + "name": "node", + "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", "refresh": 2, "sort": 1, "type": "query" + }, + { + "hide": 2, + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "type": "datasource" } ] }, "time": { - "from": "now-1h", + "from": "now-30m", "to": "now" }, "timezone": "default", - "title": "OpenSearch search and index overview", + "title": "OpenSearch Search and Index Overview", "uid": "opensearch-search-and-index-overview" } \ No newline at end of file diff --git a/opensearch-mixin/g.libsonnet b/opensearch-mixin/g.libsonnet index 6da9f4eef..e6a2060ee 100644 --- a/opensearch-mixin/g.libsonnet +++ b/opensearch-mixin/g.libsonnet @@ -1 +1 @@ -import 'github.com/grafana/grafonnet/gen/grafonnet-v10.0.0/main.libsonnet' +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' diff --git a/opensearch-mixin/jsonnetfile.json b/opensearch-mixin/jsonnetfile.json index 53d0be67e..7205eeac9 100644 --- a/opensearch-mixin/jsonnetfile.json +++ b/opensearch-mixin/jsonnetfile.json @@ -1,33 +1,51 @@ { "version": 1, "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } + { + "source": { + "git": { + 
"remote": "https://github.com/grafana/grafonnet-lib.git", + "subdir": "grafonnet" + } + }, + "version": "master" }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "common-lib" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.4.0" + } + }, + "version": "main" }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet.git", - "subdir": "gen/grafonnet-v10.0.0" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" }, - "version": "main" - } + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "logs-lib" + } + }, + "version": "master" + } ], "legacyImports": true } diff --git a/opensearch-mixin/links.libsonnet b/opensearch-mixin/links.libsonnet new file mode 100644 index 000000000..867b91809 --- /dev/null +++ b/opensearch-mixin/links.libsonnet @@ -0,0 +1,33 @@ +local g = import './g.libsonnet'; + +{ + local link = g.dashboard.link, + new(this): + { + opensearchClusterOverview: + link.link.new('Opensearch Cluster Overview', '/d/' + this.grafana.dashboards['opensearch-cluster-overview.json'].uid) + + link.link.options.withKeepTime(true), + + // opensearchNodeOverview: + // link.link.new('Opensearch Node Overview', '/d/' + this.grafana.dashboards['opensearch-node-overview.json'].uid) + // + link.link.options.withKeepTime(true), + + // opensearchSearchAndIndexOverview: + // link.link.new('Opensearch Search and Index Overview', '/d/' + this.grafana.dashboards['opensearch-search-and-index-overview.json'].uid) + // + link.link.options.withKeepTime(true), + + otherDashboards: 
+ link.dashboards.new('All dashboards', this.config.dashboardTags) + + link.dashboards.options.withIncludeVars(true) + + link.dashboards.options.withKeepTime(true) + + link.dashboards.options.withAsDropdown(true), + } + + + if this.config.enableLokiLogs then + { + logs: + link.link.new('Opensearch Logs', '/d/' + this.grafana.dashboards['opensearch-logs.json'].uid) + + link.link.options.withKeepTime(true), + } + else {}, +} diff --git a/opensearch-mixin/main.libsonnet b/opensearch-mixin/main.libsonnet new file mode 100644 index 000000000..598ae832b --- /dev/null +++ b/opensearch-mixin/main.libsonnet @@ -0,0 +1,48 @@ +local alerts = import './alerts.libsonnet'; +local config = import './config.libsonnet'; +local dashboards = import './dashboards.libsonnet'; +local g = import './g.libsonnet'; +local links = import './links.libsonnet'; +local panels = import './panels.libsonnet'; +local rows = import './rows.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; + +{ + withConfigMixin(config): { + config+: config, + }, + + new(): { + local this = self, + config: config, + signals: + { + [sig]: commonlib.signals.unmarshallJsonMulti( + this.config.signals[sig], + type=this.config.metricsSource + ) + for sig in std.objectFields(this.config.signals) + }, + + grafana: { + variables: commonlib.variables.new( + filteringSelector=this.config.filteringSelector, + groupLabels=this.config.groupLabels, + instanceLabels=this.config.instanceLabels, + varMetric='opensearch_cluster_status', + customAllValue='.+', + enableLokiLogs=this.config.enableLokiLogs, + ), + annotations: {}, + links: links.new(this), + panels: panels.new(this), + dashboards: dashboards.new(this), + rows: rows.new(this), + }, + + prometheus: { + alerts: alerts.new(this), + recordingRules: {}, + }, + }, +} diff --git a/opensearch-mixin/mixin.libsonnet b/opensearch-mixin/mixin.libsonnet index 4d987cf31..d28e80b23 100644 --- a/opensearch-mixin/mixin.libsonnet +++ b/opensearch-mixin/mixin.libsonnet 
@@ -1,3 +1,31 @@ -(import 'dashboards/dashboards.libsonnet') + -(import 'alerts/alerts.libsonnet') + -(import 'config.libsonnet') +local mixinlib = import './main.libsonnet'; +local config = (import './config.libsonnet'); +local util = import 'grafana-cloud-integration-utils/util.libsonnet'; + + +local mixin = mixinlib.new() + + mixinlib.withConfigMixin( + { + filteringSelector: config.filteringSelector, + uid: config.uid, + enableLokiLogs: true, + } + ); + +local label_patch = { + cluster+: { + allValue: '.*', + }, +}; + +{ + grafanaDashboards+:: { + [fname]: + local dashboard = util.decorate_dashboard(mixin.grafana.dashboards[fname], tags=config.dashboardTags); + dashboard + util.patch_variables(dashboard, label_patch) + + for fname in std.objectFields(mixin.grafana.dashboards) + }, + prometheusAlerts+:: mixin.prometheus.alerts, + prometheusRules+:: mixin.prometheus.recordingRules, +} diff --git a/opensearch-mixin/panels.libsonnet b/opensearch-mixin/panels.libsonnet index 42bc830bc..1b2e9e165 100644 --- a/opensearch-mixin/panels.libsonnet +++ b/opensearch-mixin/panels.libsonnet @@ -1,228 +1,1039 @@ -// variables.libsonnet local g = import './g.libsonnet'; local var = g.dashboard.variable; local commonlib = import 'common-lib/common/main.libsonnet'; local utils = commonlib.utils; { - new( - groupLabels, - instanceLabels, - variables, - ): { - - local promDatasource = { - uid: '${%s}' % variables.datasources.prometheus.name, - }, - osRolesTimeline: - g.panel.statusHistory.new('Roles timeline') - + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') - + g.panel.statusHistory.options.withShowValue('never') - + g.panel.statusHistory.options.withLegend(false) - + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) - + g.panel.statusHistory.queryOptions.withTargets( - [ - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="data"}[1m]) == 1 
- ) * 2 - ||| - % { - queriesSelector: variables.queriesSelector, - }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="master"}[1m]) == 1 - ) * 3 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="ingest"}[1m]) == 1 - ) * 4 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="cluster_manager"}[1m]) == 1 - ) * 5 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - g.query.prometheus.new( - promDatasource.uid, - ||| - max by (node,role) ( - max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="remote_cluster_client"}[1m]) == 1 - ) * 6 - ||| - % { queriesSelector: variables.queriesSelector }, - ) - + g.query.prometheus.withLegendFormat('{{node}}'), - ] - ) - + g.panel.statusHistory.standardOptions.withMappings([ - { - type: 'value', - options: { - '2': { - color: 'light-purple', - index: 0, - text: 'data', - }, - '3': { - color: 'light-green', - index: 1, - text: 'master', - }, - '4': { - color: 'light-blue', - index: 2, - text: 'ingest', - }, - '5': { - text: 'cluster_manager', - color: 'light-yellow', - index: 3, + new(this):: + { + local signals = this.signals, + + osRoles: + g.panel.table.new('Roles') + + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') + + g.panel.table.queryOptions.withTargets([ + signals.roles.node_role_last_seen.asTarget() + + g.query.prometheus.withInstant(true), + ]) + + 
g.panel.table.queryOptions.withTransformations([ + {id: 'labelsToFields', options: {mode: 'columns', valueLabel: 'role'}}, + {id: 'merge', options: {}}, + { + id: 'organize', + options: { + excludeByName: {Time: true}, + indexByName: { + Time: 0, node: 3, nodeid: 3, master: 104, data: 105, + ingest: 106, remote_cluster_client: 107, cluster_manager: 108, + } + {[k]: 3 for k in this.config.groupLabels + this.config.instanceLabels}, + renameByName: { + Time: '', cluster: 'Cluster', cluster_manager: 'Cluster manager', + data: 'Data', ingest: 'Ingest', master: 'Master', + node: 'Node', nodeid: 'Nodeid', remote_cluster_client: 'Remote cluster client', + }, }, - '6': { - text: 'remote_cluster_client', - color: 'super-light-red', - index: 4, + }, + ]) + + g.panel.table.standardOptions.withMappings([ + g.panel.table.standardOptions.mapping.ValueMap.withType() + + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ + '0': {color: 'super-light-orange', index: 5, text: 'False'}, + '1': {color: 'light-green', index: 3, text: 'True'}, + Data: {color: 'light-purple', index: 0, text: 'data'}, + Ingest: {color: 'light-blue', index: 2, text: 'ingest'}, + Master: {color: 'light-green', index: 1, text: 'master'}, + 'Remote cluster client': {color: 'light-orange', index: 4, text: 'remote_cluster_client'}, + }), + ]) + + g.panel.table.standardOptions.withOverrides([ + g.panel.table.fieldOverride.byRegexp.new('/Data|Master|Ingest|Remote.+|Cluster.+/') + + g.panel.table.fieldOverride.byRegexp.withProperty('custom.cellOptions', {type: 'color-text'}), + ]), + + osRolesTimeline: + g.panel.statusHistory.new('Roles timeline') + + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') + + g.panel.statusHistory.options.withShowValue('never') + + g.panel.statusHistory.options.withLegend(false) + + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + + g.panel.statusHistory.queryOptions.withTargets([ + signals.roles.node_role_data.asTarget(), + 
signals.roles.node_role_master.asTarget(), + signals.roles.node_role_ingest.asTarget(), + signals.roles.node_role_cluster_manager.asTarget(), + signals.roles.node_role_remote_cluster_client.asTarget(), + ]) + + g.panel.statusHistory.standardOptions.withMappings([ + { + type: 'value', + options: { + '2': {color: 'light-purple', index: 0, text: 'data'}, + '3': {color: 'light-green', index: 1, text: 'master'}, + '4': {color: 'light-blue', index: 2, text: 'ingest'}, + '5': {color: 'light-yellow', index: 3, text: 'cluster_manager'}, + '6': {color: 'super-light-red', index: 4, text: 'remote_cluster_client'}, }, }, - }, - ]), - - osRoles: - g.panel.table.new('Roles') - + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') - + g.panel.table.queryOptions.withTargets([ - g.query.prometheus.new( - promDatasource.uid, - 'max by (%(agg)s) (last_over_time(opensearch_node_role_bool{%(queriesSelector)s}[1d]))' - % { - queriesSelector: variables.queriesSelector, - agg: std.join(',', groupLabels + instanceLabels + ['node', 'nodeid', 'role', 'primary_ip']), + ]), + + // Cluster Overview Panels + clusterStatusPanel: + g.panel.stat.new('Cluster status') + + g.panel.stat.panelOptions.withDescription('The overall health and availability of the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_status.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.withMappings([ + g.panel.stat.standardOptions.mapping.ValueMap.withType() + + g.panel.stat.standardOptions.mapping.ValueMap.withOptions({ + '0': {index: 0, text: 'Green'}, + '1': {index: 1, text: 'Yellow'}, + '2': {index: 2, text: 'Red'}, + }), + ]) + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + 
g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('yellow') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(2), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + nodeCountPanel: + g.panel.stat.new('Node count') + + g.panel.stat.panelOptions.withDescription('The number of running nodes across the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_nodes_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + dataNodeCountPanel: + g.panel.stat.new('Data node count') + + g.panel.stat.panelOptions.withDescription('The number of data nodes in the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_datanodes_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + 
g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + shardCountPanel: + g.panel.stat.new('Shard count') + + g.panel.stat.panelOptions.withDescription('The number of shards in the OpenSearch cluster across all indices.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_shards_number_total.withExprWrappersMixin(['sum(', ')']).asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(1), + ]) + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + activeShardsPercentagePanel: + g.panel.stat.new('Active shards %') + + g.panel.stat.panelOptions.withDescription('Percent of active shards across the OpenSearch cluster.') + + g.panel.stat.queryOptions.withTargets([ + signals.cluster.cluster_shards_active_percent.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.stat.standardOptions.color.withMode('thresholds') + + g.panel.stat.standardOptions.thresholds.withSteps([ + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(null), + g.panel.stat.standardOptions.threshold.step.withColor('red') + + g.panel.stat.standardOptions.threshold.step.withValue(0), + g.panel.stat.standardOptions.threshold.step.withColor('yellow') + + 
g.panel.stat.standardOptions.threshold.step.withValue(1), + g.panel.stat.standardOptions.threshold.step.withColor('green') + + g.panel.stat.standardOptions.threshold.step.withValue(100), + ]) + + g.panel.stat.standardOptions.withUnit('percent') + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + topNodesByCPUUsagePanel: + g.panel.barGauge.new('Top nodes by CPU usage') + + g.panel.barGauge.panelOptions.withDescription('Top nodes by OS CPU usage across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.topk.os_cpu_percent_topk.withExprWrappersMixin(['topk(10, sort_desc(', ')']).asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withMin(0) + + g.panel.barGauge.standardOptions.withMax(100) + + g.panel.barGauge.standardOptions.withUnit('percent') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + breakersTrippedPanel: + g.panel.barGauge.new('Breakers tripped') + + g.panel.barGauge.panelOptions.withDescription('The total count of circuit breakers tripped across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.topk.circuitbreaker_tripped_count_sum.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + 
g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withUnit('trips') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + shardStatusPanel: + g.panel.barGauge.new('Shard status') + + g.panel.barGauge.panelOptions.withDescription('Shard status counts across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.cluster.cluster_shards_number_by_type.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withUnit('shards') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + topNodesByDiskUsagePanel: + g.panel.barGauge.new('Top nodes by disk usage') + + g.panel.barGauge.panelOptions.withDescription('Top nodes by disk usage across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withTargets([ + signals.topk.fs_path_used_percent_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.barGauge.standardOptions.color.withMode('thresholds') + + g.panel.barGauge.standardOptions.thresholds.withSteps([ + g.panel.barGauge.standardOptions.threshold.step.withColor('green') + + g.panel.barGauge.standardOptions.threshold.step.withValue(null), + g.panel.barGauge.standardOptions.threshold.step.withColor('red') + + g.panel.barGauge.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.barGauge.standardOptions.withMin(0) + + g.panel.barGauge.standardOptions.withMax(100) + + 
g.panel.barGauge.standardOptions.withUnit('percent') + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + totalDocumentsPanel: + g.panel.timeSeries.new('Total documents') + + g.panel.timeSeries.panelOptions.withDescription('The total count of documents indexed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.cluster.indices_indexing_index_count_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents'), + + pendingTasksPanel: + g.panel.timeSeries.new('Pending tasks') + + g.panel.timeSeries.panelOptions.withDescription('The number of tasks waiting to be executed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.cluster.cluster_pending_tasks_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('tasks'), + + storeSizePanel: + g.panel.timeSeries.new('Store size') + + g.panel.timeSeries.panelOptions.withDescription('The total size of the store across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.cluster.indices_store_size_bytes_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes'), + + maxTaskWaitTimePanel: + g.panel.timeSeries.new('Max task wait time') + + g.panel.timeSeries.panelOptions.withDescription('The max wait time for tasks to be executed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([signals.cluster.cluster_task_max_wait_seconds.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topIndicesByRequestRatePanel: + g.panel.timeSeries.new('Top indices by request rate') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.') + + 
g.panel.timeSeries.queryOptions.withTargets([signals.topk.search_current_inflight_topk.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + topIndicesByRequestLatencyPanel: + g.panel.timeSeries.new('Top indices by request latency') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.search_avg_latency_topk.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topIndicesByCombinedCacheHitRatioPanel: + g.panel.timeSeries.new('Top indices by combined cache hit ratio') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.request_query_cache_hit_rate_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('percent'), + + topNodesByIngestRatePanel: + g.panel.timeSeries.new('Top nodes by ingest rate') + + g.panel.timeSeries.panelOptions.withDescription('Top nodes by rate of ingest across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.ingest_throughput_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps'), + + topNodesByIngestLatencyPanel: + g.panel.timeSeries.new('Top nodes by ingest latency') + + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.ingest_latency_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topNodesByIngestErrorsPanel: + 
g.panel.timeSeries.new('Top nodes by ingest errors') + + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion failures across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.ingest_failures_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('errors'), + + topIndicesByIndexRatePanel: + g.panel.timeSeries.new('Top indices by index rate') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by rate of document indexing across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.indexing_current_topk.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents/s'), + + topIndicesByIndexLatencyPanel: + g.panel.timeSeries.new('Top indices by index latency') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by indexing latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.indexing_latency_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s'), + + topIndicesByIndexFailuresPanel: + g.panel.timeSeries.new('Top indices by index failures') + + g.panel.timeSeries.panelOptions.withDescription('Top indices by index document failures across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.topk.indexing_failed_topk.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('failures'), + + // Node Overview Panels - Refactored to use modern patterns and signals + + // Node CPU usage + nodeCpuUsage: + g.panel.timeSeries.new('Node CPU usage') + + g.panel.timeSeries.panelOptions.withDescription('CPU usage 
percentage of the node\'s Operating System.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_cpu_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node memory usage + nodeMemoryUsage: + g.panel.timeSeries.new('Node memory usage') + + g.panel.timeSeries.panelOptions.withDescription('Memory usage percentage of the node for the Operating System and OpenSearch') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_mem_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + 
g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node I/O + nodeIO: + g.panel.timeSeries.new('Node I/O') + + g.panel.timeSeries.panelOptions.withDescription('Node file system read and write data.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.fs_read_bps.asTarget(), + signals.node.fs_write_bps.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(1) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.standardOptions.withOverrides([ + g.panel.timeSeries.fieldOverride.byRegexp.new('/time|used|busy|util/') + + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('custom.axisSoftMax', 100) + + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('custom.drawStyle', 'points') + + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('unit', 'percent'), + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node open connections + nodeOpenConnections: + g.panel.timeSeries.new('Node open connections') + + g.panel.timeSeries.panelOptions.withDescription('Number of open connections for the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.transport_open_connections.asTarget()]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node disk usage + nodeDiskUsage: + g.panel.timeSeries.new('Node disk usage') + + g.panel.timeSeries.panelOptions.withDescription('Disk usage percentage of the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.fs_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(1) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node memory swap + nodeMemorySwap: + g.panel.timeSeries.new('Node memory swap') + + g.panel.timeSeries.panelOptions.withDescription('Percentage of swap space used by OpenSearch and the Operating System on the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_swap_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + + g.panel.timeSeries.standardOptions.withDecimals(1) + + g.panel.timeSeries.standardOptions.withMin(0) + + g.panel.timeSeries.standardOptions.withMax(100) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.options.tooltip.withMode('multi') + + g.panel.timeSeries.options.tooltip.withSort('desc'), + + // Node network traffic + nodeNetworkTraffic: + g.panel.timeSeries.new('Node network traffic') + + g.panel.timeSeries.panelOptions.withDescription('Network traffic on the node\'s Operating System.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.transport_rx_bps.asTarget(), + signals.node.transport_tx_bps.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('Bps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Circuit breakers + circuitBreakers: + g.panel.timeSeries.new('Circuit breakers') + + g.panel.timeSeries.panelOptions.withDescription('Circuit breakers tripped on the selected node by type') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.circuitbreaker_tripped_sum_by_name.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('trips'), + + // JVM heap used vs committed + jvmHeapUsedVsCommitted: + g.panel.timeSeries.new('JVM heap used vs committed') + + g.panel.timeSeries.panelOptions.withDescription('JVM heap memory usage vs committed.') + + g.panel.timeSeries.queryOptions.withTargets([ + 
signals.node.jvm_heap_used_bytes.asTarget(), + signals.node.jvm_heap_committed_bytes.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM non-heap used vs committed (two byte-valued series on one panel) + jvmNonheapUsedVsCommitted: + g.panel.timeSeries.new('JVM non-heap used vs committed') + + g.panel.timeSeries.panelOptions.withDescription('JVM non-heap memory usage vs committed.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.node.jvm_nonheap_used_bytes.asTarget(), + signals.node.jvm_nonheap_committed_bytes.asTarget(), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM threads + jvmThreads: + g.panel.timeSeries.new('JVM threads') + + g.panel.timeSeries.panelOptions.withDescription('JVM thread count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_threads.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('threads') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM buffer pools: plots the jvm_bufferpool_number signal, so it uses a plain numeric unit instead of bytes. + // NOTE(review): assumes jvm_bufferpool_number is a count, as its name suggests; confirm in signals/node.libsonnet. + jvmBufferPools: + g.panel.timeSeries.new('JVM buffer pools') + + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_bufferpool_number.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('short') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM uptime + jvmUptime: + g.panel.timeSeries.new('JVM uptime') + + g.panel.timeSeries.panelOptions.withDescription('JVM uptime in seconds.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_uptime.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM garbage collections + jvmGarbageCollections: + g.panel.timeSeries.new('JVM garbage collections') + + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_gc_collections.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('collections') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM garbage collection time (points hidden like every other panel in this group) + jvmGarbageCollectionTime: + g.panel.timeSeries.new('JVM garbage collection time') + + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection time in milliseconds.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_gc_time.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('ms') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // JVM buffer pool usage: plots the jvm_bufferpool_used_percent signal, so the unit is percent, not bytes + jvmBufferPoolUsage: + g.panel.timeSeries.new('JVM buffer pool usage') + + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage by pool.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_bufferpool_used_percent.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Thread pool threads + threadPoolThreads: + g.panel.timeSeries.new('Thread pool threads') + + g.panel.timeSeries.panelOptions.withDescription('Thread pool thread count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.threadpool_threads.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('threads') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Thread pool tasks + threadPoolTasks: + g.panel.timeSeries.new('Thread pool tasks') + + g.panel.timeSeries.panelOptions.withDescription('Thread pool task count.') + + g.panel.timeSeries.queryOptions.withTargets([signals.node.threadpool_tasks.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('tasks') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Search and Index Overview Panels - Refactored to use modern patterns and signals + // Search Performance Panels + searchRequestRatePanel: + g.panel.timeSeries.new('Request rate') + + g.panel.timeSeries.panelOptions.withDescription('Rate of fetch, scroll, and query requests by selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.search_query_current_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_fetch_current_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_scroll_current_avg.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {reducer: 'allIsZero', op: 'gte', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {tooltip: true, viz: false, legend: 
true}}], }, - ) - + g.query.prometheus.withLegendFormat(utils.labelsToPanelLegend(instanceLabels)) - + g.query.prometheus.withInstant(true), - ]) - + g.panel.table.standardOptions.withMappings([ - { - options: { - '0': { - color: 'super-light-orange', - index: 5, - text: 'False', - }, - '1': { - color: 'light-green', - index: 3, - text: 'True', - }, - Data: { - color: 'light-purple', - index: 0, - text: 'data', - }, - Ingest: { - color: 'light-blue', - index: 2, - text: 'ingest', - }, - Master: { - color: 'light-green', - index: 1, - text: 'master', - }, - 'Remote cluster client': { - color: 'light-orange', - index: 4, - text: 'remote_cluster_client', - }, + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + // Query, fetch and scroll latency signals, each queried with a '1m' interval; the byValue override hides all-zero series from the legend and tooltip only (viz stays visible). + searchRequestLatencyPanel: + g.panel.timeSeries.new('Request latency') + + g.panel.timeSeries.panelOptions.withDescription('Latency of fetch, scroll, and query requests by selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.search_query_latency_avg.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_fetch_latency_avg.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.search_scroll_latency_avg.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - type: 'value', - }, - ]) - + g.panel.table.standardOptions.withOverrides([ - { - matcher: { - id: 'byRegexp', - options: '/Data|Master|Ingest|Remote.+|Cluster.+/', + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + // Request-cache and query-cache hit-rate signals on one panel, formatted with the 'percent' unit. + searchCacheHitRatioPanel: + g.panel.timeSeries.new('Cache hit ratio') + + 
g.panel.timeSeries.panelOptions.withDescription('Ratio of query cache and request cache hits and misses.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.request_cache_hit_rate.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.query_cache_hit_rate.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - properties: [ - { - id: 'custom.cellOptions', - value: { - type: 'color-text', - }, - }, - ], - }, - ]) - + g.panel.table.queryOptions.withTransformations([ - { - id: 'labelsToFields', - options: { - mode: 'columns', - valueLabel: 'role', + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + // Evictions for the query cache, request cache and fielddata, each queried with a '1m' interval. + // NOTE(review): the red threshold step at 80 is applied to an eviction count; confirm the value is intentional. + searchCacheEvictionsPanel: + g.panel.timeSeries.new('Evictions') + + g.panel.timeSeries.panelOptions.withDescription('Total evictions count by cache type for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.query_cache_evictions.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.request_cache_evictions.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + signals.search.fielddata_evictions.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('evictions') + + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + 
g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]) + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - }, - { - id: 'merge', - options: {}, - }, - { - id: 'organize', - options: { - excludeByName: { - Time: true, - }, - indexByName: { - Time: 0, // hide time - node: 3, - nodeid: 3, - master: 104, - data: 105, - ingest: 106, - remote_cluster_client: 107, - cluster_manager: 108, - } + { - [k]: 3 - for k in groupLabels + instanceLabels - } - , - renameByName: { - Time: '', - cluster: 'Cluster', - //roles: - cluster_manager: 'Cluster manager', - data: 'Data', - ingest: 'Ingest', - master: 'Master', - node: 'Node', - nodeid: 'Nodeid', - remote_cluster_client: 'Remote cluster client', - }, + ]) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + + // Indexing Performance Panels + // Single-target panel for the indexing_current signal; all-zero series are hidden from the legend and tooltip. + indexingRatePanel: + g.panel.timeSeries.new('Index rate') + + g.panel.timeSeries.panelOptions.withDescription('Rate of indexed documents for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_current.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents/s') + + g.panel.timeSeries.standardOptions.withOverrides([ + { + matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, + properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], }, - }, - ]), - }, + ]), + + // Indexing latency queried with a '1m' interval; unlike its neighbours this panel sets no interval factor and no zero-hiding override. + indexingLatencyPanel: + g.panel.timeSeries.new('Index latency') + + g.panel.timeSeries.panelOptions.withDescription('Document indexing latency for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_latency.asTarget() + + g.query.prometheus.withInterval('1m'), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') 
+ + g.panel.timeSeries.standardOptions.thresholds.withSteps([ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ]), + 
+ // Shared fragments for the panels below. They are object-level locals, so they are not emitted as fields of this object. + // Threshold steps: green base step and a red step at 80, exactly as previously repeated inline in every panel. + // NOTE(review): the same value 80 is used for seconds, bytes and counts alike; confirm it is intentional. + local redAbove80ThresholdSteps = [ + g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), + g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), + ], + // Field override that hides series whose values are all zero from the legend and tooltip (viz: false keeps them drawn). + local hideAllZeroSeriesOverrides = [ + g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + ], + 
+ indexingFailuresPanel: + g.panel.timeSeries.new('Index failures') + + g.panel.timeSeries.panelOptions.withDescription('Number of indexing failures for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_failed.asTarget() + + g.query.prometheus.withInterval('1m') + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('failures') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ // Index Operations Panels + flushLatencyPanel: + g.panel.timeSeries.new('Flush latency') + + g.panel.timeSeries.panelOptions.withDescription('Index flush latency for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.flush_latency.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ // Total, stopped and throttled merge time on one panel, drawn as points with a multi-series tooltip. + mergeTimePanel: + g.panel.timeSeries.new('Merge time') + + g.panel.timeSeries.panelOptions.withDescription('Index merge time for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.merge_time.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.indexing.merge_stopped_time.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.indexing.merge_throttled_time.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + 
+ refreshLatencyPanel: + g.panel.timeSeries.new('Refresh latency') + + g.panel.timeSeries.panelOptions.withDescription('Index refresh latency for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.refresh_latency.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ // Index Statistics Panels + translogOperationsPanel: + g.panel.timeSeries.new('Translog operations') + + g.panel.timeSeries.panelOptions.withDescription('Current number of translog operations for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.translog_ops.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('operations') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ docsDeletedPanel: + g.panel.timeSeries.new('Docs deleted') + + g.panel.timeSeries.panelOptions.withDescription('Rate of documents deleted for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_delete_current.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents/s') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ documentsIndexedPanel: + g.panel.timeSeries.new('Documents indexed') + + g.panel.timeSeries.panelOptions.withDescription('Number of indexed documents for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.indexing_count.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('documents') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ // Index Structure Panels + segmentCountPanel: + g.panel.timeSeries.new('Segment count') + + g.panel.timeSeries.panelOptions.withDescription('Current number of segments for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.segments_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('segments') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ // NOTE(review): titled 'Merge count' but plots the merge_docs signal; confirm the signal matches the title. + mergeCountPanel: + g.panel.timeSeries.new('Merge count') + + g.panel.timeSeries.panelOptions.withDescription('Number of merge operations for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.merge_docs.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('merges') + + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ // Cache and Memory Panels + cacheSizePanel: + g.panel.timeSeries.new('Cache size') + + g.panel.timeSeries.panelOptions.withDescription('Size of query cache and request cache.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.search.query_cache_memory.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.search.request_cache_memory.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides) + + g.panel.timeSeries.options.tooltip.withMode('multi'), + 
+ searchAndIndexStoreSizePanel: + g.panel.timeSeries.new('Store size') + + g.panel.timeSeries.panelOptions.withDescription('Size of the store in bytes for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.store_size_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ segmentSizePanel: + g.panel.timeSeries.new('Segment size') + + g.panel.timeSeries.panelOptions.withDescription('Memory used by segments for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.segments_memory_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ mergeSizePanel: + g.panel.timeSeries.new('Merge size') + + g.panel.timeSeries.panelOptions.withDescription('Size of merge operations in bytes for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.merge_current_size.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + 
+ searchAndIndexShardCountPanel: + g.panel.timeSeries.new('Shard count') + + g.panel.timeSeries.panelOptions.withDescription('The number of index shards for the selected index.') + + g.panel.timeSeries.queryOptions.withTargets([ + signals.indexing.shards_per_index.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + + g.panel.timeSeries.standardOptions.withUnit('shards') + + g.panel.timeSeries.standardOptions.thresholds.withSteps(redAbove80ThresholdSteps) + + g.panel.timeSeries.standardOptions.withOverrides(hideAllZeroSeriesOverrides), + }, } diff --git a/opensearch-mixin/rows.libsonnet b/opensearch-mixin/rows.libsonnet new file mode 100644 index 000000000..a80b2526d --- /dev/null +++ b/opensearch-mixin/rows.libsonnet @@ -0,0 +1,156 @@ +local g = import './g.libsonnet'; + +{ + new(this): { + clusterOverviewRow: + g.panel.row.new('Cluster Overview') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.clusterStatusPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.nodeCountPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.dataNodeCountPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.shardCountPanel { gridPos+: { w: 5, h: 6 } }, + this.grafana.panels.activeShardsPercentagePanel { gridPos+: { w: 4, h: 6 } }, + ]), + + rolesRow: + 
g.panel.row.new('Node Roles') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.osRoles { gridPos+: { w: 24 } }, + this.grafana.panels.osRolesTimeline { gridPos+: { w: 24 } }, + ]), + + // Expanded row of three panels, each overriding only its grid width (w: 8). + resourceUsageRow: + g.panel.row.new('Resource Usage') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topNodesByCPUUsagePanel { gridPos+: { w: 8 } }, + this.grafana.panels.breakersTrippedPanel { gridPos+: { w: 8 } }, + this.grafana.panels.shardStatusPanel { gridPos+: { w: 8 } }, + ]), + + // Panel widths are 8 and 16, followed by three panels of width 8. + storageAndTasksRow: + g.panel.row.new('Storage and Tasks') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topNodesByDiskUsagePanel { gridPos+: { w: 8 } }, + this.grafana.panels.totalDocumentsPanel { gridPos+: { w: 16 } }, + this.grafana.panels.pendingTasksPanel { gridPos+: { w: 8 } }, + this.grafana.panels.storeSizePanel { gridPos+: { w: 8 } }, + this.grafana.panels.maxTaskWaitTimePanel { gridPos+: { w: 8 } }, + ]), + + // Top-indices panels for request rate, request latency and combined cache hit ratio. + searchPerformanceRow: + g.panel.row.new('Search Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topIndicesByRequestRatePanel { gridPos+: { w: 8 } }, + this.grafana.panels.topIndicesByRequestLatencyPanel { gridPos+: { w: 8 } }, + this.grafana.panels.topIndicesByCombinedCacheHitRatioPanel { gridPos+: { w: 8 } }, + ]), + + // Top-nodes panels for ingest rate, latency and errors. + ingestPerformanceRow: + g.panel.row.new('Ingest Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topNodesByIngestRatePanel { gridPos+: { w: 8 } }, + this.grafana.panels.topNodesByIngestLatencyPanel { gridPos+: { w: 8 } }, + this.grafana.panels.topNodesByIngestErrorsPanel { gridPos+: { w: 8 } }, + ]), + + // Top-indices panels for index rate, latency and failures. + indexingPerformanceRow: + g.panel.row.new('Indexing Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.topIndicesByIndexRatePanel { gridPos+: { w: 8 } }, + 
this.grafana.panels.topIndicesByIndexLatencyPanel { gridPos+: { w: 8 } }, + this.grafana.panels.topIndicesByIndexFailuresPanel { gridPos+: { w: 8 } }, + ]), + + // Node Overview Dashboard Rows + // nodeRolesRow holds the same two role panels as rolesRow, with the timeline listed first. + nodeRolesRow: + g.panel.row.new('Node Roles') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.osRolesTimeline { gridPos+: { w: 24 } }, + this.grafana.panels.osRoles { gridPos+: { w: 24 } }, + ]), + + // Eight panels, each overriding only its grid width (w: 6). + nodeHealthRow: + g.panel.row.new('Node health') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.nodeCpuUsage { gridPos+: { w: 6 } }, + this.grafana.panels.nodeMemoryUsage { gridPos+: { w: 6 } }, + this.grafana.panels.nodeIO { gridPos+: { w: 6 } }, + this.grafana.panels.nodeOpenConnections { gridPos+: { w: 6 } }, + this.grafana.panels.nodeDiskUsage { gridPos+: { w: 6 } }, + this.grafana.panels.nodeMemorySwap { gridPos+: { w: 6 } }, + this.grafana.panels.nodeNetworkTraffic { gridPos+: { w: 6 } }, + this.grafana.panels.circuitBreakers { gridPos+: { w: 6 } }, + ]), + + // Eight JVM panels, each overriding only its grid width (w: 6). + nodeJVMRow: + g.panel.row.new('Node JVM') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.jvmHeapUsedVsCommitted { gridPos+: { w: 6 } }, + this.grafana.panels.jvmNonheapUsedVsCommitted { gridPos+: { w: 6 } }, + this.grafana.panels.jvmThreads { gridPos+: { w: 6 } }, + this.grafana.panels.jvmBufferPools { gridPos+: { w: 6 } }, + this.grafana.panels.jvmUptime { gridPos+: { w: 6 } }, + this.grafana.panels.jvmGarbageCollections { gridPos+: { w: 6 } }, + this.grafana.panels.jvmGarbageCollectionTime { gridPos+: { w: 6 } }, + this.grafana.panels.jvmBufferPoolUsage { gridPos+: { w: 6 } }, + ]), + + // Two half-width (w: 12) thread pool panels. + threadPoolsRow: + g.panel.row.new('Thread pools') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.threadPoolThreads { gridPos+: { w: 12 } }, + this.grafana.panels.threadPoolTasks { gridPos+: { w: 12 } }, + ]), + + + // Search and Index Overview Dashboard Rows + 
searchAndIndexSearchPerformanceRow: + g.panel.row.new('Search Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.searchRequestRatePanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchRequestLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchCacheHitRatioPanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchCacheEvictionsPanel { gridPos+: { w: 6 } }, + ]), + + // Indexing, flush, merge, refresh, translog and delete panels, each overriding only its grid width (w: 6). + searchAndIndexIndexingPerformanceRow: + g.panel.row.new('Indexing Performance') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.indexingRatePanel { gridPos+: { w: 6 } }, + this.grafana.panels.indexingLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.indexingFailuresPanel { gridPos+: { w: 6 } }, + this.grafana.panels.flushLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.mergeTimePanel { gridPos+: { w: 6 } }, + this.grafana.panels.refreshLatencyPanel { gridPos+: { w: 6 } }, + this.grafana.panels.translogOperationsPanel { gridPos+: { w: 6 } }, + this.grafana.panels.docsDeletedPanel { gridPos+: { w: 6 } }, + ]), + + // Document, segment, merge, cache, store and shard panels, each overriding only its grid width (w: 6). + searchAndIndexCapacityRow: + g.panel.row.new('Index Capacity') + + g.panel.row.withCollapsed(false) + + g.panel.row.withPanels([ + this.grafana.panels.documentsIndexedPanel { gridPos+: { w: 6 } }, + this.grafana.panels.segmentCountPanel { gridPos+: { w: 6 } }, + this.grafana.panels.mergeCountPanel { gridPos+: { w: 6 } }, + this.grafana.panels.cacheSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchAndIndexStoreSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.segmentSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.mergeSizePanel { gridPos+: { w: 6 } }, + this.grafana.panels.searchAndIndexShardCountPanel { gridPos+: { w: 6 } }, + ]), + }, +} diff --git a/opensearch-mixin/signals/cluster.libsonnet b/opensearch-mixin/signals/cluster.libsonnet new file mode 100644 index 000000000..d67a91f70 --- /dev/null +++ b/opensearch-mixin/signals/cluster.libsonnet 
@@ -0,0 +1,152 @@ +// Cluster-level signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_cluster_status', + }, + signals: { + cluster_status: { + name: 'Cluster status', + description: 'Overall cluster health status as a numeric code.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_status{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_nodes_number: { + name: 'Node count', + description: 'The number of running nodes across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_nodes_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_datanodes_number: { + name: 'Data node count', + description: 'The number of data nodes in the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_datanodes_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_shards_number_total: { + name: 'Shard count', + description: 'The number of shards in the OpenSearch cluster across all indices.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + aggKeepLabels: ['type'], + }, + }, + }, + cluster_shards_number_by_type: { + name: 'Shard status', + description: 'Shard status counts across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + 
aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{type}}', + aggKeepLabels: ['type'], + }, + }, + }, + cluster_shards_active_percent: { + name: 'Active shards %%', + description: 'Percent of active shards across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_active_percent{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_pending_tasks_number: { + name: 'Pending tasks', + description: 'The number of tasks waiting to be executed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + sources: { + prometheus: { + expr: 'opensearch_cluster_pending_tasks_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_task_max_wait_seconds: { + name: 'Max task wait time', + description: 'The max wait time for tasks to be executed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_cluster_task_max_waiting_time_seconds{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + indices_indexing_index_count_avg: { + name: 'Total documents', + description: 'The total count of documents indexed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_indices_indexing_index_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + indices_store_size_bytes_avg: { + name: 'Store size', + description: 'The total size of the store across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 
'bytes', + sources: { + prometheus: { + expr: 'opensearch_indices_store_size_bytes{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/indexing.libsonnet b/opensearch-mixin/signals/indexing.libsonnet new file mode 100644 index 000000000..b0189182b --- /dev/null +++ b/opensearch-mixin/signals/indexing.libsonnet @@ -0,0 +1,265 @@ +// Indexing operation signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_index_indexing_index_current_number', + }, + signals: { + indexing_current: { + name: 'Indexing current', + description: 'In-flight indexing operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s,index=~"$index",context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + indexing_latency: { + name: 'Indexing latency (avg)', + description: 'Average indexing latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:]),1))', + }, + }, + }, + indexing_count: { + name: 'Indexing count (avg)', + description: 'Indexing ops count.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'documents', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: 
'{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + indexing_failed: { + name: 'Indexing failed (avg)', + description: 'Indexing failures per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'failures', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s,index=~"$index",context="total"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + indexing_delete_current: { + name: 'Indexing delete current', + description: 'In-flight delete operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'documents/s', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_delete_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + flush_latency: { + name: 'Flush latency (avg)', + description: 'Average flush latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_flush_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]),1))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + flush_count: { + name: 'Flush count (avg)', + description: 'Flush count proxy (per mapping).', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))', + }, + }, + }, + merge_time: { + name: 'Merge time increase', + description: 'Merge time increase (boolean >0).', + type: 'raw', + unit: 
's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_merges_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', /* group labels come from config, like flush_latency */ + legendCustomTemplate: '{{index}} - total', + }, + }, + }, + merge_stopped_time: { + name: 'Merge stopped time increase', + description: 'Merge stopped time increase (boolean >0).', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_merges_total_stopped_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', + legendCustomTemplate: '{{index}} - stopped', + }, + }, + }, + merge_throttled_time: { + name: 'Merge throttled time increase', + description: 'Merge throttled time increase (boolean >0).', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_merges_total_throttled_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', + legendCustomTemplate: '{{index}} - throttled', + }, + }, + }, + merge_docs: { + name: 'Merge docs increase', + description: 'Merge docs increase (boolean >0).', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_merges_total_docs_count{%(queriesSelector)s, context="total"}[$__interval:])) > 0', + }, + }, + }, + merge_current_size: { + name: 'Merge current size bytes', + description: 'Merge current size (boolean >0).', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (opensearch_index_merges_current_size_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) > 0', + legendCustomTemplate: '{{index}}', + }, + }, + }, + refresh_latency: { + name: 'Refresh latency (avg)', + description: 'Average refresh latency.', + type: 'raw', + unit: 's', +
sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]),1))', /* group labels come from config, like flush_latency */ + legendCustomTemplate: '{{index}}', + }, + }, + }, + refresh_count: { + name: 'Refresh count (avg)', + description: 'Refresh count proxy (per mapping).', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))', + }, + }, + }, + translog_ops: { + name: 'Translog operations', + description: 'Translog operation count.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'operations', + sources: { + prometheus: { + expr: 'opensearch_index_translog_operations_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + segments_number: { + name: 'Segments number', + description: 'Number of segments.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'segments', + sources: { + prometheus: { + expr: 'opensearch_index_segments_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + segments_memory_bytes: { + name: 'Segments memory bytes', + description: 'Segment memory usage.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_segments_memory_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', +
aggKeepLabels: ['index'], + }, + }, + }, + store_size_bytes: { + name: 'Store size bytes', + description: 'Store size in bytes.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_store_size_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + legendCustomTemplate: '{{index}}', + aggKeepLabels: ['index'], + }, + }, + }, + shards_per_index: { + name: 'Active shards per index', + description: 'Active shards per index.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (index) (avg by(' + this.groupAggList + ',index) (opensearch_index_shards_number{%(queriesSelector)s, type=~"active|active_primary"}))', /* inner avg must keep index, otherwise the outer sum by (index) has no label to group on */ + legendCustomTemplate: '{{ index }}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/node.libsonnet b/opensearch-mixin/signals/node.libsonnet new file mode 100644 index 000000000..978675fe9 --- /dev/null +++ b/opensearch-mixin/signals/node.libsonnet @@ -0,0 +1,270 @@ +// Node-level signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_os_cpu_percent', + }, + signals: { + os_cpu_percent: { + name: 'CPU %%', + description: 'Node CPU percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_cpu_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + os_mem_used_percent: { + name: 'Memory used %%', + description: 'Node memory used percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_mem_used_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + os_swap_used_percent: { + name: 'Swap used %%', + description: 'Swap used percent.', + type: 'raw', +
unit: 'percent', + sources: { + prometheus: { + expr: '100 * opensearch_os_swap_used_bytes{%(queriesSelector)s} / clamp_min((opensearch_os_swap_used_bytes{%(queriesSelector)s} + opensearch_os_swap_free_bytes{%(queriesSelector)s}), 1)', + legendCustomTemplate: '{{node}}', + }, + }, + }, + fs_read_bps: { + name: 'FS read bytes/s', + description: 'Filesystem read rate.', + type: 'raw', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_fs_io_total_read_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{node}} - read', + }, + }, + }, + fs_write_bps: { + name: 'FS write bytes/s', + description: 'Filesystem write rate.', + type: 'raw', + unit: 'Bps', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_fs_io_total_write_bytes{%(queriesSelector)s}[$__rate_interval]))', + legendCustomTemplate: '{{node}} - write', + }, + }, + }, + fs_used_percent: { + name: 'FS used %%', + description: 'Filesystem used percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: '100 - (100 * opensearch_fs_path_free_bytes{%(queriesSelector)s} / clamp_min(opensearch_fs_path_total_bytes{%(queriesSelector)s}, 1))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + transport_open_connections: { + name: 'Transport server open', + description: 'Open transport server connections.', + type: 'raw', + unit: 'connections', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (opensearch_transport_server_open_number{%(queriesSelector)s})', + legendCustomTemplate: '{{node}}', + }, + }, + }, + transport_tx_bps: { + name: 'Transport TX bitrate', + description: 'Transport transmit bitrate.', + type: 'raw', + unit: 'bit/s', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_transport_tx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8', + 
legendCustomTemplate: '{{node}} - sent', + }, + }, + }, + transport_rx_bps: { + name: 'Transport RX bitrate', + description: 'Transport receive bitrate.', + type: 'raw', + unit: 'bit/s', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_transport_rx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8', + legendCustomTemplate: '{{node}} - received', + }, + }, + }, + circuitbreaker_tripped_sum_by_name: { + name: 'Circuit breaker trips by name', + description: 'Circuit breaker trips by breaker name.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (name, ' + this.groupAggListWithInstance + ') (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))', + legendCustomTemplate: '{{node}} - {{ name }}', + }, + }, + }, + jvm_heap_used_bytes: { + name: 'JVM heap used', + description: 'JVM heap used.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_heap_used_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_heap_committed_bytes: { + name: 'JVM heap committed', + description: 'JVM heap committed.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_heap_committed_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_nonheap_used_bytes: { + name: 'JVM non-heap used', + description: 'JVM non-heap used.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_nonheap_used_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_nonheap_committed_bytes: { + name: 'JVM non-heap committed', + description: 'JVM non-heap committed.', + type: 'raw', + unit: 'bytes', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_nonheap_committed_bytes{%(queriesSelector)s})', + }, + }, + }, + jvm_threads: { + name: 'JVM threads', + 
description: 'JVM thread count.', + type: 'raw', + unit: 'threads', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_threads_number{%(queriesSelector)s})', + }, + }, + }, + jvm_bufferpool_number: { + name: 'JVM buffer pools', + description: 'Number of JVM buffer pools.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ', bufferpool) (opensearch_jvm_bufferpool_number{%(queriesSelector)s})', + legendCustomTemplate: '{{ bufferpool }}', + }, + }, + }, + jvm_uptime: { + name: 'JVM uptime', + description: 'JVM uptime seconds.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_uptime_seconds{%(queriesSelector)s})', + }, + }, + }, + jvm_gc_collections: { + name: 'JVM GC collections', + description: 'GC collections per interval.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (increase(opensearch_jvm_gc_collection_count{%(queriesSelector)s}[$__interval:]))', + }, + }, + }, + jvm_gc_time: { + name: 'JVM GC time', + description: 'GC time per interval.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (increase(opensearch_jvm_gc_collection_time_seconds{%(queriesSelector)s}[$__interval:]))', + }, + }, + }, + jvm_bufferpool_used_percent: { + name: 'JVM bufferpool used %%', + description: 'Percent of bufferpool used.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: '100 * (sum by (' + this.groupAggList + ', bufferpool) (opensearch_jvm_bufferpool_used_bytes{%(queriesSelector)s})) / clamp_min((sum by (' + this.groupAggList + ', bufferpool) (opensearch_jvm_bufferpool_total_capacity_bytes{%(queriesSelector)s})),1)', /* both sides of the division must carry the same label set to match one-to-one */ + legendCustomTemplate: '{{ bufferpool }}', + }, + }, + }, + threadpool_threads: { + name: 'Threadpool threads', + description: 'Total threadpool threads.', + type: 'raw', + unit: 'threads',
+ sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') ((opensearch_threadpool_threads_number{%(queriesSelector)s}))', + }, + }, + }, + threadpool_tasks: { + name: 'Threadpool tasks', + description: 'Threadpool tasks.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'sum by (' + this.groupAggList + ') (opensearch_threadpool_tasks_number{%(queriesSelector)s})', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/roles.libsonnet b/opensearch-mixin/signals/roles.libsonnet new file mode 100644 index 000000000..ad7cd8d8d --- /dev/null +++ b/opensearch-mixin/signals/roles.libsonnet @@ -0,0 +1,81 @@ +// Node role signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_node_role_bool', + }, + signals: { + node_role_data: { + name: 'Node role: data', + description: 'Data role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="data"}[1m]) == 1) * 2', + legendCustomTemplate: '{{ node }} / data', + }, + }, + }, + node_role_master: { + name: 'Node role: master', + description: 'Master role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="master"}[1m]) == 1) * 3', + legendCustomTemplate: '{{ node }} / master', + }, + }, + }, + node_role_ingest: { + name: 'Node role: ingest', + description: 'Ingest role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="ingest"}[1m]) == 1) * 4', + legendCustomTemplate: '{{ node }} / ingest', + }, + }, + }, + 
node_role_cluster_manager: { + name: 'Node role: cluster_manager', + description: 'Cluster manager role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="cluster_manager"}[1m]) == 1) * 5', + legendCustomTemplate: '{{ node }} / cluster_manager', + }, + }, + }, + node_role_remote_cluster_client: { + name: 'Node role: remote_cluster_client', + description: 'Remote cluster client role present flag.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="remote_cluster_client"}[1m]) == 1) * 6', + legendCustomTemplate: '{{ node }} / remote_client', + }, + }, + }, + node_role_last_seen: { + name: 'Node role bool last seen', + description: 'Last seen role bool within 1d.', + type: 'raw', + sources: { + prometheus: { + expr: 'max by (' + this.groupAggList + ', nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{%(queriesSelector)s}[1d]))', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/search.libsonnet b/opensearch-mixin/signals/search.libsonnet new file mode 100644 index 000000000..3a2d0f21b --- /dev/null +++ b/opensearch-mixin/signals/search.libsonnet @@ -0,0 +1,195 @@ +// Search operation signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_index_search_query_current_number', + }, + signals: { + search_query_current_avg: { + name: 'Search queries in-flight', + description: 'In-flight search queries.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 
'opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', + legendCustomTemplate: '{{index}} - query', + aggKeepLabels: ['index'], + }, + }, + }, + search_fetch_current_avg: { + name: 'Search fetch in-flight', + description: 'In-flight fetch operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', + legendCustomTemplate: '{{index}} - fetch', + aggKeepLabels: ['index'], + }, + }, + }, + search_scroll_current_avg: { + name: 'Search scroll in-flight', + description: 'In-flight scroll operations.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', + legendCustomTemplate: '{{index}} - scroll', + aggKeepLabels: ['index'], + }, + }, + }, + search_query_latency_avg: { + name: 'Search query latency (avg)', + description: 'Average query latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (' + this.groupAggList + ',index) (increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', /* numerator and denominator select the same context, as in the fetch and scroll signals */ + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + search_fetch_latency_avg: { + name: 'Search fetch latency (avg)', + description: 'Average fetch latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (' + this.groupAggList + ',index) (increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) /
clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', + legendCustomTemplate: '{{index}} - fetch', + }, + }, + }, + search_scroll_latency_avg: { + name: 'Search scroll latency (avg)', + description: 'Average scroll latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (' + this.groupAggList + ',index) (increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', /* group labels come from config rather than being hard-coded */ + legendCustomTemplate: '{{index}} - scroll', + }, + }, + }, + request_cache_hit_rate: { + name: 'Request cache hit rate %%', + description: 'Request cache hit rate.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) / clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"} + opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}, 1))', + legendCustomTemplate: '{{index}} - request', + }, + }, + }, + query_cache_hit_rate: { + name: 'Query cache hit rate %%', + description: 'Query cache hit rate.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (100 * (opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) / clamp_min(opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"} + opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}, 1))', + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + query_cache_evictions:
{ + name: 'Query cache evictions', + description: 'Query cache evictions per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_querycache_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + rangeFunction: 'increase', + aggKeepLabels: ['index'], + legendCustomTemplate: '{{index}} - query cache', + }, + }, + }, + request_cache_evictions: { + name: 'Request cache evictions', + description: 'Request cache evictions per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_requestcache_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + rangeFunction: 'increase', + aggKeepLabels: ['index'], + legendCustomTemplate: '{{index}} - request cache', + }, + }, + }, + fielddata_evictions: { + name: 'Fielddata evictions', + description: 'Fielddata evictions per interval.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_fielddata_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', + rangeFunction: 'increase', + aggKeepLabels: ['index'], + legendCustomTemplate: '{{index}} - field data', + }, + }, + }, + query_cache_memory: { + name: 'Query cache memory bytes', + description: 'Query cache memory.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_querycache_memory_size_bytes{%(queriesSelector)s, context="total"}', + }, + }, + }, + request_cache_memory: { + name: 'Request cache memory bytes', + description: 'Request cache memory.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_requestcache_memory_size_bytes{%(queriesSelector)s, context="total"}', + }, + }, + 
}, + }, + } diff --git a/opensearch-mixin/signals/topk.libsonnet b/opensearch-mixin/signals/topk.libsonnet new file mode 100644 index 000000000..b1309cf68 --- /dev/null +++ b/opensearch-mixin/signals/topk.libsonnet @@ -0,0 +1,163 @@ +// TopK and ranking signals for OpenSearch +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_os_cpu_percent', + }, + signals: { + os_cpu_percent_topk: { + name: 'Top nodes by CPU usage', + description: 'Top nodes by OS CPU usage across the OpenSearch cluster.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(sum by(' + this.groupAggListWithInstance + ') (opensearch_os_cpu_percent{%(queriesSelectorGroupOnly)s})))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + fs_path_used_percent_topk: { + name: 'Top nodes by disk usage', + description: 'Top nodes by disk usage across the OpenSearch cluster.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc((100 * (sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s})- sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_free_bytes{%(queriesSelectorGroupOnly)s})) / sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s}))))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + circuitbreaker_tripped_count_sum: { + name: 'Breakers tripped', + description: 'The total count of circuit breakers tripped across the OpenSearch cluster.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_circuitbreaker_tripped_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{node}}', + 
rangeFunction: 'increase', + }, + }, + }, + search_current_inflight_topk: { + name: 'Top indices by request rate', + description: 'Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.', + type: 'raw', + unit: 'reqps', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s, context="total"}\n)))\n', + legendCustomTemplate: '{{index}}', + }, + }, + }, + search_avg_latency_topk: { + name: 'Top indices by request latency', + description: 'Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(sum by(index, ' + this.groupAggList + ') ((increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]), 1))))\n', + legendCustomTemplate: '{{index}}', + }, + }, + }, + request_query_cache_hit_rate_topk: { + name: 'Top indices by combined cache hit ratio', + description: 'Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.', + type: 'raw', + unit: 
'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n 100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"}) / \n clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s, context="total"}), 1\n ))))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + ingest_throughput_topk: { + name: 'Top nodes by ingest rate', + description: 'Top nodes by rate of ingest across the OpenSearch cluster.', + type: 'raw', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (rate(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__rate_interval])))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + ingest_latency_topk: { + name: 'Top nodes by ingest latency', + description: 'Top nodes by ingestion latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (increase(opensearch_ingest_total_time_seconds{%(queriesSelectorGroupOnly)s}[$__interval:]) / clamp_min(increase(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__interval:]), 1)))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + ingest_failures_topk: { + name: 'Top nodes by ingest errors', + description: 'Top nodes by ingestion failures across the OpenSearch cluster.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') 
(increase(opensearch_ingest_total_failed_count{%(queriesSelectorGroupOnly)s}[$__interval:])))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + indexing_current_topk: { + name: 'Top indices by index rate', + description: 'Top indices by rate of document indexing across the OpenSearch cluster.', + type: 'raw', + unit: 'ops', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s}))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_latency_topk: { + name: 'Top indices by index latency', + description: 'Top indices by indexing latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]), 1)))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_failed_topk: { + name: 'Top indices by index failures', + description: 'Top indices by index document failures across the OpenSearch cluster.', + type: 'raw', + unit: 'count', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (increase(opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s}[$__interval:])))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/variables.libsonnet b/opensearch-mixin/variables.libsonnet deleted file mode 100644 index 8122dafe1..000000000 --- a/opensearch-mixin/variables.libsonnet +++ /dev/null @@ -1,76 +0,0 @@ -// variables.libsonnet -local g = import './g.libsonnet'; -local var = g.dashboard.variable; -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; - -{ - new( - filteringSelector, - 
groupLabels, - instanceLabels, - varMetric, - enableLokiLogs=false, - ): { - local root = self, - local variablesFromLabels(groupLabels, instanceLabels, filteringSelector, multiInstance=true) = - local chainVarProto(index, chainVar) = - var.query.new(chainVar.label) - + var.query.withDatasourceFromVariable(root.datasources.prometheus) - + var.query.queryTypes.withLabelValues( - chainVar.label, - '%s{%s}' % [varMetric, chainVar.chainSelector], - ) - + var.query.generalOptions.withLabel(utils.toSentenceCase(chainVar.label)) - + var.query.selectionOptions.withIncludeAll( - value=if (!multiInstance && std.member(instanceLabels, chainVar.label)) then false else true, - customAllValue='.+' - ) - + var.query.selectionOptions.withMulti( - if (!multiInstance && std.member(instanceLabels, chainVar.label)) then false else true, - ) - + var.query.refresh.onTime() - + var.query.withSort( - i=1, - type='alphabetical', - asc=true, - caseInsensitive=false - ); - std.mapWithIndex(chainVarProto, utils.chainLabels(groupLabels + instanceLabels, [filteringSelector])), - datasources: { - prometheus: - var.datasource.new('prometheus_datasource', 'prometheus') - + var.datasource.generalOptions.withLabel('Prometheus data source') - + var.datasource.withRegex(''), - }, - // Use on dashboards where multiple entities can be selected, like fleet dashboards - multiInstance: - [root.datasources.prometheus] - + variablesFromLabels(groupLabels, instanceLabels, filteringSelector), - // Use on dashboards where only single entity can be selected - singleInstance: - [root.datasources.prometheus] - + variablesFromLabels(groupLabels, instanceLabels, filteringSelector, multiInstance=false), - - queriesSelector: - '%s,%s' % [ - filteringSelector, - utils.labelsToPromQLSelector(groupLabels + instanceLabels), - ], - } - + if enableLokiLogs then self.withLokiLogs() else {}, - - withLokiLogs(): { - datasources+: { - loki: - var.datasource.new('loki_datasource', 'loki') - + 
var.datasource.generalOptions.withLabel('Loki data source') - + var.datasource.withRegex('') - + var.datasource.generalOptions.showOnDashboard.withNothing(), - }, - - multiInstance+: [self.datasources.loki], - singleInstance+: [self.datasources.loki], - }, - -} From b44be6209ace95d971a8999e08d00ea762bdc4fe Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Fri, 21 Nov 2025 07:49:13 -0500 Subject: [PATCH 02/13] Updated signal files to align to dashboards. Reworked signals to better use builder pattern. Updated panels for best practices --- opensearch-mixin/config.libsonnet | 34 +- opensearch-mixin/dashboards.libsonnet | 31 +- .../opensearch-cluster-overview.json | 248 ++++--- .../dashboards_out/opensearch-logs.json | 116 ++- .../opensearch-node-overview.json | 667 ++++++++---------- .../opensearch-search-and-index-overview.json | 284 +++++--- opensearch-mixin/g.libsonnet | 2 +- opensearch-mixin/jsonnetfile.json | 92 ++- opensearch-mixin/links.libsonnet | 26 +- opensearch-mixin/main.libsonnet | 1 + opensearch-mixin/panels.libsonnet | 451 ++++++++---- .../prometheus_alerts.yaml | 24 +- opensearch-mixin/rows.libsonnet | 31 +- .../signals/cluster-overview.libsonnet | 447 ++++++++++++ opensearch-mixin/signals/cluster.libsonnet | 152 ---- opensearch-mixin/signals/indexing.libsonnet | 265 ------- .../signals/node-overview.libsonnet | 394 +++++++++++ opensearch-mixin/signals/node.libsonnet | 270 ------- opensearch-mixin/signals/roles.libsonnet | 81 --- .../search-and-index-overview.libsonnet | 461 ++++++++++++ opensearch-mixin/signals/search.libsonnet | 195 ----- opensearch-mixin/signals/topk.libsonnet | 163 ----- 22 files changed, 2464 insertions(+), 1971 deletions(-) create mode 100644 opensearch-mixin/signals/cluster-overview.libsonnet delete mode 100644 opensearch-mixin/signals/cluster.libsonnet delete mode 100644 opensearch-mixin/signals/indexing.libsonnet create mode 100644 opensearch-mixin/signals/node-overview.libsonnet delete mode 100644 
opensearch-mixin/signals/node.libsonnet delete mode 100644 opensearch-mixin/signals/roles.libsonnet create mode 100644 opensearch-mixin/signals/search-and-index-overview.libsonnet delete mode 100644 opensearch-mixin/signals/search.libsonnet delete mode 100644 opensearch-mixin/signals/topk.libsonnet diff --git a/opensearch-mixin/config.libsonnet b/opensearch-mixin/config.libsonnet index 25b3d6868..6c354e2e8 100644 --- a/opensearch-mixin/config.libsonnet +++ b/opensearch-mixin/config.libsonnet @@ -1,33 +1,28 @@ { local this = self, - filteringSelector: if self.enableMultiCluster then 'cluster!="",opensearch_cluster!=""' else 'opensearch_cluster!=""', - groupLabels: if self.enableMultiCluster then ['job', 'cluster', 'opensearch_cluster'] else ['job', 'opensearch_cluster'], - logLabels: ['job', 'cluster', 'node'], - instanceLabels: ['node'], + filteringSelector: 'job="integrations/opensearch"', + groupLabels: ['job', 'cluster', 'opensearch_cluster'], + logLabels: ['job', 'cluster', 'opensearch_cluster'], + instanceLabels: ['instance'], - dashboardTags: [self.uid], uid: 'opensearch', + dashboardTags: [self.uid], dashboardNamePrefix: 'OpenSearch', dashboardPeriod: 'now-1h', dashboardTimezone: 'default', dashboardRefresh: '1m', metricsSource: 'prometheus', // metrics source for signals - // Agg Lists - groupAggList: std.join(',', this.groupLabels), - groupAggListWithInstance: std.join(',', this.groupLabels + this.instanceLabels), - - // Multi-cluster support - enableMultiCluster: false, - opensearchSelector: if self.enableMultiCluster then 'job=~"$job", instance=~"$instance", cluster=~"$cluster"' else 'job=~"$job", instance=~"$instance"', - // Logging configuration enableLokiLogs: true, extraLogLabels: ['level', 'severity'], // Required by logs-lib logsVolumeGroupBy: 'level', showLogsVolume: true, - logExpression: '{job=~"$job", cluster=~"$cluster", instance=~"$instance", exception_class=~".+"} | json | line_format "{{.severity}} {{.exception_class}} - 
{{.exception_message}}" | drop time_extracted, severity_extracted, exception_class_extracted, correlation_id_extracted', + // Agg Lists + groupAggList: std.join(',', this.groupLabels), + groupAggListWithInstance: std.join(',', this.groupLabels + this.instanceLabels), + // Alerts configuration alertsWarningShardReallocations: 0, // count alertsWarningShardUnassigned: 0, // count @@ -42,11 +37,8 @@ // Signals configuration signals+: { - cluster: (import './signals/cluster.libsonnet')(this), - node: (import './signals/node.libsonnet')(this), - topk: (import './signals/topk.libsonnet')(this), - roles: (import './signals/roles.libsonnet')(this), - search: (import './signals/search.libsonnet')(this), - indexing: (import './signals/indexing.libsonnet')(this), + clusterOverview: (import './signals/cluster-overview.libsonnet')(this), + nodeOverview: (import './signals/node-overview.libsonnet')(this), + searchAndIndexOverview: (import './signals/search-and-index-overview.libsonnet')(this), }, -} +} \ No newline at end of file diff --git a/opensearch-mixin/dashboards.libsonnet b/opensearch-mixin/dashboards.libsonnet index 4f541e816..e1e63da42 100644 --- a/opensearch-mixin/dashboards.libsonnet +++ b/opensearch-mixin/dashboards.libsonnet @@ -16,12 +16,12 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; { 'opensearch-cluster-overview.json': - g.dashboard.new(this.config.dashboardNamePrefix + ' Cluster Overview') + g.dashboard.new(this.config.dashboardNamePrefix + ' cluster overview') + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( g.util.grid.wrapPanels([ this.grafana.rows.clusterOverviewRow, - this.grafana.rows.rolesRow, + this.grafana.rows.clusterRolesRow, this.grafana.rows.resourceUsageRow, this.grafana.rows.storageAndTasksRow, this.grafana.rows.searchPerformanceRow, @@ -40,18 +40,24 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; period, ), 'opensearch-node-overview.json': - g.dashboard.new(this.config.dashboardNamePrefix + ' Node 
Overview') + g.dashboard.new(this.config.dashboardNamePrefix + ' node overview') + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( g.util.grid.wrapPanels([ - this.grafana.rows.nodeRolesRow, this.grafana.rows.nodeHealthRow, + this.grafana.rows.nodeRolesRow, this.grafana.rows.nodeJVMRow, this.grafana.rows.threadPoolsRow, ]) ) ) + root.applyCommon( - vars.multiInstance, + vars.multiInstance + [ + g.dashboard.variable.query.new('node') + + g.dashboard.variable.custom.selectionOptions.withMulti(true) + + g.dashboard.variable.query.queryTypes.withLabelValues(label='node', metric='opensearch_os_cpu_percent{%(queriesSelector)s}' % vars) + + g.dashboard.variable.query.withDatasourceFromVariable(vars.datasources.prometheus), + + ], uid + '-node-overview', tags, links { opensearchNodeOverview+:: {} }, @@ -61,17 +67,22 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; period, ), 'opensearch-search-and-index-overview.json': - g.dashboard.new(this.config.dashboardNamePrefix + ' Search and Index Overview') + g.dashboard.new(this.config.dashboardNamePrefix + ' search and index overview') + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( g.util.grid.wrapPanels([ - this.grafana.rows.searchAndIndexSearchPerformanceRow, + this.grafana.rows.searchAndIndexRequestPerformanceRow, this.grafana.rows.searchAndIndexIndexingPerformanceRow, this.grafana.rows.searchAndIndexCapacityRow, ]) ) ) + root.applyCommon( - vars.multiInstance, + vars.multiInstance + [ + g.dashboard.variable.query.new('index') + + g.dashboard.variable.custom.selectionOptions.withMulti(true) + + g.dashboard.variable.query.queryTypes.withLabelValues(label='index', metric='opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s}' % vars) + + g.dashboard.variable.query.withDatasourceFromVariable(vars.datasources.prometheus), + ], uid + '-search-and-index-overview', tags, links { opensearchSearchAndIndexOverview+:: {} }, @@ -84,7 +95,7 @@ local logslib = import 
'logs-lib/logs/main.libsonnet'; } + if this.config.enableLokiLogs then { 'opensearch-logs.json': logslib.new( - this.config.dashboardNamePrefix + ' Logs', + this.config.dashboardNamePrefix + ' logs', datasourceName=this.grafana.variables.datasources.loki.name, datasourceRegex=this.grafana.variables.datasources.loki.regex, filterSelector=this.config.filteringSelector, @@ -96,7 +107,7 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; dashboards+: { logs+: - root.applyCommon(vars.multiInstance, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), + root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period), }, panels+: { diff --git a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json index 89f7eebdd..e17dbca8a 100644 --- a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json @@ -7,19 +7,21 @@ "links": [ { "keepTime": true, - "title": "Opensearch Logs", + "title": "Opensearch logs", "type": "link", "url": "/d/opensearch-logs" }, { - "asDropdown": true, - "includeVars": true, "keepTime": true, - "tags": [ - "opensearch" - ], - "title": "All dashboards", - "type": "dashboards" + "title": "Opensearch node overview", + "type": "link", + "url": "/d/opensearch-node-overview" + }, + { + "keepTime": true, + "title": "Opensearch search and index overview", + "type": "link", + "url": "/d/opensearch-search-and-index-overview" } ], "panels": [ @@ -33,7 +35,7 @@ }, "id": 1, "panels": [ ], - "title": "Cluster Overview", + "title": "Cluster overview", "type": "row" }, { @@ -96,20 +98,21 @@ }, "id": 2, "options": { + "graphMode": "none", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", 
+ "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "min by (job,cluster,opensearch_cluster) (\n opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -157,20 +160,21 @@ }, "id": 3, "options": { + "graphMode": "none", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_nodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "min by (job,cluster,opensearch_cluster) (\n opensearch_cluster_nodes_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -218,20 +222,21 @@ }, "id": 4, "options": { + "graphMode": "none", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_datanodes_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "min by (job,cluster,opensearch_cluster) (\n opensearch_cluster_datanodes_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -279,20 +284,21 @@ }, "id": 
5, "options": { + "graphMode": "none", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum(\n max by (job,opensearch_cluster,type) (\n opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)\n)", + "expr": "sum(\n max by (job,cluster,opensearch_cluster,type) (\n opensearch_cluster_shards_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -345,20 +351,21 @@ }, "id": 6, "options": { + "graphMode": "none", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "min by (job,opensearch_cluster) (\n opensearch_cluster_shards_active_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "min by (job,cluster,opensearch_cluster) (\n opensearch_cluster_shards_active_percent{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -379,7 +386,7 @@ }, "id": 7, "panels": [ ], - "title": "Node Roles", + "title": "Node roles", "type": "row" }, { @@ -452,17 +459,17 @@ "y": 8 }, "id": 8, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (job,opensearch_cluster, nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[1d]))", + "expr": "max by 
(job,cluster,opensearch_cluster,node,nodeid,role,primary_ip) (\n last_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[1d])\n)", "format": "time_series", "instant": true, - "legendFormat": "{{node}}: Node role bool last seen", + "legendFormat": "{{role}}: Node role bool last seen", "refId": "Node role bool last seen" } ], @@ -487,9 +494,11 @@ }, "indexByName": { "Time": 0, + "cluster": 3, "cluster_manager": 108, "data": 105, "ingest": 106, + "instance": 3, "job": 3, "master": 104, "node": 3, @@ -567,14 +576,14 @@ "legend": false, "showValue": "never" }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1) * 2", + "expr": "\n max by (job,cluster,opensearch_cluster,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", role=\"data\"}[1m]) == 1\n)\n * 2", "format": "time_series", "instant": false, "legendFormat": "{{ node }} / data", @@ -585,7 +594,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1) * 3", + "expr": "\n max by (job,cluster,opensearch_cluster,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", role=\"master\"}[1m]) == 1\n)\n * 3", "format": "time_series", "instant": false, "legendFormat": "{{ node }} / master", @@ -596,7 +605,7 @@ 
"type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1) * 4", + "expr": "\n max by (job,cluster,opensearch_cluster,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", role=\"ingest\"}[1m]) == 1\n)\n * 4", "format": "time_series", "instant": false, "legendFormat": "{{ node }} / ingest", @@ -607,7 +616,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1) * 5", + "expr": "\n max by (job,cluster,opensearch_cluster,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", role=\"cluster_manager\"}[1m]) == 1\n)\n * 5", "format": "time_series", "instant": false, "legendFormat": "{{ node }} / cluster_manager", @@ -618,7 +627,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1) * 6", + "expr": "\n max by (job,cluster,opensearch_cluster,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", role=\"remote_cluster_client\"}[1m]) == 1\n)\n * 6", "format": "time_series", "instant": false, "legendFormat": "{{ node }} / remote_client", @@ -638,7 +647,7 @@ }, "id": 10, "panels": [ ], - "title": "Resource Usage", 
+ "title": "Resource usage", "type": "row" }, { @@ -677,20 +686,21 @@ }, "id": 11, "options": { + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(\n topk(10, sort_desc(sum by(job,opensearch_cluster,node) (opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})))\n)", + "expr": "topk(10, sort_desc(\n sum by (job,cluster,opensearch_cluster) (\n opensearch_os_cpu_percent{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)\n))", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -735,23 +745,24 @@ }, "id": 12, "options": { + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (\n increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset -$__interval)\n)", + "expr": "sum by (job,cluster,opensearch_cluster) (\n increase(opensearch_circuitbreaker_tripped_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{node}}", "refId": "Breakers tripped" @@ -794,20 +805,21 @@ }, "id": 13, "options": { + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "min by (job,opensearch_cluster,type) (\n opensearch_cluster_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "min by (job,cluster,opensearch_cluster,type) (\n opensearch_cluster_shards_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -828,7 +840,7 @@ }, "id": 14, "panels": [ ], - "title": "Storage and Tasks", + "title": "Storage and tasks", "type": "row" }, { @@ -867,20 +879,21 @@ }, "id": 15, "options": { + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ] } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc((100 * (sum by(job,opensearch_cluster,node) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})- sum by(job,opensearch_cluster,node) (opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"})) / sum by(job,opensearch_cluster,node) (opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))))", + "expr": "topk(10, sort_desc((100 * (\n sum by(job,cluster,opensearch_cluster,instance) (opensearch_fs_path_total_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}) - \n sum by(job,cluster,opensearch_cluster,instance) (opensearch_fs_path_free_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"})\n) / \nsum by(job,cluster,opensearch_cluster,instance) 
(opensearch_fs_path_total_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"})\n)))", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -899,6 +912,11 @@ "description": "The total count of documents indexed across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "documents" } }, @@ -909,14 +927,14 @@ "y": 34 }, "id": 16, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster) (\n opensearch_indices_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster) (\n opensearch_indices_indexing_index_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -935,6 +953,11 @@ "description": "The number of tasks waiting to be executed across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "tasks" } }, @@ -945,14 +968,14 @@ "y": 42 }, "id": 17, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster) (\n opensearch_cluster_pending_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster) (\n opensearch_cluster_pending_tasks_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": 
false, "intervalFactor": 2, @@ -971,6 +994,11 @@ "description": "The total size of the store across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "bytes" } }, @@ -981,14 +1009,14 @@ "y": 42 }, "id": 18, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster) (\n opensearch_indices_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster) (\n opensearch_indices_store_size_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1007,6 +1035,11 @@ "description": "The max wait time for tasks to be executed across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "s" } }, @@ -1017,14 +1050,14 @@ "y": 42 }, "id": 19, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "max by (job,opensearch_cluster) (\n opensearch_cluster_task_max_waiting_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", + "expr": "max by (job,cluster,opensearch_cluster) (\n opensearch_cluster_task_max_waiting_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{opensearch_cluster}}", @@ -1044,7 +1077,7 @@ }, "id": 20, "panels": [ ], - "title": "Search Performance", + "title": "Search performance", "type": "row" }, { @@ 
-1055,6 +1088,11 @@ "description": "Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "reqps" } }, @@ -1065,14 +1103,14 @@ "y": 51 }, "id": 21, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}\n)))\n", + "expr": "topk(10, sort_desc(avg by(index, job,cluster,opensearch_cluster) (\n opensearch_index_search_fetch_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_query_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_search_scroll_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}\n)))", "format": "time_series", "instant": false, "legendFormat": "{{index}}", @@ -1090,6 +1128,11 @@ "description": "Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "s" } }, @@ -1100,17 
+1143,17 @@ "y": 51 }, "id": 22, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(sum by(index, job,opensearch_cluster) ((increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1))))\n", + "expr": "topk(10, sort_desc(sum by(index, job,cluster,opensearch_cluster) ((\n increase(opensearch_index_search_fetch_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval) + \n increase(opensearch_index_search_query_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval) + \n increase(opensearch_index_search_scroll_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", 
context=\"total\"}[$__interval:] offset $__interval)\n) / clamp_min(\n increase(opensearch_index_search_fetch_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval) + \n increase(opensearch_index_search_query_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval) + \n increase(opensearch_index_search_scroll_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval), 1\n))))", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "legendFormat": "{{index}}", "refId": "Top indices by request latency" } @@ -1126,6 +1169,11 @@ "description": "Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "percent" } }, @@ -1136,14 +1184,14 @@ "y": 51 }, "id": 23, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sort_desc(avg by(index, job,opensearch_cluster) (\n 100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n clamp_min((opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n 
opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n ))))", + "expr": "topk(10, sort_desc(avg by(index, job,cluster,opensearch_cluster) (\n 100 * (opensearch_index_requestcache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}) / \n clamp_min((opensearch_index_requestcache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"} + \n opensearch_index_querycache_miss_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}), 1\n))))", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1164,7 +1212,7 @@ }, "id": 24, "panels": [ ], - "title": "Ingest Performance", + "title": "Ingest performance", "type": "row" }, { @@ -1185,14 +1233,14 @@ "y": 60 }, "id": 25, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster,node) (rate(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])))", + "expr": "topk(10, \n sum by (job,cluster,opensearch_cluster,node) (\n rate(opensearch_ingest_total_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[$__rate_interval])\n)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1211,6 +1259,11 @@ "description": "Top nodes by ingestion latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "s" } }, @@ -1221,17 +1274,17 @@ "y": 60 }, "id": 26, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster,node) (increase(opensearch_ingest_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]) / clamp_min(increase(opensearch_ingest_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:]), 1)))", + "expr": "topk(10, sum by(job,cluster,opensearch_cluster,instance) (\n increase(opensearch_ingest_total_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_ingest_total_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset $__interval), 1)\n))", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{node}}", "refId": "Top nodes by ingest latency" @@ -1248,6 
+1301,11 @@ "description": "Top nodes by ingestion failures across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "errors" } }, @@ -1258,17 +1316,17 @@ "y": 60 }, "id": 27, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, sum by(job,opensearch_cluster,node) (increase(opensearch_ingest_total_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "expr": "topk(10, \n sum by (job,cluster,opensearch_cluster,node) (\n increase(opensearch_ingest_total_failed_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset -$__interval)\n)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{node}}", "refId": "Top nodes by ingest errors" @@ -1287,7 +1345,7 @@ }, "id": 28, "panels": [ ], - "title": "Indexing Performance", + "title": "Indexing performance", "type": "row" }, { @@ -1298,6 +1356,11 @@ "description": "Top indices by rate of document indexing across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "documents/s" } }, @@ -1308,14 +1371,14 @@ "y": 69 }, "id": 29, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(index, job,opensearch_cluster) (opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}))", + "expr": "topk(10, \n avg by (job,cluster,opensearch_cluster,index) (\n 
opensearch_index_indexing_index_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}\n)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1334,6 +1397,11 @@ "description": "Top indices by indexing latency across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "s" } }, @@ -1344,17 +1412,17 @@ "y": 69 }, "id": 30, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(index, job,opensearch_cluster) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:]), 1)))", + "expr": "topk(10, avg by(index, job,cluster,opensearch_cluster) (\n increase(opensearch_index_indexing_index_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_indexing_index_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\", context=\"total\"}[$__interval:] offset $__interval), 1)\n))", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}}", "refId": "Top indices by index latency" @@ -1371,6 +1439,11 @@ "description": "Top indices by index document failures across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + 
"showPoints": "never" + }, "unit": "failures" } }, @@ -1381,17 +1454,17 @@ "y": 69 }, "id": 31, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "topk(10, avg by(index, job,opensearch_cluster) (increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:])))", + "expr": "topk(10, \n avg by (job,cluster,opensearch_cluster,index) (\n increase(opensearch_index_indexing_index_failed_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}[$__interval:] offset -$__interval)\n)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}}", "refId": "Top indices by index failures" @@ -1425,7 +1498,22 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\"}, cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -1440,7 +1528,7 @@ "label": "Opensearch_cluster", "multi": true, "name": "opensearch_cluster", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\"}, opensearch_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -1452,10 +1540,10 @@ "uid": 
"${prometheus_datasource}" }, "includeAll": true, - "label": "Node", + "label": "Instance", "multi": true, - "name": "node", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "name": "instance", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" @@ -1475,6 +1563,6 @@ "to": "now" }, "timezone": "default", - "title": "OpenSearch Cluster Overview", + "title": "OpenSearch cluster overview", "uid": "opensearch-cluster-overview" } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-logs.json b/opensearch-mixin/dashboards_out/opensearch-logs.json index 6ea00e8ea..c6c03facc 100644 --- a/opensearch-mixin/dashboards_out/opensearch-logs.json +++ b/opensearch-mixin/dashboards_out/opensearch-logs.json @@ -7,19 +7,21 @@ "links": [ { "keepTime": true, - "title": "Opensearch Cluster Overview", + "title": "Opensearch cluster overview", "type": "link", "url": "/d/opensearch-cluster-overview" }, { - "asDropdown": true, - "includeVars": true, "keepTime": true, - "tags": [ - "opensearch" - ], - "title": "All dashboards", - "type": "dashboards" + "title": "Opensearch node overview", + "type": "link", + "url": "/d/opensearch-node-overview" + }, + { + "keepTime": true, + "title": "Opensearch search and index overview", + "type": "link", + "url": "/d/opensearch-search-and-index-overview" } ], "panels": [ @@ -169,7 +171,7 @@ "type": "loki", "uid": "${loki_datasource}" }, - "expr": "sum by (level) (count_over_time({opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", + "expr": "sum by (level) 
(count_over_time({job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"}\n|~ \"$regex_search\"\n\n[$__auto]))\n", "legendFormat": "{{ level }}" } ], @@ -211,7 +213,7 @@ "type": "loki", "uid": "${loki_datasource}" }, - "expr": "{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"} \n|~ \"$regex_search\"\n\n\n" + "expr": "{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\",severity=~\"$severity\"} \n|~ \"$regex_search\"\n\n\n" } ], "title": "Logs", @@ -226,63 +228,111 @@ "templating": { "list": [ { - "label": "Prometheus data source", - "name": "prometheus_datasource", - "query": "prometheus", - "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", + "label": "Loki data source", + "name": "loki_datasource", + "query": "loki", + "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", "type": "datasource" }, { - "allValue": ".+", + "allValue": ".*", "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" + "type": "loki", + "uid": "${loki_datasource}" }, "includeAll": true, "label": "Job", "multi": true, "name": "job", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "query": "label_values({job=\"integrations/opensearch\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values({job=\"integrations/opensearch\",job=~\"$job\"}, cluster)", "refresh": 2, "sort": 1, "type": "query" }, { - "allValue": ".+", + "allValue": ".*", "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" + "type": "loki", + "uid": 
"${loki_datasource}" }, "includeAll": true, "label": "Opensearch_cluster", "multi": true, "name": "opensearch_cluster", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "query": "label_values({job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\"}, opensearch_cluster)", "refresh": 2, "sort": 1, "type": "query" }, { - "allValue": ".+", + "allValue": ".*", "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" + "type": "loki", + "uid": "${loki_datasource}" }, "includeAll": true, - "label": "Node", + "label": "Level", "multi": true, - "name": "node", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "name": "level", + "query": "label_values({job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}, level)", "refresh": 2, "sort": 1, "type": "query" }, + { + "allValue": ".*", + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "includeAll": true, + "label": "Severity", + "multi": true, + "name": "severity", + "query": "label_values({job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",level=~\"$level\"}, severity)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "Regex search", + "name": "regex_search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "type": "textbox" + }, { "hide": 2, - "label": "Loki data source", - "name": "loki_datasource", - "query": "loki", - "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", + "label": "Prometheus data source", + "name": "prometheus_datasource", + "query": "prometheus", + "regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+", "type": 
"datasource" } ] @@ -292,6 +342,6 @@ "to": "now" }, "timezone": "default", - "title": "OpenSearch Logs", + "title": "OpenSearch logs", "uid": "opensearch-logs" } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-node-overview.json b/opensearch-mixin/dashboards_out/opensearch-node-overview.json index 845b8673d..96904a47e 100644 --- a/opensearch-mixin/dashboards_out/opensearch-node-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-node-overview.json @@ -7,25 +7,21 @@ "links": [ { "keepTime": true, - "title": "Opensearch Logs", + "title": "Opensearch logs", "type": "link", "url": "/d/opensearch-logs" }, { "keepTime": true, - "title": "Opensearch Cluster Overview", + "title": "Opensearch cluster overview", "type": "link", "url": "/d/opensearch-cluster-overview" }, { - "asDropdown": true, - "includeVars": true, "keepTime": true, - "tags": [ - "opensearch" - ], - "title": "All dashboards", - "type": "dashboards" + "title": "Opensearch search and index overview", + "type": "link", + "url": "/d/opensearch-search-and-index-overview" } ], "panels": [ @@ -39,265 +35,6 @@ }, "id": 1, "panels": [ ], - "title": "Node Roles", - "type": "row" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "OpenSearch node roles over time.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "2": { - "color": "light-purple", - "index": 0, - "text": "data" - }, - "3": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "4": { - "color": "light-blue", - "index": 2, - "text": "ingest" - }, - "5": { - "color": "light-yellow", - "index": 3, - "text": "cluster_manager" - }, - "6": { - "color": "super-light-red", - "index": 4, - "text": "remote_cluster_client" - } - }, - "type": "value" - } - ] - } - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 2, - "maxDataPoints": 100, - "options": { - "legend": false, - "showValue": "never" - }, - 
"pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"data\"}[1m]) == 1) * 2", - "format": "time_series", - "instant": false, - "legendFormat": "{{ node }} / data", - "refId": "Node role: data" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"master\"}[1m]) == 1) * 3", - "format": "time_series", - "instant": false, - "legendFormat": "{{ node }} / master", - "refId": "Node role: master" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"ingest\"}[1m]) == 1) * 4", - "format": "time_series", - "instant": false, - "legendFormat": "{{ node }} / ingest", - "refId": "Node role: ingest" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1) * 5", - "format": "time_series", - "instant": false, - "legendFormat": "{{ node }} / cluster_manager", - "refId": "Node role: cluster_manager" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (node, role) (max_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", 
role=\"remote_cluster_client\"}[1m]) == 1) * 6", - "format": "time_series", - "instant": false, - "legendFormat": "{{ node }} / remote_client", - "refId": "Node role: remote_cluster_client" - } - ], - "title": "Roles timeline", - "type": "status-history" - }, - { - "datasource": { - "type": "datasource", - "uid": "-- Mixed --" - }, - "description": "OpenSearch node roles.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "0": { - "color": "super-light-orange", - "index": 5, - "text": "False" - }, - "1": { - "color": "light-green", - "index": 3, - "text": "True" - }, - "Data": { - "color": "light-purple", - "index": 0, - "text": "data" - }, - "Ingest": { - "color": "light-blue", - "index": 2, - "text": "ingest" - }, - "Master": { - "color": "light-green", - "index": 1, - "text": "master" - }, - "Remote cluster client": { - "color": "light-orange", - "index": 4, - "text": "remote_cluster_client" - } - }, - "type": "value" - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/Data|Master|Ingest|Remote.+|Cluster.+/" - }, - "properties": [ - { - "id": "custom.cellOptions", - "value": { - "type": "color-text" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 3, - "pluginVersion": "v11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${prometheus_datasource}" - }, - "expr": "max by (job,opensearch_cluster, nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[1d]))", - "format": "time_series", - "instant": true, - "legendFormat": "{{node}}: Node role bool last seen", - "refId": "Node role bool last seen" - } - ], - "title": "Roles", - "transformations": [ - { - "id": "labelsToFields", - "options": { - "mode": "columns", - "valueLabel": "role" - } - }, - { - "id": "merge", - "options": { } - }, - { - "id": "organize", - "options": { - 
"excludeByName": { - "Time": true - }, - "indexByName": { - "Time": 0, - "cluster_manager": 108, - "data": 105, - "ingest": 106, - "job": 3, - "master": 104, - "node": 3, - "nodeid": 3, - "opensearch_cluster": 3, - "remote_cluster_client": 107 - }, - "renameByName": { - "Time": "", - "cluster": "Cluster", - "cluster_manager": "Cluster manager", - "data": "Data", - "ingest": "Ingest", - "master": "Master", - "node": "Node", - "nodeid": "Nodeid", - "remote_cluster_client": "Remote cluster client" - } - } - } - ], - "type": "table" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 0, - "x": 0, - "y": 17 - }, - "id": 4, - "panels": [ ], "title": "Node health", "type": "row" }, @@ -313,13 +50,12 @@ "mode": "continuous-BlYlRd" }, "custom": { - "fillOpacity": 5, + "fillOpacity": 15, "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, - "decimals": 1, "max": 100, "min": 0, "unit": "percent" @@ -329,23 +65,23 @@ "h": 8, "w": 6, "x": 0, - "y": 18 + "y": 1 }, - "id": 5, + "id": 2, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "opensearch_os_cpu_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "expr": "opensearch_os_cpu_percent{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}", "format": "time_series", "instant": false, "legendFormat": "{{node}}", @@ -360,20 +96,19 @@ "type": "datasource", "uid": "-- Mixed --" }, - "description": "Memory usage percentage of the node for the Operating System and OpenSearch", + "description": "Memory usage percentage of the node for the operating system and OpenSearch", "fieldConfig": { "defaults": { "color": { "mode": "continuous-BlYlRd" }, 
"custom": { - "fillOpacity": 5, + "fillOpacity": 15, "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, - "decimals": 1, "max": 100, "min": 0, "unit": "percent" @@ -383,23 +118,23 @@ "h": 8, "w": 6, "x": 6, - "y": 18 + "y": 1 }, - "id": 6, + "id": 3, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "opensearch_os_mem_used_percent{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}", + "expr": "opensearch_os_mem_used_percent{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}", "format": "time_series", "instant": false, "legendFormat": "{{node}}", @@ -418,7 +153,7 @@ "fieldConfig": { "defaults": { "custom": { - "fillOpacity": 1, + "fillOpacity": 15, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, @@ -456,25 +191,26 @@ "h": 8, "w": 6, "x": 12, - "y": 18 + "y": 1 }, - "id": 7, + "id": 4, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_fs_io_total_read_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n rate(opensearch_fs_io_total_read_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__rate_interval])\n)", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{node}} - read", 
"refId": "FS read bytes/s" }, @@ -483,9 +219,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_fs_io_total_write_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval]))", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n rate(opensearch_fs_io_total_write_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__rate_interval])\n)", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{node}} - write", "refId": "FS write bytes/s" } @@ -502,7 +239,7 @@ "fieldConfig": { "defaults": { "custom": { - "fillOpacity": 30, + "fillOpacity": 15, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, @@ -517,23 +254,23 @@ "h": 8, "w": 6, "x": 18, - "y": 18 + "y": 1 }, - "id": 8, + "id": 5, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster,node) (opensearch_transport_server_open_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_transport_server_open_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, "legendFormat": "{{node}}", @@ -555,13 +292,12 @@ "mode": "continuous-BlYlRd" }, "custom": { - "fillOpacity": 1, + "fillOpacity": 15, "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, - "decimals": 1, "max": 100, "min": 0, "unit": 
"percent" @@ -571,23 +307,23 @@ "h": 8, "w": 6, "x": 0, - "y": 26 + "y": 9 }, - "id": 9, + "id": 6, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "100 - (100 * opensearch_fs_path_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min(opensearch_fs_path_total_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}, 1))", + "expr": "100 - (100 * opensearch_fs_path_free_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"} / clamp_min(opensearch_fs_path_total_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}, 1))", "format": "time_series", "instant": false, "legendFormat": "{{node}}", @@ -602,20 +338,19 @@ "type": "datasource", "uid": "-- Mixed --" }, - "description": "Percentage of swap space used by OpenSearch and the Operating System on the selected node.", + "description": "Percentage of swap space used by OpenSearch and the operating system on the selected node.", "fieldConfig": { "defaults": { "color": { "mode": "continuous-BlYlRd" }, "custom": { - "fillOpacity": 5, + "fillOpacity": 15, "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, - "decimals": 1, "max": 100, "min": 0, "unit": "percent" @@ -625,23 +360,23 @@ "h": 8, "w": 6, "x": 6, - "y": 26 + "y": 9 }, - "id": 10, + "id": 7, "options": { "tooltip": { "mode": "multi", "sort": "desc" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "100 * 
opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"} + opensearch_os_swap_free_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}), 1)", + "expr": "100 * opensearch_os_swap_used_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"} / clamp_min((opensearch_os_swap_used_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"} + opensearch_os_swap_free_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}), 1)", "format": "time_series", "instant": false, "legendFormat": "{{node}}", @@ -656,11 +391,11 @@ "type": "datasource", "uid": "-- Mixed --" }, - "description": "Network traffic on the node's Operating System.", + "description": "Network traffic on the node's operating system.", "fieldConfig": { "defaults": { "custom": { - "fillOpacity": 5, + "fillOpacity": 15, "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, @@ -673,19 +408,20 @@ "h": 8, "w": 6, "x": 12, - "y": 26 + "y": 9 }, - "id": 11, - "pluginVersion": "v11.4.0", + "id": 8, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_rx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "expr": "\n sum by (job,cluster,opensearch_cluster,instance,node) (\n 
rate(opensearch_transport_rx_bytes_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__rate_interval])\n)\n * 8", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{node}} - received", "refId": "Transport RX bitrate" }, @@ -694,9 +430,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster,node) (rate(opensearch_transport_tx_bytes_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__rate_interval])) * 8", + "expr": "\n sum by (job,cluster,opensearch_cluster,instance,node) (\n rate(opensearch_transport_tx_bytes_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__rate_interval])\n)\n * 8", "format": "time_series", "instant": false, + "interval": "2m", "legendFormat": "{{node}} - sent", "refId": "Transport TX bitrate" } @@ -712,6 +449,13 @@ "description": "Circuit breakers tripped on the selected node by type", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 15, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "trips" } }, @@ -719,20 +463,20 @@ "h": 8, "w": 6, "x": 18, - "y": 26 + "y": 9 }, - "id": 12, - "pluginVersion": "v11.4.0", + "id": 9, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (name, job,opensearch_cluster,node) (increase(opensearch_circuitbreaker_tripped_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n 
increase(opensearch_circuitbreaker_tripped_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{node}} - {{ name }}", "refId": "Circuit breaker trips by name" @@ -747,9 +491,137 @@ "h": 1, "w": 0, "x": 0, - "y": 34 + "y": 17 }, - "id": 13, + "id": 10, + "panels": [ ], + "title": "Node Roles", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "OpenSearch node roles over time.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "2": { + "color": "light-purple", + "index": 0, + "text": "data" + }, + "3": { + "color": "light-green", + "index": 1, + "text": "master" + }, + "4": { + "color": "light-blue", + "index": 2, + "text": "ingest" + }, + "5": { + "color": "light-yellow", + "index": 3, + "text": "cluster_manager" + }, + "6": { + "color": "super-light-red", + "index": 4, + "text": "remote_cluster_client" + } + }, + "type": "value" + } + ] + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 11, + "maxDataPoints": 100, + "options": { + "legend": false, + "showValue": "never" + }, + "pluginVersion": "v11.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "\n max by (job,cluster,opensearch_cluster,instance,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\", role=\"data\"}[1m]) == 1\n)\n * 2", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / data", + "refId": "Node role: data" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${prometheus_datasource}" + }, + "expr": "\n max by (job,cluster,opensearch_cluster,instance,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\", role=\"master\"}[1m]) == 1\n)\n * 3", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / master", + "refId": "Node role: master" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "\n max by (job,cluster,opensearch_cluster,instance,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\", role=\"ingest\"}[1m]) == 1\n)\n * 4", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / ingest", + "refId": "Node role: ingest" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "\n max by (job,cluster,opensearch_cluster,instance,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\", role=\"cluster_manager\"}[1m]) == 1\n)\n * 5", + "format": "time_series", + "instant": false, + "legendFormat": "{{ node }} / cluster_manager", + "refId": "Node role: cluster_manager" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "expr": "\n max by (job,cluster,opensearch_cluster,instance,node,role) (\n max_over_time(opensearch_node_role_bool{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\", role=\"remote_cluster_client\"}[1m]) == 1\n)\n * 6", + "format": "time_series", + "instant": false, + 
"legendFormat": "{{ node }} / remote_client", + "refId": "Node role: remote_cluster_client" + } + ], + "title": "Roles timeline", + "type": "status-history" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 0, + "x": 0, + "y": 26 + }, + "id": 12, "panels": [ ], "title": "Node JVM", "type": "row" @@ -764,7 +636,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -776,19 +647,20 @@ "h": 8, "w": 6, "x": 0, - "y": 35 + "y": 27 }, - "id": 14, - "pluginVersion": "v11.4.0", + "id": 13, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_heap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_jvm_mem_heap_used_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: JVM heap used", "refId": "JVM heap used" }, @@ -797,9 +669,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_heap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_jvm_mem_heap_committed_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: JVM heap committed", "refId": "JVM heap committed" } @@ -817,7 +690,6 @@ 
"defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -829,19 +701,20 @@ "h": 8, "w": 6, "x": 6, - "y": 35 + "y": 27 }, - "id": 15, - "pluginVersion": "v11.4.0", + "id": 14, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_nonheap_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_jvm_mem_nonheap_used_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: JVM non-heap used", "refId": "JVM non-heap used" }, @@ -850,9 +723,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_mem_nonheap_committed_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_jvm_mem_nonheap_committed_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: JVM non-heap committed", "refId": "JVM non-heap committed" } @@ -870,7 +744,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -882,19 +755,20 @@ "h": 8, "w": 6, "x": 12, - "y": 35 + "y": 27 }, - "id": 16, - "pluginVersion": "v11.4.0", + "id": 15, + "pluginVersion": "v11.0.0", "targets": 
[ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_jvm_threads_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: JVM threads", "refId": "JVM threads" } @@ -912,7 +786,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -924,20 +797,21 @@ "h": 8, "w": 6, "x": 18, - "y": 35 + "y": 27 }, - "id": 17, - "pluginVersion": "v11.4.0", + "id": 16, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node,bufferpool) (\n opensearch_jvm_bufferpool_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, - "legendFormat": "{{ bufferpool }}", + "intervalFactor": 2, + "legendFormat": "{{ node }} - {{ bufferpool }}", "refId": "JVM buffer pools" } ], @@ -954,7 +828,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -966,19 +839,20 @@ "h": 8, "w": 6, "x": 0, - "y": 43 + "y": 35 }, - "id": 18, - "pluginVersion": "v11.4.0", + "id": 17, + 
"pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_jvm_uptime_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_jvm_uptime_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: JVM uptime", "refId": "JVM uptime" } @@ -996,7 +870,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1008,19 +881,21 @@ "h": 8, "w": 6, "x": 6, - "y": 43 + "y": 35 }, - "id": 19, - "pluginVersion": "v11.4.0", + "id": 18, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (increase(opensearch_jvm_gc_collection_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n increase(opensearch_jvm_gc_collection_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, + "interval": "2m", + "intervalFactor": 2, "legendFormat": "{{node}}: JVM GC collections", "refId": "JVM GC collections" } @@ -1038,9 +913,9 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", - "lineWidth": 2 + "lineWidth": 2, + "showPoints": "never" }, "unit": "ms" } @@ -1049,19 +924,21 @@ "h": 8, 
"w": 6, "x": 12, - "y": 43 + "y": 35 }, - "id": 20, - "pluginVersion": "v11.4.0", + "id": 19, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (increase(opensearch_jvm_gc_collection_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}[$__interval:]))", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n increase(opensearch_jvm_gc_collection_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, + "interval": "2m", + "intervalFactor": 2, "legendFormat": "{{node}}: JVM GC time", "refId": "JVM GC time" } @@ -1079,7 +956,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1091,20 +967,21 @@ "h": 8, "w": 6, "x": 18, - "y": 43 + "y": 35 }, - "id": 21, - "pluginVersion": "v11.4.0", + "id": 20, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "100 * (sum by (job,opensearch_cluster, bufferpool) (opensearch_jvm_bufferpool_used_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})),1)", + "expr": "100 * (sum by (job,cluster,opensearch_cluster,instance,node,bufferpool) (opensearch_jvm_bufferpool_used_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"})) / clamp_min((sum by 
(job,cluster,opensearch_cluster,instance,node,bufferpool) (opensearch_jvm_bufferpool_total_capacity_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"})),1)", "format": "time_series", "instant": false, - "legendFormat": "{{ bufferpool }}", + "intervalFactor": 2, + "legendFormat": "{{ node }} - {{ bufferpool }}", "refId": "JVM bufferpool used %%" } ], @@ -1117,9 +994,9 @@ "h": 1, "w": 0, "x": 0, - "y": 51 + "y": 43 }, - "id": 22, + "id": 21, "panels": [ ], "title": "Thread pools", "type": "row" @@ -1134,7 +1011,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1146,19 +1022,20 @@ "h": 8, "w": 12, "x": 0, - "y": 52 + "y": 44 }, - "id": 23, - "pluginVersion": "v11.4.0", + "id": 22, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) ((opensearch_threadpool_threads_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"}))", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_threadpool_threads_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: Threadpool threads", "refId": "Threadpool threads" } @@ -1176,7 +1053,6 @@ "defaults": { "custom": { "fillOpacity": 5, - "gradientMode": "scheme", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1188,19 +1064,20 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 44 }, - "id": 24, - "pluginVersion": "v11.4.0", + "id": 23, + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": 
"${prometheus_datasource}" }, - "expr": "sum by (job,opensearch_cluster) (opensearch_threadpool_tasks_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\"})", + "expr": "sum by (job,cluster,opensearch_cluster,instance,node) (\n opensearch_threadpool_tasks_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",node=~\"$node\"}\n)", "format": "time_series", "instant": false, + "intervalFactor": 2, "legendFormat": "{{node}}: Threadpool tasks", "refId": "Threadpool tasks" } @@ -1233,7 +1110,22 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\"}, cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -1248,7 +1140,7 @@ "label": "Opensearch_cluster", "multi": true, "name": "opensearch_cluster", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\"}, opensearch_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -1260,10 +1152,10 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Node", + "label": "Instance", "multi": true, - "name": "node", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "name": "instance", + "query": 
"label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" @@ -1275,6 +1167,17 @@ "query": "loki", "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "multi": true, + "name": "node", + "query": "label_values(opensearch_os_cpu_percent{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\"}, node)", + "refresh": 2, + "type": "query" } ] }, @@ -1283,6 +1186,6 @@ "to": "now" }, "timezone": "default", - "title": "OpenSearch Node Overview", + "title": "OpenSearch node overview", "uid": "opensearch-node-overview" } \ No newline at end of file diff --git a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json index 4ead34a3c..ceabef8f7 100644 --- a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json @@ -7,25 +7,21 @@ "links": [ { "keepTime": true, - "title": "Opensearch Logs", + "title": "Opensearch logs", "type": "link", "url": "/d/opensearch-logs" }, { "keepTime": true, - "title": "Opensearch Cluster Overview", + "title": "Opensearch cluster overview", "type": "link", "url": "/d/opensearch-cluster-overview" }, { - "asDropdown": true, - "includeVars": true, "keepTime": true, - "tags": [ - "opensearch" - ], - "title": "All dashboards", - "type": "dashboards" + "title": "Opensearch node overview", + "type": "link", + "url": "/d/opensearch-node-overview" } ], "panels": [ @@ -39,7 +35,7 @@ }, "id": 1, "panels": [ ], - "title": "Search Performance", + "title": "Request performance", "type": "row" }, { @@ -50,6 
+46,11 @@ "description": "Rate of fetch, scroll, and query requests by selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "reqps" }, "overrides": [ @@ -87,14 +88,14 @@ "mode": "multi" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_query_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_query_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -106,7 +107,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_fetch_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_fetch_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -118,7 +119,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_search_scroll_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=~\"total\"}\n)", + 
"expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_scroll_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -137,6 +138,11 @@ "description": "Latency of fetch, scroll, and query requests by selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "s" }, "overrides": [ @@ -174,17 +180,17 @@ "mode": "multi" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_query_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_search_query_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_search_query_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval), 1)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - query", "refId": "Search query latency (avg)" @@ -194,10 +200,10 @@ 
"type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", + "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_search_fetch_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_search_fetch_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval), 1)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - fetch", "refId": "Search fetch latency (avg)" @@ -207,10 +213,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_scroll_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]), 1))", + "expr": "avg by (job,opensearch_cluster,index) (\n 
increase(opensearch_index_search_scroll_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_search_scroll_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval), 1)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - scroll", "refId": "Search scroll latency (avg)" @@ -227,6 +233,11 @@ "description": "Ratio of query cache and request cache hits and misses.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "percent" }, "overrides": [ @@ -264,14 +275,14 @@ "mode": "multi" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_requestcache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_requestcache_miss_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", + "expr": "avg by(job,opensearch_cluster,index) (\n 100 * (opensearch_index_requestcache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", 
context=\"total\"}) / \n clamp_min(opensearch_index_requestcache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"} + \n opensearch_index_requestcache_miss_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}, 1)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -283,7 +294,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (100 * (opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) / clamp_min(opensearch_index_querycache_hit_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"} + opensearch_index_querycache_miss_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}, 1))", + "expr": "avg by(job,opensearch_cluster,index) (\n 100 * (opensearch_index_querycache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}) / \n clamp_min(opensearch_index_querycache_hit_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"} + \n opensearch_index_querycache_miss_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}, 1)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -302,6 +313,11 @@ 
"description": "Total evictions count by cache type for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -351,17 +367,17 @@ "mode": "multi" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_querycache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_querycache_evictions_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - query cache", "refId": "Query cache evictions" @@ -371,10 +387,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_requestcache_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_requestcache_evictions_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 
2, "legendFormat": "{{index}} - request cache", "refId": "Request cache evictions" @@ -384,10 +400,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_fielddata_evictions_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_fielddata_evictions_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - field data", "refId": "Fielddata evictions" @@ -406,7 +422,7 @@ }, "id": 6, "panels": [ ], - "title": "Indexing Performance", + "title": "Indexing performance", "type": "row" }, { @@ -417,6 +433,11 @@ "description": "Rate of indexed documents for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "documents/s" }, "overrides": [ @@ -449,14 +470,14 @@ "y": 10 }, "id": 7, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_index_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\",context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n 
opensearch_index_indexing_index_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\",context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -475,6 +496,11 @@ "description": "Document indexing latency for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -497,18 +523,19 @@ "y": 10 }, "id": 8, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=~\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=~\"total\"}[$__interval:]),1))", + "expr": "avg by(job,cluster,opensearch_cluster) (\n increase(opensearch_index_indexing_index_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_indexing_index_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}[$__interval:] offset $__interval), 1)\n)", "format": "time_series", "instant": false, - "interval": "1m", - "legendFormat": "{{node}}: Indexing latency (avg)", + "interval": "2m", + "intervalFactor": 2, + "legendFormat": "{{index}}: Indexing latency (avg)", "refId": "Indexing latency 
(avg)" } ], @@ -523,6 +550,11 @@ "description": "Number of indexing failures for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -567,17 +599,17 @@ "y": 10 }, "id": 9, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n increase(opensearch_index_indexing_index_failed_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\",context=\"total\"}[$__interval:] offset -$__interval)\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_indexing_index_failed_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\",context=\"total\"}[$__interval:] offset -$__interval)\n)", "format": "time_series", "instant": false, - "interval": "1m", + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}}", "refId": "Indexing failed (avg)" @@ -594,6 +626,11 @@ "description": "Index flush latency for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -638,16 +675,17 @@ "y": 10 }, "id": 10, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_flush_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / 
clamp_min(increase(opensearch_index_flush_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", + "expr": "avg by(job,cluster,opensearch_cluster,index) (\n increase(opensearch_index_flush_total_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_flush_total_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval), 1)\n)", "format": "time_series", "instant": false, + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}}", "refId": "Flush latency (avg)" @@ -665,7 +703,10 @@ "fieldConfig": { "defaults": { "custom": { - "drawStyle": "points" + "drawStyle": "points", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "thresholds": { "steps": [ @@ -716,16 +757,17 @@ "mode": "multi" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "expr": "(\n avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_merges_total_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)\n) > 0", "format": "time_series", "instant": false, + "interval": "2m", 
"intervalFactor": 2, "legendFormat": "{{index}} - total", "refId": "Merge time increase" @@ -735,9 +777,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_stopped_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "expr": "(\n avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_merges_total_stopped_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)\n) > 0", "format": "time_series", "instant": false, + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - stopped", "refId": "Merge stopped time increase" @@ -747,9 +790,10 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_throttled_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:])) > 0", + "expr": "(\n avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_merges_total_throttled_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)\n) > 0", "format": "time_series", "instant": false, + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}} - throttled", "refId": "Merge throttled time increase" @@ -766,6 +810,11 @@ "description": "Index refresh latency for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" 
+ }, "thresholds": { "steps": [ { @@ -810,16 +859,17 @@ "y": 18 }, "id": 12, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (increase(opensearch_index_refresh_total_time_seconds{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}[$__interval:]),1))", + "expr": "avg by(job,opensearch_cluster,index) (\n increase(opensearch_index_refresh_total_time_seconds{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval) / \n clamp_min(increase(opensearch_index_refresh_total_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset $__interval), 1)\n)", "format": "time_series", "instant": false, + "interval": "2m", "intervalFactor": 2, "legendFormat": "{{index}}", "refId": "Refresh latency (avg)" @@ -836,6 +886,11 @@ "description": "Current number of translog operations for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -880,14 +935,14 @@ "y": 18 }, "id": 13, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n 
opensearch_index_translog_operations_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_translog_operations_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -906,6 +961,11 @@ "description": "Rate of documents deleted for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -950,14 +1010,14 @@ "y": 18 }, "id": 14, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_delete_current_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_indexing_delete_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -978,7 +1038,7 @@ }, "id": 15, "panels": [ ], - "title": "Index Capacity", + "title": "Index capacity", "type": "row" }, { @@ -989,6 +1049,11 @@ "description": "Number of indexed documents for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -1033,14 +1098,14 @@ "y": 27 }, "id": 16, - "pluginVersion": "v11.4.0", 
+ "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_indexing_index_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_indexing_index_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1059,6 +1124,11 @@ "description": "Current number of segments for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -1103,14 +1173,14 @@ "y": 27 }, "id": 17, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_segments_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_segments_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1130,7 +1200,10 @@ "fieldConfig": { "defaults": { "custom": { - "drawStyle": "points" + "drawStyle": "points", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "thresholds": { "steps": [ @@ -1176,18 +1249,18 @@ "y": 27 }, "id": 18, - "pluginVersion": "v11.4.0", + 
"pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster) (increase(opensearch_index_merges_total_docs_count{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}[$__interval:])) > 0", + "expr": "(\n avg by (job,cluster,opensearch_cluster,instance,index) (\n increase(opensearch_index_merges_total_docs_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}[$__interval:] offset -$__interval)\n)\n) > 0", "format": "time_series", "instant": false, "intervalFactor": 2, - "legendFormat": "{{node}}: Merge docs increase", + "legendFormat": "{{index}}: Merge docs increase", "refId": "Merge docs increase" } ], @@ -1202,6 +1275,11 @@ "description": "Size of query cache and request cache.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -1251,14 +1329,14 @@ "mode": "multi" } }, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster) (\n opensearch_index_querycache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster) (\n opensearch_index_querycache_memory_size_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1270,7 +1348,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by 
(job,opensearch_cluster) (\n opensearch_index_requestcache_memory_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster) (\n opensearch_index_requestcache_memory_size_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1289,6 +1367,11 @@ "description": "Size of the store in bytes for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -1333,14 +1416,14 @@ "y": 35 }, "id": 20, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,opensearch_cluster,index) (\n opensearch_index_store_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_store_size_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1359,6 +1442,11 @@ "description": "Memory used by segments for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -1403,14 +1491,14 @@ "y": 35 }, "id": 21, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": 
"avg by (job,opensearch_cluster,index) (\n opensearch_index_segments_memory_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_segments_memory_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1430,7 +1518,10 @@ "fieldConfig": { "defaults": { "custom": { - "drawStyle": "points" + "drawStyle": "points", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" }, "thresholds": { "steps": [ @@ -1476,14 +1567,14 @@ "y": 35 }, "id": 22, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by(job,opensearch_cluster,index) (opensearch_index_merges_current_size_bytes{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",index=~\"$index\", context=\"total\"}) > 0", + "expr": "(\n avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_merges_current_size_bytes{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)\n) > 0", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1502,6 +1593,11 @@ "description": "The number of index shards for the selected index.", "fieldConfig": { "defaults": { + "custom": { + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "thresholds": { "steps": [ { @@ -1546,14 +1642,14 @@ "y": 35 }, "id": 23, - "pluginVersion": "v11.4.0", + "pluginVersion": "v11.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${prometheus_datasource}" 
}, - "expr": "sum by (index) (avg by(job,opensearch_cluster) (opensearch_index_shards_number{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\",node=~\"$node\", type=~\"active|active_primary\"}))", + "expr": "sum by (index) (\n avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_shards_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", type=~\"active|active_primary\"}\n)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1589,7 +1685,22 @@ "label": "Job", "multi": true, "name": "job", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\"}, job)", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\"}, job)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\"}, cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -1604,7 +1715,7 @@ "label": "Opensearch_cluster", "multi": true, "name": "opensearch_cluster", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\"}, opensearch_cluster)", + "query": "label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\"}, opensearch_cluster)", "refresh": 2, "sort": 1, "type": "query" @@ -1616,10 +1727,10 @@ "uid": "${prometheus_datasource}" }, "includeAll": true, - "label": "Node", + "label": "Instance", "multi": true, - "name": "node", - "query": "label_values(opensearch_cluster_status{opensearch_cluster!=\"\",job=~\"$job\",opensearch_cluster=~\"$opensearch_cluster\"}, node)", + "name": "instance", + "query": 
"label_values(opensearch_cluster_status{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}, instance)", "refresh": 2, "sort": 1, "type": "query" @@ -1631,6 +1742,17 @@ "query": "loki", "regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+", "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${prometheus_datasource}" + }, + "multi": true, + "name": "index", + "query": "label_values(opensearch_index_search_fetch_count{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\"}, index)", + "refresh": 2, + "type": "query" } ] }, @@ -1639,6 +1761,6 @@ "to": "now" }, "timezone": "default", - "title": "OpenSearch Search and Index Overview", + "title": "OpenSearch search and index overview", "uid": "opensearch-search-and-index-overview" } \ No newline at end of file diff --git a/opensearch-mixin/g.libsonnet b/opensearch-mixin/g.libsonnet index e6a2060ee..f89dcc064 100644 --- a/opensearch-mixin/g.libsonnet +++ b/opensearch-mixin/g.libsonnet @@ -1 +1 @@ -import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.0.0/main.libsonnet' diff --git a/opensearch-mixin/jsonnetfile.json b/opensearch-mixin/jsonnetfile.json index 7205eeac9..ed5b224a1 100644 --- a/opensearch-mixin/jsonnetfile.json +++ b/opensearch-mixin/jsonnetfile.json @@ -1,51 +1,43 @@ { - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" - } - }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet.git", - "subdir": "gen/grafonnet-v11.4.0" - } - }, - "version": "main" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "common-lib" - } - }, - "version": "master" - }, - { - 
"source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "grafana-cloud-integration-utils" - } - }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "logs-lib" - } - }, - "version": "master" - } - ], - "legacyImports": true -} + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet-lib.git", + "subdir": "grafonnet" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "common-lib" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "grafana-cloud-integration-utils" + } + }, + "version": "master" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs.git", + "subdir": "logs-lib" + } + }, + "version": "master" + } + ], + "legacyImports": true + } + \ No newline at end of file diff --git a/opensearch-mixin/links.libsonnet b/opensearch-mixin/links.libsonnet index 867b91809..f5f5dec0a 100644 --- a/opensearch-mixin/links.libsonnet +++ b/opensearch-mixin/links.libsonnet @@ -5,28 +5,20 @@ local g = import './g.libsonnet'; new(this): { opensearchClusterOverview: - link.link.new('Opensearch Cluster Overview', '/d/' + this.grafana.dashboards['opensearch-cluster-overview.json'].uid) + link.link.new('Opensearch cluster overview', '/d/' + this.grafana.dashboards['opensearch-cluster-overview.json'].uid) + link.link.options.withKeepTime(true), - // opensearchNodeOverview: - // link.link.new('Opensearch Node Overview', '/d/' + this.grafana.dashboards['opensearch-node-overview.json'].uid) - // + link.link.options.withKeepTime(true), - - // opensearchSearchAndIndexOverview: - // link.link.new('Opensearch Search and Index Overview', '/d/' + this.grafana.dashboards['opensearch-search-and-index-overview.json'].uid) - // + 
link.link.options.withKeepTime(true), + opensearchNodeOverview: + link.link.new('Opensearch node overview', '/d/' + this.grafana.dashboards['opensearch-node-overview.json'].uid) + + link.link.options.withKeepTime(true), - otherDashboards: - link.dashboards.new('All dashboards', this.config.dashboardTags) - + link.dashboards.options.withIncludeVars(true) - + link.dashboards.options.withKeepTime(true) - + link.dashboards.options.withAsDropdown(true), - } - + - if this.config.enableLokiLogs then + opensearchSearchAndIndexOverview: + link.link.new('Opensearch search and index overview', '/d/' + this.grafana.dashboards['opensearch-search-and-index-overview.json'].uid) + + link.link.options.withKeepTime(true), + } + if this.config.enableLokiLogs then { logs: - link.link.new('Opensearch Logs', '/d/' + this.grafana.dashboards['opensearch-logs.json'].uid) + link.link.new('Opensearch logs', '/d/' + this.grafana.dashboards['opensearch-logs.json'].uid) + link.link.options.withKeepTime(true), } else {}, diff --git a/opensearch-mixin/main.libsonnet b/opensearch-mixin/main.libsonnet index 598ae832b..ddd83d3c5 100644 --- a/opensearch-mixin/main.libsonnet +++ b/opensearch-mixin/main.libsonnet @@ -15,6 +15,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; new(): { local this = self, config: config, + signals: { [sig]: commonlib.signals.unmarshallJsonMulti( diff --git a/opensearch-mixin/panels.libsonnet b/opensearch-mixin/panels.libsonnet index 1b2e9e165..cef7b973a 100644 --- a/opensearch-mixin/panels.libsonnet +++ b/opensearch-mixin/panels.libsonnet @@ -1,18 +1,16 @@ local g = import './g.libsonnet'; local var = g.dashboard.variable; -local commonlib = import 'common-lib/common/main.libsonnet'; -local utils = commonlib.utils; { new(this):: { local signals = this.signals, - osRoles: + clusterOSRoles: g.panel.table.new('Roles') + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') + g.panel.table.queryOptions.withTargets([ - 
signals.roles.node_role_last_seen.asTarget() + signals.clusterOverview.node_role_last_seen.asTarget() + g.query.prometheus.withInstant(true), ]) + g.panel.table.queryOptions.withTransformations([ @@ -50,18 +48,18 @@ local utils = commonlib.utils; + g.panel.table.fieldOverride.byRegexp.withProperty('custom.cellOptions', {type: 'color-text'}), ]), - osRolesTimeline: + clusterOSRolesTimeline: g.panel.statusHistory.new('Roles timeline') + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') + g.panel.statusHistory.options.withShowValue('never') + g.panel.statusHistory.options.withLegend(false) + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + g.panel.statusHistory.queryOptions.withTargets([ - signals.roles.node_role_data.asTarget(), - signals.roles.node_role_master.asTarget(), - signals.roles.node_role_ingest.asTarget(), - signals.roles.node_role_cluster_manager.asTarget(), - signals.roles.node_role_remote_cluster_client.asTarget(), + signals.clusterOverview.node_role_data.asTarget(), + signals.clusterOverview.node_role_master.asTarget(), + signals.clusterOverview.node_role_ingest.asTarget(), + signals.clusterOverview.node_role_cluster_manager.asTarget(), + signals.clusterOverview.node_role_remote_cluster_client.asTarget(), ]) + g.panel.statusHistory.standardOptions.withMappings([ { @@ -81,7 +79,7 @@ local utils = commonlib.utils; g.panel.stat.new('Cluster status') + g.panel.stat.panelOptions.withDescription('The overall health and availability of the OpenSearch cluster.') + g.panel.stat.queryOptions.withTargets([ - signals.cluster.cluster_status.asTarget() + signals.clusterOverview.cluster_status.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.stat.standardOptions.color.withMode('thresholds') @@ -103,13 +101,16 @@ local utils = commonlib.utils; g.panel.stat.standardOptions.threshold.step.withColor('red') + g.panel.stat.standardOptions.threshold.step.withValue(2), ]) - + 
g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + // + g.panel.stat.standardOptions.graphMode.withMode('none'), + + g.panel.stat.options.withGraphMode('none'), + nodeCountPanel: g.panel.stat.new('Node count') + g.panel.stat.panelOptions.withDescription('The number of running nodes across the OpenSearch cluster.') + g.panel.stat.queryOptions.withTargets([ - signals.cluster.cluster_nodes_number.asTarget() + signals.clusterOverview.cluster_nodes_number.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.stat.standardOptions.color.withMode('thresholds') @@ -121,13 +122,15 @@ local utils = commonlib.utils; g.panel.stat.standardOptions.threshold.step.withColor('green') + g.panel.stat.standardOptions.threshold.step.withValue(1), ]) - + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none'), + dataNodeCountPanel: g.panel.stat.new('Data node count') + g.panel.stat.panelOptions.withDescription('The number of data nodes in the OpenSearch cluster.') + g.panel.stat.queryOptions.withTargets([ - signals.cluster.cluster_datanodes_number.asTarget() + signals.clusterOverview.cluster_datanodes_number.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.stat.standardOptions.color.withMode('thresholds') @@ -139,13 +142,14 @@ local utils = commonlib.utils; g.panel.stat.standardOptions.threshold.step.withColor('green') + g.panel.stat.standardOptions.threshold.step.withValue(1), ]) - + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none'), shardCountPanel: g.panel.stat.new('Shard count') + g.panel.stat.panelOptions.withDescription('The number of shards in the OpenSearch cluster across all indices.') + g.panel.stat.queryOptions.withTargets([ 
- signals.cluster.cluster_shards_number_total.withExprWrappersMixin(['sum(', ')']).asTarget() + signals.clusterOverview.cluster_shards_number_total.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.stat.standardOptions.color.withMode('thresholds') @@ -157,13 +161,14 @@ local utils = commonlib.utils; g.panel.stat.standardOptions.threshold.step.withColor('green') + g.panel.stat.standardOptions.threshold.step.withValue(1), ]) - + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none'), activeShardsPercentagePanel: g.panel.stat.new('Active shards %') + g.panel.stat.panelOptions.withDescription('Percent of active shards across the OpenSearch cluster.') + g.panel.stat.queryOptions.withTargets([ - signals.cluster.cluster_shards_active_percent.asTarget() + signals.clusterOverview.cluster_shards_active_percent.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.stat.standardOptions.color.withMode('thresholds') @@ -178,13 +183,14 @@ local utils = commonlib.utils; + g.panel.stat.standardOptions.threshold.step.withValue(100), ]) + g.panel.stat.standardOptions.withUnit('percent') - + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.stat.options.withGraphMode('none'), topNodesByCPUUsagePanel: g.panel.barGauge.new('Top nodes by CPU usage') + g.panel.barGauge.panelOptions.withDescription('Top nodes by OS CPU usage across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withTargets([ - signals.topk.os_cpu_percent_topk.withExprWrappersMixin(['topk(10, sort_desc(', ')']).asTarget() + signals.clusterOverview.os_cpu_percent_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') @@ -197,14 +203,15 @@ local utils = commonlib.utils; + 
g.panel.barGauge.standardOptions.withMin(0) + g.panel.barGauge.standardOptions.withMax(100) + g.panel.barGauge.standardOptions.withUnit('percent') - + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.barGauge.options.withOrientation('horizontal'), breakersTrippedPanel: g.panel.barGauge.new('Breakers tripped') + g.panel.barGauge.panelOptions.withDescription('The total count of circuit breakers tripped across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withTargets([ - signals.topk.circuitbreaker_tripped_count_sum.asTarget() - + g.query.prometheus.withInterval('1m') + signals.clusterOverview.circuitbreaker_tripped_count_sum.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') @@ -215,13 +222,14 @@ local utils = commonlib.utils; + g.panel.barGauge.standardOptions.threshold.step.withValue(80), ]) + g.panel.barGauge.standardOptions.withUnit('trips') - + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.barGauge.options.withOrientation('horizontal'), shardStatusPanel: g.panel.barGauge.new('Shard status') + g.panel.barGauge.panelOptions.withDescription('Shard status counts across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withTargets([ - signals.cluster.cluster_shards_number_by_type.asTarget() + signals.clusterOverview.cluster_shards_number_by_type.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') @@ -232,13 +240,14 @@ local utils = commonlib.utils; + g.panel.barGauge.standardOptions.threshold.step.withValue(80), ]) + g.panel.barGauge.standardOptions.withUnit('shards') - + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + 
g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.barGauge.options.withOrientation('horizontal'), topNodesByDiskUsagePanel: g.panel.barGauge.new('Top nodes by disk usage') + g.panel.barGauge.panelOptions.withDescription('Top nodes by disk usage across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withTargets([ - signals.topk.fs_path_used_percent_topk.asTarget() + signals.clusterOverview.fs_path_used_percent_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') @@ -251,70 +260,92 @@ local utils = commonlib.utils; + g.panel.barGauge.standardOptions.withMin(0) + g.panel.barGauge.standardOptions.withMax(100) + g.panel.barGauge.standardOptions.withUnit('percent') - + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']), + + g.panel.barGauge.options.reduceOptions.withCalcs(['lastNotNull']) + + g.panel.barGauge.options.withOrientation('horizontal'), totalDocumentsPanel: g.panel.timeSeries.new('Total documents') + g.panel.timeSeries.panelOptions.withDescription('The total count of documents indexed across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.cluster.indices_indexing_index_count_avg.asTarget() + signals.clusterOverview.indices_indexing_index_count_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) - + g.panel.timeSeries.standardOptions.withUnit('documents'), + + g.panel.timeSeries.standardOptions.withUnit('documents') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), pendingTasksPanel: g.panel.timeSeries.new('Pending tasks') + g.panel.timeSeries.panelOptions.withDescription('The number of tasks waiting to be executed across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - 
signals.cluster.cluster_pending_tasks_number.asTarget() + signals.clusterOverview.cluster_pending_tasks_number.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) - + g.panel.timeSeries.standardOptions.withUnit('tasks'), + + g.panel.timeSeries.standardOptions.withUnit('tasks') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), storeSizePanel: g.panel.timeSeries.new('Store size') + g.panel.timeSeries.panelOptions.withDescription('The total size of the store across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.cluster.indices_store_size_bytes_avg.asTarget() + signals.clusterOverview.indices_store_size_bytes_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) - + g.panel.timeSeries.standardOptions.withUnit('bytes'), + + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), maxTaskWaitTimePanel: g.panel.timeSeries.new('Max task wait time') + g.panel.timeSeries.panelOptions.withDescription('The max wait time for tasks to be executed across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withTargets([signals.cluster.cluster_task_max_wait_seconds.asTarget()]) - + g.panel.timeSeries.standardOptions.withUnit('s'), + + g.panel.timeSeries.queryOptions.withTargets([signals.clusterOverview.cluster_task_max_wait_seconds.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), 
topIndicesByRequestRatePanel: g.panel.timeSeries.new('Top indices by request rate') + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withTargets([signals.topk.search_current_inflight_topk.asTarget()]) - + g.panel.timeSeries.standardOptions.withUnit('reqps'), + + g.panel.timeSeries.queryOptions.withTargets([signals.clusterOverview.search_current_inflight_topk.asTarget()]) + + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), topIndicesByRequestLatencyPanel: g.panel.timeSeries.new('Top indices by request latency') + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.search_avg_latency_topk.asTarget() - + g.query.prometheus.withInterval('1m'), + signals.clusterOverview.search_avg_latency_topk.asTarget() + + g.query.prometheus.withInterval('2m'), ]) - + g.panel.timeSeries.standardOptions.withUnit('s'), + + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), topIndicesByCombinedCacheHitRatioPanel: g.panel.timeSeries.new('Top indices by combined cache hit ratio') + g.panel.timeSeries.panelOptions.withDescription('Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.request_query_cache_hit_rate_topk.asTarget() + 
signals.clusterOverview.request_query_cache_hit_rate_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) - + g.panel.timeSeries.standardOptions.withUnit('percent'), + + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), topNodesByIngestRatePanel: g.panel.timeSeries.new('Top nodes by ingest rate') + g.panel.timeSeries.panelOptions.withDescription('Top nodes by rate of ingest across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.ingest_throughput_topk.asTarget() + signals.clusterOverview.ingest_throughput_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('Bps'), @@ -323,64 +354,77 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Top nodes by ingest latency') + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion latency across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.ingest_latency_topk.asTarget() - + g.query.prometheus.withInterval('1m') + signals.clusterOverview.ingest_latency_topk.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withUnit('s'), topNodesByIngestErrorsPanel: g.panel.timeSeries.new('Top nodes by ingest errors') + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion failures across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.ingest_failures_topk.asTarget() - + 
g.query.prometheus.withInterval('1m') + signals.clusterOverview.ingest_failures_topk.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withUnit('errors'), topIndicesByIndexRatePanel: g.panel.timeSeries.new('Top indices by index rate') + g.panel.timeSeries.panelOptions.withDescription('Top indices by rate of document indexing across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.indexing_current_topk.asTarget() + signals.clusterOverview.indexing_current_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withUnit('documents/s'), topIndicesByIndexLatencyPanel: g.panel.timeSeries.new('Top indices by index latency') + g.panel.timeSeries.panelOptions.withDescription('Top indices by indexing latency across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.indexing_latency_topk.asTarget() - + g.query.prometheus.withInterval('1m') + signals.clusterOverview.indexing_latency_topk.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withUnit('s'), topIndicesByIndexFailuresPanel: g.panel.timeSeries.new('Top indices by 
index failures') + g.panel.timeSeries.panelOptions.withDescription('Top indices by index document failures across the OpenSearch cluster.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.topk.indexing_failed_topk.asTarget() - + g.query.prometheus.withInterval('1m') + signals.clusterOverview.indexing_failed_topk.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withUnit('failures'), // Node Overview Panels - Refactored to use modern patterns and signals - // Node CPU usage nodeCpuUsage: g.panel.timeSeries.new('Node CPU usage') + g.panel.timeSeries.panelOptions.withDescription('CPU usage percentage of the node\'s Operating System.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_cpu_percent.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_cpu_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') - + g.panel.timeSeries.standardOptions.withDecimals(1) + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withUnit('percent') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -391,14 +435,13 @@ local utils = commonlib.utils; // Node memory usage nodeMemoryUsage: g.panel.timeSeries.new('Node memory usage') - + g.panel.timeSeries.panelOptions.withDescription('Memory usage 
percentage of the node for the Operating System and OpenSearch') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_mem_used_percent.asTarget()]) + + g.panel.timeSeries.panelOptions.withDescription('Memory usage percentage of the node for the operating system and OpenSearch') + + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_mem_used_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') - + g.panel.timeSeries.standardOptions.withDecimals(1) + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withUnit('percent') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -411,11 +454,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Node I/O') + g.panel.timeSeries.panelOptions.withDescription('Node file system read and write data.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.node.fs_read_bps.asTarget(), - signals.node.fs_write_bps.asTarget(), + signals.nodeOverview.fs_read_bps.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.nodeOverview.fs_write_bps.asTarget() + + g.query.prometheus.withInterval('2m') ]) + g.panel.timeSeries.standardOptions.withUnit('Bps') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(1) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -434,8 +479,8 @@ local utils = commonlib.utils; 
nodeOpenConnections: g.panel.timeSeries.new('Node open connections') + g.panel.timeSeries.panelOptions.withDescription('Number of open connections for the selected node.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.transport_open_connections.asTarget()]) - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(30) + + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.transport_open_connections.asTarget()]) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -448,13 +493,12 @@ local utils = commonlib.utils; nodeDiskUsage: g.panel.timeSeries.new('Node disk usage') + g.panel.timeSeries.panelOptions.withDescription('Disk usage percentage of the selected node.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.fs_used_percent.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.fs_used_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') - + g.panel.timeSeries.standardOptions.withDecimals(1) + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withUnit('percent') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(1) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -465,14 +509,13 @@ local utils = commonlib.utils; // Node memory swap nodeMemorySwap: g.panel.timeSeries.new('Node memory swap') - + g.panel.timeSeries.panelOptions.withDescription('Percentage of swap 
space used by OpenSearch and the Operating System on the selected node.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.os_swap_used_percent.asTarget()]) + + g.panel.timeSeries.panelOptions.withDescription('Percentage of swap space used by OpenSearch and the operating system on the selected node.') + + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_swap_used_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') - + g.panel.timeSeries.standardOptions.withDecimals(1) + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withUnit('percent') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -483,13 +526,15 @@ local utils = commonlib.utils; // Node network traffic nodeNetworkTraffic: g.panel.timeSeries.new('Node network traffic') - + g.panel.timeSeries.panelOptions.withDescription('Network traffic on the node\'s Operating System.') + + g.panel.timeSeries.panelOptions.withDescription('Network traffic on the node\'s operating system.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.node.transport_rx_bps.asTarget(), - signals.node.transport_tx_bps.asTarget(), + signals.nodeOverview.transport_rx_bps.asTarget() + + g.query.prometheus.withInterval('2m'), + signals.nodeOverview.transport_tx_bps.asTarget() + + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.standardOptions.withUnit('Bps') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + 
g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) @@ -500,23 +545,57 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Circuit breakers') + g.panel.timeSeries.panelOptions.withDescription('Circuit breakers tripped on the selected node by type') + g.panel.timeSeries.queryOptions.withTargets([ - signals.node.circuitbreaker_tripped_sum_by_name.asTarget() - + g.query.prometheus.withInterval('1m') + signals.nodeOverview.circuitbreaker_tripped_sum_by_name.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) - + g.panel.timeSeries.standardOptions.withUnit('trips'), + + g.panel.timeSeries.standardOptions.withUnit('trips') + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + + // Node roles timeline + nodeOSRolesTimeline: + g.panel.statusHistory.new('Roles timeline') + + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') + + g.panel.statusHistory.options.withShowValue('never') + + g.panel.statusHistory.options.withLegend(false) + + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + + g.panel.statusHistory.queryOptions.withTargets([ + signals.nodeOverview.node_role_data.asTarget(), + signals.nodeOverview.node_role_master.asTarget(), + signals.nodeOverview.node_role_ingest.asTarget(), + signals.nodeOverview.node_role_cluster_manager.asTarget(), + signals.nodeOverview.node_role_remote_cluster_client.asTarget(), + ]) + + g.panel.statusHistory.standardOptions.withMappings([ + { + type: 
'value', + options: { + '2': {color: 'light-purple', index: 0, text: 'data'}, + '3': {color: 'light-green', index: 1, text: 'master'}, + '4': {color: 'light-blue', index: 2, text: 'ingest'}, + '5': {color: 'light-yellow', index: 3, text: 'cluster_manager'}, + '6': {color: 'super-light-red', index: 4, text: 'remote_cluster_client'}, + }, + }, + ]), // JVM heap used vs committed jvmHeapUsedVsCommitted: g.panel.timeSeries.new('JVM heap used vs committed') + g.panel.timeSeries.panelOptions.withDescription('JVM heap memory usage vs committed.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.node.jvm_heap_used_bytes.asTarget(), - signals.node.jvm_heap_committed_bytes.asTarget(), + signals.nodeOverview.jvm_heap_used_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.nodeOverview.jvm_heap_committed_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -526,12 +605,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('JVM non-heap used vs committed') + g.panel.timeSeries.panelOptions.withDescription('JVM non-heap memory usage vs committed.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.node.jvm_nonheap_used_bytes.asTarget(), - signals.node.jvm_nonheap_committed_bytes.asTarget(), + signals.nodeOverview.jvm_nonheap_used_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), + signals.nodeOverview.jvm_nonheap_committed_bytes.asTarget() + + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -540,10 +620,12 @@ local utils = commonlib.utils; jvmThreads: g.panel.timeSeries.new('JVM threads') + g.panel.timeSeries.panelOptions.withDescription('JVM thread count.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_threads.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.jvm_threads.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('threads') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -552,10 +634,12 @@ local utils = commonlib.utils; jvmBufferPools: g.panel.timeSeries.new('JVM buffer pools') + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_bufferpool_number.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.jvm_bufferpool_number.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -564,10 +648,12 @@ local utils = commonlib.utils; jvmUptime: g.panel.timeSeries.new('JVM uptime') + g.panel.timeSeries.panelOptions.withDescription('JVM uptime in seconds.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_uptime.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.jvm_uptime.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('s') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -576,10 +662,13 @@ local utils = commonlib.utils; jvmGarbageCollections: g.panel.timeSeries.new('JVM garbage collections') + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection count.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_gc_collections.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.jvm_gc_collections.asTarget() + + g.query.prometheus.withInterval('2m') + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('collections') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -588,21 +677,27 @@ local utils = commonlib.utils; jvmGarbageCollectionTime: g.panel.timeSeries.new('JVM garbage collection time') + 
g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection time in milliseconds.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_gc_time.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.jvm_gc_time.asTarget() + + g.query.prometheus.withInterval('2m') + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('ms') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), // JVM buffer pool usage jvmBufferPoolUsage: g.panel.timeSeries.new('JVM buffer pool usage') + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage by pool.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.jvm_bufferpool_used_percent.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.jvm_bufferpool_used_percent.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -611,10 +706,12 @@ local utils = commonlib.utils; threadPoolThreads: g.panel.timeSeries.new('Thread pool threads') + g.panel.timeSeries.panelOptions.withDescription('Thread pool thread count.') - + 
g.panel.timeSeries.queryOptions.withTargets([signals.node.threadpool_threads.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.threadpool_threads.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('threads') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), @@ -623,28 +720,35 @@ local utils = commonlib.utils; threadPoolTasks: g.panel.timeSeries.new('Thread pool tasks') + g.panel.timeSeries.panelOptions.withDescription('Thread pool task count.') - + g.panel.timeSeries.queryOptions.withTargets([signals.node.threadpool_tasks.asTarget()]) + + g.panel.timeSeries.queryOptions.withTargets([ + signals.nodeOverview.threadpool_tasks.asTarget() + + g.query.prometheus.withIntervalFactor(2), + ]) + g.panel.timeSeries.standardOptions.withUnit('tasks') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + // Search and Index Overview Panels - Refactored to use modern patterns and signals // Search Performance Panels searchRequestRatePanel: g.panel.timeSeries.new('Request rate') + g.panel.timeSeries.panelOptions.withDescription('Rate of fetch, scroll, and query requests by selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.search.search_query_current_avg.asTarget() + signals.searchAndIndexOverview.search_query_current_avg.asTarget() + 
g.query.prometheus.withIntervalFactor(2), - signals.search.search_fetch_current_avg.asTarget() + signals.searchAndIndexOverview.search_fetch_current_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), - signals.search.search_scroll_current_avg.asTarget() + signals.searchAndIndexOverview.search_scroll_current_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('reqps') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { matcher: {id: 'byValue', options: {reducer: 'allIsZero', op: 'gte', value: 0}}, @@ -657,17 +761,20 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Request latency') + g.panel.timeSeries.panelOptions.withDescription('Latency of fetch, scroll, and query requests by selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.search.search_query_latency_avg.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.search_query_latency_avg.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), - signals.search.search_fetch_latency_avg.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.search_fetch_latency_avg.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), - signals.search.search_scroll_latency_avg.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.search_scroll_latency_avg.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, @@ -680,12 +787,15 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Cache hit ratio') + g.panel.timeSeries.panelOptions.withDescription('Ratio of query cache and request cache hits and misses.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.search.request_cache_hit_rate.asTarget() + signals.searchAndIndexOverview.request_cache_hit_rate.asTarget() + g.query.prometheus.withIntervalFactor(2), - signals.search.query_cache_hit_rate.asTarget() + signals.searchAndIndexOverview.query_cache_hit_rate.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('percent') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, @@ -698,17 +808,20 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Evictions') + g.panel.timeSeries.panelOptions.withDescription('Total evictions count by cache type for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.search.query_cache_evictions.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.query_cache_evictions.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), - signals.search.request_cache_evictions.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.request_cache_evictions.asTarget() + + g.query.prometheus.withInterval('2m') + 
g.query.prometheus.withIntervalFactor(2), - signals.search.fielddata_evictions.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.fielddata_evictions.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('evictions') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -728,10 +841,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Index rate') + g.panel.timeSeries.panelOptions.withDescription('Rate of indexed documents for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.indexing_current.asTarget() + signals.searchAndIndexOverview.indexing_current.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('documents/s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, @@ -743,10 +859,14 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Index latency') + g.panel.timeSeries.panelOptions.withDescription('Document indexing latency for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.indexing_latency.asTarget() - + g.query.prometheus.withInterval('1m'), + signals.searchAndIndexOverview.indexing_latency.asTarget() + + 
g.query.prometheus.withInterval('2m') + + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -758,11 +878,14 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Index failures') + g.panel.timeSeries.panelOptions.withDescription('Number of indexing failures for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.indexing_failed.asTarget() - + g.query.prometheus.withInterval('1m') + signals.searchAndIndexOverview.indexing_failed.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('failures') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -779,10 +902,14 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Flush latency') + g.panel.timeSeries.panelOptions.withDescription('Index flush latency for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.flush_latency.asTarget() + signals.searchAndIndexOverview.flush_latency.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + 
g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -798,14 +925,20 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Merge time') + g.panel.timeSeries.panelOptions.withDescription('Index merge time for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.merge_time.asTarget() + signals.searchAndIndexOverview.merge_time.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), - signals.indexing.merge_stopped_time.asTarget() + signals.searchAndIndexOverview.merge_stopped_time.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), - signals.indexing.merge_throttled_time.asTarget() + signals.searchAndIndexOverview.merge_throttled_time.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') @@ -823,10 +956,14 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Refresh latency') + g.panel.timeSeries.panelOptions.withDescription('Index refresh latency for the selected index.') + 
g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.refresh_latency.asTarget() + signals.searchAndIndexOverview.refresh_latency.asTarget() + + g.query.prometheus.withInterval('2m') + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -843,10 +980,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Translog operations') + g.panel.timeSeries.panelOptions.withDescription('Current number of translog operations for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.translog_ops.asTarget() + signals.searchAndIndexOverview.translog_ops.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('operations') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -862,10 +1002,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Docs deleted') + g.panel.timeSeries.panelOptions.withDescription('Rate of documents deleted for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.indexing_delete_current.asTarget() + signals.searchAndIndexOverview.indexing_delete_current.asTarget() + 
g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('documents/s') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -881,10 +1024,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Documents indexed') + g.panel.timeSeries.panelOptions.withDescription('Number of indexed documents for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.indexing_count.asTarget() + signals.searchAndIndexOverview.indexing_count.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('documents') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -901,10 +1047,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Segment count') + g.panel.timeSeries.panelOptions.withDescription('Current number of segments for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.segments_number.asTarget() + signals.searchAndIndexOverview.segments_number.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('segments') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -920,10 +1069,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Merge count') + g.panel.timeSeries.panelOptions.withDescription('Number of merge operations for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.merge_docs.asTarget() + signals.searchAndIndexOverview.merge_docs.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('merges') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') @@ -941,12 +1093,15 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Cache size') + g.panel.timeSeries.panelOptions.withDescription('Size of query cache and request cache.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.search.query_cache_memory.asTarget() + signals.searchAndIndexOverview.query_cache_memory.asTarget() + g.query.prometheus.withIntervalFactor(2), - signals.search.request_cache_memory.asTarget() + signals.searchAndIndexOverview.request_cache_memory.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + 
g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -963,10 +1118,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Store size') + g.panel.timeSeries.panelOptions.withDescription('Size of the store in bytes for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.store_size_bytes.asTarget() + signals.searchAndIndexOverview.store_size_bytes.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -982,10 +1140,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Segment size') + g.panel.timeSeries.panelOptions.withDescription('Memory used by segments for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.segments_memory_bytes.asTarget() + signals.searchAndIndexOverview.segments_memory_bytes.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + 
g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -1001,10 +1162,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Merge size') + g.panel.timeSeries.panelOptions.withDescription('Size of merge operations in bytes for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.merge_current_size.asTarget() + signals.searchAndIndexOverview.merge_current_size.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('bytes') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') @@ -1021,10 +1185,13 @@ local utils = commonlib.utils; g.panel.timeSeries.new('Shard count') + g.panel.timeSeries.panelOptions.withDescription('The number of index shards for the selected index.') + g.panel.timeSeries.queryOptions.withTargets([ - signals.indexing.shards_per_index.asTarget() + signals.searchAndIndexOverview.shards_per_index.asTarget() + g.query.prometheus.withIntervalFactor(2), ]) + g.panel.timeSeries.standardOptions.withUnit('shards') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') + + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), diff --git a/opensearch-mixin/prometheus_rules_out/prometheus_alerts.yaml b/opensearch-mixin/prometheus_rules_out/prometheus_alerts.yaml index 
a84d22176..b6712ab0d 100644 --- a/opensearch-mixin/prometheus_rules_out/prometheus_alerts.yaml +++ b/opensearch-mixin/prometheus_rules_out/prometheus_alerts.yaml @@ -6,7 +6,7 @@ groups: description: '{{$labels.cluster}} health status is yellow over the last 5 minutes' summary: At least one of the clusters is reporting a yellow status. expr: | - opensearch_cluster_status{opensearch_cluster!=""} == 1 + opensearch_cluster_status{job="integrations/opensearch"} == 1 for: 5m labels: severity: warning @@ -15,7 +15,7 @@ groups: description: '{{$labels.cluster}} health status is red over the last 5 minutes' summary: At least one of the clusters is reporting a red status. expr: | - opensearch_cluster_status{opensearch_cluster!=""} == 2 + opensearch_cluster_status{job="integrations/opensearch"} == 2 for: 5m labels: severity: critical @@ -25,7 +25,7 @@ groups: {{$labels.cluster}} has had {{ printf "%.0f" $value }} shard reallocation over the last 1m which is above the threshold of 0. summary: A node has gone offline or has been disconnected triggering shard reallocation. expr: | - sum without(type) (opensearch_cluster_shards_number{opensearch_cluster!="", type="relocating"}) > 0 + sum without(type) (opensearch_cluster_shards_number{job="integrations/opensearch", type="relocating"}) > 0 for: 1m labels: severity: warning @@ -35,7 +35,7 @@ groups: {{$labels.cluster}} has had {{ printf "%.0f" $value }} shard unassigned over the last 5m which is above the threshold of 0. summary: There are shards that have been detected as unassigned. expr: | - sum without(type) (opensearch_cluster_shards_number{opensearch_cluster!="", type="unassigned"}) > 0 + sum without(type) (opensearch_cluster_shards_number{job="integrations/opensearch", type="unassigned"}) > 0 for: 5m labels: severity: warning @@ -45,7 +45,7 @@ groups: {{$labels.node}} has had {{ printf "%.0f" $value }} disk usage over the last 5m which is above the threshold of 60. 
summary: The node disk usage has exceeded the warning threshold. expr: | - 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{opensearch_cluster!=""} - opensearch_fs_path_free_bytes{opensearch_cluster!=""}) / opensearch_fs_path_total_bytes{opensearch_cluster!=""}) > 60 + 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{job="integrations/opensearch"} - opensearch_fs_path_free_bytes{job="integrations/opensearch"}) / opensearch_fs_path_total_bytes{job="integrations/opensearch"}) > 60 for: 5m labels: severity: warning @@ -55,7 +55,7 @@ groups: {{$labels.node}} has had {{ printf "%.0f" $value }}% disk usage over the last 5m which is above the threshold of 80. summary: The node disk usage has exceeded the critical threshold. expr: | - 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{opensearch_cluster!=""} - opensearch_fs_path_free_bytes{opensearch_cluster!=""}) / opensearch_fs_path_total_bytes{opensearch_cluster!=""}) > 80 + 100 * sum without(nodeid, path, mount, type) ((opensearch_fs_path_total_bytes{job="integrations/opensearch"} - opensearch_fs_path_free_bytes{job="integrations/opensearch"}) / opensearch_fs_path_total_bytes{job="integrations/opensearch"}) > 80 for: 5m labels: severity: critical @@ -65,7 +65,7 @@ groups: {{$labels.node}} has had {{ printf "%.0f" $value }}% CPU usage over the last 5m which is above the threshold of 70. summary: The node CPU usage has exceeded the warning threshold. expr: | - sum without(nodeid) (opensearch_os_cpu_percent{opensearch_cluster!=""}) > 70 + sum without(nodeid) (opensearch_os_cpu_percent{job="integrations/opensearch"}) > 70 for: 5m labels: severity: warning @@ -75,7 +75,7 @@ groups: {{$labels.node}} has had {{ printf "%.0f" $value }}% CPU usage over the last 5m which is above the threshold of 85. summary: The node CPU usage has exceeded the critical threshold. 
expr: | - sum without(nodeid) (opensearch_os_cpu_percent{opensearch_cluster!=""}) > 85 + sum without(nodeid) (opensearch_os_cpu_percent{job="integrations/opensearch"}) > 85 for: 5m labels: severity: critical @@ -85,7 +85,7 @@ groups: {{$labels.node}} has had {{ printf "%.0f" $value }}% memory usage over the last 5m which is above the threshold of 70. summary: The node memory usage has exceeded the warning threshold. expr: | - sum without(nodeid) (opensearch_os_mem_used_percent{opensearch_cluster!=""}) > 70 + sum without(nodeid) (opensearch_os_mem_used_percent{job="integrations/opensearch"}) > 70 for: 5m labels: severity: warning @@ -95,7 +95,7 @@ groups: {{$labels.node}} has had {{ printf "%.0f" $value }}% memory usage over the last 5m which is above the threshold of 85. summary: The node memory usage has exceeded the critical threshold. expr: | - sum without(nodeid) (opensearch_os_mem_used_percent{opensearch_cluster!=""}) > 85 + sum without(nodeid) (opensearch_os_mem_used_percent{job="integrations/opensearch"}) > 85 for: 5m labels: severity: critical @@ -105,7 +105,7 @@ groups: {{$labels.index}} has had {{ printf "%.0f" $value }}s of request latency over the last 5m which is above the threshold of 0.5. summary: The request latency has exceeded the warning threshold. 
expr: | - sum without(context) ((increase(opensearch_index_search_fetch_time_seconds{opensearch_cluster!="", context="total"}[5m])+increase(opensearch_index_search_query_time_seconds{context="total"}[5m])+increase(opensearch_index_search_scroll_time_seconds{context="total"}[5m])) / clamp_min(increase(opensearch_index_search_fetch_count{context="total"}[5m])+increase(opensearch_index_search_query_count{context="total"}[5m])+increase(opensearch_index_search_scroll_count{context="total"}[5m]), 1)) > 0.5 + sum without(context) ((increase(opensearch_index_search_fetch_time_seconds{job="integrations/opensearch", context="total"}[5m])+increase(opensearch_index_search_query_time_seconds{context="total"}[5m])+increase(opensearch_index_search_scroll_time_seconds{context="total"}[5m])) / clamp_min(increase(opensearch_index_search_fetch_count{context="total"}[5m])+increase(opensearch_index_search_query_count{context="total"}[5m])+increase(opensearch_index_search_scroll_count{context="total"}[5m]), 1)) > 0.5 for: 5m labels: severity: warning @@ -115,7 +115,7 @@ groups: {{$labels.index}} has had {{ printf "%.0f" $value }}s of index latency over the last 5m which is above the threshold of 0.5. summary: The index latency has exceeded the warning threshold. 
expr: | - sum without(context) (increase(opensearch_index_indexing_index_time_seconds{opensearch_cluster!="", context="total"}[5m]) / clamp_min(increase(opensearch_index_indexing_index_count{context="total"}[5m]), 1)) > 0.5 + sum without(context) (increase(opensearch_index_indexing_index_time_seconds{job="integrations/opensearch", context="total"}[5m]) / clamp_min(increase(opensearch_index_indexing_index_count{context="total"}[5m]), 1)) > 0.5 for: 5m labels: severity: warning diff --git a/opensearch-mixin/rows.libsonnet b/opensearch-mixin/rows.libsonnet index a80b2526d..214c6ba5a 100644 --- a/opensearch-mixin/rows.libsonnet +++ b/opensearch-mixin/rows.libsonnet @@ -3,7 +3,7 @@ local g = import './g.libsonnet'; { new(this): { clusterOverviewRow: - g.panel.row.new('Cluster Overview') + g.panel.row.new('Cluster overview') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.clusterStatusPanel { gridPos+: { w: 5, h: 6 } }, @@ -13,16 +13,16 @@ local g = import './g.libsonnet'; this.grafana.panels.activeShardsPercentagePanel { gridPos+: { w: 4, h: 6 } }, ]), - rolesRow: - g.panel.row.new('Node Roles') + clusterRolesRow: + g.panel.row.new('Node roles') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - this.grafana.panels.osRoles { gridPos+: { w: 24 } }, - this.grafana.panels.osRolesTimeline { gridPos+: { w: 24 } }, + this.grafana.panels.clusterOSRoles { gridPos+: { w: 24 } }, + this.grafana.panels.clusterOSRolesTimeline { gridPos+: { w: 24 } }, ]), resourceUsageRow: - g.panel.row.new('Resource Usage') + g.panel.row.new('Resource usage') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.topNodesByCPUUsagePanel { gridPos+: { w: 8 } }, @@ -31,7 +31,7 @@ local g = import './g.libsonnet'; ]), storageAndTasksRow: - g.panel.row.new('Storage and Tasks') + g.panel.row.new('Storage and tasks') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.topNodesByDiskUsagePanel { 
gridPos+: { w: 8 } }, @@ -42,7 +42,7 @@ local g = import './g.libsonnet'; ]), searchPerformanceRow: - g.panel.row.new('Search Performance') + g.panel.row.new('Search performance') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.topIndicesByRequestRatePanel { gridPos+: { w: 8 } }, @@ -51,7 +51,7 @@ local g = import './g.libsonnet'; ]), ingestPerformanceRow: - g.panel.row.new('Ingest Performance') + g.panel.row.new('Ingest performance') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.topNodesByIngestRatePanel { gridPos+: { w: 8 } }, @@ -60,7 +60,7 @@ local g = import './g.libsonnet'; ]), indexingPerformanceRow: - g.panel.row.new('Indexing Performance') + g.panel.row.new('Indexing performance') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.topIndicesByIndexRatePanel { gridPos+: { w: 8 } }, @@ -73,8 +73,7 @@ local g = import './g.libsonnet'; g.panel.row.new('Node Roles') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ - this.grafana.panels.osRolesTimeline { gridPos+: { w: 24 } }, - this.grafana.panels.osRoles { gridPos+: { w: 24 } }, + this.grafana.panels.nodeOSRolesTimeline { gridPos+: { w: 24 } }, ]), nodeHealthRow: @@ -115,8 +114,8 @@ local g = import './g.libsonnet'; // Search and Index Overview Dashboard Rows - searchAndIndexSearchPerformanceRow: - g.panel.row.new('Search Performance') + searchAndIndexRequestPerformanceRow: + g.panel.row.new('Request performance') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.searchRequestRatePanel { gridPos+: { w: 6 } }, @@ -126,7 +125,7 @@ local g = import './g.libsonnet'; ]), searchAndIndexIndexingPerformanceRow: - g.panel.row.new('Indexing Performance') + g.panel.row.new('Indexing performance') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.indexingRatePanel { gridPos+: { w: 6 } }, @@ -140,7 +139,7 @@ local g = import './g.libsonnet'; ]), 
searchAndIndexCapacityRow: - g.panel.row.new('Index Capacity') + g.panel.row.new('Index capacity') + g.panel.row.withCollapsed(false) + g.panel.row.withPanels([ this.grafana.panels.documentsIndexedPanel { gridPos+: { w: 6 } }, diff --git a/opensearch-mixin/signals/cluster-overview.libsonnet b/opensearch-mixin/signals/cluster-overview.libsonnet new file mode 100644 index 000000000..3d42cb7b9 --- /dev/null +++ b/opensearch-mixin/signals/cluster-overview.libsonnet @@ -0,0 +1,447 @@ +// Cluster Overview dashboard signals for OpenSearch +// Combines signals from cluster, roles, and topk domains +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels, + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_cluster_status', + }, + signals: { + // Cluster status and metrics + cluster_status: { + name: 'Cluster status', + description: 'Overall cluster health status as a numeric code.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_status{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_nodes_number: { + name: 'Node count', + description: 'The number of running nodes across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_nodes_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_datanodes_number: { + name: 'Data node count', + description: 'The number of data nodes in the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_datanodes_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + 
cluster_shards_number_total: { + name: 'Shard count', + description: 'The number of shards in the OpenSearch cluster across all indices.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + aggKeepLabels: ['type'], + exprWrappers: [['sum(', ')']], + }, + }, + }, + cluster_shards_number_by_type: { + name: 'Shard status', + description: 'Shard status counts across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{type}}', + aggKeepLabels: ['type'], + }, + }, + }, + cluster_shards_active_percent: { + name: 'Active shards %%', + description: 'Percent of active shards across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'min', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_cluster_shards_active_percent{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_pending_tasks_number: { + name: 'Pending tasks', + description: 'The number of tasks waiting to be executed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + sources: { + prometheus: { + expr: 'opensearch_cluster_pending_tasks_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + cluster_task_max_wait_seconds: { + name: 'Max task wait time', + description: 'The max wait time for tasks to be executed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_cluster_task_max_waiting_time_seconds{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + 
indices_indexing_index_count_avg: { + name: 'Total documents', + description: 'The total count of documents indexed across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_indices_indexing_index_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + indices_store_size_bytes_avg: { + name: 'Store size', + description: 'The total size of the store across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_indices_store_size_bytes{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{opensearch_cluster}}', + }, + }, + }, + + // Cluster role signals + node_role_data: { + name: 'Node role: data', + description: 'Data role present flag.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelectorGroupOnly)s, role="data"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / data', + aggKeepLabels: ['node', 'role'], + exprWrappers: [['', ' * 2']], + }, + }, + }, + node_role_master: { + name: 'Node role: master', + description: 'Master role present flag.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelectorGroupOnly)s, role="master"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / master', + aggKeepLabels: ['node', 'role'], + exprWrappers: [['', ' * 3']], + + }, + }, + }, + node_role_ingest: { + name: 'Node role: ingest', + description: 'Ingest role present flag.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelectorGroupOnly)s, role="ingest"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / ingest', + aggKeepLabels: 
['node', 'role'], + exprWrappers: [['', ' * 4']], + }, + }, + }, + node_role_cluster_manager: { + name: 'Node role: cluster_manager', + description: 'Cluster manager role present flag.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelectorGroupOnly)s, role="cluster_manager"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / cluster_manager', + aggKeepLabels: ['node', 'role'], + exprWrappers: [['', ' * 5']], + }, + }, + }, + node_role_remote_cluster_client: { + name: 'Node role: remote_cluster_client', + description: 'Remote cluster client role present flag.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelectorGroupOnly)s, role="remote_cluster_client"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / remote_client', + aggKeepLabels: ['node', 'role'], + exprWrappers: [['', ' * 6']], + }, + }, + }, + node_role_last_seen: { + name: 'Node role bool last seen', + description: 'Last seen role bool within 1d.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'last_over_time(opensearch_node_role_bool{%(queriesSelectorGroupOnly)s}[1d])', + aggKeepLabels: ['node', 'nodeid', 'role', 'primary_ip'], + }, + }, + }, + + // Top K signals + os_cpu_percent_topk: { + name: 'Top nodes by CPU usage', + description: 'Top nodes by OS CPU usage across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'sum', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_cpu_percent{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{node}}', + exprWrappers: [['topk(10, sort_desc(', '))']], + }, + }, + }, + fs_path_used_percent_topk: { + name: 'Top nodes by disk usage', + description: 'Top nodes by disk usage across the OpenSearch cluster.', + type: 'raw', + unit: 'percent', + sources: { + 
prometheus: { + expr: 'topk(10, sort_desc((100 * (\n' + + ' sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s}) - \n' + + ' sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_free_bytes{%(queriesSelectorGroupOnly)s})\n' + + ') / \n' + + 'sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s})\n' + + ')))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + circuitbreaker_tripped_count_sum: { + name: 'Breakers tripped', + description: 'The total count of circuit breakers tripped across the OpenSearch cluster.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_circuitbreaker_tripped_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{node}}', + rangeFunction: 'increase', + }, + }, + }, + search_current_inflight_topk: { + name: 'Top indices by request rate', + description: 'Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.', + type: 'raw', + unit: 'reqps', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n' + + ' opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s, context="total"}\n' + + ')))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + search_avg_latency_topk: { + name: 'Top indices by request latency', + description: 'Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(sum by(index, ' + this.groupAggList + ') ((\n' + + ' 
increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval)\n' + + ') / clamp_min(\n' + + ' increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval), 1\n' + + '))))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + request_query_cache_hit_rate_topk: { + name: 'Top indices by combined cache hit ratio', + description: 'Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n' + + ' 100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"}) / \n' + + ' clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s, context="total"}), 1\n' + + '))))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + ingest_throughput_topk: { + name: 'Top nodes 
by ingest rate', + description: 'Top nodes by rate of ingest across the OpenSearch cluster.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'sum', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{node}}', + exprWrappers: [['topk(10, ', ')']], + aggKeepLabels: ['node'], + }, + }, + }, + ingest_latency_topk: { + name: 'Top nodes by ingest latency', + description: 'Top nodes by ingestion latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (\n' + + ' increase(opensearch_ingest_total_time_seconds{%(queriesSelectorGroupOnly)s}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__interval:] offset $__interval), 1)\n' + + '))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + ingest_failures_topk: { + name: 'Top nodes by ingest errors', + description: 'Top nodes by ingestion failures across the OpenSearch cluster.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_ingest_total_failed_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{node}}', + rangeFunction: 'increase', + exprWrappers: [['topk(10, ', ')']], + aggKeepLabels: ['node'], + + }, + }, + }, + indexing_current_topk: { + name: 'Top indices by index rate', + description: 'Top indices by rate of document indexing across the OpenSearch cluster.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{index}}', + exprWrappers: [['topk(10, ', ')']], + aggKeepLabels: ['index'], + }, + }, + }, + indexing_latency_topk: { + name: 'Top indices by index latency', + description: 'Top 
indices by indexing latency across the OpenSearch cluster.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (\n' + + ' increase(opensearch_index_indexing_index_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + '))', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_failed_topk: { + name: 'Top indices by index failures', + description: 'Top indices by index document failures across the OpenSearch cluster.', + type: 'counter', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s}', + legendCustomTemplate: '{{index}}', + rangeFunction: 'increase', + exprWrappers: [['topk(10, ', ')']], + aggKeepLabels: ['index'], + }, + }, + }, + }, + } \ No newline at end of file diff --git a/opensearch-mixin/signals/cluster.libsonnet b/opensearch-mixin/signals/cluster.libsonnet deleted file mode 100644 index d67a91f70..000000000 --- a/opensearch-mixin/signals/cluster.libsonnet +++ /dev/null @@ -1,152 +0,0 @@ -// Cluster-level signals for OpenSearch -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - enableLokiLogs: this.enableLokiLogs, - aggLevel: 'none', - aggFunction: 'avg', - alertsInterval: '5m', - discoveryMetric: { - prometheus: 'opensearch_cluster_status', - }, - signals: { - cluster_status: { - name: 'Cluster status', - description: 'Overall cluster health status as a numeric code.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'min', - sources: { - prometheus: { - expr: 'opensearch_cluster_status{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - 
}, - }, - cluster_nodes_number: { - name: 'Node count', - description: 'The number of running nodes across the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'min', - sources: { - prometheus: { - expr: 'opensearch_cluster_nodes_number{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - cluster_datanodes_number: { - name: 'Data node count', - description: 'The number of data nodes in the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'min', - sources: { - prometheus: { - expr: 'opensearch_cluster_datanodes_number{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - cluster_shards_number_total: { - name: 'Shard count', - description: 'The number of shards in the OpenSearch cluster across all indices.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'max', - sources: { - prometheus: { - expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - aggKeepLabels: ['type'], - }, - }, - }, - cluster_shards_number_by_type: { - name: 'Shard status', - description: 'Shard status counts across the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'min', - sources: { - prometheus: { - expr: 'opensearch_cluster_shards_number{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{type}}', - aggKeepLabels: ['type'], - }, - }, - }, - cluster_shards_active_percent: { - name: 'Active shards %%', - description: 'Percent of active shards across the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'min', - unit: 'percent', - sources: { - prometheus: { - expr: 'opensearch_cluster_shards_active_percent{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - cluster_pending_tasks_number: { - name: 'Pending tasks', - description: 'The number of tasks waiting to be executed across 
the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - sources: { - prometheus: { - expr: 'opensearch_cluster_pending_tasks_number{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - cluster_task_max_wait_seconds: { - name: 'Max task wait time', - description: 'The max wait time for tasks to be executed across the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'max', - unit: 's', - sources: { - prometheus: { - expr: 'opensearch_cluster_task_max_waiting_time_seconds{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - indices_indexing_index_count_avg: { - name: 'Total documents', - description: 'The total count of documents indexed across the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'count', - sources: { - prometheus: { - expr: 'opensearch_indices_indexing_index_count{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - indices_store_size_bytes_avg: { - name: 'Store size', - description: 'The total size of the store across the OpenSearch cluster.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'bytes', - sources: { - prometheus: { - expr: 'opensearch_indices_store_size_bytes{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{opensearch_cluster}}', - }, - }, - }, - }, - } diff --git a/opensearch-mixin/signals/indexing.libsonnet b/opensearch-mixin/signals/indexing.libsonnet deleted file mode 100644 index b0189182b..000000000 --- a/opensearch-mixin/signals/indexing.libsonnet +++ /dev/null @@ -1,265 +0,0 @@ -// Indexing operation signals for OpenSearch -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - enableLokiLogs: this.enableLokiLogs, - aggLevel: 'none', - aggFunction: 'avg', - alertsInterval: '5m', - 
discoveryMetric: { - prometheus: 'opensearch_index_indexing_index_current_number', - }, - signals: { - indexing_current: { - name: 'Indexing current', - description: 'In-flight indexing operations.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'ops', - sources: { - prometheus: { - expr: 'opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s,index=~"$index",context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - indexing_latency: { - name: 'Indexing latency (avg)', - description: 'Average indexing latency.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:]) / clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:]),1))', - }, - }, - }, - indexing_count: { - name: 'Indexing count (avg)', - description: 'Indexing ops count.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'documents', - sources: { - prometheus: { - expr: 'opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - indexing_failed: { - name: 'Indexing failed (avg)', - description: 'Indexing failures per interval.', - type: 'counter', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'failures', - sources: { - prometheus: { - expr: 'opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s,index=~"$index",context="total"}', - rangeFunction: 'increase', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - indexing_delete_current: { - name: 'Indexing delete current', - description: 'In-flight delete operations.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'documents/s', - sources: { - 
prometheus: { - expr: 'opensearch_index_indexing_delete_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - flush_latency: { - name: 'Flush latency (avg)', - description: 'Average flush latency.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(' + this.groupAggList + ',index) (increase(opensearch_index_flush_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]),1))', - legendCustomTemplate: '{{index}}', - }, - }, - }, - flush_count: { - name: 'Flush count (avg)', - description: 'Flush count proxy (per mapping).', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))', - }, - }, - }, - merge_time: { - name: 'Merge time increase', - description: 'Merge time increase (boolean >0).', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', - legendCustomTemplate: '{{index}} - total', - }, - }, - }, - merge_stopped_time: { - name: 'Merge stopped time increase', - description: 'Merge stopped time increase (boolean >0).', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_stopped_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', - legendCustomTemplate: '{{index}} - stopped', - }, - }, 
- }, - merge_throttled_time: { - name: 'Merge throttled time increase', - description: 'Merge throttled time increase (boolean >0).', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_merges_total_throttled_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:])) > 0', - legendCustomTemplate: '{{index}} - throttled', - }, - }, - }, - merge_docs: { - name: 'Merge docs increase', - description: 'Merge docs increase (boolean >0).', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_merges_total_docs_count{%(queriesSelector)s, context="total"}[$__interval:])) > 0', - }, - }, - }, - merge_current_size: { - name: 'Merge current size bytes', - description: 'Merge current size (boolean >0).', - type: 'raw', - unit: 'bytes', - sources: { - prometheus: { - expr: 'avg by(' + this.groupAggList + ',index) (opensearch_index_merges_current_size_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) > 0', - legendCustomTemplate: '{{index}}', - }, - }, - }, - refresh_latency: { - name: 'Refresh latency (avg)', - description: 'Average refresh latency.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by(job,opensearch_cluster,index) (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]),1))', - legendCustomTemplate: '{{index}}', - }, - }, - }, - refresh_count: { - name: 'Refresh count (avg)', - description: 'Refresh count proxy (per mapping).', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'avg by(' + this.groupAggList + ') (increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, 
context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:]),1))', - }, - }, - }, - translog_ops: { - name: 'Translog operations', - description: 'Translog operation count.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'operations', - sources: { - prometheus: { - expr: 'opensearch_index_translog_operations_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - segments_number: { - name: 'Segments number', - description: 'Number of segments.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'segments', - sources: { - prometheus: { - expr: 'opensearch_index_segments_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - segments_memory_bytes: { - name: 'Segments memory bytes', - description: 'Segment memory usage.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'bytes', - sources: { - prometheus: { - expr: 'opensearch_index_segments_memory_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - store_size_bytes: { - name: 'Store size bytes', - description: 'Store size in bytes.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'bytes', - sources: { - prometheus: { - expr: 'opensearch_index_store_size_bytes{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - legendCustomTemplate: '{{index}}', - aggKeepLabels: ['index'], - }, - }, - }, - shards_per_index: { - name: 'Active shards per index', - description: 'Active shards per index.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'sum by (index) (avg by(' + this.groupAggList + ') 
(opensearch_index_shards_number{%(queriesSelector)s, type=~"active|active_primary"}))', - legendCustomTemplate: '{{ index }}', - }, - }, - }, - }, - } diff --git a/opensearch-mixin/signals/node-overview.libsonnet b/opensearch-mixin/signals/node-overview.libsonnet new file mode 100644 index 000000000..95601a6e7 --- /dev/null +++ b/opensearch-mixin/signals/node-overview.libsonnet @@ -0,0 +1,394 @@ +// Node Overview dashboard signals for OpenSearch +// Combines signals from roles and node domains +function(this) + local aggLabelsList = this.groupAggListWithInstance + ',node'; + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels + ['node'], + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_os_cpu_percent', + }, + signals: { + // Node role signals + node_role_data: { + name: 'Node role: data', + description: 'Data role present flag.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="data"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / data', + aggKeepLabels: ['role'], + exprWrappers: [['', ' * 2']], + }, + }, + }, + node_role_master: { + name: 'Node role: master', + description: 'Master role present flag.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="master"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / master', + aggKeepLabels: ['role'], + exprWrappers: [['', ' * 3']], + }, + }, + }, + node_role_ingest: { + name: 'Node role: ingest', + description: 'Ingest role present flag.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="ingest"}[1m]) 
== 1', + legendCustomTemplate: '{{ node }} / ingest', + aggKeepLabels: ['role'], + exprWrappers: [['', ' * 4']], + }, + }, + }, + node_role_cluster_manager: { + name: 'Node role: cluster_manager', + description: 'Cluster manager role present flag.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="cluster_manager"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / cluster_manager', + aggKeepLabels: ['role'], + exprWrappers: [['', ' * 5']], + }, + }, + }, + node_role_remote_cluster_client: { + name: 'Node role: remote_cluster_client', + description: 'Remote cluster client role present flag.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'max', + sources: { + prometheus: { + expr: 'max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="remote_cluster_client"}[1m]) == 1', + legendCustomTemplate: '{{ node }} / remote_client', + aggKeepLabels: ['role'], + exprWrappers: [['', ' * 6']], + }, + }, + }, + + // Node health and resource signals + os_cpu_percent: { + name: 'CPU %%', + description: 'Node CPU percent.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_cpu_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + os_mem_used_percent: { + name: 'Memory used %%', + description: 'Node memory used percent.', + type: 'gauge', + unit: 'percent', + sources: { + prometheus: { + expr: 'opensearch_os_mem_used_percent{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + os_swap_used_percent: { + name: 'Swap used %%', + description: 'Swap used percent.', + type: 'gauge', + unit: 'percent', + sources: { + prometheus: { + expr: '100 * opensearch_os_swap_used_bytes{%(queriesSelector)s} / clamp_min((opensearch_os_swap_used_bytes{%(queriesSelector)s} + opensearch_os_swap_free_bytes{%(queriesSelector)s}), 1)', + legendCustomTemplate: '{{node}}', + }, + }, + 
}, + fs_read_bps: { + name: 'FS read bytes/s', + description: 'Filesystem read rate.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'Bps', + sources: { + prometheus: { + expr: 'opensearch_fs_io_total_read_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{node}} - read', + }, + }, + }, + fs_write_bps: { + name: 'FS write bytes/s', + description: 'Filesystem write rate.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'Bps', + sources: { + prometheus: { + expr: 'opensearch_fs_io_total_write_bytes{%(queriesSelector)s}', + legendCustomTemplate: '{{node}} - write', + }, + }, + }, + fs_used_percent: { + name: 'FS used %%', + description: 'Filesystem used percent.', + type: 'gauge', + unit: 'percent', + sources: { + prometheus: { + expr: '100 - (100 * opensearch_fs_path_free_bytes{%(queriesSelector)s} / clamp_min(opensearch_fs_path_total_bytes{%(queriesSelector)s}, 1))', + legendCustomTemplate: '{{node}}', + }, + }, + }, + transport_open_connections: { + name: 'Transport server open', + description: 'Open transport server connections.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'connections', + sources: { + prometheus: { + expr: 'opensearch_transport_server_open_number{%(queriesSelector)s}', + legendCustomTemplate: '{{node}}', + }, + }, + }, + transport_tx_bps: { + name: 'Transport TX bitrate', + description: 'Transport transmit bitrate.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'bit/s', + sources: { + prometheus: { + expr: 'opensearch_transport_tx_bytes_count{%(queriesSelector)s}', + exprWrappers: [['', ' * 8']], + legendCustomTemplate: '{{node}} - sent', + }, + }, + }, + transport_rx_bps: { + name: 'Transport RX bitrate', + description: 'Transport receive bitrate.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'bit/s', + sources: { + prometheus: { + expr: 'opensearch_transport_rx_bytes_count{%(queriesSelector)s}', + 
exprWrappers: [['', ' * 8']], + legendCustomTemplate: '{{node}} - received', + }, + }, + }, + circuitbreaker_tripped_sum_by_name: { + name: 'Circuit breaker trips by name', + description: 'Circuit breaker trips by breaker name.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}', + legendCustomTemplate: '{{node}} - {{ name }}', + rangeFunction: 'increase', + }, + }, + }, + + // JVM signals + jvm_heap_used_bytes: { + name: 'JVM heap used', + description: 'JVM heap used.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_jvm_mem_heap_used_bytes{%(queriesSelector)s}', + }, + }, + }, + jvm_heap_committed_bytes: { + name: 'JVM heap committed', + description: 'JVM heap committed.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_jvm_mem_heap_committed_bytes{%(queriesSelector)s}', + }, + }, + }, + jvm_nonheap_used_bytes: { + name: 'JVM non-heap used', + description: 'JVM non-heap used.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_jvm_mem_nonheap_used_bytes{%(queriesSelector)s}', + }, + }, + }, + jvm_nonheap_committed_bytes: { + name: 'JVM non-heap committed', + description: 'JVM non-heap committed.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_jvm_mem_nonheap_committed_bytes{%(queriesSelector)s}', + }, + }, + }, + jvm_threads: { + name: 'JVM threads', + description: 'JVM thread count.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'threads', + sources: { + prometheus: { + expr: 'opensearch_jvm_threads_number{%(queriesSelector)s}', + }, + }, + }, + jvm_bufferpool_number: { + name: 'JVM buffer 
pools', + description: 'Number of JVM buffer pools.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_jvm_bufferpool_number{%(queriesSelector)s}', + legendCustomTemplate: '{{ node }} - {{ bufferpool }}', + aggKeepLabels: ['bufferpool'], + }, + }, + }, + jvm_uptime: { + name: 'JVM uptime', + description: 'JVM uptime seconds.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_jvm_uptime_seconds{%(queriesSelector)s}', + }, + }, + }, + jvm_gc_collections: { + name: 'JVM GC collections', + description: 'GC collections per interval.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_jvm_gc_collection_count{%(queriesSelector)s}', + rangeFunction: 'increase', + }, + }, + }, + jvm_gc_time: { + name: 'JVM GC time', + description: 'GC time per interval.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_jvm_gc_collection_time_seconds{%(queriesSelector)s}', + rangeFunction: 'increase', + }, + }, + }, + jvm_bufferpool_used_percent: { + name: 'JVM bufferpool used %%', + description: 'Percent of bufferpool used.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: '100 * (sum by (' + aggLabelsList + ',bufferpool) (opensearch_jvm_bufferpool_used_bytes{%(queriesSelector)s})) / clamp_min((sum by (' + aggLabelsList + ',bufferpool) (opensearch_jvm_bufferpool_total_capacity_bytes{%(queriesSelector)s})),1)', + legendCustomTemplate: '{{ node }} - {{ bufferpool }}', + }, + }, + }, + + // Thread pool signals + threadpool_threads: { + name: 'Threadpool threads', + description: 'Total threadpool threads.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'threads', + sources: { + prometheus: { + expr: 
'opensearch_threadpool_threads_number{%(queriesSelector)s}', + }, + }, + }, + threadpool_tasks: { + name: 'Threadpool tasks', + description: 'Threadpool tasks.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'sum', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_threadpool_tasks_number{%(queriesSelector)s}', + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/node.libsonnet b/opensearch-mixin/signals/node.libsonnet deleted file mode 100644 index 978675fe9..000000000 --- a/opensearch-mixin/signals/node.libsonnet +++ /dev/null @@ -1,270 +0,0 @@ -// Node-level signals for OpenSearch -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - enableLokiLogs: this.enableLokiLogs, - aggLevel: 'none', - aggFunction: 'avg', - alertsInterval: '5m', - discoveryMetric: { - prometheus: 'opensearch_os_cpu_percent', - }, - signals: { - os_cpu_percent: { - name: 'CPU %%', - description: 'Node CPU percent.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: 'opensearch_os_cpu_percent{%(queriesSelector)s}', - legendCustomTemplate: '{{node}}', - }, - }, - }, - os_mem_used_percent: { - name: 'Memory used %%', - description: 'Node memory used percent.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: 'opensearch_os_mem_used_percent{%(queriesSelector)s}', - legendCustomTemplate: '{{node}}', - }, - }, - }, - os_swap_used_percent: { - name: 'Swap used %%', - description: 'Swap used percent.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: '100 * opensearch_os_swap_used_bytes{%(queriesSelector)s} / clamp_min((opensearch_os_swap_used_bytes{%(queriesSelector)s} + opensearch_os_swap_free_bytes{%(queriesSelector)s}), 1)', - legendCustomTemplate: '{{node}}', - }, - }, - }, - fs_read_bps: { - name: 'FS read bytes/s', - description: 'Filesystem read rate.', - type: 'raw', - unit: 'Bps', - sources: { - prometheus: 
{ - expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_fs_io_total_read_bytes{%(queriesSelector)s}[$__rate_interval]))', - legendCustomTemplate: '{{node}} - read', - }, - }, - }, - fs_write_bps: { - name: 'FS write bytes/s', - description: 'Filesystem write rate.', - type: 'raw', - unit: 'Bps', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_fs_io_total_write_bytes{%(queriesSelector)s}[$__rate_interval]))', - legendCustomTemplate: '{{node}} - write', - }, - }, - }, - fs_used_percent: { - name: 'FS used %%', - description: 'Filesystem used percent.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: '100 - (100 * opensearch_fs_path_free_bytes{%(queriesSelector)s} / clamp_min(opensearch_fs_path_total_bytes{%(queriesSelector)s}, 1))', - legendCustomTemplate: '{{node}}', - }, - }, - }, - transport_open_connections: { - name: 'Transport server open', - description: 'Open transport server connections.', - type: 'raw', - unit: 'connections', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggListWithInstance + ') (opensearch_transport_server_open_number{%(queriesSelector)s})', - legendCustomTemplate: '{{node}}', - }, - }, - }, - transport_tx_bps: { - name: 'Transport TX bitrate', - description: 'Transport transmit bitrate.', - type: 'raw', - unit: 'bit/s', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_transport_tx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8', - legendCustomTemplate: '{{node}} - sent', - }, - }, - }, - transport_rx_bps: { - name: 'Transport RX bitrate', - description: 'Transport receive bitrate.', - type: 'raw', - unit: 'bit/s', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggListWithInstance + ') (rate(opensearch_transport_rx_bytes_count{%(queriesSelector)s}[$__rate_interval])) * 8', - legendCustomTemplate: '{{node}} - received', - }, - }, - }, - 
circuitbreaker_tripped_sum_by_name: { - name: 'Circuit breaker trips by name', - description: 'Circuit breaker trips by breaker name.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'sum by (name, ' + this.groupAggListWithInstance + ') (increase(opensearch_circuitbreaker_tripped_count{%(queriesSelector)s}[$__interval:]))', - legendCustomTemplate: '{{node}} - {{ name }}', - }, - }, - }, - jvm_heap_used_bytes: { - name: 'JVM heap used', - description: 'JVM heap used.', - type: 'raw', - unit: 'bytes', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_heap_used_bytes{%(queriesSelector)s})', - }, - }, - }, - jvm_heap_committed_bytes: { - name: 'JVM heap committed', - description: 'JVM heap committed.', - type: 'raw', - unit: 'bytes', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_heap_committed_bytes{%(queriesSelector)s})', - }, - }, - }, - jvm_nonheap_used_bytes: { - name: 'JVM non-heap used', - description: 'JVM non-heap used.', - type: 'raw', - unit: 'bytes', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_nonheap_used_bytes{%(queriesSelector)s})', - }, - }, - }, - jvm_nonheap_committed_bytes: { - name: 'JVM non-heap committed', - description: 'JVM non-heap committed.', - type: 'raw', - unit: 'bytes', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_mem_nonheap_committed_bytes{%(queriesSelector)s})', - }, - }, - }, - jvm_threads: { - name: 'JVM threads', - description: 'JVM thread count.', - type: 'raw', - unit: 'threads', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_threads_number{%(queriesSelector)s})', - }, - }, - }, - jvm_bufferpool_number: { - name: 'JVM buffer pools', - description: 'Number of JVM buffer pools.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ', 
bufferpool) (opensearch_jvm_bufferpool_number{%(queriesSelector)s})', - legendCustomTemplate: '{{ bufferpool }}', - }, - }, - }, - jvm_uptime: { - name: 'JVM uptime', - description: 'JVM uptime seconds.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_jvm_uptime_seconds{%(queriesSelector)s})', - }, - }, - }, - jvm_gc_collections: { - name: 'JVM GC collections', - description: 'GC collections per interval.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (increase(opensearch_jvm_gc_collection_count{%(queriesSelector)s}[$__interval:]))', - }, - }, - }, - jvm_gc_time: { - name: 'JVM GC time', - description: 'GC time per interval.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (increase(opensearch_jvm_gc_collection_time_seconds{%(queriesSelector)s}[$__interval:]))', - }, - }, - }, - jvm_bufferpool_used_percent: { - name: 'JVM bufferpool used %%', - description: 'Percent of bufferpool used.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: '100 * (sum by (' + this.groupAggList + ', bufferpool) (opensearch_jvm_bufferpool_used_bytes{%(queriesSelector)s})) / clamp_min((sum by (job, bufferpool, cluster) (opensearch_jvm_bufferpool_total_capacity_bytes{%(queriesSelector)s})),1)', - legendCustomTemplate: '{{ bufferpool }}', - }, - }, - }, - threadpool_threads: { - name: 'Threadpool threads', - description: 'Total threadpool threads.', - type: 'raw', - unit: 'threads', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') ((opensearch_threadpool_threads_number{%(queriesSelector)s}))', - }, - }, - }, - threadpool_tasks: { - name: 'Threadpool tasks', - description: 'Threadpool tasks.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'sum by (' + this.groupAggList + ') (opensearch_threadpool_tasks_number{%(queriesSelector)s})', - }, - }, - }, 
- }, - } diff --git a/opensearch-mixin/signals/roles.libsonnet b/opensearch-mixin/signals/roles.libsonnet deleted file mode 100644 index ad7cd8d8d..000000000 --- a/opensearch-mixin/signals/roles.libsonnet +++ /dev/null @@ -1,81 +0,0 @@ -// Node role signals for OpenSearch -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - enableLokiLogs: this.enableLokiLogs, - aggLevel: 'none', - aggFunction: 'avg', - alertsInterval: '5m', - discoveryMetric: { - prometheus: 'opensearch_node_role_bool', - }, - signals: { - node_role_data: { - name: 'Node role: data', - description: 'Data role present flag.', - type: 'raw', - sources: { - prometheus: { - expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="data"}[1m]) == 1) * 2', - legendCustomTemplate: '{{ node }} / data', - }, - }, - }, - node_role_master: { - name: 'Node role: master', - description: 'Master role present flag.', - type: 'raw', - sources: { - prometheus: { - expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="master"}[1m]) == 1) * 3', - legendCustomTemplate: '{{ node }} / master', - }, - }, - }, - node_role_ingest: { - name: 'Node role: ingest', - description: 'Ingest role present flag.', - type: 'raw', - sources: { - prometheus: { - expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="ingest"}[1m]) == 1) * 4', - legendCustomTemplate: '{{ node }} / ingest', - }, - }, - }, - node_role_cluster_manager: { - name: 'Node role: cluster_manager', - description: 'Cluster manager role present flag.', - type: 'raw', - sources: { - prometheus: { - expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="cluster_manager"}[1m]) == 1) * 5', - legendCustomTemplate: '{{ node }} / cluster_manager', - }, - }, - }, - node_role_remote_cluster_client: { - name: 'Node role: 
remote_cluster_client', - description: 'Remote cluster client role present flag.', - type: 'raw', - sources: { - prometheus: { - expr: 'max by (node, role) (max_over_time(opensearch_node_role_bool{%(queriesSelector)s, role="remote_cluster_client"}[1m]) == 1) * 6', - legendCustomTemplate: '{{ node }} / remote_client', - }, - }, - }, - node_role_last_seen: { - name: 'Node role bool last seen', - description: 'Last seen role bool within 1d.', - type: 'raw', - sources: { - prometheus: { - expr: 'max by (' + this.groupAggList + ', nodeid, role, primary_ip) (last_over_time(opensearch_node_role_bool{%(queriesSelector)s}[1d]))', - }, - }, - }, - }, - } diff --git a/opensearch-mixin/signals/search-and-index-overview.libsonnet b/opensearch-mixin/signals/search-and-index-overview.libsonnet new file mode 100644 index 000000000..f1d6ed742 --- /dev/null +++ b/opensearch-mixin/signals/search-and-index-overview.libsonnet @@ -0,0 +1,461 @@ +// Search and Index Overview dashboard signals for OpenSearch +// Combines signals from search and indexing domains +function(this) + { + filteringSelector: this.filteringSelector, + groupLabels: this.groupLabels, + instanceLabels: this.instanceLabels + ['index'], + enableLokiLogs: this.enableLokiLogs, + aggLevel: 'none', + aggFunction: 'avg', + alertsInterval: '5m', + discoveryMetric: { + prometheus: 'opensearch_index_search_fetch_count', + }, + signals: { + // Search performance signals + search_query_current_avg: { + name: 'Search queries in-flight', + description: 'In-flight search queries.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_search_query_current_number{%(queriesSelector)s, context=~"total"}', + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + search_fetch_current_avg: { + name: 'Search fetch in-flight', + description: 'In-flight fetch operations.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'ops', + 
sources: { + prometheus: { + expr: 'opensearch_index_search_fetch_current_number{%(queriesSelector)s, context=~"total"}', + legendCustomTemplate: '{{index}} - fetch', + }, + }, + }, + search_scroll_current_avg: { + name: 'Search scroll in-flight', + description: 'In-flight scroll operations.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_search_scroll_current_number{%(queriesSelector)s, context=~"total"}', + legendCustomTemplate: '{{index}} - scroll', + }, + }, + }, + search_query_latency_avg: { + name: 'Search query latency (avg)', + description: 'Average query latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (job,opensearch_cluster,index) (\n' + + ' increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + search_fetch_latency_avg: { + name: 'Search fetch latency (avg)', + description: 'Average fetch latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (job,opensearch_cluster,index) (\n' + + ' increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', + legendCustomTemplate: '{{index}} - fetch', + }, + }, + }, + search_scroll_latency_avg: { + name: 'Search scroll latency (avg)', + description: 'Average scroll latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by (job,opensearch_cluster,index) (\n' + + ' increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] 
offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', + legendCustomTemplate: '{{index}} - scroll', + }, + }, + }, + request_cache_hit_rate: { + name: 'Request cache hit rate %%', + description: 'Request cache hit rate.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (\n' + + ' 100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"}) / \n' + + ' clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + \n' + + ' opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"}, 1)\n' + + ')', + legendCustomTemplate: '{{index}} - request', + }, + }, + }, + query_cache_hit_rate: { + name: 'Query cache hit rate %%', + description: 'Query cache hit rate.', + type: 'raw', + unit: 'percent', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (\n' + + ' 100 * (opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / \n' + + ' clamp_min(opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + \n' + + ' opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}, 1)\n' + + ')', + legendCustomTemplate: '{{index}} - query', + }, + }, + }, + query_cache_evictions: { + name: 'Query cache evictions', + description: 'Query cache evictions per interval.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_querycache_evictions_count{%(queriesSelector)s, context="total"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{index}} - query cache', + }, + }, + }, + request_cache_evictions: { + name: 'Request cache evictions', + description: 'Request cache evictions per interval.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 
'count', + sources: { + prometheus: { + expr: 'opensearch_index_requestcache_evictions_count{%(queriesSelector)s, context="total"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{index}} - request cache', + }, + }, + }, + fielddata_evictions: { + name: 'Fielddata evictions', + description: 'Fielddata evictions per interval.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_fielddata_evictions_count{%(queriesSelector)s, context="total"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{index}} - field data', + }, + }, + }, + query_cache_memory: { + name: 'Query cache memory bytes', + description: 'Query cache memory.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_querycache_memory_size_bytes{%(queriesSelector)s, context="total"}', + }, + }, + }, + request_cache_memory: { + name: 'Request cache memory bytes', + description: 'Request cache memory.', + type: 'gauge', + aggLevel: 'group', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_requestcache_memory_size_bytes{%(queriesSelector)s, context="total"}', + }, + }, + }, + + // Indexing performance signals + indexing_current: { + name: 'Indexing current', + description: 'In-flight indexing operations.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'ops', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_current_number{%(queriesSelector)s,context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_latency: { + name: 'Indexing latency (avg)', + description: 'Average indexing latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ') (\n' + + ' increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:] offset $__interval) / 
\n' + + ' clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:] offset $__interval), 1)\n' + + ')', + }, + }, + }, + indexing_count: { + name: 'Indexing count (avg)', + description: 'Indexing ops count.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'documents', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_count{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_failed: { + name: 'Indexing failed (avg)', + description: 'Indexing failures per interval.', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'failures', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_index_failed_count{%(queriesSelector)s,context="total"}', + rangeFunction: 'increase', + legendCustomTemplate: '{{index}}', + }, + }, + }, + indexing_delete_current: { + name: 'Indexing delete current', + description: 'In-flight delete operations.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'documents/s', + sources: { + prometheus: { + expr: 'opensearch_index_indexing_delete_current_number{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + flush_latency: { + name: 'Flush latency (avg)', + description: 'Average flush latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(' + this.groupAggList + ',index) (\n' + + ' increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', + legendCustomTemplate: '{{index}}', + }, + }, + }, + merge_time: { + name: 'Merge time increase', + description: 'Merge time increase (boolean >0).', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 's', + 
sources: { + prometheus: { + expr: 'opensearch_index_merges_total_time_seconds{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}} - total', + rangeFunction: 'increase', + exprWrappers: [['(', ') > 0']], + }, + }, + }, + + merge_stopped_time: { + name: 'Merge stopped time increase', + description: 'Merge stopped time increase (boolean >0).', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_index_merges_total_stopped_time_seconds{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}} - stopped', + rangeFunction: 'increase', + exprWrappers: [['(', ') > 0']], + }, + }, + }, + merge_throttled_time: { + name: 'Merge throttled time increase', + description: 'Merge throttled time increase (boolean >0).', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 's', + sources: { + prometheus: { + expr: 'opensearch_index_merges_total_throttled_time_seconds{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}} - throttled', + rangeFunction: 'increase', + exprWrappers: [['(', ') > 0']], + }, + }, + }, + merge_docs: { + name: 'Merge docs increase', + description: 'Merge docs increase (boolean >0).', + type: 'counter', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_merges_total_docs_count{%(queriesSelector)s, context="total"}', + rangeFunction: 'increase', + exprWrappers: [['(', ') > 0']], + }, + }, + }, + merge_current_size: { + name: 'Merge current size bytes', + description: 'Merge current size (boolean >0).', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_merges_current_size_bytes{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + exprWrappers: [['(', ') > 0']], + }, + }, + }, + refresh_latency: { + name: 'Refresh latency (avg)', + 
description: 'Average refresh latency.', + type: 'raw', + unit: 's', + sources: { + prometheus: { + expr: 'avg by(job,opensearch_cluster,index) (\n' + + ' increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', + legendCustomTemplate: '{{index}}', + }, + }, + }, + translog_ops: { + name: 'Translog operations', + description: 'Translog operation count.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'operations', + sources: { + prometheus: { + expr: 'opensearch_index_translog_operations_number{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + segments_number: { + name: 'Segments number', + description: 'Number of segments.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'segments', + sources: { + prometheus: { + expr: 'opensearch_index_segments_number{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + segments_memory_bytes: { + name: 'Segments memory bytes', + description: 'Segment memory usage.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_segments_memory_bytes{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + store_size_bytes: { + name: 'Store size bytes', + description: 'Store size in bytes.', + type: 'gauge', + aggLevel: 'instance', + aggFunction: 'avg', + unit: 'bytes', + sources: { + prometheus: { + expr: 'opensearch_index_store_size_bytes{%(queriesSelector)s, context="total"}', + legendCustomTemplate: '{{index}}', + }, + }, + }, + shards_per_index: { + name: 'Active shards per index', + description: 'Active shards per index.', + type: 'gauge', + aggLevel: 'instance', + 
aggFunction: 'avg', + unit: 'count', + sources: { + prometheus: { + expr: 'opensearch_index_shards_number{%(queriesSelector)s, type=~"active|active_primary"}', + legendCustomTemplate: '{{ index }}', + exprWrappers: [['sum by (index) (', ')']], + }, + }, + }, + }, + } diff --git a/opensearch-mixin/signals/search.libsonnet b/opensearch-mixin/signals/search.libsonnet deleted file mode 100644 index 3a2d0f21b..000000000 --- a/opensearch-mixin/signals/search.libsonnet +++ /dev/null @@ -1,195 +0,0 @@ -// Search operation signals for OpenSearch -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - enableLokiLogs: this.enableLokiLogs, - aggLevel: 'none', - aggFunction: 'avg', - alertsInterval: '5m', - discoveryMetric: { - prometheus: 'opensearch_index_search_query_current_number', - }, - signals: { - search_query_current_avg: { - name: 'Search queries in-flight', - description: 'In-flight search queries.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'ops', - sources: { - prometheus: { - expr: 'opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', - legendCustomTemplate: '{{index}} - query', - aggKeepLabels: ['index'], - }, - }, - }, - search_fetch_current_avg: { - name: 'Search fetch in-flight', - description: 'In-flight fetch operations.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'ops', - sources: { - prometheus: { - expr: 'opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', - legendCustomTemplate: '{{index}} - fetch', - aggKeepLabels: ['index'], - }, - }, - }, - search_scroll_current_avg: { - name: 'Search scroll in-flight', - description: 'In-flight scroll operations.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'ops', - sources: { - prometheus: { - expr: 
'opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s,index=~"$index", context=~"total"}', - legendCustomTemplate: '{{index}} - scroll', - aggKeepLabels: ['index'], - }, - }, - }, - search_query_latency_avg: { - name: 'Search query latency (avg)', - description: 'Average query latency.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index"}[$__interval:]) / clamp_min(increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', - legendCustomTemplate: '{{index}} - query', - }, - }, - }, - search_fetch_latency_avg: { - name: 'Search fetch latency (avg)', - description: 'Average fetch latency.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', - legendCustomTemplate: '{{index}} - fetch', - }, - }, - }, - search_scroll_latency_avg: { - name: 'Search scroll latency (avg)', - description: 'Average scroll latency.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'avg by (job,opensearch_cluster,index) (increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]) / clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}[$__interval:]), 1))', - legendCustomTemplate: '{{index}} - scroll', - }, - }, - }, - request_cache_hit_rate: { - name: 'Request cache hit rate %%', - description: 'Request cache hit rate.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { 
- expr: 'avg by(job,opensearch_cluster,index) (100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) / clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"} + opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}, 1))', - legendCustomTemplate: '{{index}} - request', - }, - }, - }, - query_cache_hit_rate: { - name: 'Query cache hit rate %%', - description: 'Query cache hit rate.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: 'avg by(job,opensearch_cluster,index) (100 * (opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}) / clamp_min(opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"} + opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}, 1))', - legendCustomTemplate: '{{index}} - query', - }, - }, - }, - query_cache_evictions: { - name: 'Query cache evictions', - description: 'Query cache evictions per interval.', - type: 'counter', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'count', - sources: { - prometheus: { - expr: 'opensearch_index_querycache_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - rangeFunction: 'increase', - aggKeepLabels: ['index'], - legendCustomTemplate: '{{index}} - query cache', - }, - }, - }, - request_cache_evictions: { - name: 'Request cache evictions', - description: 'Request cache evictions per interval.', - type: 'counter', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'count', - sources: { - prometheus: { - expr: 'opensearch_index_requestcache_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - rangeFunction: 'increase', - aggKeepLabels: ['index'], - legendCustomTemplate: '{{index}} - request cache', - }, - }, - }, - 
fielddata_evictions: { - name: 'Fielddata evictions', - description: 'Fielddata evictions per interval.', - type: 'counter', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'count', - sources: { - prometheus: { - expr: 'opensearch_index_fielddata_evictions_count{%(queriesSelectorGroupOnly)s,index=~"$index", context="total"}', - rangeFunction: 'increase', - aggKeepLabels: ['index'], - legendCustomTemplate: '{{index}} - field data', - }, - }, - }, - query_cache_memory: { - name: 'Query cache memory bytes', - description: 'Query cache memory.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'bytes', - sources: { - prometheus: { - expr: 'opensearch_index_querycache_memory_size_bytes{%(queriesSelector)s, context="total"}', - }, - }, - }, - request_cache_memory: { - name: 'Request cache memory bytes', - description: 'Request cache memory.', - type: 'gauge', - aggLevel: 'group', - aggFunction: 'avg', - unit: 'bytes', - sources: { - prometheus: { - expr: 'opensearch_index_requestcache_memory_size_bytes{%(queriesSelector)s, context="total"}', - }, - }, - }, - }, - } diff --git a/opensearch-mixin/signals/topk.libsonnet b/opensearch-mixin/signals/topk.libsonnet deleted file mode 100644 index b1309cf68..000000000 --- a/opensearch-mixin/signals/topk.libsonnet +++ /dev/null @@ -1,163 +0,0 @@ -// TopK and ranking signals for OpenSearch -function(this) - { - filteringSelector: this.filteringSelector, - groupLabels: this.groupLabels, - instanceLabels: this.instanceLabels, - enableLokiLogs: this.enableLokiLogs, - aggLevel: 'none', - aggFunction: 'avg', - alertsInterval: '5m', - discoveryMetric: { - prometheus: 'opensearch_os_cpu_percent', - }, - signals: { - os_cpu_percent_topk: { - name: 'Top nodes by CPU usage', - description: 'Top nodes by OS CPU usage across the OpenSearch cluster.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: 'topk(10, sort_desc(sum by(' + this.groupAggListWithInstance + ') 
(opensearch_os_cpu_percent{%(queriesSelectorGroupOnly)s})))', - legendCustomTemplate: '{{node}}', - }, - }, - }, - fs_path_used_percent_topk: { - name: 'Top nodes by disk usage', - description: 'Top nodes by disk usage across the OpenSearch cluster.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: 'topk(10, sort_desc((100 * (sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s})- sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_free_bytes{%(queriesSelectorGroupOnly)s})) / sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s}))))', - legendCustomTemplate: '{{node}}', - }, - }, - }, - circuitbreaker_tripped_count_sum: { - name: 'Breakers tripped', - description: 'The total count of circuit breakers tripped across the OpenSearch cluster.', - type: 'counter', - aggLevel: 'group', - aggFunction: 'sum', - unit: 'count', - sources: { - prometheus: { - expr: 'opensearch_circuitbreaker_tripped_count{%(queriesSelectorGroupOnly)s}', - legendCustomTemplate: '{{node}}', - rangeFunction: 'increase', - }, - }, - }, - search_current_inflight_topk: { - name: 'Top indices by request rate', - description: 'Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.', - type: 'raw', - unit: 'reqps', - sources: { - prometheus: { - expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s, context="total"}\n)))\n', - legendCustomTemplate: '{{index}}', - }, - }, - }, - search_avg_latency_topk: { - name: 'Top indices by request latency', - description: 'Top indices by combined fetch, query, and scroll latency across the OpenSearch 
cluster.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'topk(10, sort_desc(sum by(index, ' + this.groupAggList + ') ((increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]))\n/ clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:])\n+increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]), 1))))\n', - legendCustomTemplate: '{{index}}', - }, - }, - }, - request_query_cache_hit_rate_topk: { - name: 'Top indices by combined cache hit ratio', - description: 'Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.', - type: 'raw', - unit: 'percent', - sources: { - prometheus: { - expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n 100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"}) / \n clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s, context="total"} + \n opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s, context="total"}), 1\n ))))', - legendCustomTemplate: '{{index}}', - }, - }, - }, - ingest_throughput_topk: { - name: 'Top nodes by ingest rate', - description: 'Top nodes by rate of ingest across the OpenSearch 
cluster.', - type: 'raw', - unit: 'ops', - sources: { - prometheus: { - expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (rate(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__rate_interval])))', - legendCustomTemplate: '{{node}}', - }, - }, - }, - ingest_latency_topk: { - name: 'Top nodes by ingest latency', - description: 'Top nodes by ingestion latency across the OpenSearch cluster.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (increase(opensearch_ingest_total_time_seconds{%(queriesSelectorGroupOnly)s}[$__interval:]) / clamp_min(increase(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__interval:]), 1)))', - legendCustomTemplate: '{{node}}', - }, - }, - }, - ingest_failures_topk: { - name: 'Top nodes by ingest errors', - description: 'Top nodes by ingestion failures across the OpenSearch cluster.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (increase(opensearch_ingest_total_failed_count{%(queriesSelectorGroupOnly)s}[$__interval:])))', - legendCustomTemplate: '{{node}}', - }, - }, - }, - indexing_current_topk: { - name: 'Top indices by index rate', - description: 'Top indices by rate of document indexing across the OpenSearch cluster.', - type: 'raw', - unit: 'ops', - sources: { - prometheus: { - expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (opensearch_index_indexing_index_current_number{%(queriesSelectorGroupOnly)s}))', - legendCustomTemplate: '{{index}}', - }, - }, - }, - indexing_latency_topk: { - name: 'Top indices by index latency', - description: 'Top indices by indexing latency across the OpenSearch cluster.', - type: 'raw', - unit: 's', - sources: { - prometheus: { - expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (increase(opensearch_index_indexing_index_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]) / 
clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:]), 1)))', - legendCustomTemplate: '{{index}}', - }, - }, - }, - indexing_failed_topk: { - name: 'Top indices by index failures', - description: 'Top indices by index document failures across the OpenSearch cluster.', - type: 'raw', - unit: 'count', - sources: { - prometheus: { - expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (increase(opensearch_index_indexing_index_failed_count{%(queriesSelectorGroupOnly)s}[$__interval:])))', - legendCustomTemplate: '{{index}}', - }, - }, - }, - }, - } From 3c144b5cd4a896c934ecc94f0b55998179761a6a Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Fri, 21 Nov 2025 08:26:43 -0500 Subject: [PATCH 03/13] Revert .gitignore to master version --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1a57d51f0..d68c86c04 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ vendor jsonnetfile.lock.json *.zip -.worktrees From 4b44b921dc8b02f95cc6a4e420b63b96bf3e1787 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Fri, 21 Nov 2025 09:17:10 -0500 Subject: [PATCH 04/13] Ran make fmt --- opensearch-mixin/config.libsonnet | 6 +- opensearch-mixin/dashboards.libsonnet | 8 +- opensearch-mixin/main.libsonnet | 2 +- opensearch-mixin/panels.libsonnet | 161 ++++++++++-------- opensearch-mixin/rows.libsonnet | 2 +- .../signals/cluster-overview.libsonnet | 62 +++---- .../search-and-index-overview.libsonnet | 54 +++--- 7 files changed, 153 insertions(+), 142 deletions(-) diff --git a/opensearch-mixin/config.libsonnet b/opensearch-mixin/config.libsonnet index 6c354e2e8..e23c5b707 100644 --- a/opensearch-mixin/config.libsonnet +++ b/opensearch-mixin/config.libsonnet @@ -22,7 +22,7 @@ // Agg Lists groupAggList: std.join(',', this.groupLabels), groupAggListWithInstance: std.join(',', this.groupLabels + this.instanceLabels), - + // Alerts configuration alertsWarningShardReallocations: 0, // 
count alertsWarningShardUnassigned: 0, // count @@ -34,11 +34,11 @@ alertsCriticalMemoryUsage: 85, // % alertsWarningRequestLatency: 0.5, // seconds alertsWarningIndexLatency: 0.5, // seconds - + // Signals configuration signals+: { clusterOverview: (import './signals/cluster-overview.libsonnet')(this), nodeOverview: (import './signals/node-overview.libsonnet')(this), searchAndIndexOverview: (import './signals/search-and-index-overview.libsonnet')(this), }, -} \ No newline at end of file +} diff --git a/opensearch-mixin/dashboards.libsonnet b/opensearch-mixin/dashboards.libsonnet index e1e63da42..c28201446 100644 --- a/opensearch-mixin/dashboards.libsonnet +++ b/opensearch-mixin/dashboards.libsonnet @@ -44,10 +44,10 @@ local logslib = import 'logs-lib/logs/main.libsonnet'; + g.dashboard.withPanels( g.util.panel.resolveCollapsedFlagOnRows( g.util.grid.wrapPanels([ - this.grafana.rows.nodeHealthRow, - this.grafana.rows.nodeRolesRow, - this.grafana.rows.nodeJVMRow, - this.grafana.rows.threadPoolsRow, + this.grafana.rows.nodeHealthRow, + this.grafana.rows.nodeRolesRow, + this.grafana.rows.nodeJVMRow, + this.grafana.rows.threadPoolsRow, ]) ) ) + root.applyCommon( diff --git a/opensearch-mixin/main.libsonnet b/opensearch-mixin/main.libsonnet index ddd83d3c5..01ce2f56e 100644 --- a/opensearch-mixin/main.libsonnet +++ b/opensearch-mixin/main.libsonnet @@ -15,7 +15,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; new(): { local this = self, config: config, - + signals: { [sig]: commonlib.signals.unmarshallJsonMulti( diff --git a/opensearch-mixin/panels.libsonnet b/opensearch-mixin/panels.libsonnet index cef7b973a..dbdf17517 100644 --- a/opensearch-mixin/panels.libsonnet +++ b/opensearch-mixin/panels.libsonnet @@ -5,7 +5,7 @@ local var = g.dashboard.variable; new(this):: { local signals = this.signals, - + clusterOSRoles: g.panel.table.new('Roles') + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') @@ -14,20 +14,32 @@ local var = 
g.dashboard.variable; + g.query.prometheus.withInstant(true), ]) + g.panel.table.queryOptions.withTransformations([ - {id: 'labelsToFields', options: {mode: 'columns', valueLabel: 'role'}}, - {id: 'merge', options: {}}, + { id: 'labelsToFields', options: { mode: 'columns', valueLabel: 'role' } }, + { id: 'merge', options: {} }, { id: 'organize', options: { - excludeByName: {Time: true}, + excludeByName: { Time: true }, indexByName: { - Time: 0, node: 3, nodeid: 3, master: 104, data: 105, - ingest: 106, remote_cluster_client: 107, cluster_manager: 108, - } + {[k]: 3 for k in this.config.groupLabels + this.config.instanceLabels}, + Time: 0, + node: 3, + nodeid: 3, + master: 104, + data: 105, + ingest: 106, + remote_cluster_client: 107, + cluster_manager: 108, + } + { [k]: 3 for k in this.config.groupLabels + this.config.instanceLabels }, renameByName: { - Time: '', cluster: 'Cluster', cluster_manager: 'Cluster manager', - data: 'Data', ingest: 'Ingest', master: 'Master', - node: 'Node', nodeid: 'Nodeid', remote_cluster_client: 'Remote cluster client', + Time: '', + cluster: 'Cluster', + cluster_manager: 'Cluster manager', + data: 'Data', + ingest: 'Ingest', + master: 'Master', + node: 'Node', + nodeid: 'Nodeid', + remote_cluster_client: 'Remote cluster client', }, }, }, @@ -35,17 +47,17 @@ local var = g.dashboard.variable; + g.panel.table.standardOptions.withMappings([ g.panel.table.standardOptions.mapping.ValueMap.withType() + g.panel.table.standardOptions.mapping.ValueMap.withOptions({ - '0': {color: 'super-light-orange', index: 5, text: 'False'}, - '1': {color: 'light-green', index: 3, text: 'True'}, - Data: {color: 'light-purple', index: 0, text: 'data'}, - Ingest: {color: 'light-blue', index: 2, text: 'ingest'}, - Master: {color: 'light-green', index: 1, text: 'master'}, - 'Remote cluster client': {color: 'light-orange', index: 4, text: 'remote_cluster_client'}, + '0': { color: 'super-light-orange', index: 5, text: 'False' }, + '1': { color: 'light-green', 
index: 3, text: 'True' }, + Data: { color: 'light-purple', index: 0, text: 'data' }, + Ingest: { color: 'light-blue', index: 2, text: 'ingest' }, + Master: { color: 'light-green', index: 1, text: 'master' }, + 'Remote cluster client': { color: 'light-orange', index: 4, text: 'remote_cluster_client' }, }), ]) + g.panel.table.standardOptions.withOverrides([ g.panel.table.fieldOverride.byRegexp.new('/Data|Master|Ingest|Remote.+|Cluster.+/') - + g.panel.table.fieldOverride.byRegexp.withProperty('custom.cellOptions', {type: 'color-text'}), + + g.panel.table.fieldOverride.byRegexp.withProperty('custom.cellOptions', { type: 'color-text' }), ]), clusterOSRolesTimeline: @@ -65,11 +77,11 @@ local var = g.dashboard.variable; { type: 'value', options: { - '2': {color: 'light-purple', index: 0, text: 'data'}, - '3': {color: 'light-green', index: 1, text: 'master'}, - '4': {color: 'light-blue', index: 2, text: 'ingest'}, - '5': {color: 'light-yellow', index: 3, text: 'cluster_manager'}, - '6': {color: 'super-light-red', index: 4, text: 'remote_cluster_client'}, + '2': { color: 'light-purple', index: 0, text: 'data' }, + '3': { color: 'light-green', index: 1, text: 'master' }, + '4': { color: 'light-blue', index: 2, text: 'ingest' }, + '5': { color: 'light-yellow', index: 3, text: 'cluster_manager' }, + '6': { color: 'super-light-red', index: 4, text: 'remote_cluster_client' }, }, }, ]), @@ -86,9 +98,9 @@ local var = g.dashboard.variable; + g.panel.stat.standardOptions.withMappings([ g.panel.stat.standardOptions.mapping.ValueMap.withType() + g.panel.stat.standardOptions.mapping.ValueMap.withOptions({ - '0': {index: 0, text: 'Green'}, - '1': {index: 1, text: 'Yellow'}, - '2': {index: 2, text: 'Red'}, + '0': { index: 0, text: 'Green' }, + '1': { index: 1, text: 'Yellow' }, + '2': { index: 2, text: 'Red' }, }), ]) + g.panel.stat.standardOptions.thresholds.withSteps([ @@ -102,7 +114,7 @@ local var = g.dashboard.variable; + g.panel.stat.standardOptions.threshold.step.withValue(2), ]) 
+ g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) - // + g.panel.stat.standardOptions.graphMode.withMode('none'), + // + g.panel.stat.standardOptions.graphMode.withMode('none'), + g.panel.stat.options.withGraphMode('none'), @@ -185,7 +197,7 @@ local var = g.dashboard.variable; + g.panel.stat.standardOptions.withUnit('percent') + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) + g.panel.stat.options.withGraphMode('none'), - + topNodesByCPUUsagePanel: g.panel.barGauge.new('Top nodes by CPU usage') + g.panel.barGauge.panelOptions.withDescription('Top nodes by OS CPU usage across the OpenSearch cluster.') @@ -231,7 +243,7 @@ local var = g.dashboard.variable; + g.panel.barGauge.queryOptions.withTargets([ signals.clusterOverview.cluster_shards_number_by_type.asTarget() + g.query.prometheus.withIntervalFactor(2), - ]) + ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') + g.panel.barGauge.standardOptions.thresholds.withSteps([ g.panel.barGauge.standardOptions.threshold.step.withColor('green') @@ -418,7 +430,7 @@ local var = g.dashboard.variable; // Node CPU usage nodeCpuUsage: g.panel.timeSeries.new('Node CPU usage') - + g.panel.timeSeries.panelOptions.withDescription('CPU usage percentage of the node\'s Operating System.') + + g.panel.timeSeries.panelOptions.withDescription("CPU usage percentage of the node's Operating System.") + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_cpu_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMax(100) @@ -457,7 +469,7 @@ local var = g.dashboard.variable; signals.nodeOverview.fs_read_bps.asTarget() + g.query.prometheus.withInterval('2m'), signals.nodeOverview.fs_write_bps.asTarget() - + g.query.prometheus.withInterval('2m') + + g.query.prometheus.withInterval('2m'), ]) + g.panel.timeSeries.standardOptions.withUnit('Bps') + 
g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) @@ -526,7 +538,7 @@ local var = g.dashboard.variable; // Node network traffic nodeNetworkTraffic: g.panel.timeSeries.new('Node network traffic') - + g.panel.timeSeries.panelOptions.withDescription('Network traffic on the node\'s operating system.') + + g.panel.timeSeries.panelOptions.withDescription("Network traffic on the node's operating system.") + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.transport_rx_bps.asTarget() + g.query.prometheus.withInterval('2m'), @@ -575,11 +587,11 @@ local var = g.dashboard.variable; { type: 'value', options: { - '2': {color: 'light-purple', index: 0, text: 'data'}, - '3': {color: 'light-green', index: 1, text: 'master'}, - '4': {color: 'light-blue', index: 2, text: 'ingest'}, - '5': {color: 'light-yellow', index: 3, text: 'cluster_manager'}, - '6': {color: 'super-light-red', index: 4, text: 'remote_cluster_client'}, + '2': { color: 'light-purple', index: 0, text: 'data' }, + '3': { color: 'light-green', index: 1, text: 'master' }, + '4': { color: 'light-blue', index: 2, text: 'ingest' }, + '5': { color: 'light-yellow', index: 3, text: 'cluster_manager' }, + '6': { color: 'super-light-red', index: 4, text: 'remote_cluster_client' }, }, }, ]), @@ -731,7 +743,6 @@ local var = g.dashboard.variable; + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), - // Search and Index Overview Panels - Refactored to use modern patterns and signals // Search Performance Panels searchRequestRatePanel: @@ -751,8 +762,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { - matcher: {id: 'byValue', options: {reducer: 'allIsZero', op: 'gte', value: 0}}, - properties: [{id: 'custom.hideFrom', value: {tooltip: true, viz: false, legend: true}}], + matcher: { id: 'byValue', options: { reducer: 'allIsZero', op: 'gte', value: 0 } }, + properties: 
[{ id: 'custom.hideFrom', value: { tooltip: true, viz: false, legend: true } }], }, ]) + g.panel.timeSeries.options.tooltip.withMode('multi'), @@ -777,8 +788,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { - matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, - properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], + matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, + properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], }, ]) + g.panel.timeSeries.options.tooltip.withMode('multi'), @@ -798,8 +809,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.withOverrides([ { - matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, - properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], + matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, + properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], }, ]) + g.panel.timeSeries.options.tooltip.withMode('multi'), @@ -830,8 +841,8 @@ local var = g.dashboard.variable; ]) + g.panel.timeSeries.standardOptions.withOverrides([ { - matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, - properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], + matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, + properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], }, ]) + g.panel.timeSeries.options.tooltip.withMode('multi'), @@ -850,8 +861,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + 
g.panel.timeSeries.standardOptions.withOverrides([ { - matcher: {id: 'byValue', options: {op: 'gte', reducer: 'allIsZero', value: 0}}, - properties: [{id: 'custom.hideFrom', value: {legend: true, tooltip: true, viz: false}}], + matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, + properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], }, ]), @@ -893,8 +904,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Index Operations Panels @@ -917,8 +928,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), mergeTimePanel: @@ -947,8 +958,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + 
g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]) + g.panel.timeSeries.options.tooltip.withMode('multi'), @@ -971,8 +982,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Index Statistics Panels @@ -994,8 +1005,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), docsDeletedPanel: @@ -1016,8 +1027,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + 
g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), documentsIndexedPanel: @@ -1038,8 +1049,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Index Structure Panels @@ -1061,8 +1072,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), mergeCountPanel: @@ -1084,8 +1095,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, 
tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Cache and Memory Panels @@ -1109,8 +1120,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]) + g.panel.timeSeries.options.tooltip.withMode('multi'), @@ -1132,8 +1143,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), segmentSizePanel: @@ -1154,8 +1165,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + 
g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), mergeSizePanel: @@ -1177,8 +1188,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), searchAndIndexShardCountPanel: @@ -1199,8 +1210,8 @@ local var = g.dashboard.variable; + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), ]) + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({op: 'gte', reducer: 'allIsZero', value: 0}) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', {legend: true, tooltip: true, viz: false}), + g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) + + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), }, } diff --git a/opensearch-mixin/rows.libsonnet b/opensearch-mixin/rows.libsonnet index 214c6ba5a..b7d8ed240 100644 --- a/opensearch-mixin/rows.libsonnet +++ b/opensearch-mixin/rows.libsonnet @@ -8,7 +8,7 @@ local g = import './g.libsonnet'; + g.panel.row.withPanels([ this.grafana.panels.clusterStatusPanel { gridPos+: { w: 5, h: 6 } }, this.grafana.panels.nodeCountPanel { gridPos+: { w: 5, h: 6 } }, - this.grafana.panels.dataNodeCountPanel { gridPos+: { w: 5, h: 6 } }, + 
this.grafana.panels.dataNodeCountPanel { gridPos+: { w: 5, h: 6 } }, this.grafana.panels.shardCountPanel { gridPos+: { w: 5, h: 6 } }, this.grafana.panels.activeShardsPercentagePanel { gridPos+: { w: 4, h: 6 } }, ]), diff --git a/opensearch-mixin/signals/cluster-overview.libsonnet b/opensearch-mixin/signals/cluster-overview.libsonnet index 3d42cb7b9..cd08fed27 100644 --- a/opensearch-mixin/signals/cluster-overview.libsonnet +++ b/opensearch-mixin/signals/cluster-overview.libsonnet @@ -267,11 +267,11 @@ function(this) sources: { prometheus: { expr: 'topk(10, sort_desc((100 * (\n' - + ' sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s}) - \n' - + ' sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_free_bytes{%(queriesSelectorGroupOnly)s})\n' - + ') / \n' - + 'sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s})\n' - + ')))', + + ' sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s}) - \n' + + ' sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_free_bytes{%(queriesSelectorGroupOnly)s})\n' + + ') / \n' + + 'sum by(' + this.groupAggListWithInstance + ') (opensearch_fs_path_total_bytes{%(queriesSelectorGroupOnly)s})\n' + + ')))', legendCustomTemplate: '{{node}}', }, }, @@ -299,10 +299,10 @@ function(this) sources: { prometheus: { expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n' - + ' opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n' - + ' opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n' - + ' opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s, context="total"}\n' - + ')))', + + ' opensearch_index_search_fetch_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' 
opensearch_index_search_query_current_number{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_search_scroll_current_number{%(queriesSelectorGroupOnly)s, context="total"}\n' + + ')))', legendCustomTemplate: '{{index}}', }, }, @@ -315,14 +315,14 @@ function(this) sources: { prometheus: { expr: 'topk(10, sort_desc(sum by(index, ' + this.groupAggList + ') ((\n' - + ' increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' - + ' increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' - + ' increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval)\n' - + ') / clamp_min(\n' - + ' increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' - + ' increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' - + ' increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval), 1\n' - + '))))', + + ' increase(opensearch_index_search_fetch_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_query_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_scroll_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval)\n' + + ') / clamp_min(\n' + + ' increase(opensearch_index_search_fetch_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + \n' + + ' increase(opensearch_index_search_query_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) + 
\n' + + ' increase(opensearch_index_search_scroll_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval), 1\n' + + '))))', legendCustomTemplate: '{{index}}', }, }, @@ -335,13 +335,13 @@ function(this) sources: { prometheus: { expr: 'topk(10, sort_desc(avg by(index, ' + this.groupAggList + ') (\n' - + ' 100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' - + ' opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"}) / \n' - + ' clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' - + ' opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' - + ' opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' - + ' opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s, context="total"}), 1\n' - + '))))', + + ' 100 * (opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"}) / \n' + + ' clamp_min((opensearch_index_requestcache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_querycache_hit_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_requestcache_miss_count{%(queriesSelectorGroupOnly)s, context="total"} + \n' + + ' opensearch_index_querycache_miss_number{%(queriesSelectorGroupOnly)s, context="total"}), 1\n' + + '))))', legendCustomTemplate: '{{index}}', }, }, @@ -370,9 +370,9 @@ function(this) sources: { prometheus: { expr: 'topk(10, sum by(' + this.groupAggListWithInstance + ') (\n' - + ' increase(opensearch_ingest_total_time_seconds{%(queriesSelectorGroupOnly)s}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__interval:] offset $__interval), 1)\n' - + 
'))', + + ' increase(opensearch_ingest_total_time_seconds{%(queriesSelectorGroupOnly)s}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_ingest_total_count{%(queriesSelectorGroupOnly)s}[$__interval:] offset $__interval), 1)\n' + + '))', legendCustomTemplate: '{{node}}', }, }, @@ -419,9 +419,9 @@ function(this) sources: { prometheus: { expr: 'topk(10, avg by(index, ' + this.groupAggList + ') (\n' - + ' increase(opensearch_index_indexing_index_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval), 1)\n' - + '))', + + ' increase(opensearch_index_indexing_index_time_seconds{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelectorGroupOnly)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + '))', legendCustomTemplate: '{{index}}', }, }, @@ -444,4 +444,4 @@ function(this) }, }, }, - } \ No newline at end of file + } diff --git a/opensearch-mixin/signals/search-and-index-overview.libsonnet b/opensearch-mixin/signals/search-and-index-overview.libsonnet index f1d6ed742..4eaa9fa33 100644 --- a/opensearch-mixin/signals/search-and-index-overview.libsonnet +++ b/opensearch-mixin/signals/search-and-index-overview.libsonnet @@ -64,9 +64,9 @@ function(this) sources: { prometheus: { expr: 'avg by (job,opensearch_cluster,index) (\n' - + ' increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' - + ')', + + ' increase(opensearch_index_search_query_time_seconds{%(queriesSelector)s}[$__interval:] offset $__interval) / \n' + + ' 
clamp_min(increase(opensearch_index_search_query_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', legendCustomTemplate: '{{index}} - query', }, }, @@ -79,9 +79,9 @@ function(this) sources: { prometheus: { expr: 'avg by (job,opensearch_cluster,index) (\n' - + ' increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' - + ')', + + ' increase(opensearch_index_search_fetch_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_search_fetch_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', legendCustomTemplate: '{{index}} - fetch', }, }, @@ -94,9 +94,9 @@ function(this) sources: { prometheus: { expr: 'avg by (job,opensearch_cluster,index) (\n' - + ' increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' - + ')', + + ' increase(opensearch_index_search_scroll_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_search_scroll_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', legendCustomTemplate: '{{index}} - scroll', }, }, @@ -109,10 +109,10 @@ function(this) sources: { prometheus: { expr: 'avg by(job,opensearch_cluster,index) (\n' - + ' 100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"}) / \n' - + ' clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + \n' - + ' 
opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"}, 1)\n' - + ')', + + ' 100 * (opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"}) / \n' + + ' clamp_min(opensearch_index_requestcache_hit_count{%(queriesSelector)s, context="total"} + \n' + + ' opensearch_index_requestcache_miss_count{%(queriesSelector)s, context="total"}, 1)\n' + + ')', legendCustomTemplate: '{{index}} - request', }, }, @@ -125,10 +125,10 @@ function(this) sources: { prometheus: { expr: 'avg by(job,opensearch_cluster,index) (\n' - + ' 100 * (opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / \n' - + ' clamp_min(opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + \n' - + ' opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}, 1)\n' - + ')', + + ' 100 * (opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"}) / \n' + + ' clamp_min(opensearch_index_querycache_hit_count{%(queriesSelector)s, context="total"} + \n' + + ' opensearch_index_querycache_miss_number{%(queriesSelector)s, context="total"}, 1)\n' + + ')', legendCustomTemplate: '{{index}} - query', }, }, @@ -228,9 +228,9 @@ function(this) sources: { prometheus: { expr: 'avg by(' + this.groupAggList + ') (\n' - + ' increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:] offset $__interval), 1)\n' - + ')', + + ' increase(opensearch_index_indexing_index_time_seconds{%(queriesSelector)s, context=~"total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_indexing_index_count{%(queriesSelector)s, context=~"total"}[$__interval:] offset $__interval), 1)\n' + + ')', }, }, }, @@ -285,9 +285,9 @@ function(this) sources: { prometheus: { expr: 'avg by(' + this.groupAggList + ',index) 
(\n' - + ' increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' - + ')', + + ' increase(opensearch_index_flush_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_flush_total_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', legendCustomTemplate: '{{index}}', }, }, @@ -308,7 +308,7 @@ function(this) }, }, }, - + merge_stopped_time: { name: 'Merge stopped time increase', description: 'Merge stopped time increase (boolean >0).', @@ -379,9 +379,9 @@ function(this) sources: { prometheus: { expr: 'avg by(job,opensearch_cluster,index) (\n' - + ' increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' - + ' clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' - + ')', + + ' increase(opensearch_index_refresh_total_time_seconds{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval) / \n' + + ' clamp_min(increase(opensearch_index_refresh_total_count{%(queriesSelector)s, context="total"}[$__interval:] offset $__interval), 1)\n' + + ')', legendCustomTemplate: '{{index}}', }, }, From c96eebccf8c51d125f257e5b5d46d9c5ed025529 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Fri, 21 Nov 2025 09:37:29 -0500 Subject: [PATCH 05/13] Fixed linting errors for prometheus datasource. 
--- .../opensearch-cluster-overview.json | 96 +++++++++---------- .../opensearch-node-overview.json | 76 +++++++-------- .../opensearch-search-and-index-overview.json | 80 ++++++++-------- opensearch-mixin/panels.libsonnet | 64 +++++++++++++ 4 files changed, 190 insertions(+), 126 deletions(-) diff --git a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json index e17dbca8a..a23bdd779 100644 --- a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json @@ -40,8 +40,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The overall health and availability of the OpenSearch cluster.", "fieldConfig": { @@ -125,8 +125,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The number of running nodes across the OpenSearch cluster.", "fieldConfig": { @@ -187,8 +187,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The number of data nodes in the OpenSearch cluster.", "fieldConfig": { @@ -249,8 +249,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The number of shards in the OpenSearch cluster across all indices.", "fieldConfig": { @@ -311,8 +311,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Percent of active shards across the OpenSearch cluster.", "fieldConfig": { @@ -391,8 +391,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, 
"description": "OpenSearch node roles.", "fieldConfig": { @@ -524,8 +524,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "OpenSearch node roles over time.", "fieldConfig": { @@ -652,8 +652,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top nodes by OS CPU usage across the OpenSearch cluster.", "fieldConfig": { @@ -713,8 +713,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The total count of circuit breakers tripped across the OpenSearch cluster.", "fieldConfig": { @@ -773,8 +773,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Shard status counts across the OpenSearch cluster.", "fieldConfig": { @@ -845,8 +845,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top nodes by disk usage across the OpenSearch cluster.", "fieldConfig": { @@ -906,8 +906,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The total count of documents indexed across the OpenSearch cluster.", "fieldConfig": { @@ -947,8 +947,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The number of tasks waiting to be executed across the OpenSearch cluster.", "fieldConfig": { @@ -988,8 +988,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The total size of the store across the OpenSearch 
cluster.", "fieldConfig": { @@ -1029,8 +1029,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The max wait time for tasks to be executed across the OpenSearch cluster.", "fieldConfig": { @@ -1082,8 +1082,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.", "fieldConfig": { @@ -1122,8 +1122,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.", "fieldConfig": { @@ -1163,8 +1163,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.", "fieldConfig": { @@ -1217,8 +1217,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top nodes by rate of ingest across the OpenSearch cluster.", "fieldConfig": { @@ -1253,8 +1253,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top nodes by ingestion latency across the OpenSearch cluster.", "fieldConfig": { @@ -1295,8 +1295,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top nodes by ingestion failures across the OpenSearch cluster.", "fieldConfig": { @@ -1350,8 +1350,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", 
+ "uid": "${prometheus_datasource}" }, "description": "Top indices by rate of document indexing across the OpenSearch cluster.", "fieldConfig": { @@ -1391,8 +1391,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top indices by indexing latency across the OpenSearch cluster.", "fieldConfig": { @@ -1433,8 +1433,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Top indices by index document failures across the OpenSearch cluster.", "fieldConfig": { diff --git a/opensearch-mixin/dashboards_out/opensearch-node-overview.json b/opensearch-mixin/dashboards_out/opensearch-node-overview.json index 96904a47e..d50a88b8e 100644 --- a/opensearch-mixin/dashboards_out/opensearch-node-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-node-overview.json @@ -40,8 +40,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "CPU usage percentage of the node's Operating System.", "fieldConfig": { @@ -93,8 +93,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Memory usage percentage of the node for the operating system and OpenSearch", "fieldConfig": { @@ -146,8 +146,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Node file system read and write data.", "fieldConfig": { @@ -232,8 +232,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Number of open connections for the selected node.", "fieldConfig": { @@ -282,8 +282,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- 
Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Disk usage percentage of the selected node.", "fieldConfig": { @@ -335,8 +335,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Percentage of swap space used by OpenSearch and the operating system on the selected node.", "fieldConfig": { @@ -388,8 +388,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Network traffic on the node's operating system.", "fieldConfig": { @@ -443,8 +443,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Circuit breakers tripped on the selected node by type", "fieldConfig": { @@ -500,8 +500,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "OpenSearch node roles over time.", "fieldConfig": { @@ -628,8 +628,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM heap memory usage vs committed.", "fieldConfig": { @@ -682,8 +682,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM non-heap memory usage vs committed.", "fieldConfig": { @@ -736,8 +736,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM thread count.", "fieldConfig": { @@ -778,8 +778,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM buffer pool usage.", "fieldConfig": { @@ -820,8 +820,8 
@@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM uptime in seconds.", "fieldConfig": { @@ -862,8 +862,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM garbage collection count.", "fieldConfig": { @@ -905,8 +905,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM garbage collection time in milliseconds.", "fieldConfig": { @@ -948,8 +948,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "JVM buffer pool usage by pool.", "fieldConfig": { @@ -1003,8 +1003,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Thread pool thread count.", "fieldConfig": { @@ -1045,8 +1045,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Thread pool task count.", "fieldConfig": { diff --git a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json index ceabef8f7..1dc4b0012 100644 --- a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json @@ -40,8 +40,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Rate of fetch, scroll, and query requests by selected index.", "fieldConfig": { @@ -132,8 +132,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + 
"uid": "${prometheus_datasource}" }, "description": "Latency of fetch, scroll, and query requests by selected index.", "fieldConfig": { @@ -227,8 +227,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Ratio of query cache and request cache hits and misses.", "fieldConfig": { @@ -307,8 +307,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Total evictions count by cache type for the selected index.", "fieldConfig": { @@ -427,8 +427,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Rate of indexed documents for the selected index.", "fieldConfig": { @@ -490,8 +490,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Document indexing latency for the selected index.", "fieldConfig": { @@ -544,8 +544,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Number of indexing failures for the selected index.", "fieldConfig": { @@ -620,8 +620,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Index flush latency for the selected index.", "fieldConfig": { @@ -696,8 +696,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Index merge time for the selected index.", "fieldConfig": { @@ -804,8 +804,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Index refresh latency for the selected 
index.", "fieldConfig": { @@ -880,8 +880,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Current number of translog operations for the selected index.", "fieldConfig": { @@ -955,8 +955,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Rate of documents deleted for the selected index.", "fieldConfig": { @@ -1043,8 +1043,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Number of indexed documents for the selected index.", "fieldConfig": { @@ -1118,8 +1118,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Current number of segments for the selected index.", "fieldConfig": { @@ -1193,8 +1193,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Number of merge operations for the selected index.", "fieldConfig": { @@ -1269,8 +1269,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Size of query cache and request cache.", "fieldConfig": { @@ -1361,8 +1361,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Size of the store in bytes for the selected index.", "fieldConfig": { @@ -1436,8 +1436,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Memory used by segments for the selected index.", "fieldConfig": { @@ -1511,8 +1511,8 @@ }, { "datasource": { - "type": "datasource", - 
"uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "Size of merge operations in bytes for the selected index.", "fieldConfig": { @@ -1587,8 +1587,8 @@ }, { "datasource": { - "type": "datasource", - "uid": "-- Mixed --" + "type": "prometheus", + "uid": "${prometheus_datasource}" }, "description": "The number of index shards for the selected index.", "fieldConfig": { diff --git a/opensearch-mixin/panels.libsonnet b/opensearch-mixin/panels.libsonnet index dbdf17517..445a1617a 100644 --- a/opensearch-mixin/panels.libsonnet +++ b/opensearch-mixin/panels.libsonnet @@ -9,6 +9,7 @@ local var = g.dashboard.variable; clusterOSRoles: g.panel.table.new('Roles') + g.panel.table.panelOptions.withDescription('OpenSearch node roles.') + + g.panel.table.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.table.queryOptions.withTargets([ signals.clusterOverview.node_role_last_seen.asTarget() + g.query.prometheus.withInstant(true), @@ -63,9 +64,11 @@ local var = g.dashboard.variable; clusterOSRolesTimeline: g.panel.statusHistory.new('Roles timeline') + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') + + g.panel.statusHistory.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.statusHistory.options.withShowValue('never') + g.panel.statusHistory.options.withLegend(false) + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + + g.panel.statusHistory.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.statusHistory.queryOptions.withTargets([ signals.clusterOverview.node_role_data.asTarget(), signals.clusterOverview.node_role_master.asTarget(), @@ -90,6 +93,7 @@ local var = g.dashboard.variable; clusterStatusPanel: g.panel.stat.new('Cluster status') + g.panel.stat.panelOptions.withDescription('The overall health and availability of the OpenSearch cluster.') + + 
g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.stat.queryOptions.withTargets([ signals.clusterOverview.cluster_status.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -121,6 +125,7 @@ local var = g.dashboard.variable; nodeCountPanel: g.panel.stat.new('Node count') + g.panel.stat.panelOptions.withDescription('The number of running nodes across the OpenSearch cluster.') + + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.stat.queryOptions.withTargets([ signals.clusterOverview.cluster_nodes_number.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -141,6 +146,7 @@ local var = g.dashboard.variable; dataNodeCountPanel: g.panel.stat.new('Data node count') + g.panel.stat.panelOptions.withDescription('The number of data nodes in the OpenSearch cluster.') + + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.stat.queryOptions.withTargets([ signals.clusterOverview.cluster_datanodes_number.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -160,6 +166,7 @@ local var = g.dashboard.variable; shardCountPanel: g.panel.stat.new('Shard count') + g.panel.stat.panelOptions.withDescription('The number of shards in the OpenSearch cluster across all indices.') + + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.stat.queryOptions.withTargets([ signals.clusterOverview.cluster_shards_number_total.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -179,6 +186,7 @@ local var = g.dashboard.variable; activeShardsPercentagePanel: g.panel.stat.new('Active shards %') + g.panel.stat.panelOptions.withDescription('Percent of active shards across the OpenSearch cluster.') + + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.stat.queryOptions.withTargets([ signals.clusterOverview.cluster_shards_active_percent.asTarget() + 
g.query.prometheus.withIntervalFactor(2), @@ -201,6 +209,7 @@ local var = g.dashboard.variable; topNodesByCPUUsagePanel: g.panel.barGauge.new('Top nodes by CPU usage') + g.panel.barGauge.panelOptions.withDescription('Top nodes by OS CPU usage across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ signals.clusterOverview.os_cpu_percent_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -221,6 +230,7 @@ local var = g.dashboard.variable; breakersTrippedPanel: g.panel.barGauge.new('Breakers tripped') + g.panel.barGauge.panelOptions.withDescription('The total count of circuit breakers tripped across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ signals.clusterOverview.circuitbreaker_tripped_count_sum.asTarget() + g.query.prometheus.withInterval('2m') @@ -240,6 +250,7 @@ local var = g.dashboard.variable; shardStatusPanel: g.panel.barGauge.new('Shard status') + g.panel.barGauge.panelOptions.withDescription('Shard status counts across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ signals.clusterOverview.cluster_shards_number_by_type.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -258,6 +269,7 @@ local var = g.dashboard.variable; topNodesByDiskUsagePanel: g.panel.barGauge.new('Top nodes by disk usage') + g.panel.barGauge.panelOptions.withDescription('Top nodes by disk usage across the OpenSearch cluster.') + + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ signals.clusterOverview.fs_path_used_percent_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -278,6 +290,7 @@ local var = g.dashboard.variable; 
totalDocumentsPanel: g.panel.timeSeries.new('Total documents') + g.panel.timeSeries.panelOptions.withDescription('The total count of documents indexed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.indices_indexing_index_count_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -290,6 +303,7 @@ local var = g.dashboard.variable; pendingTasksPanel: g.panel.timeSeries.new('Pending tasks') + g.panel.timeSeries.panelOptions.withDescription('The number of tasks waiting to be executed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.cluster_pending_tasks_number.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -302,6 +316,7 @@ local var = g.dashboard.variable; storeSizePanel: g.panel.timeSeries.new('Store size') + g.panel.timeSeries.panelOptions.withDescription('The total size of the store across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.indices_store_size_bytes_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -314,6 +329,7 @@ local var = g.dashboard.variable; maxTaskWaitTimePanel: g.panel.timeSeries.new('Max task wait time') + g.panel.timeSeries.panelOptions.withDescription('The max wait time for tasks to be executed across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.clusterOverview.cluster_task_max_wait_seconds.asTarget()]) + g.panel.timeSeries.standardOptions.withUnit('s') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') @@ -323,6 +339,7 
@@ local var = g.dashboard.variable; topIndicesByRequestRatePanel: g.panel.timeSeries.new('Top indices by request rate') + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.clusterOverview.search_current_inflight_topk.asTarget()]) + g.panel.timeSeries.standardOptions.withUnit('reqps') + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') @@ -332,6 +349,7 @@ local var = g.dashboard.variable; topIndicesByRequestLatencyPanel: g.panel.timeSeries.new('Top indices by request latency') + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.search_avg_latency_topk.asTarget() + g.query.prometheus.withInterval('2m'), @@ -344,6 +362,7 @@ local var = g.dashboard.variable; topIndicesByCombinedCacheHitRatioPanel: g.panel.timeSeries.new('Top indices by combined cache hit ratio') + g.panel.timeSeries.panelOptions.withDescription('Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.request_query_cache_hit_rate_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -356,6 +375,7 @@ local var = g.dashboard.variable; topNodesByIngestRatePanel: g.panel.timeSeries.new('Top nodes by ingest rate') + g.panel.timeSeries.panelOptions.withDescription('Top nodes by rate of ingest across the OpenSearch cluster.') + + 
g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.ingest_throughput_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -365,6 +385,7 @@ local var = g.dashboard.variable; topNodesByIngestLatencyPanel: g.panel.timeSeries.new('Top nodes by ingest latency') + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion latency across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.ingest_latency_topk.asTarget() + g.query.prometheus.withInterval('2m') @@ -378,6 +399,7 @@ local var = g.dashboard.variable; topNodesByIngestErrorsPanel: g.panel.timeSeries.new('Top nodes by ingest errors') + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion failures across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.ingest_failures_topk.asTarget() + g.query.prometheus.withInterval('2m') @@ -391,6 +413,7 @@ local var = g.dashboard.variable; topIndicesByIndexRatePanel: g.panel.timeSeries.new('Top indices by index rate') + g.panel.timeSeries.panelOptions.withDescription('Top indices by rate of document indexing across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.indexing_current_topk.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -403,6 +426,7 @@ local var = g.dashboard.variable; topIndicesByIndexLatencyPanel: g.panel.timeSeries.new('Top indices by index latency') + g.panel.timeSeries.panelOptions.withDescription('Top indices by indexing latency across the OpenSearch cluster.') + + 
g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.indexing_latency_topk.asTarget() + g.query.prometheus.withInterval('2m') @@ -416,6 +440,7 @@ local var = g.dashboard.variable; topIndicesByIndexFailuresPanel: g.panel.timeSeries.new('Top indices by index failures') + g.panel.timeSeries.panelOptions.withDescription('Top indices by index document failures across the OpenSearch cluster.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.clusterOverview.indexing_failed_topk.asTarget() + g.query.prometheus.withInterval('2m') @@ -431,6 +456,7 @@ local var = g.dashboard.variable; nodeCpuUsage: g.panel.timeSeries.new('Node CPU usage') + g.panel.timeSeries.panelOptions.withDescription("CPU usage percentage of the node's Operating System.") + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_cpu_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMax(100) @@ -448,6 +474,7 @@ local var = g.dashboard.variable; nodeMemoryUsage: g.panel.timeSeries.new('Node memory usage') + g.panel.timeSeries.panelOptions.withDescription('Memory usage percentage of the node for the operating system and OpenSearch') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_mem_used_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMax(100) @@ -465,6 +492,7 @@ local var = g.dashboard.variable; nodeIO: g.panel.timeSeries.new('Node I/O') + g.panel.timeSeries.panelOptions.withDescription('Node file system read and 
write data.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.fs_read_bps.asTarget() + g.query.prometheus.withInterval('2m'), @@ -491,6 +519,7 @@ local var = g.dashboard.variable; nodeOpenConnections: g.panel.timeSeries.new('Node open connections') + g.panel.timeSeries.panelOptions.withDescription('Number of open connections for the selected node.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.transport_open_connections.asTarget()]) + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') @@ -505,6 +534,7 @@ local var = g.dashboard.variable; nodeDiskUsage: g.panel.timeSeries.new('Node disk usage') + g.panel.timeSeries.panelOptions.withDescription('Disk usage percentage of the selected node.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.fs_used_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMin(0) @@ -522,6 +552,7 @@ local var = g.dashboard.variable; nodeMemorySwap: g.panel.timeSeries.new('Node memory swap') + g.panel.timeSeries.panelOptions.withDescription('Percentage of swap space used by OpenSearch and the operating system on the selected node.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_swap_used_percent.asTarget()]) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMin(0) @@ -539,6 +570,7 @@ local var = g.dashboard.variable; nodeNetworkTraffic: 
g.panel.timeSeries.new('Node network traffic') + g.panel.timeSeries.panelOptions.withDescription("Network traffic on the node's operating system.") + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.transport_rx_bps.asTarget() + g.query.prometheus.withInterval('2m'), @@ -556,6 +588,7 @@ local var = g.dashboard.variable; circuitBreakers: g.panel.timeSeries.new('Circuit breakers') + g.panel.timeSeries.panelOptions.withDescription('Circuit breakers tripped on the selected node by type') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.circuitbreaker_tripped_sum_by_name.asTarget() + g.query.prometheus.withInterval('2m') @@ -576,6 +609,7 @@ local var = g.dashboard.variable; + g.panel.statusHistory.options.withShowValue('never') + g.panel.statusHistory.options.withLegend(false) + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + + g.panel.statusHistory.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.statusHistory.queryOptions.withTargets([ signals.nodeOverview.node_role_data.asTarget(), signals.nodeOverview.node_role_master.asTarget(), @@ -600,6 +634,7 @@ local var = g.dashboard.variable; jvmHeapUsedVsCommitted: g.panel.timeSeries.new('JVM heap used vs committed') + g.panel.timeSeries.panelOptions.withDescription('JVM heap memory usage vs committed.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_heap_used_bytes.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -616,6 +651,7 @@ local var = g.dashboard.variable; jvmNonheapUsedVsCommitted: g.panel.timeSeries.new('JVM non-heap used vs committed') + g.panel.timeSeries.panelOptions.withDescription('JVM non-heap memory usage vs committed.') 
+ + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_nonheap_used_bytes.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -632,6 +668,7 @@ local var = g.dashboard.variable; jvmThreads: g.panel.timeSeries.new('JVM threads') + g.panel.timeSeries.panelOptions.withDescription('JVM thread count.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_threads.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -646,6 +683,7 @@ local var = g.dashboard.variable; jvmBufferPools: g.panel.timeSeries.new('JVM buffer pools') + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_bufferpool_number.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -660,6 +698,7 @@ local var = g.dashboard.variable; jvmUptime: g.panel.timeSeries.new('JVM uptime') + g.panel.timeSeries.panelOptions.withDescription('JVM uptime in seconds.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_uptime.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -674,6 +713,7 @@ local var = g.dashboard.variable; jvmGarbageCollections: g.panel.timeSeries.new('JVM garbage collections') + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection count.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_gc_collections.asTarget() + g.query.prometheus.withInterval('2m') @@ -689,6 +729,7 @@ local var = g.dashboard.variable; jvmGarbageCollectionTime: 
g.panel.timeSeries.new('JVM garbage collection time') + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection time in milliseconds.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_gc_time.asTarget() + g.query.prometheus.withInterval('2m') @@ -704,6 +745,7 @@ local var = g.dashboard.variable; jvmBufferPoolUsage: g.panel.timeSeries.new('JVM buffer pool usage') + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage by pool.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.jvm_bufferpool_used_percent.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -718,6 +760,7 @@ local var = g.dashboard.variable; threadPoolThreads: g.panel.timeSeries.new('Thread pool threads') + g.panel.timeSeries.panelOptions.withDescription('Thread pool thread count.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.threadpool_threads.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -732,6 +775,7 @@ local var = g.dashboard.variable; threadPoolTasks: g.panel.timeSeries.new('Thread pool tasks') + g.panel.timeSeries.panelOptions.withDescription('Thread pool task count.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.nodeOverview.threadpool_tasks.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -748,6 +792,7 @@ local var = g.dashboard.variable; searchRequestRatePanel: g.panel.timeSeries.new('Request rate') + g.panel.timeSeries.panelOptions.withDescription('Rate of fetch, scroll, and query requests by selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', 
'${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.search_query_current_avg.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -771,6 +816,7 @@ local var = g.dashboard.variable; searchRequestLatencyPanel: g.panel.timeSeries.new('Request latency') + g.panel.timeSeries.panelOptions.withDescription('Latency of fetch, scroll, and query requests by selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.search_query_latency_avg.asTarget() + g.query.prometheus.withInterval('2m') @@ -797,6 +843,7 @@ local var = g.dashboard.variable; searchCacheHitRatioPanel: g.panel.timeSeries.new('Cache hit ratio') + g.panel.timeSeries.panelOptions.withDescription('Ratio of query cache and request cache hits and misses.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.request_cache_hit_rate.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -818,6 +865,7 @@ local var = g.dashboard.variable; searchCacheEvictionsPanel: g.panel.timeSeries.new('Evictions') + g.panel.timeSeries.panelOptions.withDescription('Total evictions count by cache type for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.query_cache_evictions.asTarget() + g.query.prometheus.withInterval('2m') @@ -851,6 +899,7 @@ local var = g.dashboard.variable; indexingRatePanel: g.panel.timeSeries.new('Index rate') + g.panel.timeSeries.panelOptions.withDescription('Rate of indexed documents for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ 
signals.searchAndIndexOverview.indexing_current.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -869,6 +918,7 @@ local var = g.dashboard.variable; indexingLatencyPanel: g.panel.timeSeries.new('Index latency') + g.panel.timeSeries.panelOptions.withDescription('Document indexing latency for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.indexing_latency.asTarget() + g.query.prometheus.withInterval('2m') @@ -888,6 +938,7 @@ local var = g.dashboard.variable; indexingFailuresPanel: g.panel.timeSeries.new('Index failures') + g.panel.timeSeries.panelOptions.withDescription('Number of indexing failures for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.indexing_failed.asTarget() + g.query.prometheus.withInterval('2m') @@ -912,6 +963,7 @@ local var = g.dashboard.variable; flushLatencyPanel: g.panel.timeSeries.new('Flush latency') + g.panel.timeSeries.panelOptions.withDescription('Index flush latency for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.flush_latency.asTarget() + g.query.prometheus.withInterval('2m') @@ -935,6 +987,7 @@ local var = g.dashboard.variable; mergeTimePanel: g.panel.timeSeries.new('Merge time') + g.panel.timeSeries.panelOptions.withDescription('Index merge time for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.merge_time.asTarget() + g.query.prometheus.withInterval('2m') @@ -966,6 +1019,7 @@ local var = g.dashboard.variable; refreshLatencyPanel: 
g.panel.timeSeries.new('Refresh latency') + g.panel.timeSeries.panelOptions.withDescription('Index refresh latency for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.refresh_latency.asTarget() + g.query.prometheus.withInterval('2m') @@ -990,6 +1044,7 @@ local var = g.dashboard.variable; translogOperationsPanel: g.panel.timeSeries.new('Translog operations') + g.panel.timeSeries.panelOptions.withDescription('Current number of translog operations for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.translog_ops.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1012,6 +1067,7 @@ local var = g.dashboard.variable; docsDeletedPanel: g.panel.timeSeries.new('Docs deleted') + g.panel.timeSeries.panelOptions.withDescription('Rate of documents deleted for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.indexing_delete_current.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1034,6 +1090,7 @@ local var = g.dashboard.variable; documentsIndexedPanel: g.panel.timeSeries.new('Documents indexed') + g.panel.timeSeries.panelOptions.withDescription('Number of indexed documents for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.indexing_count.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1057,6 +1114,7 @@ local var = g.dashboard.variable; segmentCountPanel: g.panel.timeSeries.new('Segment count') + g.panel.timeSeries.panelOptions.withDescription('Current number of segments for the 
selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.segments_number.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1079,6 +1137,7 @@ local var = g.dashboard.variable; mergeCountPanel: g.panel.timeSeries.new('Merge count') + g.panel.timeSeries.panelOptions.withDescription('Number of merge operations for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.merge_docs.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1103,6 +1162,7 @@ local var = g.dashboard.variable; cacheSizePanel: g.panel.timeSeries.new('Cache size') + g.panel.timeSeries.panelOptions.withDescription('Size of query cache and request cache.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.query_cache_memory.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1128,6 +1188,7 @@ local var = g.dashboard.variable; searchAndIndexStoreSizePanel: g.panel.timeSeries.new('Store size') + g.panel.timeSeries.panelOptions.withDescription('Size of the store in bytes for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.store_size_bytes.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1150,6 +1211,7 @@ local var = g.dashboard.variable; segmentSizePanel: g.panel.timeSeries.new('Segment size') + g.panel.timeSeries.panelOptions.withDescription('Memory used by segments for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ 
signals.searchAndIndexOverview.segments_memory_bytes.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1172,6 +1234,7 @@ local var = g.dashboard.variable; mergeSizePanel: g.panel.timeSeries.new('Merge size') + g.panel.timeSeries.panelOptions.withDescription('Size of merge operations in bytes for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.merge_current_size.asTarget() + g.query.prometheus.withIntervalFactor(2), @@ -1195,6 +1258,7 @@ local var = g.dashboard.variable; searchAndIndexShardCountPanel: g.panel.timeSeries.new('Shard count') + g.panel.timeSeries.panelOptions.withDescription('The number of index shards for the selected index.') + + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.timeSeries.queryOptions.withTargets([ signals.searchAndIndexOverview.shards_per_index.asTarget() + g.query.prometheus.withIntervalFactor(2), From d1f30896b6da558672ea8182a52e2dc5ce134049 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Fri, 21 Nov 2025 13:39:16 -0500 Subject: [PATCH 06/13] Reworked all panels to use commonlib as base. 
--- .../opensearch-cluster-overview.json | 166 +++ .../opensearch-node-overview.json | 157 ++- .../opensearch-search-and-index-overview.json | 678 ++++------ opensearch-mixin/panels.libsonnet | 1116 ++++++----------- 4 files changed, 933 insertions(+), 1184 deletions(-) diff --git a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json index a23bdd779..a18e52a49 100644 --- a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json @@ -47,6 +47,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "mappings": [ @@ -132,6 +133,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "thresholds": { @@ -194,6 +196,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "thresholds": { @@ -256,6 +259,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "thresholds": { @@ -318,6 +322,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "text", "mode": "thresholds" }, "thresholds": { @@ -913,6 +918,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -927,6 +934,16 @@ "y": 34 }, "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -954,6 +971,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -968,6 +987,16 @@ "y": 42 }, "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": 
"v11.0.0", "targets": [ { @@ -995,6 +1024,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1009,6 +1040,16 @@ "y": 42 }, "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1036,6 +1077,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1050,6 +1093,16 @@ "y": 42 }, "id": 19, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1089,6 +1142,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1103,6 +1158,16 @@ "y": 51 }, "id": 21, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1129,6 +1194,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1143,6 +1210,16 @@ "y": 51 }, "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1170,6 +1247,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1184,6 +1263,16 @@ "y": 51 }, "id": 23, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + 
} + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1223,6 +1312,13 @@ "description": "Top nodes by rate of ingest across the OpenSearch cluster.", "fieldConfig": { "defaults": { + "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", + "lineInterpolation": "smooth", + "lineWidth": 2, + "showPoints": "never" + }, "unit": "Bps" } }, @@ -1233,6 +1329,16 @@ "y": 60 }, "id": 25, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1260,6 +1366,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1274,6 +1382,16 @@ "y": 60 }, "id": 26, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1302,6 +1420,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1316,6 +1436,16 @@ "y": 60 }, "id": 27, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1357,6 +1487,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1371,6 +1503,16 @@ "y": 69 }, "id": 29, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1398,6 +1540,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1412,6 +1556,16 @@ "y": 69 }, 
"id": 30, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1440,6 +1594,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1454,6 +1610,16 @@ "y": 69 }, "id": 31, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { diff --git a/opensearch-mixin/dashboards_out/opensearch-node-overview.json b/opensearch-mixin/dashboards_out/opensearch-node-overview.json index d50a88b8e..bd656429d 100644 --- a/opensearch-mixin/dashboards_out/opensearch-node-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-node-overview.json @@ -69,6 +69,10 @@ }, "id": 2, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { "mode": "multi", "sort": "desc" @@ -122,6 +126,10 @@ }, "id": 3, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { "mode": "multi", "sort": "desc" @@ -195,6 +203,10 @@ }, "id": 4, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { "mode": "multi", "sort": "desc" @@ -247,7 +259,8 @@ "stacking": { "mode": "normal" } - } + }, + "unit": "connections" } }, "gridPos": { @@ -258,6 +271,10 @@ }, "id": 5, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { "mode": "multi", "sort": "desc" @@ -311,6 +328,10 @@ }, "id": 6, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { "mode": "multi", "sort": "desc" @@ -364,6 +385,10 @@ }, "id": 7, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { "mode": "multi", "sort": "desc" @@ -411,6 +436,16 @@ "y": 9 }, "id": 8, + "options": { + "legend": { + "calcs": [ ], + 
"displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -466,6 +501,16 @@ "y": 9 }, "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -636,6 +681,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -650,6 +696,16 @@ "y": 27 }, "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -690,6 +746,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -704,6 +761,16 @@ "y": 27 }, "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -744,6 +811,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -758,6 +826,16 @@ "y": 27 }, "id": 15, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -786,6 +864,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -800,6 +879,16 @@ "y": 27 }, "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -828,6 +917,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", 
"lineWidth": 2, "showPoints": "never" @@ -842,6 +932,16 @@ "y": 35 }, "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -870,6 +970,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -884,6 +985,16 @@ "y": 35 }, "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -913,6 +1024,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -927,6 +1039,16 @@ "y": 35 }, "id": 19, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -956,6 +1078,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -970,6 +1093,16 @@ "y": 35 }, "id": 20, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1011,6 +1144,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1025,6 +1159,16 @@ "y": 44 }, "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1053,6 +1197,7 @@ "defaults": { "custom": { "fillOpacity": 5, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1067,6 +1212,16 @@ "y": 44 }, "id": 23, + 
"options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { diff --git a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json index 1dc4b0012..d2dd4e492 100644 --- a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json @@ -47,34 +47,14 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, "unit": "reqps" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -84,8 +64,13 @@ }, "id": 2, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "v11.0.0", @@ -139,34 +124,14 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -176,8 +141,13 @@ }, "id": 3, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "v11.0.0", @@ -234,34 +204,14 @@ "fieldConfig": { "defaults": { 
"custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -271,8 +221,13 @@ }, "id": 4, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "v11.0.0", @@ -314,6 +269,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -331,29 +288,7 @@ ] }, "unit": "evictions" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -363,8 +298,13 @@ }, "id": 5, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "v11.0.0", @@ -434,34 +374,14 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" }, "unit": "documents/s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -470,6 +390,16 @@ "y": 10 }, "id": 7, + "options": { + "legend": { + "calcs": [ ], + 
"displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -497,6 +427,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -523,6 +455,16 @@ "y": 10 }, "id": 8, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -551,6 +493,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -568,29 +512,7 @@ ] }, "unit": "failures" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -599,6 +521,16 @@ "y": 10 }, "id": 9, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -627,6 +559,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -644,29 +578,7 @@ ] }, "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -675,6 +587,16 @@ "y": 10 }, "id": 10, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", 
"targets": [ { @@ -704,6 +626,8 @@ "defaults": { "custom": { "drawStyle": "points", + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -721,29 +645,7 @@ ] }, "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -753,8 +655,13 @@ }, "id": 11, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "v11.0.0", @@ -811,6 +718,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -828,29 +737,7 @@ ] }, "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -859,6 +746,16 @@ "y": 18 }, "id": 12, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -887,6 +784,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -904,29 +803,7 @@ ] }, "unit": "operations" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, 
"gridPos": { "h": 8, @@ -935,6 +812,16 @@ "y": 18 }, "id": 13, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -962,6 +849,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -979,29 +868,7 @@ ] }, "unit": "documents/s" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1010,6 +877,16 @@ "y": 18 }, "id": 14, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1050,6 +927,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1067,29 +946,7 @@ ] }, "unit": "documents" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1098,6 +955,16 @@ "y": 27 }, "id": 16, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1125,6 +992,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1142,29 +1011,7 @@ ] }, "unit": "segments" - }, - "overrides": [ - { - 
"matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1173,6 +1020,16 @@ "y": 27 }, "id": 17, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1201,6 +1058,8 @@ "defaults": { "custom": { "drawStyle": "points", + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1218,29 +1077,7 @@ ] }, "unit": "merges" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1249,6 +1086,16 @@ "y": 27 }, "id": 18, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1276,6 +1123,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1293,29 +1142,7 @@ ] }, "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1325,8 +1152,13 @@ }, "id": 19, "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "v11.0.0", @@ -1368,6 
+1200,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1385,29 +1219,7 @@ ] }, "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1416,6 +1228,16 @@ "y": 35 }, "id": 20, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1443,6 +1265,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1460,29 +1284,7 @@ ] }, "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1491,6 +1293,16 @@ "y": 35 }, "id": 21, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1519,6 +1331,8 @@ "defaults": { "custom": { "drawStyle": "points", + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1536,29 +1350,7 @@ ] }, "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, 
"gridPos": { "h": 8, @@ -1567,6 +1359,16 @@ "y": 35 }, "id": 22, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { @@ -1594,6 +1396,8 @@ "fieldConfig": { "defaults": { "custom": { + "fillOpacity": 30, + "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" @@ -1611,29 +1415,7 @@ ] }, "unit": "shards" - }, - "overrides": [ - { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] - } - ] + } }, "gridPos": { "h": 8, @@ -1642,6 +1424,16 @@ "y": 35 }, "id": 23, + "options": { + "legend": { + "calcs": [ ], + "displayMode": "list" + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, "pluginVersion": "v11.0.0", "targets": [ { diff --git a/opensearch-mixin/panels.libsonnet b/opensearch-mixin/panels.libsonnet index 445a1617a..f3a6755d0 100644 --- a/opensearch-mixin/panels.libsonnet +++ b/opensearch-mixin/panels.libsonnet @@ -1,4 +1,5 @@ local g = import './g.libsonnet'; +local commonlib = import 'common-lib/common/main.libsonnet'; local var = g.dashboard.variable; { @@ -62,20 +63,20 @@ local var = g.dashboard.variable; ]), clusterOSRolesTimeline: - g.panel.statusHistory.new('Roles timeline') - + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') - + g.panel.statusHistory.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + commonlib.panels.generic.statusHistory.base.new( + 'Roles timeline', + targets=[ + signals.clusterOverview.node_role_data.asTarget(), + signals.clusterOverview.node_role_master.asTarget(), + signals.clusterOverview.node_role_ingest.asTarget(), + signals.clusterOverview.node_role_cluster_manager.asTarget(), + 
signals.clusterOverview.node_role_remote_cluster_client.asTarget(), + ], + description='OpenSearch node roles over time.' + ) + + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + g.panel.statusHistory.options.withShowValue('never') + g.panel.statusHistory.options.withLegend(false) - + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) - + g.panel.statusHistory.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.statusHistory.queryOptions.withTargets([ - signals.clusterOverview.node_role_data.asTarget(), - signals.clusterOverview.node_role_master.asTarget(), - signals.clusterOverview.node_role_ingest.asTarget(), - signals.clusterOverview.node_role_cluster_manager.asTarget(), - signals.clusterOverview.node_role_remote_cluster_client.asTarget(), - ]) + g.panel.statusHistory.standardOptions.withMappings([ { type: 'value', @@ -91,13 +92,11 @@ local var = g.dashboard.variable; // Cluster Overview Panels clusterStatusPanel: - g.panel.stat.new('Cluster status') - + g.panel.stat.panelOptions.withDescription('The overall health and availability of the OpenSearch cluster.') - + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.stat.queryOptions.withTargets([ - signals.clusterOverview.cluster_status.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.stat.base.new( + 'Cluster status', + targets=[signals.clusterOverview.cluster_status.asTarget() { intervalFactor: 2 }], + description='The overall health and availability of the OpenSearch cluster.' 
+ ) + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.withMappings([ g.panel.stat.standardOptions.mapping.ValueMap.withType() @@ -118,18 +117,15 @@ local var = g.dashboard.variable; + g.panel.stat.standardOptions.threshold.step.withValue(2), ]) + g.panel.stat.options.reduceOptions.withCalcs(['lastNotNull']) - // + g.panel.stat.standardOptions.graphMode.withMode('none'), + g.panel.stat.options.withGraphMode('none'), nodeCountPanel: - g.panel.stat.new('Node count') - + g.panel.stat.panelOptions.withDescription('The number of running nodes across the OpenSearch cluster.') - + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.stat.queryOptions.withTargets([ - signals.clusterOverview.cluster_nodes_number.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.stat.base.new( + 'Node count', + targets=[signals.clusterOverview.cluster_nodes_number.asTarget() { intervalFactor: 2 }], + description='The number of running nodes across the OpenSearch cluster.' + ) + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.thresholds.withSteps([ g.panel.stat.standardOptions.threshold.step.withColor('green') @@ -144,13 +140,11 @@ local var = g.dashboard.variable; dataNodeCountPanel: - g.panel.stat.new('Data node count') - + g.panel.stat.panelOptions.withDescription('The number of data nodes in the OpenSearch cluster.') - + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.stat.queryOptions.withTargets([ - signals.clusterOverview.cluster_datanodes_number.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.stat.base.new( + 'Data node count', + targets=[signals.clusterOverview.cluster_datanodes_number.asTarget() { intervalFactor: 2 }], + description='The number of data nodes in the OpenSearch cluster.' 
+ ) + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.thresholds.withSteps([ g.panel.stat.standardOptions.threshold.step.withColor('green') @@ -164,13 +158,11 @@ local var = g.dashboard.variable; + g.panel.stat.options.withGraphMode('none'), shardCountPanel: - g.panel.stat.new('Shard count') - + g.panel.stat.panelOptions.withDescription('The number of shards in the OpenSearch cluster across all indices.') - + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.stat.queryOptions.withTargets([ - signals.clusterOverview.cluster_shards_number_total.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.stat.base.new( + 'Shard count', + targets=[signals.clusterOverview.cluster_shards_number_total.asTarget() { intervalFactor: 2 }], + description='The number of shards in the OpenSearch cluster across all indices.' + ) + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.thresholds.withSteps([ g.panel.stat.standardOptions.threshold.step.withColor('green') @@ -184,13 +176,11 @@ local var = g.dashboard.variable; + g.panel.stat.options.withGraphMode('none'), activeShardsPercentagePanel: - g.panel.stat.new('Active shards %') - + g.panel.stat.panelOptions.withDescription('Percent of active shards across the OpenSearch cluster.') - + g.panel.stat.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.stat.queryOptions.withTargets([ - signals.clusterOverview.cluster_shards_active_percent.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.stat.base.new( + 'Active shards %', + targets=[signals.clusterOverview.cluster_shards_active_percent.asTarget() { intervalFactor: 2 }], + description='Percent of active shards across the OpenSearch cluster.' 
+ ) + g.panel.stat.standardOptions.color.withMode('thresholds') + g.panel.stat.standardOptions.thresholds.withSteps([ g.panel.stat.standardOptions.threshold.step.withColor('green') @@ -211,8 +201,7 @@ local var = g.dashboard.variable; + g.panel.barGauge.panelOptions.withDescription('Top nodes by OS CPU usage across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ - signals.clusterOverview.os_cpu_percent_topk.asTarget() - + g.query.prometheus.withIntervalFactor(2), + signals.clusterOverview.os_cpu_percent_topk.asTarget() { intervalFactor: 2 }, ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') + g.panel.barGauge.standardOptions.thresholds.withSteps([ @@ -232,9 +221,7 @@ local var = g.dashboard.variable; + g.panel.barGauge.panelOptions.withDescription('The total count of circuit breakers tripped across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ - signals.clusterOverview.circuitbreaker_tripped_count_sum.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), + signals.clusterOverview.circuitbreaker_tripped_count_sum.asTarget() { interval: '2m', intervalFactor: 2 }, ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') + g.panel.barGauge.standardOptions.thresholds.withSteps([ @@ -252,8 +239,7 @@ local var = g.dashboard.variable; + g.panel.barGauge.panelOptions.withDescription('Shard status counts across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ - signals.clusterOverview.cluster_shards_number_by_type.asTarget() - + g.query.prometheus.withIntervalFactor(2), + signals.clusterOverview.cluster_shards_number_by_type.asTarget() { intervalFactor: 2 }, ]) + 
g.panel.barGauge.standardOptions.color.withMode('thresholds') + g.panel.barGauge.standardOptions.thresholds.withSteps([ @@ -271,8 +257,7 @@ local var = g.dashboard.variable; + g.panel.barGauge.panelOptions.withDescription('Top nodes by disk usage across the OpenSearch cluster.') + g.panel.barGauge.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') + g.panel.barGauge.queryOptions.withTargets([ - signals.clusterOverview.fs_path_used_percent_topk.asTarget() - + g.query.prometheus.withIntervalFactor(2), + signals.clusterOverview.fs_path_used_percent_topk.asTarget() { intervalFactor: 2 }, ]) + g.panel.barGauge.standardOptions.color.withMode('thresholds') + g.panel.barGauge.standardOptions.thresholds.withSteps([ @@ -288,335 +273,239 @@ local var = g.dashboard.variable; + g.panel.barGauge.options.withOrientation('horizontal'), totalDocumentsPanel: - g.panel.timeSeries.new('Total documents') - + g.panel.timeSeries.panelOptions.withDescription('The total count of documents indexed across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.indices_indexing_index_count_avg.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('documents') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Total documents', + targets=[signals.clusterOverview.indices_indexing_index_count_avg.asTarget() { intervalFactor: 2 }], + description='The total count of documents indexed across the OpenSearch cluster.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('documents'), pendingTasksPanel: - g.panel.timeSeries.new('Pending tasks') - + g.panel.timeSeries.panelOptions.withDescription('The number of tasks waiting to be executed across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.cluster_pending_tasks_number.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('tasks') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Pending tasks', + targets=[signals.clusterOverview.cluster_pending_tasks_number.asTarget() { intervalFactor: 2 }], + description='The number of tasks waiting to be executed across the OpenSearch cluster.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('tasks'), storeSizePanel: - g.panel.timeSeries.new('Store size') - + g.panel.timeSeries.panelOptions.withDescription('The total size of the store across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.indices_store_size_bytes_avg.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Store size', + targets=[signals.clusterOverview.indices_store_size_bytes_avg.asTarget() { intervalFactor: 2 }], + description='The total size of the store across the OpenSearch cluster.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('bytes'), maxTaskWaitTimePanel: - g.panel.timeSeries.new('Max task wait time') - + g.panel.timeSeries.panelOptions.withDescription('The max wait time for tasks to be executed across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.clusterOverview.cluster_task_max_wait_seconds.asTarget()]) - + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Max task wait time', + targets=[signals.clusterOverview.cluster_task_max_wait_seconds.asTarget()], + description='The max wait time for tasks to be executed across the OpenSearch cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('s'), topIndicesByRequestRatePanel: - g.panel.timeSeries.new('Top indices by request rate') - + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll request rate across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.clusterOverview.search_current_inflight_topk.asTarget()]) - + g.panel.timeSeries.standardOptions.withUnit('reqps') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Top indices by request rate', + targets=[signals.clusterOverview.search_current_inflight_topk.asTarget()], + description='Top indices by combined fetch, query, and scroll 
request rate across the OpenSearch cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), topIndicesByRequestLatencyPanel: - g.panel.timeSeries.new('Top indices by request latency') - + g.panel.timeSeries.panelOptions.withDescription('Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.search_avg_latency_topk.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) - + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Top indices by request latency', + targets=[signals.clusterOverview.search_avg_latency_topk.asTarget() { interval: '2m' }], + description='Top indices by combined fetch, query, and scroll latency across the OpenSearch cluster.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('s'), topIndicesByCombinedCacheHitRatioPanel: - g.panel.timeSeries.new('Top indices by combined cache hit ratio') - + g.panel.timeSeries.panelOptions.withDescription('Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.request_query_cache_hit_rate_topk.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('percent') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2), + commonlib.panels.generic.timeSeries.base.new( + 'Top indices by combined cache hit ratio', + targets=[signals.clusterOverview.request_query_cache_hit_rate_topk.asTarget() { intervalFactor: 2 }], + description='Top indices by cache hit ratio for the combined request and query cache across the OpenSearch cluster.' + ) + + g.panel.timeSeries.standardOptions.withUnit('percent'), topNodesByIngestRatePanel: - g.panel.timeSeries.new('Top nodes by ingest rate') - + g.panel.timeSeries.panelOptions.withDescription('Top nodes by rate of ingest across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.ingest_throughput_topk.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by ingest rate', + targets=[signals.clusterOverview.ingest_throughput_topk.asTarget() { intervalFactor: 2 }], + description='Top nodes by rate of ingest across the OpenSearch cluster.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('Bps'), topNodesByIngestLatencyPanel: - g.panel.timeSeries.new('Top nodes by ingest latency') - + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion latency across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.ingest_latency_topk.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by ingest latency', + targets=[signals.clusterOverview.ingest_latency_topk.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Top nodes by ingestion latency across the OpenSearch cluster.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('s'), topNodesByIngestErrorsPanel: - g.panel.timeSeries.new('Top nodes by ingest errors') - + g.panel.timeSeries.panelOptions.withDescription('Top nodes by ingestion failures across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.ingest_failures_topk.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + commonlib.panels.generic.timeSeries.base.new( + 'Top nodes by ingest errors', + targets=[signals.clusterOverview.ingest_failures_topk.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Top nodes by ingestion failures across the OpenSearch cluster.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('errors'), topIndicesByIndexRatePanel: - g.panel.timeSeries.new('Top indices by index rate') - + g.panel.timeSeries.panelOptions.withDescription('Top indices by rate of document indexing across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.indexing_current_topk.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + commonlib.panels.generic.timeSeries.base.new( + 'Top indices by index rate', + targets=[signals.clusterOverview.indexing_current_topk.asTarget() { intervalFactor: 2 }], + description='Top indices by rate of document indexing across the OpenSearch cluster.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('documents/s'), topIndicesByIndexLatencyPanel: - g.panel.timeSeries.new('Top indices by index latency') - + g.panel.timeSeries.panelOptions.withDescription('Top indices by indexing latency across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.indexing_latency_topk.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + commonlib.panels.generic.timeSeries.base.new( + 'Top indices by index latency', + targets=[signals.clusterOverview.indexing_latency_topk.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Top indices by indexing latency across the OpenSearch cluster.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('s'), topIndicesByIndexFailuresPanel: - g.panel.timeSeries.new('Top indices by index failures') - + g.panel.timeSeries.panelOptions.withDescription('Top indices by index document failures across the OpenSearch cluster.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.clusterOverview.indexing_failed_topk.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + commonlib.panels.generic.timeSeries.base.new( + 'Top indices by index failures', + targets=[signals.clusterOverview.indexing_failed_topk.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Top indices by index document failures across the OpenSearch cluster.' + ) + g.panel.timeSeries.standardOptions.withUnit('failures'), // Node Overview Panels - Refactored to use modern patterns and signals // Node CPU usage nodeCpuUsage: - g.panel.timeSeries.new('Node CPU usage') - + g.panel.timeSeries.panelOptions.withDescription("CPU usage percentage of the node's Operating System.") - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_cpu_percent.asTarget()]) + commonlib.panels.generic.timeSeries.base.new( + 'Node CPU usage', + targets=[signals.nodeOverview.os_cpu_percent.asTarget()], + description="CPU usage percentage of the node's Operating System." 
+ ) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withUnit('percent') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme'), // Node memory usage nodeMemoryUsage: - g.panel.timeSeries.new('Node memory usage') - + g.panel.timeSeries.panelOptions.withDescription('Memory usage percentage of the node for the operating system and OpenSearch') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_mem_used_percent.asTarget()]) + commonlib.panels.generic.timeSeries.base.new( + 'Node memory usage', + targets=[signals.nodeOverview.os_mem_used_percent.asTarget()], + description='Memory usage percentage of the node for the operating system and OpenSearch' + ) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withUnit('percent') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + 
g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme'), // Node I/O nodeIO: - g.panel.timeSeries.new('Node I/O') - + g.panel.timeSeries.panelOptions.withDescription('Node file system read and write data.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.fs_read_bps.asTarget() - + g.query.prometheus.withInterval('2m'), - signals.nodeOverview.fs_write_bps.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Node I/O', + targets=[ + signals.nodeOverview.fs_read_bps.asTarget() { interval: '2m' }, + signals.nodeOverview.fs_write_bps.asTarget() { interval: '2m' }, + ], + description='Node file system read and write data.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('Bps') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + g.panel.timeSeries.standardOptions.withOverrides([ g.panel.timeSeries.fieldOverride.byRegexp.new('/time|used|busy|util/') + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('custom.axisSoftMax', 100) + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('custom.drawStyle', 'points') + g.panel.timeSeries.fieldOverride.byRegexp.withProperty('unit', 'percent'), - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + ]), // Node open connections nodeOpenConnections: - g.panel.timeSeries.new('Node open connections') - + g.panel.timeSeries.panelOptions.withDescription('Number of open connections for the selected node.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.transport_open_connections.asTarget()]) + commonlib.panels.generic.timeSeries.base.new( + 'Node open connections', + targets=[signals.nodeOverview.transport_open_connections.asTarget()], + description='Number of open connections for the selected node.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('connections') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('opacity') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal'), // Node disk usage nodeDiskUsage: - g.panel.timeSeries.new('Node disk usage') - + g.panel.timeSeries.panelOptions.withDescription('Disk usage percentage of the selected node.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.fs_used_percent.asTarget()]) + commonlib.panels.generic.timeSeries.base.new( + 'Node disk usage', + targets=[signals.nodeOverview.fs_used_percent.asTarget()], + description='Disk usage percentage of the selected node.' 
+ ) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withUnit('percent') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme'), // Node memory swap nodeMemorySwap: - g.panel.timeSeries.new('Node memory swap') - + g.panel.timeSeries.panelOptions.withDescription('Percentage of swap space used by OpenSearch and the operating system on the selected node.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([signals.nodeOverview.os_swap_used_percent.asTarget()]) + commonlib.panels.generic.timeSeries.base.new( + 'Node memory swap', + targets=[signals.nodeOverview.os_swap_used_percent.asTarget()], + description='Percentage of swap space used by OpenSearch and the operating system on the selected node.' 
+ ) + g.panel.timeSeries.standardOptions.color.withMode('continuous-BlYlRd') + g.panel.timeSeries.standardOptions.withMin(0) + g.panel.timeSeries.standardOptions.withMax(100) + g.panel.timeSeries.standardOptions.withUnit('percent') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.options.tooltip.withMode('multi') - + g.panel.timeSeries.options.tooltip.withSort('desc'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme'), // Node network traffic nodeNetworkTraffic: - g.panel.timeSeries.new('Node network traffic') - + g.panel.timeSeries.panelOptions.withDescription("Network traffic on the node's operating system.") - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.transport_rx_bps.asTarget() - + g.query.prometheus.withInterval('2m'), - signals.nodeOverview.transport_tx_bps.asTarget() - + g.query.prometheus.withInterval('2m'), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Node network traffic', + targets=[ + signals.nodeOverview.transport_rx_bps.asTarget() { interval: '2m' }, + signals.nodeOverview.transport_tx_bps.asTarget() { interval: '2m' }, + ], + description="Network traffic on the node's operating system." 
+ ) + g.panel.timeSeries.standardOptions.withUnit('Bps') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme'), // Circuit breakers circuitBreakers: - g.panel.timeSeries.new('Circuit breakers') - + g.panel.timeSeries.panelOptions.withDescription('Circuit breakers tripped on the selected node by type') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.circuitbreaker_tripped_sum_by_name.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Circuit breakers', + targets=[signals.nodeOverview.circuitbreaker_tripped_sum_by_name.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Circuit breakers tripped on the selected node by type' + ) + g.panel.timeSeries.standardOptions.withUnit('trips') + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(15) - + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withGradientMode('scheme'), // Node roles timeline nodeOSRolesTimeline: - g.panel.statusHistory.new('Roles timeline') - + g.panel.statusHistory.panelOptions.withDescription('OpenSearch node roles over time.') + 
commonlib.panels.generic.statusHistory.base.new( + 'Roles timeline', + targets=[ + signals.nodeOverview.node_role_data.asTarget(), + signals.nodeOverview.node_role_master.asTarget(), + signals.nodeOverview.node_role_ingest.asTarget(), + signals.nodeOverview.node_role_cluster_manager.asTarget(), + signals.nodeOverview.node_role_remote_cluster_client.asTarget(), + ], + description='OpenSearch node roles over time.' + ) + + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) + g.panel.statusHistory.options.withShowValue('never') + g.panel.statusHistory.options.withLegend(false) - + g.panel.statusHistory.queryOptions.withMaxDataPoints(100) - + g.panel.statusHistory.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.statusHistory.queryOptions.withTargets([ - signals.nodeOverview.node_role_data.asTarget(), - signals.nodeOverview.node_role_master.asTarget(), - signals.nodeOverview.node_role_ingest.asTarget(), - signals.nodeOverview.node_role_cluster_manager.asTarget(), - signals.nodeOverview.node_role_remote_cluster_client.asTarget(), - ]) + g.panel.statusHistory.standardOptions.withMappings([ { type: 'value', @@ -632,302 +521,182 @@ local var = g.dashboard.variable; // JVM heap used vs committed jvmHeapUsedVsCommitted: - g.panel.timeSeries.new('JVM heap used vs committed') - + g.panel.timeSeries.panelOptions.withDescription('JVM heap memory usage vs committed.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_heap_used_bytes.asTarget() - + g.query.prometheus.withIntervalFactor(2), - signals.nodeOverview.jvm_heap_committed_bytes.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM heap used vs committed', + targets=[ + signals.nodeOverview.jvm_heap_used_bytes.asTarget() { intervalFactor: 2 }, + signals.nodeOverview.jvm_heap_committed_bytes.asTarget() { 
intervalFactor: 2 }, + ], + description='JVM heap memory usage vs committed.' + ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM non-heap used vs committed jvmNonheapUsedVsCommitted: - g.panel.timeSeries.new('JVM non-heap used vs committed') - + g.panel.timeSeries.panelOptions.withDescription('JVM non-heap memory usage vs committed.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_nonheap_used_bytes.asTarget() - + g.query.prometheus.withIntervalFactor(2), - signals.nodeOverview.jvm_nonheap_committed_bytes.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM non-heap used vs committed', + targets=[ + signals.nodeOverview.jvm_nonheap_used_bytes.asTarget() { intervalFactor: 2 }, + signals.nodeOverview.jvm_nonheap_committed_bytes.asTarget() { intervalFactor: 2 }, + ], + description='JVM non-heap memory usage vs committed.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM threads jvmThreads: - g.panel.timeSeries.new('JVM threads') - + g.panel.timeSeries.panelOptions.withDescription('JVM thread count.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_threads.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM threads', + targets=[signals.nodeOverview.jvm_threads.asTarget() { intervalFactor: 2 }], + description='JVM thread count.' + ) + g.panel.timeSeries.standardOptions.withUnit('threads') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM buffer pools jvmBufferPools: - g.panel.timeSeries.new('JVM buffer pools') - + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_bufferpool_number.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM buffer pools', + targets=[signals.nodeOverview.jvm_bufferpool_number.asTarget() { intervalFactor: 2 }], + 
description='JVM buffer pool usage.' + ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM uptime jvmUptime: - g.panel.timeSeries.new('JVM uptime') - + g.panel.timeSeries.panelOptions.withDescription('JVM uptime in seconds.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_uptime.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM uptime', + targets=[signals.nodeOverview.jvm_uptime.asTarget() { intervalFactor: 2 }], + description='JVM uptime in seconds.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM garbage collections jvmGarbageCollections: - g.panel.timeSeries.new('JVM garbage collections') - + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection count.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_gc_collections.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM garbage collections', + targets=[signals.nodeOverview.jvm_gc_collections.asTarget() { interval: '2m', intervalFactor: 2 }], + description='JVM garbage collection count.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('collections') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM garbage collection time jvmGarbageCollectionTime: - g.panel.timeSeries.new('JVM garbage collection time') - + g.panel.timeSeries.panelOptions.withDescription('JVM garbage collection time in milliseconds.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_gc_time.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM garbage collection time', + targets=[signals.nodeOverview.jvm_gc_time.asTarget() { interval: '2m', intervalFactor: 2 }], + description='JVM garbage collection time in milliseconds.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('ms') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // JVM buffer pool usage jvmBufferPoolUsage: - g.panel.timeSeries.new('JVM buffer pool usage') - + g.panel.timeSeries.panelOptions.withDescription('JVM buffer pool usage by pool.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.jvm_bufferpool_used_percent.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'JVM buffer pool usage', + targets=[signals.nodeOverview.jvm_bufferpool_used_percent.asTarget() { intervalFactor: 2 }], + description='JVM buffer pool usage by pool.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // Thread pool threads threadPoolThreads: - g.panel.timeSeries.new('Thread pool threads') - + g.panel.timeSeries.panelOptions.withDescription('Thread pool thread count.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.threadpool_threads.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Thread pool threads', + targets=[signals.nodeOverview.threadpool_threads.asTarget() { intervalFactor: 2 }], + description='Thread pool thread count.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('threads') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // Thread pool tasks threadPoolTasks: - g.panel.timeSeries.new('Thread pool tasks') - + g.panel.timeSeries.panelOptions.withDescription('Thread pool task count.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.nodeOverview.threadpool_tasks.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Thread pool tasks', + targets=[signals.nodeOverview.threadpool_tasks.asTarget() { intervalFactor: 2 }], + description='Thread pool task count.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('tasks') - + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5) - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never'), + + g.panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(5), // Search and Index Overview Panels - Refactored to use modern patterns and signals // Search Performance Panels searchRequestRatePanel: - g.panel.timeSeries.new('Request rate') - + g.panel.timeSeries.panelOptions.withDescription('Rate of fetch, scroll, and query requests by selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.search_query_current_avg.asTarget() - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.search_fetch_current_avg.asTarget() - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.search_scroll_current_avg.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('reqps') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.standardOptions.withOverrides([ - { - matcher: { id: 'byValue', options: { reducer: 'allIsZero', op: 'gte', value: 0 } }, - properties: [{ id: 'custom.hideFrom', value: { tooltip: true, viz: false, legend: true } }], - }, - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi'), + commonlib.panels.generic.timeSeries.base.new( + 'Request rate', + targets=[ + signals.searchAndIndexOverview.search_query_current_avg.asTarget() { intervalFactor: 2 }, + 
signals.searchAndIndexOverview.search_fetch_current_avg.asTarget() { intervalFactor: 2 }, + signals.searchAndIndexOverview.search_scroll_current_avg.asTarget() { intervalFactor: 2 }, + ], + description='Rate of fetch, scroll, and query requests by selected index.' + ) + + g.panel.timeSeries.standardOptions.withUnit('reqps'), searchRequestLatencyPanel: - g.panel.timeSeries.new('Request latency') - + g.panel.timeSeries.panelOptions.withDescription('Latency of fetch, scroll, and query requests by selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.search_query_latency_avg.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.search_fetch_latency_avg.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.search_scroll_latency_avg.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.standardOptions.withOverrides([ - { - matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, - properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], - }, - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi'), + commonlib.panels.generic.timeSeries.base.new( + 'Request latency', + targets=[ + signals.searchAndIndexOverview.search_query_latency_avg.asTarget() { interval: '2m', intervalFactor: 2 }, + signals.searchAndIndexOverview.search_fetch_latency_avg.asTarget() { interval: '2m', 
intervalFactor: 2 }, + signals.searchAndIndexOverview.search_scroll_latency_avg.asTarget() { interval: '2m', intervalFactor: 2 }, + ], + description='Latency of fetch, scroll, and query requests by selected index.' + ) + + g.panel.timeSeries.standardOptions.withUnit('s'), searchCacheHitRatioPanel: - g.panel.timeSeries.new('Cache hit ratio') - + g.panel.timeSeries.panelOptions.withDescription('Ratio of query cache and request cache hits and misses.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.request_cache_hit_rate.asTarget() - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.query_cache_hit_rate.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('percent') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.standardOptions.withOverrides([ - { - matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, - properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], - }, - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi'), + commonlib.panels.generic.timeSeries.base.new( + 'Cache hit ratio', + targets=[ + signals.searchAndIndexOverview.request_cache_hit_rate.asTarget() { intervalFactor: 2 }, + signals.searchAndIndexOverview.query_cache_hit_rate.asTarget() { intervalFactor: 2 }, + ], + description='Ratio of query cache and request cache hits and misses.' 
+ ) + + g.panel.timeSeries.standardOptions.withUnit('percent'), searchCacheEvictionsPanel: - g.panel.timeSeries.new('Evictions') - + g.panel.timeSeries.panelOptions.withDescription('Total evictions count by cache type for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.query_cache_evictions.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.request_cache_evictions.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.fielddata_evictions.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Evictions', + targets=[ + signals.searchAndIndexOverview.query_cache_evictions.asTarget() { interval: '2m', intervalFactor: 2 }, + signals.searchAndIndexOverview.request_cache_evictions.asTarget() { interval: '2m', intervalFactor: 2 }, + signals.searchAndIndexOverview.fielddata_evictions.asTarget() { interval: '2m', intervalFactor: 2 }, + ], + description='Total evictions count by cache type for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('evictions') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - { - matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, - properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], - }, - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi'), + ]), // Indexing Performance Panels indexingRatePanel: - g.panel.timeSeries.new('Index rate') - + g.panel.timeSeries.panelOptions.withDescription('Rate of indexed documents for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.indexing_current.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) - + g.panel.timeSeries.standardOptions.withUnit('documents/s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) - + g.panel.timeSeries.standardOptions.withOverrides([ - { - matcher: { id: 'byValue', options: { op: 'gte', reducer: 'allIsZero', value: 0 } }, - properties: [{ id: 'custom.hideFrom', value: { legend: true, tooltip: true, viz: false } }], - }, - ]), + commonlib.panels.generic.timeSeries.base.new( + 'Index rate', + 
targets=[signals.searchAndIndexOverview.indexing_current.asTarget() { intervalFactor: 2 }], + description='Rate of indexed documents for the selected index.' + ) + + g.panel.timeSeries.standardOptions.withUnit('documents/s'), indexingLatencyPanel: - g.panel.timeSeries.new('Index latency') - + g.panel.timeSeries.panelOptions.withDescription('Document indexing latency for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.indexing_latency.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Index latency', + targets=[signals.searchAndIndexOverview.indexing_latency.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Document indexing latency for the selected index.' + ) + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), @@ -936,346 +705,213 @@ local var = g.dashboard.variable; ]), indexingFailuresPanel: - g.panel.timeSeries.new('Index failures') - + g.panel.timeSeries.panelOptions.withDescription('Number of indexing failures for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.indexing_failed.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 
'Index failures', + targets=[signals.searchAndIndexOverview.indexing_failed.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Number of indexing failures for the selected index.' + ) + g.panel.timeSeries.standardOptions.withUnit('failures') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Index Operations Panels flushLatencyPanel: - g.panel.timeSeries.new('Flush latency') - + g.panel.timeSeries.panelOptions.withDescription('Index flush latency for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.flush_latency.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Flush latency', + targets=[signals.searchAndIndexOverview.flush_latency.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Index flush latency for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), mergeTimePanel: - g.panel.timeSeries.new('Merge time') - + g.panel.timeSeries.panelOptions.withDescription('Index merge time for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.merge_time.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.merge_stopped_time.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.merge_throttled_time.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Merge time', + targets=[ + signals.searchAndIndexOverview.merge_time.asTarget() { interval: '2m', intervalFactor: 2 }, + signals.searchAndIndexOverview.merge_stopped_time.asTarget() { interval: '2m', intervalFactor: 2 }, + signals.searchAndIndexOverview.merge_throttled_time.asTarget() { interval: '2m', 
intervalFactor: 2 }, + ], + description='Index merge time for the selected index.' + ) + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi'), + ]), refreshLatencyPanel: - g.panel.timeSeries.new('Refresh latency') - + g.panel.timeSeries.panelOptions.withDescription('Index refresh latency for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.refresh_latency.asTarget() - + g.query.prometheus.withInterval('2m') - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Refresh latency', + targets=[signals.searchAndIndexOverview.refresh_latency.asTarget() { interval: '2m', intervalFactor: 2 }], + description='Index refresh latency for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Index Statistics Panels translogOperationsPanel: - g.panel.timeSeries.new('Translog operations') - + g.panel.timeSeries.panelOptions.withDescription('Current number of translog operations for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.translog_ops.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Translog operations', + targets=[signals.searchAndIndexOverview.translog_ops.asTarget() { intervalFactor: 2 }], + description='Current number of translog operations for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('operations') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), docsDeletedPanel: - g.panel.timeSeries.new('Docs deleted') - + g.panel.timeSeries.panelOptions.withDescription('Rate of documents deleted for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.indexing_delete_current.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Docs deleted', + targets=[signals.searchAndIndexOverview.indexing_delete_current.asTarget() { intervalFactor: 2 }], + description='Rate of documents deleted for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('documents/s') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), documentsIndexedPanel: - g.panel.timeSeries.new('Documents indexed') - + g.panel.timeSeries.panelOptions.withDescription('Number of indexed documents for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.indexing_count.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Documents indexed', + targets=[signals.searchAndIndexOverview.indexing_count.asTarget() { intervalFactor: 2 }], + description='Number of indexed documents for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('documents') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Index Structure Panels segmentCountPanel: - g.panel.timeSeries.new('Segment count') - + g.panel.timeSeries.panelOptions.withDescription('Current number of segments for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.segments_number.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Segment count', + targets=[signals.searchAndIndexOverview.segments_number.asTarget() { intervalFactor: 2 }], + description='Current number of segments for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('segments') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), mergeCountPanel: - g.panel.timeSeries.new('Merge count') - + g.panel.timeSeries.panelOptions.withDescription('Number of merge operations for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.merge_docs.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Merge count', + targets=[signals.searchAndIndexOverview.merge_docs.asTarget() { intervalFactor: 2 }], + description='Number of merge operations for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('merges') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), // Cache and Memory Panels cacheSizePanel: - g.panel.timeSeries.new('Cache size') - + g.panel.timeSeries.panelOptions.withDescription('Size of query cache and request cache.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.query_cache_memory.asTarget() - + g.query.prometheus.withIntervalFactor(2), - signals.searchAndIndexOverview.request_cache_memory.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Cache size', + targets=[ + signals.searchAndIndexOverview.query_cache_memory.asTarget() { intervalFactor: 2 }, + signals.searchAndIndexOverview.request_cache_memory.asTarget() { intervalFactor: 2 }, + ], + description='Size of query cache and request cache.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), - ]) - + g.panel.timeSeries.options.tooltip.withMode('multi'), + ]), searchAndIndexStoreSizePanel: - g.panel.timeSeries.new('Store size') - + g.panel.timeSeries.panelOptions.withDescription('Size of the store in bytes for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.store_size_bytes.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Store size', + targets=[signals.searchAndIndexOverview.store_size_bytes.asTarget() { intervalFactor: 2 }], + description='Size of the store in bytes for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), segmentSizePanel: - g.panel.timeSeries.new('Segment size') - + g.panel.timeSeries.panelOptions.withDescription('Memory used by segments for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.segments_memory_bytes.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Segment size', + targets=[signals.searchAndIndexOverview.segments_memory_bytes.asTarget() { intervalFactor: 2 }], + description='Memory used by segments for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), mergeSizePanel: - g.panel.timeSeries.new('Merge size') - + g.panel.timeSeries.panelOptions.withDescription('Size of merge operations in bytes for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.merge_current_size.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Merge size', + targets=[signals.searchAndIndexOverview.merge_current_size.asTarget() { intervalFactor: 2 }], + description='Size of merge operations in bytes for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('bytes') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.fieldConfig.defaults.custom.withDrawStyle('points') + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), searchAndIndexShardCountPanel: - g.panel.timeSeries.new('Shard count') - + g.panel.timeSeries.panelOptions.withDescription('The number of index shards for the selected index.') - + g.panel.timeSeries.queryOptions.withDatasource('prometheus', '${prometheus_datasource}') - + g.panel.timeSeries.queryOptions.withTargets([ - signals.searchAndIndexOverview.shards_per_index.asTarget() - + g.query.prometheus.withIntervalFactor(2), - ]) + commonlib.panels.generic.timeSeries.base.new( + 'Shard count', + targets=[signals.searchAndIndexOverview.shards_per_index.asTarget() { intervalFactor: 2 }], + description='The number of index shards for the selected index.' 
+ ) + g.panel.timeSeries.standardOptions.withUnit('shards') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineInterpolation('smooth') - + g.panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') - + g.panel.timeSeries.fieldConfig.defaults.custom.withLineWidth(2) + g.panel.timeSeries.standardOptions.thresholds.withSteps([ g.panel.timeSeries.standardOptions.threshold.step.withColor('green') + g.panel.timeSeries.standardOptions.threshold.step.withValue(null), g.panel.timeSeries.standardOptions.threshold.step.withColor('red') + g.panel.timeSeries.standardOptions.threshold.step.withValue(80), - ]) - + g.panel.timeSeries.standardOptions.withOverrides([ - g.panel.timeSeries.fieldOverride.byValue.new({ op: 'gte', reducer: 'allIsZero', value: 0 }) - + g.panel.timeSeries.fieldOverride.byValue.withProperty('custom.hideFrom', { legend: true, tooltip: true, viz: false }), ]), }, } From a7a452c38ebe8b10c15705223e6a9007101eed77 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Mon, 24 Nov 2025 14:30:58 -0500 Subject: [PATCH 07/13] Updated README to match modernization changes. 
--- opensearch-mixin/README.md | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/opensearch-mixin/README.md b/opensearch-mixin/README.md index 02f714def..346f2a32f 100644 --- a/opensearch-mixin/README.md +++ b/opensearch-mixin/README.md @@ -14,12 +14,9 @@ and the following alerts: - OpenSearchRedCluster - OpenSearchUnstableShardReallocation - OpenSearchUnstableShardUnassigned -- OpenSearchModerateNodeDiskUsage -- OpenSearchHighNodeDiskUsage -- OpenSearchModerateNodeCPUUsage -- OpenSearchHighNodeCPUUsage -- OpenSearchModerateNodeMemoryUsage -- OpenSearchHighNodeMemoryUsage +- OpenSearchHighNodeDiskUsage (warning and critical) +- OpenSearchHighNodeCpuUsage (warning and critical) +- OpenSearchHighNodeMemoryUsage (warning and critical) - OpenSearchModerateRequestLatency - OpenSearchModerateIndexLatency @@ -85,22 +82,20 @@ The OpenSearch search and index overview dashboard provides details on request p ## Alerts Overview - | Alert | Summary | |-------------------------------------|---------------------------------------------------------------------------------| | OpenSearchYellowCluster | At least one of the clusters is reporting a yellow status. | | OpenSearchRedCluster | At least one of the clusters is reporting a red status. | | OpenSearchUnstableShardReallocation | A node has gone offline or has been disconnected triggering shard reallocation. | | OpenSearchUnstableShardUnassigned | There are shards that have been detected as unassigned. | -| OpenSearchModerateNodeDiskUsage | The node disk usage has exceeded the warning threshold. | -| OpenSearchHighNodeDiskUsage | The node disk usage has exceeded the critical threshold. | -| OpenSearchModerateNodeCpuUsage | The node CPU usage has exceeded the warning threshold. | -| OpenSearchHighNodeCpuUsage | The node CPU usage has exceeded the critical threshold. | -| OpenSearchModerateNodeMemoryUsage | The node memory usage has exceeded the warning threshold. 
| -| OpenSearchHighNodeMemoryUsage | The node memory usage has exceeded the critical threshold. | +| OpenSearchHighNodeDiskUsage | The node disk usage has exceeded the configured threshold (warning or critical). | +| OpenSearchHighNodeCpuUsage | The node CPU usage has exceeded the configured threshold (warning or critical). | +| OpenSearchHighNodeMemoryUsage | The node memory usage has exceeded the configured threshold (warning or critical). | | OpenSearchModerateRequestLatency | The request latency has exceeded the warning threshold. | | OpenSearchModerateIndexLatency | The index latency has exceeded the warning threshold. | +Node resource alerts (disk, CPU, memory) use the same alert name for both the warning and critical severity levels; the two levels are distinguished by the `severity` label. This follows the Alertmanager inhibition pattern: with an inhibit rule configured in Alertmanager, the warning alert is suppressed while the critical alert of the same name is firing. + Default thresholds can be configured in `config.libsonnet`: ```js From 5d3c2f9b2e3f283c1b037eca2ecf204a7907420b Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Mon, 1 Dec 2025 10:36:29 -0500 Subject: [PATCH 08/13] Update opensearch-mixin/signals/search-and-index-overview.libsonnet Co-authored-by: Keith Schmitt <32067685+schmikei@users.noreply.github.com> --- opensearch-mixin/signals/search-and-index-overview.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opensearch-mixin/signals/search-and-index-overview.libsonnet b/opensearch-mixin/signals/search-and-index-overview.libsonnet index 4eaa9fa33..301a81b97 100644 --- a/opensearch-mixin/signals/search-and-index-overview.libsonnet +++ b/opensearch-mixin/signals/search-and-index-overview.libsonnet @@ -51,7 +51,7 @@ function(this) unit: 'ops', sources: { prometheus: { - expr: 'opensearch_index_search_scroll_current_number{%(queriesSelector)s, context=~"total"}', + expr: 'opensearch_index_search_scroll_current_number{%(queriesSelector)s, context="total"}', legendCustomTemplate: '{{index}} - scroll', }, }, From
e6b5df5dd48a6e780683cc837cb90410479bd547 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Mon, 1 Dec 2025 10:36:35 -0500 Subject: [PATCH 09/13] Update opensearch-mixin/signals/search-and-index-overview.libsonnet Co-authored-by: Keith Schmitt <32067685+schmikei@users.noreply.github.com> --- opensearch-mixin/signals/search-and-index-overview.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opensearch-mixin/signals/search-and-index-overview.libsonnet b/opensearch-mixin/signals/search-and-index-overview.libsonnet index 301a81b97..4c2f6dfcf 100644 --- a/opensearch-mixin/signals/search-and-index-overview.libsonnet +++ b/opensearch-mixin/signals/search-and-index-overview.libsonnet @@ -37,7 +37,7 @@ function(this) unit: 'ops', sources: { prometheus: { - expr: 'opensearch_index_search_fetch_current_number{%(queriesSelector)s, context=~"total"}', + expr: 'opensearch_index_search_fetch_current_number{%(queriesSelector)s, context="total"}', legendCustomTemplate: '{{index}} - fetch', }, }, From 53c1f089e1cc7e428ebb45496ca77491722c43eb Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Mon, 1 Dec 2025 10:36:41 -0500 Subject: [PATCH 10/13] Update opensearch-mixin/signals/search-and-index-overview.libsonnet Co-authored-by: Keith Schmitt <32067685+schmikei@users.noreply.github.com> --- opensearch-mixin/signals/search-and-index-overview.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opensearch-mixin/signals/search-and-index-overview.libsonnet b/opensearch-mixin/signals/search-and-index-overview.libsonnet index 4c2f6dfcf..4e34d454d 100644 --- a/opensearch-mixin/signals/search-and-index-overview.libsonnet +++ b/opensearch-mixin/signals/search-and-index-overview.libsonnet @@ -23,7 +23,7 @@ function(this) unit: 'ops', sources: { prometheus: { - expr: 'opensearch_index_search_query_current_number{%(queriesSelector)s, context=~"total"}', + expr: 'opensearch_index_search_query_current_number{%(queriesSelector)s, context="total"}', 
legendCustomTemplate: '{{index}} - query', }, }, From d5df0b1460be1e5e3d418d2619b47de8d07dd7d2 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Mon, 1 Dec 2025 10:37:08 -0500 Subject: [PATCH 11/13] Update opensearch-mixin/mixin.libsonnet Co-authored-by: Keith Schmitt <32067685+schmikei@users.noreply.github.com> --- opensearch-mixin/mixin.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opensearch-mixin/mixin.libsonnet b/opensearch-mixin/mixin.libsonnet index d28e80b23..5ba554e75 100644 --- a/opensearch-mixin/mixin.libsonnet +++ b/opensearch-mixin/mixin.libsonnet @@ -8,7 +8,7 @@ local mixin = mixinlib.new() { filteringSelecter: config.filteringSelector, uid: config.uid, - enableLokiLogs: true, + enableLokiLogs: config.enableLokiLogs, } ); From ba276ec7423e0b4ef566a3f616d328ac06dcff21 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Mon, 1 Dec 2025 10:37:21 -0500 Subject: [PATCH 12/13] Update opensearch-mixin/g.libsonnet Co-authored-by: Keith Schmitt <32067685+schmikei@users.noreply.github.com> --- opensearch-mixin/g.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opensearch-mixin/g.libsonnet b/opensearch-mixin/g.libsonnet index f89dcc064..e6a2060ee 100644 --- a/opensearch-mixin/g.libsonnet +++ b/opensearch-mixin/g.libsonnet @@ -1 +1 @@ -import 'github.com/grafana/grafonnet/gen/grafonnet-v11.0.0/main.libsonnet' +import 'github.com/grafana/grafonnet/gen/grafonnet-v11.4.0/main.libsonnet' From 13d0dc518631524300391a5b97db034cca7b1f98 Mon Sep 17 00:00:00 2001 From: Greg Pattison Date: Thu, 11 Dec 2025 13:20:10 -0500 Subject: [PATCH 13/13] Added PR feedback --- opensearch-mixin/config.libsonnet | 2 +- .../dashboards_out/opensearch-cluster-overview.json | 12 ++++++------ opensearch-mixin/dashboards_out/opensearch-logs.json | 2 +- .../dashboards_out/opensearch-node-overview.json | 2 +- .../opensearch-search-and-index-overview.json | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git 
a/opensearch-mixin/config.libsonnet b/opensearch-mixin/config.libsonnet index e23c5b707..855966153 100644 --- a/opensearch-mixin/config.libsonnet +++ b/opensearch-mixin/config.libsonnet @@ -6,7 +6,7 @@ instanceLabels: ['instance'], uid: 'opensearch', - dashboardTags: [self.uid], + dashboardTags: [self.uid + '-mixin'], dashboardNamePrefix: 'OpenSearch', dashboardPeriod: 'now-1h', dashboardTimezone: 'default', diff --git a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json index a18e52a49..97776104d 100644 --- a/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-cluster-overview.json @@ -464,7 +464,7 @@ "y": 8 }, "id": 8, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -698,7 +698,7 @@ ] } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -757,7 +757,7 @@ ] } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -817,7 +817,7 @@ ] } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -891,7 +891,7 @@ ] } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -1643,7 +1643,7 @@ "refresh": "30s", "schemaVersion": 39, "tags": [ - "opensearch" + "opensearch-mixin" ], "templating": { "list": [ diff --git a/opensearch-mixin/dashboards_out/opensearch-logs.json b/opensearch-mixin/dashboards_out/opensearch-logs.json index c6c03facc..b7aba3593 100644 --- a/opensearch-mixin/dashboards_out/opensearch-logs.json +++ b/opensearch-mixin/dashboards_out/opensearch-logs.json @@ -223,7 +223,7 @@ "refresh": "30s", "schemaVersion": 39, "tags": [ - "opensearch" + "opensearch-mixin" ], "templating": { "list": [ diff --git a/opensearch-mixin/dashboards_out/opensearch-node-overview.json 
b/opensearch-mixin/dashboards_out/opensearch-node-overview.json index bd656429d..d888cbe42 100644 --- a/opensearch-mixin/dashboards_out/opensearch-node-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-node-overview.json @@ -1244,7 +1244,7 @@ "refresh": "30s", "schemaVersion": 39, "tags": [ - "opensearch" + "opensearch-mixin" ], "templating": { "list": [ diff --git a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json index d2dd4e492..733f49f9b 100644 --- a/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json +++ b/opensearch-mixin/dashboards_out/opensearch-search-and-index-overview.json @@ -80,7 +80,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_query_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_query_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -92,7 +92,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_fetch_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n 
opensearch_index_search_fetch_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -104,7 +104,7 @@ "type": "prometheus", "uid": "${prometheus_datasource}" }, - "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_scroll_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=~\"total\"}\n)", + "expr": "avg by (job,cluster,opensearch_cluster,instance,index) (\n opensearch_index_search_scroll_current_number{job=\"integrations/opensearch\",job=~\"$job\",cluster=~\"$cluster\",opensearch_cluster=~\"$opensearch_cluster\",instance=~\"$instance\",index=~\"$index\", context=\"total\"}\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -1456,7 +1456,7 @@ "refresh": "30s", "schemaVersion": 39, "tags": [ - "opensearch" + "opensearch-mixin" ], "templating": { "list": [