Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
prometheusAlerts+:: {
new(this): {
groups+: [
{
name: 'ApacheTomcatAlerts',
rules: [
{
alert: 'ApacheTomcatAlertsHighCpuUsage',
expr: |||
sum by (%(agg)s) (jvm_process_cpu_load{%(filteringSelector)s}) > %(ApacheTomcatAlertsCriticalCpuUsage)s
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
sum by (%(agg)s) (jvm_process_cpu_load{%(filteringSelector)s}) > %(alertsCriticalCpuUsage)s
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -18,15 +18,15 @@
description:
(
'The CPU usage has been at {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(ApacheTomcatAlertsCriticalCpuUsage)s percent.'
) % $._config,
'which is above the threshold of %(alertsCriticalCpuUsage)s percent.'
) % this.config,
},
},
{
alert: 'ApacheTomcatAlertsHighMemoryUsage',
expr: |||
sum(jvm_memory_usage_used_bytes{%(filteringSelector)s}) by (%(agg)s) / sum(jvm_physical_memory_bytes{%(filteringSelector)s}) by (%(agg)s) * 100 > %(ApacheTomcatAlertsCriticalMemoryUsage)s
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
sum(jvm_memory_usage_used_bytes{%(filteringSelector)s}) by (%(agg)s) / sum(jvm_physical_memory_bytes{%(filteringSelector)s}) by (%(agg)s) * 100 > %(alertsCriticalMemoryUsage)s
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -36,15 +36,15 @@
description:
(
'The memory usage has been at {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(ApacheTomcatAlertsCriticalMemoryUsage)s percent.'
) % $._config,
'which is above the threshold of %(alertsCriticalMemoryUsage)s percent.'
) % this.config,
},
},
{
alert: 'ApacheTomcatAlertsHighRequestErrorPercent',
alert: 'ApacheTomcatAlertsRequestErrors',
expr: |||
sum by (%(agg)s) (increase(tomcat_errorcount_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m]) * 100) > %(ApacheTomcatAlertsCriticalRequestErrorPercentage)s
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
sum by (%(agg)s) (increase(tomcat_errorcount_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m]) * 100) > %(alertsCriticalRequestErrorPercentage)s
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -54,15 +54,15 @@
description:
(
'The percentage of request errors has been at {{ printf "%%.0f" $value }} percent over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(ApacheTomcatAlertsCriticalRequestErrorPercentage)s percent.'
) % $._config,
'which is above the threshold of %(alertsCriticalRequestErrorPercentage)s percent.'
) % this.config,
},
},
{
alert: 'ApacheTomcatAlertsModeratelyHighProcessingTime',
alert: 'ApacheTomcatAlertsHighProcessingTime',
expr: |||
sum by (%(agg)s) (increase(tomcat_processingtime_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m])) > %(ApacheTomcatAlertsWarningProcessingTime)s
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
sum by (%(agg)s) (increase(tomcat_processingtime_total{%(filteringSelector)s}[5m]) / increase(tomcat_requestcount_total{%(filteringSelector)s}[5m])) > %(alertsWarningProcessingTime)s
||| % this.config { agg: std.join(',', this.config.groupLabels + this.config.instanceLabels) },
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -72,8 +72,8 @@
description:
(
'The processing time has been at {{ printf "%%.0f" $value }}ms over the last 5 minutes on {{$labels.instance}}, ' +
'which is above the threshold of %(ApacheTomcatAlertsWarningProcessingTime)sms.'
) % $._config,
'which is above the threshold of %(alertsWarningProcessingTime)sms.'
) % this.config,
},
},
],
Expand Down
45 changes: 27 additions & 18 deletions apache-tomcat-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,24 +1,33 @@
{
_config+:: {
dashboardTags: ['apache-tomcat-mixin'],
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
local this = self,
filteringSelector: 'job="integrations/tomcat"',
groupLabels: ['job', 'cluster'],
logLabels: [],
instanceLabels: ['instance'],

//alert thresholds
ApacheTomcatAlertsCriticalCpuUsage: 80, //%
ApacheTomcatAlertsCriticalMemoryUsage: 80, //%
ApacheTomcatAlertsCriticalRequestErrorPercentage: 5, //%
ApacheTomcatAlertsWarningProcessingTime: 300, //ms
uid: 'apache-tomcat',
dashboardTags: [self.uid + '-mixin'],
dashboardNamePrefix: 'Apache Tomcat',
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
metricsSource: ['prometheus'], // metrics source for signals

// used in alerts:
filteringSelector: 'job="integrations/tomcat"',
groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'],
instanceLabels: ['instance'],

enableLokiLogs: true,
enableMultiCluster: false,
multiclusterSelector: 'job=~"$job"',
tomcatSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
// Logging configuration
enableLokiLogs: true,
extraLogLabels: ['level'], // Required by logs-lib
logsVolumeGroupBy: 'level',
showLogsVolume: true,

// alert thresholds
alertsCriticalCpuUsage: 80, //%
alertsCriticalMemoryUsage: 80, //%
alertsCriticalRequestErrorPercentage: 5, //%
alertsWarningProcessingTime: 300, //ms

signals+: {
overview: (import './signals/overview.libsonnet')(this),
hosts: (import './signals/hosts.libsonnet')(this),
},
}
132 changes: 132 additions & 0 deletions apache-tomcat-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
local g = import './g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
local logslib = import 'logs-lib/logs/main.libsonnet';

{
local root = self,
new(this)::
  // Builds the dashboards of the mixin as an object keyed by output file name.
  //
  // `this` is the assembled library object; this function reads
  //   this.config.{dashboardNamePrefix, dashboardTags, uid, dashboardRefresh,
  //                dashboardPeriod, dashboardTimezone, enableLokiLogs,
  //                filteringSelector, groupLabels, extraLogLabels, showLogsVolume}
  //   this.grafana.{links, variables, annotations, rows}
  //
  // Result: 'apache-tomcat-overview.json' and 'apache-tomcat-hosts.json' always,
  // plus 'apache-tomcat-logs.json' only when this.config.enableLokiLogs is true.
  local prefix = this.config.dashboardNamePrefix;
  local links = this.grafana.links;
  local tags = this.config.dashboardTags;
  local uid = g.util.string.slugify(this.config.uid);
  local vars = this.grafana.variables;
  local annotations = this.grafana.annotations;
  local refresh = this.config.dashboardRefresh;
  local period = this.config.dashboardPeriod;
  local timezone = this.config.dashboardTimezone;
  {
    'apache-tomcat-overview.json':
      g.dashboard.new(prefix + ' overview')
      + g.dashboard.withPanels(
        g.util.panel.resolveCollapsedFlagOnRows(
          g.util.grid.wrapPanels(
            [
              this.grafana.rows.overview,
            ],
          ),
        ),
      ) + root.applyCommon(
        // Common multi-instance variables plus 'protocol' and 'port', both
        // populated from the label values of tomcat_bytesreceived_total.
        vars.multiInstance + [
          g.dashboard.variable.query.new(
            'protocol',
            query='label_values(tomcat_bytesreceived_total{%(queriesSelector)s}, protocol)' % vars
          )
          + g.dashboard.variable.custom.selectionOptions.withMulti(true)
          + g.dashboard.variable.query.queryTypes.withLabelValues(label='protocol', metric='tomcat_bytesreceived_total{%(queriesSelector)s}' % vars)
          + g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),

          g.dashboard.variable.query.new(
            'port',
            query='label_values(tomcat_bytesreceived_total{%(queriesSelector)s}, port)' % vars
          )
          + g.dashboard.variable.custom.selectionOptions.withMulti(true)
          + g.dashboard.variable.query.queryTypes.withLabelValues(label='port', metric='tomcat_bytesreceived_total{%(queriesSelector)s}' % vars)
          + g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
        ],
        uid + '_overview',
        tags,
        // Re-declaring the field as hidden (::) removes it from the
        // std.objectValues(links) call made in applyCommon; presumably so the
        // dashboard does not carry a link to itself.
        links { apacheTomcatOverview:: {} },
        annotations,
        timezone,
        refresh,
        period
      ),

    'apache-tomcat-hosts.json':
      g.dashboard.new(prefix + ' hosts')
      + g.dashboard.withPanels(
        g.util.panel.resolveCollapsedFlagOnRows(
          g.util.grid.wrapPanels(
            [
              this.grafana.rows.hosts,
              this.grafana.rows.hostServlets,
            ],
          ),
        ),
      ) + root.applyCommon(
        // Common multi-instance variables plus 'host' and 'context' (from
        // tomcat_session_sessioncounter_total) and 'servlet' (from
        // tomcat_servlet_requestcount_total).
        vars.multiInstance + [
          g.dashboard.variable.query.new('host')
          + g.dashboard.variable.custom.selectionOptions.withMulti(true)
          + g.dashboard.variable.query.queryTypes.withLabelValues(label='host', metric='tomcat_session_sessioncounter_total{%(queriesSelector)s}' % vars)
          + g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),

          g.dashboard.variable.query.new('context')
          + g.dashboard.variable.custom.selectionOptions.withMulti(true)
          + g.dashboard.variable.query.queryTypes.withLabelValues(label='context', metric='tomcat_session_sessioncounter_total{%(queriesSelector)s}' % vars)
          + g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),

          g.dashboard.variable.query.new('servlet')
          + g.dashboard.variable.custom.selectionOptions.withMulti(true)
          + g.dashboard.variable.query.queryTypes.withLabelValues(label='servlet', metric='tomcat_servlet_requestcount_total{%(queriesSelector)s}' % vars)
          + g.dashboard.variable.query.withDatasourceFromVariable(variable=vars.datasources.prometheus),
        ],
        uid + '_hosts',
        tags,
        // Hidden for the same reason as apacheTomcatOverview above.
        links { apacheTomcatHosts:: {} },
        annotations,
        timezone,
        refresh,
        period
      ),
  } + if this.config.enableLokiLogs then {
    'apache-tomcat-logs.json':
      logslib.new(
        prefix + ' logs',
        datasourceName=this.grafana.variables.datasources.loki.name,
        datasourceRegex=this.grafana.variables.datasources.loki.regex,
        filterSelector=this.config.filteringSelector,
        labels=this.config.groupLabels + this.config.extraLogLabels,
        formatParser=null,
        showLogsVolume=this.config.showLogsVolume,
      )
      {
        dashboards+:
          {
            // Layer the shared dashboard settings on top of the dashboard
            // produced by logslib, reusing the variables it already defined.
            logs+:
              root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { apacheTomcatLogs:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period),
          },
        panels+:
          {
            logs+:
              g.panel.logs.options.withEnableLogDetails(true)
              + g.panel.logs.options.withShowTime(false)
              + g.panel.logs.options.withWrapLogMessage(false),
          },
        variables+: {
          // Append the Prometheus datasource variable with `hide: 2` to the
          // variables exposed by logslib.
          toArray+: [
            this.grafana.variables.datasources.prometheus { hide: 2 },
          ],
        },
      }.dashboards.logs,
  }
  // An `if` without `else` evaluates to null; `object + null` is a runtime
  // error, which would break every dashboard when enableLokiLogs is false.
  else {},

applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
  // Returns a dashboard mixin carrying the settings shared by every dashboard:
  // tags, uid, links, timezone, refresh interval, time range start, template
  // variables and annotations.
  //
  // `links` and `annotations` are objects; only the values of their visible
  // fields are used (std.objectValues skips hidden `::` fields).
  local dash = g.dashboard;
  local linkList = std.objectValues(links);
  local annotationList = std.objectValues(annotations);
  dash.withTags(tags)
  + dash.withUid(uid)
  + dash.withLinks(linkList)
  + dash.withTimezone(timezone)
  + dash.withRefresh(refresh)
  + dash.time.withFrom(period)
  + dash.withVariables(vars)
  + dash.withAnnotations(annotationList),
}
Loading
Loading