Skip to content

Commit ed8af0f

Browse files
feat: TRACING-5214: Add APM dashboard (#837)
* feat: add APM dashboard This adds the initial version of an Application Performance Monitoring (APM) dashboard based on span metrics of traces, generated by the OpenTelemetry collector. Resolves: https://issues.redhat.com/browse/TRACING-5214 Signed-off-by: Andreas Gerstmayr <[email protected]> * chore: try increasing timeout of e2e test try increasing timeout of e2e test to make it pass Signed-off-by: Andreas Gerstmayr <[email protected]> --------- Signed-off-by: Andreas Gerstmayr <[email protected]>
1 parent 634bceb commit ed8af0f

File tree

6 files changed

+235
-3
lines changed

6 files changed

+235
-3
lines changed

go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ require (
3434
replace github.com/openshift/api => github.com/openshift/api v0.0.0-20240404200104-96ed2d49b255
3535

3636
require (
37+
github.com/perses/perses v0.51.1
38+
github.com/perses/plugins/prometheus v0.52.1
39+
github.com/perses/plugins/table v0.0.0-20250709083656-34e29fed0083
40+
github.com/perses/plugins/timeserieschart v0.9.1
3741
github.com/rhobs/observability-operator/pkg/apis v0.0.0-20251009091129-76135c924ed6
3842
github.com/rhobs/perses v0.0.0-20250612171017-5d7686af9ae4
3943
github.com/rhobs/perses-operator v0.1.10-0.20250612173146-78eb619430df

go.sum

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,14 @@ github.com/ovh/go-ovh v1.9.0 h1:6K8VoL3BYjVV3In9tPJUdT7qMx9h0GExN9EXx1r2kKE=
382382
github.com/ovh/go-ovh v1.9.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c=
383383
github.com/perses/common v0.27.1-0.20250326140707-96e439b14e0e h1:AormqtWdtHdoQyGO90U1fRoElR0XQHmP0W9oJUsCOZY=
384384
github.com/perses/common v0.27.1-0.20250326140707-96e439b14e0e/go.mod h1:CMTbKu0uWCFKgo4oDVoT8GcMC0bKyDH4cNG3GVfi+rA=
385+
github.com/perses/perses v0.51.1 h1:3M/D0FHDMAofVLyVwQoADZL6b2L5MqeXdlIxgjfi7S0=
386+
github.com/perses/perses v0.51.1/go.mod h1:DrGiL+itTLl2mwEvNa0wGokELfZTsqOc3TEg+2B0uwY=
387+
github.com/perses/plugins/prometheus v0.52.1 h1:VzV+oAEXYQMw7eCYyVi6ZWhCyQY+4QMhpOK81LidB+k=
388+
github.com/perses/plugins/prometheus v0.52.1/go.mod h1:yloGFbZLSK6jSnW4A7wIb2NQQn6v/ryLgTC3Dw44EXc=
389+
github.com/perses/plugins/table v0.0.0-20250709083656-34e29fed0083 h1:JB1BR9IYmChVv3bD1hOEyISV75VscQ7tHScD9Ue7reU=
390+
github.com/perses/plugins/table v0.0.0-20250709083656-34e29fed0083/go.mod h1:gmmyiOzCxX+ixPOtsy0S2Ufb+F7f7cK49dmaI5UxOzc=
391+
github.com/perses/plugins/timeserieschart v0.9.1 h1:ojfRdfdKvmJINcKn0At/LZh0h8WQpE7fo9Ge4kocTuQ=
392+
github.com/perses/plugins/timeserieschart v0.9.1/go.mod h1:6ZmA47mrEIEeAC/gqYujzHjoEyFPBYTphcDB5gypYRc=
385393
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
386394
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
387395
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=

pkg/controllers/uiplugin/apm.go

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
package uiplugin
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
7+
"github.com/perses/perses/go-sdk/common"
8+
"github.com/perses/perses/go-sdk/dashboard"
9+
"github.com/perses/perses/go-sdk/panel"
10+
panelgroup "github.com/perses/perses/go-sdk/panel-group"
11+
listvariable "github.com/perses/perses/go-sdk/variable/list-variable"
12+
"github.com/perses/plugins/prometheus/sdk/go/query"
13+
labelvalues "github.com/perses/plugins/prometheus/sdk/go/variable/label-values"
14+
table "github.com/perses/plugins/table/sdk/go"
15+
timeseries "github.com/perses/plugins/timeserieschart/sdk/go"
16+
persesv1alpha1 "github.com/rhobs/perses-operator/api/v1alpha1"
17+
persesv1 "github.com/rhobs/perses/pkg/model/api/v1"
18+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+
)
20+
21+
func withServiceMetrics(variableMatchers string) dashboard.Option {
22+
return dashboard.AddPanelGroup("Service Metrics",
23+
panelgroup.PanelsPerLine(3),
24+
panelgroup.AddPanel("Request rate",
25+
timeseries.Chart(),
26+
panel.AddQuery(
27+
query.PromQL(
28+
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s}[$__rate_interval]))", variableMatchers),
29+
query.SeriesNameFormat("req/s"),
30+
),
31+
),
32+
),
33+
panelgroup.AddPanel("Error rate",
34+
timeseries.Chart(),
35+
panel.AddQuery(
36+
query.PromQL(
37+
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s, status_code=\"STATUS_CODE_ERROR\"}[$__rate_interval])) or vector(0)", variableMatchers),
38+
query.SeriesNameFormat("error/s"),
39+
),
40+
),
41+
),
42+
panelgroup.AddPanel("Duration",
43+
timeseries.Chart(
44+
timeseries.WithYAxis(timeseries.YAxis{
45+
Format: &common.Format{
46+
Unit: string(common.MilliSecondsUnit),
47+
},
48+
}),
49+
timeseries.WithLegend(timeseries.Legend{
50+
Position: timeseries.BottomPosition,
51+
}),
52+
),
53+
panel.AddQuery(
54+
query.PromQL(
55+
fmt.Sprintf("histogram_quantile(.95, sum(rate(traces_span_metrics_duration_bucket{%s}[$__rate_interval])) by (le))", variableMatchers),
56+
query.SeriesNameFormat("95th"),
57+
),
58+
),
59+
panel.AddQuery(
60+
query.PromQL(
61+
fmt.Sprintf("histogram_quantile(.75, sum(rate(traces_span_metrics_duration_bucket{%s}[$__rate_interval])) by (le))", variableMatchers),
62+
query.SeriesNameFormat("75th"),
63+
),
64+
),
65+
panel.AddQuery(
66+
query.PromQL(
67+
fmt.Sprintf("histogram_quantile(.50, sum(rate(traces_span_metrics_duration_bucket{%s}[$__rate_interval])) by (le))", variableMatchers),
68+
query.SeriesNameFormat("50th"),
69+
),
70+
),
71+
),
72+
)
73+
}
74+
75+
func withOperationMetrics(variableMatchers string) dashboard.Option {
76+
return dashboard.AddPanelGroup("Operations",
77+
panelgroup.PanelsPerLine(1),
78+
panelgroup.AddPanel("Operation metrics",
79+
table.Table(
80+
table.Transform([]common.Transform{
81+
{
82+
Kind: common.MergeSeriesKind,
83+
Spec: common.MergeSeriesSpec{},
84+
},
85+
}),
86+
table.WithColumnSettings([]table.ColumnSettings{
87+
{
88+
Name: "span_name",
89+
Header: "Name",
90+
EnableSorting: true,
91+
},
92+
{
93+
Name: "value #1",
94+
Header: "Request rate",
95+
Format: &common.Format{
96+
Unit: string(common.RequestsPerSecondsUnit),
97+
DecimalPlaces: 3,
98+
},
99+
},
100+
{
101+
Name: "value #2",
102+
Header: "Error rate",
103+
Format: &common.Format{
104+
Unit: string(common.DecimalUnit),
105+
DecimalPlaces: 3,
106+
},
107+
},
108+
{
109+
Name: "value #3",
110+
Header: "Duration",
111+
Format: &common.Format{
112+
Unit: string(common.MilliSecondsUnit),
113+
DecimalPlaces: 3,
114+
},
115+
},
116+
{
117+
Name: "timestamp",
118+
Hide: true,
119+
},
120+
}),
121+
),
122+
panel.AddQuery(
123+
query.PromQL(
124+
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s}[$__rate_interval])) by (span_name) > 0", variableMatchers),
125+
query.SeriesNameFormat("req/s"),
126+
),
127+
),
128+
panel.AddQuery(
129+
query.PromQL(
130+
fmt.Sprintf("sum(rate(traces_span_metrics_calls{%s, status_code=\"STATUS_CODE_ERROR\"}[$__rate_interval])) by (span_name) > 0", variableMatchers),
131+
query.SeriesNameFormat("Error rate"),
132+
),
133+
),
134+
panel.AddQuery(
135+
query.PromQL(
136+
fmt.Sprintf("sum(rate(traces_span_metrics_duration_sum{%s}[5m]) / rate(traces_span_metrics_duration_count{%s}[5m])) by (span_name) > 0", variableMatchers, variableMatchers),
137+
query.SeriesNameFormat("95th"),
138+
),
139+
),
140+
),
141+
)
142+
}
143+
144+
func buildAPMDashboard() (dashboard.Builder, error) {
145+
variableMatchers := "namespace=\"$namespace\", service=\"$collector\", service_name=\"$service\""
146+
147+
return dashboard.New("apm",
148+
dashboard.Name("Application Performance Monitoring (APM)"),
149+
dashboard.AddVariable("namespace",
150+
listvariable.List(
151+
listvariable.DisplayName("OTEL Collector Namespace"),
152+
labelvalues.PrometheusLabelValues("namespace",
153+
labelvalues.Matchers("traces_span_metrics_calls{}"),
154+
),
155+
),
156+
),
157+
dashboard.AddVariable("collector",
158+
listvariable.List(
159+
listvariable.DisplayName("OTEL Collector"),
160+
labelvalues.PrometheusLabelValues("service",
161+
labelvalues.Matchers("traces_span_metrics_calls{namespace=\"$namespace\"}"),
162+
),
163+
),
164+
),
165+
dashboard.AddVariable("service",
166+
listvariable.List(
167+
listvariable.DisplayName("Service"),
168+
labelvalues.PrometheusLabelValues("service_name",
169+
labelvalues.Matchers("traces_span_metrics_calls{namespace=\"$namespace\", service=\"$collector\"}"),
170+
),
171+
),
172+
),
173+
withServiceMetrics(variableMatchers),
174+
withOperationMetrics(variableMatchers),
175+
)
176+
}
177+
178+
func newAPMDashboard(namespace string) (*persesv1alpha1.PersesDashboard, error) {
179+
builder, err := buildAPMDashboard()
180+
if err != nil {
181+
return nil, err
182+
}
183+
184+
// Workaround because of type conflict between Perses plugin types and Perses fork in rhobs org
185+
rhobsDashboard := persesv1.Dashboard{}
186+
bytes, err := json.Marshal(builder.Dashboard)
187+
if err != nil {
188+
return nil, err
189+
}
190+
err = rhobsDashboard.UnmarshalJSON(bytes)
191+
if err != nil {
192+
return nil, err
193+
}
194+
195+
return &persesv1alpha1.PersesDashboard{
196+
TypeMeta: metav1.TypeMeta{
197+
APIVersion: persesv1alpha1.GroupVersion.String(),
198+
Kind: "PersesDashboard",
199+
},
200+
ObjectMeta: metav1.ObjectMeta{
201+
Name: "apm-dashboard",
202+
Namespace: namespace,
203+
Labels: map[string]string{
204+
"app.kubernetes.io/managed-by": "observability-operator",
205+
},
206+
},
207+
Spec: persesv1alpha1.Dashboard{
208+
DashboardSpec: rhobsDashboard.Spec,
209+
},
210+
}, nil
211+
}

pkg/controllers/uiplugin/components.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"strings"
1111
"text/template"
1212

13+
"github.com/go-logr/logr"
1314
osv1 "github.com/openshift/api/console/v1"
1415
osv1alpha1 "github.com/openshift/api/console/v1alpha1"
1516
"golang.org/x/mod/semver"
@@ -62,7 +63,7 @@ func isVersionAheadOrEqual(currentVersion, version string) bool {
6263
return semver.Compare(currentVersion, canonicalMinVersion) >= 0
6364
}
6465

65-
func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPluginInfo, clusterVersion string) []reconciler.Reconciler {
66+
func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPluginInfo, clusterVersion string, logger logr.Logger) []reconciler.Reconciler {
6667
namespace := pluginInfo.ResourceNamespace
6768

6869
components := []reconciler.Reconciler{
@@ -137,6 +138,13 @@ func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPlugin
137138
reconciler.NewOptionalUpdater(newAcceleratorsDatasource(namespace), plugin, persesEnabled),
138139
reconciler.NewOptionalUpdater(newAcceleratorsDashboard(namespace), plugin, persesEnabled),
139140
)
141+
142+
apmDashboard, err := newAPMDashboard(namespace)
143+
if err != nil {
144+
logger.Error(err, "Cannot build APM dashboard")
145+
} else {
146+
components = append(components, reconciler.NewOptionalUpdater(apmDashboard, plugin, persesEnabled))
147+
}
140148
}
141149

142150
return components

pkg/controllers/uiplugin/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
261261
pluginInfo, pluginInfoErr := PluginInfoBuilder(ctx, rm.k8sClient, rm.k8sDynamicClient, plugin, rm.pluginConf, compatibilityInfo, rm.clusterVersion, rm.logger)
262262

263263
if pluginInfo != nil {
264-
reconcilers := pluginComponentReconcilers(plugin, *pluginInfo, rm.clusterVersion)
264+
reconcilers := pluginComponentReconcilers(plugin, *pluginInfo, rm.clusterVersion, rm.logger)
265265
for _, reconciler := range reconcilers {
266266
err := reconciler.Reconcile(ctx, rm.k8sClient, rm.scheme)
267267
// handle creation / updation errors that can happen due to a stale cache by

test/run-e2e.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,8 @@ run_bundle() {
127127
header "Running ObO Bundle"
128128

129129
./tmp/bin/operator-sdk run bundle "$BUNDLE_IMG" \
130-
--install-mode AllNamespaces --namespace "$OPERATORS_NS" --skip-tls
130+
--install-mode AllNamespaces --namespace "$OPERATORS_NS" --skip-tls \
131+
--timeout 10m
131132
}
132133

133134
log_events() {

0 commit comments

Comments
 (0)