Skip to content

Commit bf2b79f

Browse files
Test and fix alertmanager-ui with sharding (#5293)
* Test alertmanager-ui with sharding different replication factors Signed-off-by: Friedrich Gonzalez <[email protected]> * Let web UI requests to alertmanager be served by the distributor Signed-off-by: Shunji Takano <[email protected]> Signed-off-by: Takano, Shunji | Shunji | CPD <[email protected]> * Update changelog Signed-off-by: Friedrich Gonzalez <[email protected]> --------- Signed-off-by: Friedrich Gonzalez <[email protected]> Signed-off-by: Shunji Takano <[email protected]> Signed-off-by: Takano, Shunji | Shunji | CPD <[email protected]> Co-authored-by: Takano, Shunji | Shunji | CPD <[email protected]>
1 parent 8e36c84 commit bf2b79f

File tree

5 files changed

+68
-53
lines changed

5 files changed

+68
-53
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* [CHANGE] Alertmanager: Validating new fields on the PagerDuty AM config. #5290
55
* [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message #5265
66
* [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
7+
* [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293
78

89
## 1.15.0 2023-04-19
910

pkg/alertmanager/distributor.go

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,6 @@ func (d *Distributor) running(ctx context.Context) error {
6565
return nil
6666
}
6767

68-
// IsPathSupported returns true if the given route is currently supported by the Distributor.
69-
func (d *Distributor) IsPathSupported(p string) bool {
70-
// API can be found at https://petstore.swagger.io/?url=https://raw.githubusercontent.com/prometheus/alertmanager/master/api/v2/openapi.yaml.
71-
isQuorumReadPath, _ := d.isQuorumReadPath(p)
72-
return d.isQuorumWritePath(p) || d.isUnaryWritePath(p) || d.isUnaryDeletePath(p) || d.isUnaryReadPath(p) || isQuorumReadPath
73-
}
74-
7568
func (d *Distributor) isQuorumWritePath(p string) bool {
7669
return strings.HasSuffix(p, "/alerts")
7770
}
@@ -109,11 +102,6 @@ func (d *Distributor) isQuorumReadPath(p string) (bool, merger.Merger) {
109102
return false, nil
110103
}
111104

112-
func (d *Distributor) isUnaryReadPath(p string) bool {
113-
return strings.HasSuffix(p, "/status") ||
114-
strings.HasSuffix(p, "/receivers")
115-
}
116-
117105
// DistributeRequest shards the writes and returns as soon as the quorum is satisfied.
118106
// In case of reads, it proxies the request to one of the alertmanagers.
119107
// DistributeRequest assumes that the caller has verified IsPathSupported returns
@@ -156,10 +144,8 @@ func (d *Distributor) DistributeRequest(w http.ResponseWriter, r *http.Request,
156144
d.doQuorum(userID, w, r, logger, m)
157145
return
158146
}
159-
if d.isUnaryReadPath(r.URL.Path) {
160-
d.doUnary(userID, w, r, logger)
161-
return
162-
}
147+
d.doUnary(userID, w, r, logger)
148+
return
163149
}
164150

165151
http.Error(w, "route not supported by distributor", http.StatusNotFound)
@@ -261,7 +247,7 @@ func (d *Distributor) doUnary(userID string, w http.ResponseWriter, r *http.Requ
261247
// we forward the request to only only of the alertmanagers.
262248

263249
var instances []ring.InstanceDesc
264-
if req.GetMethod() == "GET" && d.isUnaryReadPath(r.URL.Path) {
250+
if req.GetMethod() == "GET" {
265251
instances = replicationSet.Instances
266252
// Randomize the list of instances to not always query the same one.
267253
rand.Shuffle(len(instances), func(i, j int) {

pkg/alertmanager/distributor_test.go

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -315,39 +315,6 @@ func TestDistributor_DistributeRequest(t *testing.T) {
315315

316316
}
317317

318-
func TestDistributor_IsPathSupported(t *testing.T) {
319-
supported := map[string]bool{
320-
"/alertmanager/api/v1/alerts": true,
321-
"/alertmanager/api/v1/alerts/groups": false,
322-
"/alertmanager/api/v1/silences": true,
323-
"/alertmanager/api/v1/silence/id": true,
324-
"/alertmanager/api/v1/silence/anything": true,
325-
"/alertmanager/api/v1/silence/really": true,
326-
"/alertmanager/api/v1/status": true,
327-
"/alertmanager/api/v1/receivers": true,
328-
"/alertmanager/api/v1/other": false,
329-
"/alertmanager/api/v2/alerts": true,
330-
"/alertmanager/api/v2/alerts/groups": true,
331-
"/alertmanager/api/v2/silences": true,
332-
"/alertmanager/api/v2/silence/id": true,
333-
"/alertmanager/api/v2/silence/anything": true,
334-
"/alertmanager/api/v2/silence/really": true,
335-
"/alertmanager/api/v2/status": true,
336-
"/alertmanager/api/v2/receivers": true,
337-
"/alertmanager/api/v2/other": false,
338-
"/alertmanager/other": false,
339-
"/other": false,
340-
}
341-
342-
for path, isSupported := range supported {
343-
t.Run(path, func(t *testing.T) {
344-
d, _, cleanup := prepare(t, 1, 1, 1, []byte{})
345-
t.Cleanup(cleanup)
346-
require.Equal(t, isSupported, d.IsPathSupported(path))
347-
})
348-
}
349-
}
350-
351318
func prepare(t *testing.T, numAM, numHappyAM, replicationFactor int, responseBody []byte) (*Distributor, []*mockAlertmanager, func()) {
352319
ams := []*mockAlertmanager{}
353320
remainingFailure := atomic.NewInt32(int32(numAM - numHappyAM))

pkg/alertmanager/multitenant.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,7 @@ func (am *MultitenantAlertmanager) ServeHTTP(w http.ResponseWriter, req *http.Re
10111011
return
10121012
}
10131013

1014-
if am.cfg.ShardingEnabled && am.distributor.IsPathSupported(req.URL.Path) {
1014+
if am.cfg.ShardingEnabled {
10151015
am.distributor.DistributeRequest(w, req, am.allowedTenants)
10161016
return
10171017
}

pkg/alertmanager/multitenant_test.go

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,9 @@ func TestMultitenantAlertmanager_InitialSyncWithSharding(t *testing.T) {
11021102
}
11031103

11041104
func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
1105+
externalURL := flagext.URLValue{}
1106+
err := externalURL.Set("http://localhost:8080/alertmanager")
1107+
require.NoError(t, err)
11051108
tc := []struct {
11061109
name string
11071110
tenantShardSize int
@@ -1112,6 +1115,7 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
11121115
withSharding bool
11131116
enabledTenants []string
11141117
disabledTenants []string
1118+
statusCode func(user string) int
11151119
}{
11161120
{
11171121
name: "sharding disabled, 1 instance",
@@ -1125,13 +1129,25 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
11251129
configs: 10,
11261130
expectedTenants: 1,
11271131
enabledTenants: []string{"u-1"},
1132+
statusCode: func(user string) int {
1133+
if user == "u-1" {
1134+
return http.StatusOK
1135+
}
1136+
return http.StatusUnauthorized
1137+
},
11281138
},
11291139
{
11301140
name: "sharding disabled, 1 instance, single user disabled",
11311141
instances: 1,
11321142
configs: 10,
11331143
expectedTenants: 9,
11341144
disabledTenants: []string{"u-2"},
1145+
statusCode: func(user string) int {
1146+
if user == "u-2" {
1147+
return http.StatusUnauthorized
1148+
}
1149+
return http.StatusOK
1150+
},
11351151
},
11361152
{
11371153
name: "sharding disabled, 2 instances",
@@ -1155,6 +1171,12 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
11551171
configs: 10,
11561172
expectedTenants: 1,
11571173
enabledTenants: []string{"u-3"},
1174+
statusCode: func(user string) int {
1175+
if user == "u-3" {
1176+
return http.StatusOK
1177+
}
1178+
return http.StatusUnauthorized
1179+
},
11581180
},
11591181
{
11601182
name: "sharding enabled, 1 instance, enabled tenants, single user disabled",
@@ -1164,6 +1186,12 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
11641186
configs: 10,
11651187
expectedTenants: 9,
11661188
disabledTenants: []string{"u-4"},
1189+
statusCode: func(user string) int {
1190+
if user == "u-4" {
1191+
return http.StatusUnauthorized
1192+
}
1193+
return http.StatusOK
1194+
},
11671195
},
11681196
{
11691197
name: "sharding enabled, 2 instances, RF = 1",
@@ -1197,6 +1225,15 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
11971225
configs: 10,
11981226
expectedTenants: 24, // (configs - disabled-tenants) * replication factor
11991227
disabledTenants: []string{"u-1", "u-2"},
1228+
statusCode: func(user string) int {
1229+
switch user {
1230+
case "u-1":
1231+
return http.StatusUnauthorized
1232+
case "u-2":
1233+
return http.StatusUnauthorized
1234+
}
1235+
return http.StatusOK
1236+
},
12001237
},
12011238
}
12021239

@@ -1221,13 +1258,15 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
12211258
Templates: []*alertspb.TemplateDesc{},
12221259
}))
12231260
}
1261+
clientPool := newPassthroughAlertmanagerClientPool()
12241262

12251263
// Then, create the alertmanager instances, start them and add their registries to the slice.
12261264
for i := 1; i <= tt.instances; i++ {
12271265
instanceIDs = append(instanceIDs, fmt.Sprintf("alertmanager-%d", i))
12281266
instanceID := fmt.Sprintf("alertmanager-%d", i)
12291267

12301268
amConfig := mockAlertmanagerConfig(t)
1269+
amConfig.ExternalURL = externalURL
12311270
amConfig.ShardingRing.ReplicationFactor = tt.replicationFactor
12321271
amConfig.ShardingRing.InstanceID = instanceID
12331272
amConfig.ShardingRing.InstanceAddr = fmt.Sprintf("127.0.0.%d", i)
@@ -1252,6 +1291,11 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
12521291
instances = append(instances, am)
12531292
instanceIDs = append(instanceIDs, instanceID)
12541293
registries.AddUserRegistry(instanceID, reg)
1294+
1295+
if tt.withSharding {
1296+
clientPool.setServer(amConfig.ShardingRing.InstanceAddr+":0", am)
1297+
am.distributor.alertmanagerClientsPool = clientPool
1298+
}
12551299
}
12561300

12571301
// If we're testing sharding, we need make sure the ring is settled.
@@ -1281,6 +1325,23 @@ func TestMultitenantAlertmanager_PerTenantSharding(t *testing.T) {
12811325
assert.Equal(t, tt.expectedTenants, numInstances)
12821326
assert.Equal(t, float64(tt.expectedTenants), metrics.GetSumOfGauges("cortex_alertmanager_tenants_owned"))
12831327
assert.Equal(t, float64(tt.configs*tt.instances), metrics.GetSumOfGauges("cortex_alertmanager_tenants_discovered"))
1328+
1329+
statusCode := tt.statusCode
1330+
if statusCode == nil {
1331+
statusCode = func(user string) int {
1332+
return http.StatusOK
1333+
}
1334+
}
1335+
for ami, am := range instances {
1336+
for i := 1; i <= tt.configs; i++ {
1337+
u := fmt.Sprintf("u-%d", i)
1338+
req := httptest.NewRequest("GET", "http://localhost:8080/alertmanager/", nil)
1339+
w := httptest.NewRecorder()
1340+
am.ServeHTTP(w, req.WithContext(user.InjectOrgID(req.Context(), u)))
1341+
resp := w.Result()
1342+
require.Equal(t, statusCode(u), resp.StatusCode, ami)
1343+
}
1344+
}
12841345
})
12851346
}
12861347
}
@@ -2109,8 +2170,8 @@ func (am *passthroughAlertmanagerClient) ReadState(ctx context.Context, in *aler
21092170
return am.server.ReadState(ctx, in)
21102171
}
21112172

2112-
func (am *passthroughAlertmanagerClient) HandleRequest(context.Context, *httpgrpc.HTTPRequest, ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) {
2113-
return nil, fmt.Errorf("unexpected call to HandleRequest")
2173+
func (am *passthroughAlertmanagerClient) HandleRequest(ctx context.Context, in *httpgrpc.HTTPRequest, opts ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) {
2174+
return am.server.HandleRequest(ctx, in)
21142175
}
21152176

21162177
func (am *passthroughAlertmanagerClient) RemoteAddress() string {

0 commit comments

Comments
 (0)