Skip to content

Commit 6bca2d5

Browse files
authored
Refactor query frontend to return prometheus error response (#5811)
* refactor query frontend to return prometheus error response Signed-off-by: Ben Ye <[email protected]> * stash Signed-off-by: Ben Ye <[email protected]> * add test Signed-off-by: Ben Ye <[email protected]> * fix lint Signed-off-by: Ben Ye <[email protected]> --------- Signed-off-by: Ben Ye <[email protected]>
1 parent e39eace commit 6bca2d5

File tree

16 files changed

+505
-130
lines changed

16 files changed

+505
-130
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
* [ENHANCEMENT] Ruler: Improve GetRules response time by refactoring mutexes and introducing a temporary rules cache in `ruler/manager.go`. #5805
4040
* [ENHANCEMENT] Querier: Add context error check when merging slices from ingesters for GetLabel operations. #5837
4141
* [ENHANCEMENT] Ring: Add experimental `-ingester.tokens-generator-strategy=minimize-spread` flag to enable the new minimize spread token generator strategy. #5855
42+
* [ENHANCEMENT] Query Frontend: Ensure error response returned by Query Frontend follows Prometheus API error response format. #5811
4243
* [BUGFIX] Distributor: Do not use label with empty values for sharding #5717
4344
* [BUGFIX] Query Frontend: queries with negative offset should check whether it is cacheable or not. #5719
4445
* [BUGFIX] Redis Cache: pass `cache_size` config correctly. #5734

pkg/frontend/transport/handler.go

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ import (
1919
"github.com/prometheus/client_golang/prometheus"
2020
"github.com/prometheus/client_golang/prometheus/promauto"
2121
"github.com/weaveworks/common/httpgrpc"
22-
"github.com/weaveworks/common/httpgrpc/server"
2322
"google.golang.org/grpc/status"
2423

2524
querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
2625
"github.com/cortexproject/cortex/pkg/querier/tripperware"
2726
"github.com/cortexproject/cortex/pkg/tenant"
2827
"github.com/cortexproject/cortex/pkg/util"
28+
util_api "github.com/cortexproject/cortex/pkg/util/api"
2929
util_log "github.com/cortexproject/cortex/pkg/util/log"
3030
)
3131

@@ -239,8 +239,9 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
239239
writeServiceTimingHeader(queryResponseTime, hs, stats)
240240
}
241241

242+
logger := util_log.WithContext(r.Context(), f.log)
242243
if err != nil {
243-
writeError(w, err, hs)
244+
writeError(logger, w, err, hs)
244245
return
245246
}
246247

@@ -252,7 +253,7 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
252253
// log copy response body error so that we will know even though success response code returned
253254
bytesCopied, err := io.Copy(w, resp.Body)
254255
if err != nil && !errors.Is(err, syscall.EPIPE) {
255-
level.Error(util_log.WithContext(r.Context(), f.log)).Log("msg", "write response body error", "bytesCopied", bytesCopied, "err", err)
256+
level.Error(logger).Log("msg", "write response body error", "bytesCopied", bytesCopied, "err", err)
256257
}
257258
}
258259

@@ -441,7 +442,7 @@ func formatQueryString(queryString url.Values) (fields []interface{}) {
441442
return fields
442443
}
443444

444-
func writeError(w http.ResponseWriter, err error, additionalHeaders http.Header) {
445+
func writeError(logger log.Logger, w http.ResponseWriter, err error, additionalHeaders http.Header) {
445446
switch err {
446447
case context.Canceled:
447448
err = errCanceled
@@ -453,21 +454,13 @@ func writeError(w http.ResponseWriter, err error, additionalHeaders http.Header)
453454
}
454455
}
455456

456-
resp, ok := httpgrpc.HTTPResponseFromError(err)
457-
if ok {
458-
for k, values := range additionalHeaders {
459-
resp.Headers = append(resp.Headers, &httpgrpc.Header{Key: k, Values: values})
460-
}
461-
_ = server.WriteResponse(w, resp)
462-
} else {
463-
headers := w.Header()
464-
for k, values := range additionalHeaders {
465-
for _, value := range values {
466-
headers.Set(k, value)
467-
}
457+
headers := w.Header()
458+
for k, values := range additionalHeaders {
459+
for _, value := range values {
460+
headers.Set(k, value)
468461
}
469-
http.Error(w, err.Error(), http.StatusInternalServerError)
470462
}
463+
util_api.RespondFromGRPCError(logger, w, err)
471464
}
472465

473466
func writeServiceTimingHeader(queryResponseTime time.Duration, headers http.Header, stats *querier_stats.QueryStats) {
@@ -488,7 +481,7 @@ func statsValue(name string, d time.Duration) string {
488481
func getStatusCodeFromError(err error) int {
489482
switch err {
490483
case context.Canceled:
491-
return StatusClientClosedRequest
484+
return util_api.StatusClientClosedRequest
492485
case context.DeadlineExceeded:
493486
return http.StatusGatewayTimeout
494487
default:

pkg/frontend/transport/handler_test.go

Lines changed: 118 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package transport
33
import (
44
"bytes"
55
"context"
6+
"encoding/json"
67
"io"
78
"net/http"
89
"net/http/httptest"
@@ -13,14 +14,18 @@ import (
1314

1415
"github.com/go-kit/log"
1516
"github.com/pkg/errors"
17+
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
1618
"github.com/prometheus/client_golang/prometheus"
1719
promtest "github.com/prometheus/client_golang/prometheus/testutil"
1820
"github.com/stretchr/testify/assert"
1921
"github.com/stretchr/testify/require"
2022
"github.com/weaveworks/common/httpgrpc"
2123
"github.com/weaveworks/common/user"
24+
"google.golang.org/grpc/codes"
2225

2326
querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
27+
util_api "github.com/cortexproject/cortex/pkg/util/api"
28+
util_log "github.com/cortexproject/cortex/pkg/util/log"
2429
)
2530

2631
type roundTripperFunc func(*http.Request) (*http.Response, error)
@@ -34,19 +39,111 @@ func TestWriteError(t *testing.T) {
3439
status int
3540
err error
3641
additionalHeaders http.Header
42+
expectedErrResp util_api.Response
3743
}{
38-
{http.StatusInternalServerError, errors.New("unknown"), http.Header{"User-Agent": []string{"Golang"}}},
39-
{http.StatusInternalServerError, errors.New("unknown"), nil},
40-
{http.StatusGatewayTimeout, context.DeadlineExceeded, nil},
41-
{StatusClientClosedRequest, context.Canceled, nil},
42-
{StatusClientClosedRequest, context.Canceled, http.Header{"User-Agent": []string{"Golang"}}},
43-
{StatusClientClosedRequest, context.Canceled, http.Header{"User-Agent": []string{"Golang"}, "Content-Type": []string{"application/json"}}},
44-
{http.StatusBadRequest, httpgrpc.Errorf(http.StatusBadRequest, ""), http.Header{}},
45-
{http.StatusRequestEntityTooLarge, errors.New("http: request body too large"), http.Header{}},
44+
{
45+
http.StatusInternalServerError,
46+
errors.New("unknown"),
47+
http.Header{"User-Agent": []string{"Golang"}},
48+
util_api.Response{
49+
Status: "error",
50+
ErrorType: v1.ErrServer,
51+
Error: "unknown",
52+
},
53+
},
54+
{
55+
http.StatusInternalServerError,
56+
errors.New("unknown"),
57+
nil,
58+
util_api.Response{
59+
Status: "error",
60+
ErrorType: v1.ErrServer,
61+
Error: "unknown",
62+
},
63+
},
64+
{
65+
http.StatusGatewayTimeout,
66+
context.DeadlineExceeded,
67+
nil,
68+
util_api.Response{
69+
Status: "error",
70+
ErrorType: v1.ErrTimeout,
71+
Error: "",
72+
},
73+
},
74+
{
75+
StatusClientClosedRequest,
76+
context.Canceled,
77+
nil,
78+
util_api.Response{
79+
Status: "error",
80+
ErrorType: v1.ErrCanceled,
81+
Error: "",
82+
},
83+
},
84+
{
85+
StatusClientClosedRequest,
86+
context.Canceled,
87+
http.Header{"User-Agent": []string{"Golang"}},
88+
util_api.Response{
89+
Status: "error",
90+
ErrorType: v1.ErrCanceled,
91+
Error: "",
92+
},
93+
},
94+
{
95+
StatusClientClosedRequest,
96+
context.Canceled,
97+
http.Header{"User-Agent": []string{"Golang"}, "Content-Type": []string{"application/json"}},
98+
util_api.Response{
99+
Status: "error",
100+
ErrorType: v1.ErrCanceled,
101+
Error: "",
102+
},
103+
},
104+
{http.StatusBadRequest,
105+
httpgrpc.Errorf(http.StatusBadRequest, ""),
106+
http.Header{},
107+
util_api.Response{
108+
Status: "error",
109+
ErrorType: v1.ErrBadData,
110+
Error: "",
111+
},
112+
},
113+
{
114+
http.StatusRequestEntityTooLarge,
115+
errors.New("http: request body too large"),
116+
http.Header{},
117+
util_api.Response{
118+
Status: "error",
119+
ErrorType: v1.ErrBadData,
120+
Error: "http: request body too large",
121+
},
122+
},
123+
{
124+
http.StatusUnprocessableEntity,
125+
httpgrpc.Errorf(http.StatusUnprocessableEntity, "limit hit"),
126+
http.Header{},
127+
util_api.Response{
128+
Status: "error",
129+
ErrorType: v1.ErrExec,
130+
Error: "limit hit",
131+
},
132+
},
133+
{
134+
http.StatusUnprocessableEntity,
135+
httpgrpc.Errorf(int(codes.PermissionDenied), "permission denied"),
136+
http.Header{},
137+
util_api.Response{
138+
Status: "error",
139+
ErrorType: v1.ErrBadData,
140+
Error: "permission denied",
141+
},
142+
},
46143
} {
47144
t.Run(test.err.Error(), func(t *testing.T) {
48145
w := httptest.NewRecorder()
49-
writeError(w, test.err, test.additionalHeaders)
146+
writeError(util_log.Logger, w, test.err, test.additionalHeaders)
50147
require.Equal(t, test.status, w.Result().StatusCode)
51148
expectedAdditionalHeaders := test.additionalHeaders
52149
if expectedAdditionalHeaders != nil {
@@ -56,6 +153,18 @@ func TestWriteError(t *testing.T) {
56153
}
57154
}
58155
}
156+
data, err := io.ReadAll(w.Result().Body)
157+
require.NoError(t, err)
158+
var res util_api.Response
159+
err = json.Unmarshal(data, &res)
160+
require.NoError(t, err)
161+
resp, ok := httpgrpc.HTTPResponseFromError(test.err)
162+
if ok {
163+
require.Equal(t, string(resp.Body), res.Error)
164+
} else {
165+
require.Equal(t, test.err.Error(), res.Error)
166+
167+
}
59168
})
60169
}
61170
}

pkg/querier/tripperware/instantquery/limits.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ func (l limitsMiddleware) Do(ctx context.Context, r tripperware.Request) (trippe
4848
if maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxQueryLength); maxQueryLength > 0 {
4949
expr, err := parser.ParseExpr(r.GetQuery())
5050
if err != nil {
51-
// Let Querier propagates the parsing error.
52-
return l.next.Do(ctx, r)
51+
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
5352
}
5453

5554
// Enforce query length across all selectors in the query.

pkg/querier/tripperware/instantquery/limits_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@ package instantquery
22

33
import (
44
"context"
5+
"net/http"
56
"testing"
67
"time"
78

9+
"github.com/prometheus/prometheus/promql/parser"
810
"github.com/stretchr/testify/assert"
911
"github.com/stretchr/testify/mock"
1012
"github.com/stretchr/testify/require"
13+
"github.com/weaveworks/common/httpgrpc"
1114
"github.com/weaveworks/common/user"
1215

1316
"github.com/cortexproject/cortex/pkg/querier/tripperware"
@@ -20,6 +23,9 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
2023
thirtyDays = 30 * 24 * time.Hour
2124
)
2225

26+
wrongQuery := `up[`
27+
_, parserErr := parser.ParseExpr(wrongQuery)
28+
2329
tests := map[string]struct {
2430
maxQueryLength time.Duration
2531
query string
@@ -31,6 +37,7 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
3137
"even though failed to parse expression, should return no error since request will pass to next middleware": {
3238
query: `up[`,
3339
maxQueryLength: thirtyDays,
40+
expectedErr: httpgrpc.Errorf(http.StatusBadRequest, parserErr.Error()).Error(),
3441
},
3542
"should succeed on a query not exceeding time range": {
3643
query: `up`,

pkg/querier/tripperware/queryrange/limits.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ func (l limitsMiddleware) Do(ctx context.Context, r tripperware.Request) (trippe
8484

8585
expr, err := parser.ParseExpr(r.GetQuery())
8686
if err != nil {
87-
// Let Querier propagates the parsing error.
88-
return l.next.Do(ctx, r)
87+
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
8988
}
9089

9190
// Enforce query length across all selectors in the query.

pkg/querier/tripperware/queryrange/limits_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@ package queryrange
22

33
import (
44
"context"
5+
"net/http"
56
"testing"
67
"time"
78

9+
"github.com/prometheus/prometheus/promql/parser"
810
"github.com/stretchr/testify/assert"
911
"github.com/stretchr/testify/mock"
1012
"github.com/stretchr/testify/require"
13+
"github.com/weaveworks/common/httpgrpc"
1114
"github.com/weaveworks/common/user"
1215

1316
"github.com/cortexproject/cortex/pkg/querier/tripperware"
@@ -115,6 +118,9 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
115118

116119
now := time.Now()
117120

121+
wrongQuery := `up[`
122+
_, parserErr := parser.ParseExpr(wrongQuery)
123+
118124
tests := map[string]struct {
119125
maxQueryLength time.Duration
120126
query string
@@ -132,6 +138,7 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
132138
reqStartTime: now.Add(-time.Hour),
133139
reqEndTime: now,
134140
maxQueryLength: thirtyDays,
141+
expectedErr: httpgrpc.Errorf(http.StatusBadRequest, parserErr.Error()).Error(),
135142
},
136143
"should succeed on a query on short time range, ending now": {
137144
maxQueryLength: thirtyDays,

pkg/querier/tripperware/queryrange/split_by_interval.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,7 @@ func (s splitByInterval) Do(ctx context.Context, r tripperware.Request) (tripper
4747
// to line up the boundaries with step.
4848
reqs, err := splitQuery(r, s.interval(r))
4949
if err != nil {
50-
// If the query itself is bad, we don't return error but send the query
51-
// to querier to return the expected error message. This is not very efficient
52-
// but should be okay for now.
53-
// TODO(yeya24): query frontend can reuse the Prometheus API handler and return
54-
// expected error message locally without passing it to the querier through network.
55-
return s.next.Do(ctx, r)
50+
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
5651
}
5752
s.splitByCounter.Add(float64(len(reqs)))
5853

pkg/querier/tripperware/roundtrip.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ func NewQueryTripperware(
142142
tenantIDs, err := tenant.TenantIDs(r.Context())
143143
// This should never happen anyways because we have auth middleware before this.
144144
if err != nil {
145-
return nil, err
145+
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
146146
}
147147
now := time.Now()
148148
userStr := tenant.JoinTenantIDs(tenantIDs)
@@ -161,8 +161,7 @@ func NewQueryTripperware(
161161

162162
expr, err := parser.ParseExpr(query)
163163
if err != nil {
164-
// If query is invalid, no need to go through tripperwares for further splitting.
165-
return next.RoundTrip(r)
164+
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
166165
}
167166

168167
reqStats := stats.FromContext(r.Context())

pkg/querier/tripperware/shard_by.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ func (s shardBy) Do(ctx context.Context, r Request) (Response, error) {
5555
analysis, err := s.analyzer.Analyze(r.GetQuery())
5656
if err != nil {
5757
level.Warn(logger).Log("msg", "error analyzing query", "q", r.GetQuery(), "err", err)
58+
return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
5859
}
5960

6061
stats.AddExtraFields(
@@ -63,7 +64,7 @@ func (s shardBy) Do(ctx context.Context, r Request) (Response, error) {
6364
"shard_by.sharding_labels", analysis.ShardingLabels(),
6465
)
6566

66-
if err != nil || !analysis.IsShardable() {
67+
if !analysis.IsShardable() {
6768
return s.next.Do(ctx, r)
6869
}
6970

0 commit comments

Comments
 (0)