From 01fd2c4a4a9a1f8391fafa2889a610dc5b745715 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Fri, 9 Dec 2022 10:12:04 -0800 Subject: [PATCH 1/4] add counter metrics for total requests going to queue Signed-off-by: Ben Ye --- pkg/frontend/v1/frontend.go | 3 ++- pkg/frontend/v1/frontend_test.go | 1 + pkg/scheduler/queue/queue.go | 11 +++++++++-- pkg/scheduler/queue/queue_test.go | 3 +++ pkg/scheduler/scheduler.go | 4 +++- 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pkg/frontend/v1/frontend.go b/pkg/frontend/v1/frontend.go index 541e17dc4b4..25cdea24ba5 100644 --- a/pkg/frontend/v1/frontend.go +++ b/pkg/frontend/v1/frontend.go @@ -77,6 +77,7 @@ type Frontend struct { // Metrics. queueLength *prometheus.GaugeVec discardedRequests *prometheus.CounterVec + totalRequests *prometheus.CounterVec numClients prometheus.GaugeFunc queueDuration prometheus.Histogram } @@ -112,7 +113,7 @@ func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Regist }), } - f.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, f.queueLength, f.discardedRequests, f.limits) + f.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, f.queueLength, f.discardedRequests, f.limits, registerer) f.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(f.cleanupInactiveUserMetrics) var err error diff --git a/pkg/frontend/v1/frontend_test.go b/pkg/frontend/v1/frontend_test.go index 3c512aef800..cb0457b0bd9 100644 --- a/pkg/frontend/v1/frontend_test.go +++ b/pkg/frontend/v1/frontend_test.go @@ -133,6 +133,7 @@ func TestFrontendCheckReady(t *testing.T) { prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), limits, + nil, ), } for i := 0; i < tt.connectedClients; i++ { diff --git a/pkg/scheduler/queue/queue.go b/pkg/scheduler/queue/queue.go index bdaad09ee6d..8aa79548ea7 100644 --- a/pkg/scheduler/queue/queue.go +++ b/pkg/scheduler/queue/queue.go @@ -2,6 +2,7 @@ package queue import ( "context" + "github.com/prometheus/client_golang/prometheus/promauto" "sync" "time" @@ -58,15 +59,20 @@ type RequestQueue struct { stopped bool queueLength *prometheus.GaugeVec // Per user and reason. + totalRequests *prometheus.CounterVec // Per user. discardedRequests *prometheus.CounterVec // Per user. } -func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, queueLength *prometheus.GaugeVec, discardedRequests *prometheus.CounterVec, limits Limits) *RequestQueue { +func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, queueLength *prometheus.GaugeVec, discardedRequests *prometheus.CounterVec, limits Limits, registerer prometheus.Registerer) *RequestQueue { q := &RequestQueue{ queues: newUserQueues(maxOutstandingPerTenant, forgetDelay, limits), connectedQuerierWorkers: atomic.NewInt32(0), queueLength: queueLength, - discardedRequests: discardedRequests, + totalRequests: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_request_queue_requests_total", + Help: "Total number of query requests going to the request queue.", + }, []string{"user"}), + discardedRequests: discardedRequests, } q.cond = sync.NewCond(&q.mtx) @@ -94,6 +100,7 @@ func (q *RequestQueue) EnqueueRequest(userID string, req Request, maxQueriers in return errors.New("no queue found") } + q.totalRequests.WithLabelValues(userID).Inc() select { case queue <- req: q.queueLength.WithLabelValues(userID).Inc() diff --git a/pkg/scheduler/queue/queue_test.go b/pkg/scheduler/queue/queue_test.go index ef02a492f3b..8e8f0a94d08 100644 --- a/pkg/scheduler/queue/queue_test.go +++ b/pkg/scheduler/queue/queue_test.go @@ -27,6 +27,7 @@ func BenchmarkGetNextRequest(b *testing.B) { prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), MockLimits{MaxOutstanding: 100}, + nil, ) queues = append(queues, queue) @@ -85,6 +86,7 @@ func BenchmarkQueueRequest(b *testing.B) { prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), MockLimits{MaxOutstanding: 100}, + nil, ) for ix := 0; ix < queriers; ix++ { @@ -119,6 +121,7 @@ func TestRequestQueue_GetNextRequestForQuerier_ShouldGetRequestAfterReshardingBe prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}), prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}), MockLimits{MaxOutstanding: 100}, + nil, ) // Start the queue service. diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 115de295c31..641ac169151 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -59,6 +59,7 @@ type Scheduler struct { // Metrics. queueLength *prometheus.GaugeVec + totalRequests *prometheus.CounterVec discardedRequests *prometheus.CounterVec connectedQuerierClients prometheus.GaugeFunc connectedFrontendClients prometheus.GaugeFunc @@ -111,7 +112,8 @@ func NewScheduler(cfg Config, limits Limits, log log.Logger, registerer promethe Name: "cortex_query_scheduler_discarded_requests_total", Help: "Total number of query requests discarded.", }, []string{"user"}) - s.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, s.queueLength, s.discardedRequests, s.limits) + + s.requestQueue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, cfg.QuerierForgetDelay, s.queueLength, s.discardedRequests, s.limits, registerer) s.queueDuration = promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ Name: "cortex_query_scheduler_queue_duration_seconds", From dd6b6dc1c88dc56a3dd1dfd1df12a88cee620d4e Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Fri, 9 Dec 2022 10:22:36 -0800 Subject: [PATCH 2/4] update changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f254c731af..3a8aff1188a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * [FEATURE] Ingester: Added `-blocks-storage.tsdb.head-chunks-write-queue-size` allowing to configure the size of the in-memory queue used before flushing chunks to the disk . #5000 * [FEATURE] Query Frontend: Log query params in query frontend even if error happens. #5005 * [FEATURE] Ingester: Enable snapshotting of In-memory TSDB on disk during shutdown via `-blocks-storage.tsdb.memory-snapshot-on-shutdown`. #5011 +* [FEATURE] Query Frontend/Scheduler: Add a new counter metric `cortex_request_queue_requests_total` for total requests going to queue. #5030 * [BUGFIX] Updated `golang.org/x/net` dependency to fix CVE-2022-27664. #5008 ## 1.14.0 2022-12-02 From 0ae7483f48fb81d0e3d0587447d8afebc84b10c7 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Fri, 9 Dec 2022 10:57:30 -0800 Subject: [PATCH 3/4] fix lint Signed-off-by: Ben Ye --- pkg/frontend/v1/frontend.go | 1 - pkg/scheduler/scheduler.go | 1 - 2 files changed, 2 deletions(-) diff --git a/pkg/frontend/v1/frontend.go b/pkg/frontend/v1/frontend.go index 25cdea24ba5..4fc928f0523 100644 --- a/pkg/frontend/v1/frontend.go +++ b/pkg/frontend/v1/frontend.go @@ -77,7 +77,6 @@ type Frontend struct { // Metrics. queueLength *prometheus.GaugeVec discardedRequests *prometheus.CounterVec - totalRequests *prometheus.CounterVec numClients prometheus.GaugeFunc queueDuration prometheus.Histogram } diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 641ac169151..f3f7c33e939 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -59,7 +59,6 @@ type Scheduler struct { // Metrics. queueLength *prometheus.GaugeVec - totalRequests *prometheus.CounterVec discardedRequests *prometheus.CounterVec connectedQuerierClients prometheus.GaugeFunc connectedFrontendClients prometheus.GaugeFunc From ba6de156113c7cfecf6e12e1e3c96b43de9f2faf Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Fri, 9 Dec 2022 12:04:34 -0800 Subject: [PATCH 4/4] lint Signed-off-by: Ben Ye --- pkg/scheduler/queue/queue.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scheduler/queue/queue.go b/pkg/scheduler/queue/queue.go index 8aa79548ea7..21fe69b25ce 100644 --- a/pkg/scheduler/queue/queue.go +++ b/pkg/scheduler/queue/queue.go @@ -2,12 +2,12 @@ package queue import ( "context" - "github.com/prometheus/client_golang/prometheus/promauto" "sync" "time" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "go.uber.org/atomic" "github.com/cortexproject/cortex/pkg/util/services"