diff --git a/src/app.rs b/src/app.rs index 263c00a1ee3..e164a373386 100644 --- a/src/app.rs +++ b/src/app.rs @@ -128,9 +128,6 @@ impl App { instance_metrics .database_time_to_obtain_connection .with_label_values(&["primary"]), - instance_metrics - .database_used_conns_histogram - .with_label_values(&["primary"]), ) .unwrap() }; @@ -158,9 +155,6 @@ impl App { instance_metrics .database_time_to_obtain_connection .with_label_values(&["follower"]), - instance_metrics - .database_used_conns_histogram - .with_label_values(&["follower"]), ) .unwrap(), ) diff --git a/src/db.rs b/src/db.rs index 25c961f8b8d..fb9418fb96f 100644 --- a/src/db.rs +++ b/src/db.rs @@ -13,7 +13,6 @@ use crate::middleware::app::RequestApp; pub enum DieselPool { Pool { pool: r2d2::Pool>, - used_conns_metric: Histogram, time_to_obtain_connection_metric: Histogram, }, Test(Arc>), @@ -23,7 +22,6 @@ impl DieselPool { pub(crate) fn new( url: &str, config: r2d2::Builder>, - used_conns_metric: Histogram, time_to_obtain_connection_metric: Histogram, ) -> Result { let manager = ConnectionManager::new(connection_url(url)); @@ -41,7 +39,6 @@ impl DieselPool { // automatically be marked as unhealthy and the rest of the application will adapt. let pool = DieselPool::Pool { pool: config.build_unchecked(manager), - used_conns_metric, time_to_obtain_connection_metric, }; match pool.wait_until_healthy(Duration::from_secs(5)) { @@ -65,13 +62,8 @@ impl DieselPool { match self { DieselPool::Pool { pool, - used_conns_metric, time_to_obtain_connection_metric, } => time_to_obtain_connection_metric.observe_closure_duration(|| { - // Record the number of used connections before obtaining the current one. - let state = pool.state(); - used_conns_metric.observe((state.connections - state.idle_connections) as f64); - if let Some(conn) = pool.try_get() { Ok(DieselPooledConn::Pool(conn)) } else if !self.is_healthy() { diff --git a/src/metrics/histogram.rs b/src/metrics/histogram.rs deleted file mode 100644 index 8c692aad146..00000000000 --- a/src/metrics/histogram.rs +++ /dev/null @@ -1,103 +0,0 @@ -use crate::metrics::macros::MetricFromOpts; -use std::marker::PhantomData; -use std::ops::{Deref, DerefMut}; - -/// Prometheus's histograms work by dividing datapoints in buckets, with each bucket containing the -/// count of datapoints equal or greater to the bucket value. -/// -/// Histogram buckets are not an exact science, so feel free to tweak the buckets or create new -/// ones if you see that the histograms are not really accurate. Just avoid adding too many buckets -/// for a single type as that increases the number of exported metric series. -pub trait HistogramBuckets { - const BUCKETS: &'static [f64]; -} - -/// Buckets geared towards measuring timings, such as the response time of our requests, going from -/// 0.5ms to 100ms with a higher resolution and from 100ms to 5 seconds with a slightly lower -/// resolution. This allows us to properly measure download requests (which take around 1ms) and -/// other requests (our 95h is around 10-20ms). -pub struct TimingBuckets; - -impl HistogramBuckets for TimingBuckets { - const BUCKETS: &'static [f64] = &[ - 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5, 1.0, 5.0, - ]; -} - -/// Buckets geared towards measuring how many database connections are used in the database pool. -pub struct DatabasePoolBuckets; - -impl HistogramBuckets for DatabasePoolBuckets { - const BUCKETS: &'static [f64] = &[0.0, 1.0, 2.0, 5.0, 10.0, 15.0, 20.0, 30.0, 50.0, 100.0]; -} - -/// Wrapper type over [`prometheus::Histogram`] to support defining buckets. -pub struct Histogram { - inner: prometheus::Histogram, - _phantom: PhantomData, -} - -impl MetricFromOpts for Histogram { - fn from_opts(opts: prometheus::Opts) -> Result { - Ok(Histogram { - inner: prometheus::Histogram::with_opts(prometheus::HistogramOpts { - common_opts: opts, - buckets: B::BUCKETS.to_vec(), - })?, - _phantom: PhantomData, - }) - } -} - -impl Deref for Histogram { - type Target = prometheus::Histogram; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -impl DerefMut for Histogram { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner - } -} - -/// Wrapper type over [`prometheus::HistogramVec`] to support defining buckets. -pub struct HistogramVec { - inner: prometheus::HistogramVec, - _phantom: PhantomData, -} - -impl MetricFromOpts for HistogramVec { - fn from_opts(opts: prometheus::Opts) -> Result { - Ok(HistogramVec { - inner: prometheus::HistogramVec::new( - prometheus::HistogramOpts { - common_opts: opts.clone(), - buckets: B::BUCKETS.to_vec(), - }, - opts.variable_labels - .iter() - .map(|s| s.as_str()) - .collect::>() - .as_slice(), - )?, - _phantom: PhantomData, - }) - } -} - -impl Deref for HistogramVec { - type Target = prometheus::HistogramVec; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -impl DerefMut for HistogramVec { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner - } -} diff --git a/src/metrics/instance.rs b/src/metrics/instance.rs index 34d992d2569..bed355dbcaf 100644 --- a/src/metrics/instance.rs +++ b/src/metrics/instance.rs @@ -17,10 +17,11 @@ //! As a rule of thumb, if the metric requires a database query to be updated it's probably a //! service-level metric, and you should add it to `src/metrics/service.rs` instead. -use crate::metrics::histogram::{DatabasePoolBuckets, Histogram, HistogramVec, TimingBuckets}; use crate::util::errors::AppResult; use crate::{app::App, db::DieselPool}; -use prometheus::{proto::MetricFamily, IntCounter, IntCounterVec, IntGauge, IntGaugeVec}; +use prometheus::{ + proto::MetricFamily, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, +}; metrics! { pub struct InstanceMetrics { @@ -29,9 +30,7 @@ metrics! { /// Number of used database connections in the pool database_used_conns: IntGaugeVec["pool"], /// Amount of time required to obtain a database connection - pub database_time_to_obtain_connection: HistogramVec["pool"], - /// Number of used database connections in the pool, as histogram - pub database_used_conns_histogram: HistogramVec["pool"], + pub database_time_to_obtain_connection: HistogramVec["pool"], /// Number of requests processed by this instance pub requests_total: IntCounter, @@ -39,7 +38,7 @@ metrics! { pub requests_in_flight: IntGauge, /// Response times of our endpoints - pub response_times: HistogramVec["endpoint"], + pub response_times: HistogramVec["endpoint"], /// Nmber of responses per status code pub responses_by_status_code_total: IntCounterVec["status"], @@ -48,7 +47,7 @@ metrics! { /// Number of download requests with a non-canonical crate name. pub downloads_non_canonical_crate_name_total: IntCounter, /// How long it takes to execute the SELECT query in the download endpoint. - pub downloads_select_query_execution_time: Histogram, + pub downloads_select_query_execution_time: Histogram, /// Number of download requests that are not counted yet. downloads_not_counted_total: IntGauge, } diff --git a/src/metrics/macros.rs b/src/metrics/macros.rs index aa41ed2c587..4c6bb830400 100644 --- a/src/metrics/macros.rs +++ b/src/metrics/macros.rs @@ -1,4 +1,19 @@ -use prometheus::Opts; +use prometheus::{Histogram, HistogramOpts, HistogramVec, Opts}; + +/// Prometheus's histograms work by dividing datapoints in buckets, with each bucket containing +/// the count of datapoints equal or greater to the bucket value. +/// +/// The buckets used by crates.io are geared towards measuring the response time of our requests, +/// going from 0.5ms to 100ms with a higher resolution and from 100ms to 5 seconds with a slightly +/// lower resolution. This allows us to properly measure download requests (which take around 1ms) +/// and other requests (our 95h is around 10-20ms). +/// +/// Histogram buckets are not an exact science, so feel free to tweak the buckets if you see that +/// the histograms are not really accurate. Just avoid adding too many buckets as that increases +/// the number of exported metric series. +const HISTOGRAM_BUCKETS: &[f64] = &[ + 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.5, 1.0, 5.0, +]; pub(super) trait MetricFromOpts: Sized { fn from_opts(opts: Opts) -> Result; @@ -90,4 +105,29 @@ load_metric_type!(GaugeVec as vec); load_metric_type!(IntGauge as single); load_metric_type!(IntGaugeVec as vec); -// Histograms are defined in histogram.rs +// Use a custom implementation for histograms to customize the buckets. + +impl MetricFromOpts for Histogram { + fn from_opts(opts: Opts) -> Result { + Histogram::with_opts(HistogramOpts { + common_opts: opts, + buckets: HISTOGRAM_BUCKETS.to_vec(), + }) + } +} + +impl MetricFromOpts for HistogramVec { + fn from_opts(opts: Opts) -> Result { + HistogramVec::new( + HistogramOpts { + common_opts: opts.clone(), + buckets: HISTOGRAM_BUCKETS.to_vec(), + }, + opts.variable_labels + .iter() + .map(|s| s.as_str()) + .collect::>() + .as_slice(), + ) + } +} diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 15b29b3fa62..656ab5aa35b 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -5,7 +5,6 @@ pub use self::service::ServiceMetrics; #[macro_use] mod macros; -mod histogram; mod instance; mod log_encoder; mod service;