diff --git a/Cargo.lock b/Cargo.lock index 4da524e7ce8c0..75da70c0fda54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6090,6 +6090,7 @@ dependencies = [ "sp-keyring", "sp-runtime", "sp-state-machine", + "substrate-prometheus-endpoint", "substrate-test-runtime-client", "tempfile", "tokio 0.2.12", @@ -6174,6 +6175,7 @@ dependencies = [ "sp-keyring", "sp-runtime", "sp-test-primitives", + "substrate-prometheus-endpoint", "substrate-test-runtime", "substrate-test-runtime-client", "tempfile", diff --git a/bin/node-template/node/src/service.rs b/bin/node-template/node/src/service.rs index 7a714d5d7a371..f87aaf5ec25a8 100644 --- a/bin/node-template/node/src/service.rs +++ b/bin/node-template/node/src/service.rs @@ -157,6 +157,7 @@ pub fn new_full(config: Configuration) on_exit: service.on_exit(), telemetry_on_connect: Some(service.telemetry_on_connect_stream()), voting_rule: grandpa::VotingRulesBuilder::default().build(), + prometheus_registry: service.prometheus_registry() }; // the GRANDPA voter task is considered infallible, i.e. diff --git a/bin/node/cli/src/service.rs b/bin/node/cli/src/service.rs index 7df266a687add..f31fa0b9730b0 100644 --- a/bin/node/cli/src/service.rs +++ b/bin/node/cli/src/service.rs @@ -228,6 +228,7 @@ macro_rules! new_full { on_exit: service.on_exit(), telemetry_on_connect: Some(service.telemetry_on_connect_stream()), voting_rule: grandpa::VotingRulesBuilder::default().build(), + prometheus_registry: service.prometheus_registry(), }; // the GRANDPA voter task is considered infallible, i.e. diff --git a/client/finality-grandpa/Cargo.toml b/client/finality-grandpa/Cargo.toml index 8dcc24c3da113..a58da417c0126 100644 --- a/client/finality-grandpa/Cargo.toml +++ b/client/finality-grandpa/Cargo.toml @@ -35,6 +35,7 @@ sc-network = { version = "0.8.0-alpha.2", path = "../network" } sc-network-gossip = { version = "0.8.0-alpha.2", path = "../network-gossip" } sp-finality-tracker = { version = "2.0.0-alpha.2", path = "../../primitives/finality-tracker" } sp-finality-grandpa = { version = "2.0.0-alpha.2", path = "../../primitives/finality-grandpa" } +prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.8.0-alpha.2" } sc-block-builder = { version = "0.8.0-alpha.2", path = "../block-builder" } finality-grandpa = { version = "0.11.1", features = ["derive-codec"] } pin-project = "0.4.6" diff --git a/client/finality-grandpa/src/environment.rs b/client/finality-grandpa/src/environment.rs index dca1eab993262..21a44c32dc9ea 100644 --- a/client/finality-grandpa/src/environment.rs +++ b/client/finality-grandpa/src/environment.rs @@ -58,6 +58,7 @@ use crate::justification::GrandpaJustification; use crate::until_imported::UntilVoteTargetImported; use crate::voting_rule::VotingRule; use sp_finality_grandpa::{AuthorityId, AuthoritySignature, SetId, RoundNumber}; +use prometheus_endpoint::{Gauge, U64, register, PrometheusError}; type HistoricalVotes = finality_grandpa::HistoricalVotes< ::Hash, @@ -372,6 +373,24 @@ impl SharedVoterSetState { } } +/// Prometheus metrics for GRANDPA. +#[derive(Clone)] +pub(crate) struct Metrics { + finality_grandpa_round: Gauge, +} + +impl Metrics { + pub(crate) fn register(registry: &prometheus_endpoint::Registry) -> Result { + Ok(Self { + finality_grandpa_round: register( + Gauge::new("finality_grandpa_round", "Highest completed GRANDPA round.")?, + registry + )?, + }) + } +} + + /// The environment we run GRANDPA in. pub(crate) struct Environment, SC, VR> { pub(crate) client: Arc, @@ -384,6 +403,7 @@ pub(crate) struct Environment, SC, pub(crate) set_id: SetId, pub(crate) voter_set_state: SharedVoterSetState, pub(crate) voting_rule: VR, + pub(crate) metrics: Option, pub(crate) _phantom: PhantomData, } @@ -397,6 +417,17 @@ impl, SC, VR> Environment { pub telemetry_on_connect: Option>, /// A voting rule used to potentially restrict target votes. pub voting_rule: VR, + /// The prometheus metrics registry. + pub prometheus_registry: Option, } /// Run a GRANDPA voter as a task. Provide configuration and a link to a @@ -576,6 +578,7 @@ pub fn run_grandpa_voter( on_exit, telemetry_on_connect, voting_rule, + prometheus_registry, } = grandpa_params; // NOTE: we have recently removed `run_grandpa_observer` from the public @@ -634,6 +637,7 @@ pub fn run_grandpa_voter( voting_rule, persistent_data, voter_commands_rx, + prometheus_registry, ); let voter_work = voter_work @@ -673,6 +677,7 @@ where voting_rule: VR, persistent_data: PersistentData, voter_commands_rx: mpsc::UnboundedReceiver>>, + prometheus_registry: Option, ) -> Self { let voters = persistent_data.authority_set.current_authorities(); @@ -687,6 +692,10 @@ where authority_set: persistent_data.authority_set.clone(), consensus_changes: persistent_data.consensus_changes.clone(), voter_set_state: persistent_data.set_state.clone(), + metrics: prometheus_registry.map(|registry| { + Metrics::register(®istry) + .expect("Other metrics would have failed to register before these; qed") + }), _phantom: PhantomData, }); @@ -807,6 +816,7 @@ where consensus_changes: self.env.consensus_changes.clone(), network: self.env.network.clone(), voting_rule: self.env.voting_rule.clone(), + metrics: self.env.metrics.clone(), _phantom: PhantomData, }); diff --git a/client/finality-grandpa/src/tests.rs b/client/finality-grandpa/src/tests.rs index 6e81c2f00af5d..7b5880f7ee5eb 100644 --- a/client/finality-grandpa/src/tests.rs +++ b/client/finality-grandpa/src/tests.rs @@ -447,6 +447,7 @@ fn run_to_completion_with( on_exit: Exit, telemetry_on_connect: None, voting_rule: (), + prometheus_registry: None, }; let voter = run_grandpa_voter(grandpa_params).expect("all in order with client and network"); @@ -578,6 +579,7 @@ fn finalize_3_voters_1_full_observer() { on_exit: Exit, telemetry_on_connect: None, voting_rule: (), + prometheus_registry: None, }; voters.push(run_grandpa_voter(grandpa_params).expect("all in order with client and network")); @@ -741,6 +743,7 @@ fn transition_3_voters_twice_1_full_observer() { on_exit: Exit, telemetry_on_connect: None, voting_rule: (), + prometheus_registry: None, }; let voter = run_grandpa_voter(grandpa_params).expect("all in order with client and network"); @@ -1166,6 +1169,7 @@ fn voter_persists_its_votes() { on_exit: Exit, telemetry_on_connect: None, voting_rule: VotingRulesBuilder::default().build(), + prometheus_registry: None, }; let voter = run_grandpa_voter(grandpa_params) @@ -1511,6 +1515,7 @@ fn voter_catches_up_to_latest_round_when_behind() { on_exit: Exit, telemetry_on_connect: None, voting_rule: (), + prometheus_registry: None, }; Box::pin(run_grandpa_voter(grandpa_params).expect("all in order with client and network")) @@ -1642,6 +1647,7 @@ fn grandpa_environment_respects_voting_rules() { voters: Arc::new(authority_set.current_authorities()), network, voting_rule, + metrics: None, _phantom: PhantomData, } }; diff --git a/client/network/Cargo.toml b/client/network/Cargo.toml index 50a19e9da0584..565796e3198a7 100644 --- a/client/network/Cargo.toml +++ b/client/network/Cargo.toml @@ -52,6 +52,7 @@ sp-consensus = { version = "0.8.0-alpha.2", path = "../../primitives/consensus/c sp-consensus-babe = { version = "0.8.0-alpha.2", path = "../../primitives/consensus/babe" } sp-core = { version = "2.0.0-alpha.2", path = "../../primitives/core" } sp-runtime = { version = "2.0.0-alpha.2", path = "../../primitives/runtime" } +prometheus-endpoint = { package = "substrate-prometheus-endpoint", version = "0.8.0-alpha.2", path = "../../utils/prometheus" } thiserror = "1" unsigned-varint = { version = "0.3.1", features = ["futures", "futures-codec"] } void = "1.0.2" diff --git a/client/network/src/config.rs b/client/network/src/config.rs index f5cad5977fc47..76c0c9b5440f7 100644 --- a/client/network/src/config.rs +++ b/client/network/src/config.rs @@ -41,6 +41,7 @@ use core::{fmt, iter}; use std::{future::Future, pin::Pin}; use std::{error::Error, fs, io::{self, Write}, net::Ipv4Addr, path::{Path, PathBuf}, sync::Arc}; use zeroize::Zeroize; +use prometheus_endpoint::Registry; /// Network initialization parameters. pub struct Params { @@ -90,6 +91,9 @@ pub struct Params { /// Type to check incoming block announcements. pub block_announce_validator: Box + Send>, + + /// Registry for recording prometheus metrics to. + pub metrics_registry: Option, } bitflags! { diff --git a/client/network/src/error.rs b/client/network/src/error.rs index ba5d5c2d0d2b5..158e75fcf1d72 100644 --- a/client/network/src/error.rs +++ b/client/network/src/error.rs @@ -45,6 +45,8 @@ pub enum Error { /// The second peer id that was found for the bootnode. second_id: PeerId, }, + /// Prometheus metrics error. + Prometheus(prometheus_endpoint::PrometheusError) } // Make `Debug` use the `Display` implementation. @@ -60,6 +62,7 @@ impl std::error::Error for Error { Error::Io(ref err) => Some(err), Error::Client(ref err) => Some(err), Error::DuplicateBootnode { .. } => None, + Error::Prometheus(ref err) => Some(err), } } } diff --git a/client/network/src/service.rs b/client/network/src/service.rs index 288289d95c81e..821847add1df1 100644 --- a/client/network/src/service.rs +++ b/client/network/src/service.rs @@ -39,6 +39,7 @@ use libp2p::swarm::{NetworkBehaviour, SwarmBuilder, SwarmEvent}; use parking_lot::Mutex; use sc_peerset::PeersetHandle; use sp_runtime::{traits::{Block as BlockT, NumberFor}, ConsensusEngineId}; +use prometheus_endpoint::{Registry, Gauge, U64, register, PrometheusError}; use crate::{behaviour::{Behaviour, BehaviourOut}, config::{parse_str_addr, parse_addr}}; use crate::{transport, config::NonReservedPeerMode, ReputationChange}; @@ -294,6 +295,10 @@ impl NetworkWorker { from_worker, light_client_rqs: params.on_demand.and_then(|od| od.extract_receiver()), event_streams: Vec::new(), + metrics: match params.metrics_registry { + Some(registry) => Some(Metrics::register(®istry)?), + None => None + } }) } @@ -727,6 +732,26 @@ pub struct NetworkWorker { light_client_rqs: Option>>, /// Senders for events that happen on the network. event_streams: Vec>, + /// Prometheus network metrics. + metrics: Option +} + +struct Metrics { + is_major_syncing: Gauge, + peers_count: Gauge, +} + +impl Metrics { + fn register(registry: &Registry) -> Result { + Ok(Self { + is_major_syncing: register(Gauge::new( + "is_major_syncing", "Whether the node is performing a major sync or not.", + )?, registry)?, + peers_count: register(Gauge::new( + "peers_count", "Number of network gossip peers", + )?, registry)?, + }) + } } impl Future for NetworkWorker { @@ -818,16 +843,26 @@ impl Future for NetworkWorker { }; } + let num_connected_peers = this.network_service.user_protocol_mut().num_connected_peers(); + // Update the variables shared with the `NetworkService`. - this.num_connected.store(this.network_service.user_protocol_mut().num_connected_peers(), Ordering::Relaxed); + this.num_connected.store(num_connected_peers, Ordering::Relaxed); { let external_addresses = Swarm::::external_addresses(&this.network_service).cloned().collect(); *this.external_addresses.lock() = external_addresses; } - this.is_major_syncing.store(match this.network_service.user_protocol_mut().sync_state() { + + let is_major_syncing = match this.network_service.user_protocol_mut().sync_state() { SyncState::Idle => false, SyncState::Downloading => true, - }, Ordering::Relaxed); + }; + + this.is_major_syncing.store(is_major_syncing, Ordering::Relaxed); + + if let Some(metrics) = this.metrics.as_ref() { + metrics.is_major_syncing.set(is_major_syncing as u64); + metrics.peers_count.set(num_connected_peers as u64); + } Poll::Pending } diff --git a/client/network/test/src/lib.rs b/client/network/test/src/lib.rs index 94eeb8e5fbc0f..0add6c63d5ac9 100644 --- a/client/network/test/src/lib.rs +++ b/client/network/test/src/lib.rs @@ -640,7 +640,8 @@ pub trait TestNetFactory: Sized { transaction_pool: Arc::new(EmptyTransactionPool), protocol_id: ProtocolId::from(&b"test-protocol-name"[..]), import_queue, - block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone())) + block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone())), + metrics_registry: None, }).unwrap(); self.mut_peers(|peers| { @@ -713,7 +714,8 @@ pub trait TestNetFactory: Sized { transaction_pool: Arc::new(EmptyTransactionPool), protocol_id: ProtocolId::from(&b"test-protocol-name"[..]), import_queue, - block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone())) + block_announce_validator: Box::new(DefaultBlockAnnounceValidator::new(client.clone())), + metrics_registry: None, }).unwrap(); self.mut_peers(|peers| { diff --git a/client/service/Cargo.toml b/client/service/Cargo.toml index 80ba12ca0006e..5ae4a3a526aae 100644 --- a/client/service/Cargo.toml +++ b/client/service/Cargo.toml @@ -56,7 +56,7 @@ sc-rpc = { version = "2.0.0-alpha.2", path = "../rpc" } sc-telemetry = { version = "2.0.0-alpha.2", path = "../telemetry" } sc-offchain = { version = "2.0.0-alpha.2", path = "../offchain" } parity-multiaddr = { package = "parity-multiaddr", version = "0.5.0" } -substrate-prometheus-endpoint = { path = "../../utils/prometheus" , version = "0.8.0-alpha.2"} +prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus" , version = "0.8.0-alpha.2"} sc-tracing = { version = "2.0.0-alpha.2", path = "../tracing" } tracing = "0.1.10" parity-util-mem = { version = "0.5.1", default-features = false, features = ["primitive-types"] } diff --git a/client/service/src/builder.rs b/client/service/src/builder.rs index e5e4e132f9cd9..ebe91176ef449 100644 --- a/client/service/src/builder.rs +++ b/client/service/src/builder.rs @@ -53,15 +53,15 @@ use sysinfo::{get_current_pid, ProcessExt, System, SystemExt}; use sc_telemetry::{telemetry, SUBSTRATE_INFO}; use sp_transaction_pool::{MaintainedTransactionPool, ChainEvent}; use sp_blockchain; -use substrate_prometheus_endpoint::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec}; +use prometheus_endpoint::{register, Gauge, U64, F64, Registry, PrometheusError, Opts, GaugeVec}; struct ServiceMetrics { block_height_number: GaugeVec, - peers_count: Gauge, ready_transactions_number: Gauge, memory_usage_bytes: Gauge, cpu_usage_percentage: Gauge, network_per_sec_bytes: GaugeVec, + node_roles: Gauge, } impl ServiceMetrics { @@ -71,9 +71,6 @@ impl ServiceMetrics { Opts::new("block_height_number", "Height of the chain"), &["status"] )?, registry)?, - peers_count: register(Gauge::new( - "peers_count", "Number of network gossip peers", - )?, registry)?, ready_transactions_number: register(Gauge::new( "ready_transactions_number", "Number of transactions in the ready queue", )?, registry)?, @@ -87,6 +84,10 @@ impl ServiceMetrics { Opts::new("network_per_sec_bytes", "Networking bytes per second"), &["direction"] )?, registry)?, + node_roles: register(Gauge::new( + "node_roles", "The roles the node is running as", + )?, registry)?, + }) } } @@ -887,6 +888,14 @@ ServiceBuilder< let block_announce_validator = Box::new(sp_consensus::block_validation::DefaultBlockAnnounceValidator::new(client.clone())); + let prometheus_registry_and_port = match config.prometheus_port { + Some(port) => match prometheus_registry { + Some(registry) => Some((registry, port)), + None => Some((Registry::new_custom(Some("substrate".into()), None)?, port)) + }, + None => None + }; + let network_params = sc_network::config::Params { roles: config.roles, executor: { @@ -906,6 +915,7 @@ ServiceBuilder< import_queue, protocol_id, block_announce_validator, + metrics_registry: prometheus_registry_and_port.as_ref().map(|(r, _)| r.clone()) }; let has_bootnodes = !network_params.network_config.boot_nodes.is_empty(); @@ -1020,17 +1030,14 @@ ServiceBuilder< )); } - // Prometheus endpoint and metrics - let metrics = if let Some(port) = config.prometheus_port { - let registry = match prometheus_registry { - Some(registry) => registry, - None => Registry::new_custom(Some("substrate".into()), None)? - }; - + // Prometheus metrics + let metrics = if let Some((registry, port)) = prometheus_registry_and_port.clone() { let metrics = ServiceMetrics::register(®istry)?; + metrics.node_roles.set(u64::from(config.roles.bits())); + let future = select( - substrate_prometheus_endpoint::init_prometheus(port, registry).boxed(), + prometheus_endpoint::init_prometheus(port, registry).boxed(), exit.clone() ).map(drop); @@ -1043,7 +1050,6 @@ ServiceBuilder< } else { None }; - // Periodically notify the telemetry. let transaction_pool_ = transaction_pool.clone(); let client_ = client.clone(); @@ -1094,7 +1100,6 @@ ServiceBuilder< metrics.memory_usage_bytes.set(memory); metrics.cpu_usage_percentage.set(f64::from(cpu_usage)); metrics.ready_transactions_number.set(txpool_status.ready as u64); - metrics.peers_count.set(num_peers as u64); metrics.network_per_sec_bytes.with_label_values(&["download"]).set(net_status.average_download_per_sec); metrics.network_per_sec_bytes.with_label_values(&["upload"]).set(net_status.average_upload_per_sec); @@ -1297,6 +1302,7 @@ ServiceBuilder< _telemetry_on_connect_sinks: telemetry_connection_sinks.clone(), keystore, marker: PhantomData::, + prometheus_registry: prometheus_registry_and_port.map(|(r, _)| r) }) } } diff --git a/client/service/src/error.rs b/client/service/src/error.rs index adf630e44c02a..5a78a18789230 100644 --- a/client/service/src/error.rs +++ b/client/service/src/error.rs @@ -53,8 +53,8 @@ impl<'a> From<&'a str> for Error { } } -impl From for Error { - fn from(e: substrate_prometheus_endpoint::PrometheusError) -> Self { +impl From for Error { + fn from(e: prometheus_endpoint::PrometheusError) -> Self { Error::Other(format!("Prometheus error: {}", e)) } } diff --git a/client/service/src/lib.rs b/client/service/src/lib.rs index c4678eb28d20e..5c59cdf91fcf4 100644 --- a/client/service/src/lib.rs +++ b/client/service/src/lib.rs @@ -114,6 +114,7 @@ pub struct Service { _offchain_workers: Option>, keystore: sc_keystore::KeyStorePtr, marker: PhantomData, + prometheus_registry: Option, } impl Unpin for Service {} @@ -225,6 +226,9 @@ pub trait AbstractService: 'static + Future> + /// Get a handle to a future that will resolve on exit. fn on_exit(&self) -> ::exit_future::Exit; + + /// Get the prometheus metrics registry, if available. + fn prometheus_registry(&self) -> Option; } impl AbstractService for @@ -328,6 +332,10 @@ where fn on_exit(&self) -> exit_future::Exit { self.exit.clone() } + + fn prometheus_registry(&self) -> Option { + self.prometheus_registry.clone() + } } impl Future for