Skip to content

Commit a55d37a

Browse files
committed
GHA runners collector
1 parent dd244c8 commit a55d37a

File tree

4 files changed

+196
-1
lines changed

4 files changed

+196
-1
lines changed

src/collectors/github_runners.rs

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
use crate::Config;
2+
use anyhow::Error;
3+
use log::{debug, error};
4+
use prometheus::core::Desc;
5+
use prometheus::proto::MetricFamily;
6+
use prometheus::{core::Collector, IntGauge, Opts};
7+
use reqwest::header::{ACCEPT, AUTHORIZATION, USER_AGENT};
8+
use reqwest::Method;
9+
use std::collections::HashMap;
10+
use std::sync::{Arc, RwLock};
11+
use tokio::time::Duration;
12+
13+
const GH_RUNNERS_ENDPOINT: &str = "https://api.github.com/repos/{owner_repo}/actions/runners";
14+
15+
#[derive(Debug, serde::Deserialize)]
16+
struct ApiResponse {
17+
total_count: usize,
18+
runners: Vec<Runner>,
19+
}
20+
21+
#[derive(Debug, serde::Deserialize)]
22+
struct Runner {
23+
id: usize,
24+
name: String,
25+
os: String,
26+
status: String,
27+
busy: bool,
28+
labels: Vec<Label>,
29+
}
30+
31+
#[derive(Debug, serde::Deserialize)]
32+
struct Label {
33+
id: usize,
34+
name: String,
35+
#[serde(rename = "type")]
36+
the_type: String,
37+
}
38+
39+
#[derive(Clone)]
40+
pub struct GithubRunners {
41+
//api token to use
42+
token: String,
43+
// repos to track gha runners
44+
repos: Vec<String>,
45+
// metric namespace
46+
ns: String,
47+
// actual metrics
48+
metrics: Arc<RwLock<Vec<IntGauge>>>,
49+
// default metric description
50+
desc: Desc,
51+
}
52+
53+
impl GithubRunners {
54+
pub async fn new(config: &Config) -> Result<Self, Error> {
55+
let token = config.rust_runners_token.to_string();
56+
let repos: Vec<String> = config
57+
.gha_runners_repos
58+
.split(',')
59+
.map(|v| v.trim().to_string())
60+
.collect();
61+
62+
let ns = String::from("gha_runner");
63+
let rv = Self {
64+
token,
65+
repos,
66+
ns: ns.clone(),
67+
metrics: Arc::new(RwLock::new(Vec::new())),
68+
desc: Desc::new(
69+
ns,
70+
"GHA runner's status".to_string(),
71+
Vec::new(),
72+
HashMap::new(),
73+
)
74+
.unwrap(),
75+
};
76+
77+
let refresh_rate = config.gha_runners_cache_refresh;
78+
let mut rv2 = rv.clone();
79+
tokio::spawn(async move {
80+
loop {
81+
if let Err(e) = rv2.update_stats().await {
82+
error!("{:#?}", e);
83+
}
84+
85+
tokio::time::delay_for(Duration::from_secs(refresh_rate)).await;
86+
}
87+
});
88+
89+
Ok(rv)
90+
}
91+
92+
async fn update_stats(&mut self) -> Result<(), Error> {
93+
let mut gauges = Vec::new();
94+
let client = reqwest::Client::new();
95+
96+
for repo in self.repos.iter() {
97+
let url = String::from(GH_RUNNERS_ENDPOINT).replace("{owner_repo}", repo);
98+
99+
debug!("Querying gha runner's status at: {}", url);
100+
let req = client
101+
.request(Method::GET, &url)
102+
.header(
103+
USER_AGENT,
104+
"https://github.com/rust-lang/monitorbot ([email protected])",
105+
)
106+
.header(AUTHORIZATION, format!("{} {}", "token", self.token))
107+
.header(ACCEPT, "application/vnd.github.v3+json")
108+
.build()?;
109+
110+
let resp = client.execute(req).await?.json::<ApiResponse>().await?;
111+
112+
//debug!("ApiResponse: {:#?}", resp);
113+
114+
// convert to metrics
115+
for runner in resp.runners.iter() {
116+
let status = &runner.status.clone();
117+
let value_busy = if runner.busy { 1 } else { 0 };
118+
let label_repo = repo.clone();
119+
let label_runner = runner.name.clone();
120+
121+
// online
122+
let online = IntGauge::with_opts(
123+
Opts::new("online", "runner is online.")
124+
.namespace(self.ns.clone())
125+
.const_label("repo", label_repo.clone())
126+
.const_label("runner", label_runner.clone()),
127+
)
128+
.unwrap();
129+
130+
online.set(if status == "online" { 1 } else { 0 });
131+
gauges.push(online);
132+
133+
// busy
134+
let busy = IntGauge::with_opts(
135+
Opts::new("busy", "runner is busy.")
136+
.namespace(self.ns.clone())
137+
.const_label("repo", label_repo)
138+
.const_label("runner", label_runner),
139+
)
140+
.unwrap();
141+
142+
busy.set(value_busy);
143+
gauges.push(busy);
144+
}
145+
}
146+
147+
// lock and replace old data
148+
let mut guard = self.metrics.write().unwrap();
149+
*guard = gauges;
150+
151+
Ok(())
152+
}
153+
}
154+
155+
impl Collector for GithubRunners {
156+
fn desc(&self) -> Vec<&Desc> {
157+
vec![&self.desc]
158+
}
159+
160+
fn collect(&self) -> Vec<MetricFamily> {
161+
self.metrics.read().map_or_else(
162+
|e| {
163+
error!("Unable to collect: {:#?}", e);
164+
Vec::new()
165+
},
166+
|guard| {
167+
guard.iter().fold(Vec::new(), |mut acc, item| {
168+
acc.extend(item.collect());
169+
acc
170+
})
171+
},
172+
)
173+
}
174+
}

src/collectors/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
mod github_rate_limit;
2+
mod github_runners;
23

34
pub use crate::collectors::github_rate_limit::GitHubRateLimit;
5+
pub use crate::collectors::github_runners::GithubRunners;
46

57
use crate::MetricProvider;
68
use anyhow::{Error, Result};
@@ -14,5 +16,12 @@ pub async fn register_collectors(p: &MetricProvider) -> Result<(), Error> {
1416
info!("Registering GitHubRateLimit collector");
1517
p.register_collector(rl)
1618
})
19+
.await?;
20+
21+
GithubRunners::new(&p.config)
22+
.and_then(|gr| async {
23+
info!("Registering GitHubActionsRunners collector");
24+
p.register_collector(gr)
25+
})
1726
.await
1827
}

src/config.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ const ENVIRONMENT_VARIABLE_PREFIX: &str = "MONITORBOT_";
66

77
#[derive(Clone, Debug)]
88
pub struct Config {
9+
// github api token to be used when querying for gha runner's status
10+
// note: token must have (repo scope) authorization
11+
pub rust_runners_token: String,
12+
// gh runner's repos to track their status. multiple repos are allowed
13+
// ex. "rust-lang/rust,rust-lang/cargo" (each entry replaces {owner_repo} in the runners API URL)
14+
pub gha_runners_repos: String,
15+
// gha runner's status refresh rate frequency (in seconds)
16+
pub gha_runners_cache_refresh: u64,
917
// http server port to bind to
1018
pub port: u16,
1119
// github api tokens to collect rate limit statistics
@@ -17,6 +25,9 @@ pub struct Config {
1725
impl Config {
1826
pub fn from_env() -> Result<Self, Error> {
1927
Ok(Self {
28+
rust_runners_token: require_env("RUST_ORG_TOKEN")?,
29+
gha_runners_repos: require_env("RUNNERS_REPOS")?,
30+
gha_runners_cache_refresh: default_env("GHA_RUNNERS_REFRESH", 120)?,
2031
port: default_env("PORT", 3001)?,
2132
gh_rate_limit_tokens: require_env("RATE_LIMIT_TOKENS")?,
2233
gh_rate_limit_stats_cache_refresh: default_env("GH_RATE_LIMIT_STATS_REFRESH", 120)?,

src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ pub struct MetricProvider {
2323

2424
impl MetricProvider {
2525
pub fn new(config: Config) -> Self {
26-
let register = Registry::new_custom(None, None).expect("Unable to build Registry");
26+
let register = Registry::new_custom(Some("monitorbot".to_string()), None)
27+
.expect("Unable to build Registry");
2728
Self { register, config }
2829
}
2930

0 commit comments

Comments
 (0)