Remove locks for aggregation and flushing, move to channels #32
@@ -0,0 +1,177 @@

// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

use crate::aggregator::Aggregator;
use crate::datadog::Series;
use crate::metric::{Metric, SortedTags};
use datadog_protos::metrics::SketchPayload;
use tokio::sync::{mpsc, oneshot};
use tracing::{debug, error, warn};

#[derive(Debug)]
pub enum AggregatorCommand {
    InsertBatch(Vec<Metric>),
    Flush(oneshot::Sender<FlushResponse>),
    Shutdown,
}

#[derive(Debug)]
pub struct FlushResponse {
    pub series: Vec<Series>,
    pub distributions: Vec<SketchPayload>,
}

#[derive(Clone)]
pub struct AggregatorHandle {
    tx: mpsc::UnboundedSender<AggregatorCommand>,
}

impl AggregatorHandle {
    pub fn insert_batch(
        &self,
        metrics: Vec<Metric>,
    ) -> Result<(), mpsc::error::SendError<AggregatorCommand>> {
        self.tx.send(AggregatorCommand::InsertBatch(metrics))
    }

    pub async fn flush(&self) -> Result<FlushResponse, String> {
        let (response_tx, response_rx) = oneshot::channel();
        self.tx
            .send(AggregatorCommand::Flush(response_tx))
            .map_err(|e| format!("Failed to send flush command: {}", e))?;

        response_rx
            .await
            .map_err(|e| format!("Failed to receive flush response: {}", e))
    }

    pub fn shutdown(&self) -> Result<(), mpsc::error::SendError<AggregatorCommand>> {
        self.tx.send(AggregatorCommand::Shutdown)
    }
}

pub struct AggregatorService {
    aggregator: Aggregator,
    rx: mpsc::UnboundedReceiver<AggregatorCommand>,
}

impl AggregatorService {
    pub fn new(
        tags: SortedTags,
        max_context: usize,
    ) -> Result<(Self, AggregatorHandle), crate::errors::Creation> {
        let (tx, rx) = mpsc::unbounded_channel();
        let aggregator = Aggregator::new(tags, max_context)?;

        let service = Self { aggregator, rx };

        let handle = AggregatorHandle { tx };

        Ok((service, handle))
    }

    pub async fn run(mut self) {
        debug!("Aggregator service started");

        while let Some(command) = self.rx.recv().await {
            match command {
                AggregatorCommand::InsertBatch(metrics) => {
                    let mut insert_errors = 0;
                    for metric in metrics {
                        // The only possible error here is an overflow
                        if let Err(_e) = self.aggregator.insert(metric) {
                            insert_errors += 1;
                        }
                    }
                    if insert_errors > 0 {
                        warn!("Total of {} metrics failed to insert", insert_errors);
                    }
                }

                AggregatorCommand::Flush(response_tx) => {
                    let series = self.aggregator.consume_metrics();
                    let distributions = self.aggregator.consume_distributions();

                    let response = FlushResponse {
                        series,
                        distributions,
                    };

                    if let Err(_) = response_tx.send(response) {
                        error!("Failed to send flush response - receiver dropped");
Copilot review comment: Consider using a more descriptive pattern match or binding the error to log it.
                    }
                }

                AggregatorCommand::Shutdown => {
                    debug!("Aggregator service shutting down");
                    break;
                }
            }
        }

        debug!("Aggregator service stopped");
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::metric::{parse, EMPTY_TAGS};

    #[tokio::test]
    async fn test_aggregator_service_basic_flow() {
        let (service, handle) =
            AggregatorService::new(EMPTY_TAGS, 1000).expect("Failed to create aggregator service");

        // Start the service in a background task
        let service_task = tokio::spawn(service.run());

        // Insert some metrics
        let metrics = vec![
            parse("test:1|c|#k:v").expect("metric parse failed"),
            parse("foo:2|c|#k:v").expect("metric parse failed"),
        ];

        handle
            .insert_batch(metrics)
            .expect("Failed to insert metrics");

        // Flush and check results
        let response = handle.flush().await.expect("Failed to flush");
        assert_eq!(response.series.len(), 1);
        assert_eq!(response.series[0].series.len(), 2);

        // Shutdown the service
        handle.shutdown().expect("Failed to shutdown");
        service_task.await.expect("Service task failed");
    }

    #[tokio::test]
    async fn test_aggregator_service_distributions() {
        let (service, handle) =
            AggregatorService::new(EMPTY_TAGS, 1000).expect("Failed to create aggregator service");

        // Start the service in a background task
        let service_task = tokio::spawn(service.run());

        // Insert distribution metrics
        let metrics = vec![
            parse("dist1:100|d|#k:v").expect("metric parse failed"),
            parse("dist2:200|d|#k:v").expect("metric parse failed"),
        ];

        handle
            .insert_batch(metrics)
            .expect("Failed to insert metrics");

        // Flush and check results
        let response = handle.flush().await.expect("Failed to flush");
        assert_eq!(response.distributions.len(), 1);
        assert_eq!(response.distributions[0].sketches.len(), 2);
        assert_eq!(response.series.len(), 0);

        // Shutdown the service
        handle.shutdown().expect("Failed to shutdown");
        service_task.await.expect("Service task failed");
    }
}
Review comment: Would it make sense for service.run to spawn the task itself, instead of us doing it? Or is this a way for us to keep control over it?
Reply: I'm learning that the nice part of doing it this way is that the caller can decide which tokio runtime or pool the task runs in. In this case that's the serverless-compat binary; in the other case it's bottlecap. So if we ever want to move it to the blocking thread pool or run it in a named tokio runtime, we can do that.
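To make that concrete, here is a rough caller-side sketch, hypothetical rather than code from this PR: it assumes it sits in the embedding binary's startup path, with tags and max_context standing in for whatever that binary configures. The binary builds a dedicated, named, single-worker tokio runtime and spawns the service onto it, keeping both the command handle and the join handle under its own control.

// Hypothetical embedding sketch (not part of this PR): the caller owns the runtime
// the aggregator service runs on, and keeps the handles needed to drive shutdown.
let (service, handle) =
    AggregatorService::new(tags, max_context).expect("failed to create aggregator service");

let runtime = tokio::runtime::Builder::new_multi_thread()
    .worker_threads(1)
    .thread_name("metrics-aggregator")
    .enable_all()
    .build()
    .expect("failed to build aggregator runtime");

// Because run() is an ordinary async fn, where it executes is the caller's choice.
// Shutdown stays explicit: send AggregatorCommand::Shutdown via handle, then await service_task.
let service_task = runtime.spawn(service.run());

The same future could just as easily be handed to a shared runtime the binary already owns, which is the flexibility the reply above describes.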