@@ -515,21 +515,140 @@ impl AlertQueryResult {
515515 }
516516}
517517
518- #[ derive( serde :: Deserialize ) ]
518+ #[ derive( Deserialize ) ]
519519pub struct NotificationStateRequest {
520520 pub state : String ,
521521}
522522
523+ /// MTTR (Mean Time To Recovery) statistics
524+ #[ derive( Debug , Clone , Serialize , Deserialize ) ]
525+ #[ serde( rename_all = "camelCase" ) ]
526+ pub struct MTTRStats {
527+ /// Total number of incidents (triggered -> not-triggered cycles)
528+ pub total_incidents : usize ,
529+ /// Mean recovery time in seconds
530+ pub mean_seconds : f64 ,
531+ /// Median recovery time in seconds
532+ pub median_seconds : f64 ,
533+ /// Minimum recovery time in seconds
534+ pub min_seconds : f64 ,
535+ /// Maximum recovery time in seconds
536+ pub max_seconds : f64 ,
537+ /// All individual recovery times in seconds
538+ pub recovery_times_seconds : Vec < i64 > ,
539+ }
540+
541+ impl MTTRStats {
542+ /// Check if there are no incidents recorded
543+ pub fn is_empty ( & self ) -> bool {
544+ self . total_incidents == 0
545+ }
546+
547+ /// Create MTTRStats from a list of recovery times
548+ pub fn from_recovery_times ( recovery_times : Vec < i64 > ) -> MTTRStats {
549+ if recovery_times. is_empty ( ) {
550+ return MTTRStats :: default ( ) ;
551+ }
552+
553+ let total_incidents = recovery_times. len ( ) ;
554+ let total_recovery_time: i64 = recovery_times. iter ( ) . sum ( ) ;
555+ let mean_seconds = total_recovery_time as f64 / total_incidents as f64 ;
556+
557+ let min_seconds = * recovery_times. iter ( ) . min ( ) . unwrap ( ) as f64 ;
558+ let max_seconds = * recovery_times. iter ( ) . max ( ) . unwrap ( ) as f64 ;
559+
560+ // Calculate median
561+ let median_seconds = if total_incidents == 1 {
562+ recovery_times[ 0 ] as f64
563+ } else {
564+ let mut sorted_times = recovery_times. clone ( ) ;
565+ sorted_times. sort_unstable ( ) ;
566+
567+ if total_incidents. is_multiple_of ( 2 ) {
568+ let mid = total_incidents / 2 ;
569+ ( sorted_times[ mid - 1 ] + sorted_times[ mid] ) as f64 / 2.0
570+ } else {
571+ sorted_times[ total_incidents / 2 ] as f64
572+ }
573+ } ;
574+
575+ MTTRStats {
576+ total_incidents,
577+ mean_seconds,
578+ median_seconds,
579+ min_seconds,
580+ max_seconds,
581+ recovery_times_seconds : recovery_times,
582+ }
583+ }
584+ }
585+
586+ impl Default for MTTRStats {
587+ fn default ( ) -> Self {
588+ Self {
589+ total_incidents : 0 ,
590+ mean_seconds : 0.0 ,
591+ median_seconds : 0.0 ,
592+ min_seconds : 0.0 ,
593+ max_seconds : 0.0 ,
594+ recovery_times_seconds : Vec :: new ( ) ,
595+ }
596+ }
597+ }
598+
599+ /// Aggregated MTTR statistics across multiple alerts
600+ #[ derive( Debug , Clone , Serialize , Deserialize ) ]
601+ #[ serde( rename_all = "camelCase" ) ]
602+ pub struct AggregatedMTTRStats {
603+ /// Overall MTTR statistics
604+ pub overall : MTTRStats ,
605+ /// Number of alerts included in the calculation
606+ pub total_alerts : usize ,
607+ /// Number of alerts that had incidents
608+ pub alerts_with_incidents : usize ,
609+ /// Per-alert breakdown (optional, for detailed analysis)
610+ pub per_alert_stats : HashMap < String , MTTRStats > ,
611+ }
612+
613+ impl AggregatedMTTRStats {
614+ /// Calculate aggregated MTTR stats from multiple alert state entries
615+ pub fn from_alert_states ( alert_states : Vec < AlertStateEntry > ) -> Self {
616+ let mut all_recovery_times = Vec :: new ( ) ;
617+ let mut per_alert_stats = HashMap :: new ( ) ;
618+ let mut alerts_with_incidents = 0 ;
619+
620+ for alert_state in & alert_states {
621+ let alert_stats = alert_state. get_mttr_stats ( ) ;
622+
623+ if !alert_stats. is_empty ( ) {
624+ alerts_with_incidents += 1 ;
625+ all_recovery_times. extend ( alert_stats. recovery_times_seconds . iter ( ) ) ;
626+
627+ per_alert_stats. insert ( alert_state. alert_id . to_string ( ) , alert_stats) ;
628+ }
629+ }
630+
631+ let overall = MTTRStats :: from_recovery_times ( all_recovery_times) ;
632+
633+ Self {
634+ overall,
635+ total_alerts : alert_states. len ( ) ,
636+ alerts_with_incidents,
637+ per_alert_stats,
638+ }
639+ }
640+ }
641+
523642/// Represents a single state transition
524- #[ derive( Debug , Clone , serde :: Serialize , serde :: Deserialize ) ]
643+ #[ derive( Debug , Clone , Serialize , Deserialize ) ]
525644pub struct StateTransition {
526645 /// The alert state
527646 pub state : AlertState ,
528647 /// Timestamp when this state was set/updated
529648 pub last_updated_at : DateTime < Utc > ,
530649}
531650
532- #[ derive( Debug , Clone , serde :: Serialize , serde :: Deserialize ) ]
651+ #[ derive( Debug , Clone , Serialize , Deserialize ) ]
533652pub struct AlertStateEntry {
534653 /// The unique identifier for the alert
535654 pub alert_id : Ulid ,
@@ -581,6 +700,60 @@ impl AlertStateEntry {
581700 pub fn current_state ( & self ) -> Option < & StateTransition > {
582701 self . states . last ( )
583702 }
703+
704+ /// Get all recovery times (in seconds) from triggered to not-triggered
705+ /// Returns recovery times in chronological order
706+ pub fn get_recovery_times ( & self ) -> Vec < i64 > {
707+ let mut recovery_times = Vec :: new ( ) ;
708+ let mut trigger_time: Option < DateTime < Utc > > = None ;
709+
710+ // Create a sorted view without mutating the original
711+ let mut sorted_states = self . states . clone ( ) ;
712+ sorted_states. sort_by ( |a, b| a. last_updated_at . cmp ( & b. last_updated_at ) ) ;
713+
714+ for transition in & sorted_states {
715+ match transition. state {
716+ AlertState :: Triggered => {
717+ // Record when alert was triggered
718+ trigger_time = Some ( transition. last_updated_at ) ;
719+ }
720+ AlertState :: NotTriggered => {
721+ // If we have a trigger time, calculate recovery time
722+ if let Some ( triggered_at) = trigger_time {
723+ let recovery_duration = transition
724+ . last_updated_at
725+ . signed_duration_since ( triggered_at) ;
726+ let recovery_seconds = recovery_duration. num_seconds ( ) ;
727+
728+ // Only include positive durations (validation against clock issues)
729+ if recovery_seconds > 0 {
730+ recovery_times. push ( recovery_seconds) ;
731+ } else {
732+ tracing:: warn!(
733+ "Negative or zero recovery time detected: {} seconds. Triggered at: {}, Recovered at: {}" ,
734+ recovery_seconds,
735+ triggered_at,
736+ transition. last_updated_at
737+ ) ;
738+ }
739+ trigger_time = None ; // Reset for next cycle
740+ }
741+ }
742+ AlertState :: Disabled => {
743+ // Ignore disabled state - it doesn't affect MTTR calculation
744+ // until it's explicitly resolved (moves to not-triggered)
745+ }
746+ }
747+ }
748+
749+ recovery_times
750+ }
751+
752+ /// This is the method that is used for MTTR statistics
753+ pub fn get_mttr_stats ( & self ) -> MTTRStats {
754+ let recovery_times = self . get_recovery_times ( ) ;
755+ MTTRStats :: from_recovery_times ( recovery_times)
756+ }
584757}
585758
586759impl MetastoreObject for AlertStateEntry {
0 commit comments