1313
1414namespace ml {
1515namespace maths {
16+ namespace {
1617
17- double CBasicStatistics::mean (const TDoubleDoublePr& samples) {
18- return 0.5 * (samples.first + samples.second );
19- }
20-
21- double CBasicStatistics::mean (const TDoubleVec& sample) {
22- return std::accumulate (sample.begin (), sample.end (), 0.0 ) /
23- static_cast <double >(sample.size ());
24- }
18+ //! Compute the median, reordering \p data in the process.
19+ double medianInPlace (std::vector<double >& data) {
20+ std::size_t size{data.size ()};
2521
26- double CBasicStatistics::median (const TDoubleVec& dataIn) {
27- if (dataIn.empty ()) {
28- return 0.0 ;
29- }
30-
31- std::size_t size{dataIn.size ()};
32- if (size == 1 ) {
33- return dataIn[0 ];
34- }
35-
36- TDoubleVec data{dataIn};
37-
38- // If data size is even (1,2,3,4) then take mean of 2,3 = 2.5
39- // If data size is odd (1,2,3,4,5) then take middle value = 3
40- double median{0.0 };
22+ // If the number of values is even, e.g. (1,2,3,4), take the mean of the middle pair: (2 + 3) / 2 = 2.5
23+ // If the number of values is odd, e.g. (1,2,3,4,5), take the middle value: 3
24+ bool useMean{size % 2 == 0 };
4125
4226 // For an odd number of elements, this will get the median element into
4327 // place. For an even number of elements, it will get the second element
4428 // of the middle pair into place.
45- bool useMean{size % 2 == 0 };
4629 size_t index{size / 2 };
4730 std::nth_element (data.begin (), data.begin () + index, data.end ());
4831
@@ -52,12 +35,43 @@ double CBasicStatistics::median(const TDoubleVec& dataIn) {
5235 // before the nth one in the vector.
5336 auto left = std::max_element (data.begin (), data.begin () + index);
5437
55- median = (*left + data[index]) / 2.0 ;
56- } else {
57- median = data[index];
38+ return (*left + data[index]) / 2.0 ;
39+ }
40+
41+ return data[index];
42+ }
43+ }
44+
45+ double CBasicStatistics::mean (const TDoubleDoublePr& data) {
46+ return 0.5 * (data.first + data.second );
47+ }
48+
49+ double CBasicStatistics::mean (const TDoubleVec& data) {
50+ return std::accumulate (data.begin (), data.end (), 0.0 ) /
51+ static_cast <double >(data.size ());
52+ }
53+
54+ double CBasicStatistics::median (const TDoubleVec& data_) {
55+ if (data_.empty ()) {
56+ return 0.0 ;
57+ }
58+ if (data_.size () == 1 ) {
59+ return data_[0 ];
5860 }
61+ TDoubleVec data{data_};
62+ return medianInPlace (data);
63+ }
5964
60- return median;
65+ double CBasicStatistics::mad (const TDoubleVec& data_) {
66+ if (data_.size () < 2 ) {
67+ return 0.0 ;
68+ }
69+ TDoubleVec data{data_};
70+ double median{medianInPlace (data)};
71+ for (auto & datum : data) {
72+ datum = std::fabs (datum - median);
73+ }
74+ return medianInPlace (data);
6175}
6276
6377const char CBasicStatistics::INTERNAL_DELIMITER (' :' );
0 commit comments