@@ -1039,18 +1039,18 @@ CPeriodicityHypothesisTests::best(const TNestedHypothesesVec &hypotheses) const
10391039 {
10401040 STestStats stats;
10411041 CPeriodicityHypothesisTestsResult resultForHypothesis{hypothesis.test (stats)};
1042- summaries.emplace_back (stats.s_V0 , stats.s_B - stats.s_DF0 ,
1043- std::move (resultForHypothesis));
1042+ if (stats.s_B > stats.s_DF0 )
1043+ {
1044+ summaries.emplace_back (stats.s_V0 , stats.s_B - stats.s_DF0 ,
1045+ std::move (resultForHypothesis));
1046+ }
10441047 }
10451048
10461049 TMinAccumulator vCutoff;
10471050 for (const auto &summary : summaries)
10481051 {
1049- if (summary.s_DF > 0.0 )
1050- {
1051- vCutoff.add (varianceAtPercentile (summary.s_V , summary.s_DF ,
1052- 50.0 + CONFIDENCE_INTERVAL / 2.0 ));
1053- }
1052+ vCutoff.add (varianceAtPercentile (summary.s_V , summary.s_DF ,
1053+ 50.0 + CONFIDENCE_INTERVAL / 2.0 ));
10541054 }
10551055 if (vCutoff.count () > 0 )
10561056 {
@@ -1284,6 +1284,26 @@ bool CPeriodicityHypothesisTests::seenSufficientDataToTest(core_t::TTime period,
12841284 >= 2.0 * ACCURATE_TEST_POPULATED_FRACTION * static_cast <double >(period);
12851285}
12861286
1287+ bool CPeriodicityHypothesisTests::seenSufficientPeriodicallyPopulatedBucketsToTest (const TFloatMeanAccumulatorCRng &buckets,
1288+ std::size_t period) const
1289+ {
1290+ double repeats{0.0 };
1291+ for (std::size_t i = 0u ; i < period; ++i)
1292+ {
1293+ for (std::size_t j = i + period; j < buckets.size (); j += period)
1294+ {
1295+ if ( CBasicStatistics::count (buckets[j])
1296+ * CBasicStatistics::count (buckets[j - period]) > 0.0 )
1297+ {
1298+ repeats += 1.0 ;
1299+ break ;
1300+ }
1301+ }
1302+ }
1303+ LOG_TRACE (" repeated values = " << repeats);
1304+ return repeats >= static_cast <double >(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0 ;
1305+ }
1306+
12871307bool CPeriodicityHypothesisTests::testStatisticsFor (const TFloatMeanAccumulatorCRng &buckets,
12881308 STestStats &stats) const
12891309{
@@ -1433,21 +1453,7 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
14331453
14341454 // We need to observe a minimum number of repeated values to test with
14351455 // an acceptable false positive rate.
1436- double repeats{0.0 };
1437- for (std::size_t i = 0u ; i < period; ++i)
1438- {
1439- for (std::size_t j = i + period; j < buckets.size (); j += period)
1440- {
1441- if ( CBasicStatistics::count (buckets[j])
1442- * CBasicStatistics::count (buckets[j - period]) > 0.0 )
1443- {
1444- repeats += 1.0 ;
1445- break ;
1446- }
1447- }
1448- }
1449- LOG_TRACE (" repeated values = " << repeats);
1450- if (repeats < static_cast <double >(period) * ACCURATE_TEST_POPULATED_FRACTION / 3.0 )
1456+ if (!this ->seenSufficientPeriodicallyPopulatedBucketsToTest (buckets, period))
14511457 {
14521458 return false ;
14531459 }
@@ -1493,7 +1499,8 @@ bool CPeriodicityHypothesisTests::testPeriod(const TTimeTimePr2Vec &windows,
14931499 LOG_TRACE (" significance = " << CStatisticalTests::leftTailFTest (v1 / v0, df1, df0));
14941500
14951501 double Rt{stats.s_Rt * CTools::truncate (1.0 - 0.5 * (vt - v1) / vt, 0.9 , 1.0 )};
1496- if (v1 < vt && CStatisticalTests::leftTailFTest (v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
1502+ if ( v1 < vt && B > 1.0
1503+ && CStatisticalTests::leftTailFTest (v1 / v0, df1, df0) <= MAXIMUM_SIGNIFICANCE)
14971504 {
14981505 double R{CSignal::autocorrelation (period, values)};
14991506 R = autocorrelationAtPercentile (R, B, 50.0 - CONFIDENCE_INTERVAL / 2.0 );
@@ -1567,6 +1574,15 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
15671574 {
15681575 return false ;
15691576 }
1577+
1578+ std::size_t period{static_cast <std::size_t >(period_ / m_BucketLength)};
1579+
1580+ // We need to observe a minimum number of repeated values to test with
1581+ // an acceptable false positive rate.
1582+ if (!this ->seenSufficientPeriodicallyPopulatedBucketsToTest (buckets, period))
1583+ {
1584+ return false ;
1585+ }
15701586 if (stats.s_HasPartition )
15711587 {
15721588 return true ;
@@ -1577,7 +1593,6 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
15771593 // evidence that it reduces the residual variance and repeats.
15781594
15791595 core_t ::TTime windowLength{length (buckets, m_BucketLength)};
1580- std::size_t period{static_cast <std::size_t >(period_ / m_BucketLength)};
15811596 core_t ::TTime repeat{length (partition)};
15821597 core_t ::TTime startOfPartition{stats.s_StartOfPartition };
15831598 double B{stats.s_B };
@@ -1732,11 +1747,13 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec &partition
17321747 double BW{std::accumulate (partitionValues.begin (), partitionValues.end (), 0.0 ,
17331748 [](double n, const TFloatMeanAccumulator &value)
17341749 { return n + (CBasicStatistics::count (value) > 0.0 ? 1.0 : 0.0 ); })};
1735- R = std::max (R, autocorrelationAtPercentile (CSignal::autocorrelation (
1736- windowLength_ + period, partitionValues),
1737- BW, 50.0 - CONFIDENCE_INTERVAL / 2.0 ));
1738- LOG_TRACE (" autocorrelation = " << R);
1739- LOG_TRACE (" autocorrelationThreshold = " << Rt);
1750+ if (BW > 1.0 )
1751+ {
1752+ double RW{CSignal::autocorrelation (windowLength_ + period, partitionValues)};
1753+ R = std::max (R, autocorrelationAtPercentile (RW, BW, 50.0 - CONFIDENCE_INTERVAL / 2.0 ));
1754+ LOG_TRACE (" autocorrelation = " << R);
1755+ LOG_TRACE (" autocorrelationThreshold = " << Rt);
1756+ }
17401757 }
17411758
17421759 if (R > Rt)
0 commit comments