@@ -54,13 +54,6 @@ const std::string WEIGHT_TAG("c");
 const std::string PRIOR_TAG("d");
 const std::string DECAY_RATE_TAG("e");
 
-//! Add elements of \p x to \p y.
-void add(const TDouble10Vec& x, TDouble10Vec& y) {
-    for (std::size_t i = 0u; i < x.size(); ++i) {
-        y[i] += x[i];
-    }
-}
-
 //! Get the min of \p x and \p y.
 TDouble10Vec min(const TDouble10Vec& x, const TDouble10Vec& y) {
     TDouble10Vec result(x);
@@ -103,11 +96,6 @@ void updateMean(const TDouble10Vec10Vec& x, double nx, TDouble10Vec10Vec& mean,
     n += nx;
 }
 
-//! Get the largest element of \p x.
-double largest(const TDouble10Vec& x) {
-    return *std::max_element(x.begin(), x.end());
-}
-
 //! Add a model vector entry reading parameters from \p traverser.
 bool modelAcceptRestoreTraverser(const SDistributionRestoreParams& params,
                                  TWeightPriorPtrPrVec& models,
@@ -174,7 +162,7 @@ void modelAcceptPersistInserter(const CModelWeight& weight,
 const double DERATE = 0.99999;
 const double MINUS_INF = DERATE * boost::numeric::bounds<double>::lowest();
 const double INF = DERATE * boost::numeric::bounds<double>::highest();
-const double LOG_INITIAL_WEIGHT = std::log(1e-6);
+const double MAXIMUM_LOG_BAYES_FACTOR = std::log(1e6);
 const double MINIMUM_SIGNIFICANT_WEIGHT = 0.01;
 }
 
@@ -306,81 +294,94 @@ void CMultivariateOneOfNPrior::addSamples(const TDouble10Vec1Vec& samples,
 
     this->adjustOffset(samples, weights);
 
-    double penalty = CTools::fastLog(this->numberSamples());
+    double n{this->numberSamples()};
     this->CMultivariatePrior::addSamples(samples, weights);
-    penalty = (penalty - CTools::fastLog(this->numberSamples())) / 2.0;
+    n = this->numberSamples() - n;
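+    // n is now the number of samples added by this update, i.e. the change
+    // in this->numberSamples().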
 
     // See COneOfNPrior::addSamples for a discussion.
 
     CScopeCanonicalizeWeights<TPriorPtr> canonicalize(m_Models);
 
     // We need to check *before* adding samples to the constituent models.
-    bool isNonInformative = this->isNonInformative();
+    bool isNonInformative{this->isNonInformative()};
 
     // Compute the unnormalized posterior weights and update the component
     // priors. These weights are computed on the side since they are only
     // updated if all marginal likelihoods can be computed.
-    TDouble3Vec logLikelihoods;
-    TMaxAccumulator maxLogLikelihood;
-    TBool3Vec used, uses;
+    TDouble3Vec minusBics;
+    TBool3Vec used;
+    TBool3Vec uses;
+    bool failed{false};
+
+    // If none of the models are a good fit for the new data then restrict
+    // the maximum Bayes Factor since the data likelihood given the model
+    // is most useful if one of the models is (mostly) correct.
+    double m{std::max(n, 1.0)};
+    double maxLogBayesFactor{-m * MAXIMUM_LOG_BAYES_FACTOR};
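+    // Seeding at -m * MAXIMUM_LOG_BAYES_FACTOR means that, after the offset
+    // is added back below, the factor collapses to zero when every model's
+    // data likelihood is far below its likelihood at the mode.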
+
     for (auto& model : m_Models) {
-        bool use = model.second->participatesInModelSelection();
-
-        // Update the weights with the marginal likelihoods.
-        double logLikelihood = 0.0;
-        maths_t::EFloatingPointErrorStatus status =
-            use ? model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood)
-                : maths_t::E_FpOverflowed;
-        if (status & maths_t::E_FpFailed) {
-            LOG_ERROR(<< "Failed to compute log-likelihood");
-            LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples));
-        } else {
-            if (!(status & maths_t::E_FpOverflowed)) {
-                logLikelihood += model.second->unmarginalizedParameters() * penalty;
-                logLikelihoods.push_back(logLikelihood);
-                maxLogLikelihood.add(logLikelihood);
-            } else {
-                logLikelihoods.push_back(MINUS_INF);
+
+        double minusBic{0.0};
+        maths_t::EFloatingPointErrorStatus status{maths_t::E_FpOverflowed};
+
+        used.push_back(model.second->participatesInModelSelection());
+        if (used.back()) {
+            double logLikelihood;
+            status = model.second->jointLogMarginalLikelihood(samples, weights, logLikelihood);
+
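+            // BIC is k * log(n) - 2 * log(L); we accumulate its negation so
+            // that, as with log-likelihoods, larger values are better.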
+            minusBic = 2.0 * logLikelihood -
+                       model.second->unmarginalizedParameters() * CTools::fastLog(n);
+
+            double modeLogLikelihood;
+            TDouble10Vec1Vec modes;
+            modes.reserve(weights.size());
+            for (const auto& weight : weights) {
+                modes.push_back(model.second->marginalLikelihoodMode(weight));
             }
+            model.second->jointLogMarginalLikelihood(modes, weights, modeLogLikelihood);
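+            // The data likelihood relative to the likelihood at the mode is a
+            // per-model measure of goodness of fit: it is close to zero in
+            // log space when the new samples look typical for the model.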
+            maxLogBayesFactor = std::max(
+                maxLogBayesFactor, logLikelihood - std::max(modeLogLikelihood, logLikelihood));
+        }
+
+        minusBics.push_back((status & maths_t::E_FpOverflowed) ? MINUS_INF : minusBic);
+        failed |= (status & maths_t::E_FpFailed);
 
-            // Update the component prior distribution.
-            model.second->addSamples(samples, weights);
+        // Update the component prior distribution.
+        model.second->addSamples(samples, weights);
 
-            used.push_back(use);
-            uses.push_back(model.second->participatesInModelSelection());
+        uses.push_back(model.second->participatesInModelSelection());
+        if (!uses.back()) {
+            model.first.logWeight(MINUS_INF);
         }
     }
 
-    TDouble10Vec n(m_Dimension, 0.0);
-    for (const auto& weight : weights) {
-        add(maths_t::count(weight), n);
+    if (failed) {
+        LOG_ERROR(<< "Failed to compute log-likelihood");
+        LOG_ERROR(<< "samples = " << core::CContainerPrinter::print(samples));
+        LOG_ERROR(<< "weights = " << core::CContainerPrinter::print(weights));
+        return;
+    }
+    if (isNonInformative) {
+        return;
     }
 
-    if (!isNonInformative && maxLogLikelihood.count() > 0) {
-        LOG_TRACE(<< "logLikelihoods = " << core::CContainerPrinter::print(logLikelihoods));
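+    // Undo the seed offset: maxLogBayesFactor now lies in
+    // [0, m * MAXIMUM_LOG_BAYES_FACTOR] and is largest when at least one
+    // model fits the new samples well.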
+    maxLogBayesFactor += m * MAXIMUM_LOG_BAYES_FACTOR;
 
-        // The idea here is to limit the amount which extreme samples
-        // affect model selection, particularly early on in the model
-        // life-cycle.
-        double l = largest(n);
-        double minLogLikelihood =
-            maxLogLikelihood[0] -
-            l * std::min(maxModelPenalty(this->numberSamples()), 100.0);
+    LOG_TRACE(<< "BICs = " << core::CContainerPrinter::print(minusBics));
+    LOG_TRACE(<< "max Bayes Factor = " << maxLogBayesFactor);
 
-        TMaxAccumulator maxLogWeight;
-        for (std::size_t i = 0; i < logLikelihoods.size(); ++i) {
-            CModelWeight& weight = m_Models[i].first;
-            if (!uses[i]) {
-                weight.logWeight(MINUS_INF);
-            } else if (used[i]) {
-                weight.addLogFactor(std::max(logLikelihoods[i], minLogLikelihood));
-                maxLogWeight.add(weight.logWeight());
-            }
+    double maxLogModelWeight{MINUS_INF + m * MAXIMUM_LOG_BAYES_FACTOR};
+    double maxMinusBic{*std::max_element(minusBics.begin(), minusBics.end())};
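+    // minusBics[i] / 2.0 is the BIC approximation to a model's log evidence;
+    // flooring it at maxMinusBic / 2.0 - maxLogBayesFactor caps how far a
+    // single update can push any model's weight below the best model's.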
+    for (std::size_t i = 0; i < m_Models.size(); ++i) {
+        if (used[i] && uses[i]) {
+            m_Models[i].first.addLogFactor(
+                std::max(minusBics[i] / 2.0, maxMinusBic / 2.0 - maxLogBayesFactor));
+            maxLogModelWeight = std::max(maxLogModelWeight, m_Models[i].first.logWeight());
         }
-        for (std::size_t i = 0u; i < m_Models.size(); ++i) {
-            if (!used[i] && uses[i]) {
-                m_Models[i].first.logWeight(maxLogWeight[0] + LOG_INITIAL_WEIGHT);
-            }
+    }
+    for (std::size_t i = 0; i < m_Models.size(); ++i) {
+        if (!used[i] && uses[i]) {
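+            // Models which have only just become eligible for selection start
+            // m * MAXIMUM_LOG_BAYES_FACTOR below the current best log-weight.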
+            m_Models[i].first.logWeight(maxLogModelWeight - m * MAXIMUM_LOG_BAYES_FACTOR);
         }
     }
 