Skip to content

Commit f93fa09

Browse files
michaelgsharpeerhardtjustinormont
authored
AutoML aggregate exception (#5631)
* added check for aggregate exception * Update src/Microsoft.ML.AutoML/Experiment/Experiment.cs Co-authored-by: Eric Erhardt <[email protected]> * Update src/Microsoft.ML.AutoML/Experiment/Experiment.cs Co-authored-by: Eric Erhardt <[email protected]> * pulled message out to private variable so its not duplicated * Update src/Microsoft.ML.AutoML/Experiment/Experiment.cs Co-authored-by: Justin Ormont <[email protected]> Co-authored-by: Eric Erhardt <[email protected]> Co-authored-by: Justin Ormont <[email protected]>
1 parent cd7c46e commit f93fa09

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

src/Microsoft.ML.AutoML/Experiment/Experiment.cs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ internal class Experiment<TRunDetail, TMetrics> where TRunDetail : RunDetail
2727
private readonly IRunner<TRunDetail> _runner;
2828
private readonly IList<SuggestedPipelineRunDetail> _history;
2929
private readonly IChannel _logger;
30+
31+
// Composite-format warning text logged when a cancellation exception is caught in Execute();
// the {0} placeholder receives the caught exception's Message.
// The first fragment ends with a space so the joined text reads "...experiment time was reached...".
private readonly string _operationCancelledMessage = "OperationCanceledException has been caught after maximum experiment time " +
"was reached, and the running MLContext was stopped. Details: {0}";
33+
3034
private Timer _maxExperimentTimeTimer;
3135
private Timer _mainContextCanceledTimer;
3236
private bool _experimentTimerExpired;
@@ -192,10 +196,24 @@ public IList<TRunDetail> Execute()
192196
// This exception is thrown when the IHost/MLContext of the trainer is canceled due to
193197
// reaching maximum experiment time. Simply catch this exception and return finished
194198
// iteration results.
195-
_logger.Warning("OperationCanceledException has been caught after maximum experiment time" +
196-
"was reached, and the running MLContext was stopped. Details: {0}", e.Message);
199+
_logger.Warning(_operationCancelledMessage, e.Message);
197200
return iterationResults;
198201
}
202+
catch (AggregateException e)
203+
{
204+
// This exception is thrown when the IHost/MLContext of the trainer is canceled due to
205+
// reaching maximum experiment time. Simply catch this exception and return finished
206+
// iteration results. For some trainers, like FastTree, because training is done in parallel
207+
// it can throw multiple OperationCanceledExceptions. This causes them to be returned as an
208+
// AggregateException and misses the first catch block. This is to handle that case.
209+
if (e.InnerExceptions.All(exception => exception is OperationCanceledException))
210+
{
211+
_logger.Warning(_operationCancelledMessage, e.Message);
212+
return iterationResults;
213+
}
214+
215+
throw;
216+
}
199217
} while (_history.Count < _experimentSettings.MaxModels &&
200218
!_experimentSettings.CancellationToken.IsCancellationRequested &&
201219
!_experimentTimerExpired);

0 commit comments

Comments
 (0)