6 changes: 0 additions & 6 deletions src/Microsoft.ML.AutoML.Interactive/NotebookMonitor.cs
@@ -24,8 +24,6 @@ public class NotebookMonitor : IMonitor
public List<TrialResult> CompletedTrials { get; set; }
public DataFrame TrialData { get; set; }

public int ResourceUsageCheckInterval => 5000;

public NotebookMonitor(SweepablePipeline pipeline)
{
CompletedTrials = new List<TrialResult>();
@@ -86,9 +84,5 @@ public void SetUpdate(DisplayedValue valueToUpdate)
_valueToUpdate = valueToUpdate;
ThrottledUpdate();
}

public void ReportTrialResourceUsage(TrialSettings setting)
{
}
}
}
39 changes: 37 additions & 2 deletions src/Microsoft.ML.AutoML/API/AutoMLExperimentExtension.cs
@@ -149,18 +149,53 @@ public static AutoMLExperiment SetPipeline(this AutoMLExperiment experiment, Swe
return experiment;
}

/// <summary>
/// Set <see cref="DefaultPerformanceMonitor"/> as <see cref="IPerformanceMonitor"/> for <see cref="AutoMLExperiment"/>.
/// </summary>
/// <param name="experiment"><see cref="AutoMLExperiment"/></param>
/// <param name="checkIntervalInMilliseconds">the interval in milliseconds for <see cref="DefaultPerformanceMonitor"/> to sample <see cref="TrialPerformanceMetrics"/></param>
/// <returns></returns>
public static AutoMLExperiment SetPerformanceMonitor(this AutoMLExperiment experiment, int checkIntervalInMilliseconds = 1000)
{
experiment.SetPerformanceMonitor((service) =>
{
var channel = service.GetService<IChannel>();

return new DefaultPerformanceMonitor(channel, checkIntervalInMilliseconds);
var settings = service.GetRequiredService<AutoMLExperiment.AutoMLExperimentSettings>();
return new DefaultPerformanceMonitor(settings, channel, checkIntervalInMilliseconds);
});

return experiment;
}
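For reference, a minimal usage sketch of this overload, assuming an experiment created through the usual MLContext.Auto().CreateExperiment() entry point; the 500 ms interval is purely illustrative:

```csharp
// Sample TrialPerformanceMetrics every 500 ms with the built-in DefaultPerformanceMonitor.
// Pipeline, metric and trial-runner configuration are elided; only the monitor call matters here.
var context = new MLContext();
var experiment = context.Auto().CreateExperiment();

experiment.SetPerformanceMonitor(checkIntervalInMilliseconds: 500);
```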

/// <summary>
/// Set a custom performance monitor as <see cref="IPerformanceMonitor"/> for <see cref="AutoMLExperiment"/>.
/// </summary>
/// <typeparam name="TPerformanceMonitor"></typeparam>
/// <param name="experiment"><see cref="AutoMLExperiment"/></param>
/// <param name="factory"></param>
/// <returns></returns>
public static AutoMLExperiment SetPerformanceMonitor<TPerformanceMonitor>(this AutoMLExperiment experiment, Func<IServiceProvider, TPerformanceMonitor> factory)
where TPerformanceMonitor : class, IPerformanceMonitor
Author comment: You can inject your own IPerformanceMonitor here (a usage sketch follows these two overloads).

{
experiment.ServiceCollection.AddTransient<IPerformanceMonitor>(factory);

return experiment;
}

/// <summary>
/// Set a custom performance monitor as <see cref="IPerformanceMonitor"/> for <see cref="AutoMLExperiment"/>.
/// </summary>
/// <typeparam name="TPerformanceMonitor"></typeparam>
/// <param name="experiment"><see cref="AutoMLExperiment"/></param>
/// <returns></returns>
public static AutoMLExperiment SetPerformanceMonitor<TPerformanceMonitor>(this AutoMLExperiment experiment)
where TPerformanceMonitor : class, IPerformanceMonitor
{
experiment.ServiceCollection.AddTransient<IPerformanceMonitor, TPerformanceMonitor>();

return experiment;
}
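As the inline comment above notes, callers can now plug in their own IPerformanceMonitor. A rough sketch of both overloads, where MyPerformanceMonitor is a hypothetical class implementing IPerformanceMonitor and experiment is an AutoMLExperiment configured as in the earlier sketch:

```csharp
// Register a custom monitor by type; the service collection constructs it for each trial.
experiment.SetPerformanceMonitor<MyPerformanceMonitor>();

// Or build it from the service provider, mirroring how DefaultPerformanceMonitor
// is resolved above (channel for logging, experiment settings for memory limits).
experiment.SetPerformanceMonitor(provider =>
{
    var channel = provider.GetService<IChannel>();
    var settings = provider.GetRequiredService<AutoMLExperiment.AutoMLExperimentSettings>();
    return new MyPerformanceMonitor(settings, channel);
});
```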

/// <summary>
/// Set <see cref="SmacTuner"/> as tuner for hyper-parameter optimization. The performance of smac is to a large extent determined
/// by <paramref name="numberOfTrees"/>, <paramref name="nMinForSpit"/> and <paramref name="splitRatio"/>, which are used to fit smac's inner
44 changes: 1 addition & 43 deletions src/Microsoft.ML.AutoML/AutoMLExperiment/AutoMLExperiment.cs
@@ -194,22 +194,6 @@ public AutoMLExperiment SetTuner<TTuner>()
return this;
}

internal AutoMLExperiment SetPerformanceMonitor<TPerformanceMonitor>()
where TPerformanceMonitor : class, IPerformanceMonitor
{
_serviceCollection.AddTransient<IPerformanceMonitor, TPerformanceMonitor>();

return this;
}

internal AutoMLExperiment SetPerformanceMonitor<TPerformanceMonitor>(Func<IServiceProvider, TPerformanceMonitor> factory)
where TPerformanceMonitor : class, IPerformanceMonitor
{
_serviceCollection.AddTransient<IPerformanceMonitor>(factory);

return this;
}

/// <summary>
/// Run experiment and return the best trial result synchronously.
/// </summary>
Expand Down Expand Up @@ -257,29 +241,14 @@ public async Task<TrialResult> RunAsync(CancellationToken ct = default)
{
TrialId = trialNum++,
Parameter = Parameter.CreateNestedParameter(),
CancellationTokenSource = null,
PerformanceMetrics = new TrialPerformanceMetrics(),
};
var parameter = tuner.Propose(trialSettings);
trialSettings.Parameter = parameter;

using (var trialCancellationTokenSource = new CancellationTokenSource())
{
trialSettings.CancellationTokenSource = trialCancellationTokenSource;
monitor?.ReportRunningTrial(trialSettings);

System.Timers.Timer resourceUsageTimer = null;
if ((monitor != null) && (monitor?.ResourceUsageCheckInterval > 0))
{
resourceUsageTimer = new System.Timers.Timer(monitor.ResourceUsageCheckInterval);
resourceUsageTimer.Elapsed += (o, e) =>
{
monitor?.ReportTrialResourceUsage(trialSettings);
};
resourceUsageTimer.AutoReset = true;
resourceUsageTimer.Enabled = false;
}

void handler(object o, EventArgs e)
{
// only force-cancel running trials when there are completed trials.
@@ -296,21 +265,11 @@ void handler(object o, EventArgs e)

performanceMonitor.PerformanceMetricsUpdated += (o, metrics) =>
{
trialSettings.PerformanceMetrics = metrics;

if (_settings.MaximumMemoryUsageInMegaByte is double d && metrics.PeakMemoryUsage > d && !trialCancellationTokenSource.IsCancellationRequested)
{
logger.Trace($"cancel current trial {trialSettings.TrialId} because it uses {metrics.PeakMemoryUsage} mb memory and the maximum memory usage is {d}");
trialCancellationTokenSource.Cancel();

GC.AddMemoryPressure(Convert.ToInt64(metrics.PeakMemoryUsage) * 1024 * 1024);
GC.Collect();
}
performanceMonitor.OnPerformanceMetricsUpdatedHandler(trialSettings, metrics, trialCancellationTokenSource);
};

var trialTask = runner.RunAsync(trialSettings, trialCancellationTokenSource.Token);
performanceMonitor.Start();
resourceUsageTimer?.Start();
logger.Trace($"trial setting - {JsonSerializer.Serialize(trialSettings)}");
var trialResult = await trialTask;

@@ -365,7 +324,6 @@ void handler(object o, EventArgs e)
finally
{
aggregateTrainingStopManager.OnStopTraining -= handler;
resourceUsageTimer?.Stop();
}
}
}
16 changes: 4 additions & 12 deletions src/Microsoft.ML.AutoML/AutoMLExperiment/IMonitor.cs
@@ -13,15 +13,13 @@ namespace Microsoft.ML.AutoML
/// </summary>
public interface IMonitor
{
/// <summary>
/// Interval in milliseconds to report resource usage.
/// </summary>
int ResourceUsageCheckInterval { get; }
void ReportCompletedTrial(TrialResult result);

void ReportBestTrial(TrialResult result);

void ReportFailTrial(TrialSettings settings, Exception exception = null);

void ReportRunningTrial(TrialSettings settings);
void ReportTrialResourceUsage(TrialSettings settings);
}

/// <summary>
Expand All @@ -32,14 +30,12 @@ internal class MLContextMonitor : IMonitor
private readonly IChannel _logger;
private readonly List<TrialResult> _completedTrials;
private readonly SweepablePipeline _pipeline;
public int ResourceUsageCheckInterval { get; private set; }

public MLContextMonitor(IChannel logger, SweepablePipeline pipeline, int resourceUsageCheckInterval = 5000)
public MLContextMonitor(IChannel logger, SweepablePipeline pipeline)
{
_logger = logger;
_completedTrials = new List<TrialResult>();
_pipeline = pipeline;
ResourceUsageCheckInterval = resourceUsageCheckInterval;
}

public virtual void ReportBestTrial(TrialResult result)
@@ -62,10 +58,6 @@ public virtual void ReportRunningTrial(TrialSettings setting)
{
_logger.Info($"Update Running Trial - Id: {setting.TrialId}");
}

public void ReportTrialResourceUsage(TrialSettings setting)
{
}
}

internal class TrialResultMonitor<TMetrics> : MLContextMonitor
28 changes: 25 additions & 3 deletions src/Microsoft.ML.AutoML/AutoMLExperiment/IPerformanceMonitor.cs
@@ -8,13 +8,15 @@
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Timers;
using Microsoft.ML.Runtime;
using Timer = System.Timers.Timer;

namespace Microsoft.ML.AutoML
{
internal interface IPerformanceMonitor : IDisposable
public interface IPerformanceMonitor : IDisposable
{
void Start();

@@ -24,20 +26,28 @@ internal interface IPerformanceMonitor : IDisposable

double? GetPeakCpuUsage();

/// <summary>
/// The handler invoked every time <see cref="PerformanceMetricsUpdated"/> is fired.
/// </summary>
void OnPerformanceMetricsUpdatedHandler(TrialSettings trialSettings, TrialPerformanceMetrics metrics, CancellationTokenSource trialCancellationTokenSource);


public event EventHandler<TrialPerformanceMetrics> PerformanceMetricsUpdated;
}

internal class DefaultPerformanceMonitor : IPerformanceMonitor
public class DefaultPerformanceMonitor : IPerformanceMonitor
{
private readonly IChannel _logger;
private readonly AutoMLExperiment.AutoMLExperimentSettings _settings;
private Timer _timer;
private double? _peakCpuUsage;
private double? _peakMemoryUsage;
private readonly int _checkIntervalInMilliseconds;
private TimeSpan _totalCpuProcessorTime;

public DefaultPerformanceMonitor(IChannel logger, int checkIntervalInMilliseconds)
public DefaultPerformanceMonitor(AutoMLExperiment.AutoMLExperimentSettings settings, IChannel logger, int checkIntervalInMilliseconds)
{
_settings = settings;
_logger = logger;
_checkIntervalInMilliseconds = checkIntervalInMilliseconds;
}
@@ -122,5 +132,17 @@ private void SampleCpuAndMemoryUsage()
PerformanceMetricsUpdated?.Invoke(this, metrics);
}
}

public virtual void OnPerformanceMetricsUpdatedHandler(TrialSettings trialSettings, TrialPerformanceMetrics metrics, CancellationTokenSource trialCancellationTokenSource)
{
if (_settings.MaximumMemoryUsageInMegaByte is double d && metrics.PeakMemoryUsage > d && !trialCancellationTokenSource.IsCancellationRequested)
Author comment: You can bring ReportTrialResourceUsage here; it will be called every time a new performance metric gets sampled (see the subclass sketch after this class).

{
_logger.Trace($"cancel current trial {trialSettings.TrialId} because it uses {metrics.PeakMemoryUsage} mb memory and the maximum memory usage is {d}");
trialCancellationTokenSource.Cancel();

GC.AddMemoryPressure(Convert.ToInt64(metrics.PeakMemoryUsage) * 1024 * 1024);
GC.Collect();
}
}
}
}
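Following the inline comment above, the per-sample reporting that used to live in IMonitor.ReportTrialResourceUsage can now be expressed as a DefaultPerformanceMonitor subclass that overrides OnPerformanceMetricsUpdatedHandler. A hedged sketch under that assumption; ReportingPerformanceMonitor is hypothetical, and the Console call just stands in for a real sink:

```csharp
using System;
using System.Threading;
using Microsoft.ML.AutoML;
using Microsoft.ML.Runtime;

// Hypothetical subclass: report resource usage for every sampled metric and keep the
// built-in cancel-on-memory-limit behavior by calling the base handler.
public class ReportingPerformanceMonitor : DefaultPerformanceMonitor
{
    public ReportingPerformanceMonitor(AutoMLExperiment.AutoMLExperimentSettings settings, IChannel logger, int checkIntervalInMilliseconds = 1000)
        : base(settings, logger, checkIntervalInMilliseconds)
    {
    }

    public override void OnPerformanceMetricsUpdatedHandler(TrialSettings trialSettings, TrialPerformanceMetrics metrics, CancellationTokenSource trialCancellationTokenSource)
    {
        // Surface the sample somewhere useful (notebook widget, log sink, etc.).
        Console.WriteLine($"trial {trialSettings.TrialId}: peak memory {metrics.PeakMemoryUsage} MB");

        // Preserve the default memory-limit cancellation logic.
        base.OnPerformanceMetricsUpdatedHandler(trialSettings, metrics, trialCancellationTokenSource);
    }
}
```

It would be registered through the new extension, for example experiment.SetPerformanceMonitor(provider => new ReportingPerformanceMonitor(provider.GetRequiredService<AutoMLExperiment.AutoMLExperimentSettings>(), provider.GetService<IChannel>())).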
9 changes: 0 additions & 9 deletions src/Microsoft.ML.AutoML/AutoMLExperiment/TrialSettings.cs
@@ -23,14 +23,5 @@ public class TrialSettings
/// Parameters for the pipeline used in this trial
/// </summary>
public Parameter Parameter { get; set; }
/// <summary>
/// Cancellation token source to have the ability to cancel the trial
/// </summary>
[JsonIgnore]
public CancellationTokenSource CancellationTokenSource { get; set; }
/// <summary>
/// Performance metrics of the trial
/// </summary>
public TrialPerformanceMetrics PerformanceMetrics { get; internal set; }
}
}
7 changes: 5 additions & 2 deletions test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs
@@ -81,8 +81,7 @@ public async Task AutoMLExperiment_cancel_trial_when_exceeds_memory_limit_Async(
return new DummyTrialRunner(settings, 5, channel);
})
.SetTuner<RandomSearchTuner>()
.SetMaximumMemoryUsageInMegaByte(0.01)
.SetPerformanceMonitor<DummyPeformanceMonitor>();
.SetMaximumMemoryUsageInMegaByte(0.01);

var runExperimentAction = async () => await experiment.RunAsync();
await runExperimentAction.Should().ThrowExactlyAsync<TimeoutException>();
@@ -423,6 +422,10 @@ public void Dispose()
return 1000;
}

public void OnPerformanceMetricsUpdatedHandler(TrialSettings trialSettings, TrialPerformanceMetrics metrics, CancellationTokenSource trialCancellationTokenSource)
{
}

public void Start()
{
if (_timer == null)