From b07828c5a532f40673bdb99ca9bd35cdf2c2e226 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Wed, 5 Jul 2023 11:19:08 -0700 Subject: [PATCH] avoid empty dataset --- .../AutoMLExperiment/IDatasetManager.cs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.AutoML/AutoMLExperiment/IDatasetManager.cs b/src/Microsoft.ML.AutoML/AutoMLExperiment/IDatasetManager.cs index fd18fd83fd..e2f373a530 100644 --- a/src/Microsoft.ML.AutoML/AutoMLExperiment/IDatasetManager.cs +++ b/src/Microsoft.ML.AutoML/AutoMLExperiment/IDatasetManager.cs @@ -78,7 +78,15 @@ public IDataView LoadTrainDataset(MLContext context, TrialSettings? settings) var subSampleRatio = parameter.ContainsKey(_subSamplingKey) ? parameter[_subSamplingKey].AsType() : 1; if (subSampleRatio < 1.0) { - var subSampledTrainDataset = context.Data.TakeRows(_trainDataset, (long)(subSampleRatio * _rowCount)); + var count = (long)(subSampleRatio * _rowCount); + if (count <= 10) + { + // fix issue https://github.com/dotnet/machinelearning-modelbuilder/issues/2734 + // take at least 10 rows to avoid empty dataset + count = 10; + } + + var subSampledTrainDataset = context.Data.TakeRows(_trainDataset, count); return subSampledTrainDataset; } }