From f69d659c92d0598acf5e420f701d3da6d83526c8 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 15:58:58 -0700 Subject: [PATCH 01/16] in memory loader --- .../DataViewConstructionUtils.cs | 2 +- .../InternalSchemaDefinition.cs | 2 +- src/Microsoft.ML/CSharpApi.cs | 28 ++++++++ src/Microsoft.ML/MemoryCollection.cs | 66 +++++++++++++++++++ .../Runtime/EntryPoints/DataViewReference.cs | 34 ++++++++++ 5 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 src/Microsoft.ML/MemoryCollection.cs create mode 100644 src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs diff --git a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs index 03e7408e71..b7fe86cb11 100644 --- a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs +++ b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML.Runtime.Api /// /// A helper class to create data views based on the user-provided types. /// - internal static class DataViewConstructionUtils + public static class DataViewConstructionUtils { public static IDataView CreateFromList(IHostEnvironment env, IList data, SchemaDefinition schemaDefinition = null) diff --git a/src/Microsoft.ML.Api/InternalSchemaDefinition.cs b/src/Microsoft.ML.Api/InternalSchemaDefinition.cs index 2b0f056214..9a18657a0f 100644 --- a/src/Microsoft.ML.Api/InternalSchemaDefinition.cs +++ b/src/Microsoft.ML.Api/InternalSchemaDefinition.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.Runtime.Api /// /// An internal class that holds the (already validated) mapping between a custom type and an IDataView schema. 
/// - internal sealed class InternalSchemaDefinition + public sealed class InternalSchemaDefinition { public readonly Column[] Columns; diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 46002c5abf..2fb94c498b 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -53,11 +53,22 @@ public Microsoft.ML.Data.TextLoader.Output Add(Microsoft.ML.Data.TextLoader inpu return output; } + public Microsoft.ML.Data.DataViewReference.Output Add(Microsoft.ML.Data.DataViewReference input) + { + var output = new Microsoft.ML.Data.DataViewReference.Output(); + Add(input, output); + return output; + } + public void Add(Microsoft.ML.Data.TextLoader input, Microsoft.ML.Data.TextLoader.Output output) { _jsonNodes.Add(Serialize("Data.TextLoader", input, output)); } + public void Add(Microsoft.ML.Data.DataViewReference input, Microsoft.ML.Data.DataViewReference.Output output) + { + _jsonNodes.Add(Serialize("Data.DataViewReference", input, output)); + } public Microsoft.ML.Models.AnomalyDetectionEvaluator.Output Add(Microsoft.ML.Models.AnomalyDetectionEvaluator input) { var output = new Microsoft.ML.Models.AnomalyDetectionEvaluator.Output(); @@ -1311,6 +1322,23 @@ public sealed partial class TextLoader public string CustomSchema { get; set; } + public sealed class Output + { + /// + /// The resulting data view + /// + public Var Data { get; set; } = new Var(); + + } + } + + public sealed partial class DataViewReference + { + /// + /// Location of the input file + /// + public Var Data { get; set; } = new Var(); + public sealed class Output { /// diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs new file mode 100644 index 0000000000..c0791481aa --- /dev/null +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -0,0 +1,66 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using 
Microsoft.ML.Runtime.EntryPoints; + +namespace Microsoft.ML +{ + public class MemoryCollection : ILearningPipelineLoader + where TInput : class + { + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + Contracts.Assert(previousStep == null); + _dataViewEntryPoint = new Data.DataViewReference(); + var importOutput = experiment.Add(_dataViewEntryPoint); + return new MemoryCollectionPipelineStep(importOutput.Data); + } + + private readonly IList _listCollection; + private readonly IEnumerable _enumerableCollection; + + private Data.DataViewReference _dataViewEntryPoint; + private IDataView _dataView; + + public MemoryCollection(IList collection) + { + //need validation at some point + _listCollection = collection; + } + + public MemoryCollection(IEnumerable collection) + { + //need validation at some point + _enumerableCollection = collection; + } + + public void SetInput(IHostEnvironment env, Experiment experiment) + { + if (_listCollection!=null) + { + _dataView = DataViewConstructionUtils.CreateFromList(env, _listCollection); + } + if (_enumerableCollection!=null) + { + _dataView = DataViewConstructionUtils.CreateFromEnumerable(env, _listCollection); + } + env.CheckValue(_dataView, nameof(_dataView)); + experiment.SetInput(_dataViewEntryPoint.Data, _dataView); + } + + private class MemoryCollectionPipelineStep : ILearningPipelineDataStep + { + public MemoryCollectionPipelineStep(Var data) + { + Data = data; + Model = null; + } + + public Var Data { get; } + public Var Model { get; } + } + } +} diff --git a/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs b/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs new file mode 100644 index 0000000000..c40d1da111 --- /dev/null +++ b/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs @@ -0,0 +1,34 @@ +using Microsoft.ML.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.Data; +using 
Microsoft.ML.Runtime.EntryPoints; + +[assembly: LoadableClass(typeof(void), typeof(InMemoryDataView), null, typeof(SignatureEntryPointModule), "InMemoryDataView")] +namespace Microsoft.ML.Runtime.EntryPoints +{ + public class InMemoryDataView + { + public sealed class Input + { + [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Pointer to IDataView in memory", SortOrder = 1)] + public IDataView Data; + } + + public sealed class Output + { + [TlcModule.Output(Desc = "The resulting data view", SortOrder = 1)] + public IDataView Data; + } + + [TlcModule.EntryPoint(Name = "Data.DataViewReference", Desc = "Pass dataview from memory to experiment")] + public static Output ImportData(IHostEnvironment env, Input input) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register("DataViewReference"); + env.CheckValue(input, nameof(input)); + EntryPointUtils.CheckInputArgs(host, input); + return new Output { Data = input.Data }; + } + } +} From 55b6e466348566b1be91e1a045d89bddbb667331 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 16:03:05 -0700 Subject: [PATCH 02/16] add test file for memory collection --- .../MemoryCollectionTests.cs | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 test/Microsoft.ML.Tests/MemoryCollectionTests.cs diff --git a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs new file mode 100644 index 0000000000..10ccadfe18 --- /dev/null +++ b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs @@ -0,0 +1,176 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.ML; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.TestFramework; +using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms; +using System; +using System.Collections.Generic; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.EntryPoints.Tests +{ + public class MemoryCollectionTests : BaseTestClass + { + public MemoryCollectionTests(ITestOutputHelper output) + : base(output) + { + + } + + [Fact] + public void ConstructorDoesntThrow() + { + Assert.NotNull(new MemoryCollection(new List())); + Assert.NotNull(new MemoryCollection(new List() { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(new MemoryCollection(new Input[0])); + Assert.NotNull(new MemoryCollection(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(new MemoryCollection(null)); + } + + [Fact] + public void CanSuccessfullyApplyATransform() + { + var collection = new MemoryCollection(new List() { new Input { Number1 = 1, String1 = "1" } }); + + using (var environment = new TlcEnvironment()) + { + Experiment experiment = environment.CreateExperiment(); + ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as ILearningPipelineDataStep; + + Assert.NotNull(output.Data); + Assert.NotNull(output.Data.VarName); + Assert.Null(output.Model); + } + } + + [Fact] + public void CanSuccessfullyEnumerated() + { + var collection = new MemoryCollection(new List() { + new Input { Number1 = 1, String1 = "1" }, + new Input { Number1 = 2, String1 = "2" }, + new Input { Number1 = 3, String1 = "3" } + }); + + using (var environment = new TlcEnvironment()) + { + Experiment experiment = environment.CreateExperiment(); + ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as ILearningPipelineDataStep; + + experiment.Compile(); + collection.SetInput(environment, experiment); + experiment.Run(); + + IDataView data = 
experiment.GetOutput(output.Data); + Assert.NotNull(data); + + using (var cursor = data.GetRowCursor((a => true))) + { + var IDGetter = cursor.GetGetter(0); + var TextGetter = cursor.GetGetter(1); + + Assert.True(cursor.MoveNext()); + + float ID = 0; + IDGetter(ref ID); + Assert.Equal(1, ID); + + DvText Text = new DvText(); + TextGetter(ref Text); + Assert.Equal("1", Text.ToString()); + + Assert.True(cursor.MoveNext()); + + ID = 0; + IDGetter(ref ID); + Assert.Equal(2, ID); + + Text = new DvText(); + TextGetter(ref Text); + Assert.Equal("2", Text.ToString()); + + Assert.True(cursor.MoveNext()); + + ID = 0; + IDGetter(ref ID); + Assert.Equal(3, ID); + + Text = new DvText(); + TextGetter(ref Text); + Assert.Equal("3", Text.ToString()); + + Assert.False(cursor.MoveNext()); + } + } + } + + [Fact] + public void CanTrain() + { + var pipeline = new LearningPipeline(); + var collection = new MemoryCollection(new List() { + new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, + new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, + new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f ,PetalLength=0.3f, PetalWidth=5.1f, Label=0} + }); + + pipeline.Add(collection); + + pipeline.Add(new ColumnConcatenator(outputColumn: "Features", + "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + + pipeline.Add(new StochasticDualCoordinateAscentClassifier()); + PredictionModel model = pipeline.Train(); + + IrisPrediction prediction = model.Predict(new IrisData() + { + SepalLength = 3.3f, + SepalWidth = 1.6f, + PetalLength = 0.2f, + PetalWidth = 5.1f, + }); + + } + + public class Input + { + [Column("0")] + public float Number1; + + [Column("1")] + public string String1; + } + + public class IrisData + { + [Column("0")] + public float Label; + + [Column("1")] + public float SepalLength; + + [Column("2")] + public float SepalWidth; + + [Column("3")] + public float PetalLength; + + [Column("4")] + 
public float PetalWidth; + } + + public class IrisPrediction + { + [ColumnName("Score")] + public float[] PredictedLabels; + } + + } +} From b166f055f24752c1db616ae4ac7c4eec135d3144 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 16:13:14 -0700 Subject: [PATCH 03/16] even in afterlife EntryPointCatalog will chase me down. --- .../Common/EntryPoints/core_ep-list.tsv | 1 + .../Common/EntryPoints/core_manifest.json | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 568a6066f9..4ae55496c8 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -1,3 +1,4 @@ +Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.Runtime.EntryPoints.InMemoryDataView ImportData Microsoft.ML.Runtime.EntryPoints.InMemoryDataView+Input Microsoft.ML.Runtime.EntryPoints.InMemoryDataView+Output Data.IDataViewArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput Data.PredictorModelArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index a3778a7f7f..fac2e1481d 100644 --- 
a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -1,5 +1,31 @@ { "EntryPoints": [ + { + "Name": "Data.DataViewReference", + "Desc": "Pass dataview from memory to experiment", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "Pointer to IDataView in memory", + "Aliases": [ + "data" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + } + ], + "Outputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "The resulting data view" + } + ] + }, { "Name": "Data.IDataViewArrayConverter", "Desc": "Create and array variable", From a1761b1393c2a2db3ede3bd163baebabccbaf058 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 16:36:28 -0700 Subject: [PATCH 04/16] Address some comments. --- .../DataViewConstructionUtils.cs | 2 +- .../InternalSchemaDefinition.cs | 2 +- src/Microsoft.ML/MemoryCollection.cs | 23 +++++++++---------- ...taViewReference.cs => InMemoryDataView.cs} | 5 +++- 4 files changed, 17 insertions(+), 15 deletions(-) rename src/Microsoft.ML/Runtime/EntryPoints/{DataViewReference.cs => InMemoryDataView.cs} (85%) diff --git a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs index b7fe86cb11..03e7408e71 100644 --- a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs +++ b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML.Runtime.Api /// /// A helper class to create data views based on the user-provided types. 
/// - public static class DataViewConstructionUtils + internal static class DataViewConstructionUtils { public static IDataView CreateFromList(IHostEnvironment env, IList data, SchemaDefinition schemaDefinition = null) diff --git a/src/Microsoft.ML.Api/InternalSchemaDefinition.cs b/src/Microsoft.ML.Api/InternalSchemaDefinition.cs index 9a18657a0f..2b0f056214 100644 --- a/src/Microsoft.ML.Api/InternalSchemaDefinition.cs +++ b/src/Microsoft.ML.Api/InternalSchemaDefinition.cs @@ -16,7 +16,7 @@ namespace Microsoft.ML.Runtime.Api /// /// An internal class that holds the (already validated) mapping between a custom type and an IDataView schema. /// - public sealed class InternalSchemaDefinition + internal sealed class InternalSchemaDefinition { public readonly Column[] Columns; diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index c0791481aa..eb736665f7 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -1,10 +1,13 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ using System.Collections.Generic; -using System.Text; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Runtime.Internal.Utilities; namespace Microsoft.ML { @@ -27,26 +30,22 @@ public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Exper public MemoryCollection(IList collection) { - //need validation at some point + Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); _listCollection = collection; } public MemoryCollection(IEnumerable collection) { - //need validation at some point + Contracts.CheckParamValue(collection != null, collection, nameof(collection), "Must be non-null"); _enumerableCollection = collection; } public void SetInput(IHostEnvironment env, Experiment experiment) { - if (_listCollection!=null) - { - _dataView = DataViewConstructionUtils.CreateFromList(env, _listCollection); - } - if (_enumerableCollection!=null) - { - _dataView = DataViewConstructionUtils.CreateFromEnumerable(env, _listCollection); - } + if (_listCollection != null) + _dataView = ComponentCreation.CreateDataView(env, _listCollection); + if (_enumerableCollection != null) + _dataView = ComponentCreation.CreateStreamingDataView(env, _listCollection); env.CheckValue(_dataView, nameof(_dataView)); experiment.SetInput(_dataViewEntryPoint.Data, _dataView); } diff --git a/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs b/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs similarity index 85% rename from src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs rename to src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs index c40d1da111..18cd8da812 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs @@ -1,4 +1,7 @@ -using Microsoft.ML.Data; +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; From 12d1b9e69de99dbf066375f718e7deaf11cd0219 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 16:36:53 -0700 Subject: [PATCH 05/16] update tests --- .../MemoryCollectionTests.cs | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs index 10ccadfe18..8258f5a08f 100644 --- a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs +++ b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs @@ -2,14 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.ML; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; -using System; using System.Collections.Generic; using Xunit; using Xunit.Abstractions; @@ -25,20 +23,47 @@ public MemoryCollectionTests(ITestOutputHelper output) } [Fact] - public void ConstructorDoesntThrow() + public void CheckConstructor() { - Assert.NotNull(new MemoryCollection(new List())); Assert.NotNull(new MemoryCollection(new List() { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(new MemoryCollection(new Input[0])); Assert.NotNull(new MemoryCollection(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(new MemoryCollection(null)); + bool thrown = false; + try + { + new MemoryCollection(null); + } + catch + { + thrown = true; + } + Assert.True(thrown); + thrown = false; + try + { + new MemoryCollection(new List()); + } + catch + { + thrown = true; + } + Assert.True(thrown); + + thrown = false; + 
try + { + new MemoryCollection(new Input[0]); + } + catch + { + thrown = true; + } + Assert.True(thrown); } [Fact] public void CanSuccessfullyApplyATransform() { var collection = new MemoryCollection(new List() { new Input { Number1 = 1, String1 = "1" } }); - using (var environment = new TlcEnvironment()) { Experiment experiment = environment.CreateExperiment(); From ebcf44844d60776754a93a216d9c96e0b74db8a6 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 17:21:39 -0700 Subject: [PATCH 06/16] address more comments. --- src/Microsoft.ML/MemoryCollection.cs | 42 ++++++++++++++++--- src/Microsoft.ML/TextLoader.cs | 3 +- .../MemoryCollectionTests.cs | 29 +++++-------- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index eb736665f7..2558449812 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -11,7 +11,30 @@ namespace Microsoft.ML { - public class MemoryCollection : ILearningPipelineLoader + public class MemoryCollection + { + /// + /// Creates memory collection loader. + /// + public static MemoryCollectionLoader Create(IList data) where T:class + { + return new MemoryCollectionLoader(data); + } + + /// + /// Creates memory collection loader. + /// + public static MemoryCollectionLoader Create(IEnumerable data) where T : class + { + return new MemoryCollectionLoader(data); + } + } + + /// + /// Allows you to convert your memory collection into IDataview. 
+ /// + /// + public class MemoryCollectionLoader : ILearningPipelineLoader where TInput : class { public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) @@ -28,16 +51,24 @@ public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Exper private Data.DataViewReference _dataViewEntryPoint; private IDataView _dataView; - public MemoryCollection(IList collection) + /// + /// Creates IDataview on top of collection + /// + /// + public MemoryCollectionLoader(IList collection) { Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); _listCollection = collection; } - public MemoryCollection(IEnumerable collection) + /// + /// Creates IDataview on top of collection + /// + public MemoryCollectionLoader(IEnumerable collection) { - Contracts.CheckParamValue(collection != null, collection, nameof(collection), "Must be non-null"); + Contracts.CheckValue(collection,nameof(collection), "Must be non-null"); _enumerableCollection = collection; + } public void SetInput(IHostEnvironment env, Experiment experiment) @@ -55,11 +86,10 @@ private class MemoryCollectionPipelineStep : ILearningPipelineDataStep public MemoryCollectionPipelineStep(Var data) { Data = data; - Model = null; } public Var Data { get; } - public Var Model { get; } + public Var Model => null; } } } diff --git a/src/Microsoft.ML/TextLoader.cs b/src/Microsoft.ML/TextLoader.cs index f63a14611b..cdb9df8a36 100644 --- a/src/Microsoft.ML/TextLoader.cs +++ b/src/Microsoft.ML/TextLoader.cs @@ -115,11 +115,10 @@ private class TextLoaderPipelineStep : ILearningPipelineDataStep public TextLoaderPipelineStep(Var data) { Data = data; - Model = null; } public Var Data { get; } - public Var Model { get; } + public Var Model => null; } } } diff --git a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs index 8258f5a08f..c0ac4a0320 100644 --- 
a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs +++ b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System.Collections.Generic; +using System.Linq; using Xunit; using Xunit.Abstractions; @@ -25,22 +26,14 @@ public MemoryCollectionTests(ITestOutputHelper output) [Fact] public void CheckConstructor() { - Assert.NotNull(new MemoryCollection(new List() { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(new MemoryCollection(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(MemoryCollection.Create(new List() { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(MemoryCollection.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(MemoryCollection.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } }.AsEnumerable())); + bool thrown = false; try { - new MemoryCollection(null); - } - catch - { - thrown = true; - } - Assert.True(thrown); - thrown = false; - try - { - new MemoryCollection(new List()); + MemoryCollection.Create(new List()); } catch { @@ -51,7 +44,7 @@ public void CheckConstructor() thrown = false; try { - new MemoryCollection(new Input[0]); + MemoryCollection.Create(new Input[0]); } catch { @@ -63,11 +56,11 @@ public void CheckConstructor() [Fact] public void CanSuccessfullyApplyATransform() { - var collection = new MemoryCollection(new List() { new Input { Number1 = 1, String1 = "1" } }); + var collection = MemoryCollection.Create(new List() { new Input { Number1 = 1, String1 = "1" } }); using (var environment = new TlcEnvironment()) { Experiment experiment = environment.CreateExperiment(); - ILearningPipelineDataStep output = collection.ApplyStep(null, experiment) as ILearningPipelineDataStep; + ILearningPipelineDataStep output = (ILearningPipelineDataStep)collection.ApplyStep(null, experiment); Assert.NotNull(output.Data); Assert.NotNull(output.Data.VarName); @@ -78,7 +71,7 @@ 
public void CanSuccessfullyApplyATransform() [Fact] public void CanSuccessfullyEnumerated() { - var collection = new MemoryCollection(new List() { + var collection = MemoryCollection.Create(new List() { new Input { Number1 = 1, String1 = "1" }, new Input { Number1 = 2, String1 = "2" }, new Input { Number1 = 3, String1 = "3" } @@ -140,7 +133,7 @@ public void CanSuccessfullyEnumerated() public void CanTrain() { var pipeline = new LearningPipeline(); - var collection = new MemoryCollection(new List() { + var collection = MemoryCollection.Create(new List() { new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f ,PetalLength=0.3f, PetalWidth=5.1f, Label=0} From 110e205ef99099be92cf2a7f5babb45df4c2a755 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 9 May 2018 17:23:03 -0700 Subject: [PATCH 07/16] remove empty param description --- src/Microsoft.ML/MemoryCollection.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index 2558449812..af166d8a6e 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -54,7 +54,6 @@ public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Exper /// /// Creates IDataview on top of collection /// - /// public MemoryCollectionLoader(IList collection) { Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); From 62ab5755ee31b9e21f75bb36328e2eec1eb7ef09 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 10 May 2018 09:51:04 -0700 Subject: [PATCH 08/16] hide collectionloader --- src/Microsoft.ML/MemoryCollection.cs | 105 +++++++++--------- .../MemoryCollectionTests.cs | 23 +++- 2 files changed, 72 insertions(+), 56 deletions(-) diff --git 
a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index af166d8a6e..3e45ecd252 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -16,7 +16,7 @@ public class MemoryCollection /// /// Creates memory collection loader. /// - public static MemoryCollectionLoader Create(IList data) where T:class + public static ILearningPipelineLoader Create(IList data) where T : class { return new MemoryCollectionLoader(data); } @@ -24,71 +24,72 @@ public static MemoryCollectionLoader Create(IList data) where T:class /// /// Creates memory collection loader. /// - public static MemoryCollectionLoader Create(IEnumerable data) where T : class + public static ILearningPipelineLoader Create(IEnumerable data) where T : class { return new MemoryCollectionLoader(data); } - } - /// - /// Allows you to convert your memory collection into IDataview. - /// - /// - public class MemoryCollectionLoader : ILearningPipelineLoader - where TInput : class - { - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - Contracts.Assert(previousStep == null); - _dataViewEntryPoint = new Data.DataViewReference(); - var importOutput = experiment.Add(_dataViewEntryPoint); - return new MemoryCollectionPipelineStep(importOutput.Data); - } - - private readonly IList _listCollection; - private readonly IEnumerable _enumerableCollection; - - private Data.DataViewReference _dataViewEntryPoint; - private IDataView _dataView; /// - /// Creates IDataview on top of collection + /// Allows you to convert your memory collection into IDataview. 
/// - public MemoryCollectionLoader(IList collection) + /// + private class MemoryCollectionLoader : ILearningPipelineLoader + where TInput : class { - Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); - _listCollection = collection; - } + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + Contracts.Assert(previousStep == null); + _dataViewEntryPoint = new Data.DataViewReference(); + var importOutput = experiment.Add(_dataViewEntryPoint); + return new MemoryCollectionPipelineStep(importOutput.Data); + } - /// - /// Creates IDataview on top of collection - /// - public MemoryCollectionLoader(IEnumerable collection) - { - Contracts.CheckValue(collection,nameof(collection), "Must be non-null"); - _enumerableCollection = collection; - - } + private readonly IList _listCollection; + private readonly IEnumerable _enumerableCollection; - public void SetInput(IHostEnvironment env, Experiment experiment) - { - if (_listCollection != null) - _dataView = ComponentCreation.CreateDataView(env, _listCollection); - if (_enumerableCollection != null) - _dataView = ComponentCreation.CreateStreamingDataView(env, _listCollection); - env.CheckValue(_dataView, nameof(_dataView)); - experiment.SetInput(_dataViewEntryPoint.Data, _dataView); - } + private Data.DataViewReference _dataViewEntryPoint; + private IDataView _dataView; - private class MemoryCollectionPipelineStep : ILearningPipelineDataStep - { - public MemoryCollectionPipelineStep(Var data) + /// + /// Creates IDataview on top of collection + /// + public MemoryCollectionLoader(IList collection) { - Data = data; + Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); + _listCollection = collection; + } + + /// + /// Creates IDataview on top of collection + /// + public MemoryCollectionLoader(IEnumerable collection) + { + Contracts.CheckValue(collection, 
nameof(collection)); + _enumerableCollection = collection; + } - public Var Data { get; } - public Var Model => null; + public void SetInput(IHostEnvironment env, Experiment experiment) + { + if (_listCollection != null) + _dataView = ComponentCreation.CreateDataView(env, _listCollection); + if (_enumerableCollection != null) + _dataView = ComponentCreation.CreateStreamingDataView(env, _enumerableCollection); + env.CheckValue(_dataView, nameof(_dataView)); + experiment.SetInput(_dataViewEntryPoint.Data, _dataView); + } + + private class MemoryCollectionPipelineStep : ILearningPipelineDataStep + { + public MemoryCollectionPipelineStep(Var data) + { + Data = data; + } + + public Var Data { get; } + public Var Model => null; + } } } } diff --git a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs index c0ac4a0320..3f96b7a7b7 100644 --- a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs +++ b/test/Microsoft.ML.Tests/MemoryCollectionTests.cs @@ -133,17 +133,16 @@ public void CanSuccessfullyEnumerated() public void CanTrain() { var pipeline = new LearningPipeline(); - var collection = MemoryCollection.Create(new List() { + var data = new List() { new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f ,PetalLength=0.3f, PetalWidth=5.1f, Label=0} - }); + }; + var collection = MemoryCollection.Create(data); pipeline.Add(collection); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); - pipeline.Add(new StochasticDualCoordinateAscentClassifier()); PredictionModel model = pipeline.Train(); @@ -155,6 +154,22 @@ public void CanTrain() PetalWidth = 5.1f, }); + pipeline = new LearningPipeline(); + collection = MemoryCollection.Create(data.AsEnumerable()); + 
pipeline.Add(collection); + pipeline.Add(new ColumnConcatenator(outputColumn: "Features", + "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + pipeline.Add(new StochasticDualCoordinateAscentClassifier()); + model = pipeline.Train(); + + prediction = model.Predict(new IrisData() + { + SepalLength = 3.3f, + SepalWidth = 1.6f, + PetalLength = 0.2f, + PetalWidth = 5.1f, + }); + } public class Input From 1da42ca0439e53f0bf0b42f4ea97836b01c3c60d Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 10 May 2018 10:06:40 -0700 Subject: [PATCH 09/16] refactor classes a little. --- src/Microsoft.ML/MemoryCollection.cs | 83 +++++++++++++++------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index 3e45ecd252..7992b0bd80 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -18,7 +18,7 @@ public class MemoryCollection /// public static ILearningPipelineLoader Create(IList data) where T : class { - return new MemoryCollectionLoader(data); + return new ListCollectionLoader(data); } /// @@ -26,17 +26,14 @@ public static ILearningPipelineLoader Create(IList data) where T : class /// public static ILearningPipelineLoader Create(IEnumerable data) where T : class { - return new MemoryCollectionLoader(data); + return new EnumerableCollectionLoader(data); } - - /// - /// Allows you to convert your memory collection into IDataview. 
- /// - /// - private class MemoryCollectionLoader : ILearningPipelineLoader - where TInput : class + private abstract class CollectionLoader : ILearningPipelineLoader where TInput : class { + private Data.DataViewReference _dataViewEntryPoint; + private IDataView _dataView; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { Contracts.Assert(previousStep == null); @@ -45,51 +42,59 @@ public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Exper return new MemoryCollectionPipelineStep(importOutput.Data); } - private readonly IList _listCollection; - private readonly IEnumerable _enumerableCollection; - - private Data.DataViewReference _dataViewEntryPoint; - private IDataView _dataView; - - /// - /// Creates IDataview on top of collection - /// - public MemoryCollectionLoader(IList collection) + public void SetInput(IHostEnvironment environment, Experiment experiment) { - Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); - _listCollection = collection; + _dataView = GetDataView(environment); + environment.CheckValue(_dataView, nameof(_dataView)); + experiment.SetInput(_dataViewEntryPoint.Data, _dataView); } - /// - /// Creates IDataview on top of collection - /// - public MemoryCollectionLoader(IEnumerable collection) + public abstract IDataView GetDataView(IHostEnvironment environment); + } + + private class EnumerableCollectionLoader : CollectionLoader where TInput : class + { + private readonly IEnumerable _enumerableCollection; + + public EnumerableCollectionLoader(IEnumerable collection) { Contracts.CheckValue(collection, nameof(collection)); _enumerableCollection = collection; + } + public override IDataView GetDataView(IHostEnvironment environment) + { + return ComponentCreation.CreateStreamingDataView(environment, _enumerableCollection); } + } - public void SetInput(IHostEnvironment env, Experiment experiment) + private class 
ListCollectionLoader : CollectionLoader where TInput : class + { + private readonly IList _listCollection; + + public ListCollectionLoader(IList collection) { - if (_listCollection != null) - _dataView = ComponentCreation.CreateDataView(env, _listCollection); - if (_enumerableCollection != null) - _dataView = ComponentCreation.CreateStreamingDataView(env, _enumerableCollection); - env.CheckValue(_dataView, nameof(_dataView)); - experiment.SetInput(_dataViewEntryPoint.Data, _dataView); + Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); + _listCollection = collection; } - private class MemoryCollectionPipelineStep : ILearningPipelineDataStep + public override IDataView GetDataView(IHostEnvironment environment) { - public MemoryCollectionPipelineStep(Var data) - { - Data = data; - } + return ComponentCreation.CreateDataView(environment, _listCollection); + } + } - public Var Data { get; } - public Var Model => null; + private class MemoryCollectionPipelineStep : ILearningPipelineDataStep + { + public MemoryCollectionPipelineStep(Var data) + { + Data = data; } + + public Var Data { get; } + public Var Model => null; } + + } } From 0cac7dcc24fd7beebb21da95f6cf2be179c62934 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 10 May 2018 10:15:09 -0700 Subject: [PATCH 10/16] pesky new lines! --- src/Microsoft.ML/MemoryCollection.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index 7992b0bd80..cb550009d6 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -94,7 +94,5 @@ public MemoryCollectionPipelineStep(Var data) public Var Data { get; } public Var Model => null; } - - } } From ca9c031053296b248d310ada51e407028b4b9988 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 10 May 2018 15:25:36 -0700 Subject: [PATCH 11/16] slightly better comments. 
but only slightly --- src/Microsoft.ML/MemoryCollection.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/MemoryCollection.cs index cb550009d6..d8328b47d1 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/MemoryCollection.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML public class MemoryCollection { /// - /// Creates memory collection loader. + /// Creates pipeline loader. Support shuffle. /// public static ILearningPipelineLoader Create(IList data) where T : class { @@ -22,7 +22,7 @@ public static ILearningPipelineLoader Create(IList data) where T : class } /// - /// Creates memory collection loader. + /// Creates pipeline loader which can't be shuffled. /// public static ILearningPipelineLoader Create(IEnumerable data) where T : class { From d78afa3919dfdae1f3ad5cdec3fa80c247e20c07 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Fri, 11 May 2018 10:01:06 -0700 Subject: [PATCH 12/16] rename it --- .../CollectionLoader.cs} | 16 ++++++------- ...ctionTests.cs => CollectionLoaderTests.cs} | 24 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) rename src/Microsoft.ML/{MemoryCollection.cs => Data/CollectionLoader.cs} (84%) rename test/Microsoft.ML.Tests/{MemoryCollectionTests.cs => CollectionLoaderTests.cs} (89%) diff --git a/src/Microsoft.ML/MemoryCollection.cs b/src/Microsoft.ML/Data/CollectionLoader.cs similarity index 84% rename from src/Microsoft.ML/MemoryCollection.cs rename to src/Microsoft.ML/Data/CollectionLoader.cs index d8328b47d1..f4527b1b8d 100644 --- a/src/Microsoft.ML/MemoryCollection.cs +++ b/src/Microsoft.ML/Data/CollectionLoader.cs @@ -9,9 +9,9 @@ using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; -namespace Microsoft.ML +namespace Microsoft.ML.Data { - public class MemoryCollection + public class CollectionLoader { /// /// Creates pipeline loader. Support shuffle.
@@ -29,7 +29,7 @@ public static ILearningPipelineLoader Create(IEnumerable data) where T : c return new EnumerableCollectionLoader(data); } - private abstract class CollectionLoader : ILearningPipelineLoader where TInput : class + private abstract class BaseCollectionLoader : ILearningPipelineLoader where TInput : class { private Data.DataViewReference _dataViewEntryPoint; private IDataView _dataView; @@ -39,7 +39,7 @@ public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Exper Contracts.Assert(previousStep == null); _dataViewEntryPoint = new Data.DataViewReference(); var importOutput = experiment.Add(_dataViewEntryPoint); - return new MemoryCollectionPipelineStep(importOutput.Data); + return new CollectionLoaderPipelineStep(importOutput.Data); } public void SetInput(IHostEnvironment environment, Experiment experiment) @@ -52,7 +52,7 @@ public void SetInput(IHostEnvironment environment, Experiment experiment) public abstract IDataView GetDataView(IHostEnvironment environment); } - private class EnumerableCollectionLoader : CollectionLoader where TInput : class + private class EnumerableCollectionLoader : BaseCollectionLoader where TInput : class { private readonly IEnumerable _enumerableCollection; @@ -68,7 +68,7 @@ public override IDataView GetDataView(IHostEnvironment environment) } } - private class ListCollectionLoader : CollectionLoader where TInput : class + private class ListCollectionLoader : BaseCollectionLoader where TInput : class { private readonly IList _listCollection; @@ -84,9 +84,9 @@ public override IDataView GetDataView(IHostEnvironment environment) } } - private class MemoryCollectionPipelineStep : ILearningPipelineDataStep + private class CollectionLoaderPipelineStep : ILearningPipelineDataStep { - public MemoryCollectionPipelineStep(Var data) + public CollectionLoaderPipelineStep(Var data) { Data = data; } diff --git a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs b/test/Microsoft.ML.Tests/CollectionLoaderTests.cs 
similarity index 89% rename from test/Microsoft.ML.Tests/MemoryCollectionTests.cs rename to test/Microsoft.ML.Tests/CollectionLoaderTests.cs index 3f96b7a7b7..b73cee363e 100644 --- a/test/Microsoft.ML.Tests/MemoryCollectionTests.cs +++ b/test/Microsoft.ML.Tests/CollectionLoaderTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; @@ -15,25 +16,24 @@ namespace Microsoft.ML.EntryPoints.Tests { - public class MemoryCollectionTests : BaseTestClass + public class CollectionLoaderTests : BaseTestClass { - public MemoryCollectionTests(ITestOutputHelper output) + public CollectionLoaderTests(ITestOutputHelper output) : base(output) { - } [Fact] public void CheckConstructor() { - Assert.NotNull(MemoryCollection.Create(new List() { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(MemoryCollection.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(MemoryCollection.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } }.AsEnumerable())); + Assert.NotNull(CollectionLoader.Create(new List() { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(CollectionLoader.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(CollectionLoader.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } }.AsEnumerable())); bool thrown = false; try { - MemoryCollection.Create(new List()); + CollectionLoader.Create(new List()); } catch { @@ -44,7 +44,7 @@ public void CheckConstructor() thrown = false; try { - MemoryCollection.Create(new Input[0]); + CollectionLoader.Create(new Input[0]); } catch { @@ -56,7 +56,7 @@ public void CheckConstructor() [Fact] public void CanSuccessfullyApplyATransform() { - var collection = MemoryCollection.Create(new List() { new Input { 
Number1 = 1, String1 = "1" } }); + var collection = CollectionLoader.Create(new List() { new Input { Number1 = 1, String1 = "1" } }); using (var environment = new TlcEnvironment()) { Experiment experiment = environment.CreateExperiment(); @@ -71,7 +71,7 @@ public void CanSuccessfullyApplyATransform() [Fact] public void CanSuccessfullyEnumerated() { - var collection = MemoryCollection.Create(new List() { + var collection = CollectionLoader.Create(new List() { new Input { Number1 = 1, String1 = "1" }, new Input { Number1 = 2, String1 = "2" }, new Input { Number1 = 3, String1 = "3" } @@ -138,7 +138,7 @@ public void CanTrain() new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f ,PetalLength=0.3f, PetalWidth=5.1f, Label=0} }; - var collection = MemoryCollection.Create(data); + var collection = CollectionLoader.Create(data); pipeline.Add(collection); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", @@ -155,7 +155,7 @@ public void CanTrain() }); pipeline = new LearningPipeline(); - collection = MemoryCollection.Create(data.AsEnumerable()); + collection = CollectionLoader.Create(data.AsEnumerable()); pipeline.Add(collection); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); From ab86b09bd78c156b5cc70d313dc2b3367852d998 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Fri, 11 May 2018 11:05:38 -0700 Subject: [PATCH 13/16] make class static --- src/Microsoft.ML/Data/CollectionLoader.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML/Data/CollectionLoader.cs b/src/Microsoft.ML/Data/CollectionLoader.cs index f4527b1b8d..4275363de5 100644 --- a/src/Microsoft.ML/Data/CollectionLoader.cs +++ b/src/Microsoft.ML/Data/CollectionLoader.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.Data { - public class CollectionLoader + public static class CollectionLoader { /// /// 
Creates pipeline loader. Support shuffle. From ebe6f33a9e576cc10bde9bdbd1f2cfc89a03681c Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Fri, 11 May 2018 14:57:44 -0700 Subject: [PATCH 14/16] not a loader --- ...ctionLoader.cs => CollectionDataSource.cs} | 26 +++++++++---------- ...rTests.cs => CollectionDataSourceTests.cs} | 22 ++++++++-------- 2 files changed, 24 insertions(+), 24 deletions(-) rename src/Microsoft.ML/Data/{CollectionLoader.cs => CollectionDataSource.cs} (74%) rename test/Microsoft.ML.Tests/{CollectionLoaderTests.cs => CollectionDataSourceTests.cs} (86%) diff --git a/src/Microsoft.ML/Data/CollectionLoader.cs b/src/Microsoft.ML/Data/CollectionDataSource.cs similarity index 74% rename from src/Microsoft.ML/Data/CollectionLoader.cs rename to src/Microsoft.ML/Data/CollectionDataSource.cs index 4275363de5..782692d100 100644 --- a/src/Microsoft.ML/Data/CollectionLoader.cs +++ b/src/Microsoft.ML/Data/CollectionDataSource.cs @@ -11,25 +11,25 @@ namespace Microsoft.ML.Data { - public static class CollectionLoader + public static class CollectionDataSource { /// - /// Creates pipeline loader. Support shuffle. + /// Creates pipeline data source. Support shuffle. /// public static ILearningPipelineLoader Create(IList data) where T : class { - return new ListCollectionLoader(data); + return new ListDataSource(data); } /// - /// Creates pipeline loader which can't be shuffled. + /// Creates pipeline data source which can't be shuffled. 
/// public static ILearningPipelineLoader Create(IEnumerable data) where T : class { - return new EnumerableCollectionLoader(data); + return new EnumerableDataSource(data); } - private abstract class BaseCollectionLoader : ILearningPipelineLoader where TInput : class + private abstract class BaseDataSource : ILearningPipelineLoader where TInput : class { private Data.DataViewReference _dataViewEntryPoint; private IDataView _dataView; @@ -39,7 +39,7 @@ public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Exper Contracts.Assert(previousStep == null); _dataViewEntryPoint = new Data.DataViewReference(); var importOutput = experiment.Add(_dataViewEntryPoint); - return new CollectionLoaderPipelineStep(importOutput.Data); + return new CollectionDataSourcePipelineStep(importOutput.Data); } public void SetInput(IHostEnvironment environment, Experiment experiment) @@ -52,11 +52,11 @@ public void SetInput(IHostEnvironment environment, Experiment experiment) public abstract IDataView GetDataView(IHostEnvironment environment); } - private class EnumerableCollectionLoader : BaseCollectionLoader where TInput : class + private class EnumerableDataSource : BaseDataSource where TInput : class { private readonly IEnumerable _enumerableCollection; - public EnumerableCollectionLoader(IEnumerable collection) + public EnumerableDataSource(IEnumerable collection) { Contracts.CheckValue(collection, nameof(collection)); _enumerableCollection = collection; @@ -68,11 +68,11 @@ public override IDataView GetDataView(IHostEnvironment environment) } } - private class ListCollectionLoader : BaseCollectionLoader where TInput : class + private class ListDataSource : BaseDataSource where TInput : class { private readonly IList _listCollection; - public ListCollectionLoader(IList collection) + public ListDataSource(IList collection) { Contracts.CheckParamValue(Utils.Size(collection) > 0, collection, nameof(collection), "Must be non-empty"); _listCollection = collection; @@ -84,9 
+84,9 @@ public override IDataView GetDataView(IHostEnvironment environment) } } - private class CollectionLoaderPipelineStep : ILearningPipelineDataStep + private class CollectionDataSourcePipelineStep : ILearningPipelineDataStep { - public CollectionLoaderPipelineStep(Var data) + public CollectionDataSourcePipelineStep(Var data) { Data = data; } diff --git a/test/Microsoft.ML.Tests/CollectionLoaderTests.cs b/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs similarity index 86% rename from test/Microsoft.ML.Tests/CollectionLoaderTests.cs rename to test/Microsoft.ML.Tests/CollectionDataSourceTests.cs index b73cee363e..923d4eb375 100644 --- a/test/Microsoft.ML.Tests/CollectionLoaderTests.cs +++ b/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs @@ -16,9 +16,9 @@ namespace Microsoft.ML.EntryPoints.Tests { - public class CollectionLoaderTests : BaseTestClass + public class CollectionDataSourceTests : BaseTestClass { - public CollectionLoaderTests(ITestOutputHelper output) + public CollectionDataSourceTests(ITestOutputHelper output) : base(output) { } @@ -26,14 +26,14 @@ public CollectionLoaderTests(ITestOutputHelper output) [Fact] public void CheckConstructor() { - Assert.NotNull(CollectionLoader.Create(new List() { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(CollectionLoader.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); - Assert.NotNull(CollectionLoader.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } }.AsEnumerable())); + Assert.NotNull(CollectionDataSource.Create(new List() { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(CollectionDataSource.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } })); + Assert.NotNull(CollectionDataSource.Create(new Input[1] { new Input { Number1 = 1, String1 = "1" } }.AsEnumerable())); bool thrown = false; try { - CollectionLoader.Create(new List()); + CollectionDataSource.Create(new List()); } catch { @@ -44,7 +44,7 @@ public void 
CheckConstructor() thrown = false; try { - CollectionLoader.Create(new Input[0]); + CollectionDataSource.Create(new Input[0]); } catch { @@ -56,7 +56,7 @@ public void CheckConstructor() [Fact] public void CanSuccessfullyApplyATransform() { - var collection = CollectionLoader.Create(new List() { new Input { Number1 = 1, String1 = "1" } }); + var collection = CollectionDataSource.Create(new List() { new Input { Number1 = 1, String1 = "1" } }); using (var environment = new TlcEnvironment()) { Experiment experiment = environment.CreateExperiment(); @@ -71,7 +71,7 @@ public void CanSuccessfullyApplyATransform() [Fact] public void CanSuccessfullyEnumerated() { - var collection = CollectionLoader.Create(new List() { + var collection = CollectionDataSource.Create(new List() { new Input { Number1 = 1, String1 = "1" }, new Input { Number1 = 2, String1 = "2" }, new Input { Number1 = 3, String1 = "3" } @@ -138,7 +138,7 @@ public void CanTrain() new IrisData { SepalLength = 1f, SepalWidth = 1f ,PetalLength=0.3f, PetalWidth=5.1f, Label=1}, new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f ,PetalLength=0.3f, PetalWidth=5.1f, Label=0} }; - var collection = CollectionLoader.Create(data); + var collection = CollectionDataSource.Create(data); pipeline.Add(collection); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", @@ -155,7 +155,7 @@ public void CanTrain() }); pipeline = new LearningPipeline(); - collection = CollectionLoader.Create(data.AsEnumerable()); + collection = CollectionDataSource.Create(data.AsEnumerable()); pipeline.Add(collection); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); From 04ff4693897c4a4f68de8172e204af02b1435d52 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Fri, 11 May 2018 15:41:57 -0700 Subject: [PATCH 15/16] remove alias in entrypoint --- ZBaselines/Common/EntryPoints/core_manifest.json | 3 --- src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs | 2 +- 2 
files changed, 1 insertion(+), 4 deletions(-) diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index fac2e1481d..7529b70212 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -10,9 +10,6 @@ "Name": "Data", "Type": "DataView", "Desc": "Pointer to IDataView in memory", - "Aliases": [ - "data" - ], "Required": true, "SortOrder": 1.0, "IsNullable": false diff --git a/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs b/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs index 18cd8da812..9fe7c81a35 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs @@ -14,7 +14,7 @@ public class InMemoryDataView { public sealed class Input { - [Argument(ArgumentType.Required, ShortName = "data", HelpText = "Pointer to IDataView in memory", SortOrder = 1)] + [Argument(ArgumentType.Required, HelpText = "Pointer to IDataView in memory", SortOrder = 1)] public IDataView Data; } From 9698d19e211f13988c57da1544eacc9cd809b859 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Mon, 14 May 2018 17:06:16 -0700 Subject: [PATCH 16/16] address comments --- ZBaselines/Common/EntryPoints/core_ep-list.tsv | 2 +- src/Microsoft.ML/Data/CollectionDataSource.cs | 3 +++ .../EntryPoints/{InMemoryDataView.cs => DataViewReference.cs} | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) rename src/Microsoft.ML/Runtime/EntryPoints/{InMemoryDataView.cs => DataViewReference.cs} (88%) diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 4ae55496c8..3f6639caad 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -1,4 +1,4 @@ -Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.Runtime.EntryPoints.InMemoryDataView ImportData 
Microsoft.ML.Runtime.EntryPoints.InMemoryDataView+Input Microsoft.ML.Runtime.EntryPoints.InMemoryDataView+Output +Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.Runtime.EntryPoints.DataViewReference ImportData Microsoft.ML.Runtime.EntryPoints.DataViewReference+Input Microsoft.ML.Runtime.EntryPoints.DataViewReference+Output Data.IDataViewArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput Data.PredictorModelArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput Data.TextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output diff --git a/src/Microsoft.ML/Data/CollectionDataSource.cs b/src/Microsoft.ML/Data/CollectionDataSource.cs index 782692d100..56523fc994 100644 --- a/src/Microsoft.ML/Data/CollectionDataSource.cs +++ b/src/Microsoft.ML/Data/CollectionDataSource.cs @@ -11,6 +11,9 @@ namespace Microsoft.ML.Data { + /// + /// Creates data source for pipeline based on provided collection of data. 
+ /// public static class CollectionDataSource { /// diff --git a/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs b/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs similarity index 88% rename from src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs rename to src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs index 9fe7c81a35..3b1633456d 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/InMemoryDataView.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/DataViewReference.cs @@ -7,10 +7,10 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; -[assembly: LoadableClass(typeof(void), typeof(InMemoryDataView), null, typeof(SignatureEntryPointModule), "InMemoryDataView")] +[assembly: LoadableClass(typeof(void), typeof(DataViewReference), null, typeof(SignatureEntryPointModule), "DataViewReference")] namespace Microsoft.ML.Runtime.EntryPoints { - public class InMemoryDataView + public class DataViewReference { public sealed class Input {