diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index 6ad93c3886..58e24041f1 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -5,6 +5,9 @@ MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Core", "src\Microsoft.ML.Core\Microsoft.ML.Core.csproj", "{A6CA6CC6-5D7C-4D7F-A0F5-35E14B383B0A}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{09EADF06-BE25-4228-AB53-95AE3E15B530}" + ProjectSection(SolutionItems) = preProject + src\Source.ruleset = src\Source.ruleset + EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{AED9C836-31E3-4F3F-8ABC-929555D3F3C4}" EndProject @@ -88,6 +91,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.CpuMath", "Mic pkg\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.symbols.nupkgproj = pkg\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.symbols.nupkgproj EndProjectSection EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools-local", "tools-local", "{7F13E156-3EBA-4021-84A5-CD56BA72F99E}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer", "tools-local\Microsoft.ML.CodeAnalyzer\Microsoft.ML.CodeAnalyzer.csproj", "{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer.Tests", "test\Microsoft.ML.CodeAnalyzer.Tests\Microsoft.ML.CodeAnalyzer.Tests.csproj", "{3E4ABF07-7970-4BE6-B45B-A13D3C397545}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -304,6 +313,22 @@ Global {DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}.Release|Any CPU.Build.0 = Release|Any CPU {DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU {DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Debug|Any CPU.ActiveCfg = 
Debug|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Release|Any CPU.Build.0 = Release|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release|Any CPU.Build.0 = Release|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU + {3E4ABF07-7970-4BE6-B45B-A13D3C397545}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -340,6 +365,8 @@ Global {001F3B4E-FBE4-4001-AFD2-A6A989CD1C25} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {DCF46B79-1FDB-4DBA-A263-D3D64E3AAA27} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {BF66A305-DF10-47E4-8D81-42049B149D2B} = {D3D38B03-B557-484D-8348-8BADEE4DF592} + {B4E55B2D-2A92-46E7-B72F-E76D6FD83440} = {7F13E156-3EBA-4021-84A5-CD56BA72F99E} + {3E4ABF07-7970-4BE6-B45B-A13D3C397545} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git 
a/build/Dependencies.props b/build/Dependencies.props index 21e65c9007..5325011f05 100644 --- a/build/Dependencies.props +++ b/build/Dependencies.props @@ -7,6 +7,6 @@ 4.4.0 4.3.0 1.0.0-beta-62824-02 - 2.1.2.2 + 2.1.2.2 diff --git a/src/Directory.Build.props b/src/Directory.Build.props index cedfa39442..113da3575a 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -11,7 +11,18 @@ $(NoWarn);1591 $(WarningsNotAsErrors);1591 - + $(MSBuildThisFileDirectory)\Source.ruleset + + + false + Analyzer + + + + diff --git a/src/Microsoft.ML.Api/ApiUtils.cs b/src/Microsoft.ML.Api/ApiUtils.cs index 4b7daf0a74..8b8cb5871b 100644 --- a/src/Microsoft.ML.Api/ApiUtils.cs +++ b/src/Microsoft.ML.Api/ApiUtils.cs @@ -46,7 +46,7 @@ private static OpCode GetAssignmentOpCode(Type t) /// /// Each of the specialized 'peek' methods copies the appropriate field value of an instance of T - /// into the provided buffer. So, the call is 'peek(userObject, ref destination)' and the logic is + /// into the provided buffer. So, the call is 'peek(userObject, ref destination)' and the logic is /// indentical to 'destination = userObject.##FIELD##', where ##FIELD## is defined per peek method. /// internal static Delegate GeneratePeek(InternalSchemaDefinition.Column column) @@ -83,7 +83,7 @@ private static Delegate GeneratePeek(FieldInfo fieldInfo, Op /// /// Each of the specialized 'poke' methods sets the appropriate field value of an instance of T - /// to the provided value. So, the call is 'peek(userObject, providedValue)' and the logic is + /// to the provided value. So, the call is 'peek(userObject, providedValue)' and the logic is /// indentical to 'userObject.##FIELD## = providedValue', where ##FIELD## is defined per poke method. 
/// internal static Delegate GeneratePoke(InternalSchemaDefinition.Column column) diff --git a/src/Microsoft.ML.Api/CodeGenerationUtils.cs b/src/Microsoft.ML.Api/CodeGenerationUtils.cs index 74f262c57c..7af0fb85ed 100644 --- a/src/Microsoft.ML.Api/CodeGenerationUtils.cs +++ b/src/Microsoft.ML.Api/CodeGenerationUtils.cs @@ -97,12 +97,12 @@ public static string GetCSharpString(CSharpCodeProvider codeProvider, string val } /// - /// Gets the C# strings representing the type name for a variable corresponding to - /// the column type. - /// - /// If the type is a vector, then controls whether the array field is + /// Gets the C# strings representing the type name for a variable corresponding to + /// the column type. + /// + /// If the type is a vector, then controls whether the array field is /// generated or . - /// + /// /// If additional attributes are required, they are appended to the list. /// private static string GetBackingTypeName(ColumnType colType, bool useVBuffer, List attributes) diff --git a/src/Microsoft.ML.Api/ComponentCreation.cs b/src/Microsoft.ML.Api/ComponentCreation.cs index 0d164b6124..3080a8197c 100644 --- a/src/Microsoft.ML.Api/ComponentCreation.cs +++ b/src/Microsoft.ML.Api/ComponentCreation.cs @@ -11,14 +11,14 @@ namespace Microsoft.ML.Runtime.Api { /// - /// This class defines extension methods for an to facilitate creating + /// This class defines extension methods for an to facilitate creating /// components (loaders, transforms, trainers, scorers, evaluators, savers). /// public static class ComponentCreation { /// /// Create a new data view which is obtained by appending all columns of all the source data views. - /// If the data views are of different length, the resulting data view will have the length equal to the + /// If the data views are of different length, the resulting data view will have the length equal to the /// length of the shortest source. /// /// The host environment to use. 
@@ -59,11 +59,11 @@ public static RoleMappedData CreateExamples(this IHostEnvironment env, IDataView /// Create a new over an in-memory collection of the items of user-defined type. /// The user maintains ownership of the and the resulting data view will /// never alter the contents of the . - /// Since is assumed to be immutable, the user is expected to not + /// Since is assumed to be immutable, the user is expected to not /// modify the contents of while the data view is being actively cursored. - /// + /// /// One typical usage for in-memory data view could be: create the data view, train a predictor. - /// Once the predictor is fully trained, modify the contents of the underlying collection and + /// Once the predictor is fully trained, modify the contents of the underlying collection and /// train another predictor. /// /// The user-defined item type. @@ -88,9 +88,9 @@ public static IDataView CreateDataView(this IHostEnvironment env, IList is assumed to be immutable, the user is expected to support /// multiple enumeration of the that would return the same results, unless /// the user knows that the data will only be cursored once. - /// + /// /// One typical usage for streaming data view could be: create the data view that lazily loads data - /// as needed, then apply pre-trained transformations to it and cursor through it for transformation + /// as needed, then apply pre-trained transformations to it and cursor through it for transformation /// results. This is how is implemented. /// /// The user-defined item type. @@ -191,7 +191,7 @@ public static PredictionEngine CreatePredictionEngine(th /// /// Create a prediction engine. /// This encapsulates the 'classic' prediction problem, where the input is denoted by the float array of features, - /// and the output is a float score. For binary classification predictors that can output probability, there are output + /// and the output is a float score. 
For binary classification predictors that can output probability, there are output /// fields that report the predicted label and probability. /// /// The host environment to use. @@ -207,7 +207,7 @@ public static SimplePredictionEngine CreateSimplePredictionEngine(this IHostEnvi /// /// Load the transforms (but not loader) from the model steram and apply them to the specified data. - /// It is acceptable to have no transforms in the model stream: in this case the original + /// It is acceptable to have no transforms in the model stream: in this case the original /// will be returned. /// /// The host environment to use. diff --git a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs index 6ecff5b204..341e3a72af 100644 --- a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs +++ b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs @@ -285,7 +285,7 @@ private Delegate CreateGetter(int index) } // REVIEW: The converting getter invokes a type conversion delegate on every call, so it's inherently slower - // than the 'direct' getter. We don't have good indication of this to the user, and the selection + // than the 'direct' getter. We don't have good indication of this to the user, and the selection // of affected types is pretty arbitrary (signed integers and bools, but not uints and floats). private Delegate CreateConvertingArrayGetterDelegate(int index, Func convert) { @@ -396,7 +396,7 @@ private void CheckColumnInRange(int columnIndex) } /// - /// An in-memory data view based on the IList of data. + /// An in-memory data view based on the IList of data. /// Supports shuffling. /// private sealed class ListDataView : DataViewBase @@ -492,11 +492,11 @@ protected override bool MoveManyCore(long count) } /// - /// An in-memory data view based on the IEnumerable of data. + /// An in-memory data view based on the IEnumerable of data. /// Doesn't support shuffling. 
- /// + /// /// This class is public because prediction engine wants to call its - /// for performance reasons. + /// for performance reasons. /// public sealed class StreamingDataView : DataViewBase where TRow : class @@ -578,7 +578,7 @@ protected override bool MoveNextCore() /// /// This represents the 'infinite data view' over one (mutable) user-defined object. - /// The 'current row' object can be updated at any time, this will affect all the + /// The 'current row' object can be updated at any time, this will affect all the /// newly created cursors, but not the ones already existing. /// public sealed class SingleRowLoopDataView : DataViewBase @@ -731,7 +731,7 @@ public abstract partial class MetadataInfo /// public ColumnType MetadataType; /// - /// The string identifier of the metadata. Some identifiers have special meaning, + /// The string identifier of the metadata. Some identifiers have special meaning, /// like "SlotNames", but any other identifiers can be used. /// public readonly string Kind; @@ -757,7 +757,7 @@ public sealed class MetadataInfo : MetadataInfo /// /// Constructor for metadata of value type T. /// - /// The string identifier of the metadata. Some identifiers have special meaning, + /// The string identifier of the metadata. Some identifiers have special meaning, /// like "SlotNames", but any other identifiers can be used. /// Metadata value. /// Type of the metadata. diff --git a/src/Microsoft.ML.Api/GenerateCodeCommand.cs b/src/Microsoft.ML.Api/GenerateCodeCommand.cs index 0bca5edfb3..26136971af 100644 --- a/src/Microsoft.ML.Api/GenerateCodeCommand.cs +++ b/src/Microsoft.ML.Api/GenerateCodeCommand.cs @@ -21,7 +21,7 @@ namespace Microsoft.ML.Runtime.Api { /// /// Generates the sample prediction code for a given model file, with correct input and output classes. - /// + /// /// REVIEW: Consider adding support for generating VBuffers instead of arrays, maybe for high dimensionality vectors. 
/// public sealed class GenerateCodeCommand : ICommand @@ -45,7 +45,7 @@ public sealed class Arguments ShortName = "sparse", SortOrder = 102)] public bool SparseVectorDeclaration; - // REVIEW: currently, it's only used in unit testing to not generate the paths into the test output folder. + // REVIEW: currently, it's only used in unit testing to not generate the paths into the test output folder. // However, it might be handy for automation scenarios, so I've added this as a hidden option. [Argument(ArgumentType.AtMostOnce, HelpText = "A location of the model file to put into generated file", Hide = true)] public string ModelNameOverride; diff --git a/src/Microsoft.ML.Api/InternalSchemaDefinition.cs b/src/Microsoft.ML.Api/InternalSchemaDefinition.cs index 2b0f056214..3edf7599a4 100644 --- a/src/Microsoft.ML.Api/InternalSchemaDefinition.cs +++ b/src/Microsoft.ML.Api/InternalSchemaDefinition.cs @@ -76,12 +76,12 @@ private Column(string columnName, ColumnType columnType, FieldInfo fieldInfo = n } /// - /// Function that checks whether the InternalSchemaDefinition.Column is a valid one. + /// Function that checks whether the InternalSchemaDefinition.Column is a valid one. /// To be valid, the Column must: /// 1. Have non-empty values for ColumnName and ColumnType /// 2. Have a non-empty value for FieldInfo iff it is a field column, else /// ReturnParameterInfo and Generator iff it is a computed column - /// 3. Generator must have the method inputs (TRow rowObject, + /// 3. Generator must have the method inputs (TRow rowObject, /// long position, ref TValue outputValue) in that order. /// [Conditional("DEBUG")] @@ -133,7 +133,7 @@ private InternalSchemaDefinition(Column[] columns) /// /// Given a field info on a type, returns whether this appears to be a vector type, /// and also the associated data kind for this type. If a data kind could not - /// be determined, this will throw. + /// be determined, this will throw. /// /// The field info to inspect. 
/// Whether this appears to be a vector type. @@ -149,7 +149,7 @@ public static void GetVectorAndKind(FieldInfo fieldInfo, out bool isVector, out /// /// Given a parameter info on a type, returns whether this appears to be a vector type, /// and also the associated data kind for this type. If a data kind could not - /// be determined, this will throw. + /// be determined, this will throw. /// /// The parameter info to inspect. /// Whether this appears to be a vector type. @@ -165,7 +165,7 @@ public static void GetVectorAndKind(ParameterInfo parameterInfo, out bool isVect /// /// Given a type and name for a variable, returns whether this appears to be a vector type, /// and also the associated data kind for this type. If a data kind could not - /// be determined, this will throw. + /// be determined, this will throw. /// /// The type of the variable to inspect. /// The name of the variable to inspect. @@ -222,7 +222,7 @@ public static InternalSchemaDefinition Create(Type userType, SchemaDefinition us col.MemberName, userType.FullName); - //Clause to handle the field that may be used to expose the cursor channel. + //Clause to handle the field that may be used to expose the cursor channel. //This field does not need a column. if (fieldInfo.FieldType == typeof(IChannel)) continue; @@ -251,7 +251,7 @@ public static InternalSchemaDefinition Create(Type userType, SchemaDefinition us } else { - // Make sure that the types are compatible with the declared type, including + // Make sure that the types are compatible with the declared type, including // whether it is a vector type. 
if (isVector != col.ColumnType.IsVector) { diff --git a/src/Microsoft.ML.Api/LambdaTransform.cs b/src/Microsoft.ML.Api/LambdaTransform.cs index 93635c6d7d..506c675524 100644 --- a/src/Microsoft.ML.Api/LambdaTransform.cs +++ b/src/Microsoft.ML.Api/LambdaTransform.cs @@ -37,7 +37,7 @@ public static class LambdaTransform /// different data by calling ), and the transformed data (which can be /// enumerated upon by calling GetRowCursor or AsCursorable{TRow}). If or /// implement the interface, they will be disposed after use. - /// + /// /// This is a 'stateless non-savable' version of the transform. /// /// The host environment to use. @@ -78,7 +78,7 @@ public static ITransformTemplate CreateMap(IHostEnvironment env, IDa /// different data by calling ), and the transformed data (which can be /// enumerated upon by calling GetRowCursor or AsCursorable{TRow}). If or /// implement the interface, they will be disposed after use. - /// + /// /// This is a 'stateless savable' version of the transform: save and load routines must be provided. /// /// The host environment to use. @@ -123,7 +123,7 @@ public static ITransformTemplate CreateMap(IHostEnvironment env, IDa /// /// This is a 'stateful non-savable' version of the map transform: the mapping function is guaranteed to be invoked once per - /// every row of the data set, in sequence; one user-defined state object will be allocated per cursor and passed to the + /// every row of the data set, in sequence; one user-defined state object will be allocated per cursor and passed to the /// map function every time. If , , or /// implement the interface, they will be disposed after use. 
/// @@ -164,7 +164,7 @@ public static ITransformTemplate CreateMap(IHostEnvironment /// /// This is a 'stateful savable' version of the map transform: the mapping function is guaranteed to be invoked once per - /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the + /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the /// map function every time; save and load routines must be provided. If , , /// or implement the interface, they will be disposed after use. /// @@ -217,8 +217,8 @@ public static ITransformTemplate CreateMap(IHostEnvironment /// This creates a filter transform that can 'accept' or 'decline' any row of the data based on the contents of the row /// or state of the cursor. /// This is a 'stateful non-savable' version of the filter: the filter function is guaranteed to be invoked once per - /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the - /// filter function every time. + /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the + /// filter function every time. /// If or implement the interface, they will be disposed after use. /// /// The type that describes what 'source' columns are consumed from the @@ -251,7 +251,7 @@ public static ITransformTemplate CreateFilter(IHostEnvironment env /// This creates a filter transform that can 'accept' or 'decline' any row of the data based on the contents of the row /// or state of the cursor. 
/// This is a 'stateful savable' version of the filter: the filter function is guaranteed to be invoked once per - /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the + /// every row of the data set, in sequence (non-parallelizable); one user-defined state object will be allocated per cursor and passed to the /// filter function every time; save and load routines must be provided. /// If or implement the interface, they will be disposed after use. /// @@ -294,11 +294,11 @@ public static ITransformTemplate CreateFilter(IHostEnvironment env } /// - /// Defines common ancestor for various flavors of lambda-based user-defined transforms that may or may not be + /// Defines common ancestor for various flavors of lambda-based user-defined transforms that may or may not be /// serializable. - /// + /// /// In order for the transform to be serializable, the user should specify a save and load delegate. - /// Specifically, for this the user has to provide the following things: + /// Specifically, for this the user has to provide the following things: /// * a custom save action that serializes the transform 'state' to the binary writer. /// * a custom load action that de-serializes the transform from the binary reader. This must be a public static method of a public class. /// diff --git a/src/Microsoft.ML.Api/MapTransform.cs b/src/Microsoft.ML.Api/MapTransform.cs index 914bb63c07..4426721620 100644 --- a/src/Microsoft.ML.Api/MapTransform.cs +++ b/src/Microsoft.ML.Api/MapTransform.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML.Runtime.Api /// It doesn't change the number of rows, and can be seen as a result of application of the user's function /// to every row of the input data. 
/// Similarly to the existing 's, this object can be treated as both the 'transformation' algorithm - /// (which can be then applied to different data by calling ), and the transformed data (which can + /// (which can be then applied to different data by calling ), and the transformed data (which can /// be enumerated upon by calling GetRowCursor or AsCursorable{TRow}). /// /// The type that describes what 'source' columns are consumed from the input . @@ -36,8 +36,8 @@ internal sealed class MapTransform : LambdaTransformBase, ITransform private static string RegistrationName { get { return string.Format(RegistrationNameTemplate, typeof(TSrc).FullName, typeof(TDst).FullName); } } /// - /// Create a a map transform that is savable iff and are - /// not null. + /// Create a a map transform that is savable iff and are + /// not null. /// /// The host environment /// The dataview upon which we construct the transform @@ -47,7 +47,7 @@ internal sealed class MapTransform : LambdaTransformBase, ITransform /// A function that given the serialization stream and a data view, returns /// an . The intent is, this returned object should itself be a /// , but this is not strictly necessary. This delegate should be - /// a static non-lambda method that this assembly can legally call. May be null simultaneously with + /// a static non-lambda method that this assembly can legally call. May be null simultaneously with /// . /// The schema definition overrides for /// The schema definition overrides for diff --git a/src/Microsoft.ML.Api/PredictionEngine.cs b/src/Microsoft.ML.Api/PredictionEngine.cs index 14e2498c93..9410d3b50e 100644 --- a/src/Microsoft.ML.Api/PredictionEngine.cs +++ b/src/Microsoft.ML.Api/PredictionEngine.cs @@ -72,12 +72,12 @@ internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, boo } /// - /// Run the prediction pipe. 
This will enumerate the exactly once, - /// cache all the examples (by reference) into its internal representation and then run + /// Run the prediction pipe. This will enumerate the exactly once, + /// cache all the examples (by reference) into its internal representation and then run /// the transformation pipe. /// /// The examples to run the prediction on. - /// If true, the engine will not allocate memory per output, and + /// If true, the engine will not allocate memory per output, and /// the returned objects will actually always be the same object. The user is /// expected to clone the values himself if needed. /// The that contains all the pipeline results. @@ -141,7 +141,7 @@ public void Reset() /// in-memory data, one example at a time. /// This can also be used with trained pipelines that do not end with a predictor: in this case, the /// 'prediction' will be just the outcome of all the transformations. - /// This is essentially a wrapper for that throws if + /// This is essentially a wrapper for that throws if /// more than one result is returned per call to . /// /// The user-defined type that holds the example. @@ -198,7 +198,7 @@ public TDst Predict(TSrc example) /// /// This class encapsulates the 'classic' prediction problem, where the input is denoted by the float array of features, - /// and the output is a float score. For binary classification predictors that can output probability, there are output + /// and the output is a float score. For binary classification predictors that can output probability, there are output /// fields that report the predicted label and probability. 
/// public sealed class SimplePredictionEngine diff --git a/src/Microsoft.ML.Api/SchemaDefinition.cs b/src/Microsoft.ML.Api/SchemaDefinition.cs index 559e3a81ee..e08845a87e 100644 --- a/src/Microsoft.ML.Api/SchemaDefinition.cs +++ b/src/Microsoft.ML.Api/SchemaDefinition.cs @@ -63,7 +63,7 @@ public VectorTypeAttribute(params int[] dims) } /// - /// Describes column information such as name and the source columns indicies that this + /// Describes column information such as name and the source columns indicies that this /// column encapsulates. /// [AttributeUsage(AttributeTargets.Field, AllowMultiple = false, Inherited = true)] @@ -81,12 +81,12 @@ public ColumnAttribute(string ordinal, string name = null) public string Name { get; } /// - /// Contains positions of indices of source columns in the form - /// of ranges. Examples of range: if we want to include just column - /// with index 1 we can write the range as 1, if we want to include + /// Contains positions of indices of source columns in the form + /// of ranges. Examples of range: if we want to include just column + /// with index 1 we can write the range as 1, if we want to include /// columns 1 to 10 then we can write the range as 1-10 and we want to include all the /// columns from column with index 1 until end then we can write 1-*. - /// + /// /// This takes sequence of ranges that are comma seperated, example: /// 1,2-5,10-* /// @@ -125,7 +125,7 @@ public sealed class NoColumnAttribute : Attribute } /// - /// Mark a member that implements exactly IChannel as being permitted to receive + /// Mark a member that implements exactly IChannel as being permitted to receive /// channel information from an external channel. /// [AttributeUsage(AttributeTargets.Field, AllowMultiple = false, Inherited = true)] @@ -133,11 +133,11 @@ public sealed class CursorChannelAttribute : Attribute { /// /// When passed some object, and a channel, it attempts to pass the channel to the object. 
It - /// passes the channel to the object iff the object has exactly one field marked with the - /// CursorChannelAttribute, and that field implements only the IChannel interface. - /// - /// The function returns the modified object, as well as a boolean indicator of whether it was - /// able to pass the channel to the object. + /// passes the channel to the object iff the object has exactly one field marked with the + /// CursorChannelAttribute, and that field implements only the IChannel interface. + /// + /// The function returns the modified object, as well as a boolean indicator of whether it was + /// able to pass the channel to the object. /// /// The object that attempts to acquire the channel. /// The channel to pass to the object. @@ -206,13 +206,13 @@ public sealed class Column public ColumnType ColumnType { get; set; } /// - /// Whether the column is a computed type. + /// Whether the column is a computed type. /// public bool IsComputed { get { return Generator != null; } } /// - /// The generator function. if the column is computed. - /// + /// The generator function. if the column is computed. + /// public Delegate Generator { get; set; } public Type ReturnType => Generator?.GetMethodInfo().GetParameters().LastOrDefault().ParameterType.GetElementType(); @@ -277,7 +277,7 @@ public IEnumerable> GetMetadataTypes } /// - /// Get or set the column definition by column name. + /// Get or set the column definition by column name. /// If there's no such column: /// - get returns null, /// - set adds a new column. 
@@ -287,9 +287,7 @@ public IEnumerable> GetMetadataTypes /// public Column this[string columnName] { -#pragma warning disable TLC_NoThis // Do not use 'this' keyword for member access get => this.FirstOrDefault(x => x.ColumnName == columnName); -#pragma warning restore TLC_NoThis // Do not use 'this' keyword for member access set { Contracts.CheckValue(value, nameof(value)); @@ -323,9 +321,9 @@ public static SchemaDefinition Create(Type userType) HashSet colNames = new HashSet(); foreach (var fieldInfo in userType.GetFields()) { - // Clause to handle the field that may be used to expose the cursor channel. + // Clause to handle the field that may be used to expose the cursor channel. // This field does not need a column. - // REVIEW: maybe validate the channel attribute now, instead + // REVIEW: maybe validate the channel attribute now, instead // of later at cursor creation. if (fieldInfo.FieldType == typeof(IChannel)) continue; diff --git a/src/Microsoft.ML.Api/SerializableLambdaTransform.cs b/src/Microsoft.ML.Api/SerializableLambdaTransform.cs index 5f761a042b..7de6e522d8 100644 --- a/src/Microsoft.ML.Api/SerializableLambdaTransform.cs +++ b/src/Microsoft.ML.Api/SerializableLambdaTransform.cs @@ -79,7 +79,7 @@ public static ITransformTemplate Create(IHostEnvironment env, ModelLoadContext c /// that method that should be enough to "recover" it, assuming it is a "recoverable" method (recoverable /// here is a loose definition, meaning that is capable /// of creating it, which includes among other things that it's static, non-lambda, accessible to - /// this assembly, etc.). + /// this assembly, etc.). 
/// /// The method that should be "recoverable" /// A string array describing the input method diff --git a/src/Microsoft.ML.Api/StatefulFilterTransform.cs b/src/Microsoft.ML.Api/StatefulFilterTransform.cs index b7e0cf473b..f47b8620a8 100644 --- a/src/Microsoft.ML.Api/StatefulFilterTransform.cs +++ b/src/Microsoft.ML.Api/StatefulFilterTransform.cs @@ -9,10 +9,10 @@ namespace Microsoft.ML.Runtime.Api { - // REVIEW: the current interface to 'state' object may be inadequate: instead of insisting on + // REVIEW: the current interface to 'state' object may be inadequate: instead of insisting on // parameterless constructor, we could take a delegate that would create the state per cursor. /// - /// This transform is similar to , but it allows per-cursor state, + /// This transform is similar to , but it allows per-cursor state, /// as well as the ability to 'accept' or 'filter out' some rows of the supplied . /// The downside is that the provided lambda is eagerly called on every row (not lazily when needed), and /// parallel cursors are not allowed. @@ -38,8 +38,8 @@ internal sealed class StatefulFilterTransform : LambdaTransf private static string RegistrationName { get { return string.Format(RegistrationNameTemplate, typeof(TSrc).FullName, typeof(TDst).FullName); } } /// - /// Create a filter transform that is savable iff and are - /// not null. + /// Create a filter transform that is savable iff and are + /// not null. /// /// The host environment /// The dataview upon which we construct the transform @@ -51,7 +51,7 @@ internal sealed class StatefulFilterTransform : LambdaTransf /// A function that given the serialization stream and a data view, returns /// an . The intent is, this returned object should itself be a /// , but this is not strictly necessary. This delegate should be - /// a static non-lambda method that this assembly can legally call. May be null simultaneously with + /// a static non-lambda method that this assembly can legally call. 
May be null simultaneously with /// . /// The schema definition overrides for /// The schema definition overrides for diff --git a/src/Microsoft.ML.Api/TypedCursor.cs b/src/Microsoft.ML.Api/TypedCursor.cs index 2ba9eeb23a..f6ebaf687f 100644 --- a/src/Microsoft.ML.Api/TypedCursor.cs +++ b/src/Microsoft.ML.Api/TypedCursor.cs @@ -57,7 +57,7 @@ public interface ICursorable /// /// Implementation of the strongly typed Cursorable. - /// Similarly to the 'DataView{T}, this class uses IL generation to create the 'poke' methods that + /// Similarly to the 'DataView{T}, this class uses IL generation to create the 'poke' methods that /// write directly into the fields of the user-defined type. /// internal sealed class TypedCursorable : ICursorable @@ -437,7 +437,7 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit } // REVIEW: The converting getter invokes a type conversion delegate on every call, so it's inherently slower - // than the 'direct' getter. We don't have good indication of this to the user, and the selection + // than the 'direct' getter. We don't have good indication of this to the user, and the selection // of affected types is pretty arbitrary (signed integers and bools, but not uints and floats). private Action CreateConvertingVBufferSetter(IRow input, int col, Delegate poke, Delegate peek, Func convert) { diff --git a/src/Microsoft.ML.Core/CommandLine/CmdParser.cs b/src/Microsoft.ML.Core/CommandLine/CmdParser.cs index b9b9506cf9..eb85fcce12 100644 --- a/src/Microsoft.ML.Core/CommandLine/CmdParser.cs +++ b/src/Microsoft.ML.Core/CommandLine/CmdParser.cs @@ -493,7 +493,7 @@ public static string ArgumentsUsage(IHostEnvironment env, Type type, object defa #if CORECLR /// - /// Fix the window width for the Core build to remove the kernel32.dll dependency. + /// Fix the window width for the Core build to remove the kernel32.dll dependency. 
/// /// public static int GetConsoleWindowWidth() @@ -620,7 +620,7 @@ private static ArgumentInfo GetArgumentInfo(Type type, object defaults) string[] nicks; // Semantics of ShortName: // The string provided represents an array of names separated by commas and spaces, once empty entries are removed. - // 'null' or a singleton array with containing only the long field name means "use the default short name", + // 'null' or a singleton array with containing only the long field name means "use the default short name", // and is represented by the null 'nicks' array. // 'String.Empty' or a string containing only spaces and commas means "no short name", and is represented by an empty 'nicks' array. if (attr.ShortName == null) @@ -1666,7 +1666,7 @@ public bool Finish(CmdParser owner, ArgValue val, object destination) } else if (IsMultiSubComponent) { - // REVIEW: the kind should not be separated from settings: everything related + // REVIEW: the kind should not be separated from settings: everything related // to one item should go into one value, not multiple values if (IsTaggedCollection) { diff --git a/src/Microsoft.ML.Core/ComponentModel/ComponentCatalog.cs b/src/Microsoft.ML.Core/ComponentModel/ComponentCatalog.cs index 28666e7f44..3b56e8bb36 100644 --- a/src/Microsoft.ML.Core/ComponentModel/ComponentCatalog.cs +++ b/src/Microsoft.ML.Core/ComponentModel/ComponentCatalog.cs @@ -385,7 +385,7 @@ private static void CacheLoadedAssemblies() { if (_assemblyQueue == null) { - // Create the loaded assembly queue and dictionary, set up the AssemblyLoad / AssemblyResolve + // Create the loaded assembly queue and dictionary, set up the AssemblyLoad / AssemblyResolve // event handlers and populate the queue / dictionary with all assemblies that are currently loaded. Contracts.Assert(_assemblyQueue == null); Contracts.Assert(_loadedAssemblies == null); @@ -413,7 +413,7 @@ private static void CacheLoadedAssemblies() // Load all assemblies in our directory. 
var moduleName = typeof(ComponentCatalog).Module.FullyQualifiedName; - // If were are loaded in the context of SQL CLR then the FullyQualifiedName and Name properties are set to + // If were are loaded in the context of SQL CLR then the FullyQualifiedName and Name properties are set to // string "" and we skip scanning current directory. if (moduleName != "") { @@ -451,7 +451,7 @@ private static void CacheLoadedAssemblies() #if TRACE_ASSEMBLY_LOADING // The "" no-op argument is necessary because WriteLine has multiple overloads, and with two strings - // it will be the one that is message/category, rather than format string with + // it will be the one that is message/category, rather than format string with System.Diagnostics.Debug.WriteLine("*** Caching classes in {0}", assembly.FullName, ""); #endif int added = 0; diff --git a/src/Microsoft.ML.Core/Data/ColumnType.cs b/src/Microsoft.ML.Core/Data/ColumnType.cs index 780ef7a7d7..0cff911e77 100644 --- a/src/Microsoft.ML.Core/Data/ColumnType.cs +++ b/src/Microsoft.ML.Core/Data/ColumnType.cs @@ -325,7 +325,7 @@ public static PrimitiveType FromKind(DataKind kind) /// public sealed class TextType : PrimitiveType { - private volatile static TextType _instance; + private static volatile TextType _instance; public static TextType Instance { get @@ -370,7 +370,7 @@ private NumberType(DataKind kind, string name) Contracts.Assert(IsNumber); } - private volatile static NumberType _instI1; + private static volatile NumberType _instI1; public static NumberType I1 { get @@ -381,7 +381,7 @@ public static NumberType I1 } } - private volatile static NumberType _instU1; + private static volatile NumberType _instU1; public static NumberType U1 { get @@ -392,7 +392,7 @@ public static NumberType U1 } } - private volatile static NumberType _instI2; + private static volatile NumberType _instI2; public static NumberType I2 { get @@ -403,7 +403,7 @@ public static NumberType I2 } } - private volatile static NumberType _instU2; + private 
static volatile NumberType _instU2; public static NumberType U2 { get @@ -414,7 +414,7 @@ public static NumberType U2 } } - private volatile static NumberType _instI4; + private static volatile NumberType _instI4; public static NumberType I4 { get @@ -425,7 +425,7 @@ public static NumberType I4 } } - private volatile static NumberType _instU4; + private static volatile NumberType _instU4; public static NumberType U4 { get @@ -436,7 +436,7 @@ public static NumberType U4 } } - private volatile static NumberType _instI8; + private static volatile NumberType _instI8; public static NumberType I8 { get @@ -447,7 +447,7 @@ public static NumberType I8 } } - private volatile static NumberType _instU8; + private static volatile NumberType _instU8; public static NumberType U8 { get @@ -458,7 +458,7 @@ public static NumberType U8 } } - private volatile static NumberType _instUG; + private static volatile NumberType _instUG; public static NumberType UG { get @@ -469,7 +469,7 @@ public static NumberType UG } } - private volatile static NumberType _instR4; + private static volatile NumberType _instR4; public static NumberType R4 { get @@ -480,7 +480,7 @@ public static NumberType R4 } } - private volatile static NumberType _instR8; + private static volatile NumberType _instR8; public static NumberType R8 { get @@ -496,7 +496,7 @@ public static NumberType Float get { return R4; } } - public new static NumberType FromKind(DataKind kind) + public static new NumberType FromKind(DataKind kind) { switch (kind) { @@ -557,7 +557,7 @@ public override string ToString() /// public sealed class BoolType : PrimitiveType { - private volatile static BoolType _instance; + private static volatile BoolType _instance; public static BoolType Instance { get @@ -589,7 +589,7 @@ public override string ToString() public sealed class DateTimeType : PrimitiveType { - private volatile static DateTimeType _instance; + private static volatile DateTimeType _instance; public static DateTimeType Instance { get 
@@ -621,7 +621,7 @@ public override string ToString() public sealed class DateTimeZoneType : PrimitiveType { - private volatile static DateTimeZoneType _instance; + private static volatile DateTimeZoneType _instance; public static DateTimeZoneType Instance { get @@ -656,7 +656,7 @@ public override string ToString() /// public sealed class TimeSpanType : PrimitiveType { - private volatile static TimeSpanType _instance; + private static volatile TimeSpanType _instance; public static TimeSpanType Instance { get @@ -692,11 +692,11 @@ public override string ToString() /// meaningful. Examples are SSNs, phone numbers, auto-generated/incremented key values, /// class numbers, etc. For example, in multi-class classification, the label is typically /// a class number which is naturally a KeyType. - /// + /// /// KeyTypes can be contiguous (the class number example), in which case they can have /// a cardinality/Count. For non-contiguous KeyTypes the Count property returns zero. /// Any KeyType (contiguous or not) can have a Min value. The Min value is always >= 0. - /// + /// /// Note that the representation value does not necessarily match the logical value. /// For example, if a KeyType has range 1000-5000, then it has a Min of 1000, Count /// of 4001, but the representational values are 1-4001. 
The representation value zero @@ -951,7 +951,7 @@ public bool IsSubtypeOf(VectorType other) if (other == null) return false; - // REVIEW: Perhaps we should allow the case when _itemType is + // REVIEW: Perhaps we should allow the case when _itemType is // a sub-type of other._itemType (in particular for key types) if (!_itemType.Equals(other._itemType)) return false; diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs index 358227399b..32325f44a1 100644 --- a/src/Microsoft.ML.Core/Data/DataKind.cs +++ b/src/Microsoft.ML.Core/Data/DataKind.cs @@ -30,7 +30,7 @@ public enum DataKind : byte Num = R4, TX = 11, -#pragma warning disable TLC_GeneralName // The data kind enum has its own logic, independnet of C# naming conventions. +#pragma warning disable MSML_GeneralName // The data kind enum has its own logic, independnet of C# naming conventions. TXT = TX, Text = TX, @@ -46,7 +46,7 @@ public enum DataKind : byte UG = 16, // Unsigned 16-byte integer. U16 = UG, -#pragma warning restore TLC_GeneralName +#pragma warning restore MSML_GeneralName } /// diff --git a/src/Microsoft.ML.Core/Data/DateTime.cs b/src/Microsoft.ML.Core/Data/DateTime.cs index 52b30b5bb6..d11be2a494 100644 --- a/src/Microsoft.ML.Core/Data/DateTime.cs +++ b/src/Microsoft.ML.Core/Data/DateTime.cs @@ -230,7 +230,7 @@ public DvDateTimeZone(DvDateTime dt, DvTimeSpan offset) /// are within the valid range, and returns a DvDateTime representing the UTC time (dateTime-offset). /// /// The clock time - /// The offset. This value is assumed to be validated as a legal offset: + /// The offset. This value is assumed to be validated as a legal offset: /// a value in whole minutes, between -14 and 14 hours. 
/// The UTC DvDateTime representing the input clock time minus the offset private static DvDateTime ValidateDate(DvDateTime dateTime, ref DvInt2 offset) diff --git a/src/Microsoft.ML.Core/Data/ICursor.cs b/src/Microsoft.ML.Core/Data/ICursor.cs index 264eaa55bb..e1efc842f4 100644 --- a/src/Microsoft.ML.Core/Data/ICursor.cs +++ b/src/Microsoft.ML.Core/Data/ICursor.cs @@ -18,7 +18,7 @@ public interface ICounted /// This is incremented for ICursor when the underlying contents changes, giving clients a way to detect change. /// Generally it's -1 when the object is in an invalid state. In particular, for an , this is -1 /// when the is or . - /// + /// /// Note that this position is not position within the underlying data, but position of this cursor only. /// If one, for example, opened a set of parallel streaming cursors, or a shuffled cursor, each such cursor's /// first valid entry would always have position 0. @@ -30,7 +30,7 @@ public interface ICounted /// batch numbers should be non-decreasing. Furthermore, any given batch number should only appear in one /// of the streams. Order is determined by batch number. The reconciler ensures that each stream (that is /// still active) has at least one item available, then takes the item with the smallest batch number. - /// + /// /// Note that there is no suggestion that the batches for a particular entry will be consistent from /// cursoring to cursoring, except for the consistency in resulting in the same overall ordering. The same /// entry could have different batch numbers from one cursoring to another. There is also no requirement @@ -45,7 +45,7 @@ public interface ICounted /// will produce the same data as a serial cursor or any other shuffled cursor, only shuffled. The ID /// exists for applications that need to reconcile which entry is actually which. Ideally this ID should /// be unique, but for practical reasons, it suffices if collisions are simply extremely improbable. 
- /// + /// /// Note that this ID, while it must be consistent for multiple streams according to the semantics /// above, is not considered part of the data per se. So, to take the example of a data view specifically, /// a single data view must render consistent IDs across all cursorings, but there is no suggestion at @@ -77,7 +77,7 @@ public interface ICursor : ICounted, IDisposable /// Returns the state of the cursor. Before the first call to or /// this should be . After /// any call those move functions that returns true, this should return - /// , + /// , /// CursorState State { get; } diff --git a/src/Microsoft.ML.Core/Data/IDataView.cs b/src/Microsoft.ML.Core/Data/IDataView.cs index db83c15fd9..052a07dc9e 100644 --- a/src/Microsoft.ML.Core/Data/IDataView.cs +++ b/src/Microsoft.ML.Core/Data/IDataView.cs @@ -89,7 +89,7 @@ public interface IDataView : ISchematized /// call. This indicates, that the transform does not YET know the number of rows, but /// may in the future. If lazy is false, then this is permitted to do some work (no more /// that it would normally do for cursoring) to determine the number of rows. - /// + /// /// Most components will return the same answer whether lazy is true or false. Some, like /// a cache, might return null until the cache is fully populated (when lazy is true). When /// lazy is false, such a cache would block until the cache was populated. @@ -110,7 +110,7 @@ public interface IDataView : ISchematized /// has no recommendation, and the implementation should have some default behavior to cover /// this case. Note that this is strictly a recommendation: it is entirely possible that /// an implementation can return a different number of cursors. 
- /// + /// /// The cursors should return the same data as returned through /// , except partitioned: no two cursors /// should return the "same" row as would have been returned through the regular serial cursor, diff --git a/src/Microsoft.ML.Core/Data/IHostEnvironment.cs b/src/Microsoft.ML.Core/Data/IHostEnvironment.cs index 7589ef13ad..b463e52a8e 100644 --- a/src/Microsoft.ML.Core/Data/IHostEnvironment.cs +++ b/src/Microsoft.ML.Core/Data/IHostEnvironment.cs @@ -62,7 +62,7 @@ public interface IHostEnvironment : IChannelProvider, IProgressChannelProvider /// Note that IFileHandle derives from IDisposable. Clients may dispose the IFileHandle when it is /// no longer needed, but they are not required to. The host environment should track all temp file /// handles and ensure that they are disposed properly when the environment is "shut down". - /// + /// /// The suffix and prefix are optional. A common use for suffix is to specify an extension, eg, ".txt". /// The use of suffix and prefix, including whether they have any affect, is up to the host enviroment. /// diff --git a/src/Microsoft.ML.Core/Data/IMlState.cs b/src/Microsoft.ML.Core/Data/IMlState.cs index 98c0e8e5aa..52b0828256 100644 --- a/src/Microsoft.ML.Core/Data/IMlState.cs +++ b/src/Microsoft.ML.Core/Data/IMlState.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Runtime.EntryPoints { /// - /// Dummy interface to allow reference to the AutoMlState object in the C# API (since AutoMlState + /// Dummy interface to allow reference to the AutoMlState object in the C# API (since AutoMlState /// has things that reference C# API, leading to circular dependency). Makes state object an opaque /// black box to the graph. The macro itself will then case to the concrete type. 
/// diff --git a/src/Microsoft.ML.Core/Data/IProgressChannel.cs b/src/Microsoft.ML.Core/Data/IProgressChannel.cs index b5bae12c0b..0f673d9b2a 100644 --- a/src/Microsoft.ML.Core/Data/IProgressChannel.cs +++ b/src/Microsoft.ML.Core/Data/IProgressChannel.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.Runtime /// This is a factory interface for . /// Both and implement this interface, /// to allow for nested progress reporters. - /// + /// /// REVIEW: make implement this, instead of the environment? /// public interface IProgressChannelProvider @@ -24,10 +24,10 @@ public interface IProgressChannelProvider /// /// A common interface for progress reporting. /// It is expected that the progress channel interface is used from only one thread. - /// + /// /// Supported workflow: /// 1) Create the channel via . - /// 2) Call as many times as desired (including 0). + /// 2) Call as many times as desired (including 0). /// Each call to supersedes the previous one. /// 3) Report checkpoints (0 or more) by calling . /// 4) Repeat steps 2-3 as often as necessary. @@ -39,13 +39,13 @@ public interface IProgressChannel : IProgressChannelProvider, IDisposable /// Set up the reporting structure: /// - Set the 'header' of the progress reports, defining which progress units and metrics are going to be reported. /// - Provide a thread-safe delegate to be invoked whenever anyone needs to know the progress. - /// + /// /// It is acceptable to call multiple times (or none), regardless of whether the calculation is running - /// or not. Because of synchronization, the computation should not deny calls to the 'old' + /// or not. Because of synchronization, the computation should not deny calls to the 'old' /// delegates even after a new one is provided. /// /// The header object. - /// The delegate to provide actual progress. The parameter of + /// The delegate to provide actual progress. The parameter of /// the delegate will correspond to the provided . 
void SetHeader(ProgressHeader header, Action fillAction); @@ -53,10 +53,10 @@ public interface IProgressChannel : IProgressChannelProvider, IDisposable /// Submit a 'checkpoint' entry. These entries are guaranteed to be delivered to the progress listener, /// if it is interested. Typically, this would contain some intermediate metrics, that are only calculated /// at certain moments ('checkpoints') of the computation. - /// + /// /// For example, SDCA may report a checkpoint every time it computes the loss, or LBFGS may report a checkpoint /// every iteration. - /// + /// /// The only parameter, , is interpreted in the following fashion: /// * First MetricNames.Length items, if present, are metrics. /// * Subsequent ProgressNames.Length items, if present, are progress units. @@ -92,11 +92,11 @@ public sealed class ProgressHeader /// progress or metrics to report, it is always better to report them. /// /// The metrics that the calculation reports. These are completely independent, and there - /// is no contract on whether the metric values should increase or not. As naming convention, + /// is no contract on whether the metric values should increase or not. As naming convention, /// can have multiple words with spaces, and should be title-cased. /// The names of the progress units, listed from least granular to most granular. /// The idea is that the progress should be lexicographically increasing (like [0,0], [0,10], [1,0], [1,15], [2,5] etc.). - /// As naming convention, should be lower-cased and typically plural + /// As naming convention, should be lower-cased and typically plural /// (e.g. iterations, clusters, examples). public ProgressHeader(string[] metricNames, string[] unitNames) { @@ -108,7 +108,7 @@ public ProgressHeader(string[] metricNames, string[] unitNames) } /// - /// A constructor for no metrics, just progress units. As naming convention, should be lower-cased + /// A constructor for no metrics, just progress units. 
As naming convention, should be lower-cased /// and typically plural (e.g. iterations, clusters, examples). /// public ProgressHeader(params string[] unitNames) @@ -118,7 +118,7 @@ public ProgressHeader(params string[] unitNames) } /// - /// A metric/progress holder item. + /// A metric/progress holder item. /// public interface IProgressEntry { @@ -130,7 +130,7 @@ public interface IProgressEntry /// /// Set the progress value for the index to , - /// and the limit value to . If is a NAN, it is set to null instead. + /// and the limit value to . If is a NAN, it is set to null instead. /// void SetProgress(int index, Double value, Double lim); diff --git a/src/Microsoft.ML.Core/Data/ISchemaBindableMapper.cs b/src/Microsoft.ML.Core/Data/ISchemaBindableMapper.cs index 466611c11a..6adac55f1b 100644 --- a/src/Microsoft.ML.Core/Data/ISchemaBindableMapper.cs +++ b/src/Microsoft.ML.Core/Data/ISchemaBindableMapper.cs @@ -9,15 +9,15 @@ namespace Microsoft.ML.Runtime.Data { /// /// A mapper that can be bound to a (which is an ISchema, with mappings from column kinds - /// to columns). Binding an to a produces an + /// to columns). Binding an to a produces an /// , which is an interface that has methods to return the names and indices of the input columns /// needed by the mapper to compute its output. The is an extention to this interface, that - /// can also produce an output IRow given an input IRow. The IRow produced generally contains only the output columns of the mapper, and not + /// can also produce an output IRow given an input IRow. The IRow produced generally contains only the output columns of the mapper, and not /// the input columns (but there is nothing preventing an from mapping input columns directly to outputs). 
- /// This interface is implemented by wrappers of IValueMapper based predictors, which are predictors that take a single + /// This interface is implemented by wrappers of IValueMapper based predictors, which are predictors that take a single /// features column. New predictors can implement directly. Implementing /// includes implementing a corresponding (or ) and a corresponding ISchema - /// for the output schema of the . In case the interface is implemented, + /// for the output schema of the . In case the interface is implemented, /// the SimpleRow class can be used in the method. /// public interface ISchemaBindableMapper @@ -54,7 +54,7 @@ public interface ISchemaBoundMapper /// /// This interface extends with an additional method: . This method - /// takes an input IRow and a predicate indicating which output columns are active, and returns a new IRow + /// takes an input IRow and a predicate indicating which output columns are active, and returns a new IRow /// containing the output columns. /// public interface ISchemaBoundRowMapper : ISchemaBoundMapper @@ -67,11 +67,11 @@ public interface ISchemaBoundRowMapper : ISchemaBoundMapper /// /// Get an IRow based on the input IRow with the indicated active columns. The active columns are those for which - /// predicate(col) returns true. The schema of the returned IRow will be the same as the OutputSchema, but getting + /// predicate(col) returns true. The schema of the returned IRow will be the same as the OutputSchema, but getting /// values on inactive columns will throw. Null predicates are disallowed. /// The schema of input should match the InputSchema. /// This method creates a live connection between the input IRow and the output IRow. 
In particular, when the - /// getters of the output IRow are invoked, they invoke the getters of the input row and base the output values on + /// getters of the output IRow are invoked, they invoke the getters of the input row and base the output values on /// the current values of the input IRow. The output IRow values are re-computed when requested through the getters. /// The optional disposer is invoked by the cursor wrapping, when it no longer needs the IRow. /// If no action is needed when the cursor is Disposed, the override should set disposer to null, @@ -101,7 +101,7 @@ public interface IRowToRowMapper /// predicate(col) returns true. Getting values on inactive columns will throw. Null predicates are disallowed. /// The schema of input should match the InputSchema. /// This method creates a live connection between the input IRow and the output IRow. In particular, when the - /// getters of the output IRow are invoked, they invoke the getters of the input row and base the output values on + /// getters of the output IRow are invoked, they invoke the getters of the input row and base the output values on /// the current values of the input IRow. The output IRow values are re-computed when requested through the getters. /// The optional disposer is invoked by the cursor wrapping, when it no longer needs the IRow. /// If no action is needed when the cursor is Disposed, the override should set disposer to null, diff --git a/src/Microsoft.ML.Core/Data/ITrainerArguments.cs b/src/Microsoft.ML.Core/Data/ITrainerArguments.cs index af74a9abfc..e4fdbbdc59 100644 --- a/src/Microsoft.ML.Core/Data/ITrainerArguments.cs +++ b/src/Microsoft.ML.Core/Data/ITrainerArguments.cs @@ -6,7 +6,7 @@ namespace Microsoft.ML.Runtime { // This is basically a no-op interface put in primarily // for backward binary compat support for AFx. 
- // REVIEW: This interface was removed in TLC 3.0 as part of the + // REVIEW: This interface was removed in TLC 3.0 as part of the // deprecation of the *Factory interfaces, but added back as a temporary // hack. Remove it asap. public interface ITrainerArguments diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index b0a18f6d18..116d521756 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -74,9 +74,9 @@ public static class Kinds /// /// Metadata kind that indicates the ranges within a column that are categorical features. - /// The value is a vector type of ints with dimension of two. The first dimension + /// The value is a vector type of ints with dimension of two. The first dimension /// represents the number of categorical features and second dimension represents the range - /// and is of size two. The range has start and end index(both inclusive) of categorical + /// and is of size two. The range has start and end index(both inclusive) of categorical /// slots within that column. /// public const string CategoricalSlotRanges = "CategoricalSlotRanges"; @@ -156,7 +156,7 @@ public static VectorType GetNamesType(int size) } /// - /// Returns a vector type with item type int and the given size. + /// Returns a vector type with item type int and the given size. /// The range count must be a positive integer. /// This is a standard type for metadata consisting of multiple int values that represent /// categorical slot ranges with in a column. @@ -386,12 +386,12 @@ public static bool IsHidden(this ISchema schema, int col) } /// - /// The categoricalFeatures is a vector of the indices of categorical features slots. + /// The categoricalFeatures is a vector of the indices of categorical features slots. /// This vector should always have an even number of elements, and the elements should be parsed in groups of two consecutive numbers. 
/// So if its value is the range of numbers: 0,2,3,4,8,9 /// look at it as [0,2],[3,4],[8,9]. /// The way to interpret that is: feature with indices 0, 1, and 2 are one categorical - /// Features with indices 3 and 4 are another categorical. Features 5 and 6 don't appear there, so they are not categoricals. + /// Features with indices 3 and 4 are another categorical. Features 5 and 6 don't appear there, so they are not categoricals. /// public static bool TryGetCategoricalFeatureIndices(ISchema schema, int colIndex, out int[] categoricalFeatures) { diff --git a/src/Microsoft.ML.Core/Data/ProgressReporter.cs b/src/Microsoft.ML.Core/Data/ProgressReporter.cs index 384e1bfb61..5f9575cca5 100644 --- a/src/Microsoft.ML.Core/Data/ProgressReporter.cs +++ b/src/Microsoft.ML.Core/Data/ProgressReporter.cs @@ -202,8 +202,8 @@ private ProgressEntry BuildJointEntry(ProgressEntry rootEntry) /// /// This is a 'derived' or 'subordinate' progress channel. - /// - /// The subordinates' Start/Stop events and checkpoints will not be propagated. + /// + /// The subordinates' Start/Stop events and checkpoints will not be propagated. /// When the status is requested, all of the subordinate channels are also invoked, /// and the resulting metrics are then returned in the order of their 'subordinate level'. /// If there's more than one channel with the same level, the order is not defined. @@ -278,7 +278,7 @@ private void Stop() public void Checkpoint(params Double?[] values) { // We are ignoring all checkpoints from subordinates. - // REVIEW: maybe this could be changed in the future. Right now it seems that + // REVIEW: maybe this could be changed in the future. Right now it seems that // this limitation is reasonable. } } @@ -287,7 +287,7 @@ public void Checkpoint(params Double?[] values) /// /// This class listens to the progress reporting channels, caches all checkpoints and /// start/stop events and, on demand, requests current progress on all active calculations. 
- /// + /// /// The public methods of this class should only be called from one thread. /// public sealed class ProgressTracker @@ -303,7 +303,7 @@ public sealed class ProgressTracker /// /// For each calculation, its properties. /// This list is protected by , and it's updated every time a new calculation starts. - /// The entries are cleaned up when the start and stop events are reported (that is, after the first + /// The entries are cleaned up when the start and stop events are reported (that is, after the first /// pull request after the calculation's 'Stop' event). /// private readonly List _infos; @@ -319,8 +319,8 @@ public sealed class ProgressTracker private readonly HashSet _namesUsed; /// - /// This class is an 'event log' for one calculation. - /// + /// This class is an 'event log' for one calculation. + /// /// Every time a calculation is 'started', it gets its own log, so if there are multiple 'start' calls, /// there will be multiple logs. /// @@ -425,12 +425,12 @@ public void Log(ProgressChannel source, ProgressEvent.EventKind kind, ProgressEn } /// - /// Get progress reports from all current calculations. + /// Get progress reports from all current calculations. /// For every calculation the following events will be returned: /// * A start event. /// * Each checkpoint. - /// * If the calculation is finished, the stop event. - /// + /// * If the calculation is finished, the stop event. + /// /// Each of the above events will be returned exactly once. /// If, for one calculation, there's no events in the above categories, the tracker will /// request ('pull') the current progress and return this as an event. @@ -490,14 +490,14 @@ public sealed class ProgressEntry : IProgressEntry /// /// The actual progress (amount of completed units), in the units that are contained in the header. /// Parallel to the header's . Null value indicates 'not applicable now'. 
- /// + /// /// The computation should not modify these arrays directly, and instead rely on , /// and . /// public readonly Double?[] Progress; /// - /// The lim values of each progress unit. + /// The lim values of each progress unit. /// Parallel to the header's . Null value indicates unbounded or unknown. /// public readonly Double?[] ProgressLim; diff --git a/src/Microsoft.ML.Core/Data/RoleMappedSchema.cs b/src/Microsoft.ML.Core/Data/RoleMappedSchema.cs index 2dab48fc58..2e35be86b7 100644 --- a/src/Microsoft.ML.Core/Data/RoleMappedSchema.cs +++ b/src/Microsoft.ML.Core/Data/RoleMappedSchema.cs @@ -88,10 +88,10 @@ public static ColumnInfo CreateFromIndex(ISchema schema, int index) /// /// /// Note that instances of this class are, like instances of , immutable. - /// + /// /// It is often the case that one wishes to bundle the actual data with the role mappings, not just the schema. For /// that case, please use the class. - /// + /// /// Note that there is no need for components consuming a or /// to make use of every defined mapping. Consuming components are also expected to ignore any /// they do not handle. They may very well however complain if a mapping they wanted to see is not present, or the column(s) diff --git a/src/Microsoft.ML.Core/Data/RootCursorBase.cs b/src/Microsoft.ML.Core/Data/RootCursorBase.cs index d5cc611e1d..1ac3858636 100644 --- a/src/Microsoft.ML.Core/Data/RootCursorBase.cs +++ b/src/Microsoft.ML.Core/Data/RootCursorBase.cs @@ -6,7 +6,7 @@ namespace Microsoft.ML.Runtime.Data { - // REVIEW: Since each cursor will create a channel, it would be great that the RootCursorBase takes + // REVIEW: Since each cursor will create a channel, it would be great that the RootCursorBase takes // ownership of the channel so the derived classes don't have to. 
/// diff --git a/src/Microsoft.ML.Core/Data/ServerChannel.cs b/src/Microsoft.ML.Core/Data/ServerChannel.cs index 9c75c19937..5cde023e69 100644 --- a/src/Microsoft.ML.Core/Data/ServerChannel.cs +++ b/src/Microsoft.ML.Core/Data/ServerChannel.cs @@ -26,7 +26,7 @@ public sealed class ServerChannel : ServerChannel.IPendingBundleNotification, ID private readonly string _identifier; // This holds the running collection of named delegates, if any. The dictionary itself - // is lazily initialized only when a listener + // is lazily initialized only when a listener private Dictionary _toPublish; private Action _onPublish; private Bundle _published; diff --git a/src/Microsoft.ML.Core/EntryPoints/EntryPointUtils.cs b/src/Microsoft.ML.Core/EntryPoints/EntryPointUtils.cs index b94e25c9e3..ad07ec86a5 100644 --- a/src/Microsoft.ML.Core/EntryPoints/EntryPointUtils.cs +++ b/src/Microsoft.ML.Core/EntryPoints/EntryPointUtils.cs @@ -35,7 +35,7 @@ public static bool IsValueWithinRange(this TlcModule.RangeAttribute range, objec Contracts.AssertValue(val); Func fn = IsValueWithinRange; // Avoid trying to cast double as float. If range - // was specified using floats, but value being checked + // was specified using floats, but value being checked // is double, change range to be of type double if (range.Type == typeof(float) && val is double) range.CastToDouble(); diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs index f991df73f0..8a4ab8ca43 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleArgs.cs @@ -15,7 +15,7 @@ namespace Microsoft.ML.Runtime.EntryPoints { /// - /// This class defines attributes to annotate module inputs, outputs, entry points etc. when defining + /// This class defines attributes to annotate module inputs, outputs, entry points etc. when defining /// the module interface. 
/// public static class TlcModule @@ -124,7 +124,7 @@ public sealed class OutputAttribute : Attribute public string Desc { get; set; } /// - /// The rank order of the output. Because .NET reflection returns members in an unspecfied order, this + /// The rank order of the output. Because .NET reflection returns members in an unspecfied order, this /// is the only way to ensure consistency. /// public Double SortOrder { get; set; } @@ -544,11 +544,11 @@ public enum DataKind /// Unknown = 0, /// - /// Integer, including long. + /// Integer, including long. /// Int, /// - /// Unsigned integer, including ulong. + /// Unsigned integer, including ulong. /// UInt, /// @@ -588,11 +588,11 @@ public enum DataKind /// Enum, /// - /// An array (0 or more values of the same type, accessible by index). + /// An array (0 or more values of the same type, accessible by index). /// Array, /// - /// A dictionary (0 or more values of the same type, identified by a unique string key). + /// A dictionary (0 or more values of the same type, identified by a unique string key). /// The underlying C# representation is /// Dictionary, @@ -603,7 +603,7 @@ public enum DataKind /// Component, /// - /// An C# object that represents state, such as . + /// An C# object that represents state, such as . /// State } @@ -682,8 +682,8 @@ protected Optional(bool isExplicit) /// This is a 'maybe' class that is able to differentiate the cases when the value is set 'explicitly', or 'implicitly'. /// The idea is that if the default value is specified by the user, in some cases it needs to be treated differently /// than if it's auto-filled. - /// - /// An example is the weight column: the default behavior is to use 'Weight' column if it's present. But if the user explicitly sets + /// + /// An example is the weight column: the default behavior is to use 'Weight' column if it's present. 
But if the user explicitly sets /// the weight column to be 'Weight', we need to actually enforce the presence of the column. /// /// The type of the value @@ -719,7 +719,7 @@ public static implicit operator T(Optional optional) } /// - /// The implicit conversion from . + /// The implicit conversion from . /// This will assume that the parameter is set 'explicitly'. /// public static implicit operator Optional(T value) diff --git a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs index 93db75c169..60511bfd39 100644 --- a/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs +++ b/src/Microsoft.ML.Core/EntryPoints/ModuleCatalog.cs @@ -261,7 +261,7 @@ private bool ScanForComponents(IExceptionContext ectx, Type nestedType) } /// - /// The valid names for the components and entry points must consist of letters, digits, underscores and dots, + /// The valid names for the components and entry points must consist of letters, digits, underscores and dots, /// and begin with a letter or digit. /// private static readonly Regex _nameRegex = new Regex(@"^\w[_\.\w]*$", RegexOptions.Compiled); diff --git a/src/Microsoft.ML.Core/Environment/HostEnvironmentBase.cs b/src/Microsoft.ML.Core/Environment/HostEnvironmentBase.cs index b4d9337695..d4ff5ccd96 100644 --- a/src/Microsoft.ML.Core/Environment/HostEnvironmentBase.cs +++ b/src/Microsoft.ML.Core/Environment/HostEnvironmentBase.cs @@ -109,7 +109,7 @@ public interface IMessageDispatcher : IHostEnvironment /// /// A basic host environment suited for many environments. - /// This also supports modifying the concurrency factor, provides the ability to subscribe to pipes via the + /// This also supports modifying the concurrency factor, provides the ability to subscribe to pipes via the /// AddListener/RemoveListener methods, and exposes the to /// query progress. 
/// @@ -315,7 +315,7 @@ protected sealed class Dispatcher : Dispatcher /// This field is actually used as a , which holds the listener actions /// for all listeners that are currently subscribed. The action itself is an immutable object, so every time /// any listener subscribes or unsubscribes, the field is replaced with a modified version of the delegate. - /// + /// /// The field can be null, if no listener is currently subscribed. /// private volatile Action _listenerAction; @@ -488,10 +488,8 @@ protected virtual IProgressChannel StartProgressChannelCore(HostBase host, strin /// protected virtual IFileHandle OpenInputFileCore(IHostEnvironment env, string path) { -#pragma warning disable TLC_NoThis // Do not use 'this' keyword for member access this.AssertValue(env); this.CheckNonWhiteSpace(path, nameof(path)); -#pragma warning restore TLC_NoThis // Do not use 'this' keyword for member access if (Master != null) return Master.OpenInputFileCore(env, path); return new SimpleFileHandle(env, path, needsWrite: false, autoDelete: false); @@ -511,10 +509,8 @@ public IFileHandle CreateOutputFile(string path) /// protected virtual IFileHandle CreateOutputFileCore(IHostEnvironment env, string path) { -#pragma warning disable TLC_NoThis // Do not use 'this' keyword for member access this.AssertValue(env); this.CheckNonWhiteSpace(path, nameof(path)); -#pragma warning restore TLC_NoThis // Do not use 'this' keyword for member access if (Master != null) return Master.CreateOutputFileCore(env, path); return new SimpleFileHandle(env, path, needsWrite: true, autoDelete: false); @@ -532,9 +528,7 @@ public IFileHandle CreateTempFile(string suffix = null, string prefix = null) /// protected IFileHandle CreateAndRegisterTempFile(IHostEnvironment env, string suffix = null, string prefix = null) { -#pragma warning disable TLC_NoThis // Do not use 'this' keyword for member access this.AssertValue(env); -#pragma warning restore TLC_NoThis // Do not use 'this' keyword for member access 
if (Master != null) return Master.CreateAndRegisterTempFile(env, suffix, prefix); @@ -556,10 +550,8 @@ protected IFileHandle CreateAndRegisterTempFile(IHostEnvironment env, string suf protected virtual IFileHandle CreateTempFileCore(IHostEnvironment env, string suffix = null, string prefix = null) { -#pragma warning disable TLC_NoThis // Do not use 'this' keyword for member access this.CheckParam(!HasBadFileCharacters(suffix), nameof(suffix)); this.CheckParam(!HasBadFileCharacters(prefix), nameof(prefix)); -#pragma warning restore TLC_NoThis // Do not use 'this' keyword for member access Guid guid = Guid.NewGuid(); string path = Path.GetFullPath(Path.Combine(Path.GetTempPath(), prefix + guid.ToString() + suffix)); diff --git a/src/Microsoft.ML.Core/Environment/TlcEnvironment.cs b/src/Microsoft.ML.Core/Environment/TlcEnvironment.cs index ccf60dc28a..13781c5c11 100644 --- a/src/Microsoft.ML.Core/Environment/TlcEnvironment.cs +++ b/src/Microsoft.ML.Core/Environment/TlcEnvironment.cs @@ -225,7 +225,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra if (PrintDot()) { - // We need to print an extended status line. At this point, every event should be + // We need to print an extended status line. At this point, every event should be // a non-checkpoint progress event. bool needPrepend = entries.Count > 1; foreach (var ev in entries) @@ -306,7 +306,7 @@ private void EnsureNewLine(bool isError = false) return; // If _err and _out is the same writer, we need to print new line as well. - // If _out and _err writes to Console.Out and Console.Error respectively, + // If _out and _err writes to Console.Out and Console.Error respectively, // in the general user scenario they ends up with writing to the same underlying stream,. // so write a new line to the stream anyways. 
if (isError && _err != _out && (_out != Console.Out || _err != Console.Error)) diff --git a/src/Microsoft.ML.Core/Prediction/ISweeper.cs b/src/Microsoft.ML.Core/Prediction/ISweeper.cs index b3dd0dc3da..fe887e0ae2 100644 --- a/src/Microsoft.ML.Core/Prediction/ISweeper.cs +++ b/src/Microsoft.ML.Core/Prediction/ISweeper.cs @@ -210,8 +210,8 @@ public sealed class RunResult : IRunResult private readonly bool _isMetricMaximizing; /// - /// This switch changes the behavior of the CompareTo function, switching the greater than / less than - /// behavior, depending on if it is set to True. + /// This switch changes the behavior of the CompareTo function, switching the greater than / less than + /// behavior, depending on if it is set to True. /// public bool IsMetricMaximizing { get { return _isMetricMaximizing; } } @@ -267,8 +267,8 @@ IComparable IRunResult.MetricValue /// /// The metric class, used by smart sweeping algorithms. - /// Ideally we would like to move towards the new IDataView/ISchematized, this is - /// just a simple view instead, and it is decoupled from RunResult so we can move + /// Ideally we would like to move towards the new IDataView/ISchematized, this is + /// just a simple view instead, and it is decoupled from RunResult so we can move /// in that direction in the future. /// public sealed class RunMetric diff --git a/src/Microsoft.ML.Core/Prediction/TrainContext.cs b/src/Microsoft.ML.Core/Prediction/TrainContext.cs index 3464aa4bc9..be93ce68aa 100644 --- a/src/Microsoft.ML.Core/Prediction/TrainContext.cs +++ b/src/Microsoft.ML.Core/Prediction/TrainContext.cs @@ -33,7 +33,6 @@ public sealed class TrainContext /// public IPredictor InitialPredictor { get; } - /// /// Constructor, given a training set and optional other arguments. 
/// diff --git a/src/Microsoft.ML.Core/Utilities/BigArray.cs b/src/Microsoft.ML.Core/Utilities/BigArray.cs index d6c6ef7b9b..ba2e67b0d9 100644 --- a/src/Microsoft.ML.Core/Utilities/BigArray.cs +++ b/src/Microsoft.ML.Core/Utilities/BigArray.cs @@ -7,14 +7,14 @@ namespace Microsoft.ML.Runtime.Internal.Utilities { /// - /// An array-like data structure that supports storing more than - /// many entries, up to 0x7FEFFFFF00000L. - /// The entries are indexed by 64-bit integers, and a single entry can be accessed by + /// An array-like data structure that supports storing more than + /// many entries, up to 0x7FEFFFFF00000L. + /// The entries are indexed by 64-bit integers, and a single entry can be accessed by /// the indexer if no modifications to the entries is desired, or the /// method. Efficient looping can be accomplished by calling the method. - /// This data structure employs the "length and capacity" pattern. The logical length + /// This data structure employs the "length and capacity" pattern. The logical length /// can be retrieved from the property, which can possibly be strictly less - /// than the total capacity. + /// than the total capacity. /// /// The type of entries. public sealed class BigArray @@ -38,8 +38,8 @@ public sealed class BigArray // The 2-D jagged array containing the entries. // Its total size is larger than or equal to _length, but // less than Length + BlockSize. - // Each one-dimension subarray has length equal to BlockSize, - // except for the last one, which has a positive length + // Each one-dimension subarray has length equal to BlockSize, + // except for the last one, which has a positive length // less than or equal to BlockSize. private T[][] _entries; @@ -53,13 +53,13 @@ public sealed class BigArray public long Length { get { return _length; } } /// - /// Gets or sets the entry at . + /// Gets or sets the entry at . /// /// - /// This indexer is not efficient for looping. 
If looping access to entries is desired, + /// This indexer is not efficient for looping. If looping access to entries is desired, /// use the method instead. - /// Note that unlike a normal array, the value returned from this indexer getter cannot be modified - /// (e.g., by ++ operator or passing into a method as a ref parameter). To modify an entry, use + /// Note that unlike a normal array, the value returned from this indexer getter cannot be modified + /// (e.g., by ++ operator or passing into a method as a ref parameter). To modify an entry, use /// the method instead. /// public T this[long index] @@ -113,7 +113,7 @@ public BigArray(long size = 0) public delegate void Visitor(long index, ref T item); /// - /// Applies a method at a given . + /// Applies a method at a given . /// public void ApplyAt(long index, Visitor manip) { @@ -190,16 +190,16 @@ public void FillRange(long min, long lim, T value) } /// - /// Resizes the array so that its logical length equals . This method - /// is more efficient than initialize another array and copy the entries because it preserves + /// Resizes the array so that its logical length equals . This method + /// is more efficient than initialize another array and copy the entries because it preserves /// existing blocks. The actual capacity of the array may become larger than . /// If equals , then no operation is done. /// If is less than , the array shrinks in size /// so that both its length and its capacity equal . /// If is larger than , the array capacity grows - /// to the smallest integral multiple of that is larger than , - /// unless is less than , in which case the capacity - /// grows to double its current capacity or , which ever is larger, + /// to the smallest integral multiple of that is larger than , + /// unless is less than , in which case the capacity + /// grows to double its current capacity or , which ever is larger, /// but up to . 
/// public void Resize(long newLength) @@ -304,7 +304,7 @@ public void TrimCapacity() } /// - /// Appends the first elements of to the end. + /// Appends the first elements of to the end. /// This method is thread safe related to calls to (assuming those copy operations /// are happening over ranges already added), but concurrent calls to /// should not be attempted. Intended usage is that @@ -373,10 +373,10 @@ public void AddRange(T[] src, int length) } /// - /// Copies the subarray starting from index of length - /// to the destination array . - /// Concurrent calls to this method is valid even with one single concurrent call - /// to . + /// Copies the subarray starting from index of length + /// to the destination array . + /// Concurrent calls to this method is valid even with one single concurrent call + /// to . /// public void CopyTo(long idx, T[] dst, int length) { diff --git a/src/Microsoft.ML.Core/Utilities/CharUtils.cs b/src/Microsoft.ML.Core/Utilities/CharUtils.cs index e459452041..bf7ae4677e 100644 --- a/src/Microsoft.ML.Core/Utilities/CharUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/CharUtils.cs @@ -13,8 +13,8 @@ namespace Microsoft.ML.Runtime.Internal.Utilities public static class CharUtils { private const int CharsCount = 0x10000; - private volatile static char[] _lowerInvariantChars; - private volatile static char[] _upperInvariantChars; + private static volatile char[] _lowerInvariantChars; + private static volatile char[] _upperInvariantChars; private static char[] EnsureLowerInvariant() { diff --git a/src/Microsoft.ML.Core/Utilities/HashArray.cs b/src/Microsoft.ML.Core/Utilities/HashArray.cs index c76ceb9482..27f0ec9b5d 100644 --- a/src/Microsoft.ML.Core/Utilities/HashArray.cs +++ b/src/Microsoft.ML.Core/Utilities/HashArray.cs @@ -243,7 +243,7 @@ private static class HashHelpers { // Note: This HashHelpers class was adapted from the BCL code base. 
- // This is the maximum prime smaller than Array.MaxArrayLength + // This is the maximum prime smaller than Array.MaxArrayLength public const int MaxPrimeArrayLength = 0x7FEFFFFD; // Table of prime numbers to use as hash table sizes. @@ -271,7 +271,7 @@ public static int GetPrime(int min) return min + 1; } - // Returns size of hashtable to grow to. + // Returns size of hashtable to grow to. public static int ExpandPrime(int oldSize) { int newSize = 2 * oldSize; diff --git a/src/Microsoft.ML.Core/Utilities/MathUtils.cs b/src/Microsoft.ML.Core/Utilities/MathUtils.cs index 7fd0829708..fb68ee82d6 100644 --- a/src/Microsoft.ML.Core/Utilities/MathUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/MathUtils.cs @@ -133,7 +133,7 @@ public static Float Min(Float[] a) /// /// Finds the first index of the max element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is + /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. @@ -147,8 +147,8 @@ public static int ArgMax(Float[] a) } /// - /// Finds the first index of the max element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is + /// Finds the first index of the max element of the array. + /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. @@ -179,7 +179,7 @@ public static int ArgMax(Float[] a, int count) /// /// Finds the first index of the minimum element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is + /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. 
The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. @@ -194,7 +194,7 @@ public static int ArgMin(Float[] a) /// /// Finds the first index of the minimum element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is + /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. @@ -331,9 +331,9 @@ public static bool AlmostEqual(Float a, Float b, Float maxRelErr, Float maxAbsEr return (absDiff / maxAbs) <= maxRelErr; } - private readonly static int[] _possiblePrimeMod30 = new int[] { 1, 7, 11, 13, 17, 19, 23, 29 }; - private readonly static double _constantForLogGamma = 0.5 * Math.Log(2 * Math.PI); - private readonly static double[] _coeffsForLogGamma = { 12.0, -360.0, 1260.0, -1680.0, 1188.0 }; + private static readonly int[] _possiblePrimeMod30 = new int[] { 1, 7, 11, 13, 17, 19, 23, 29 }; + private static readonly double _constantForLogGamma = 0.5 * Math.Log(2 * Math.PI); + private static readonly double[] _coeffsForLogGamma = { 12.0, -360.0, 1260.0, -1680.0, 1188.0 }; /// /// Returns the log of the gamma function, using the Stirling approximation @@ -849,7 +849,7 @@ public static Float LnSum(IEnumerable terms) } /// - /// Math.Sin returns the input value for inputs with large magnitude. We return NaN instead, for consistency + /// Math.Sin returns the input value for inputs with large magnitude. We return NaN instead, for consistency /// with Math.Sin(infinity). /// public static double Sin(double a) @@ -859,7 +859,7 @@ public static double Sin(double a) } /// - /// Math.Cos returns the input value for inputs with large magnitude. We return NaN instead, for consistency + /// Math.Cos returns the input value for inputs with large magnitude. We return NaN instead, for consistency /// with Math.Cos(infinity). 
/// public static double Cos(double a) diff --git a/src/Microsoft.ML.Core/Utilities/MemUtils.cs b/src/Microsoft.ML.Core/Utilities/MemUtils.cs index 736ae90892..1dba9205e9 100644 --- a/src/Microsoft.ML.Core/Utilities/MemUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/MemUtils.cs @@ -10,7 +10,7 @@ public static class MemUtils // .Net 4.6's Buffer.MemoryCopy. // REVIEW: Remove once we're on a version of .NET which includes // Buffer.MemoryCopy. - public unsafe static void MemoryCopy(void* source, void* destination, long destinationSizeInBytes, long sourceBytesToCopy) + public static unsafe void MemoryCopy(void* source, void* destination, long destinationSizeInBytes, long sourceBytesToCopy) { // MemCpy has undefined behavior when handed overlapping source and // destination buffers. diff --git a/src/Microsoft.ML.Core/Utilities/MinWaiter.cs b/src/Microsoft.ML.Core/Utilities/MinWaiter.cs index d29bfe23c1..8c44315ba6 100644 --- a/src/Microsoft.ML.Core/Utilities/MinWaiter.cs +++ b/src/Microsoft.ML.Core/Utilities/MinWaiter.cs @@ -12,7 +12,7 @@ namespace Microsoft.ML.Runtime.Internal.Utilities /// entities of known count, where you want to iteratively provide critical sections /// for each depending on which comes first, but you do not necessarily know what /// constitutes "first" until all such entities tell you where they stand in line. - /// + /// /// The anticipated usage is that whatever entity is using the /// to synchronize itself, will register itself using /// so as to unblock any "lower" waiters as soon as it knows what value it needs to @@ -65,7 +65,7 @@ public MinWaiter(int waiters) /// point when we actually want to wait. This method itself has the potential to /// signal other events, if by registering ourselves the waiter becomes aware of /// the maximum number of waiters, allowing that waiter to enter its critical state. 
- /// + /// /// If multiple events are associated with the minimum value, then only one will /// be signaled, and the rest will remain unsignaled. Which is chosen is undefined. /// @@ -75,7 +75,7 @@ public ManualResetEventSlim Register(long position) lock (_waiters) { Contracts.Check(_maxWaiters > 0, "All waiters have been retired, Wait should not be called at this point"); - // We should never reach the state + // We should never reach the state Contracts.Assert(_waiters.Count < _maxWaiters); ev = new WaitStats(position); // REVIEW: Optimize the case where this is the minimum? diff --git a/src/Microsoft.ML.Core/Utilities/PathUtils.cs b/src/Microsoft.ML.Core/Utilities/PathUtils.cs index 74ccec30c0..6698c11f7f 100644 --- a/src/Microsoft.ML.Core/Utilities/PathUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/PathUtils.cs @@ -36,19 +36,19 @@ private static string DllDir /// Attempts to find a file that is expected to be distributed with a TLC component. Searches /// in the following order: /// 1. In the customSearchDir directory, if it is provided. - /// 2. In the custom search directory specified by the + /// 2. In the custom search directory specified by the /// environment variable. /// 3. In the root folder of the provided assembly. /// 4. In the folder of this assembly. /// In each case it searches the file in the directory provided and combined with folderPrefix. - /// + /// /// If any of these locations contain the file, a full local path will be returned, otherwise this /// method will return null. /// /// File name to find /// folder prefix, relative to the current or customSearchDir /// - /// Custom directory to search for resources. + /// Custom directory to search for resources. /// If null, the path specified in the environment variable /// will be used. 
/// diff --git a/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs b/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs index b8006bd943..69b57fea45 100644 --- a/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs +++ b/src/Microsoft.ML.Core/Utilities/ReservoirSampler.cs @@ -9,8 +9,8 @@ namespace Microsoft.ML.Runtime.Internal.Utilities { /// - /// This is an interface for creating samples of a requested size from a stream of data of type . - /// The sample is created in one pass by calling for every data point in the stream. Implementations should have + /// This is an interface for creating samples of a requested size from a stream of data of type . + /// The sample is created in one pass by calling for every data point in the stream. Implementations should have /// a delegate for getting the next data point, which is invoked if the current data point should go into the reservoir. /// public interface IReservoirSampler @@ -44,8 +44,8 @@ public interface IReservoirSampler } /// - /// This class produces a sample without replacement from a stream of data of type . - /// It is instantiated with a delegate that gets the next data point, and builds a reservoir in one pass by calling + /// This class produces a sample without replacement from a stream of data of type . + /// It is instantiated with a delegate that gets the next data point, and builds a reservoir in one pass by calling /// for every data point in the stream. In case the next data point does not get 'picked' into the reservoir, the delegate is not invoked. /// Sampling is done according to the algorithm in this paper: http://epubs.siam.org/doi/pdf/10.1137/1.9781611972740.53. /// @@ -117,8 +117,8 @@ public IEnumerable GetSample() } /// - /// This class produces a sample with replacement from a stream of data of type . 
- /// It is instantiated with a delegate that gets the next data point, and builds a reservoir in one pass by calling + /// This class produces a sample with replacement from a stream of data of type . + /// It is instantiated with a delegate that gets the next data point, and builds a reservoir in one pass by calling /// for every data point in the stream. In case the next data point does not get 'picked' into the reservoir, the delegate is not invoked. /// Sampling is done according to the algorithm in this paper: http://epubs.siam.org/doi/pdf/10.1137/1.9781611972740.53. /// @@ -237,7 +237,7 @@ public void Lock() } /// - /// Gets a reservoir sample with replacement of the elements sampled so far. Users should not change the + /// Gets a reservoir sample with replacement of the elements sampled so far. Users should not change the /// elements returned since multiple elements in the reservoir might be pointing to the same memory. /// public IEnumerable GetSample() diff --git a/src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs b/src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs index ccb4b0c90c..2cfa8c185a 100644 --- a/src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Runtime.Internal.Utilities /// public sealed class ResourceManagerUtils { - private volatile static ResourceManagerUtils _instance; + private static volatile ResourceManagerUtils _instance; public static ResourceManagerUtils Instance { get @@ -91,7 +91,7 @@ public static string GetUrl(string suffix) /// The relative url from which to download. /// This is appended to the url defined in . /// The name of the file to save. - /// The directory where the file should be saved to. The file will be saved in a directory with the specified name inside + /// The directory where the file should be saved to. 
The file will be saved in a directory with the specified name inside /// a folder called "tlc-resources" in the directory. /// An integer indicating the number of milliseconds to wait before timing out while downloading a resource. /// The download results, containing the file path where the resources was (or should have been) downloaded to, and an error message diff --git a/src/Microsoft.ML.Core/Utilities/Stream.cs b/src/Microsoft.ML.Core/Utilities/Stream.cs index 8b22e46380..41c794e17f 100644 --- a/src/Microsoft.ML.Core/Utilities/Stream.cs +++ b/src/Microsoft.ML.Core/Utilities/Stream.cs @@ -979,7 +979,7 @@ public static BitArray ReadBitArray(this BinaryReader reader) return returnArray; } - public unsafe static void ReadBytes(this BinaryReader reader, void* destination, long destinationSizeInBytes, long bytesToRead, ref byte[] work) + public static unsafe void ReadBytes(this BinaryReader reader, void* destination, long destinationSizeInBytes, long bytesToRead, ref byte[] work) { Contracts.AssertValue(reader); Contracts.Assert(bytesToRead >= 0); @@ -1007,7 +1007,7 @@ public unsafe static void ReadBytes(this BinaryReader reader, void* destination, } } - public unsafe static void ReadBytes(this BinaryReader reader, void* destination, long destinationSizeInBytes, long bytesToRead) + public static unsafe void ReadBytes(this BinaryReader reader, void* destination, long destinationSizeInBytes, long bytesToRead) { byte[] work = null; ReadBytes(reader, destination, destinationSizeInBytes, bytesToRead, ref work); @@ -1097,10 +1097,10 @@ public static bool TryGetBuffer(this MemoryStream mem, out ArraySegment bu // REVIEW: need to plumb IExceptionContext into the method. /// /// Checks that the directory of the file name passed in already exists. - /// This is meant to be called before calling an API that creates the file, + /// This is meant to be called before calling an API that creates the file, /// so the file need not exist. 
/// - /// An absolute or relative file path, or null to skip the check + /// An absolute or relative file path, or null to skip the check /// (useful for optional user parameters) /// The user level parameter name, as exposed by the command line help public static void CheckOptionalUserDirectory(string file, string userArgument) @@ -1113,7 +1113,7 @@ public static void CheckOptionalUserDirectory(string file, string userArgument) return; string dir; -#pragma warning disable TLC_ContractsNameUsesNameof +#pragma warning disable MSML_ContractsNameUsesNameof try { // Relative paths are interpreted as local. @@ -1134,6 +1134,6 @@ public static void CheckOptionalUserDirectory(string file, string userArgument) if (!Directory.Exists(dir)) throw Contracts.ExceptUserArg(userArgument, "Cannot find directory '{0}'.", dir); } -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof } } \ No newline at end of file diff --git a/src/Microsoft.ML.Core/Utilities/SupervisedBinFinder.cs b/src/Microsoft.ML.Core/Utilities/SupervisedBinFinder.cs index 00dbf68d2c..a96e8df5c2 100644 --- a/src/Microsoft.ML.Core/Utilities/SupervisedBinFinder.cs +++ b/src/Microsoft.ML.Core/Utilities/SupervisedBinFinder.cs @@ -11,12 +11,12 @@ namespace Microsoft.ML.Runtime.Internal.Utilities { /// - /// This class performs discretization of (value, label) pairs into bins in a way that minimizes + /// This class performs discretization of (value, label) pairs into bins in a way that minimizes /// the target function "minimum description length". /// The algorithm is outlineed in an article /// "Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning" /// [Fayyad, Usama M.; Irani, Keki B. (1993)] http://ijcai.org/Past%20Proceedings/IJCAI-93-VOL2/PDF/022.pdf - /// + /// /// The class can be used several times sequentially, it is stateful and not thread-safe. 
/// Both Single and Double precision processing is implemented, and is identical. /// @@ -117,7 +117,7 @@ public Single[] FindBins(int maxBins, int minBinSize, int nLabels, IList result[i] = BinFinderBase.GetSplitValue(distinctValues[split - 1], distinctValues[split]); // Even though distinctValues may contain infinities, the boundaries may not be infinite: - // GetSplitValue(a,b) only returns +-inf if a==b==+-inf, + // GetSplitValue(a,b) only returns +-inf if a==b==+-inf, // and distinctValues won't contain more than one +inf or -inf. Contracts.Assert(FloatUtils.IsFinite(result[i])); } @@ -195,7 +195,7 @@ public Double[] FindBins(int maxBins, int minBinSize, int nLabels, IList result[i] = BinFinderBase.GetSplitValue(distinctValues[split - 1], distinctValues[split]); // Even though distinctValues may contain infinities, the boundaries may not be infinite: - // GetSplitValue(a,b) only returns +-inf if a==b==+-inf, + // GetSplitValue(a,b) only returns +-inf if a==b==+-inf, // and distinctValues won't contain more than one +inf or -inf. 
Contracts.Assert(FloatUtils.IsFinite(result[i])); } @@ -259,7 +259,7 @@ public SplitInterval(SupervisedBinFinder binFinder, int min, int lim, bool skipS Contracts.Assert(leftCount + rightCount == totalCount); // This term corresponds to the 'fixed cost associated with a split' - // It's a simplification of a Delta(A,T;S) term calculated in the paper + // It's a simplification of a Delta(A,T;S) term calculated in the paper var delta = logN - binFinder._labelCardinality * (totalEntropy - leftEntropy - rightEntropy); var curGain = totalCount * totalEntropy // total cost of transmitting non-split content diff --git a/src/Microsoft.ML.Core/Utilities/ThreadUtils.cs b/src/Microsoft.ML.Core/Utilities/ThreadUtils.cs index e7bc27235f..859ae7b28d 100644 --- a/src/Microsoft.ML.Core/Utilities/ThreadUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/ThreadUtils.cs @@ -64,7 +64,7 @@ public sealed class ExceptionMarshaller : IDisposable private readonly CancellationTokenSource _ctSource; private readonly object _lock; - // The stored exception + // The stored exception private string _component; private Exception _ex; diff --git a/src/Microsoft.ML.Core/Utilities/Tree.cs b/src/Microsoft.ML.Core/Utilities/Tree.cs index 880afc4083..7d030cf46c 100644 --- a/src/Microsoft.ML.Core/Utilities/Tree.cs +++ b/src/Microsoft.ML.Core/Utilities/Tree.cs @@ -53,7 +53,7 @@ public Tree this[TKey key] /// /// This is the key for this child node in its parent, if any. If this is not - /// a child of any parent, that is, it is the root of its own tree, then + /// a child of any parent, that is, it is the root of its own tree, then /// public TKey Key { get { return _key; } } @@ -129,7 +129,7 @@ public void Add(KeyValuePair> item) } /// - /// Adds a node as a child of this node. This will disconnect the + /// Adds a node as a child of this node. 
This will disconnect the /// /// /// diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index 48993de785..96c23a0fe3 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -898,7 +898,7 @@ private static MethodInfo MarshalInvokeCheckAndCreate(Type genArg, Delegat /// but whose code depends on some sort of generic type parameter. This utility method exists to make /// this common pattern more convenient, and also safer so that the arguments, if any, can be type /// checked at compile time instead of at runtime. - /// + /// /// Because it is strongly typed, this can only be applied to methods whose return type /// is known at compile time, that is, that do not depend on the type parameter of the method itself. /// diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index 12d5bfcccb..1be9c77ee4 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -349,7 +349,7 @@ public static void Apply(ref VBuffer dst, SlotValueManipulator manip) /// The vector to modify /// The slot of the vector to modify /// The manipulation function - /// A predicate that returns true if we should skip insertion of a value into + /// A predicate that returns true if we should skip insertion of a value into /// sparse vector if it was default. If the predicate is null, we insert any non-default. public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator manip, ValuePredicate pred = null) { @@ -489,7 +489,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) } /// - /// Creates a maybe sparse copy of a VBuffer. + /// Creates a maybe sparse copy of a VBuffer. /// Whether the created copy is sparse or not is determined by the proportion of non-default entries compared to the sparsity parameter. 
/// public static void CreateMaybeSparseCopy(ref VBuffer src, ref VBuffer dst, RefPredicate isDefaultPredicate, float sparsityThreshold = SparsityThreshold) @@ -580,9 +580,9 @@ public static void ApplyWith(ref VBuffer src, ref VBuffer /// Applies the to each pair of elements - /// where is defined, in order of index. It stores the result - /// in another vector. If there is some value at an index in - /// that is not defined in , that slot value is copied to the + /// where is defined, in order of index. It stores the result + /// in another vector. If there is some value at an index in + /// that is not defined in , that slot value is copied to the /// corresponding slot in the result vector without any further modification. /// If either of the vectors are dense, the resulting /// will be dense. Otherwise, if both are sparse, the output will be sparse iff @@ -616,7 +616,7 @@ public static void ApplyWithEitherDefined(ref VBuffer src, ref /// /// Applies the to each pair of elements /// where either or , has an element - /// defined at that index. It stores the result in another vector . + /// defined at that index. It stores the result in another vector . /// If either of the vectors are dense, the resulting /// will be dense. Otherwise, if both are sparse, the output will be sparse iff /// there is any slot that is not explicitly represented in either vector. diff --git a/src/Microsoft.ML.CpuMath/AlignedArray.cs b/src/Microsoft.ML.CpuMath/AlignedArray.cs index 1dc8e3ee46..87583a8ef6 100644 --- a/src/Microsoft.ML.CpuMath/AlignedArray.cs +++ b/src/Microsoft.ML.CpuMath/AlignedArray.cs @@ -13,7 +13,7 @@ namespace Microsoft.ML.Runtime.Internal.CpuMath /// To pin and force alignment, call the GetPin method, typically wrapped in a using (since it /// returns a Pin struct that is IDisposable). From the pin, you can get the IntPtr to pass to /// native code. - /// + /// /// The ctor takes an alignment value, which must be a power of two at least sizeof(Float). 
/// public sealed class AlignedArray diff --git a/src/Microsoft.ML.CpuMath/AlignedMatrix.cs b/src/Microsoft.ML.CpuMath/AlignedMatrix.cs index 5ec9b53cca..67f05ee7cf 100644 --- a/src/Microsoft.ML.CpuMath/AlignedMatrix.cs +++ b/src/Microsoft.ML.CpuMath/AlignedMatrix.cs @@ -80,7 +80,7 @@ private void AssertValid() } /// - /// The physical AligenedArray items. + /// The physical AligenedArray items. /// public AlignedArray Items { get { return _items; } } @@ -155,7 +155,7 @@ public void CopyTo(Float[] dst, ref int ivDst) } /// - /// Copy the values from this vector starting at slot ivSrc into dst, starting at slot ivDst. + /// Copy the values from this vector starting at slot ivSrc into dst, starting at slot ivDst. /// The number of values that are copied is determined by count. /// /// The staring index in this vector @@ -525,7 +525,7 @@ public CpuAlignedMatrixRow(int crow, int ccol, int cbAlign) public override int ColCountPhy { get { return RunLenPhy; } } /// - /// Copy the values from this matrix, starting from the row into dst, starting at slot ivDst and advancing ivDst. + /// Copy the values from this matrix, starting from the row into dst, starting at slot ivDst and advancing ivDst. /// /// The starting row in this matrix /// The destination array @@ -606,7 +606,7 @@ public void CopyTo(Float[] dst, ref int ivDst) } /// - /// Copy the values from this matrix, starting from the row into dst, starting at slot ivDst and advancing ivDst. + /// Copy the values from this matrix, starting from the row into dst, starting at slot ivDst and advancing ivDst. /// /// The starting row in this matrix /// The destination array diff --git a/src/Microsoft.ML.CpuMath/Avx.cs b/src/Microsoft.ML.CpuMath/Avx.cs index 68e751c86b..6dcf898b6f 100644 --- a/src/Microsoft.ML.CpuMath/Avx.cs +++ b/src/Microsoft.ML.CpuMath/Avx.cs @@ -7,7 +7,7 @@ namespace Microsoft.ML.Runtime.Internal.CpuMath { /// - /// Keep Avx.cs in sync with Sse.cs. 
When making changes to one, use BeyondCompare or a similar tool + /// Keep Avx.cs in sync with Sse.cs. When making changes to one, use BeyondCompare or a similar tool /// to view diffs and propagate appropriate changes to the other. /// public static class AvxUtils @@ -21,7 +21,7 @@ private static bool Compat(AlignedArray a) return a.CbAlign == CbAlign; } - private unsafe static float* Ptr(AlignedArray a, float* p) + private static unsafe float* Ptr(AlignedArray a, float* p) { Contracts.AssertValue(a); float* q = p + a.GetBase((long)p); diff --git a/src/Microsoft.ML.CpuMath/CpuAligenedMathUtils.cs b/src/Microsoft.ML.CpuMath/CpuAligenedMathUtils.cs index 363c40007b..ad53810ff3 100644 --- a/src/Microsoft.ML.CpuMath/CpuAligenedMathUtils.cs +++ b/src/Microsoft.ML.CpuMath/CpuAligenedMathUtils.cs @@ -115,7 +115,7 @@ public static void MatTranTimesSrc(bool add, ICpuFullMatrix mat, ICpuVector src, public static class GeneralUtils { /// - /// Count the number of zero bits in the lonest string of zero's from the lowest significant bit of the input integer. + /// Count the number of zero bits in the lonest string of zero's from the lowest significant bit of the input integer. /// /// The input integer /// diff --git a/src/Microsoft.ML.CpuMath/ICpuBuffer.cs b/src/Microsoft.ML.CpuMath/ICpuBuffer.cs index e58a453f9f..ad55f5c8c6 100644 --- a/src/Microsoft.ML.CpuMath/ICpuBuffer.cs +++ b/src/Microsoft.ML.CpuMath/ICpuBuffer.cs @@ -77,8 +77,8 @@ public interface ICpuFullMatrix : ICpuMatrix /// /// Zero out the items with the given indices. - /// The indices contain the logical indices to the vectorized representation of the matrix, - /// which can be different depending on whether the matrix is row-major or column-major. + /// The indices contain the logical indices to the vectorized representation of the matrix, + /// which can be different depending on whether the matrix is row-major or column-major. 
/// void ZeroItems(int[] indices); } diff --git a/src/Microsoft.ML.CpuMath/IntUtils.cs b/src/Microsoft.ML.CpuMath/IntUtils.cs index b0aed315c3..2492dddaff 100644 --- a/src/Microsoft.ML.CpuMath/IntUtils.cs +++ b/src/Microsoft.ML.CpuMath/IntUtils.cs @@ -84,7 +84,7 @@ private static ulong Div64(ulong lo, ulong hi, ulong den, out ulong rem) return Div64Core(lo, hi, den, out rem); } - // REVIEW: on Linux, the hardware divide-by-zero exception is not translated into + // REVIEW: on Linux, the hardware divide-by-zero exception is not translated into // a managed exception properly by CoreCLR so the process will crash. This is a temporary fix // until CoreCLR addresses this issue. [DllImport(Thunk.NativePath, CharSet = CharSet.Unicode, EntryPoint = "Div64"), SuppressUnmanagedCodeSecurity] diff --git a/src/Microsoft.ML.CpuMath/Sse.cs b/src/Microsoft.ML.CpuMath/Sse.cs index 77be547b69..68e6ee906b 100644 --- a/src/Microsoft.ML.CpuMath/Sse.cs +++ b/src/Microsoft.ML.CpuMath/Sse.cs @@ -7,7 +7,7 @@ namespace Microsoft.ML.Runtime.Internal.CpuMath { /// - /// Keep Sse.cs in sync with Avx.cs. When making changes to one, use BeyondCompare or a similar tool + /// Keep Sse.cs in sync with Avx.cs. When making changes to one, use BeyondCompare or a similar tool /// to view diffs and propagate appropriate changes to the other. 
/// public static class SseUtils @@ -21,7 +21,7 @@ private static bool Compat(AlignedArray a) return a.CbAlign == CbAlign; } - private unsafe static float* Ptr(AlignedArray a, float* p) + private static unsafe float* Ptr(AlignedArray a, float* p) { Contracts.AssertValue(a); float* q = p + a.GetBase((long)p); diff --git a/src/Microsoft.ML.CpuMath/Thunk.cs b/src/Microsoft.ML.CpuMath/Thunk.cs index bc23963bbe..d7082c8313 100644 --- a/src/Microsoft.ML.CpuMath/Thunk.cs +++ b/src/Microsoft.ML.CpuMath/Thunk.cs @@ -9,7 +9,7 @@ namespace Microsoft.ML.Runtime.Internal.CpuMath { - internal unsafe static class Thunk + internal static unsafe class Thunk { internal const string NativePath = "CpuMathNative"; diff --git a/src/Microsoft.ML.Data/Commands/EvaluateCommand.cs b/src/Microsoft.ML.Data/Commands/EvaluateCommand.cs index cd2eb464af..77bdf0e32f 100644 --- a/src/Microsoft.ML.Data/Commands/EvaluateCommand.cs +++ b/src/Microsoft.ML.Data/Commands/EvaluateCommand.cs @@ -19,7 +19,7 @@ namespace Microsoft.ML.Runtime.Data { - // REVIEW: For simplicity (since this is currently the case), + // REVIEW: For simplicity (since this is currently the case), // we assume that all metrics are either numeric, or numeric vectors. /// /// This class contains information about an overall metric, namely its name and whether it is a vector @@ -92,7 +92,7 @@ public string GetNameMatch(string input) public interface IEvaluator { /// - /// Compute the aggregate metrics. Return a dictionary from the metric kind + /// Compute the aggregate metrics. Return a dictionary from the metric kind /// (overal/per-fold/confusion matrix/PR-curves etc.), to a data view containing the metric. 
/// Dictionary Evaluate(RoleMappedData data); diff --git a/src/Microsoft.ML.Data/Commands/ScoreCommand.cs b/src/Microsoft.ML.Data/Commands/ScoreCommand.cs index c353e4a4ec..607bf119d7 100644 --- a/src/Microsoft.ML.Data/Commands/ScoreCommand.cs +++ b/src/Microsoft.ML.Data/Commands/ScoreCommand.cs @@ -291,9 +291,9 @@ public static SubComponent GetScorerC /// /// Given a predictor and an optional scorer SubComponent, produces a compatible ISchemaBindableMapper. /// First, it tries to instantiate the bindable mapper using the - /// (this will only succeed if there's a registered BindableMapper creation method with load name equal to the one + /// (this will only succeed if there's a registered BindableMapper creation method with load name equal to the one /// of the scorer). - /// If the above fails, it checks whether the predictor implements + /// If the above fails, it checks whether the predictor implements /// directly. /// If this also isn't true, it will create a 'matching' standard mapper. /// diff --git a/src/Microsoft.ML.Data/Commands/TrainCommand.cs b/src/Microsoft.ML.Data/Commands/TrainCommand.cs index 1c25275c3e..69370ad3ef 100644 --- a/src/Microsoft.ML.Data/Commands/TrainCommand.cs +++ b/src/Microsoft.ML.Data/Commands/TrainCommand.cs @@ -222,9 +222,9 @@ public static string MatchNameOrDefaultOrNull(IExceptionContext ectx, ISchema sc return userName; if (userName == defaultName) return null; -#pragma warning disable TLC_ContractsNameUsesNameof +#pragma warning disable MSML_ContractsNameUsesNameof throw ectx.ExceptUserArg(argName, $"Could not find column '{userName}'"); -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof } public static IPredictor Train(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer, string name, @@ -291,7 +291,7 @@ public static bool TryLoadPredictor(IChannel ch, IHostEnvironment env, string in /// /// Save the model to the output path. 
- /// The method saves the loader and the transformations of dataPipe and saves optionally predictor + /// The method saves the loader and the transformations of dataPipe and saves optionally predictor /// and command. It also uses featureColumn, if provided, to extract feature names. /// /// The host environment to use. @@ -316,7 +316,7 @@ public static void SaveModel(IHostEnvironment env, IChannel ch, IFileHandle outp /// /// Save the model to the stream. - /// The method saves the loader and the transformations of dataPipe and saves optionally predictor + /// The method saves the loader and the transformations of dataPipe and saves optionally predictor /// and command. It also uses featureColumn, if provided, to extract feature names. /// /// The host environment to use. @@ -400,7 +400,7 @@ public static void SaveDataPipe(IHostEnvironment env, RepositoryWriter repositor /// /// Traces back the .Source chain of the transformation pipe up to the moment it no longer can. - /// Returns all the transforms of and the first data view (a non-transform). + /// Returns all the transforms of and the first data view (a non-transform). /// /// The transformation pipe to traverse. /// The beginning data view of the transform chain @@ -413,7 +413,7 @@ private static List BacktrackPipe(IDataView dataPipe, out IDataV while (dataPipe is IDataTransform xf) { // REVIEW: a malicious user could construct a loop in the Source chain, that would - // cause this method to iterate forever (and throw something when the list overflows). There's + // cause this method to iterate forever (and throw something when the list overflows). There's // no way to insulate from ALL malicious behavior. 
transforms.Add(xf); dataPipe = xf.Source; diff --git a/src/Microsoft.ML.Data/Data/Combiner.cs b/src/Microsoft.ML.Data/Data/Combiner.cs index 9a5de27ff6..ee45aee3e3 100644 --- a/src/Microsoft.ML.Data/Data/Combiner.cs +++ b/src/Microsoft.ML.Data/Data/Combiner.cs @@ -21,7 +21,7 @@ public abstract class Combiner public sealed class TextCombiner : Combiner { - private volatile static TextCombiner _instance; + private static volatile TextCombiner _instance; public static TextCombiner Instance { get @@ -46,7 +46,7 @@ public override void Combine(ref DvText dst, DvText src) public sealed class FloatAdder : Combiner { - private volatile static FloatAdder _instance; + private static volatile FloatAdder _instance; public static FloatAdder Instance { get @@ -67,7 +67,7 @@ private FloatAdder() public sealed class R4Adder : Combiner { - private volatile static R4Adder _instance; + private static volatile R4Adder _instance; public static R4Adder Instance { get @@ -88,7 +88,7 @@ private R4Adder() public sealed class R8Adder : Combiner { - private volatile static R8Adder _instance; + private static volatile R8Adder _instance; public static R8Adder Instance { get @@ -110,7 +110,7 @@ private R8Adder() // REVIEW: Delete this! public sealed class U4Adder : Combiner { - private volatile static U4Adder _instance; + private static volatile U4Adder _instance; public static U4Adder Instance { get diff --git a/src/Microsoft.ML.Data/Data/Conversion.cs b/src/Microsoft.ML.Data/Data/Conversion.cs index 974f40c39d..0a9833064a 100644 --- a/src/Microsoft.ML.Data/Data/Conversion.cs +++ b/src/Microsoft.ML.Data/Data/Conversion.cs @@ -53,7 +53,7 @@ public sealed class Conversions // REVIEW: Reconcile implementations with TypeUtils, and clarify the distinction. // Singleton pattern. 
- private volatile static Conversions _instance; + private static volatile Conversions _instance; public static Conversions Instance { get diff --git a/src/Microsoft.ML.Data/Data/DataViewUtils.cs b/src/Microsoft.ML.Data/Data/DataViewUtils.cs index 4772228fa0..1db4d5ad0a 100644 --- a/src/Microsoft.ML.Data/Data/DataViewUtils.cs +++ b/src/Microsoft.ML.Data/Data/DataViewUtils.cs @@ -286,9 +286,9 @@ private sealed class Splitter private enum ExtraIndex { Id, -#pragma warning disable TLC_GeneralName // Allow for this private enum. +#pragma warning disable MSML_GeneralName // Allow for this private enum. _Lim -#pragma warning restore TLC_GeneralName +#pragma warning restore MSML_GeneralName } private Splitter(ISchema schema) diff --git a/src/Microsoft.ML.Data/Data/IColumn.cs b/src/Microsoft.ML.Data/Data/IColumn.cs index 28d6ffd057..2f2f496f99 100644 --- a/src/Microsoft.ML.Data/Data/IColumn.cs +++ b/src/Microsoft.ML.Data/Data/IColumn.cs @@ -13,16 +13,16 @@ namespace Microsoft.ML.Runtime.Data /// /// This interface is an analogy to that encapsulates the contents of a single /// column. - /// + /// /// Note that in the same sense that is not thread safe, implementors of this interface /// by similar token must not be considered thread safe by users of the interface, and by the same token /// implementors should feel free to write their implementations with the expectation that only one thread /// will be calling it at a time. - /// + /// /// Similarly, in the same sense that an can have its values "change under it" by having /// the underlying cursor move, so too might this item have its values change under it, and they will if /// they were directly instantiated from a row. - /// + /// /// Generally actual implementors of this interface should not implement this directly, but instead implement /// . 
/// @@ -495,7 +495,7 @@ public override ValueGetter GetGetter() /// private sealed class RowColumnRow : IRow { - private readonly static DefaultCountedImpl _defCount = new DefaultCountedImpl(); + private static readonly DefaultCountedImpl _defCount = new DefaultCountedImpl(); private readonly ICounted _counted; private readonly IColumn[] _columns; private readonly SchemaImpl _schema; diff --git a/src/Microsoft.ML.Data/Data/IRowSeekable.cs b/src/Microsoft.ML.Data/Data/IRowSeekable.cs index c2fb54bf70..3c0bf0db08 100644 --- a/src/Microsoft.ML.Data/Data/IRowSeekable.cs +++ b/src/Microsoft.ML.Data/Data/IRowSeekable.cs @@ -6,7 +6,7 @@ namespace Microsoft.ML.Runtime.Data { - // REVIEW: Would it be a better apporach to add something akin to CanSeek, + // REVIEW: Would it be a better apporach to add something akin to CanSeek, // as we have a CanShuffle? The idea is trying to make IRowSeekable propagate along certain transforms. /// /// Represents a data view that supports random access to a specific row. @@ -18,14 +18,14 @@ public interface IRowSeekable : ISchematized /// /// Represents a row seeker with random access that can retrieve a specific row by the row index. - /// For IRowSeeker, when the state is valid (that is when MoveTo() returns true), it returns the - /// current row index. Otherwise it's -1. + /// For IRowSeeker, when the state is valid (that is when MoveTo() returns true), it returns the + /// current row index. Otherwise it's -1. /// public interface IRowSeeker : IRow, IDisposable { /// /// Moves the seeker to a row at a specific row index. - /// If the row index specified is out of range (less than zero or not less than the + /// If the row index specified is out of range (less than zero or not less than the /// row count), it returns false and sets its Position property to -1. /// /// The row index to move to. 
diff --git a/src/Microsoft.ML.Data/Data/ITransposeDataView.cs b/src/Microsoft.ML.Data/Data/ITransposeDataView.cs index 2188c766de..f247bc9859 100644 --- a/src/Microsoft.ML.Data/Data/ITransposeDataView.cs +++ b/src/Microsoft.ML.Data/Data/ITransposeDataView.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Runtime.Data /// ). This interface is intended to be implemented by classes that /// want to provide an option for an alternate way of accessing the data stored in a /// . - /// + /// /// The interface only advertises that columns may be accessible in slot-wise fashion. A column /// is accessible in this fashion iff 's /// returns a non-null value. diff --git a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs index 3f57266b0f..091fe26cb2 100644 --- a/src/Microsoft.ML.Data/Data/RowCursorUtils.cs +++ b/src/Microsoft.ML.Data/Data/RowCursorUtils.cs @@ -39,7 +39,7 @@ private static Delegate GetGetterAsDelegateCore(IRow row, int col) /// /// Given a destination type, IRow, and column index, return a ValueGetter for the column - /// with a conversion to typeDst, if needed. This is a weakly typed version of + /// with a conversion to typeDst, if needed. This is a weakly typed version of /// . /// /// @@ -293,7 +293,7 @@ private static ValueGetter> GetVecGetterAsCore(VectorT /// /// This method returns a small helper delegate that returns whether we are at the start - /// of a new group, that is, we have just started, or the key-value at indicated column + /// of a new group, that is, we have just started, or the key-value at indicated column /// is different than it was, in the last call. This is practically useful for determining /// group boundaries. Note that the delegate will return true on the first row. 
/// diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs index 4a58e097fb..7bc0a8d2ad 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs @@ -79,7 +79,7 @@ private sealed class TableOfContentsEntry public readonly ColumnType Type; /// - /// The compression scheme used on this column's blocks. + /// The compression scheme used on this column's blocks. /// public readonly CompressionKind Compression; @@ -971,7 +971,7 @@ public void Save(ModelSaveContext ctx) } /// - /// Write the parameters of a loader to the save context. Can be called by , where there's no actual + /// Write the parameters of a loader to the save context. Can be called by , where there's no actual /// loader, only default parameters. /// private static void SaveParameters(ModelSaveContext ctx, int threads, string generatedRowIndexName, Double shuffleBlocks) @@ -991,7 +991,7 @@ private static void SaveParameters(ModelSaveContext ctx, int threads, string gen } /// - /// Save a zero-row dataview that will be used to infer schema information, used in the case + /// Save a zero-row dataview that will be used to infer schema information, used in the case /// where the binary loader is instantiated with no input streams. /// private static void SaveSchema(IHostEnvironment env, ModelSaveContext ctx, ISchema schema, out int[] unsavableColIndices) @@ -1017,10 +1017,10 @@ private static void SaveSchema(IHostEnvironment env, ModelSaveContext ctx, ISche } /// - /// Given the schema and a model context, save an imaginary instance of a binary loader with the - /// specified schema. Deserialization from this context should produce a real binary loader that + /// Given the schema and a model context, save an imaginary instance of a binary loader with the + /// specified schema. 
Deserialization from this context should produce a real binary loader that /// has the specified schema. - /// + /// /// This is used in an API scenario, when the data originates from something other than a loader. /// Since our model file requires a loader at the beginning, we have to construct a bogus 'binary' loader /// to begin the pipe with, with the assumption that the user will bypass the loader at deserialization @@ -1042,9 +1042,9 @@ public static void SaveInstance(IHostEnvironment env, ModelSaveContext ctx, ISch int[] unsavable; SaveSchema(env, ctx, schema, out unsavable); // REVIEW: we silently ignore unsavable columns. - // This method is invoked only in an API scenario, where we need to save a loader but we only have a schema. - // In this case, the API user is likely not subscribed to our environment's channels. Also, in this case, the presence of - // unsavable columns is not necessarily a bad thing: the user typically provides his own data when loading the transforms, + // This method is invoked only in an API scenario, where we need to save a loader but we only have a schema. + // In this case, the API user is likely not subscribed to our environment's channels. Also, in this case, the presence of + // unsavable columns is not necessarily a bad thing: the user typically provides his own data when loading the transforms, // thus bypassing the bogus loader. 
} diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinarySaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinarySaver.cs index 7fe9fbbf4a..e2f44df2a4 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinarySaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinarySaver.cs @@ -850,7 +850,7 @@ public ColumnType LoadTypeDescriptionOrNull(Stream stream) /// The type of the codec to write and utilize /// The value to encode and write /// The number of bytes written - /// Whether the write was successful or not + /// Whether the write was successful or not public bool TryWriteTypeAndValue(Stream stream, ColumnType type, ref T value, out int bytesWritten) { _host.CheckValue(stream, nameof(stream)); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/IValueCodec.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/IValueCodec.cs index fd68a34cc9..9c6e607022 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/IValueCodec.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/IValueCodec.cs @@ -13,7 +13,7 @@ namespace Microsoft.ML.Runtime.Data.IO /// on the appropriate ColumnType, then opens multiple writers to write blocks of data /// to some stream. The idea is that each writer or reader is called on some "managable chunk" /// of data. - /// + /// /// Codecs should be thread safe, though the readers and writers they spawn do not need to /// be thread safe. /// @@ -60,7 +60,7 @@ internal interface IValueCodec : IValueCodec /// Stream on which we open reader. /// The number of items expected to be encoded in the block /// starting from the current position of the stream. Implementors should, if - /// possible, throw if it seems if the block contains a different number of + /// possible, throw if it seems if the block contains a different number of /// elements. 
IValueReader OpenReader(Stream stream, int items); } @@ -89,7 +89,7 @@ internal interface IValueWriter : IDisposable /// be spawned from an , its write methods called some /// number of times to write to the stream, and then Commit will be called when /// all values have been written, the stream now being at the end of the written block. - /// + /// /// The intended usage of the value writers is that blocks are composed of some small /// number of values (perhaps a few thousand), the idea being that a block is something /// that should easily fit in main memory, both for reading and writing. Some writers diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs index 4dcc82ac9b..024eaef4a2 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs @@ -13,20 +13,20 @@ internal static class Zlib public const string DllPath = "zlib.dll"; [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - private static unsafe extern Constants.RetCode deflateInit2_(ZStream* strm, int level, int method, int windowBits, + private static extern unsafe Constants.RetCode deflateInit2_(ZStream* strm, int level, int method, int windowBits, int memLevel, Constants.Strategy strategy, byte* version, int streamSize); [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - private static unsafe extern Constants.RetCode inflateInit2_(ZStream* strm, int windowBits, byte* version, int streamSize); + private static extern unsafe Constants.RetCode inflateInit2_(ZStream* strm, int windowBits, byte* version, int streamSize); [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - private static unsafe extern byte* zlibVersion(); + private static extern unsafe byte* zlibVersion(); [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static unsafe extern Constants.RetCode deflateEnd(ZStream* strm); + public static extern unsafe Constants.RetCode deflateEnd(ZStream* 
strm); [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static unsafe extern Constants.RetCode deflate(ZStream* strm, Constants.Flush flush); + public static extern unsafe Constants.RetCode deflate(ZStream* strm, Constants.Flush flush); public static unsafe Constants.RetCode DeflateInit2(ZStream* strm, int level, int method, int windowBits, int memLevel, Constants.Strategy strategy) @@ -40,10 +40,10 @@ public static unsafe Constants.RetCode InflateInit2(ZStream* strm, int windowBit } [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static unsafe extern Constants.RetCode inflate(ZStream* strm, Constants.Flush flush); + public static extern unsafe Constants.RetCode inflate(ZStream* strm, Constants.Flush flush); [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static unsafe extern Constants.RetCode inflateEnd(ZStream* strm); + public static extern unsafe Constants.RetCode inflateEnd(ZStream* strm); } [StructLayout(LayoutKind.Sequential)] diff --git a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs index 69eb3bbb3b..10bf816dc1 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/PartitionedFileLoader.cs @@ -682,13 +682,12 @@ private bool TryTruncatePath(int dirCount, string path, out string truncPath) Ch.Warning($"Path {path} did not have {dirCount} directories necessary for parsing."); return false; } - + // Rejoin segments to create a valid path. truncPath = String.Join(Path.DirectorySeparatorChar.ToString(), segments); return true; } - /// /// Parse all column values from the directory path. 
/// diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 3678c749ba..babca545c8 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -34,7 +34,7 @@ public sealed partial class TextLoader : IDataLoader /// /// Vector column of I4 that contains values from columns 1, 3 to 10 /// col=ColumnName:I4:1,3-10 - /// + /// /// Key range column of KeyType with underlying storage type U4 that contains values from columns 1, 3 to 10, that can go from 1 to 100 (0 reserved for out of range) /// col=ColumnName:U4[1-100]:1,3-10 /// @@ -554,7 +554,7 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile) { var range = col.Source[i]; - // Check for remaining range, raise flag. + // Check for remaining range, raise flag. if (range.AllOther) { ch.CheckUserArg(iinfoOther < 0, nameof(Range.AllOther), "At most one all other range can be specified"); @@ -605,7 +605,7 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile) NameToInfoIndex[name] = iinfo; } - // Note that segsOther[isegOther] is not a real segment to be included. + // Note that segsOther[isegOther] is not a real segment to be included. // It only persists segment information such as Min, Max, autoEnd, variableEnd for later processing. // Process all other range. if (iinfoOther >= 0) @@ -641,7 +641,7 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile) foreach (var seg in segsAll) { - // At this step, all indices less than min is contained in some segment, either in + // At this step, all indices less than min is contained in some segment, either in // segsAll or segsNew. 
ch.Assert(min < lim); if (min < seg.Min) @@ -1014,7 +1014,7 @@ public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource files _host.CheckNonEmpty(args.Separator, nameof(args.Separator), "Must specify a separator"); //Default arg.Separator is tab and default args.SeparatorChars is also a '\t'. - //At a time only one default can be different and whichever is different that will + //At a time only one default can be different and whichever is different that will //be used. if (args.SeparatorChars.Length > 1 || args.SeparatorChars[0] != '\t') { @@ -1110,7 +1110,7 @@ private static bool TryParseSchema(IHost host, IMultiStreamSource files, // Get settings just for core arguments, not everything. string tmp = CmdParser.GetSettings(host, args, new ArgumentsCore()); - // Try to get the schema information from the file. + // Try to get the schema information from the file. string str = Cursor.GetEmbeddedArgs(files); if (string.IsNullOrWhiteSpace(str)) return false; diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index fde45c6f4f..582d81b546 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -27,7 +27,7 @@ public sealed partial class TextLoader : IDataLoader /// private sealed class ValueCreatorCache { - private volatile static ValueCreatorCache _instance; + private static volatile ValueCreatorCache _instance; public static ValueCreatorCache Instance { get diff --git a/src/Microsoft.ML.Data/DataLoadSave/Transpose/TransposeLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Transpose/TransposeLoader.cs index 173c588607..37cbe23b92 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Transpose/TransposeLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Transpose/TransposeLoader.cs @@ -531,7 +531,7 @@ public void Save(ModelSaveContext ctx) } /// - /// Save a zero-row dataview that will be used to 
infer schema information, used in the case + /// Save a zero-row dataview that will be used to infer schema information, used in the case /// where the tranpsose loader is instantiated with no input streams. /// private static void SaveSchema(IHostEnvironment env, ModelSaveContext ctx, ISchema schema) diff --git a/src/Microsoft.ML.Data/DataView/AppendRowsDataView.cs b/src/Microsoft.ML.Data/DataView/AppendRowsDataView.cs index 6633e2535f..dc713a82b2 100644 --- a/src/Microsoft.ML.Data/DataView/AppendRowsDataView.cs +++ b/src/Microsoft.ML.Data/DataView/AppendRowsDataView.cs @@ -24,7 +24,7 @@ namespace Microsoft.ML.Runtime.Data /// This class provides the functionality to combine multiple IDataView objects which share the same schema /// All sources must contain the same number of columns and their column names, sizes, and item types must match. /// The row count of the resulting IDataView will be the sum over that of each individual. - /// + /// /// An AppendRowsDataView instance is shuffleable iff all of its sources are shuffleable and their row counts are known. /// public sealed class AppendRowsDataView : IDataView @@ -46,8 +46,8 @@ public sealed class AppendRowsDataView : IDataView /// /// Create a dataview by appending the rows of the sources. - /// - /// All sources must be consistent with the passed-in schema in the number of columns, column names, + /// + /// All sources must be consistent with the passed-in schema in the number of columns, column names, /// and column types. If schema is null, the first source's schema will be used. /// /// The host environment. @@ -203,7 +203,7 @@ public bool IsColumnActive(int col) } /// - /// The deterministic cursor. It will scan through the sources sequentially. + /// The deterministic cursor. It will scan through the sources sequentially. /// private sealed class Cursor : CursorBase { @@ -293,7 +293,7 @@ public override void Dispose() /// /// A RandCursor will ask each subordinate cursor to shuffle itself. 
- /// Then, at each step, it randomly calls a subordinate to move next with probability (roughly) proportional to + /// Then, at each step, it randomly calls a subordinate to move next with probability (roughly) proportional to /// the number of the subordinate's remaining rows. /// private sealed class RandCursor : CursorBase @@ -383,16 +383,16 @@ public override void Dispose() /// /// Given k classes with counts (N_0, N_2, N_3, ..., N_{k-1}), the goal of this sampler is to select the i-th - /// class with probability N_i/M, where M = N_0 + N_1 + ... + N_{k-1}. + /// class with probability N_i/M, where M = N_0 + N_1 + ... + N_{k-1}. /// Once the i-th class is selected, its count will be updated to N_i - 1. - /// + /// /// For efficiency consideration, the sampling distribution is only an approximation of the desired distribution. /// private sealed class MultinomialWithoutReplacementSampler { // Implementation: generate a batch array of size BatchSize. // Each class will claim a fraction of the batch proportional to its remaining row count. - // Shuffle the array. The sampler reads from the array one at a time until the batch is consumed. + // Shuffle the array. The sampler reads from the array one at a time until the batch is consumed. // The sampler then generates a new batch and repeat the process. private const int BatchSize = 1000; diff --git a/src/Microsoft.ML.Data/DataView/CacheDataView.cs b/src/Microsoft.ML.Data/DataView/CacheDataView.cs index 3bca858d1d..72fb4b18a5 100644 --- a/src/Microsoft.ML.Data/DataView/CacheDataView.cs +++ b/src/Microsoft.ML.Data/DataView/CacheDataView.cs @@ -618,7 +618,7 @@ private interface IWaiter /// is equivalent to also having waited on i-1, i-2, etc. /// Note that this is position within the cache, that is, a row index, /// as opposed to position within the cursor. - /// + /// /// This method should be thread safe because in the parallel cursor /// case it will be used by multiple threads. 
/// @@ -955,23 +955,23 @@ public Wrapper(RandomIndex index) /// next job ids before they push the completed jobs to the consumer. So the workers are /// then subject to being blocked until their current completed jobs are fully accepted /// (i.e. added to the to-consume queue). - /// + /// /// How it works: /// Suppose we have 7 workers (w0,..,w6) and 14 jobs (j0,..,j13). /// Initially, jobs get assigned to workers using a shared counter. /// Here is an example outcome of using a shared counter: /// w1->j0, w6->j1, w0->j2, w3->j3, w4->j4, w5->j5, w2->j6. - /// + /// /// Suppose workers finished jobs in the following order: /// w5->j5, w0->j2, w6->j1, w4->j4, w3->j3,w1->j0, w2->j6. - /// + /// /// w5 finishes processing j5 first, but will be blocked until the processing of jobs /// j0,..,j4 completes since the consumer can consume jobs in order only. /// Therefore, the next available job (j7) should not be assigned to w5. It should be - /// assigned to the worker whose job *get consumed first* (w1 since it processes j0 - /// which is the first job) *not* to the worker who completes its job first (w5 in + /// assigned to the worker whose job *get consumed first* (w1 since it processes j0 + /// which is the first job) *not* to the worker who completes its job first (w5 in /// this example). - /// + /// /// So, a shared counter can be used to assign jobs to workers initially but should /// not be used onwards. /// diff --git a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs index 4d387de1d5..81aef4b01e 100644 --- a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs +++ b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs @@ -37,7 +37,7 @@ public CompositeSchema(ISchema[] sources) /// /// Returns an array of input predicated for sources, corresponding to the input predicate. 
- /// The returned array size is equal to the number of sources, but if a given source is not needed at all, + /// The returned array size is equal to the number of sources, but if a given source is not needed at all, /// the corresponding predicate will be null. /// public Func[] GetInputPredicates(Func predicate) diff --git a/src/Microsoft.ML.Data/DataView/RowToRowMapperTransform.cs b/src/Microsoft.ML.Data/DataView/RowToRowMapperTransform.cs index b0fde835d8..d69379d5da 100644 --- a/src/Microsoft.ML.Data/DataView/RowToRowMapperTransform.cs +++ b/src/Microsoft.ML.Data/DataView/RowToRowMapperTransform.cs @@ -30,7 +30,7 @@ public RowMapperColumnInfo(string name, ColumnType type, ColumnMetadataInfo meta } /// - /// This interface is used to create a . + /// This interface is used to create a . /// Implementations should be given an in their constructor, and should have a /// ctor or Create method with , along with a corresponding /// . @@ -44,7 +44,7 @@ public interface IRowMapper : ICanSaveModel /// /// Returns the getters for the output columns given an active set of output columns. The length of the getters - /// array should be equal to the number of columns added by the IRowMapper. It should contain the getter for the + /// array should be equal to the number of columns added by the IRowMapper. It should contain the getter for the /// i'th output column if activeOutput(i) is true, and null otherwise. /// Delegate[] CreateGetters(IRow input, Func activeOutput, out Action disposer); diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index 74477de9b7..91bb9c8b6a 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -1041,7 +1041,6 @@ private static Splitter CreateCore(IDataView view, int col, int[] ends) } #region ISchema implementation - // Subclasses should implement ColumnCount and GetColumnType. 
public override bool TryGetColumnIndex(string name, out int col) { @@ -1062,8 +1061,6 @@ public override string GetColumnName(int col) Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col)); return _view.Schema.GetColumnName(SrcCol); } - - public override abstract ColumnType GetColumnType(int col); #endregion private abstract class RowBase : IRow @@ -1215,7 +1212,7 @@ private sealed class Row : RowBase> private VBuffer _inputValue; // The delegate to get the input value. private readonly ValueGetter> _inputGetter; - // The limit of _inputValue.Indices + // The limit of _inputValue.Indices private readonly int[] _srcIndicesLims; // Convenient accessor since we use this all over the place. private int[] Lims { get { return Parent._lims; } } diff --git a/src/Microsoft.ML.Data/DataView/ZipDataView.cs b/src/Microsoft.ML.Data/DataView/ZipDataView.cs index 9a7e79bab8..a472b48b36 100644 --- a/src/Microsoft.ML.Data/DataView/ZipDataView.cs +++ b/src/Microsoft.ML.Data/DataView/ZipDataView.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.Runtime.Data { /// /// This is a data view that is a 'zip' of several data views. - /// The length of the zipped data view is equal to the shortest of the lengths of the components. + /// The length of the zipped data view is equal to the shortest of the lengths of the components. /// public sealed class ZipDataView : IDataView { @@ -77,7 +77,7 @@ public IRowCursor GetRowCursor(Func predicate, IRandom rand = null) var srcPredicates = _schema.GetInputPredicates(predicate); - // REVIEW: if we know the row counts, we could only open cursor if it has needed columns, and have the + // REVIEW: if we know the row counts, we could only open cursor if it has needed columns, and have the // outer cursor handle the early stopping. If we don't know row counts, we need to open all the cursors because // we don't know which one will be the shortest. 
// One reason this is not done currently is because the API has 'somewhat mutable' data views, so potentially this @@ -88,8 +88,8 @@ public IRowCursor GetRowCursor(Func predicate, IRandom rand = null) } /// - /// Create an with no requested columns on a data view. - /// Potentially, this can be optimized by calling GetRowCount(lazy:true) first, and if the count is not known, + /// Create an with no requested columns on a data view. + /// Potentially, this can be optimized by calling GetRowCount(lazy:true) first, and if the count is not known, /// wrapping around GetCursor(). /// private IRowCursor GetMinimumCursor(IDataView dv) diff --git a/src/Microsoft.ML.Data/Depricated/TGUIAttribute.cs b/src/Microsoft.ML.Data/Depricated/TGUIAttribute.cs index 7a51e1a5ee..5f09c604bb 100644 --- a/src/Microsoft.ML.Data/Depricated/TGUIAttribute.cs +++ b/src/Microsoft.ML.Data/Depricated/TGUIAttribute.cs @@ -7,12 +7,12 @@ namespace Microsoft.ML.Runtime.Internal.Internallearn { -#pragma warning disable TLC_GeneralName // This structure should be deprecated anyway. +#pragma warning disable MSML_GeneralName // This structure should be deprecated anyway. // REVIEW: Get rid of this. Everything should be in the ArgumentAttribute (or a class // derived from ArgumentAttribute). [AttributeUsage(AttributeTargets.Field)] public class TGUIAttribute : Attribute -#pragma warning restore TLC_GeneralName +#pragma warning restore MSML_GeneralName { // Display parameters public string Label { get; set; } @@ -32,7 +32,7 @@ public class TGUIAttribute : Attribute public bool NoSweep { get; set; } //Settings are automatically populated for fields that are classes. - //The below is an extension of the framework to add settings for + //The below is an extension of the framework to add settings for //boolean type fields. 
public bool ShowSettingsForCheckbox { get; set; } public object Settings { get; set; } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index fc335bb52e..5b46aad0b8 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -57,7 +57,7 @@ public static Float DotProduct(ref VBuffer a, ref VBuffer b) } /// - /// Sparsify vector A (keep at most + values) + /// Sparsify vector A (keep at most + values) /// and optionally rescale values to the [-1, 1] range. /// Vector to be sparsified and normalized. /// How many top (positive) elements to preserve after sparsification. diff --git a/src/Microsoft.ML.Data/Dirty/PredictorBase.cs b/src/Microsoft.ML.Data/Dirty/PredictorBase.cs index 980a85ffd1..35c9a49133 100644 --- a/src/Microsoft.ML.Data/Dirty/PredictorBase.cs +++ b/src/Microsoft.ML.Data/Dirty/PredictorBase.cs @@ -41,9 +41,9 @@ protected PredictorBase(IHostEnvironment env, string name, ModelLoadContext ctx) // Verify that the Float type matches. int cbFloat = ctx.Reader.ReadInt32(); -#pragma warning disable TLC_NoMessagesForLoadContext // This one is actually useful. +#pragma warning disable MSML_NoMessagesForLoadContext // This one is actually useful. Host.CheckDecode(cbFloat == sizeof(Float), "This file was saved by an incompatible version"); -#pragma warning restore TLC_NoMessagesForLoadContext +#pragma warning restore MSML_NoMessagesForLoadContext } public virtual void Save(ModelSaveContext ctx) diff --git a/src/Microsoft.ML.Data/EntryPoints/CommonOutputs.cs b/src/Microsoft.ML.Data/EntryPoints/CommonOutputs.cs index 9e99bf8993..37f37f6c64 100644 --- a/src/Microsoft.ML.Data/EntryPoints/CommonOutputs.cs +++ b/src/Microsoft.ML.Data/EntryPoints/CommonOutputs.cs @@ -191,7 +191,7 @@ public interface ITrainerOutput } /// - /// Macro output class base. + /// Macro output class base. 
/// public abstract class MacroOutput { diff --git a/src/Microsoft.ML.Data/EntryPoints/EntryPointNode.cs b/src/Microsoft.ML.Data/EntryPoints/EntryPointNode.cs index 4f94e103b8..d2d59eb94b 100644 --- a/src/Microsoft.ML.Data/EntryPoints/EntryPointNode.cs +++ b/src/Microsoft.ML.Data/EntryPoints/EntryPointNode.cs @@ -632,7 +632,7 @@ public static EntryPointNode Create( /// /// Checks the given JSON object key-value pair is a valid EntryPoint input and /// extracts out any variables that need to be populated. These variables will be - /// added to the EntryPoint context. Input parameters that are not set to variables + /// added to the EntryPoint context. Input parameters that are not set to variables /// will be immediately set using the input builder instance. /// private void CheckAndSetInputValue(KeyValuePair pair) @@ -692,7 +692,7 @@ private void CheckAndSetInputValue(KeyValuePair pair) /// /// Checks the given JSON object key-value pair is a valid EntryPoint output. - /// Extracts out any variables that need to be populated and adds them to the + /// Extracts out any variables that need to be populated and adds them to the /// EntryPoint context. /// private void CheckAndMarkOutputValue(KeyValuePair pair) @@ -1073,8 +1073,8 @@ protected VariableBinding(string varName) VariableName = varName; } - // A regex to validate an EntryPoint variable value accessor string. Valid EntryPoint variable names - // can be any sequence of alphanumeric characters and underscores. They must start with a letter or underscore. + // A regex to validate an EntryPoint variable value accessor string. Valid EntryPoint variable names + // can be any sequence of alphanumeric characters and underscores. They must start with a letter or underscore. // An EntryPoint variable can be followed with an array or dictionary specifier, which begins // with '[', contains either an integer or alphanumeric string, optionally wrapped in single-quotes, // followed with ']'. 
diff --git a/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs b/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs index e5afd8dbb5..4d3b765114 100644 --- a/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs +++ b/src/Microsoft.ML.Data/EntryPoints/InputBuilder.cs @@ -14,8 +14,8 @@ namespace Microsoft.ML.Runtime.EntryPoints.JsonUtils { /// - /// The class that creates and wraps around an instance of an input object and gradually populates all fields, keeping track of missing - /// required values. The values can be set from their JSON representation (during the graph parsing stage), as well as directly + /// The class that creates and wraps around an instance of an input object and gradually populates all fields, keeping track of missing + /// required values. The values can be set from their JSON representation (during the graph parsing stage), as well as directly /// (in the process of graph execution). /// public sealed class InputBuilder @@ -515,7 +515,7 @@ private static object ParseJsonValue(IExceptionContext ectx, Type type, Attribut } /// - /// Ensures that the given value can be assigned to an entry point field with + /// Ensures that the given value can be assigned to an entry point field with /// type . This method will wrap the value in the option /// type if needed and throw an exception if the value isn't assignable. 
/// @@ -791,7 +791,7 @@ public static class Range /// public static class Deprecated { - public new static string ToString() => "Deprecated"; + public static new string ToString() => "Deprecated"; public const string Message = "Message"; } @@ -800,7 +800,7 @@ public static class Deprecated /// public static class SweepableLongParam { - public new static string ToString() => "SweepRange"; + public static new string ToString() => "SweepRange"; public const string RangeType = "RangeType"; public const string Max = "Max"; public const string Min = "Min"; @@ -814,7 +814,7 @@ public static class SweepableLongParam /// public static class SweepableFloatParam { - public new static string ToString() => "SweepRange"; + public static new string ToString() => "SweepRange"; public const string RangeType = "RangeType"; public const string Max = "Max"; public const string Min = "Min"; @@ -828,14 +828,14 @@ public static class SweepableFloatParam /// public static class SweepableDiscreteParam { - public new static string ToString() => "SweepRange"; + public static new string ToString() => "SweepRange"; public const string RangeType = "RangeType"; public const string Options = "Values"; } public static class PipelineSweeperSupportedMetrics { - public new static string ToString() => "SupportedMetric"; + public static new string ToString() => "SupportedMetric"; public const string Auc = BinaryClassifierEvaluator.Auc; public const string AccuracyMicro = Data.MultiClassClassifierEvaluator.AccuracyMicro; public const string AccuracyMacro = MultiClassClassifierEvaluator.AccuracyMacro; diff --git a/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs b/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs index 4b474f847c..055b2fa299 100644 --- a/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs +++ b/src/Microsoft.ML.Data/EntryPoints/PredictorModel.cs @@ -74,7 +74,7 @@ public void Save(IHostEnvironment env, Stream stream) { // REVIEW: address the asymmetry in the way we're loading and 
saving the model. // Effectively, we have methods to load the transform model from a model.zip, but don't have - // methods to compose the model.zip out of transform model, predictor and role mappings + // methods to compose the model.zip out of transform model, predictor and role mappings // (we use the TrainUtils.SaveModel that does all three). // Create the chain of transforms for saving. diff --git a/src/Microsoft.ML.Data/EntryPoints/ScoreModel.cs b/src/Microsoft.ML.Data/EntryPoints/ScoreModel.cs index 96ce0acac9..312a92bccc 100644 --- a/src/Microsoft.ML.Data/EntryPoints/ScoreModel.cs +++ b/src/Microsoft.ML.Data/EntryPoints/ScoreModel.cs @@ -15,9 +15,9 @@ namespace Microsoft.ML.Runtime.EntryPoints /// /// This module handles scoring a against a new dataset. /// As a result, we return both the scored data and the scoring transform as a . - /// - /// REVIEW: This module does not support 'exotic' scoring scenarios, like recommendation and quantile regression - /// (those where the user-defined scorer settings are necessary to identify the scorer). We could resolve this by + /// + /// REVIEW: This module does not support 'exotic' scoring scenarios, like recommendation and quantile regression + /// (those where the user-defined scorer settings are necessary to identify the scorer). We could resolve this by /// adding a sub-component for extra scorer args, or by creating specialized EPs for these scenarios. /// public static partial class ScoreModel diff --git a/src/Microsoft.ML.Data/EntryPoints/TransformModel.cs b/src/Microsoft.ML.Data/EntryPoints/TransformModel.cs index 9edc87df6d..ed8e7d56e2 100644 --- a/src/Microsoft.ML.Data/EntryPoints/TransformModel.cs +++ b/src/Microsoft.ML.Data/EntryPoints/TransformModel.cs @@ -43,7 +43,7 @@ public sealed class TransformModel : ITransformModel /// /// The resulting schema once applied to this model. 
The might have - /// columns that are not needed by this transform and these columns will be seen in the + /// columns that are not needed by this transform and these columns will be seen in the /// produced by this transform. /// public ISchema OutputSchema => _chain.Schema; diff --git a/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs index 74f7ca0068..8e4f3be56c 100644 --- a/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs @@ -57,7 +57,7 @@ public static class OverallMetrics } /// - /// The anomaly detection evaluator outputs a data view by this name, which contains the the examples + /// The anomaly detection evaluator outputs a data view by this name, which contains the the examples /// with the top scores in the test set. It contains the three columns listed below, with each row corresponding /// to one test example. /// diff --git a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs index 7161f66439..71c08eecd0 100644 --- a/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs @@ -88,7 +88,7 @@ public enum Metrics /// /// Binary classification evaluator outputs a data view with this name, which contains the p/r data. - /// It contains the columns listed below, and in case data also contains a weight column, it contains + /// It contains the columns listed below, and in case data also contains a weight column, it contains /// also columns for the weighted values. /// and false positive rate. /// @@ -1211,7 +1211,7 @@ public override IEnumerable GetOverallMetricColumns() } // This method saves the p/r plots, and returns the p/r metrics data view. 
- // In case there are results from multiple folds, they are averaged using + // In case there are results from multiple folds, they are averaged using // vertical averaging for the p/r plot, and appended using AppendRowsDataView for // the p/r data view. private bool TryGetPrMetrics(Dictionary[] metrics, out IDataView pr) diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs index ef7183c2fa..c628cff1e4 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorBase.cs @@ -217,7 +217,7 @@ protected ValueGetter> GetKeyValueGetter(AggregatorDictionaryBas /// /// This is a helper class for evaluators deriving from EvaluatorBase, used for computing aggregate metrics. /// Aggregators should keep track of the number of passes done. The method should get - /// the input getters of the given IRow that are needed for the current pass, assuming that all the needed column + /// the input getters of the given IRow that are needed for the current pass, assuming that all the needed column /// information is stored in the given . /// In the aggregator should call the getters once, and process the input as needed. /// increments the pass count after each pass. @@ -251,7 +251,7 @@ public bool Start() return IsActive(); } - /// + /// /// This method should get the getters of the new IRow that are needed for the next pass. /// public abstract void InitializeNextPass(IRow row, RoleMappedSchema schema); @@ -370,7 +370,7 @@ private static AggregatorDictionaryBase CreateDictionary(RoleMappedSchem } /// - /// This method calls the getter of the stratification column, and returns the aggregator corresponding to + /// This method calls the getter of the stratification column, and returns the aggregator corresponding to /// the stratification value. 
/// /// diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 86edac082a..942d139425 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -115,10 +115,10 @@ public static ColumnInfo GetScoreColumnInfo(IExceptionContext ectx, ISchema sche ColumnInfo info; if (!string.IsNullOrWhiteSpace(name)) { -#pragma warning disable TLC_ContractsNameUsesNameof +#pragma warning disable MSML_ContractsNameUsesNameof if (!ColumnInfo.TryCreateFromName(schema, name, out info)) throw ectx.ExceptUserArg(argName, "Score column is missing"); -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof return info; } @@ -145,9 +145,9 @@ public static ColumnInfo GetScoreColumnInfo(IExceptionContext ectx, ISchema sche if (!string.IsNullOrWhiteSpace(defName) && ColumnInfo.TryCreateFromName(schema, defName, out info)) return info; -#pragma warning disable TLC_ContractsNameUsesNameof +#pragma warning disable MSML_ContractsNameUsesNameof throw ectx.ExceptUserArg(argName, "Score column is missing"); -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof } /// @@ -168,12 +168,12 @@ public static ColumnInfo GetOptAuxScoreColumnInfo(IExceptionContext ectx, ISchem if (!string.IsNullOrWhiteSpace(name)) { ColumnInfo info; -#pragma warning disable TLC_ContractsNameUsesNameof +#pragma warning disable MSML_ContractsNameUsesNameof if (!ColumnInfo.TryCreateFromName(schema, name, out info)) throw ectx.ExceptUserArg(argName, "{0} column is missing", valueKind); if (!testType(info.Type)) throw ectx.ExceptUserArg(argName, "{0} column has incompatible type", valueKind); -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof return info; } @@ -332,15 +332,15 @@ public static IEnumerable> GetMetrics(IDataView met if (getters[i] != 
null) { getters[i](ref metricVal); - // For R8 valued columns the metric name is the column name. + // For R8 valued columns the metric name is the column name. yield return new KeyValuePair(schema.GetColumnName(i), metricVal); } else if (getVectorMetrics && vBufferGetters[i] != null) { vBufferGetters[i](ref metricVals); - // For R8 vector valued columns the names of the metrics are the column name, - // followed by the slot name if it exists, or "Label_i" if it doesn't. + // For R8 vector valued columns the names of the metrics are the column name, + // followed by the slot name if it exists, or "Label_i" if it doesn't. VBuffer names = default(VBuffer); var size = schema.GetColumnType(i).VectorSize; var slotNamesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, i); @@ -386,7 +386,7 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int env.CheckValue(input, nameof(input)); env.CheckParam(curFold >= 0, nameof(curFold)); - // We use the first column in the data view as an input column to the LambdaColumnMapper, + // We use the first column in the data view as an input column to the LambdaColumnMapper, // because it must have an input. int inputCol = 0; while (inputCol < input.Schema.ColumnCount && input.Schema.IsHidden(inputCol)) @@ -428,7 +428,7 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int env.CheckParam(curFold >= 0, nameof(curFold)); env.CheckParam(numFolds > 0, nameof(numFolds)); - // We use the first column in the data view as an input column to the LambdaColumnMapper, + // We use the first column in the data view as an input column to the LambdaColumnMapper, // because it must have an input. 
int inputCol = 0; while (inputCol < input.Schema.ColumnCount && input.Schema.IsHidden(inputCol)) @@ -444,7 +444,7 @@ public static IDataView AddFoldIndex(IHostEnvironment env, IDataView input, int /// /// This method takes an array of data views and a specified input vector column, and adds a new output column to each of the data views. - /// First, we find the union set of the slot names in the different data views. Next we define a new vector column for each + /// First, we find the union set of the slot names in the different data views. Next we define a new vector column for each /// data view, indexed by the union of the slot names. For each data view, every slot value is the value in the slot corresponding /// to its slot name in the original column. If a reconciled slot name does not exist in an input column, the value in the output /// column is def. @@ -593,7 +593,7 @@ private static int[][] MapKeys(ISchema[] schemas, string columnName, bool isV /// /// This method takes an array of data views and a specified input key column, and adds a new output column to each of the data views. - /// First, we find the union set of the key values in the different data views. Next we define a new key column for each + /// First, we find the union set of the key values in the different data views. Next we define a new key column for each /// data view, with the union of the key values as the new key values. For each data view, the value in the output column is the value /// corresponding to the key value in the original column. /// @@ -634,7 +634,7 @@ public static void ReconcileKeyValues(IHostEnvironment env, IDataView[] views, s /// /// This method takes an array of data views and a specified input key column, and adds a new output column to each of the data views. - /// First, we find the union set of the key values in the different data views. Next we define a new key column for each + /// First, we find the union set of the key values in the different data views. 
Next we define a new key column for each /// data view, with the union of the key values as the new key values. For each data view, the value in the output column is the value /// corresponding to the key value in the original column. /// @@ -993,7 +993,7 @@ private static List GetMetricNames(IChannel ch, ISchema schema, IRow row ch.Assert(Utils.Size(vBufferGetters) == schema.ColumnCount); // Get the names of the metrics. For R8 valued columns the metric name is the column name. For R8 vector valued columns - // the names of the metrics are the column name, followed by the slot name if it exists, or "Label_i" if it doesn't. + // the names of the metrics are the column name, followed by the slot name if it exists, or "Label_i" if it doesn't. VBuffer names = default(VBuffer); int metricCount = 0; var metricNames = new List(); @@ -1326,7 +1326,7 @@ private static void AddScalarColumn(this ArrayDataViewBuilder dvBldr, ISchema sc } /// - /// Takes a data view containing one or more rows of metrics, and returns a data view containing additional + /// Takes a data view containing one or more rows of metrics, and returns a data view containing additional /// rows with the average and the standard deviation of the metrics in the input data view. /// public static IDataView CombineFoldMetricsDataViews(IHostEnvironment env, IDataView data, int numFolds) @@ -1509,8 +1509,8 @@ private static string GetOverallMetricsAsString(double[] sumMetrics, double[] su } // This method returns a string representation of a set of metrics. If there are stratification columns, it looks for columns named - // StratCol and StratVal, and outputs the metrics in the rows with NA in the StratCol column. If weighted is true, it looks - // for a DvBool column named "IsWeighted" and outputs the metrics in the rows with a value of true in that column. + // StratCol and StratVal, and outputs the metrics in the rows with NA in the StratCol column. 
If weighted is true, it looks + // for a DvBool column named "IsWeighted" and outputs the metrics in the rows with a value of true in that column. // If nonAveragedCols is non-null, it computes the average and standard deviation over all the relevant rows and populates // nonAveragedCols with columns that are either hidden, or are not of a type that we can display (i.e., either a numeric column, // or a known length vector of doubles). @@ -1749,7 +1749,7 @@ public static class MetricKinds { /// /// This data view contains the confusion matrix for N-class classification. It has N rows, and each row has - /// the following columns: + /// the following columns: /// * Count (vector indicating how many examples of this class were predicted as each one of the classes). This column /// should have metadata containing the class names. /// * (Optional) Weight (vector with the total weight of the examples of this class that were predicted as each one of the classes). diff --git a/src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs index 1a5a2177f3..2af1b54d92 100644 --- a/src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs @@ -10,10 +10,10 @@ namespace Microsoft.ML.Runtime.Data { /// - /// This interface is used by Maml components (the , the + /// This interface is used by Maml components (the , the /// and the to evaluate, print and save the results. - /// The input to the and the methods - /// should be assumed to contain only the following column roles: label, group, weight and name. Any other columns needed for + /// The input to the and the methods + /// should be assumed to contain only the following column roles: label, group, weight and name. Any other columns needed for /// evaluation should be searched for by name in the . 
/// public interface IMamlEvaluator : IEvaluator diff --git a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs index cdf3f9c57e..616cff8394 100644 --- a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs @@ -48,7 +48,7 @@ public sealed class Arguments public const string MaxDcg = "MaxDCG"; /// - /// The ranking evaluator outputs a data view by this name, which contains metrics aggregated per group. + /// The ranking evaluator outputs a data view by this name, which contains metrics aggregated per group. /// It contains four columns: GroupId, NDCG, DCG and MaxDCG. Each row in the data view corresponds to one /// group in the scored data. /// diff --git a/src/Microsoft.ML.Data/Model/Onnx/OnnxContext.cs b/src/Microsoft.ML.Data/Model/Onnx/OnnxContext.cs index bdef784b29..230f2600a3 100644 --- a/src/Microsoft.ML.Data/Model/Onnx/OnnxContext.cs +++ b/src/Microsoft.ML.Data/Model/Onnx/OnnxContext.cs @@ -73,7 +73,7 @@ public abstract class OnnxContext public abstract string AddIntermediateVariable(ColumnType type, string colName, bool skip = false); /// - /// Creates an ONNX node + /// Creates an ONNX node /// /// The name of the ONNX operator to apply /// The names of the variables as inputs diff --git a/src/Microsoft.ML.Data/Model/Pfa/BoundPfaContext.cs b/src/Microsoft.ML.Data/Model/Pfa/BoundPfaContext.cs index d0923a9962..dfd5ef55fb 100644 --- a/src/Microsoft.ML.Data/Model/Pfa/BoundPfaContext.cs +++ b/src/Microsoft.ML.Data/Model/Pfa/BoundPfaContext.cs @@ -33,7 +33,7 @@ public sealed class BoundPfaContext /// private readonly Dictionary _nameToVarName; /// - /// This contains a map of those names in + /// This contains a map of those names in /// private readonly HashSet _unavailable; diff --git a/src/Microsoft.ML.Data/Model/Pfa/PfaContext.cs b/src/Microsoft.ML.Data/Model/Pfa/PfaContext.cs index 55122535d4..c0996beea1 100644 --- 
a/src/Microsoft.ML.Data/Model/Pfa/PfaContext.cs +++ b/src/Microsoft.ML.Data/Model/Pfa/PfaContext.cs @@ -215,7 +215,7 @@ public static JObject CreateFuncBlock(JArray prms, JToken returnType, JToken doB /// declaration. So, if you use a record type three times, that means one of the three usages must be /// accompanied by a full type declaration, whereas the other two can just then identify it by name. /// This is extremely silly, but there you go. - /// + /// /// Anyway: this will attempt to add a type to the list of registered types. If it returns true /// then the caller is responsible, then, for ensuring that their PFA code they are generating contains /// not only a reference of the type, but a declaration of the type. If however this returns false diff --git a/src/Microsoft.ML.Data/Model/Repository.cs b/src/Microsoft.ML.Data/Model/Repository.cs index b19fbc8eba..eb665f1bfc 100644 --- a/src/Microsoft.ML.Data/Model/Repository.cs +++ b/src/Microsoft.ML.Data/Model/Repository.cs @@ -231,7 +231,7 @@ protected void RemoveEntry(Entry ent) /// /// When building paths to our local file system, we want to force both forward and backward slashes /// to the system directory separator character. We do this for cases where we either used Windows-specific - /// path building logic, or concatenated filesystem paths with zip archive entries on Linux. + /// path building logic, or concatenated filesystem paths with zip archive entries on Linux. 
/// private static string NormalizeForFileSystem(string path) => path?.Replace('/', Path.DirectorySeparatorChar).Replace('\\', Path.DirectorySeparatorChar); diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index 895bf92273..237afb400e 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -111,7 +111,7 @@ public interface ICalibratorTrainer /// public interface ICalibrator { - /// Given a classifier output, produce the probability + /// Given a classifier output, produce the probability Float PredictProbability(Float output); /// Get the summary of current calibrator settings @@ -745,7 +745,7 @@ private static bool NeedCalibration(IHostEnvironment env, IChannel ch, ICalibrat /// The trainer used to train the predictor. /// The predictor that needs calibration. /// The examples to used for calibrator training. - /// The original predictor, if no calibration is needed, + /// The original predictor, if no calibration is needed, /// or a metapredictor that wraps the original predictor and the newly trained calibrator. public static IPredictor TrainCalibratorIfNeeded(IHostEnvironment env, IChannel ch, ICalibratorTrainer calibrator, int maxRows, ITrainer trainer, IPredictor predictor, RoleMappedData data) @@ -771,7 +771,7 @@ public static IPredictor TrainCalibratorIfNeeded(IHostEnvironment env, IChannel /// The maximum rows to use for calibrator training. /// The predictor that needs calibration. /// The examples to used for calibrator training. - /// The original predictor, if no calibration is needed, + /// The original predictor, if no calibration is needed, /// or a metapredictor that wraps the original predictor and the newly trained calibrator. 
public static IPredictor TrainCalibrator(IHostEnvironment env, IChannel ch, ICalibratorTrainer caliTrainer, int maxRows, IPredictor predictor, RoleMappedData data) diff --git a/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs b/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs index 598db3ab46..6da402431d 100644 --- a/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs +++ b/src/Microsoft.ML.Data/Scorers/BinaryClassifierScorer.cs @@ -201,7 +201,7 @@ public override void SaveAsOnnx(OnnxContext ctx) for (int iinfo = 0; iinfo < Bindings.InfoCount; ++iinfo) outColumnNames[iinfo] = Bindings.GetColumnName(Bindings.MapIinfoToCol(iinfo)); - //Check if "Probability" column was generated by the base class, only then + //Check if "Probability" column was generated by the base class, only then //label can be predicted. if (Bindings.InfoCount >= 3 && ctx.ContainsColumn(outColumnNames[2])) { diff --git a/src/Microsoft.ML.Data/Scorers/GenericScorer.cs b/src/Microsoft.ML.Data/Scorers/GenericScorer.cs index a407873bac..41c12e94ed 100644 --- a/src/Microsoft.ML.Data/Scorers/GenericScorer.cs +++ b/src/Microsoft.ML.Data/Scorers/GenericScorer.cs @@ -20,7 +20,7 @@ namespace Microsoft.ML.Runtime.Data { /// /// This class is a scorer that passes through all the ISchemaBound columns without adding any "derived columns". - /// It also passes through all metadata (except for possibly changing the score column kind), and adds the + /// It also passes through all metadata (except for possibly changing the score column kind), and adds the /// score set id metadata. 
/// diff --git a/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs b/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs index ddb05e3686..0f115bb2f0 100644 --- a/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs +++ b/src/Microsoft.ML.Data/Scorers/ScoreMapperSchema.cs @@ -251,7 +251,7 @@ public SequencePredictorSchema(ColumnType type, ref VBuffer keyNames, st Contracts.CheckParam(keyNames.Length == type.ItemType.KeyCount, nameof(keyNames), "keyNames length must match type's key count"); // REVIEW: Assuming the caller takes some care, it seems - // like we can get away with + // like we can get away with _keyNames = keyNames; _keyNamesType = new VectorType(TextType.Instance, keyNames.Length); _getKeyNames = GetKeyNames; diff --git a/src/Microsoft.ML.Data/Training/EarlyStoppingCriteria.cs b/src/Microsoft.ML.Data/Training/EarlyStoppingCriteria.cs index 285db8bfe1..1da5a5562a 100644 --- a/src/Microsoft.ML.Data/Training/EarlyStoppingCriteria.cs +++ b/src/Microsoft.ML.Data/Training/EarlyStoppingCriteria.cs @@ -123,7 +123,7 @@ public override bool CheckScore(Float validationScore, Float trainingScore, out } // For the detail of the following rules, see the following paper. - // Lodwich, Aleksander, Yves Rangoni, and Thomas Breuel. "Evaluation of robustness and performance of early stopping rules with multi layer perceptrons." + // Lodwich, Aleksander, Yves Rangoni, and Thomas Breuel. "Evaluation of robustness and performance of early stopping rules with multi layer perceptrons." // Neural Networks, 2009. IJCNN 2009. International Joint Conference on. IEEE, 2009. 
public abstract class MovingWindowEarlyStoppingCriterion : EarlyStoppingCriterion diff --git a/src/Microsoft.ML.Data/Training/TrainerUtils.cs b/src/Microsoft.ML.Data/Training/TrainerUtils.cs index b2032bfc38..33d3d1490d 100644 --- a/src/Microsoft.ML.Data/Training/TrainerUtils.cs +++ b/src/Microsoft.ML.Data/Training/TrainerUtils.cs @@ -400,10 +400,10 @@ protected static IRowCursor CreateCursor(RoleMappedData data, CursOpt opt, IRand /// delegate of the cursor, indicating what additional options should be specified on subsequent /// passes over the data. The base implementation checks if any rows were skipped, and if none were /// skipped, it signals the context that it needn't bother with any filtering checks. - /// + /// /// Because the result will be "or"-red, a perfectly acceptable implementation is that this /// return the default , in which case the flags will not ever change. - /// + /// /// If the cursor was created with a signal delegate, the return value of this method will be sent /// to that delegate. /// diff --git a/src/Microsoft.ML.Data/Transforms/ColumnBindingsBase.cs b/src/Microsoft.ML.Data/Transforms/ColumnBindingsBase.cs index 58eee5430b..2347d2c679 100644 --- a/src/Microsoft.ML.Data/Transforms/ColumnBindingsBase.cs +++ b/src/Microsoft.ML.Data/Transforms/ColumnBindingsBase.cs @@ -324,17 +324,17 @@ protected ColumnBindingsBase(ISchema input, bool user, params string[] names) if (string.IsNullOrWhiteSpace(name)) { throw user ? -#pragma warning disable TLC_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. +#pragma warning disable MSML_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. Contracts.ExceptUserArg(standardColumnArgName, "New column needs a name") : -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof Contracts.ExceptDecode("New column needs a name"); } if (_nameToInfoIndex.ContainsKey(name)) { throw user ? 
-#pragma warning disable TLC_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. +#pragma warning disable MSML_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. Contracts.ExceptUserArg(standardColumnArgName, "New column '{0}' specified multiple times", name) : -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof Contracts.ExceptDecode("New column '{0}' specified multiple times", name); } _nameToInfoIndex.Add(name, iinfo); @@ -686,10 +686,10 @@ protected ManyToOneColumnBindingsBase(ManyToOneColumn[] column, ISchema input, F for (int j = 0; j < src.Length; j++) { Contracts.CheckUserArg(!string.IsNullOrWhiteSpace(src[j]), nameof(ManyToOneColumn.Source)); -#pragma warning disable TLC_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. +#pragma warning disable MSML_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. if (!input.TryGetColumnIndex(src[j], out srcIndices[j])) throw Contracts.ExceptUserArg(standardColumnArgName, "Source column '{0}' not found", src[j]); -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof srcTypes[j] = input.GetColumnType(srcIndices[j]); var size = srcTypes[j].ValueCount; srcSize = size == 0 ? null : checked(srcSize + size); @@ -700,10 +700,10 @@ protected ManyToOneColumnBindingsBase(ManyToOneColumn[] column, ISchema input, F string reason = testTypes(srcTypes); if (reason != null) { -#pragma warning disable TLC_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. +#pragma warning disable MSML_ContractsNameUsesNameof // Unfortunately, there is no base class for the columns bindings. throw Contracts.ExceptUserArg(standardColumnArgName, "Column '{0}' has invalid source types: {1}. 
Source types: '{2}'.", item.Name, reason, string.Join(", ", srcTypes.Select(type => type.ToString()))); -#pragma warning restore TLC_ContractsNameUsesNameof +#pragma warning restore MSML_ContractsNameUsesNameof } } Infos[i] = new ColInfo(srcSize.GetValueOrDefault(), srcIndices, srcTypes); @@ -861,7 +861,7 @@ public Func GetDependencies(Func predicate) } /// - /// Parsing utilities for converting between transform column argument objects and + /// Parsing utilities for converting between transform column argument objects and /// command line representations. /// public static class ColumnParsingUtils diff --git a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs index c12dd8fad5..b2024cc18c 100644 --- a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs @@ -55,7 +55,7 @@ public sealed class TaggedColumn public string Name; // The tag here (the key of the KeyValuePair) is the string that will be the prefix of the slot name - // in the output column. For non-vector columns, the slot name will be either the column name or the + // in the output column. For non-vector columns, the slot name will be either the column name or the // tag if it is non empty. For vector columns, the slot names will be 'ColumnName.SlotName' if the // tag is empty, 'Tag.SlotName' if tag is non empty, and simply the slot name if tag is non empty // and equal to the column name. diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index 9a40f404ea..230cfbe680 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -313,7 +313,7 @@ private void GetSlotsMinMax(Column col, out int[] slotsMin, out int[] slotsMax) slotsMin[j] = range.Min; // There are two reasons for setting the max to int.MaxValue - 1: // 1. 
max is an index, so it has to be strictly less than int.MaxValue. - // 2. to prevent overflows when adding 1 to it. + // 2. to prevent overflows when adding 1 to it. slotsMax[j] = range.Max ?? int.MaxValue - 1; } Array.Sort(slotsMin, slotsMax); @@ -473,7 +473,7 @@ private void GetCategoricalSlotRangesCore(int iinfo, int[] slotsMin, int[] slots // Six possible ways a drop slot range interacts with categorical slots range. // - // +--------------Drop-------------+ + // +--------------Drop-------------+ // | | // // +---Drop---+ +---Drop---+ +---Drop---+ diff --git a/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs b/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs index 713f85f9df..cacd681141 100644 --- a/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/GenerateNumberTransform.cs @@ -24,9 +24,9 @@ namespace Microsoft.ML.Runtime.Data { /// - /// This transform adds columns containing either random numbers distributed + /// This transform adds columns containing either random numbers distributed /// uniformly between 0 and 1 or an auto-incremented integer starting at zero. - /// It will be used in conjunction with a filter transform to create random + /// It will be used in conjunction with a filter transform to create random /// partitions of the data, used in cross validation. /// public sealed class GenerateNumberTransform : RowToRowTransformBase diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index 23ba5592b7..0519428284 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -25,7 +25,7 @@ namespace Microsoft.ML.Runtime.Data /// /// This transform can hash either single valued columns or vector columns. For vector columns, - /// it hashes each slot separately. + /// it hashes each slot separately. /// It can hash either text values or key values. 
/// public sealed class HashTransform : OneToOneTransformBase, ITransformTemplate diff --git a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs index 7a7e8fafda..d615b96894 100644 --- a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs +++ b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs @@ -265,7 +265,7 @@ public VBuffer GetMetadata() public void Add(int dstSlot, ValueGetter getter, ref T key) { - // REVIEW: I only call the getter if I determine I have to, but + // REVIEW: I only call the getter if I determine I have to, but // at the cost of passing along this getter and ref argument (as opposed // to just the argument). Is this really appropriate or helpful? Contracts.Assert(0 <= dstSlot && dstSlot < _slots); diff --git a/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs index 7c1fa19c10..997fa22d03 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs @@ -85,7 +85,6 @@ public KeyToValueTransform(IHostEnvironment env, IDataView input, string name, s { } - /// /// Public constructor corresponding to SignatureDataTransform. 
/// diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs index e577b9370e..6cad82c127 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs @@ -542,7 +542,7 @@ public ImplOne(IHost host, TFloat scale, TFloat offset) { } - public new static ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.RawType == typeof(TFloat), "The column type must be R8."); List nz = null; @@ -605,7 +605,7 @@ public ImplVec(IHost host, TFloat[] scale, TFloat[] offset, int[] indicesNonZero { } - public new static ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.ItemType.RawType == typeof(TFloat), "The column type must be vector of R8."); int cv = Math.Max(1, typeSrc.VectorSize); @@ -867,7 +867,7 @@ public ImplOne(IHost host, TFloat mean, TFloat stddev, bool useLog) { } - public new static ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.RawType == typeof(TFloat), "The column type must be R8."); host.CheckValue(ctx, nameof(ctx)); @@ -932,7 +932,7 @@ public ImplVec(IHost host, TFloat[] mean, TFloat[] stddev, bool useLog) { } - public new static ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.ItemType.RawType == typeof(TFloat), "The column type must be vector of R8."); int cv = Math.Max(1, typeSrc.VectorSize); @@ -1051,7 +1051,7 @@ public ImplOne(IHost host, TFloat[] binUpperBounds, bool fixZero) Host.Assert(0 <= _offset & _offset <= 1); } - 
public new static ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.RawType == typeof(TFloat), "The column type must be R8."); host.CheckValue(ctx, nameof(ctx)); @@ -1133,7 +1133,7 @@ public ImplVec(IHost host, TFloat[][] binUpperBounds, bool fixZero) } } - public new static ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.ItemType.RawType == typeof(TFloat), "The column type must be vector of R8."); int cv = Math.Max(1, typeSrc.VectorSize); @@ -1280,7 +1280,7 @@ private static void ComputeScaleAndOffset(TFloat max, TFloat min, out TFloat sca // but infinities and NaN to NaN. // REVIEW: If min <= 0 and max >= 0, then why not fix zero for this slot and simply scale by 1 / max(abs(..))? // We could even be more aggressive about it, and fix zero if 0 < min < max <= 2 * min. - // Then the common case where features are in the range [1, N] (and integer valued) wouldn't subtract 1 every time.... + // Then the common case where features are in the range [1, N] (and integer valued) wouldn't subtract 1 every time.... if (!(max > min)) scale = offset = 0; else if ((scale = 1 / (max - min)) == 0) @@ -1302,7 +1302,7 @@ private static void ComputeScaleAndOffsetFixZero(TFloat max, TFloat min, out TFl // In the case where max <= min, the slot contains no useful information (since it is either constant, or // is all NaNs, or has no rows), so we force it to zero. // Note that setting scale to zero effectively maps finite values to zero, - // but infinities and NaN to NaN. + // but infinities and NaN to NaN. 
offset = 0; if (!(max > min)) scale = 0; @@ -1321,7 +1321,7 @@ public static void ComputeScaleAndOffset(Double mean, Double stddev, out TFloat // In the case where stdev==0, the slot contains no useful information (since it is constant), // so we force it to zero. Note that setting scale to zero effectively maps finite values to zero, - // but infinities and NaN to NaN. + // but infinities and NaN to NaN. if (stddev == 0) scale = offset = 0; else if ((scale = 1 / (TFloat)stddev) == 0) @@ -1338,7 +1338,7 @@ public static void ComputeScaleAndOffsetFixZero(Double mean, Double meanSquaredE // In the case where stdev==0, the slot contains no useful information (since it is constant), // so we force it to zero. Note that setting scale to zero effectively maps finite values to zero, - // but infinities and NaN to NaN. + // but infinities and NaN to NaN. offset = 0; if (meanSquaredError == 0) scale = 0; diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs index 4c6e1fb011..af94f31454 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs @@ -542,7 +542,7 @@ public ImplOne(IHost host, TFloat scale, TFloat offset) { } - public new static ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.RawType == typeof(TFloat), "The column type must be R4."); List nz = null; @@ -605,7 +605,7 @@ public ImplVec(IHost host, TFloat[] scale, TFloat[] offset, int[] indicesNonZero { } - public new static ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.ItemType.RawType == typeof(TFloat), "The column type must be vector of R4."); int cv = Math.Max(1, typeSrc.VectorSize); @@ -869,7 +869,7 @@ public 
ImplOne(IHost host, TFloat mean, TFloat stddev, bool useLog) { } - public new static ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.RawType == typeof(TFloat), "The column type must be R4."); host.CheckValue(ctx, nameof(ctx)); @@ -934,7 +934,7 @@ public ImplVec(IHost host, TFloat[] mean, TFloat[] stddev, bool useLog) { } - public new static ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.ItemType.RawType == typeof(TFloat), "The column type must be vector of R4."); int cv = Math.Max(1, typeSrc.VectorSize); @@ -1053,7 +1053,7 @@ public ImplOne(IHost host, TFloat[] binUpperBounds, bool fixZero) Host.Assert(0 <= _offset & _offset <= 1); } - public new static ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplOne Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.RawType == typeof(TFloat), "The column type must be R4."); host.CheckValue(ctx, nameof(ctx)); @@ -1135,7 +1135,7 @@ public ImplVec(IHost host, TFloat[][] binUpperBounds, bool fixZero) } } - public new static ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) + public static new ImplVec Create(ModelLoadContext ctx, IHost host, ColumnType typeSrc) { host.Check(typeSrc.ItemType.RawType == typeof(TFloat), "The column type must be vector of R4."); int cv = Math.Max(1, typeSrc.VectorSize); @@ -1282,7 +1282,7 @@ private static void ComputeScaleAndOffset(TFloat max, TFloat min, out TFloat sca // but infinities and NaN to NaN. // REVIEW: If min <= 0 and max >= 0, then why not fix zero for this slot and simply scale by 1 / max(abs(..))? // We could even be more aggressive about it, and fix zero if 0 < min < max <= 2 * min. 
- // Then the common case where features are in the range [1, N] (and integer valued) wouldn't subtract 1 every time.... + // Then the common case where features are in the range [1, N] (and integer valued) wouldn't subtract 1 every time.... if (!(max > min)) scale = offset = 0; else if ((scale = 1 / (max - min)) == 0) @@ -1304,7 +1304,7 @@ private static void ComputeScaleAndOffsetFixZero(TFloat max, TFloat min, out TFl // In the case where max <= min, the slot contains no useful information (since it is either constant, or // is all NaNs, or has no rows), so we force it to zero. // Note that setting scale to zero effectively maps finite values to zero, - // but infinities and NaN to NaN. + // but infinities and NaN to NaN. offset = 0; if (!(max > min)) scale = 0; @@ -1323,7 +1323,7 @@ public static void ComputeScaleAndOffset(Double mean, Double stddev, out TFloat // In the case where stdev==0, the slot contains no useful information (since it is constant), // so we force it to zero. Note that setting scale to zero effectively maps finite values to zero, - // but infinities and NaN to NaN. + // but infinities and NaN to NaN. if (stddev == 0) scale = offset = 0; else if ((scale = 1 / (TFloat)stddev) == 0) @@ -1340,7 +1340,7 @@ public static void ComputeScaleAndOffsetFixZero(Double mean, Double meanSquaredE // In the case where stdev==0, the slot contains no useful information (since it is constant), // so we force it to zero. Note that setting scale to zero effectively maps finite values to zero, - // but infinities and NaN to NaN. + // but infinities and NaN to NaN. 
offset = 0; if (meanSquaredError == 0) scale = 0; diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeTransform.cs b/src/Microsoft.ML.Data/Transforms/NormalizeTransform.cs index 7a4738d5e4..bf9d77ed49 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeTransform.cs @@ -170,7 +170,7 @@ private NormalizeTransform(IHost host, ArgumentsBase args, IDataView input, while (cursor.MoveNext()) { // If the row has bad values, the good values are still being used for training. - // The comparisons in the code below are arranged so that NaNs in the input are not recorded. + // The comparisons in the code below are arranged so that NaNs in the input are not recorded. // REVIEW: Should infinities and/or NaNs be filtered before the normalization? Should we not record infinities for min/max? // Currently, infinities are recorded and will result in zero scale which in turn will result in NaN output for infinity input. bool any = false; @@ -241,7 +241,7 @@ private NormalizeTransform(IHost host, ModelLoadContext ctx, IDataView input) for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { var typeSrc = Infos[iinfo].TypeSrc; - // REVIEW: this check (was even an assert) here is too late. Apparently, no-one tests compatibility + // REVIEW: this check (was even an assert) here is too late. Apparently, no-one tests compatibility // of the types at deserialization (aka re-application), which is a bug. 
if (typeSrc.ValueCount == 0) throw Host.Except("Column '{0}' is a vector of variable size, which is not supported for normalizers", Infos[iinfo].Name); diff --git a/src/Microsoft.ML.Data/Transforms/PerGroupTransformBase.cs b/src/Microsoft.ML.Data/Transforms/PerGroupTransformBase.cs index 35f37d39a8..7b42008b15 100644 --- a/src/Microsoft.ML.Data/Transforms/PerGroupTransformBase.cs +++ b/src/Microsoft.ML.Data/Transforms/PerGroupTransformBase.cs @@ -308,7 +308,7 @@ protected override bool MoveNextCore() if (!_newGroupInInputCursorDel()) return true; - // If this is the first step, we need to move next on _groupCursor. Otherwise, the position of _groupCursor is + // If this is the first step, we need to move next on _groupCursor. Otherwise, the position of _groupCursor is // at the start of the next group. if (_groupCursor.State == CursorState.NotStarted) { diff --git a/src/Microsoft.ML.Data/Transforms/RangeFilter.cs b/src/Microsoft.ML.Data/Transforms/RangeFilter.cs index 589a635aff..142779dee2 100644 --- a/src/Microsoft.ML.Data/Transforms/RangeFilter.cs +++ b/src/Microsoft.ML.Data/Transforms/RangeFilter.cs @@ -184,9 +184,9 @@ public override void Save(ModelSaveContext ctx) // int: id of column name // double: min // double: max - // byte: complement - // byte: includeMin - // byte: includeMax + // byte: complement + // byte: includeMin + // byte: includeMax ctx.Writer.Write(sizeof(Float)); ctx.SaveNonEmptyString(Source.Schema.GetColumnName(_index)); Host.Assert(_min < _max); diff --git a/src/Microsoft.ML.Data/Transforms/ShuffleTransform.cs b/src/Microsoft.ML.Data/Transforms/ShuffleTransform.cs index 5080208335..3940bbe979 100644 --- a/src/Microsoft.ML.Data/Transforms/ShuffleTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ShuffleTransform.cs @@ -260,7 +260,7 @@ protected override IRowCursor GetRowCursorCore(Func predicate, IRando // The desired functionality is to support some permutations of whether we allow // shuffling at the source level, or not. 
- // + // // Pool | Source | Options // -----------+----------+-------- // Randonly | Never | poolOnly+ @@ -301,14 +301,14 @@ public override IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolid /// over a pool of size P. Logically, externally, the cursor acts as if you have this pool /// P and whenever you randomly sample and yield a row from it, that row is then discarded /// and replaced with the next row from the input source cursor. - /// + /// /// It would also be possible to implement in a way that cleaves closely to this logical /// interpretation, but this would be inefficient. We instead have a buffer of larger size /// P+B. A consumer (running presumably in the main thread) sampling and fetching items and a /// producer (running in a task, which may be running in a different thread) filling the buffer /// with items to sample, utilizing this extra space to enable an efficient possibly /// multithreaded scheme. - /// + /// /// The consumer, for its part, at any given time "owns" a contiguous portion of this buffer. /// (A contiguous portion of this buffer we consider to be able to wrap around, from the end /// to the beginning. The buffer is accessed in a "circular" fashion.) Consider that this portion @@ -319,18 +319,18 @@ public override IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolid /// rows ready to be sampled in future iterations, but that we are not sampling yet (in order /// to behave equivalently to the simple logical model of at any given time sampling P items). /// The producer owns the complement of the portion owned by the consumer. - /// + /// /// As the cursor progresses, the producer fills in successive items in its portion of the /// buffer it owns, and passes them off to the consumer (not one item at a time, but rather in /// batches, to keep down the amount of intertask communication). 
The consumer in addition to /// taking ownership of these items, will also periodically pass dead items back to the producer /// (again, not one dead item at a time, but in batches when the number of dead items reaches /// a certain threshold). - /// + /// /// This communication is accomplished using a pair of BufferBlock instances, through which /// the producer and consumer are notified how many additional items they can take ownership /// of. - /// + /// /// As the consumer "selects" a row from the pool of selectable rows each time it moves to /// the next row, this randomly selected row is considered to be the "first" index, since this /// makes its subsequent transition to being a dead row much simpler. It would be inefficient to @@ -338,7 +338,7 @@ public override IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolid /// first, of course, so one rather swaps an index, so that these nicely behavior contiguous /// circular indices, get mapped in an index within the buffers, through a permutation maintained /// in the pipeIndices array. - /// + /// /// The result is something functionally equivalent to but but considerably faster than the /// simple implementation described in the first paragraph. /// diff --git a/src/Microsoft.ML.Data/Transforms/SkipTakeFilter.cs b/src/Microsoft.ML.Data/Transforms/SkipTakeFilter.cs index bfd3522f73..2adb17258e 100644 --- a/src/Microsoft.ML.Data/Transforms/SkipTakeFilter.cs +++ b/src/Microsoft.ML.Data/Transforms/SkipTakeFilter.cs @@ -164,7 +164,7 @@ public override void Save(ModelSaveContext ctx) public override bool CanShuffle { get { return false; } } /// - /// Returns the computed count of rows remaining after skip and take operation. + /// Returns the computed count of rows remaining after skip and take operation. /// Returns null if count is unknown. /// public override long? 
GetRowCount(bool lazy = true) diff --git a/src/Microsoft.ML.Data/Transforms/TermTransform.cs b/src/Microsoft.ML.Data/Transforms/TermTransform.cs index 1e48a5d1e9..6eaf48e995 100644 --- a/src/Microsoft.ML.Data/Transforms/TermTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/TermTransform.cs @@ -612,10 +612,10 @@ private TermTransform(IHost host, ModelLoadContext ctx, IDataView input) termMap[i] = TermMap.TextImpl.Create(c, host); } }); -#pragma warning disable TLC_NoMessagesForLoadContext // Vaguely useful. +#pragma warning disable MSML_NoMessagesForLoadContext // Vaguely useful. if (!b) throw Host.ExceptDecode("Missing {0} model", dir); -#pragma warning restore TLC_NoMessagesForLoadContext +#pragma warning restore MSML_NoMessagesForLoadContext _termMap = new BoundTermMap[cinfo]; for (int i = 0; i < cinfo; ++i) _termMap[i] = termMap[i].Bind(this, i); @@ -719,7 +719,7 @@ protected override bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, var node = ctx.CreateNode(opType, srcVariableName, dstVariableName, ctx.GetNodeName(opType)); node.AddAttribute("classes_strings", terms.DenseValues()); node.AddAttribute("default_int64", -1); - //default_string needs to be an empty string but there is a BUG in Lotus that + //default_string needs to be an empty string but there is a BUG in Lotus that //throws a validation error when default_string is empty. As a work around, set //default_string to a space. node.AddAttribute("default_string", " "); diff --git a/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs b/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs index a81575b9c9..9a43dc5517 100644 --- a/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs +++ b/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs @@ -447,10 +447,10 @@ private enum MapType : byte /// type. The input type, whatever it is, must have as its input item /// type, and will produce either , or a vector type with that output /// type if the input was a vector. 
- /// + /// /// Note that instances of this class can be shared among multiple /// instances. To associate this with a particular transform, use the method. - /// + /// /// These are the immutable and serializable analogs to the used in /// training. /// diff --git a/src/Microsoft.ML.Data/Transforms/TransformBase.cs b/src/Microsoft.ML.Data/Transforms/TransformBase.cs index 263a3cf4ca..2d9cedb17b 100644 --- a/src/Microsoft.ML.Data/Transforms/TransformBase.cs +++ b/src/Microsoft.ML.Data/Transforms/TransformBase.cs @@ -596,7 +596,7 @@ public void SaveAsOnnx(OnnxContext ctx) /// /// Called by . Should be implemented by subclasses that return - /// true from . Will be called + /// true from . Will be called /// /// The context. Can be used to declare cells, access other information, /// and whatnot. This method should not actually, however, declare the variable corresponding diff --git a/src/Microsoft.ML.Data/Utilities/ApplyTransformUtils.cs b/src/Microsoft.ML.Data/Utilities/ApplyTransformUtils.cs index e0a138fc91..7d0695083b 100644 --- a/src/Microsoft.ML.Data/Utilities/ApplyTransformUtils.cs +++ b/src/Microsoft.ML.Data/Utilities/ApplyTransformUtils.cs @@ -73,7 +73,7 @@ public static IDataView ApplyAllTransformsToData(IHostEnvironment env, IDataView // Backtrack the chain until we reach a chain start or a non-transform. // REVIEW: we 'unwrap' the composite data loader here and step through its pipeline. - // It's probably more robust to make CompositeDataLoader not even be an IDataView, this + // It's probably more robust to make CompositeDataLoader not even be an IDataView, this // would force the user to do the right thing and unwrap on his end. 
var cdl = chain as CompositeDataLoader; if (cdl != null) diff --git a/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs b/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs index 900778cd31..5b99b173fa 100644 --- a/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs +++ b/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs @@ -79,7 +79,7 @@ public static IDataView LoadPipeline(IHostEnvironment env, RepositoryReader rep, } /// - /// Loads all transforms from the model stream, applies them sequentially to the provided data, and returns + /// Loads all transforms from the model stream, applies them sequentially to the provided data, and returns /// the resulting data. If there are no transforms in the stream, or if there's no DataLoader stream at all /// (this can happen if the model is produced by old TL), returns the source data. /// If the DataLoader stream is invalid, throws. @@ -101,7 +101,7 @@ public static IDataView LoadTransforms(IHostEnvironment env, IDataView data, Str } /// - /// Loads all transforms from the model stream, applies them sequentially to the provided data, and returns + /// Loads all transforms from the model stream, applies them sequentially to the provided data, and returns /// the resulting data. If there are no transforms in the stream, or if there's no DataLoader stream at all /// (this can happen if the model is produced by old TL), returns the source data. /// If the DataLoader stream is invalid, throws. @@ -157,8 +157,8 @@ public static ModelSaveContext GetDataModelSavingContext(RepositoryWriter rep) } /// - /// Loads data view (loader and transforms) from if is set to true, - /// otherwise loads loader only. + /// Loads data view (loader and transforms) from if is set to true, + /// otherwise loads loader only. 
/// public static IDataLoader LoadLoader(IHostEnvironment env, RepositoryReader rep, IMultiStreamSource files, bool loadTransforms) { @@ -188,7 +188,7 @@ public static IDataLoader LoadLoader(IHostEnvironment env, RepositoryReader rep, } /// - /// REVIEW: consider adding an overload that returns + /// REVIEW: consider adding an overload that returns /// Loads optionally feature names from the repository directory. /// Returns false iff no stream was found for feature names, iff result is set to null. /// @@ -342,7 +342,7 @@ public static RoleMappedSchema LoadRoleMappedSchemaOrNull(IHostEnvironment env, } /// - /// The RepositoryStreamWrapper is a IMultiStreamSource wrapper of a Stream object in a repository. + /// The RepositoryStreamWrapper is a IMultiStreamSource wrapper of a Stream object in a repository. /// It is used to deserialize RoleMappings.txt from a model zip file. /// private sealed class RepositoryStreamWrapper : IMultiStreamSource @@ -382,7 +382,7 @@ public Stream Open(int index) public TextReader OpenTextReader(int index) { return new StreamReader(Open(index)); } /// - /// A custom entry stream wrapper that includes custom dispose logic for disposing the entry + /// A custom entry stream wrapper that includes custom dispose logic for disposing the entry /// when the stream is disposed. /// private sealed class EntryStream : Stream diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index cd74463291..64b510a655 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -91,7 +91,7 @@ public ValueGetter> SubsetGetter(ValueGetter> getter) } /// - /// Drops slots from src and populates the dst with the resulting vector. Slots are + /// Drops slots from src and populates the dst with the resulting vector. Slots are /// dropped based on min and max slots that were passed at the constructor. 
/// public void DropSlots(ref VBuffer src, ref VBuffer dst) diff --git a/src/Microsoft.ML.Data/Utils/IntSequencePool.cs b/src/Microsoft.ML.Data/Utils/IntSequencePool.cs index 3efb038e6e..e27b297025 100644 --- a/src/Microsoft.ML.Data/Utils/IntSequencePool.cs +++ b/src/Microsoft.ML.Data/Utils/IntSequencePool.cs @@ -173,7 +173,7 @@ private int GetCore(uint[] sequence, int min, int lim, out uint hash) Contracts.Assert(ibCur <= ibLim); if (i >= lim) { - // Need to make sure that we have reached the end of the sequence in the pool at the + // Need to make sure that we have reached the end of the sequence in the pool at the // same time that we reached the end of sequence. if (ibCur == ibLim) return idCur; diff --git a/src/Microsoft.ML.Data/Utils/LossFunctions.cs b/src/Microsoft.ML.Data/Utils/LossFunctions.cs index 7ff47a4f9e..7df431c3d1 100644 --- a/src/Microsoft.ML.Data/Utils/LossFunctions.cs +++ b/src/Microsoft.ML.Data/Utils/LossFunctions.cs @@ -124,9 +124,9 @@ public Float ComputeDualUpdateInvariant(Float scaledFeaturesNormSquared) return 1 / Math.Max(1, (Float)0.25 + scaledFeaturesNormSquared); } - // REVIEW: this dual update uses a different log loss formulation, + // REVIEW: this dual update uses a different log loss formulation, //although the two are equivalents if the labels are restricted to 0 and 1 - //Need to update so that it can handle probability label and true to the + //Need to update so that it can handle probability label and true to the //definition, which is a smooth loss function public Float DualUpdate(Float output, Float label, Float dual, Float invariant, int maxNumThreads) { diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index 6366321c48..ae6c2adac6 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -38,7 +38,7 @@ public static RoleMappedData SelectFeatures(IHost host, RoleMappedData data, Bit } /// - /// Fill dst with values selected 
from src if the indices of the src values are set in includedIndices, + /// Fill dst with values selected from src if the indices of the src values are set in includedIndices, /// otherwise assign default(T). The length of dst will be equal to src.Length. /// public static void SelectFeatures(ref VBuffer src, BitArray includedIndices, int cardinality, ref VBuffer dst) diff --git a/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs b/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs index f512114bbb..a9d7983adf 100644 --- a/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs +++ b/src/Microsoft.ML.Ensemble/EntryPoints/CreateEnsemble.cs @@ -316,10 +316,10 @@ private static TOut CreatePipelineEnsemble(IHostEnvironment env, IPredicto /// /// This method takes a as input, saves it as an in-memory - /// and returns two arrays indexed by the entries in the zip: + /// and returns two arrays indexed by the entries in the zip: /// 1. An array of byte arrays, containing the byte sequences of each entry. /// 2. An array of strings, containing the name of each entry. - /// + /// /// This method is used for comparing pipelines. Its outputs can be passed to /// to check if this pipeline is identical to another pipeline. 
/// diff --git a/src/Microsoft.ML.Ensemble/PipelineEnsemble.cs b/src/Microsoft.ML.Ensemble/PipelineEnsemble.cs index 3cf30a3211..3ac78ed91e 100644 --- a/src/Microsoft.ML.Ensemble/PipelineEnsemble.cs +++ b/src/Microsoft.ML.Ensemble/PipelineEnsemble.cs @@ -600,7 +600,7 @@ protected static int CheckLabelColumn(IHostEnvironment env, IPredictorModel[] mo return Utils.MarshalInvoke(CheckKeyLabelColumnCore, mdType.ItemType.RawType, env, models, labelType.AsKey, schema, labelInfo.Index, mdType); } - // When the label column is not a key, we check that the number of classes is the same for all the predictors, by checking the + // When the label column is not a key, we check that the number of classes is the same for all the predictors, by checking the // OutputType property of the IValueMapper. // If any of the predictors do not implement IValueMapper we throw an exception. Returns the class count. private static int CheckNonKeyLabelColumnCore(IHostEnvironment env, IPredictor pred, IPredictorModel[] models, bool isBinary, ColumnType labelType) @@ -672,13 +672,13 @@ private static bool AreEqual(ref VBuffer v1, ref VBuffer v2) } /// - /// This method outputs a Key-Value Pair (kvp) per model in the ensemble. + /// This method outputs a Key-Value Pair (kvp) per model in the ensemble. /// * The key is the model number such as "Partition model 0 summary". If the model implements /// then this string is followed by the first line of the model summary (the first line contains a description specific to the /// model kind, such as "Feature gains" for FastTree or "Feature weights" for linear). /// * The value: /// - If the model implements then the value is the list of Key-Value pairs - /// containing the detailed summary for that model (for example, linear models have a list containing kvps where the keys + /// containing the detailed summary for that model (for example, linear models have a list containing kvps where the keys /// are the feature names and the values are the weights. 
FastTree has a similar list with the feature gains as values). /// - If the model does not implement but does implement , /// the value is a string containing the summary of that model. diff --git a/src/Microsoft.ML.Ensemble/Trainer/EnsemblePredictorBase.cs b/src/Microsoft.ML.Ensemble/Trainer/EnsemblePredictorBase.cs index 9f2ebfb804..3d5c871117 100644 --- a/src/Microsoft.ML.Ensemble/Trainer/EnsemblePredictorBase.cs +++ b/src/Microsoft.ML.Ensemble/Trainer/EnsemblePredictorBase.cs @@ -152,7 +152,7 @@ public void SaveSummary(TextWriter writer, RoleMappedSchema schema) writer.WriteLine(";; Partition model {0}", i); writer.WriteLine(";; Weight={0}", (Weights != null ? Weights[i] : 1)); - // REVIEW: The featureName Collection names may vary for different base learners. + // REVIEW: The featureName Collection names may vary for different base learners. // How do we get the right collection for the base learners? if (Models[i].Predictor is ICanSaveSummary summaryModel) summaryModel.SaveSummary(writer, schema); diff --git a/src/Microsoft.ML.FastTree/BinFile/IniFileParserInterface.cs b/src/Microsoft.ML.FastTree/BinFile/IniFileParserInterface.cs index c4b1af9a7c..994b711510 100644 --- a/src/Microsoft.ML.FastTree/BinFile/IniFileParserInterface.cs +++ b/src/Microsoft.ML.FastTree/BinFile/IniFileParserInterface.cs @@ -42,15 +42,15 @@ private static class Native [DllImport(DllName, CharSet = CharSet.Ansi, EntryPoint = "FeatureMapGetFeatureName")] [return: MarshalAs(UnmanagedType.U1)] - public unsafe static extern bool GetFeatureName(IntPtr pObject, UInt32 featureIndex, byte[] buffer, UInt32 sizeOfBuffer, IntPtr resultLength); + public static extern unsafe bool GetFeatureName(IntPtr pObject, UInt32 featureIndex, byte[] buffer, UInt32 sizeOfBuffer, IntPtr resultLength); [DllImport(DllName, CharSet = CharSet.Ansi, EntryPoint = "InputExtractorGetInputName")] [return: MarshalAs(UnmanagedType.U1)] - public unsafe static extern bool GetInputName(IntPtr pObject, UInt32 featureIndex, 
byte[] buffer, UInt32 sizeOfBuffer, IntPtr resultLength); + public static extern unsafe bool GetInputName(IntPtr pObject, UInt32 featureIndex, byte[] buffer, UInt32 sizeOfBuffer, IntPtr resultLength); [DllImport(DllName, CharSet = CharSet.Ansi)] [return: MarshalAs(UnmanagedType.U1)] - public unsafe static extern bool GetSectionContent(IntPtr pObject, string sectionName, byte[] buffer, UInt32 sizeOfBuffer, IntPtr resultLength); + public static extern unsafe bool GetSectionContent(IntPtr pObject, string sectionName, byte[] buffer, UInt32 sizeOfBuffer, IntPtr resultLength); [DllImport(DllName, EntryPoint = "InputExtractorGetInputCount")] public static extern UInt32 GetInputCount(IntPtr pObject); @@ -59,17 +59,17 @@ private static class Native public static extern IntPtr GetInput(IntPtr pObject, UInt32 index); [DllImport(DllName, EntryPoint = "InputGetFeatures")] - public static unsafe extern void GetInputFeatures(IntPtr pInput, UInt32[] features, UInt32 sizeOfFeatures, out UInt32 featureCount); + public static extern unsafe void GetInputFeatures(IntPtr pInput, UInt32[] features, UInt32 sizeOfFeatures, out UInt32 featureCount); [DllImport(DllName, EntryPoint = "InputIsCopy")] [return: MarshalAs(UnmanagedType.U1)] - public unsafe static extern bool IsCopyInput(IntPtr pInput); + public static extern unsafe bool IsCopyInput(IntPtr pInput); [DllImport(DllName, EntryPoint = "InputEvaluate")] - public static unsafe extern double EvaluateInput(IntPtr pInput, UInt32* input); + public static extern unsafe double EvaluateInput(IntPtr pInput, UInt32* input); [DllImport(DllName, EntryPoint = "InputEvaluateMany")] - public static unsafe extern void EvaluateMany(IntPtr pInput, UInt32*[] inputs, double* outputs, UInt32 count); + public static extern unsafe void EvaluateMany(IntPtr pInput, UInt32*[] inputs, double* outputs, UInt32 count); [DllImport(DllName, EntryPoint = "InputExtractorGetFeatureMap")] public static extern IntPtr GetFeatureMap(IntPtr pExtractor); diff --git 
a/src/Microsoft.ML.FastTree/Dataset/DenseIntArray.cs b/src/Microsoft.ML.FastTree/Dataset/DenseIntArray.cs index 508a6a5229..ca98c91d2d 100644 --- a/src/Microsoft.ML.FastTree/Dataset/DenseIntArray.cs +++ b/src/Microsoft.ML.FastTree/Dataset/DenseIntArray.cs @@ -70,18 +70,18 @@ public override IntArray[] Split(int[][] assignment) #if USE_FASTTREENATIVE [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall)] - private unsafe static extern int C_Sumup_float( + private static extern unsafe int C_Sumup_float( int numBits, byte* pData, int* pIndices, float* pSampleOutputs, double* pSampleOutputWeights, FloatType* pSumTargetsByBin, double* pSumTargets2ByBin, int* pCountByBin, int totalCount, double totalSampleOutputs, double totalSampleOutputWeights); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall)] - private unsafe static extern int C_Sumup_double( + private static extern unsafe int C_Sumup_double( int numBits, byte* pData, int* pIndices, double* pSampleOutputs, double* pSampleOutputWeights, FloatType* pSumTargetsByBin, double* pSumTargets2ByBin, int* pCountByBin, int totalCount, double totalSampleOutputs, double totalSampleOutputWeights); - protected unsafe static void SumupCPlusPlusDense(SumupInputData input, FeatureHistogram histogram, + protected static unsafe void SumupCPlusPlusDense(SumupInputData input, FeatureHistogram histogram, byte* data, int numBits) { using (Timer.Time(TimerEvent.SumupCppDense)) diff --git a/src/Microsoft.ML.FastTree/Dataset/FeatureFlock.cs b/src/Microsoft.ML.FastTree/Dataset/FeatureFlock.cs index 9b5bf9b9ae..642c2349e8 100644 --- a/src/Microsoft.ML.FastTree/Dataset/FeatureFlock.cs +++ b/src/Microsoft.ML.FastTree/Dataset/FeatureFlock.cs @@ -47,7 +47,7 @@ public PerBinStats(Double sumTargets, Double sumWeights, int count) /// These objects are stateful, reusable objects that enable the collection of sufficient /// stats per feature flock, per node or leaf of a tree, to enable it to find the 
"best" /// splits. - /// + /// /// Each instance of this corresponds to a single flock, but multiple of these will be created /// per flock. Note that feature indices, whenever present, refer to the feature within the /// particular flock the same as they do with . @@ -176,7 +176,7 @@ public void Subtract(SufficientStatsBase other) protected abstract double GetBinGradient(int featureIndex, double bias); /// - /// Get a fullcopy of histogram for one sub feature. + /// Get a fullcopy of histogram for one sub feature. /// public void CopyFeatureHistogram(int subfeatureIndex, ref PerBinStats[] hist) { @@ -919,7 +919,7 @@ public void FillSplitCandidatesCategoricalNeighborBundling(LeastSquaresRegressio { var binStats = virtualBins[i]; catFeatureCount += 1 + binStats.SubFeatures.Length; - + sumGTTargets += binStats.SumTargets; gtCount += binStats.Count; docsInCurrentGroup += binStats.Count; @@ -1039,7 +1039,7 @@ protected sealed override void SubtractCore(SufficientStatsBase other) /// A feature flock is a collection of features, grouped together because storing the /// features and performing the key operations on them in a collection can be done /// more efficiently than if they were stored as separate features. - /// + /// /// Since this is a collection of features, feature specific quantities and methods /// will have a feature index parameter. Note that this index is always, for every /// flock, from 0 up to but not including . Now, @@ -1132,7 +1132,7 @@ public virtual IIntArrayForwardIndexer GetIndexer(int featureIndex) public abstract double[] BinUpperBounds(int featureIndex); /// - /// If you need to implement you can use + /// If you need to implement you can use /// . This will be slower than a /// specialized implementation but is at least a useful shim. 
/// @@ -1216,10 +1216,10 @@ internal abstract class SinglePartitionedIntArrayFlockBase : FeatureF /// /// Imagine we have a six row dataset, with two features, which if stored separately in, /// say, a , would have bin values as follows. - /// + /// /// f0 = { 0, 1, 0, 0, 2, 0} /// f1 = { 0, 0, 1, 0, 0, 1} - /// + /// /// These two are a candidate for a , because they never both /// have a non-zero bin value for any row. Then, in order to represent this in this feature, /// we would pass in this value for the : @@ -1231,18 +1231,18 @@ internal abstract class SinglePartitionedIntArrayFlockBase : FeatureF /// what feature is which can be reconstructed from , which /// for each feature specifies the range in corresponding to the /// "logical" bin value for that feature starting from 1. - /// + /// /// Note that it would also have been legal for to be /// larger than the actual observed range, e.g., it could have been: /// = { 1, 5, 8} /// or something. This could happen if binning happened over a different dataset from the data /// being represented right now, for example, but this is a more complex case. - /// + /// /// The would contain the upper bounds for both of these features, /// which would be arrays large enough so that the maximum value of the logical bin for each feature /// in the flock could index it. (So in this example, the first bin upper bound would be at least /// length 3, and the second at least length 2.) - /// + /// /// The indicates if the flock is a categorical feature. 
/// protected SinglePartitionedIntArrayFlockBase(TIntArray bins, int[] hotFeatureStarts, double[][] binUpperBounds, bool categorical = false) @@ -1264,19 +1264,19 @@ protected SinglePartitionedIntArrayFlockBase(TIntArray bins, int[] hotFeatureSta Contracts.Assert(AllBinUpperBounds.Select((b, f) => HotFeatureStarts[f + 1] - HotFeatureStarts[f] + 1 == b.Length).All(i => i)); } - public override sealed double[] BinUpperBounds(int featureIndex) + public sealed override double[] BinUpperBounds(int featureIndex) { Contracts.Assert(0 <= featureIndex && featureIndex < Count); return AllBinUpperBounds[featureIndex]; } - public override sealed double Trust(int featureIndex) + public sealed override double Trust(int featureIndex) { Contracts.Assert(0 <= featureIndex && featureIndex < Count); return 1; } - public override sealed int BinCount(int featureIndex) + public sealed override int BinCount(int featureIndex) { Contracts.Assert(0 <= featureIndex && featureIndex < Count); return AllBinUpperBounds[featureIndex].Length; diff --git a/src/Microsoft.ML.FastTree/Dataset/SegmentIntArray.cs b/src/Microsoft.ML.FastTree/Dataset/SegmentIntArray.cs index 02ae0f2c84..1c29e4582d 100644 --- a/src/Microsoft.ML.FastTree/Dataset/SegmentIntArray.cs +++ b/src/Microsoft.ML.FastTree/Dataset/SegmentIntArray.cs @@ -428,7 +428,7 @@ public static void SegmentFindOptimalCost(uint[] array, int len, int bitsNeeded, } } - public unsafe static void SegmentFindOptimalPath7(uint[] array, int len, out long bits, out int transitions) + public static unsafe void SegmentFindOptimalPath7(uint[] array, int len, out long bits, out int transitions) { long b = 0; int t = 0; @@ -441,7 +441,7 @@ public unsafe static void SegmentFindOptimalPath7(uint[] array, int len, out lon transitions = t; } - public unsafe static void SegmentFindOptimalPath15(uint[] array, int len, out long bits, out int transitions) + public static unsafe void SegmentFindOptimalPath15(uint[] array, int len, out long bits, out int transitions) { 
long b = 0; int t = 0; @@ -454,7 +454,7 @@ public unsafe static void SegmentFindOptimalPath15(uint[] array, int len, out lo transitions = t; } - public unsafe static void SegmentFindOptimalPath21(uint[] array, int len, out long bits, out int transitions) + public static unsafe void SegmentFindOptimalPath21(uint[] array, int len, out long bits, out int transitions) { long b = 0; int t = 0; @@ -467,7 +467,7 @@ public unsafe static void SegmentFindOptimalPath21(uint[] array, int len, out lo transitions = t; } - public unsafe static void SegmentFindOptimalCost15(uint[] array, int len, out long bits) + public static unsafe void SegmentFindOptimalCost15(uint[] array, int len, out long bits) { long b = 0; fixed (uint* pArray = array) @@ -478,7 +478,7 @@ public unsafe static void SegmentFindOptimalCost15(uint[] array, int len, out lo bits = b; } - public unsafe static void SegmentFindOptimalCost31(uint[] array, int len, out long bits) + public static unsafe void SegmentFindOptimalCost31(uint[] array, int len, out long bits) { long b = 0; fixed (uint* pArray = array) @@ -491,29 +491,29 @@ public unsafe static void SegmentFindOptimalCost31(uint[] array, int len, out lo #pragma warning disable TLC_GeneralName // Externs follow their own rules. 
[DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall, CharSet = CharSet.Ansi)] - private unsafe static extern void C_SegmentFindOptimalPath21(uint* valv, int valc, long* pBits, int* pTransitions); + private static extern unsafe void C_SegmentFindOptimalPath21(uint* valv, int valc, long* pBits, int* pTransitions); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall, CharSet = CharSet.Ansi)] - private unsafe static extern void C_SegmentFindOptimalPath15(uint* valv, int valc, long* pBits, int* pTransitions); + private static extern unsafe void C_SegmentFindOptimalPath15(uint* valv, int valc, long* pBits, int* pTransitions); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall, CharSet = CharSet.Ansi)] - private unsafe static extern void C_SegmentFindOptimalPath7(uint* valv, int valc, long* pBits, int* pTransitions); + private static extern unsafe void C_SegmentFindOptimalPath7(uint* valv, int valc, long* pBits, int* pTransitions); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall, CharSet = CharSet.Ansi)] - private unsafe static extern void C_SegmentFindOptimalCost15(uint* valv, int valc, long* pBits); + private static extern unsafe void C_SegmentFindOptimalCost15(uint* valv, int valc, long* pBits); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall, CharSet = CharSet.Ansi)] - private unsafe static extern void C_SegmentFindOptimalCost31(uint* valv, int valc, long* pBits); + private static extern unsafe void C_SegmentFindOptimalCost31(uint* valv, int valc, long* pBits); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall)] - private unsafe static extern int C_SumupSegment_float( + private static extern unsafe int C_SumupSegment_float( uint* pData, byte* pSegType, int* pSegLength, int* pIndices, float* pSampleOutputs, double* pSampleOutputWeights, float* pSumTargetsByBin, double* pSumWeightsByBin, int* pCountByBin, int totalCount, 
double totalSampleOutputs); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall)] - private unsafe static extern int C_SumupSegment_double( + private static extern unsafe int C_SumupSegment_double( uint* pData, byte* pSegType, int* pSegLength, int* pIndices, double* pSampleOutputs, double* pSampleOutputWeights, double* pSumTargetsByBin, double* pSumWeightsByBin, diff --git a/src/Microsoft.ML.FastTree/Dataset/SparseIntArray.cs b/src/Microsoft.ML.FastTree/Dataset/SparseIntArray.cs index fc360ddea4..5ca048647e 100644 --- a/src/Microsoft.ML.FastTree/Dataset/SparseIntArray.cs +++ b/src/Microsoft.ML.FastTree/Dataset/SparseIntArray.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Runtime.FastTree.Internal /// This implementation represents a sequence of values using parallel /// arrays of both values, as well as deltas indicating the number of values to the next /// explicit value. Values "between" these deltas are implicitly zero. - /// + /// /// Note that it is possible to misuse the deltas by making some of them themselves 0, allowing /// us to represent multiple values per row. 
In this case, /// and will not have sensible values, but @@ -490,12 +490,12 @@ public override void Sumup(SumupInputData input, FeatureHistogram histogram) #if USE_FASTTREENATIVE [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall)] - private unsafe static extern int C_SumupDeltaSparse_float(int numBits, byte* pValues, byte* pDeltas, int numDeltas, int* pIndices, float* pSampleOutputs, double* pSampleOutputWeights, + private static extern unsafe int C_SumupDeltaSparse_float(int numBits, byte* pValues, byte* pDeltas, int numDeltas, int* pIndices, float* pSampleOutputs, double* pSampleOutputWeights, float* pSumTargetsByBin, double* pSumTargets2ByBin, int* pCountByBin, int totalCount, double totalSampleOutputs, double totalSampleOutputWeights); [DllImport("FastTreeNative", CallingConvention = CallingConvention.StdCall)] - private unsafe static extern int C_SumupDeltaSparse_double(int numBits, byte* pValues, byte* pDeltas, int numDeltas, int* pIndices, double* pSampleOutputs, double* pSampleOutputWeights, + private static extern unsafe int C_SumupDeltaSparse_double(int numBits, byte* pValues, byte* pDeltas, int numDeltas, int* pIndices, double* pSampleOutputs, double* pSampleOutputWeights, double* pSumTargetsByBin, double* pSumTargets2ByBin, int* pCountByBin, int totalCount, double totalSampleOutputs, double totalSampleOutputWeights); diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 29fe439e0a..fad40495f4 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1143,7 +1143,7 @@ private FeatureFlockBase CreateOneHotFlockCategorical(IChannel ch, #endif Double[] bub = BinUpperBounds[fi]; ch.Assert(bub.Length == 2); - //REVIEW: leaving out check for the value to reduced memory consuption and going with + //REVIEW: leaving out check for the value to reduced memory consuption and going with //leap of faith based on what the user told. 
binnedValues[i] = hotFeatureStarts[subfeature] + 1; hotCount++; @@ -1380,7 +1380,7 @@ private Dataset Construct(RoleMappedData examples, ref int numExamples, int maxB // There is no good mechanism to filter out rows with missing feature values on transposed data. // So, we instead perform one featurization pass which, if successful, will remain one pass but, // if we ever encounter missing values will become a "detect missing features" pass, which will - // in turn inform a necessary featurization pass secondary + // in turn inform a necessary featurization pass secondary SlotDropper slotDropper = null; bool[] localConstructBinFeatures = Utils.CreateArray(NumFeatures, true); @@ -1661,7 +1661,7 @@ private static ValueGetter> SubsetGetter(ValueGetter> g } /// - /// Returns a slot dropper object that has ranges of slots to be dropped, + /// Returns a slot dropper object that has ranges of slots to be dropped, /// based on an examination of the feature values. /// private static SlotDropper ConstructDropSlotRanges(ISlotCursor cursor, @@ -2198,7 +2198,7 @@ private IEnumerable CreateFlocksCore(IChannel ch, IProgressCha int limMade = startFeatureIndex; int countBins = 1; // Count of bins we'll need to represent. Starts at 1, accumulates "hot" features. // Tracking for n-hot flocks. - long countHotRows = 0; // The count of hot "rows" + long countHotRows = 0; // The count of hot "rows" long hotNThreshold = (long)(0.1 * NumExamples); bool canBeOneHot = true; @@ -2617,7 +2617,7 @@ public sealed class ForwardIndexer // Parallel to the subsequence of _values in min to lim, indicates the index where // we should start to look for the next value, if the corresponding value list in // _values is sparse. If the corresponding value list is dense the entry at this - // position is not used. + // position is not used. 
private readonly int[] _perFeaturePosition; private readonly int[] _featureIndices; #if DEBUG @@ -2790,7 +2790,7 @@ public abstract class FastTreePredictionWrapper : // Inner args is used only for documentation purposes when saving comments to INI files. protected readonly string InnerArgs; - // The total number of features used in training (takes the value of zero if the + // The total number of features used in training (takes the value of zero if the // written version of the loaded model is less than VerNumFeaturesSerialized) protected readonly int NumFeatures; @@ -3000,13 +3000,13 @@ private enum NodeMode [Description("BRANCH_LT")] BranchLT, [Description("BRANCH_GTE")] - BranchGTE, + BranchGte, [Description("BRANCH_GT")] BranchGT, [Description("BRANCH_EQ")] - BranchEQ, + BranchEq, [Description("BRANCH_LT")] - BranchNEQ, + BranchNeq, [Description("LEAF")] Leaf }; @@ -3070,7 +3070,7 @@ public virtual bool SaveAsOnnx(OnnxContext ctx, string[] outputNames, string fea nodesValues.Add(tree.RawThresholds[nodeIndex]); nodesTrueNodeIds.Add(tree.LteChild[nodeIndex] < 0 ? ~tree.LteChild[nodeIndex] + tree.NumNodes : tree.LteChild[nodeIndex]); nodesFalseNodeIds.Add(tree.GtChild[nodeIndex] < 0 ? ~tree.GtChild[nodeIndex] + tree.NumNodes : tree.GtChild[nodeIndex]); - if (tree._defaultValueForMissing?[nodeIndex] <= tree.RawThresholds[nodeIndex]) + if (tree.DefaultValueForMissing?[nodeIndex] <= tree.RawThresholds[nodeIndex]) missingValueTracksTrue.Add(true); else missingValueTracksTrue.Add(false); @@ -3266,8 +3266,8 @@ public Float GetLeafValue(int treeId, int leafId) } /// - /// Returns the leaf node in the requested tree for the given feature vector, and populates 'path' with the list of - /// internal nodes in the path from the root to that leaf. If 'path' is null a new list is initialized. 
All elements + /// Returns the leaf node in the requested tree for the given feature vector, and populates 'path' with the list of + /// internal nodes in the path from the root to that leaf. If 'path' is null a new list is initialized. All elements /// in 'path' are cleared before filling in the current path nodes. /// public int GetLeaf(int treeId, ref VBuffer features, ref List path) diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs index ac92867c27..6796dd6c79 100644 --- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs +++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs @@ -334,7 +334,7 @@ public void AdjustTreeOutputs(IChannel ch, RegressionTree tree, } /// - /// The Entry Point for the FastTree Binary Classifier. + /// The Entry Point for the FastTree Binary Classifier. /// public static partial class FastTree { diff --git a/src/Microsoft.ML.FastTree/FastTreeRanking.cs b/src/Microsoft.ML.FastTree/FastTreeRanking.cs index cc246f25cd..7173a9f6a3 100644 --- a/src/Microsoft.ML.FastTree/FastTreeRanking.cs +++ b/src/Microsoft.ML.FastTree/FastTreeRanking.cs @@ -1028,7 +1028,7 @@ private static void PermutationSort(int[] permutation, double[] scores, short[] } [DllImport("FastTreeNative", EntryPoint = "C_GetDerivatives", CallingConvention = CallingConvention.StdCall, CharSet = CharSet.Ansi)] - private unsafe static extern void GetDerivatives( + private static extern unsafe void GetDerivatives( int numDocuments, int begin, int* pPermutation, short* pLabels, double* pScores, double* pLambdas, double* pWeights, double* pDiscount, double inverseMaxDcg, double* pGainLabels, diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 3b3ca9e92f..51d2d809bb 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -748,7 +748,7 @@ private void Map(ref VBuffer src, ref Float dst) /// /// Returns a vector of 
feature contributions for a given example. - /// is used as a buffer to accumulate the contributions across trees. + /// is used as a buffer to accumulate the contributions across trees. /// If is null, it will be created, otherwise it will be reused. /// internal void GetFeatureContributions(ref VBuffer features, ref VBuffer contribs, ref BufferBuilder builder) @@ -791,7 +791,7 @@ internal double GetFeatureBinsAndScore(ref VBuffer features, int[] bins) for (int i = 0; i < features.Count; ++i) { int j; - // Where we have a sparse output, + // Where we have a sparse output, if (_inputFeatureToDatasetFeatureMap.TryGetValue(features.Indices[i], out j)) { int index = Algorithms.FindFirstGE(_binUpperBounds[j], features.Values[i]); @@ -1116,7 +1116,7 @@ public sealed class FeatureInfo public long Version { get; } /// - /// For features belonging to the same categorical, this value will be the same, + /// For features belonging to the same categorical, this value will be the same, /// Set to -1 for non-categoricals. /// public int CategoricalFeatureIndex { get; } diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index 512e79faf9..ae79c991d3 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -156,7 +156,7 @@ public override IPredictorWithFeatureWeights Train(TrainContext context) } // LogitBoost is naturally calibrated to // output probabilities when transformed using - // the logistic function, so if we have trained no + // the logistic function, so if we have trained no // calibrator, transform the scores using that. // REVIEW: Need a way to signal the outside world that we prefer simple sigmoid? 
diff --git a/src/Microsoft.ML.FastTree/SumupPerformanceCommand.cs b/src/Microsoft.ML.FastTree/SumupPerformanceCommand.cs index efe52c1f26..f1db3fae2c 100644 --- a/src/Microsoft.ML.FastTree/SumupPerformanceCommand.cs +++ b/src/Microsoft.ML.FastTree/SumupPerformanceCommand.cs @@ -110,7 +110,7 @@ private IEnumerable CreateDense(IChannel ch, Random rgen) private IEnumerable CreateSparse(IChannel ch, Random rgen) { ch.CheckUserArg(0 <= _param && _param < 1, nameof(Arguments.Parameter), "For sparse ararys"); - // The parameter is the level of sparsity. Use the geometric distribution to determine the number of + // The parameter is the level of sparsity. Use the geometric distribution to determine the number of // Geometric distribution (with 0 support) would be Math. double denom = Math.Log(1 - _param); if (double.IsNegativeInfinity(denom)) diff --git a/src/Microsoft.ML.FastTree/Training/DcgPermutationComparer.cs b/src/Microsoft.ML.FastTree/Training/DcgPermutationComparer.cs index be449598a3..1421abb589 100644 --- a/src/Microsoft.ML.FastTree/Training/DcgPermutationComparer.cs +++ b/src/Microsoft.ML.FastTree/Training/DcgPermutationComparer.cs @@ -44,10 +44,10 @@ public static DcgPermutationComparer GetDcgPermutationFactory(string name) /// public class DescendingStablePessimisticPermutationComparer : DescendingStablePermutationComparer { -#pragma warning disable TLC_GeneralName // The naming is the least of this class's problems. A setter with no getter?? +#pragma warning disable MSML_GeneralName // The naming is the least of this class's problems. A setter with no getter?? 
protected short[] _labels; protected int _labelsOffset; -#pragma warning restore TLC_GeneralName +#pragma warning restore MSML_GeneralName public override short[] Labels { set { _labels = value; } @@ -76,10 +76,10 @@ public override int Compare(int i, int j) /// public class DescendingStablePermutationComparer : DcgPermutationComparer { -#pragma warning disable TLC_GeneralName // The naming is the least of this class's problems. A setter with no getter?? +#pragma warning disable MSML_GeneralName // The naming is the least of this class's problems. A setter with no getter?? protected double[] _scores; protected int _scoresOffset; -#pragma warning restore TLC_GeneralName +#pragma warning restore MSML_GeneralName public override double[] Scores { set { _scores = value; } } diff --git a/src/Microsoft.ML.FastTree/Training/DocumentPartitioning.cs b/src/Microsoft.ML.FastTree/Training/DocumentPartitioning.cs index 2518fc839e..1aec08271e 100644 --- a/src/Microsoft.ML.FastTree/Training/DocumentPartitioning.cs +++ b/src/Microsoft.ML.FastTree/Training/DocumentPartitioning.cs @@ -195,7 +195,7 @@ public double[] GetDistribution(double[] targets, double[] weights, int quantile /// the leaf being split /// /// the threshold - /// Index of child node that contains documents whose split + /// Index of child node that contains documents whose split /// feature value is greater than the split threshold public unsafe void Split(int leaf, IIntArrayForwardIndexer indexer, UInt32 threshold, int gtChildIndex) { @@ -239,7 +239,7 @@ public unsafe void Split(int leaf, IIntArrayForwardIndexer indexer, UInt32 thres /// the leaf being split /// Split feature flock's bin /// Catgeorical feature indices - /// Index of child node that contains documents whose split + /// Index of child node that contains documents whose split /// feature value is greater than the split threshold public unsafe void Split(int leaf, IntArray bins, HashSet categoricalIndices, int gtChildIndex) { diff --git 
a/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/GradientDescent.cs b/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/GradientDescent.cs index 8b2b345508..a749158b2f 100644 --- a/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/GradientDescent.cs +++ b/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/GradientDescent.cs @@ -51,7 +51,7 @@ protected virtual double[] GetGradient(IChannel ch) if ((_numberOfDroppedTrees == 0) && (numberOfTrees > 0)) { droppedTrees = new int[] { DropoutRng.Next(numberOfTrees) }; - // force at least a single tree to be dropped + // force at least a single tree to be dropped _numberOfDroppedTrees = droppedTrees.Length; } ch.Trace("dropout: Dropping {0} trees of {1} for rate {2}", @@ -104,7 +104,7 @@ public override RegressionTree TrainingIteration(IChannel ch, bool[] activeFeatu using (Timer.Time(TimerEvent.TreeLearnerAdjustTreeOutputs)) { double[] backupScores = null; - // when doing dropouts we need to replace the TrainingScores with the scores without the dropped trees + // when doing dropouts we need to replace the TrainingScores with the scores without the dropped trees if (DropoutRate > 0) { backupScores = TrainingScores.Scores; diff --git a/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/NoOptimizationAlgorithm.cs b/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/NoOptimizationAlgorithm.cs index 563a7891fd..dbc1f04147 100644 --- a/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/NoOptimizationAlgorithm.cs +++ b/src/Microsoft.ML.FastTree/Training/OptimizationAlgorithms/NoOptimizationAlgorithm.cs @@ -7,7 +7,7 @@ namespace Microsoft.ML.Runtime.FastTree.Internal /// /// This is dummy optimizer. 
As Random forest does not have any boosting based optimization, this is place holder to be consistent /// with other fast tree based applications - /// + /// public class RandomForestOptimizer : GradientDescent { private IGradientAdjuster _gradientWrapper; diff --git a/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs b/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs index 08ae6fb16f..c968804708 100644 --- a/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs +++ b/src/Microsoft.ML.FastTree/Training/Parallel/IParallelTraining.cs @@ -38,7 +38,7 @@ public delegate void FindBestThresholdFromRawArrayFun(LeafSplitCandidates leafSp /// To speed up the find bin process, it let different workers to find bins for different features. /// Then perform global sync up. /// In Feature parallel, every machines holds all data, so this is unneeded. - /// 2. interactive with TreeLearner: , , , + /// 2. interactive with TreeLearner: , , , /// , , , . /// A full process is: /// Use to alter local active features. @@ -75,7 +75,7 @@ public interface IParallelTraining /// /// Initialize every time before training a tree. - /// will alter activeFeatures in Feature parallel. + /// will alter activeFeatures in Feature parallel. /// Because it only need to find threshold for part of features in feature parallel. /// void InitIteration(ref bool[] activeFeatures); @@ -98,10 +98,10 @@ public interface IParallelTraining bool IsNeedFindLocalBestSplit(); /// - /// True if need to skip non-splittable histogram. - /// Only will return False in Voting parallel. + /// True if need to skip non-splittable histogram. + /// Only will return False in Voting parallel. /// That is because local doesn't have global histograms in Voting parallel, - /// So the information about NonSplittable is not correct, and we cannot skip it. + /// So the information about NonSplittable is not correct, and we cannot skip it. 
/// bool IsSkipNonSplittableHistogram(); @@ -133,7 +133,7 @@ void FindGlobalBestSplit(LeafSplitCandidates smallerChildSplitCandidates, /// /// Get indices of features that should be find bin in local. - /// After construct local boundary, should call + /// After construct local boundary, should call /// to get boundaries for all features. /// bool[] GetLocalBinConstructionFeatures(int numFeatures); @@ -141,8 +141,8 @@ void FindGlobalBestSplit(LeafSplitCandidates smallerChildSplitCandidates, /// /// Sync Global feature bucket. /// used in Data parallel and Voting parallel. - /// Data are partitioned by row. To speed up the Global find bin process, - /// we can let different workers construct Bin Boundary for different features, + /// Data are partitioned by row. To speed up the Global find bin process, + /// we can let different workers construct Bin Boundary for different features, /// then perform a global sync up. /// void SyncGlobalBoundary(int numFeatures, int maxBin, Double[][] binUpperBounds); diff --git a/src/Microsoft.ML.FastTree/Training/StepSearch.cs b/src/Microsoft.ML.FastTree/Training/StepSearch.cs index db6f61ada2..e3d14aeb8e 100644 --- a/src/Microsoft.ML.FastTree/Training/StepSearch.cs +++ b/src/Microsoft.ML.FastTree/Training/StepSearch.cs @@ -30,7 +30,7 @@ public LineSearch(Test lossCalculator, int lossIndex) _historicStepSize = Math.Max(1.0, _minStepSize); } - private readonly static double _phi = (1.0 + Math.Sqrt(5)) / 2; + private static readonly double _phi = (1.0 + Math.Sqrt(5)) / 2; private static void Swap(ref T a, ref T b) { diff --git a/src/Microsoft.ML.FastTree/Training/TreeLearners/LeastSquaresRegressionTreeLearner.cs b/src/Microsoft.ML.FastTree/Training/TreeLearners/LeastSquaresRegressionTreeLearner.cs index cb902fad2b..fd1fb548da 100644 --- a/src/Microsoft.ML.FastTree/Training/TreeLearners/LeastSquaresRegressionTreeLearner.cs +++ b/src/Microsoft.ML.FastTree/Training/TreeLearners/LeastSquaresRegressionTreeLearner.cs @@ -103,7 +103,7 @@ 
public class LeastSquaresRegressionTreeLearner : TreeLearner /// Only consider a gain if its likelihood versus a random /// choice gain is above a certain value (so 0.95 would mean restricting to gains that have less /// than a 0.05 change of being generated randomly through choice of a random split). - /// Maximum categorical split points to consider when splitting on a + /// Maximum categorical split points to consider when splitting on a /// categorical feature. /// /// -1 if best step ranking is to be disabled, otherwise it diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs index 0c75b2dd18..0d48bb8123 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs @@ -336,7 +336,7 @@ public string ToGainSummary(FeaturesToContentMap fmap, Dictionary feat /// /// Returns a vector of feature contributions for a given example. - /// is used as a buffer to accumulate the contributions across trees. + /// is used as a buffer to accumulate the contributions across trees. /// If is null, it will be created, otherwise it will be reused. /// internal void GetFeatureContributions(ref VBuffer features, ref VBuffer contribs, ref BufferBuilder builder) diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/QuantileRegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/QuantileRegressionTree.cs index 642d82ede1..bcec5ac082 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/QuantileRegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/QuantileRegressionTree.cs @@ -58,9 +58,9 @@ public override void Save(ModelSaveContext ctx) } /// - /// Loads the sampled labels of this tree to the distribution array for the sparse instance type. + /// Loads the sampled labels of this tree to the distribution array for the sparse instance type. 
/// By calling for all the trees, the distribution array will have all the samples from all the trees - /// + /// public void LoadSampledLabels(ref VBuffer feat, Float[] distribution, Float[] weights, int sampleCount, int destinationIndex) { int leaf = GetLeaf(ref feat); diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs index d2edeb796a..65701f0d03 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs @@ -26,7 +26,7 @@ public class RegressionTree // Weight of this tree in the ensemble // for each non-leaf, we keep the following data - public Float[] _defaultValueForMissing; + public Float[] DefaultValueForMissing; private double[] _splitGain; private double[] _gainPValue; // The value of this non-leaf node, prior to split when it was a leaf. @@ -42,12 +42,12 @@ public class RegressionTree /// public bool[] CategoricalSplit { get; } /// - /// Array of categorical values for the categorical feature that might be chosen as + /// Array of categorical values for the categorical feature that might be chosen as /// a split feature for a node. /// public int[][] CategoricalSplitFeatures; /// - /// For a given categorical feature that is chosen as a split feature for a node, this + /// For a given categorical feature that is chosen as a split feature for a node, this /// array contains it's start and end range in the input feature vector at prediction time. 
/// public int[][] CategoricalSplitFeatureRanges; @@ -89,7 +89,7 @@ public RegressionTree(int maxLeaves) _gainPValue = new double[maxLeaves - 1]; _previousLeafValue = new double[maxLeaves - 1]; Thresholds = new UInt32[maxLeaves - 1]; - _defaultValueForMissing = null; + DefaultValueForMissing = null; LteChild = new int[maxLeaves - 1]; GtChild = new int[maxLeaves - 1]; LeafValues = new double[maxLeaves]; @@ -202,7 +202,7 @@ internal RegressionTree(int[] splitFeatures, Double[] splitGain, Double[] gainPV _splitGain = splitGain; _gainPValue = gainPValue; RawThresholds = rawThresholds; - _defaultValueForMissing = defaultValueForMissing; + DefaultValueForMissing = defaultValueForMissing; LteChild = lteChild; GtChild = gtChild; LeafValues = leafValues; @@ -222,10 +222,10 @@ internal RegressionTree(int[] splitFeatures, Double[] splitGain, Double[] gainPV CheckValid(Contracts.Check); - if (_defaultValueForMissing != null) + if (DefaultValueForMissing != null) { bool allZero = true; - foreach (var val in _defaultValueForMissing) + foreach (var val in DefaultValueForMissing) { if (val != 0.0f) { @@ -234,7 +234,7 @@ internal RegressionTree(int[] splitFeatures, Double[] splitGain, Double[] gainPV } } if (allZero) - _defaultValueForMissing = null; + DefaultValueForMissing = null; } } @@ -300,9 +300,9 @@ internal RegressionTree(ModelLoadContext ctx, bool usingDefaultValue, bool categ Thresholds = reader.ReadUIntArray(); RawThresholds = reader.ReadFloatArray(); - _defaultValueForMissing = null; + DefaultValueForMissing = null; if (usingDefaultValue) - _defaultValueForMissing = reader.ReadFloatArray(); + DefaultValueForMissing = reader.ReadFloatArray(); LeafValues = reader.ReadDoubleArray(); // Informational... 
@@ -313,10 +313,10 @@ internal RegressionTree(ModelLoadContext ctx, bool usingDefaultValue, bool categ CheckValid(Contracts.CheckDecode); // Check the need of _defaultValueForMissing - if (_defaultValueForMissing != null) + if (DefaultValueForMissing != null) { bool allZero = true; - foreach (var val in _defaultValueForMissing) + foreach (var val in DefaultValueForMissing) { if (val != 0.0f) { @@ -325,7 +325,7 @@ internal RegressionTree(ModelLoadContext ctx, bool usingDefaultValue, bool categ } } if (allZero) - _defaultValueForMissing = null; + DefaultValueForMissing = null; } } @@ -402,7 +402,7 @@ protected void Save(ModelSaveContext ctx, TreeType code) writer.WriteUIntArray(Thresholds); writer.WriteFloatArray(RawThresholds); - writer.WriteFloatArray(_defaultValueForMissing); + writer.WriteFloatArray(DefaultValueForMissing); writer.WriteDoubleArray(LeafValues); writer.WriteDoubleArray(_splitGain); @@ -804,12 +804,12 @@ public int GetLeaf(ref VBuffer feat, ref List path) private Float GetFeatureValue(Float x, int node) { // Not need to convert missing vaules. - if (_defaultValueForMissing == null) + if (DefaultValueForMissing == null) return x; if (Double.IsNaN(x)) { - return _defaultValueForMissing[node]; + return DefaultValueForMissing[node]; } else { @@ -1198,7 +1198,7 @@ public void ToTreeEnsembleFormat(StringBuilder sbEvaluator, StringBuilder sbInpu private void ToTreeEnsembleFormatForCategoricalSplit(StringBuilder sbEvaluator, StringBuilder sbInput, FeaturesToContentMap featureContents, ref int evaluatorCounter, Dictionary featureToId, Dictionary categoricalSplitNodeToId) { - //REVIEW: Can all these conditions even be true? + //REVIEW: Can all these conditions even be true? 
if (CategoricalSplitFeatures == null || CategoricalSplitFeatures.Length == 0 || CategoricalSplitFeatures.All(val => val == null)) @@ -1518,7 +1518,7 @@ public void AppendFeatureContributions(ref VBuffer src, BufferBuilder /// A bindable mapper wrapper for tree ensembles, that creates a bound mapper with three outputs: - /// 1. A vector containing the individual tree outputs of the tree ensemble. + /// 1. A vector containing the individual tree outputs of the tree ensemble. /// 2. An indicator vector for the leaves that the feature vector falls on in the tree ensemble. /// 3. An indicator vector for the internal nodes on the paths that the feature vector falls on in the tree ensemble. /// @@ -192,15 +192,15 @@ public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper // A vector containing the output of each tree on a given example. var treeValueType = new VectorType(NumberType.Float, _owner._ensemble.NumTrees); - // An indicator vector with length = the total number of leaves in the ensemble, indicating which leaf the example + // An indicator vector with length = the total number of leaves in the ensemble, indicating which leaf the example // ends up in all the trees in the ensemble. var leafIdType = new VectorType(NumberType.Float, _owner._totalLeafCount); - // An indicator vector with length = the total number of nodes in the ensemble, indicating the nodes on + // An indicator vector with length = the total number of nodes in the ensemble, indicating the nodes on // the paths of the example in all the trees in the ensemble. // The total number of nodes in a binary tree is equal to the number of internal nodes + the number of leaf nodes, // and it is also equal to the number of children of internal nodes (which is 2 * the number of internal nodes) - // plus one (since the root node is not a child of any node). So we have #internal + #leaf = 2*(#internal) + 1, - // which means that #internal = #leaf - 1. 
+ // plus one (since the root node is not a child of any node). So we have #internal + #leaf = 2*(#internal) + 1, + // which means that #internal = #leaf - 1. // Therefore, the number of internal nodes in the ensemble is #leaf - #trees. var pathIdType = new VectorType(NumberType.Float, _owner._totalLeafCount - _owner._ensemble.NumTrees); _outputSchema = new Schema(ectx, owner, treeValueType, leafIdType, pathIdType); @@ -563,8 +563,8 @@ public sealed class Arguments : TrainAndScoreTransform.ArgumentsBase - /// REVIEW: Ideally we should have only one arguments class by using IComponentFactory for the model. - /// For now it probably warrants a REVIEW comment here in case we'd like to merge these two arguments in the future. + /// REVIEW: Ideally we should have only one arguments class by using IComponentFactory for the model. + /// For now it probably warrants a REVIEW comment here in case we'd like to merge these two arguments in the future. /// Also, it might be worthwhile to extract the common arguments to a base class. 
/// [TlcModule.EntryPointKind(typeof(CommonInputs.IFeaturizerInput))] @@ -803,9 +803,9 @@ private static IDataView AppendLabelTransform(IHostEnvironment env, IChannel ch, public static partial class TreeFeaturize { - [TlcModule.EntryPoint(Name = "Transforms.TreeLeafFeaturizer", - Desc = TreeEnsembleFeaturizerTransform.TreeEnsembleSummary, - UserName = TreeEnsembleFeaturizerTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.TreeLeafFeaturizer", + Desc = TreeEnsembleFeaturizerTransform.TreeEnsembleSummary, + UserName = TreeEnsembleFeaturizerTransform.UserName, ShortName = TreeEnsembleFeaturizerBindableMapper.LoadNameShort, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput Featurizer(IHostEnvironment env, TreeEnsembleFeaturizerTransform.ArgumentsForEntryPoint input) diff --git a/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs b/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs index 5763e3c9dd..d4bc6e1962 100644 --- a/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs +++ b/src/Microsoft.ML.FastTree/Utils/ToByteArrayExtensions.cs @@ -47,7 +47,7 @@ public static int SizeInBytes(this short a) return sizeof(short); } - public unsafe static void ToByteArray(this short a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this short a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -71,7 +71,7 @@ public static int SizeInBytes(this ushort a) return sizeof(ushort); } - public unsafe static void ToByteArray(this ushort a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this ushort a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -95,7 +95,7 @@ public static int SizeInBytes(this int a) return sizeof(int); } - public unsafe static void ToByteArray(this int a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this int a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -105,7 +105,7 
@@ public unsafe static void ToByteArray(this int a, byte[] buffer, ref int positio position += sizeof(int); } - public unsafe static int ToInt(this byte[] buffer, ref int position) + public static unsafe int ToInt(this byte[] buffer, ref int position) { int a; fixed (byte* pBuffer = buffer) @@ -124,7 +124,7 @@ public static int SizeInBytes(this uint a) return sizeof(uint); } - public unsafe static void ToByteArray(this uint a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this uint a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -134,7 +134,7 @@ public unsafe static void ToByteArray(this uint a, byte[] buffer, ref int positi position += sizeof(uint); } - public unsafe static uint ToUInt(this byte[] buffer, ref int position) + public static unsafe uint ToUInt(this byte[] buffer, ref int position) { uint a; fixed (byte* pBuffer = buffer) @@ -153,7 +153,7 @@ public static int SizeInBytes(this long a) return sizeof(long); } - public unsafe static void ToByteArray(this long a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this long a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -177,7 +177,7 @@ public static int SizeInBytes(this ulong a) return sizeof(ulong); } - public unsafe static void ToByteArray(this ulong a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this ulong a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -213,7 +213,7 @@ public static int SizeInBytes(this float a) return sizeof(float); } - public unsafe static void ToByteArray(this float a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this float a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -237,7 +237,7 @@ public static int SizeInBytes(this double a) return sizeof(double); } - public unsafe static void ToByteArray(this double a, byte[] buffer, ref int position) + public static unsafe void 
ToByteArray(this double a, byte[] buffer, ref int position) { fixed (byte* pBuffer = buffer) { @@ -318,7 +318,7 @@ public static int SizeInBytes(this short[] a) return sizeof(int) + Utils.Size(a) * sizeof(short); } - public unsafe static void ToByteArray(this short[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this short[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -333,7 +333,7 @@ public unsafe static void ToByteArray(this short[] a, byte[] buffer, ref int pos position += length * sizeof(short); } - public unsafe static short[] ToShortArray(this byte[] buffer, ref int position) + public static unsafe short[] ToShortArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); short[] a = new short[length]; @@ -357,7 +357,7 @@ public static int SizeInBytes(this ushort[] a) return sizeof(int) + Utils.Size(a) * sizeof(ushort); } - public unsafe static void ToByteArray(this ushort[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this ushort[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -372,7 +372,7 @@ public unsafe static void ToByteArray(this ushort[] a, byte[] buffer, ref int po position += length * sizeof(ushort); } - public unsafe static ushort[] ToUShortArray(this byte[] buffer, ref int position) + public static unsafe ushort[] ToUShortArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); ushort[] a = new ushort[length]; @@ -396,7 +396,7 @@ public static int SizeInBytes(this int[] array) return sizeof(int) + Utils.Size(array) * sizeof(int); } - public unsafe static void ToByteArray(this int[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this int[] a, byte[] buffer, ref int position) { int length = Utils.Size(a); length.ToByteArray(buffer, ref position); @@ -411,10 +411,10 @@ public unsafe 
static void ToByteArray(this int[] a, byte[] buffer, ref int posit position += length * sizeof(int); } - public unsafe static int[] ToIntArray(this byte[] buffer, ref int position) + public static unsafe int[] ToIntArray(this byte[] buffer, ref int position) => buffer.ToIntArray(ref position, buffer.ToInt(ref position)); - public unsafe static int[] ToIntArray(this byte[] buffer, ref int position, int length) + public static unsafe int[] ToIntArray(this byte[] buffer, ref int position, int length) { if (length == 0) return null; @@ -440,7 +440,7 @@ public static int SizeInBytes(this uint[] array) return sizeof(int) + Utils.Size(array) * sizeof(uint); } - public unsafe static void ToByteArray(this uint[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this uint[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -455,7 +455,7 @@ public unsafe static void ToByteArray(this uint[] a, byte[] buffer, ref int posi position += length * sizeof(uint); } - public unsafe static uint[] ToUIntArray(this byte[] buffer, ref int position) + public static unsafe uint[] ToUIntArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); uint[] a = new uint[length]; @@ -479,7 +479,7 @@ public static int SizeInBytes(this long[] array) return sizeof(int) + Utils.Size(array) * sizeof(long); } - public unsafe static void ToByteArray(this long[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this long[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -494,7 +494,7 @@ public unsafe static void ToByteArray(this long[] a, byte[] buffer, ref int posi position += length * sizeof(long); } - public unsafe static long[] ToLongArray(this byte[] buffer, ref int position) + public static unsafe long[] ToLongArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); long[] a = new 
long[length]; @@ -518,7 +518,7 @@ public static int SizeInBytes(this ulong[] array) return sizeof(int) + Utils.Size(array) * sizeof(ulong); } - public unsafe static void ToByteArray(this ulong[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this ulong[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -533,7 +533,7 @@ public unsafe static void ToByteArray(this ulong[] a, byte[] buffer, ref int pos position += length * sizeof(ulong); } - public unsafe static ulong[] ToULongArray(this byte[] buffer, ref int position) + public static unsafe ulong[] ToULongArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); ulong[] a = new ulong[length]; @@ -566,7 +566,7 @@ public static void ToByteArray(this MD5Hash[] a, byte[] buffer, ref int position } } - public unsafe static MD5Hash[] ToUInt128Array(this byte[] buffer, ref int position) + public static unsafe MD5Hash[] ToUInt128Array(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); MD5Hash[] a = new MD5Hash[length]; @@ -584,7 +584,7 @@ public static int SizeInBytes(this float[] array) return sizeof(int) + Utils.Size(array) * sizeof(float); } - public unsafe static void ToByteArray(this float[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this float[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -599,7 +599,7 @@ public unsafe static void ToByteArray(this float[] a, byte[] buffer, ref int pos position += length * sizeof(float); } - public unsafe static float[] ToFloatArray(this byte[] buffer, ref int position) + public static unsafe float[] ToFloatArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); float[] a = new float[length]; @@ -623,7 +623,7 @@ public static int SizeInBytes(this double[] array) return sizeof(int) + Utils.Size(array) * sizeof(double); } 
- public unsafe static void ToByteArray(this double[] a, byte[] buffer, ref int position) + public static unsafe void ToByteArray(this double[] a, byte[] buffer, ref int position) { int length = a.Length; length.ToByteArray(buffer, ref position); @@ -638,7 +638,7 @@ public unsafe static void ToByteArray(this double[] a, byte[] buffer, ref int po position += length * sizeof(double); } - public unsafe static double[] ToDoubleArray(this byte[] buffer, ref int position) + public static unsafe double[] ToDoubleArray(this byte[] buffer, ref int position) { int length = buffer.ToInt(ref position); double[] a = new double[length]; diff --git a/src/Microsoft.ML.FastTree/Utils/VectorUtils.cs b/src/Microsoft.ML.FastTree/Utils/VectorUtils.cs index fdf695b174..b804523cc1 100644 --- a/src/Microsoft.ML.FastTree/Utils/VectorUtils.cs +++ b/src/Microsoft.ML.FastTree/Utils/VectorUtils.cs @@ -17,7 +17,7 @@ public static double GetVectorSize(double[] vector) } // Normalizes the vector to have size of 1 - public unsafe static void NormalizeVectorSize(double[] vector) + public static unsafe void NormalizeVectorSize(double[] vector) { double size = GetVectorSize(vector); int length = vector.Length; @@ -34,7 +34,7 @@ public unsafe static void NormalizeVectorSize(double[] vector) } // Center vector to have mean = 0 - public unsafe static void CenterVector(double[] vector) + public static unsafe void CenterVector(double[] vector) { double mean = GetMean(vector); int length = vector.Length; @@ -51,7 +51,7 @@ public unsafe static void CenterVector(double[] vector) } // Normalizes the vector to have mean = 0 and std = 1 - public unsafe static void NormalizeVector(double[] vector) + public static unsafe void NormalizeVector(double[] vector) { double mean = GetMean(vector); double std = GetStandardDeviation(vector, mean); @@ -59,7 +59,7 @@ public unsafe static void NormalizeVector(double[] vector) } // Normalizes the vector to have mean = 0 and std = 1 - public unsafe static void 
NormalizeVector(double[] vector, double mean, double std) + public static unsafe void NormalizeVector(double[] vector, double mean, double std) { int length = vector.Length; unsafe @@ -74,17 +74,17 @@ public unsafe static void NormalizeVector(double[] vector, double mean, double s } } - public unsafe static double GetDotProduct(double[] vector1, double[] vector2) + public static unsafe double GetDotProduct(double[] vector1, double[] vector2) { return GetDotProduct(vector1, vector2, vector1.Length); } - public unsafe static double GetDotProduct(float[] vector1, float[] vector2) + public static unsafe double GetDotProduct(float[] vector1, float[] vector2) { return GetDotProduct(vector1, vector2, vector1.Length); } - public unsafe static double GetDotProduct(double[] vector1, double[] vector2, int length) + public static unsafe double GetDotProduct(double[] vector1, double[] vector2, int length) { double product = 0; unsafe @@ -101,7 +101,7 @@ public unsafe static double GetDotProduct(double[] vector1, double[] vector2, in return product; } - public unsafe static double GetDotProduct(float[] vector1, float[] vector2, int length) + public static unsafe double GetDotProduct(float[] vector1, float[] vector2, int length) { double product = 0; unsafe @@ -118,7 +118,7 @@ public unsafe static double GetDotProduct(float[] vector1, float[] vector2, int return product; } - public unsafe static double GetMean(double[] vector) + public static unsafe double GetMean(double[] vector) { double sum = 0; int length = vector.Length; @@ -135,7 +135,7 @@ public unsafe static double GetMean(double[] vector) return sum / length; } - public unsafe static double GetMean(float[] vector) + public static unsafe double GetMean(float[] vector) { double sum = 0; int length = vector.Length; @@ -157,7 +157,7 @@ public static double GetStandardDeviation(double[] vector) return GetStandardDeviation(vector, GetMean(vector)); } - public unsafe static double GetStandardDeviation(double[] vector, double 
mean) + public static unsafe double GetStandardDeviation(double[] vector, double mean) { double sum = 0; int length = vector.Length; @@ -176,7 +176,7 @@ public unsafe static double GetStandardDeviation(double[] vector, double mean) return Math.Sqrt(sum / length); } - public unsafe static int GetIndexOfMax(double[] vector) + public static unsafe int GetIndexOfMax(double[] vector) { int length = vector.Length; double max = vector[0]; @@ -199,7 +199,7 @@ public unsafe static int GetIndexOfMax(double[] vector) } // Subtracts the second vector from the first one (vector1[i] -= vector2[i]) - public unsafe static void SubtractInPlace(double[] vector1, double[] vector2) + public static unsafe void SubtractInPlace(double[] vector1, double[] vector2) { int length = vector1.Length; unsafe @@ -215,7 +215,7 @@ public unsafe static void SubtractInPlace(double[] vector1, double[] vector2) } } - public unsafe static double[] Subtract(double[] vector1, double[] vector2) + public static unsafe double[] Subtract(double[] vector1, double[] vector2) { int length = vector1.Length; double[] result = new double[length]; @@ -235,7 +235,7 @@ public unsafe static double[] Subtract(double[] vector1, double[] vector2) } // Subtracts the second vector from the first one (vector1[i] += vector2[i]) - public unsafe static void AddInPlace(double[] vector1, double[] vector2) + public static unsafe void AddInPlace(double[] vector1, double[] vector2) { int length = vector1.Length; unsafe @@ -252,7 +252,7 @@ public unsafe static void AddInPlace(double[] vector1, double[] vector2) } // Mutiplies the second vector from the first one (vector1[i] /= val) - public unsafe static void MutiplyInPlace(double[] vector, double val) + public static unsafe void MutiplyInPlace(double[] vector, double val) { int length = vector.Length; unsafe @@ -268,7 +268,7 @@ public unsafe static void MutiplyInPlace(double[] vector, double val) } // Divides the second vector from the first one (vector1[i] /= val) - public unsafe 
static void DivideInPlace(double[] vector, double val) + public static unsafe void DivideInPlace(double[] vector, double val) { int length = vector.Length; unsafe @@ -284,7 +284,7 @@ public unsafe static void DivideInPlace(double[] vector, double val) } // Divides the second vector from the first one (vector1[i] /= val) - public unsafe static void DivideInPlace(float[] vector, float val) + public static unsafe void DivideInPlace(float[] vector, float val) { int length = vector.Length; unsafe @@ -299,7 +299,7 @@ public unsafe static void DivideInPlace(float[] vector, float val) } } - public unsafe static double GetEuclideanDistance(double[] vector1, double[] vector2) + public static unsafe double GetEuclideanDistance(double[] vector1, double[] vector2) { double sum = 0; double diff; diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 3bab791fe1..1ab65a8cd5 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -147,7 +147,7 @@ private KMeansPredictor TrainCore(IChannel ch, RoleMappedData data, int dimensio var cursorFactory = new FeatureFloatVectorCursor.Factory(data, CursOpt.Features | CursOpt.Id | CursOpt.Weight); // REVIEW: It would be nice to extract these out into subcomponents in the future. We should // revisit and even consider breaking these all into individual KMeans-flavored trainers, they - // all produce a valid set of output centroids with various trade-offs in runtime (with perhaps + // all produce a valid set of output centroids with various trade-offs in runtime (with perhaps // random initialization creating a set that's not terribly useful.) They could also be extended to // pay attention to their incoming set of centroids and incrementally train. 
if (_initAlgorithm == InitAlgorithm.KMeansPlusPlus) @@ -346,13 +346,13 @@ public static void Initialize( /// /// An instance of this class is used by SharedStates in YinYangTrainer - /// and KMeansBarBarInitialization. It effectively bounds MaxInstancesToAccelerate and + /// and KMeansBarBarInitialization. It effectively bounds MaxInstancesToAccelerate and /// initializes RowIndexGetter. /// internal sealed class KMeansAcceleratedRowMap { // Retrieves the row's index for per-instance data. If the - // row is not assigned an index (it occurred after 'maxInstancesToAccelerate') + // row is not assigned an index (it occurred after 'maxInstancesToAccelerate') // or we are not accelerating then this returns -1. public readonly KMeansUtils.RowIndexGetter RowIndexGetter; @@ -434,14 +434,14 @@ internal static class KMeansBarBarInitialization /// /// Data for optimizing KMeans|| initialization. Very similar to SharedState class /// For every instance, there is a space for the best weight and best cluster computed. - /// + /// /// In this class, new clusters mean the clusters that were added to the cluster set - /// in the previous round of KMeans|| and old clusters are the rest of them (the ones + /// in the previous round of KMeans|| and old clusters are the rest of them (the ones /// that were added in the rounds before the previous one). - /// + /// /// In every round of KMeans||, numSamplesPerRound new clusters are added to the set of clusters. - /// There are 'numRounds' number of rounds. We compute and store the distance of each new - /// cluster from every round to all of the previous clusters and use it + /// There are 'numRounds' number of rounds. We compute and store the distance of each new + /// cluster from every round to all of the previous clusters and use it /// to avoid unnecessary computation by applying the triangle inequality. 
/// private sealed class SharedState @@ -453,12 +453,12 @@ private sealed class SharedState // Note that this array is only allocated for MaxInstancesToAccelerate elements. private readonly int[] _bestCluster; - // _bestWeight holds the weight of instance x to _bestCluster[x] where weight(x) = dist(x, _bestCluster[x])^2 - norm(x)^2. + // _bestWeight holds the weight of instance x to _bestCluster[x] where weight(x) = dist(x, _bestCluster[x])^2 - norm(x)^2. // Note that this array is only allocated for MaxInstancesToAccelerate elements. private readonly Float[] _bestWeight; // The distance of each newly added cluster from the previous round to every old cluster - // the first dimension of this array is the size of numSamplesPerRound + // the first dimension of this array is the size of numSamplesPerRound // and the second dimension is the size of numRounds * numSamplesPerRound. // _clusterDistances[i][j] = dist(cluster[i+clusterPrevCount], cluster[j]) // where clusterPrevCount-1 is the last index of the old clusters @@ -510,8 +510,8 @@ public Float GetBestWeight(int idx) /// /// When assigning an accelerated row to a cluster, we store away the weight /// to its closest cluster, as well as the identity of the new - /// closest cluster. Note that bestWeight can be negative since it is - /// corresponding to the weight of a distance which does not have + /// closest cluster. Note that bestWeight can be negative since it is + /// corresponding to the weight of a distance which does not have /// the L2 norm of the point itself. /// public void SetInstanceCluster(int n, Float bestWeight, int bestCluster) @@ -565,7 +565,7 @@ public bool CanWeightComputationBeAvoided(Float instanceDistanceToBestOldCluster // Use triangle inequality to evaluate whether weight computation can be avoided // dist(x,cNew) + dist(x,cOld) > dist(cOld,cNew) => // dist(x,cNew) > dist(cOld,cNew) - dist(x,cOld) => - // If dist(cOld,cNew) - dist(x,cOld) > dist(x,cOld), then dist(x,cNew) > dist(x,cOld). 
Therefore it is + // If dist(cOld,cNew) - dist(x,cOld) > dist(x,cOld), then dist(x,cNew) > dist(x,cOld). Therefore it is // not necessary to compute dist(x,cNew). if (distanceBetweenOldAndNewClusters - instanceDistanceToBestOldCluster > instanceDistanceToBestOldCluster) return true; @@ -577,7 +577,7 @@ public bool CanWeightComputationBeAvoided(Float instanceDistanceToBestOldCluster /// /// This function finds the best cluster and the best weight for an instance using /// smart triangle inequality to avoid unnecessary weight computations. - /// + /// /// Note that is used to avoid the storing the new cluster in /// final round. After the final round, best cluster information will be ignored. /// @@ -649,7 +649,7 @@ private static void FindBestCluster(ref VBuffer point, int pointRowIndex, } /// - /// This method computes the memory requirement for _clusterDistances in SharedState (clusterBytes) and + /// This method computes the memory requirement for _clusterDistances in SharedState (clusterBytes) and /// the maximum number of instances whose weight to the closest cluster can be memorized in order to avoid /// recomputation later. /// @@ -678,7 +678,7 @@ private static void ComputeAccelerationMemoryRequirement(long accelMemBudgetMb, /// /// Uses memory in initializationState to cache distances and avoids unnecessary distance computations /// akin to YinYang-KMeans paper. - /// + /// /// Everywhere in this function, weight of an instance x from a cluster c means weight(x,c) = dist(x,c)^2-norm(x)^2. /// We store weight in most cases to avoid unnecessary computation of norm(x). 
/// @@ -1019,7 +1019,7 @@ public void UpdateClusterAssignment(bool firstIteration, ref VBuffer feat { // update the cachedSum as the instance moves from (previous) bestCluster[n] to cluster VectorUtils.Add(ref features, ref CachedSum[cluster]); - // There doesnt seem to be a Subtract function that does a -= b, so doing a += (-1 * b) + // There doesnt seem to be a Subtract function that does a -= b, so doing a += (-1 * b) VectorUtils.AddMult(ref features, -1, ref CachedSum[previousCluster]); NumChanged++; } @@ -1151,7 +1151,7 @@ private sealed class SharedState // max value of delta[i] for 0 <= i < _k public Float DeltaMax; - // Per instance structures + // Per instance structures public int GetBestCluster(int idx) { @@ -1180,7 +1180,7 @@ public SharedState(FeatureFloatVectorCursor.Factory factory, IChannel ch, long b if (MaxInstancesToAccelerate > 0) { - // allocate data structures + // allocate data structures Delta = new Float[k]; _bestCluster = new int[MaxInstancesToAccelerate]; @@ -1478,7 +1478,7 @@ public struct RowStats /// data set with a probability of numSamples/N * weight/(sum(weight)). Buffer /// is sized to the number of threads plus one and stores the minheaps needed to /// perform the per-thread reservior samples. - /// + /// /// This method assumes that the numSamples is much smaller than the full dataset as /// it expects to be able to sample numSamples * numThreads. /// @@ -1514,13 +1514,13 @@ public static RowStats ParallelWeightedReservoirSample( // We use distance as a proxy for 'is the same point'. By excluding // all points that lie within a very small distance of our current set of // centroids we force the algorithm to explore more broadly and avoid creating a - // set of centroids containing the same, or very close to the same, point + // set of centroids containing the same, or very close to the same, point // more than once. 
Float sameClusterEpsilon = (Float)1e-15; Float weight = weightFn(ref point, pointRowIndex); - // If numeric instability has forced it to zero, then we bound it to epsilon to + // If numeric instability has forced it to zero, then we bound it to epsilon to // keep the key valid and avoid NaN, (although the math does tend to work out regardless: // 1 / 0 => Inf, base ^ Inf => 0, when |base| < 1) if (weight == 0) diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index 0612135ce3..f31f67a08d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -36,7 +36,7 @@ public interface IBoosterParameter } /// - /// Parameters names comes from LightGBM library. + /// Parameters names comes from LightGBM library. /// See https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst. /// public sealed class LightGbmArguments : LearnerInputBaseWithGroupId diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index 7ce9f42b10..f788b4feab 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -27,7 +27,7 @@ public sealed class LightGbmBinaryPredictor : FastTreePredictionWrapper { public const string LoaderSignature = "LightGBMBinaryExec"; public const string RegistrationName = "LightGBMBinaryPredictor"; - + private static VersionInfo GetVersionInfo() { // REVIEW: can we decouple the version from FastTree predictor version ? 
@@ -129,9 +129,9 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role public static partial class LightGbm { [TlcModule.EntryPoint( - Name = "Trainers.LightGbmBinaryClassifier", + Name = "Trainers.LightGbmBinaryClassifier", Desc = LightGbmBinaryTrainer.Summary, - UserName = LightGbmBinaryTrainer.UserName, + UserName = LightGbmBinaryTrainer.UserName, ShortName = LightGbmBinaryTrainer.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index 0534f0d660..ff44139877 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -182,9 +182,9 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role public static partial class LightGbm { [TlcModule.EntryPoint( - Name = "Trainers.LightGbmClassifier", - Desc = "Train a LightGBM multi class model.", - UserName = LightGbmMulticlassTrainer.Summary, + Name = "Trainers.LightGbmClassifier", + Desc = "Train a LightGBM multi class model.", + UserName = LightGbmMulticlassTrainer.Summary, ShortName = LightGbmMulticlassTrainer.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 3bafb09ab3..3fe4628182 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -127,9 +127,9 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role /// public static partial class LightGbm { - [TlcModule.EntryPoint(Name = "Trainers.LightGbmRanker", - Desc = "Train a LightGBM ranking model.", - UserName = LightGbmRankingTrainer.UserName, + [TlcModule.EntryPoint(Name = "Trainers.LightGbmRanker", + Desc = "Train a LightGBM ranking model.", + UserName = LightGbmRankingTrainer.UserName, ShortName = 
LightGbmRankingTrainer.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index f1b8850a72..0011a8d8e6 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -119,9 +119,9 @@ protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, Role /// public static partial class LightGbm { - [TlcModule.EntryPoint(Name = "Trainers.LightGbmRegressor", - Desc = LightGbmRegressorTrainer.Summary, - UserName = LightGbmRegressorTrainer.UserNameValue, + [TlcModule.EntryPoint(Name = "Trainers.LightGbmRegressor", + Desc = LightGbmRegressorTrainer.Summary, + UserName = LightGbmRegressorTrainer.UserNameValue, ShortName = LightGbmRegressorTrainer.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs index 83e0f7803b..eae632eb24 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs @@ -40,9 +40,9 @@ private sealed class CategoricalMetaData private protected readonly LightGbmArguments Args; /// - /// Stores argumments as objects to convert them to invariant string type in the end so that - /// the code is culture agnostic. When retrieving key value from this dictionary as string - /// please convert to string invariant by string.Format(CultureInfo.InvariantCulture, "{0}", Option[key]). + /// Stores argumments as objects to convert them to invariant string type in the end so that + /// the code is culture agnostic. When retrieving key value from this dictionary as string + /// please convert to string invariant by string.Format(CultureInfo.InvariantCulture, "{0}", Option[key]). 
/// private protected readonly Dictionary Options; private protected readonly IParallel ParallelTraining; @@ -467,7 +467,7 @@ private void GetFeatureValueDense(IChannel ch, FloatLabelCursor cursor, Categori hotIdx = j; } } - // All-Zero is category 0. + // All-Zero is category 0. fv = hotIdx - catMetaData.CategoricalBoudaries[i] + 1; } featureValues[i] = fv; @@ -479,8 +479,8 @@ private void GetFeatureValueDense(IChannel ch, FloatLabelCursor cursor, Categori } } - private void GetFeatureValueSparse(IChannel ch, FloatLabelCursor cursor, - CategoricalMetaData catMetaData, IRandom rand, out int[] indices, + private void GetFeatureValueSparse(IChannel ch, FloatLabelCursor cursor, + CategoricalMetaData catMetaData, IRandom rand, out int[] indices, out float[] featureValues, out int cnt) { if (catMetaData.CategoricalBoudaries != null) @@ -680,7 +680,7 @@ private void LoadDataset(IChannel ch, FloatLabelCursor.Factory factory, Dataset // Need push rows to LightGBM. if (numElem + cursor.Features.Count > features.Length) { - // Mini batch size is greater than size of one row. + // Mini batch size is greater than size of one row. // So, at least we have the data of one row. ch.Assert(curRowCount > 0); Utils.EnsureSize(ref indptr, curRowCount + 1); diff --git a/src/Microsoft.ML.LightGBM/WrappedLightGbmDataset.cs b/src/Microsoft.ML.LightGBM/WrappedLightGbmDataset.cs index 991aadfb14..4cb8637e2d 100644 --- a/src/Microsoft.ML.LightGBM/WrappedLightGbmDataset.cs +++ b/src/Microsoft.ML.LightGBM/WrappedLightGbmDataset.cs @@ -137,7 +137,7 @@ public unsafe void SetWeights(float[] weights) if (weights != null) { Contracts.Assert(weights.Length == GetNumRows()); - // Skip SetWeights if all weights are same. + // Skip SetWeights if all weights are same. 
bool allSame = true; for (int i = 1; i < weights.Length; ++i) { diff --git a/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs b/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs index eec00d9bd1..4abdb47354 100644 --- a/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs +++ b/src/Microsoft.ML.LightGBM/WrappedLightGbmInterface.cs @@ -46,7 +46,7 @@ public static extern int FreeArray( IntPtr ret, int type); - #endregion + #endregion #region API ERROR @@ -149,7 +149,7 @@ public static extern int BoosterCreate(IntPtr trainset, public static extern int BoosterAddValidData(IntPtr handle, IntPtr validset); [DllImport(DllName, EntryPoint = "LGBM_BoosterSaveModelToString", CallingConvention = CallingConvention.StdCall)] - public unsafe static extern int BoosterSaveModelToString(IntPtr handle, + public static extern unsafe int BoosterSaveModelToString(IntPtr handle, int numIteration, int bufferLen, ref int outLen, @@ -166,7 +166,7 @@ public unsafe static extern int BoosterSaveModelToString(IntPtr handle, public static extern int BoosterGetEvalCounts(IntPtr handle, ref int outLen); [DllImport(DllName, EntryPoint = "LGBM_BoosterGetEval", CallingConvention = CallingConvention.StdCall)] - public unsafe static extern int BoosterGetEval(IntPtr handle, int dataIdx, + public static extern unsafe int BoosterGetEval(IntPtr handle, int dataIdx, ref int outLen, double* outResult); #endregion diff --git a/src/Microsoft.ML.Maml/HelpCommand.cs b/src/Microsoft.ML.Maml/HelpCommand.cs index e0941f5a93..a815ffc0e5 100644 --- a/src/Microsoft.ML.Maml/HelpCommand.cs +++ b/src/Microsoft.ML.Maml/HelpCommand.cs @@ -344,7 +344,7 @@ private void ShowFormattedSummary(IndentingTextWriter writer, string summary, in // REVIEW: should we replace consecutive spaces with a single space as a preprocessing step? int screenWidth = (columns ?? 
CmdParser.GetConsoleWindowWidth()) - 1; - // GetConsoleWindowWidth returns 0 if command redirection operator is used + // GetConsoleWindowWidth returns 0 if command redirection operator is used if (screenWidth <= 0) screenWidth = 80; diff --git a/src/Microsoft.ML.Onnx/OnnxContextImpl.cs b/src/Microsoft.ML.Onnx/OnnxContextImpl.cs index f37a1ea557..5341b35d55 100644 --- a/src/Microsoft.ML.Onnx/OnnxContextImpl.cs +++ b/src/Microsoft.ML.Onnx/OnnxContextImpl.cs @@ -57,8 +57,8 @@ public OnnxContextImpl(IHostEnvironment env, string name, string producerName, public override bool ContainsColumn(string colName) => _columnNameMap.ContainsKey(colName); /// - /// Stops tracking a column. If removeVariable is true then it also removes the - /// variable associated with it, this is useful in the event where an output variable is + /// Stops tracking a column. If removeVariable is true then it also removes the + /// variable associated with it, this is useful in the event where an output variable is /// created before realizing the transform cannot actually save as ONNX. /// /// IDataView column name to stop tracking @@ -82,7 +82,7 @@ public override void RemoveColumn(string colName, bool removeVariable) } /// - /// Removes an ONNX variable. If removeColumn is true then it also removes the + /// Removes an ONNX variable. If removeColumn is true then it also removes the /// IDataView column associated with it. /// /// ONNX variable to remove. @@ -165,7 +165,7 @@ private string GetUniqueName(string prefix, Func pred) } /// - /// Retrieves the variable name that maps to the IDataView column name at a + /// Retrieves the variable name that maps to the IDataView column name at a /// given point in the pipeline execution. /// /// Column Name mapping. 
@@ -178,7 +178,7 @@ public override string GetVariableName(string colName) } /// - /// Retrieves the variable name that maps to the IDataView column name at a + /// Retrieves the variable name that maps to the IDataView column name at a /// given point in the pipeline execution. /// /// Column Name mapping. diff --git a/src/Microsoft.ML.PCA/PcaTrainer.cs b/src/Microsoft.ML.PCA/PcaTrainer.cs index f90b405c4e..5814d840ca 100644 --- a/src/Microsoft.ML.PCA/PcaTrainer.cs +++ b/src/Microsoft.ML.PCA/PcaTrainer.cs @@ -290,7 +290,7 @@ public static CommonOutputs.AnomalyDetectionOutput TrainPcaAnomaly(IHostEnvironm // - For each new instance, it computes the norm difference between the raw feature vector and the projected feature on that subspace. // - - If the error is close to 0, the instance is considered normal (non-anomaly). // REVIEW: move the predictor to a different file and fold EigenUtils.cs to this file. - // REVIEW: Include the above detail in the XML documentation file. + // REVIEW: Include the above detail in the XML documentation file. 
/// public sealed class PcaPredictor : PredictorBase, IValueMapper, diff --git a/src/Microsoft.ML.PCA/PcaTransform.cs b/src/Microsoft.ML.PCA/PcaTransform.cs index 6efbead226..6de130532e 100644 --- a/src/Microsoft.ML.PCA/PcaTransform.cs +++ b/src/Microsoft.ML.PCA/PcaTransform.cs @@ -337,7 +337,7 @@ private void Train(Arguments args, TransformInfo[] transformInfos, IDataView tra for (int iinfo = 0; iinfo < transformInfos.Length; iinfo++) { - //Orthonormalize Y in-place using stabilized Gram Schmidt algorithm + //Orthonormalize Y in-place using stabilized Gram Schmidt algorithm //Ref: http://en.wikipedia.org/wiki/Gram-Schmidt#Algorithm for (var i = 0; i < oversampledRank[iinfo]; ++i) { @@ -537,10 +537,10 @@ private static void TransformFeatures(IExceptionContext ectx, ref VBuffer dst = new VBuffer(transformInfo.Rank, values, dst.Indices); } - [TlcModule.EntryPoint(Name = "Transforms.PcaCalculator", + [TlcModule.EntryPoint(Name = "Transforms.PcaCalculator", Desc = Summary, - UserName = UserName, - ShortName = ShortName, + UserName = UserName, + ShortName = ShortName, XmlInclude = new[] { @"", @""})] public static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Arguments input) diff --git a/src/Microsoft.ML.Parquet/ParquetLoader.cs b/src/Microsoft.ML.Parquet/ParquetLoader.cs index 2def7006d2..503debae65 100644 --- a/src/Microsoft.ML.Parquet/ParquetLoader.cs +++ b/src/Microsoft.ML.Parquet/ParquetLoader.cs @@ -33,7 +33,7 @@ namespace Microsoft.ML.Runtime.Data public sealed class ParquetLoader : IDataLoader, IDisposable { /// - /// A Column is a singular representation that consolidates all the related column chunks in the + /// A Column is a singular representation that consolidates all the related column chunks in the /// Parquet file. Information stored within the Column includes its name, raw type read from Parquet, /// its corresponding ColumnType, and index. 
/// Complex columns in Parquet like structs, maps, and lists are flattened into multiple columns. diff --git a/src/Microsoft.ML.PipelineInference/AutoInference.cs b/src/Microsoft.ML.PipelineInference/AutoInference.cs index 6ec2894895..54e95e595d 100644 --- a/src/Microsoft.ML.PipelineInference/AutoInference.cs +++ b/src/Microsoft.ML.PipelineInference/AutoInference.cs @@ -62,22 +62,24 @@ public class EntryPointGraphDef /// /// Get the name of the variable asssigned to the Data or Training Data input, based on what is the first node of the subgraph. - /// A better way to do this would be with a ICanBeSubGraphFirstNode common interface between ITransformInput and ITrainerInputs - /// and a custom deserializer. + /// A better way to do this would be with a ICanBeSubGraphFirstNode common interface between ITransformInput and ITrainerInputs + /// and a custom deserializer. /// public string GetSubgraphFirstNodeDataVarName(IExceptionContext ectx) { var nodes = Graph.GetNodes(); - ectx.CheckValue(nodes, nameof(nodes), "Empty Subgraph"); - ectx.CheckValue(nodes[0], nameof(nodes), "Empty Subgraph"); - ectx.CheckValue(nodes[0][FieldNames.Inputs], "Inputs", "Empty subgraph node inputs."); + ectx.Check(nodes != null && nodes.Count > 0, "Empty Subgraph"); + ectx.Check(nodes[0] != null, "Subgraph's first node is empty"); + ectx.Check(nodes[0][FieldNames.Inputs] != null, "Empty subgraph node inputs."); string variableName; if (!GetDataVariableName(ectx, "Data", nodes[0][FieldNames.Inputs], out variableName)) GetDataVariableName(ectx, "TrainingData", nodes[0][FieldNames.Inputs], out variableName); - ectx.CheckNonEmpty(variableName, nameof(variableName), "Subgraph needs to start with an ITransformInput, or an ITrainerInput.
Check your subgraph, or account for variation of the name of the Data input here."); + ectx.CheckNonEmpty(variableName, nameof(variableName), "Subgraph needs to start with an " + + nameof(CommonInputs.ITransformInput) + ", or an " + nameof(CommonInputs.ITrainerInput) + + ". Check your subgraph, or account for variation of the name of the Data input here."); return variableName; } @@ -157,7 +159,7 @@ public sealed class Arguments : ISupportAutoMlStateFactory public AutoMlMlState(IHostEnvironment env, Arguments args) : this(env, - PipelineSweeperSupportedMetrics.GetSupportedMetric(args.Metric), + PipelineSweeperSupportedMetrics.GetSupportedMetric(args.Metric), args.Engine.CreateComponent(env), args.TerminatorArgs.CreateComponent(env), args.TrainerKind, requestedLearners: args.RequestedLearners) { @@ -462,7 +464,7 @@ public void ClearEvaluatedPipelines() /// /// The InferPipelines methods are just public portals to the internal function that handle different /// types of data being passed in: training IDataView, path to training file, or train and test files. - /// + /// public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimizerBase autoMlEngine, IDataView trainData, IDataView testData, int numTransformLevels, int batchSize, SupportedMetric metric, out PipelinePattern bestPipeline, ITerminator terminator, MacroUtils.TrainerKinds trainerKind) @@ -483,7 +485,7 @@ public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimiz { Contracts.CheckValue(env, nameof(env)); - // REVIEW: Should be able to infer schema by itself, without having to + // REVIEW: Should be able to infer schema by itself, without having to // infer recipes. Look into this.
// Set loader settings through inference RecipeInference.InferRecipesFromData(env, trainDataPath, schemaDefinitionFile, diff --git a/src/Microsoft.ML.PipelineInference/AutoMlEngines/DefaultsEngine.cs b/src/Microsoft.ML.PipelineInference/AutoMlEngines/DefaultsEngine.cs index 19583cef8c..929dec86cf 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlEngines/DefaultsEngine.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlEngines/DefaultsEngine.cs @@ -55,7 +55,7 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable do { // Make sure transforms set is valid. Repeat until passes verifier. - pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask), + pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask), learner, "", Env); valid = PipelineVerifier(pipeline, transformsBitMask); count++; diff --git a/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs b/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs index f06fa759e8..867c53053e 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlEngines/RocketEngine.cs @@ -165,7 +165,7 @@ private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference } } - // Take average mass as weight, and take convex combination of + // Take average mass as weight, and take convex combination of // learner-specific weight and unconditioned weight. allWeight /= allCounts > 0 ? allCounts : 1; learnerWeight /= learnerCounts > 0 ? learnerCounts : 1; @@ -182,9 +182,9 @@ private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference sampledTransforms.AddRange(remainingAvailableTransforms.Where(t => AutoMlUtils.AtomicGroupPresent(mask, t.AtomicGroupId))); - // Add final features concat transform. NOTE: computed bitmask should always - // exclude the final features concat. 
If we forget to exclude that one, will - // cause an error in verification, since it isn't included in the original + // Add final features concat transform. NOTE: computed bitmask should always + // exclude the final features concat. If we forget to exclude that one, will + // cause an error in verification, since it isn't included in the original // dependency mapping (i.e., its level isn't in the dictionary). sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData, DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles)); @@ -217,7 +217,7 @@ public override PipelinePattern[] GetNextCandidates(IEnumerable var remainingNum = Math.Min(numStageOneTrials - prevCandidates.Length, numCandidates); if (remainingNum < 1) { - // Select top k learners, update stage, then get requested + // Select top k learners, update stage, then get requested // number of candidates, using second stage logic. UpdateLearners(GetTopLearners(prevCandidates)); _currentStage++; @@ -295,10 +295,10 @@ private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandi AutoMlUtils.PopulateSweepableParams(learner); do - { // Make sure transforms set is valid and have not seen pipeline before. + { // Make sure transforms set is valid and have not seen pipeline before. // Repeat until passes or runs out of chances. 
pipeline = new PipelinePattern( - SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms), + SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms), learner, "", Env); hashKey = GetHashKey(transformsBitMask, learner); valid = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey); diff --git a/src/Microsoft.ML.PipelineInference/AutoMlEngines/UniformRandomEngine.cs b/src/Microsoft.ML.PipelineInference/AutoMlEngines/UniformRandomEngine.cs index 23afce66ff..2ad0137fe9 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlEngines/UniformRandomEngine.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlEngines/UniformRandomEngine.cs @@ -14,9 +14,9 @@ namespace Microsoft.ML.Runtime.PipelineInference { /// - /// Example class of an autoML engine (a pipeline optimizer) that simply tries random enumeration. - /// If we use a third-party solution for autoML, we can just implement a new wrapper for it as a - /// PipelineOptimizerBase, and use our existing autoML body code to take advantage of it. This design + /// Example class of an autoML engine (a pipeline optimizer) that simply tries random enumeration. + /// If we use a third-party solution for autoML, we can just implement a new wrapper for it as a + /// PipelineOptimizerBase, and use our existing autoML body code to take advantage of it. This design /// should allow for easy development of new autoML methods. 
/// public sealed class UniformRandomEngine : PipelineOptimizerBase diff --git a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs index e0bf7dbcca..a8459c5b3c 100644 --- a/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs +++ b/src/Microsoft.ML.PipelineInference/AutoMlUtils.cs @@ -67,8 +67,8 @@ private static T CloneEvaluatorInstance(T evaler) /// /// Using the dependencyMapping and included transforms, determines whether every - /// transform present only consumes columns produced by a lower- or same-level transform, - /// or existed in the original dataset. Note, a column could be produced by a + /// transform present only consumes columns produced by a lower- or same-level transform, + /// or existed in the original dataset. Note, a column could be produced by a /// transform on the same level, such as in multipart (atomic group) transforms. /// public static bool AreColumnsConsistent(TransformInference.SuggestedTransform[] includedTransforms, @@ -173,8 +173,8 @@ private static int[] GetExcludedColumnIndices(TransformInference.SuggestedTransf { List includedColumnIndices = new List(); - // For every column, see if either present in initial dataset, or - // produced by a transform used in current pipeline. + // For every column, see if either present in initial dataset, or + // produced by a transform used in current pipeline. 
for (int columnIndex = 0; columnIndex < dataSample.Schema.ColumnCount; columnIndex++) { // Create ColumnInfo object for indexing dictionary @@ -185,7 +185,7 @@ private static int[] GetExcludedColumnIndices(TransformInference.SuggestedTransf IsHidden = dataSample.Schema.IsHidden(columnIndex) }; - // Exclude all hidden and non-numeric columns + // Exclude all hidden and non-numeric columns if (colInfo.IsHidden || !colInfo.ItemType.IsNumber) continue; @@ -429,7 +429,7 @@ private static void SetValue(PropertyInfo pi, IComparable value, object entryPoi /// /// Updates properties of entryPointObj instance based on the values in sweepParams - /// + /// public static bool UpdateProperties(object entryPointObj, TlcModule.SweepableParamAttribute[] sweepParams) { bool result = true; @@ -484,7 +484,7 @@ public static bool UpdateProperties(object entryPointObj, TlcModule.SweepablePar /// /// Updates properties of entryPointObj instance based on the values in sweepParams - /// + /// public static void PopulateSweepableParams(RecipeInference.SuggestedRecipe.SuggestedLearner learner) { foreach (var param in learner.PipelineNode.SweepParams) diff --git a/src/Microsoft.ML.PipelineInference/ColumnGroupingInference.cs b/src/Microsoft.ML.PipelineInference/ColumnGroupingInference.cs index 7e1c4bfc05..36392c47ac 100644 --- a/src/Microsoft.ML.PipelineInference/ColumnGroupingInference.cs +++ b/src/Microsoft.ML.PipelineInference/ColumnGroupingInference.cs @@ -56,7 +56,7 @@ public InferenceResult(GroupingColumn[] columns) /// Group together the single-valued columns with the same type and purpose and generate column names. /// /// The host environment to use. - /// Whether the original file had a header. + /// Whether the original file had a header. /// If yes, the fields are used to generate the column /// names, otherwise they are ignored. /// The (detected) column types. 
diff --git a/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs b/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs index e8d40fc6c8..a3a8876c7e 100644 --- a/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs +++ b/src/Microsoft.ML.PipelineInference/DatasetFeaturesInference.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML.Runtime.PipelineInference { /// - /// Featurization ideas inspired from: + /// Featurization ideas inspired from: /// http://aad.informatik.uni-freiburg.de/papers/15-NIPS-auto-sklearn-supplementary.pdf /// public static class DatasetFeatureInference diff --git a/src/Microsoft.ML.PipelineInference/ExperimentsGenerator.cs b/src/Microsoft.ML.PipelineInference/ExperimentsGenerator.cs index 6b184e7b61..c9621029d8 100644 --- a/src/Microsoft.ML.PipelineInference/ExperimentsGenerator.cs +++ b/src/Microsoft.ML.PipelineInference/ExperimentsGenerator.cs @@ -109,14 +109,14 @@ public static List GenerateCandidates(IHostEnvironment env, string dataFi RecipeInference.SuggestedRecipe[] recipes = RecipeInference.InferRecipesFromData(env, dataFile, schemaDefinitionFile, out predictorType, out loaderSettings, out inferenceResult); //get all the trainers for this task, and generate the initial set of candidates. - // Exclude the hidden learners, and the metalinear learners. + // Exclude the hidden learners, and the metalinear learners. var trainers = ComponentCatalog.GetAllDerivedClasses(typeof(ITrainer), predictorType).Where(cls => !cls.IsHidden); var loaderSubComponent = new SubComponent("TextLoader", loaderSettings); string loader = $" loader={loaderSubComponent}"; - // REVIEW: there are more learners than recipes atm. - // Flip looping through recipes, than through learners if the cardinality changes. + // REVIEW: there are more learners than recipes atm. + // Flip looping through recipes, than through learners if the cardinality changes. 
foreach (ComponentCatalog.LoadableClassInfo cl in trainers) { string learnerSettings; diff --git a/src/Microsoft.ML.PipelineInference/Interfaces/IPipelineOptimizer.cs b/src/Microsoft.ML.PipelineInference/Interfaces/IPipelineOptimizer.cs index 5fe46ec61e..00d7654c25 100644 --- a/src/Microsoft.ML.PipelineInference/Interfaces/IPipelineOptimizer.cs +++ b/src/Microsoft.ML.PipelineInference/Interfaces/IPipelineOptimizer.cs @@ -143,7 +143,7 @@ protected void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLe var proposedParamSet = sweeper.ProposeSweeps(1, AutoMlUtils.ConvertToRunResults(history, isMaximizingMetric)).First(); Env.Assert(proposedParamSet != null && proposedParamSet.All(ps => hyperParams.Any(hp => hp.Name == ps.Name))); - // Associate proposed param set with learner, so that smart hyperparam + // Associate proposed param set with learner, so that smart hyperparam // sweepers (like KDO) can map them back. learner.PipelineNode.HyperSweeperParamSet = proposedParamSet; diff --git a/src/Microsoft.ML.PipelineInference/Interfaces/ITerminator.cs b/src/Microsoft.ML.PipelineInference/Interfaces/ITerminator.cs index ce51e91031..488c21fc74 100644 --- a/src/Microsoft.ML.PipelineInference/Interfaces/ITerminator.cs +++ b/src/Microsoft.ML.PipelineInference/Interfaces/ITerminator.cs @@ -7,9 +7,9 @@ namespace Microsoft.ML.Runtime.PipelineInference { /// - /// Interface defining various stopping criteria for pipeline sweeps. + /// Interface defining various stopping criteria for pipeline sweeps. /// This could include number of total iterations, compute time, - /// budget expended, etc. + /// budget expended, etc. 
/// public interface ITerminator { diff --git a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs index c5c23ce675..2f70645d8b 100644 --- a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs +++ b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs @@ -211,7 +211,7 @@ public static CommonOutputs.MacroOutput PipelineSweep( } var autoMlState = (AutoInference.AutoMlMlState)input.State; - // The indicators are just so the macro knows those pipelines need to + // The indicators are just so the macro knows those pipelines need to // be run before performing next expansion. If we add them as inputs // to the next iteration, the next iteration cannot run until they have // their values set. Thus, indicators are needed. diff --git a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs index fd22c4d624..02926abb04 100644 --- a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs +++ b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs @@ -65,8 +65,8 @@ public PipelinePattern(TransformInference.SuggestedTransform[] transforms, { // Make sure internal pipeline nodes and sweep params are cloned, not shared. // Cloning the transforms and learner rather than assigning outright - // ensures that this will be the case. Doing this here allows us to not - // worry about changing hyperparameter values in candidate pipelines + // ensures that this will be the case. Doing this here allows us to not + // worry about changing hyperparameter values in candidate pipelines // possibly overwritting other pipelines. 
Transforms = transforms.Select(t => t.Clone()).ToArray(); Learner = learner.Clone(); @@ -122,7 +122,7 @@ public AutoInference.EntryPointGraphDef ToEntryPointGraph(Experiment experiment /// /// This method will return some indentifying string for the pipeline, - /// based on transforms, learner, and (eventually) hyperparameters. + /// based on transforms, learner, and (eventually) hyperparameters. /// public override string ToString() => $"{Learner}+{string.Join("+", Transforms.Select(t => t.ToString()))}"; @@ -141,7 +141,7 @@ public Experiment CreateTrainTestExperiment(IDataView trainData, IDataView testD Var firstInput = new Var { VarName = graphDef.GetSubgraphFirstNodeDataVarName(_env) }; var finalOutput = graphDef.ModelOutput; - // TrainTestMacro + // TrainTestMacro var trainTestInput = new Models.TrainTestEvaluator { TransformModel = null, @@ -178,7 +178,7 @@ public Models.TrainTestEvaluator.Output AddAsTrainTest(Var trainData, var firstInput = new Var { VarName = graphDef.GetSubgraphFirstNodeDataVarName(_env) }; var finalOutput = graphDef.ModelOutput; - // TrainTestMacro + // TrainTestMacro var trainTestInput = new Models.TrainTestEvaluator { Nodes = subGraph, diff --git a/src/Microsoft.ML.PipelineInference/PurposeInference.cs b/src/Microsoft.ML.PipelineInference/PurposeInference.cs index b870b392f3..8e7c32084e 100644 --- a/src/Microsoft.ML.PipelineInference/PurposeInference.cs +++ b/src/Microsoft.ML.PipelineInference/PurposeInference.cs @@ -11,7 +11,7 @@ namespace Microsoft.ML.Runtime.PipelineInference { /// - /// Automatic inference of column purposes for the data view. + /// Automatic inference of column purposes for the data view. /// This is used in the context of text import wizard, but can be used outside as well. /// public static class PurposeInference @@ -51,10 +51,10 @@ public InferenceResult(Column[] columns) } /// - /// The design is the same as for : there's a sequence of 'experts' - /// that each look at all the columns. 
Every expert may or may not assign the 'answer' (suggested purpose) - /// to a column. If the expert needs some information about the column (for example, the column values), - /// this information is lazily calculated by the column object, not the expert itself, to allow the reuse + /// The design is the same as for : there's a sequence of 'experts' + /// that each look at all the columns. Every expert may or may not assign the 'answer' (suggested purpose) + /// to a column. If the expert needs some information about the column (for example, the column values), + /// this information is lazily calculated by the column object, not the expert itself, to allow the reuse /// of the same information by another expert. /// private interface IPurposeInferenceExpert diff --git a/src/Microsoft.ML.PipelineInference/TextFileContents.cs b/src/Microsoft.ML.PipelineInference/TextFileContents.cs index 6b593b2123..cdf90d350b 100644 --- a/src/Microsoft.ML.PipelineInference/TextFileContents.cs +++ b/src/Microsoft.ML.PipelineInference/TextFileContents.cs @@ -12,7 +12,7 @@ namespace Microsoft.ML.Runtime.PipelineInference { /// - /// Utilities for various heuristics against text files. + /// Utilities for various heuristics against text files. /// Currently, separator inference and column count detection. /// public static class TextFileContents @@ -42,8 +42,8 @@ public ColumnSplitResult(bool isSuccess, string separator, bool allowQuote, bool /// /// Attempt to detect text loader arguments. - /// The algorithm selects the first 'acceptable' set: the one that recognizes the same number of columns in at - /// least of the sample's lines, + /// The algorithm selects the first 'acceptable' set: the one that recognizes the same number of columns in at + /// least of the sample's lines, /// and this number of columns is more than 1. /// We sweep on separator, allow sparse and allow quote parameter. 
/// @@ -90,7 +90,7 @@ from _sep in separatorCandidates else { // REVIEW: May need separate messages for GUI-specific and non-specific. This component can be used - // by itself outside the GUI. + // by itself outside the GUI. ch.Info("Couldn't determine columns in the file using separators {0}. Does the input file consist of only a single column? " + "If so, in TLC GUI, please close the import wizard, and then, in the loader settings to the right, manually add a column, " + "choose a name, and set source index to 0.", diff --git a/src/Microsoft.ML.PipelineInference/TextFileSample.cs b/src/Microsoft.ML.PipelineInference/TextFileSample.cs index ffec976ee0..a2820a104d 100644 --- a/src/Microsoft.ML.PipelineInference/TextFileSample.cs +++ b/src/Microsoft.ML.PipelineInference/TextFileSample.cs @@ -18,7 +18,7 @@ public sealed class TextFileSample : IMultiStreamSource { // REVIEW: consider including multiple files via IMultiStreamSource. - // REVIEW: right now, it expects 0x0A being the trailing character of line break. + // REVIEW: right now, it expects 0x0A being the trailing character of line break. // Consider a more general implementation. private const int BufferSizeMb = 4; @@ -73,12 +73,12 @@ public TextReader OpenTextReader(int index) public long? ApproximateRowCount => _approximateRowCount; /// - /// Create a by reading multiple chunks from the file (or other source) and + /// Create a by reading multiple chunks from the file (or other source) and /// then stitching them together. The algorithm is as follows: /// 0. If the source is not seekable, revert to . /// 1. If the file length is less than 2 * , revert to . /// 2. Read first MB chunk. Determine average line length in the chunk. - /// 3. Determine how large one chunk should be, and how many chunks there should be, to end up + /// 3. Determine how large one chunk should be, and how many chunks there should be, to end up /// with * MB worth of lines. /// 4. Determine seek locations and read the chunks. /// 5. 
Stitch and return a . @@ -102,7 +102,7 @@ public static TextFileSample CreateFromFullFile(IHostEnvironment env, string pat Contracts.Assert(count == firstChunk.Length); if (!IsEncodingOkForSampling(firstChunk)) return CreateFromHead(path); - // REVIEW: CreateFromHead still truncates the file before the last 0x0A byte. For multi-byte encoding, + // REVIEW: CreateFromHead still truncates the file before the last 0x0A byte. For multi-byte encoding, // this might cause an unfinished string to be present in the buffer. Right now this is considered an acceptable // price to pay for parse-free processing. @@ -113,7 +113,7 @@ public static TextFileSample CreateFromFullFile(IHostEnvironment env, string pat long approximateRowCount = (long)(lineCount * fileSize * 1.0 / firstChunk.Length); var firstNewline = Array.FindIndex(firstChunk, x => x == '\n'); - // First line may be header, so we exclude it. The remaining lineCount-1 line breaks are + // First line may be header, so we exclude it. The remaining lineCount-1 line breaks are // splitting the text into lineCount lines, and the last line is actually half-size. Double averageLineLength = 2.0 * (firstChunk.Length - firstNewline) / (lineCount * 2 - 1); averageLineLength = Math.Max(averageLineLength, 3); @@ -173,9 +173,9 @@ public static TextFileSample CreateFromHead(string path) /// /// Given an array of chunks of the text file, of which the first chunk is the head, - /// this method trims incomplete lines from the beginning and end of each chunk + /// this method trims incomplete lines from the beginning and end of each chunk /// (except that it doesn't trim the beginning of the first chunk and end of last chunk if we read whole file), - /// then joins the rest together to form a final byte buffer and returns a + /// then joins the rest together to form a final byte buffer and returns a /// wrapped around it. 
/// /// did we read whole file @@ -213,7 +213,7 @@ private static byte[] StitchChunks(bool wholeFile, params byte[][] chunks) } /// - /// Detect whether we can auto-detect EOL characters without parsing. + /// Detect whether we can auto-detect EOL characters without parsing. /// If we do, we can cheaply sample from different file locations and trim the partial strings. /// The encodings that pass the test are UTF8 and all single-byte encodings. /// @@ -222,7 +222,7 @@ private static bool IsEncodingOkForSampling(byte[] buffer) // First check if a BOM/signature exists (sourced from http://www.unicode.org/faq/utf_bom.html#bom4) if (buffer.Length >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF) { - // UTF-32, big-endian + // UTF-32, big-endian return false; } if (buffer.Length >= 4 && buffer[0] == 0xFF && buffer[1] == 0xFE && buffer[2] == 0x00 && buffer[3] == 0x00) @@ -251,7 +251,7 @@ private static bool IsEncodingOkForSampling(byte[] buffer) return true; } - // No BOM/signature was found, so now we need to 'sniff' the file to see if can manually discover the encoding. + // No BOM/signature was found, so now we need to 'sniff' the file to see if can manually discover the encoding. int sniffLim = Math.Min(1000, buffer.Length); // Some text files are encoded in UTF8, but have no BOM/signature. Hence the below manually checks for a UTF8 pattern. This code is based off diff --git a/src/Microsoft.ML.PipelineInference/TransformInference.cs b/src/Microsoft.ML.PipelineInference/TransformInference.cs index d0475f8637..b636c0d058 100644 --- a/src/Microsoft.ML.PipelineInference/TransformInference.cs +++ b/src/Microsoft.ML.PipelineInference/TransformInference.cs @@ -14,12 +14,12 @@ namespace Microsoft.ML.Runtime.PipelineInference { /// /// Auto-generate set of transforms for the data view, given the purposes of specified columns. 
- /// - /// The design is the same as for : there's a sequence of 'experts' + /// + /// The design is the same as for : there's a sequence of 'experts' /// that each look at all the columns. Every expert may or may not suggest additional transforms. - /// If the expert needs some information about the column (for example, the column values), - /// this information is lazily calculated by the column object, not the expert itself, to allow the reuse - /// of the same information by another expert. + /// If the expert needs some information about the column (for example, the column values), + /// this information is lazily calculated by the column object, not the expert itself, to allow the reuse + /// of the same information by another expert. /// public static class TransformInference { @@ -55,7 +55,7 @@ public struct SuggestedTransform : IEquatable public TransformPipelineNode PipelineNode; // Used for grouping transforms that must occur together public int AtomicGroupId { get; set; } - // Stores which columns are consumed by this transform, + // Stores which columns are consumed by this transform, // and which are produced, at which level. public ColumnRoutingStructure RoutingStructure { get; set; } public bool AlwaysInclude { get; set; } @@ -667,9 +667,9 @@ private bool IsDictionaryOk(IntermediateColumn column, Double dataSampleFraction int total; int unique; int singletons; - // REVIEW: replace with proper Good-Turing estimation. - // REVIEW: This looks correct; cf. equation (8) of Katz S. "Estimation of Probabilities from - // Sparse Data for the Language Model Component of a Speech Recognizer" (1987), taking into account that + // REVIEW: replace with proper Good-Turing estimation. + // REVIEW: This looks correct; cf. equation (8) of Katz S. 
"Estimation of Probabilities from + // Sparse Data for the Language Model Component of a Speech Recognizer" (1987), taking into account that // the singleton count was estimated from a fraction of the data (and assuming the estimate is // roughly the same for the entire sample). column.GetUniqueValueCounts(out unique, out singletons, out total); @@ -1141,7 +1141,7 @@ public override IEnumerable Apply(IntermediateColumn[] colum $"Apply text featurizer transform on text features for column '{column.ColumnName}'", args, typeof(Text), new TransformPipelineNode(epInput), -1, routingStructure); } - // Concat text featurized columns into existing feature column, if transformed at least one column. + // Concat text featurized columns into existing feature column, if transformed at least one column. if (!inferenceArgs.ExcludeFeaturesConcatTransforms && featureCols.Count > 0) { yield return InferenceHelpers.GetRemainingFeatures(featureCols, columns, GetType(), IncludeFeaturesOverride); diff --git a/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs b/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs index 71244b6536..d516fb1d5f 100644 --- a/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs +++ b/src/Microsoft.ML.ResultProcessor/ResultProcessor.cs @@ -360,10 +360,10 @@ public class ResultProcessor private const string FoldSeparatorString = "----------------------------------------------------------------------------------------"; - private readonly static Regex _rxNameValue = new Regex(@"(?.+)\s*:\s*(?\S+)", RegexOptions.Compiled); - private readonly static Regex _rxNameValueDeviation = new Regex(@"(?.+)\s*:\s*(?\S+)\s*\((?\S+)\)", RegexOptions.Compiled); - private readonly static Regex _rxTimeElapsed = new Regex(@"(?.*)\t Time elapsed\(s\): (?[\d\.]*)", RegexOptions.Compiled); - private readonly static Regex _rxMemoryUsage = new Regex(@"(?[\w]+) memory usage\(MB\): (?[\d]*)", RegexOptions.Compiled); + private static readonly Regex _rxNameValue = new 
Regex(@"(?.+)\s*:\s*(?\S+)", RegexOptions.Compiled); + private static readonly Regex _rxNameValueDeviation = new Regex(@"(?.+)\s*:\s*(?\S+)\s*\((?\S+)\)", RegexOptions.Compiled); + private static readonly Regex _rxTimeElapsed = new Regex(@"(?.*)\t Time elapsed\(s\): (?[\d\.]*)", RegexOptions.Compiled); + private static readonly Regex _rxMemoryUsage = new Regex(@"(?[\w]+) memory usage\(MB\): (?[\d]*)", RegexOptions.Compiled); public static bool CheckEndOfFileReached(string[] lines) { @@ -1134,7 +1134,7 @@ private static Experiment CreateVisualizationExperiment(ExperimentItemResult res /// /// Deserialize a predictor, returning as an object - /// + /// private static object Load(Stream stream) { BinaryFormatter bf = new BinaryFormatter(); diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineInterface.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineInterface.cs index b5fdbd0262..a4a2b79787 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineInterface.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineInterface.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.Runtime.FactorizationMachine { - internal unsafe static class FieldAwareFactorizationMachineInterface + internal static unsafe class FieldAwareFactorizationMachineInterface { internal const string NativePath = "FactorizationMachineNative"; public const int CbAlign = 16; @@ -22,7 +22,7 @@ private static bool Compat(AlignedArray a) return a.CbAlign == CbAlign; } - private unsafe static float* Ptr(AlignedArray a, float* p) + private static unsafe float* Ptr(AlignedArray a, float* p) { Contracts.AssertValue(a); float* q = p + a.GetBase((long)p); diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs b/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs index d7bccdd887..7fd292eb9d 100644 --- 
a/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs @@ -23,7 +23,7 @@ namespace Microsoft.ML.Runtime.Numeric /// /// A delegate for indexed sets of functions with gradients. - /// + /// /// REVIEW: I didn't add an here, since it looks like this code is not actually /// accessed from anywhere. Maybe it should go away? /// @@ -240,7 +240,7 @@ public static Float Test(DifferentiableFunction f, ref VBuffer x, bool qu /// /// The head of the test output /// - public readonly static string Header = "Trial Numeric deriv Analytic deriv Difference Normalized"; + public static readonly string Header = "Trial Numeric deriv Analytic deriv Difference Normalized"; /// /// Tests the gradient using finite differences on each axis (appropriate for small functions) diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs index 25117f2c14..88fcd47531 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs @@ -119,14 +119,14 @@ public override Float Eval(ref VBuffer input, ref VBuffer gradient /// public abstract class OptimizerState { -#pragma warning disable TLC_GeneralName // Too annoying in this case. Consider fixing later. +#pragma warning disable MSML_GeneralName // Too annoying in this case. Consider fixing later. 
protected internal VBuffer _x; protected internal VBuffer _grad; protected internal VBuffer _newX; protected internal VBuffer _newGrad; protected internal VBuffer _dir; protected internal VBuffer _steepestDescDir; -#pragma warning restore TLC_GeneralName +#pragma warning restore MSML_GeneralName /// /// The dimensionality of the function diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs index 81b38e6976..04d3e2d9e4 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs @@ -231,8 +231,8 @@ protected enum MetricKind // The maximum number of dual variables SDCA intends to support. // Actual bound of training dataset size may depend on hardware limit. - // Note that currently the maximum dimension linear learners can support is about 2 billion, - // it is not clear if training a linear learner with more than 10^15 examples provides + // Note that currently the maximum dimension linear learners can support is about 2 billion, + // it is not clear if training a linear learner with more than 10^15 examples provides // substantial additional benefits in terms of accuracy. private const long MaxDualTableSize = 1L << 50; private const Float L2LowerBound = 1e-09f; @@ -340,7 +340,7 @@ protected sealed override TPredictor TrainCore(IChannel ch, RoleMappedData data, // REVIEW: Is 1024 a good lower bound to enforce sparsity? if (1024 < count && count < (long)idLoMax / 5) { - // The distribution of id.Lo is sparse in [0, idLoMax]. + // The distribution of id.Lo is sparse in [0, idLoMax]. // Building a lookup table is more memory efficient. needLookup = true; } @@ -351,7 +351,7 @@ protected sealed override TPredictor TrainCore(IChannel ch, RoleMappedData data, { // Note: At this point, 'count' may be less than the actual count of training examples. 
// We initialize the hash table with this partial size to avoid unnecessary rehashing. - // However, it does not mean there are exactly 'count' many trainining examples. + // However, it does not mean there are exactly 'count' many trainining examples. // Necessary rehashing will still occur as the hash table grows. idToIdx = new IdToIdxLookup(count); // Resetting 'count' to zero. @@ -444,8 +444,8 @@ protected sealed override TPredictor TrainCore(IChannel ch, RoleMappedData data, else { // The dual variables do not fit into standard float[]. - // Using BigArray instead. - // Storing the invariants gives rise to too large memory consumption, + // Using BigArray instead. + // Storing the invariants gives rise to too large memory consumption, // so we favor re-computing the invariants instead of storing them. Contracts.Assert(dualsLength <= MaxDualTableSize); duals = new BigArrayDualsTable(dualsLength); @@ -531,7 +531,7 @@ protected sealed override TPredictor TrainCore(IChannel ch, RoleMappedData data, pch.SetHeader(new ProgressHeader(metricNames, new[] { "iterations" }), e => e.SetProgress(0, iter, maxIterations)); // Separate logic is needed for single-thread execution to ensure the result is deterministic. - // Note that P.Invoke does not ensure that the actions executes in order even if maximum number of threads is set to 1. + // Note that P.Invoke does not ensure that the actions executes in order even if maximum number of threads is set to 1. if (numThreads == 1) { // The synchorized SDCA procedure. @@ -678,7 +678,7 @@ private void InitializeConvergenceMetrics(out string[] names, out Double[] initi /// It may be null. When it is null, the training examples are not shuffled and are cursored in its original order. /// /// - /// The id to index mapping. May be null. If it is null, the index is given by the + /// The id to index mapping. May be null. If it is null, the index is given by the /// corresponding lower bits of the id. 
/// /// The number of threads used in parallel training. It is used in computing the dual update. @@ -686,33 +686,33 @@ private void InitializeConvergenceMetrics(out string[] names, out Double[] initi /// The dual variables. For binary classification and regression, there is one dual variable per row. /// For multiclass classification, there is one dual variable per class per row. /// - /// The array containing regularized bias terms. For binary classification or regression, + /// The array containing regularized bias terms. For binary classification or regression, /// it contains only a single value. For multiclass classification its size equals the number of classes. /// - /// The dual updates invariants. It may be null. If not null, it holds an array of pre-computed numerical quantities + /// The dual updates invariants. It may be null. If not null, it holds an array of pre-computed numerical quantities /// that depend on the training example label and features, not the value of dual variables. /// /// The precomputed numerical quantity 1 / (l2Const * (count of training examples)). /// - /// The weights array. For binary classification or regression, it consists of only one VBuffer. + /// The weights array. For binary classification or regression, it consists of only one VBuffer. /// For multiclass classification, its size equals the number of classes. /// /// - /// The array containing unregularized bias terms. For binary classification or regression, - /// it contains only a single value. For multiclass classification its size equals the number of classes. + /// The array containing unregularized bias terms. For binary classification or regression, + /// it contains only a single value. For multiclass classification its size equals the number of classes. /// /// - /// The array holding the intermediate weights prior to making L1 shrinkage adjustment. It is null iff l1Threshold is zero. 
- /// Otherwise, for binary classification or regression, it consists of only one VBuffer; + /// The array holding the intermediate weights prior to making L1 shrinkage adjustment. It is null iff l1Threshold is zero. + /// Otherwise, for binary classification or regression, it consists of only one VBuffer; /// for multiclass classification, its size equals the number of classes. /// /// - /// The array holding the intermediate bias prior to making L1 shrinkage adjustment. It is null iff l1Threshold is zero. - /// Otherwise, for binary classification or regression, it consists of only one value; + /// The array holding the intermediate bias prior to making L1 shrinkage adjustment. It is null iff l1Threshold is zero. + /// Otherwise, for binary classification or regression, it consists of only one value; /// for multiclass classification, its size equals the number of classes. /// /// - /// The array holding the pre-computed squared L2-norm of features for each training example. It may be null. It is always null for + /// The array holding the pre-computed squared L2-norm of features for each training example. It may be null. It is always null for /// binary classification and regression because this quantity is not needed. /// protected virtual void TrainWithoutLock(IProgressChannelProvider progress, FloatLabelCursor.Factory cursorFactory, IRandom rand, @@ -762,7 +762,7 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float var dualUpdate = Loss.DualUpdate(output, label, dual, invariant, numThreads); // The successive over-relaxation apporach to adjust the sum of dual variables (biasReg) to zero. - // Reference to details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf pp. 16-17. + // Reference to details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf pp. 16-17. var adjustment = l1ThresholdZero ? 
lr * biasReg[0] : lr * l1IntermediateBias[0]; dualUpdate -= adjustment; bool success = false; @@ -812,7 +812,7 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float } /// - /// Returns whether the algorithm converged, and also populates the + /// Returns whether the algorithm converged, and also populates the /// (which is expected to be parallel to the names returned by ). /// When called, the is expected to hold the previously reported values. /// @@ -824,33 +824,33 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float /// For multiclass classification, there is one dual variable per class per row. /// /// - /// The id to index mapping. May be null. If it is null, the index is given by the + /// The id to index mapping. May be null. If it is null, the index is given by the /// corresponding lower bits of the id. /// /// - /// The weights array. For binary classification or regression, it consists of only one VBuffer. + /// The weights array. For binary classification or regression, it consists of only one VBuffer. /// For multiclass classification, its size equals the number of classes. /// /// - /// The weights array that corresponds to the best model obtained from the training iterations thus far. + /// The weights array that corresponds to the best model obtained from the training iterations thus far. /// /// - /// The array containing unregularized bias terms. For binary classification or regression, - /// it contains only a single value. For multiclass classification its size equals the number of classes. + /// The array containing unregularized bias terms. For binary classification or regression, + /// it contains only a single value. For multiclass classification its size equals the number of classes. /// /// - /// The array containing unregularized bias terms corresponding to the best model obtained from the training iterations thus far. 
- /// For binary classification or regression, it contains only a single value. - /// For multiclass classification its size equals the number of classes. + /// The array containing unregularized bias terms corresponding to the best model obtained from the training iterations thus far. + /// For binary classification or regression, it contains only a single value. + /// For multiclass classification its size equals the number of classes. /// /// - /// The array containing regularized bias terms. For binary classification or regression, - /// it contains only a single value. For multiclass classification its size equals the number of classes. + /// The array containing regularized bias terms. For binary classification or regression, + /// it contains only a single value. For multiclass classification its size equals the number of classes. /// /// - /// The array containing regularized bias terms corresponding to the best model obtained from the training iterations thus far. - /// For binary classification or regression, it contains only a single value. - /// For multiclass classification its size equals the number of classes. + /// The array containing regularized bias terms corresponding to the best model obtained from the training iterations thus far. + /// For binary classification or regression, it contains only a single value. + /// For multiclass classification its size equals the number of classes. /// /// /// The count of (valid) training examples. Bad training examples are excluded from this count. @@ -929,7 +929,7 @@ protected virtual bool CheckConvergence( if (metrics[(int)MetricKind.Loss] < bestPrimalLoss) { - // Maintain a copy of weights and bias with best primal loss thus far. + // Maintain a copy of weights and bias with best primal loss thus far. // This is some extra work and uses extra memory, but it seems worth doing it. // REVIEW: Sparsify bestWeights? 
weights[0].CopyTo(ref bestWeights[0]); @@ -957,7 +957,7 @@ protected virtual Float[] InitializeFeatureNormSquared(int length) protected delegate void Visitor(long index, ref Float value); /// - /// Encapsulates the common functionality of storing and + /// Encapsulates the common functionality of storing and /// retrieving the dual variables. /// protected abstract class DualsTableBase @@ -1086,16 +1086,16 @@ protected Func GetIndexFromIdAndRowGetter(IdToIdxLookup idT } } - // REVIEW: This data structure is an extension of HashArray. It may have general + // REVIEW: This data structure is an extension of HashArray. It may have general // purpose of usage to store Id. Should consider lifting this class in the future. - // This class can also be made to accommodate generic type, as long as the type implements a + // This class can also be made to accommodate generic type, as long as the type implements a // good 64-bit hash function. /// /// A hash table data structure to store Id of type , - /// and accommodates size larger than 2 billion. This class is an extension based on BCL. - /// Two operations are supported: adding and retrieving an id with asymptotically constant complexity. - /// The bucket size are prime numbers, starting from 3 and grows to the next prime larger than - /// double the current size until it reaches the maximum possible size. When a table growth is triggered, + /// and accommodates size larger than 2 billion. This class is an extension based on BCL. + /// Two operations are supported: adding and retrieving an id with asymptotically constant complexity. + /// The bucket size are prime numbers, starting from 3 and grows to the next prime larger than + /// double the current size until it reaches the maximum possible size. When a table growth is triggered, /// the table growing operation initializes a new larger bucket and rehash the existing entries to /// the new bucket. Such operation has an expected complexity proportional to the size. 
/// @@ -1288,7 +1288,7 @@ private static class HashHelpers public const long MaxPrime = 0x7FFFFFFFFFFFFFE7; // Table of prime numbers to use as hash table sizes. - // Each subsequent prime, except the last in the list, ensures that the table will at least double in size + // Each subsequent prime, except the last in the list, ensures that the table will at least double in size // upon each growth in order to improve the efficiency of the hash table. // See https://oeis.org/A065545 for the sequence with a[1] = 3, a[k] = next_prime(2 * a[k - 1]). public static readonly long[] Primes = @@ -1303,7 +1303,7 @@ private static class HashHelpers 6173400291209582429, MaxPrime }; - // Returns size of hashtable to grow to. + // Returns size of hashtable to grow to. public static long ExpandPrime(long oldSize) { long newSize = 2 * oldSize; @@ -1566,7 +1566,7 @@ protected override TScalarPredictor TrainCore(IChannel ch, RoleMappedData data, bool converged = false; var watch = new Stopwatch(); - // REVIEW: Investigate using parallel row cursor set instead of getting cursor independently. The convergence of SDCA need to be verified. + // REVIEW: Investigate using parallel row cursor set instead of getting cursor independently. The convergence of SDCA need to be verified. Action checkConvergence = (e, pch) => { if (e % checkFrequency == 0 && e != _args.MaxIterations) @@ -1602,7 +1602,7 @@ protected override TScalarPredictor TrainCore(IChannel ch, RoleMappedData data, watch.Start(); - //Reference: Leon Bottou. Stochastic Gradient Descent Tricks. + //Reference: Leon Bottou. Stochastic Gradient Descent Tricks. 
//http://research.microsoft.com/pubs/192769/tricks-2012.pdf var trainingTasks = new Action[_args.MaxIterations]; @@ -1623,8 +1623,8 @@ protected override TScalarPredictor TrainCore(IChannel ch, RoleMappedData data, Float label = cursor.Label; Float derivative = cursor.Weight * lossFunc.Derivative(WScaledDot(ref features, weightScaling, ref weights, bias), label); // complexity: O(k) - //Note that multiplying the gradient by a weight h is not equivalent to doing h updates - //on the same instance. A potentially better way to do weighted update is described in + //Note that multiplying the gradient by a weight h is not equivalent to doing h updates + //on the same instance. A potentially better way to do weighted update is described in //https://dslpitt.org/uai/papers/11/p392-karampatziakis.pdf if (label > 0) derivative *= positiveInstanceWeight; @@ -1664,7 +1664,7 @@ protected override TScalarPredictor TrainCore(IChannel ch, RoleMappedData data, using (var pch = Host.StartProgressChannel("SGD Training")) { // Separate logic is needed for single-thread execution to ensure the result is deterministic. - // Note that P.Invoke does not ensure that the actions executes in order even if maximum number of threads is set to 1. + // Note that P.Invoke does not ensure that the actions executes in order even if maximum number of threads is set to 1. 
if (numThreads == 1) { int iter = 0; @@ -1736,7 +1736,7 @@ public static partial class Sdca Desc = "Train an SDCA binary model.", UserName = LinearClassificationTrainer.UserNameValue, ShortName = LinearClassificationTrainer.LoadNameValue, - XmlInclude = new[] { @"", + XmlInclude = new[] { @"", @"" })] public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, LinearClassificationTrainer.Arguments input) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index 537364a56a..87c1cc592b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -320,7 +320,7 @@ public override TPredictor Train(TrainContext context) return pred; } } - + private void TrainCore(IChannel ch, RoleMappedData data) { Host.AssertValue(ch); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index e896b69e1c..09e2bbbcc4 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -127,7 +127,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0)); - // Compute null deviance, i.e., the deviance of null hypothesis. + // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. 
Double priorPosRate = _posWeight / WeightSum; Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1); @@ -197,7 +197,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. var hessian = new Double[hessianDimension]; // Initialize diagonal elements with L2 regularizers except for the first entry (index 0) - // Since bias is not regularized. + // Since bias is not regularized. if (L2Weight > 0) { // i is the array index of the diagonal entry at iRow-th row and iRow-th column. @@ -392,7 +392,7 @@ protected override ParameterMixingCalibratedPredictor CreatePredictor() ShortName = ShortName, XmlInclude = new[] { @"", @""})] - + public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, Arguments input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 95a76245b0..5bf0511540 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -56,11 +56,11 @@ public sealed class Arguments : ArgumentsBase // These label names are used for model saving in place of class number // to make the model summary more user friendly. These names are populated // in the CheckLabel() method. - // It could be null, if the label type is not a key type, or there is + // It could be null, if the label type is not a key type, or there is // missing label name for some class. private string[] _labelNames; - // The prior distribution of data. + // The prior distribution of data. // This array is of length equal to the number of classes. // After training, it stores the total weights of training examples in each class. 
private Double[] _prior; @@ -101,10 +101,10 @@ protected override void CheckLabel(RoleMappedData data) VBuffer labelNames = default(VBuffer); schema.GetMetadata(MetadataUtils.Kinds.KeyValues, labelIdx, ref labelNames); - // If label names is not dense or contain NA or default value, then it follows that + // If label names is not dense or contain NA or default value, then it follows that // at least one class does not have a valid name for its label. If the label names we // try to get from the metadata are not unique, we may also not use them in model summary. - // In both cases we set _labelNames to null and use the "Class_n", where n is the class number + // In both cases we set _labelNames to null and use the "Class_n", where n is the class number // for model summary saving instead. if (!labelNames.IsDense) { @@ -251,7 +251,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. ch.Info("Residual Deviance: \t{0}", deviance); - // Compute null deviance, i.e., the deviance of null hypothesis. + // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. Float nullDeviance = 0; for (int iLabel = 0; iLabel < _numClasses; iLabel++) @@ -366,7 +366,7 @@ internal MulticlassLogisticRegressionPredictor(IHostEnvironment env, ref VBuffer } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// This constructor is called by to create the predictor. /// /// The host environment. @@ -484,7 +484,7 @@ private MulticlassLogisticRegressionPredictor(IHostEnvironment env, ModelLoadCon InputType = new VectorType(NumberType.Float, _numFeatures); OutputType = new VectorType(NumberType.Float, _numClasses); - // REVIEW: Should not save the label names duplicately with the predictor again. + // REVIEW: Should not save the label names duplicately with the predictor again. // Get it from the label column schema metadata instead. 
string[] labelNames = null; if (ctx.TryLoadBinaryStream(LabelNamesSubModelFilename, r => labelNames = LoadLabelNames(ctx, r))) diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 22da22e100..91874291b0 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -78,7 +78,7 @@ private static VersionInfo GetVersionInfo() // The standard errors of coefficients, including the bias. // The standard error of bias is placed at index zero. - // It could be null when there are too many non-zero weights so that + // It could be null when there are too many non-zero weights so that // the memory is insufficient to hold the Hessian matrix necessary for the computation // of the variance-covariance matrix. private readonly VBuffer? _coeffStdError; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index bb6c6101ae..0a73d55395 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -26,7 +26,7 @@ namespace Microsoft.ML.Runtime.Learners { - /// + /// public sealed class MultiClassNaiveBayesTrainer : TrainerBase { public const string LoadName = "MultiClassNaiveBayes"; @@ -121,8 +121,8 @@ public override MultiClassNaiveBayesPredictor Train(TrainContext context) [TlcModule.EntryPoint(Name = "Trainers.NaiveBayesClassifier", Desc = "Train a MultiClassNaiveBayesTrainer.", - UserName = UserName, - ShortName = ShortName, + UserName = UserName, + ShortName = ShortName, XmlInclude = new[] { @"", @"" })] public static CommonOutputs.MulticlassClassificationOutput TrainMultiClassNaiveBayesTrainer(IHostEnvironment env, Arguments input) diff --git 
a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs index ed1bc17aad..c123411edd 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs @@ -36,7 +36,7 @@ namespace Microsoft.ML.Runtime.Learners using TScalarPredictor = IPredictorProducing; using TScalarTrainer = ITrainer>; - /// + /// public sealed class Ova : MetaMulticlassTrainer { internal const string LoadNameValue = "OVA"; @@ -202,8 +202,8 @@ public static ModelOperations.PredictorModelOutput CombineOvaModels(IHostEnviron host.CheckValue(input, nameof(input)); EntryPointUtils.CheckInputArgs(host, input); host.CheckNonEmpty(input.ModelArray, nameof(input.ModelArray)); - // Something tells me we should put normalization as part of macro expansion, but since i get - // subgraph instead of learner it's a bit tricky to get learner and decide should we add + // Something tells me we should put normalization as part of macro expansion, but since i get + // subgraph instead of learner it's a bit tricky to get learner and decide should we add // normalization node or not, plus everywhere in code we leave that reposnsibility to TransformModel. var normalizedView = input.ModelArray[0].TransformModel.Apply(host, input.TrainingData); using (var ch = host.Start("CombineOvaModels")) diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs index 073488f75c..193c8f0290 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs @@ -32,7 +32,7 @@ namespace Microsoft.ML.Runtime.Learners using CR = RoleMappedSchema.ColumnRole; /// - /// In this strategy, a binary classification algorithm is trained on each pair of classes. 
+ /// In this strategy, a binary classification algorithm is trained on each pair of classes. /// The pairs are unordered but created with replacement: so, if there were three classes, 0, 1, /// 2, we would train classifiers for the pairs (0,0), (0,1), (0,2), (1,1), (1,2), /// and(2,2). For each binary classifier, an input data point is considered a @@ -44,7 +44,7 @@ namespace Microsoft.ML.Runtime.Learners /// pair. /// /// These two can allow you to exploit trainers that do not naturally have a - /// multiclass option, e.g., using the Runtime.FastTree.FastTreeBinaryClassificationTrainer + /// multiclass option, e.g., using the Runtime.FastTree.FastTreeBinaryClassificationTrainer /// to solve a multiclass problem. /// Alternately, it can allow ML.NET to solve a "simpler" problem even in the cases /// where the trainer has a multiclass option, but using it directly is not diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index d8bcd68c9f..a8c3af7a5e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -123,9 +123,9 @@ protected override void ProcessPriorDistribution(Float label, Float weight) // No-op by design. 
} - [TlcModule.EntryPoint(Name = "Trainers.PoissonRegressor", - Desc = "Train an Poisson regression model.", - UserName = UserNameValue, + [TlcModule.EntryPoint(Name = "Trainers.PoissonRegressor", + Desc = "Train an Poisson regression model.", + UserName = UserNameValue, ShortName = ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index 0354668b31..af30335af6 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -129,7 +129,7 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa continue; // Loop trials for compare-and-swap updates of duals. - // In general, concurrent update conflict to the same dual variable is rare + // In general, concurrent update conflict to the same dual variable is rare // if data is shuffled. for (int numTrials = 0; numTrials < maxUpdateTrials; numTrials++) { @@ -139,7 +139,7 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa var dualUpdate = _loss.DualUpdate(output, 1, dual, invariant, numThreads); // The successive over-relaxation apporach to adjust the sum of dual variables (biasReg) to zero. - // Reference to details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf, pp. 16-17. + // Reference to details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf, pp. 16-17. var adjustment = l1ThresholdZero ? lr * biasReg[iClass] : lr * l1IntermediateBias[iClass]; dualUpdate -= adjustment; bool success = false; @@ -319,7 +319,7 @@ protected override bool CheckConvergence( { for (int iClass = 0; iClass < numClasses; iClass++) { - // Maintain a copy of weights and bias with best primal loss thus far. + // Maintain a copy of weights and bias with best primal loss thus far. // This is some extra work and uses extra memory, but it seems worth doing it. 
// REVIEW: Sparsify bestWeights? weights[iClass].CopyTo(ref bestWeights[iClass]); diff --git a/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs b/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs index cdda9e3cd6..6479c51f7e 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/KdoSweeper.cs @@ -19,20 +19,20 @@ namespace Microsoft.ML.Runtime.Sweeper.Algorithms { /// - /// Kernel Density Optimization (KDO) is a sequential model-based optimization method originally developed by George D. Montanez (me). + /// Kernel Density Optimization (KDO) is a sequential model-based optimization method originally developed by George D. Montanez (me). /// The search space consists of a unit hypercube, with one dimension per hyperparameter (it is a spatial method, so scaling the dimensions /// to the unit hypercube is critical). The idea is that the exploration of the cube to find good values is performed by creating an approximate /// (and biased) kernel density estimate of the space (where density corresponds to metric performance), concentrating mass in regions of better /// performance, then drawing samples from the pdf. - /// - /// To trade off exploration versus exploitation, an fitness proportional mutation scheme is used. Uniform random points are selected during + /// + /// To trade off exploration versus exploitation, an fitness proportional mutation scheme is used. Uniform random points are selected during /// initialization and during the runs (parameter controls how often). A Gaussian model is fit to the distribution of performance values, and - /// each evaluated point in the history is given a value between 0 and 1 corresponding to the CDF evaluation of its performance under the - /// Gaussian. 
Points with low quantile values are mutated more strongly than those with higher values, which allows the method to hone in + /// each evaluated point in the history is given a value between 0 and 1 corresponding to the CDF evaluation of its performance under the + /// Gaussian. Points with low quantile values are mutated more strongly than those with higher values, which allows the method to hone in /// precisely when approaching really good regions. - /// + /// /// Categorical parameters are handled by forming a categorical distribution on possible values weighted by observed performance of each value, - /// taken independently. + /// taken independently. /// public sealed class KdoSweeper : ISweeper @@ -113,7 +113,7 @@ public ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable previ var prevRuns = previousRuns?.ToArray() ?? new IRunResult[0]; var numSweeps = Math.Min(numOfCandidates, _args.NumberInitialPopulation - prevRuns.Length); - // Initialization: Will enter here on first iteration and use the default (random) + // Initialization: Will enter here on first iteration and use the default (random) // sweeper to generate initial candidates. if (prevRuns.Length < _args.NumberInitialPopulation) { @@ -348,7 +348,7 @@ private double[] ExtractRandomRunValues(IEnumerable previousRuns) } /// - /// New version of CategoryToWeights method, which fixes an issue where we could + /// New version of CategoryToWeights method, which fixes an issue where we could /// potentially assign a lot of mass to bad categories. /// private double[] CategoriesToWeights(DiscreteValueGenerator param, IRunResult[] previousRuns) @@ -374,7 +374,7 @@ private double[] CategoriesToWeights(DiscreteValueGenerator param, IRunResult[] for (int i = 0; i < weights.Length; i++) weights[i] /= (counts[i] > 0 ? 
counts[i] : 1); - // If any learner has not been seen, default it's average to + // If any learner has not been seen, default it's average to // best value to encourage exploration of untried algorithms. double bestVal = isMaximizing ? previousRuns.Cast().Where(r => r.HasMetricValue).Max(r => r.MetricValue) : diff --git a/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs b/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs index bffecaae05..c982fc46ec 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/NelderMead.cs @@ -104,7 +104,7 @@ public NelderMeadSweeper(IHostEnvironment env, Arguments args) // REVIEW: ideas about how to support discrete values: // 1. assign each discrete value a random number (1-n) to make mirroring possible // 2. each time we need to mirror a discrete value, sample from the remaining value - // 2.1. make the sampling non-uniform by learning "weights" for the different discrete values based on + // 2.1. make the sampling non-uniform by learning "weights" for the different discrete values based on // the metric values that we get when using them. (E.g. if, for a given discrete value, we get a bad result, // we lower its weight, but if we get a good result we increase its weight). var parameterNumeric = parameter as INumericValueGenerator; diff --git a/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs b/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs index 2351454709..cf08cc99c3 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs @@ -91,8 +91,8 @@ public ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable previ { int numOfCandidates = maxSweeps; - // Initialization: Will enter here on first iteration and use the default (random) - // sweeper to generate initial candidates. + // Initialization: Will enter here on first iteration and use the default (random) + // sweeper to generate initial candidates. 
int numRuns = previousRuns == null ? 0 : previousRuns.Count(); if (numRuns < _args.NumberInitialPopulation) return _randomSweeper.ProposeSweeps(Math.Min(numOfCandidates, _args.NumberInitialPopulation - numRuns), previousRuns); @@ -191,7 +191,7 @@ private ParameterSet[] GenerateCandidateConfigurations(int numOfCandidates, IEnu /// Array of parameter sets, which will then be evaluated. private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForestRegressionPredictor forest, int numOfCandidates, IEnumerable previousRuns) { - // REVIEW: The IsMetricMaximizing flag affects the comparator, so that + // REVIEW: The IsMetricMaximizing flag affects the comparator, so that // performing Max() should get the best, regardless of if it is maximizing or // minimizing. RunResult bestRun = (RunResult)previousRuns.Max(); diff --git a/src/Microsoft.ML.Sweeper/Algorithms/SweeperProbabilityUtils.cs b/src/Microsoft.ML.Sweeper/Algorithms/SweeperProbabilityUtils.cs index cf706d3495..08ef587596 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/SweeperProbabilityUtils.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/SweeperProbabilityUtils.cs @@ -79,7 +79,7 @@ public double[] NormalRVs(int numRVs, double mu, double sigma) /// /// This performs (slow) roulette-wheel sampling of a categorical distribution. Should be swapped for other - /// method as soon as one is available. + /// method as soon as one is available. /// /// Number of samples to draw. /// Weights for distribution (should sum to 1). @@ -117,7 +117,7 @@ public double SampleUniform() } /// - /// Simple binary search method for finding smallest index in array where value + /// Simple binary search method for finding smallest index in array where value /// meets or exceeds what you're looking for. 
/// /// Array to search diff --git a/src/Microsoft.ML.Sweeper/AsyncSweeper.cs b/src/Microsoft.ML.Sweeper/AsyncSweeper.cs index e325846f6f..a86a4755d8 100644 --- a/src/Microsoft.ML.Sweeper/AsyncSweeper.cs +++ b/src/Microsoft.ML.Sweeper/AsyncSweeper.cs @@ -143,7 +143,7 @@ public void Dispose() } /// - /// An wrapper around which enforces determinism by imposing synchronization over past runs. + /// An wrapper around which enforces determinism by imposing synchronization over past runs. /// Suppose n s are generated up to this point. The sweeper will refrain from making a decision /// until the runs with indices in [0, n - relaxation) have all finished. A new batch of s will be /// generated based on the first n - relaxation runs. @@ -181,8 +181,8 @@ public sealed class Arguments // The ith element of _results corresponds to the result of the ith run. private readonly List _results; - // The indices of the runs with null IRunResult. We have to keep track of both the indices and - // the results of finished runs to determine if the synchronization barrier is satisfied. + // The indices of the runs with null IRunResult. We have to keep track of both the indices and + // the results of finished runs to determine if the synchronization barrier is satisfied. // Using _results alone won't do it as the result could be null. // Note that we only need to record those >= _minUnfinishedId. private readonly HashSet _nullRuns; diff --git a/src/Microsoft.ML.Sweeper/Parameters.cs b/src/Microsoft.ML.Sweeper/Parameters.cs index 4e09808cb3..dd46374732 100644 --- a/src/Microsoft.ML.Sweeper/Parameters.cs +++ b/src/Microsoft.ML.Sweeper/Parameters.cs @@ -556,7 +556,7 @@ public bool TryParseParameter(string paramValue, Type paramType, string paramNam } // Extract the minimum, and the maximum value of the list of suggested sweeps. - // Positive lookahead splitting at the '-' character. + // Positive lookahead splitting at the '-' character. // It is used for the Float and Long param types. 
// Example format: "0.02-0.1;steps:5". string[] minMaxRegex = Regex.Split(paramValue, "(?<=[^eE])-"); diff --git a/src/Microsoft.ML.Sweeper/SynthConfigRunner.cs b/src/Microsoft.ML.Sweeper/SynthConfigRunner.cs index eb08e88d16..bee7b8a60b 100644 --- a/src/Microsoft.ML.Sweeper/SynthConfigRunner.cs +++ b/src/Microsoft.ML.Sweeper/SynthConfigRunner.cs @@ -20,7 +20,7 @@ namespace Microsoft.ML.Runtime.Sweeper { /// - /// This class gives a simple way of running optimization experiments on synthetic functions, rather than on actual learning problems. + /// This class gives a simple way of running optimization experiments on synthetic functions, rather than on actual learning problems. /// It was initially created to test the sweeper methods on the Rastrigin function. /// public sealed class SynthConfigRunner : ExeConfigRunnerBase diff --git a/src/Microsoft.ML.Transforms/CategoricalTransform.cs b/src/Microsoft.ML.Transforms/CategoricalTransform.cs index 420db1b731..5c045e4a48 100644 --- a/src/Microsoft.ML.Transforms/CategoricalTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalTransform.cs @@ -243,9 +243,9 @@ public static IDataTransform CreateTransformCore( public static class Categorical { - [TlcModule.EntryPoint(Name = "Transforms.CategoricalOneHotVectorizer", + [TlcModule.EntryPoint(Name = "Transforms.CategoricalOneHotVectorizer", Desc = CategoricalTransform.Summary, - UserName = CategoricalTransform.UserName, + UserName = CategoricalTransform.UserName, XmlInclude = new[] { @"", @""})] public static CommonOutputs.TransformOutput CatTransformDict(IHostEnvironment env, CategoricalTransform.Arguments input) @@ -259,7 +259,7 @@ public static CommonOutputs.TransformOutput CatTransformDict(IHostEnvironment en return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf }; } - [TlcModule.EntryPoint(Name = "Transforms.CategoricalHashOneHotVectorizer", + [TlcModule.EntryPoint(Name = 
"Transforms.CategoricalHashOneHotVectorizer", Desc = CategoricalHashTransform.Summary, UserName = CategoricalHashTransform.UserName , XmlInclude = new[] { @"", @@ -276,7 +276,7 @@ public static CommonOutputs.TransformOutput CatTransformHash(IHostEnvironment en } [TlcModule.EntryPoint(Name = "Transforms.TextToKeyConverter", - Desc = TermTransform.Summary, + Desc = TermTransform.Summary, UserName = TermTransform.UserName, XmlInclude = new[] { @"", @"" })] @@ -291,8 +291,8 @@ public static CommonOutputs.TransformOutput TextToKey(IHostEnvironment env, Term return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf }; } - [TlcModule.EntryPoint(Name = "Transforms.KeyToTextConverter", - Desc = "KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.", + [TlcModule.EntryPoint(Name = "Transforms.KeyToTextConverter", + Desc = "KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.", UserName = KeyToValueTransform.UserName, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput KeyToText(IHostEnvironment env, KeyToValueTransform.Arguments input) diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs index c01508fc30..67d14dc683 100644 --- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Runtime.Data { - /// + /// public static class CountFeatureSelectionTransform { public const string Summary = "Selects the slots for which the count of non-default values is greater than or equal to a threshold."; diff --git a/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs b/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs index 2d7246763f..c3ab4ea5e0 100644 --- 
a/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs +++ b/src/Microsoft.ML.Transforms/EntryPoints/SelectFeatures.cs @@ -11,8 +11,8 @@ namespace Microsoft.ML.Runtime.EntryPoints { public static class SelectFeatures { - [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByCount", - Desc = CountFeatureSelectionTransform.Summary, + [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByCount", + Desc = CountFeatureSelectionTransform.Summary, UserName = CountFeatureSelectionTransform.UserName, XmlInclude = new[] { @"", @""})] @@ -27,9 +27,9 @@ public static CommonOutputs.TransformOutput CountSelect(IHostEnvironment env, Co return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf }; } - [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByMutualInformation", - Desc = MutualInformationFeatureSelectionTransform.Summary, - UserName = MutualInformationFeatureSelectionTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.FeatureSelectorByMutualInformation", + Desc = MutualInformationFeatureSelectionTransform.Summary, + UserName = MutualInformationFeatureSelectionTransform.UserName, ShortName = MutualInformationFeatureSelectionTransform.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs b/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs index b37ecd5f4f..543b997e6d 100644 --- a/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs +++ b/src/Microsoft.ML.Transforms/EntryPoints/TextAnalytics.cs @@ -17,9 +17,9 @@ namespace Microsoft.ML.Runtime.Transforms /// public static class TextAnalytics { - [TlcModule.EntryPoint(Name = "Transforms.TextFeaturizer", - Desc = Data.TextTransform.Summary, - UserName = Data.TextTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.TextFeaturizer", + Desc = Data.TextTransform.Summary, + UserName = Data.TextTransform.UserName, ShortName = Data.TextTransform.LoaderSignature, XmlInclude = 
new[] { @"" , @""})] @@ -34,9 +34,9 @@ public static CommonOutputs.TransformOutput TextTransform(IHostEnvironment env, }; } - [TlcModule.EntryPoint(Name = "Transforms.WordTokenizer", + [TlcModule.EntryPoint(Name = "Transforms.WordTokenizer", Desc = Data.DelimitedTokenizeTransform.Summary, - UserName = Data.DelimitedTokenizeTransform.UserName, + UserName = Data.DelimitedTokenizeTransform.UserName, ShortName = Data.DelimitedTokenizeTransform.LoaderSignature, XmlInclude = new[] { @"", @""})] @@ -51,9 +51,9 @@ public static CommonOutputs.TransformOutput DelimitedTokenizeTransform(IHostEnvi }; } - [TlcModule.EntryPoint(Name = "Transforms.NGramTranslator", - Desc = NgramTransform.Summary, - UserName = NgramTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.NGramTranslator", + Desc = NgramTransform.Summary, + UserName = NgramTransform.UserName, ShortName = NgramTransform.LoaderSignature, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput NGramTransform(IHostEnvironment env, NgramTransform.Arguments input) @@ -67,9 +67,9 @@ public static CommonOutputs.TransformOutput NGramTransform(IHostEnvironment env, }; } - [TlcModule.EntryPoint(Name = "Transforms.Dictionarizer", - Desc = Data.TermTransform.Summary, - UserName = Data.TermTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.Dictionarizer", + Desc = Data.TermTransform.Summary, + UserName = Data.TermTransform.UserName, ShortName = Data.TermTransform.LoaderSignature)] public static CommonOutputs.TransformOutput TermTransform(IHostEnvironment env, TermTransform.Arguments input) { @@ -82,9 +82,9 @@ public static CommonOutputs.TransformOutput TermTransform(IHostEnvironment env, }; } - [TlcModule.EntryPoint(Name = "Transforms.SentimentAnalyzer", - Desc = "Uses a pretrained sentiment model to score input strings", - UserName = SentimentAnalyzingTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.SentimentAnalyzer", + Desc = "Uses a pretrained sentiment model to score input 
strings", + UserName = SentimentAnalyzingTransform.UserName, ShortName = SentimentAnalyzingTransform.ShortName, XmlInclude = new[] { @"", @""})] @@ -99,9 +99,9 @@ public static CommonOutputs.TransformOutput AnalyzeSentiment(IHostEnvironment en }; } - [TlcModule.EntryPoint(Name = "Transforms.CharacterTokenizer", - Desc = CharTokenizeTransform.Summary, - UserName = CharTokenizeTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.CharacterTokenizer", + Desc = CharTokenizeTransform.Summary, + UserName = CharTokenizeTransform.UserName, ShortName = CharTokenizeTransform.LoaderSignature, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput CharTokenize(IHostEnvironment env, CharTokenizeTransform.Arguments input) @@ -118,9 +118,9 @@ public static CommonOutputs.TransformOutput CharTokenize(IHostEnvironment env, C }; } - [TlcModule.EntryPoint(Name = "Transforms.LightLda", - Desc = LdaTransform.Summary, - UserName = LdaTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.LightLda", + Desc = LdaTransform.Summary, + UserName = LdaTransform.UserName, ShortName = LdaTransform.ShortName, XmlInclude = new[] { @"", @"" })] diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index fd67e5fca6..6edfdfb4a2 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -666,9 +666,9 @@ private static Float Mean(Float[] src, int count, int length) public static class LpNormalization { - [TlcModule.EntryPoint(Name = "Transforms.LpNormalizer", - Desc = LpNormNormalizerTransform.Summary, - UserName = LpNormNormalizerTransform.UserNameLP, + [TlcModule.EntryPoint(Name = "Transforms.LpNormalizer", + Desc = LpNormNormalizerTransform.Summary, + UserName = LpNormNormalizerTransform.UserNameLP, ShortName = LpNormNormalizerTransform.ShortNameLP, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, 
LpNormNormalizerTransform.Arguments input) @@ -682,9 +682,9 @@ public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNo }; } - [TlcModule.EntryPoint(Name = "Transforms.GlobalContrastNormalizer", - Desc = LpNormNormalizerTransform.GcnSummary, - UserName = LpNormNormalizerTransform.UserNameGn, + [TlcModule.EntryPoint(Name = "Transforms.GlobalContrastNormalizer", + Desc = LpNormNormalizerTransform.GcnSummary, + UserName = LpNormNormalizerTransform.UserNameGn, ShortName = LpNormNormalizerTransform.ShortNameGn, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, LpNormNormalizerTransform.GcnArguments input) diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 49c989adbf..8e07b11e06 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -28,14 +28,14 @@ namespace Microsoft.ML.Runtime.Data /// SELECT GroupKey1, GroupKey2, ... GroupKeyK, LIST(Value1), LIST(Value2), ... LIST(ValueN) /// FROM Data /// GROUP BY GroupKey1, GroupKey2, ... GroupKeyK. - /// + /// /// It assumes that the group keys are contiguous (if a new group key sequence is encountered, the group is over). /// The GroupKeyN and ValueN columns can be of any primitive types. The code requires that every raw type T of the group key column /// is an , which is currently true for all existing primitive types. /// The produced ValueN columns will be variable-length vectors of the original value column types. - /// + /// /// The order of ValueN entries in the lists is preserved. 
- /// + /// /// Example: /// User Item /// Pete Book @@ -43,12 +43,12 @@ namespace Microsoft.ML.Runtime.Data /// Tom Kitten /// Pete Chair /// Pete Cup - /// + /// /// Result: /// User Item /// Pete [Book] /// Tom [Table, Kitten] - /// Pete [Chair, Cup] + /// Pete [Chair, Cup] /// public sealed class GroupTransform : TransformBase { @@ -68,13 +68,13 @@ private static VersionInfo GetVersionInfo() loaderSignature: LoaderSignature); } - // REVIEW: maybe we want to have an option to keep all non-group scalar columns, as opposed to + // REVIEW: maybe we want to have an option to keep all non-group scalar columns, as opposed to // explicitly listing the ones to keep. // REVIEW: group keys and keep columns can possibly be vectors, not implemented now. // REVIEW: it might be feasible to have columns that are constant throughout a group, without having to list them - // as group keys. + // as group keys. public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.Multiple, HelpText = "Columns to group by", ShortName = "g", SortOrder = 1, @@ -175,10 +175,10 @@ public override IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolid /// /// For group columns, the schema information is intact. - /// - /// For keep columns, the type is Vector of original type and variable length. + /// + /// For keep columns, the type is Vector of original type and variable length. /// The only metadata preserved is the KeyNames and IsNormalized. - /// + /// /// All other columns are dropped. /// private sealed class GroupSchema : ISchema @@ -407,11 +407,11 @@ public void GetMetadata(string kind, int col, ref TValue value) } /// - /// This cursor will create two cursors on the input data view: + /// This cursor will create two cursors on the input data view: /// - The leading cursor will activate all the group columns, and will advance until it hits the end of the contiguous group. 
- /// - The trailing cursor will activate all the requested columns, and will go through the group - /// (as identified by the leading cursor), and aggregate the keep columns. - /// + /// - The trailing cursor will activate all the requested columns, and will go through the group + /// (as identified by the leading cursor), and aggregate the keep columns. + /// /// The getters are as follows: /// - The group column getters are taken directly from the trailing cursor. /// - The keep column getters are provided by the aggregators. @@ -455,7 +455,7 @@ public GroupKeyColumnChecker(IRow row, int col) } // REVIEW: potentially, there could be other aggregators. - // REVIEW: Currently, it always produces dense buffers. The anticipated use cases don't include many + // REVIEW: Currently, it always produces dense buffers. The anticipated use cases don't include many // default values at the moment. /// /// This class handles the aggregation of one 'keep' column into a vector. It wraps around an 's @@ -664,9 +664,9 @@ public ValueGetter GetGetter(int col) public static partial class GroupingOperations { - [TlcModule.EntryPoint(Name = "Transforms.CombinerByContiguousGroupId", - Desc = GroupTransform.Summary, - UserName = GroupTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.CombinerByContiguousGroupId", + Desc = GroupTransform.Summary, + UserName = GroupTransform.UserName, ShortName = GroupTransform.ShortName, XmlInclude = new[] { @"" })] public static CommonOutputs.TransformOutput Group(IHostEnvironment env, GroupTransform.Arguments input) diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs index 31f3123df5..4f72760c99 100644 --- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs @@ -107,7 +107,7 @@ public bool TryUnparse(StringBuilder sb) public sealed class ColumnInfoEx { - // Either VBuffer> or a single Key. + // Either VBuffer> or a single Key. 
// Note that if CustomSlotMap contains only one array, the output type of the transform will a single Key. // This corresponds to the join=+ case, although now it's possible to omit certain slots entirely. // If # of hash bits is less than 31, the key type will have a positive count. @@ -702,9 +702,9 @@ protected override ColumnType GetColumnTypeCore(int iinfo) public static class HashJoin { - [TlcModule.EntryPoint(Name = "Transforms.HashConverter", - Desc = HashJoinTransform.Summary, - UserName = HashJoinTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.HashConverter", + Desc = HashJoinTransform.Summary, + UserName = HashJoinTransform.UserName, ShortName = HashJoinTransform.RegistrationName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs index 0cfaf75500..88a4228941 100644 --- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs +++ b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs @@ -46,7 +46,7 @@ private static VersionInfo GetVersionInfo() // These arrays are parallel to Infos. // * _concat is whether, given the current input, there are multiple output instance vectors - // to concatenate. + // to concatenate. // * _types contains the output column types. private readonly bool[] _concat; diff --git a/src/Microsoft.ML.Transforms/LoadTransform.cs b/src/Microsoft.ML.Transforms/LoadTransform.cs index 64a494702d..28eb3cb2f3 100644 --- a/src/Microsoft.ML.Transforms/LoadTransform.cs +++ b/src/Microsoft.ML.Transforms/LoadTransform.cs @@ -17,14 +17,14 @@ namespace Microsoft.ML.Runtime.Data { /// - /// Load specific transforms from the specified model file. Allows one to 'cherry pick' transforms from + /// Load specific transforms from the specified model file. Allows one to 'cherry pick' transforms from /// a serialized chain, or to apply a pre-trained transform to a different (but still compatible) data view. 
/// public static class LoadTransform { public class Arguments { - // REVIEW: make it not required, and make commands fill in the missing model file with the default + // REVIEW: make it not required, and make commands fill in the missing model file with the default // input model file. This requires some hacking in DataDiagnosticCommand. [Argument(ArgumentType.Required, HelpText = "Model file to load the transforms from", ShortName = "in", SortOrder = 1, IsInputFileName = true)] diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index c6c5cd23e7..55330cb6fb 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -21,7 +21,7 @@ namespace Microsoft.ML.Runtime.Data { - /// + /// public static class MutualInformationFeatureSelectionTransform { public const string Summary = diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs index 746229d4f7..9e7390948b 100644 --- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs +++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs @@ -118,7 +118,7 @@ public static IDataTransform Create(IHostEnvironment env, IDataView input, strin { var args = new Arguments() { - Column = new[] + Column = new[] { new Column() { Source = source ?? 
name, Name = name } }, diff --git a/src/Microsoft.ML.Transforms/NAHandling.cs b/src/Microsoft.ML.Transforms/NAHandling.cs index 7190291b16..0870d16461 100644 --- a/src/Microsoft.ML.Transforms/NAHandling.cs +++ b/src/Microsoft.ML.Transforms/NAHandling.cs @@ -11,9 +11,9 @@ namespace Microsoft.ML.Runtime.Data { public static class NAHandling { - [TlcModule.EntryPoint(Name = "Transforms.MissingValuesDropper", + [TlcModule.EntryPoint(Name = "Transforms.MissingValuesDropper", Desc = NADropTransform.Summary, - UserName = NADropTransform.FriendlyName, + UserName = NADropTransform.FriendlyName, ShortName = NADropTransform.ShortName, XmlInclude = new[] { @"", @"" })] @@ -28,11 +28,11 @@ public static CommonOutputs.TransformOutput Drop(IHostEnvironment env, NADropTra }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValuesRowDropper", - Desc = NAFilter.Summary, - UserName = NAFilter.FriendlyName, + [TlcModule.EntryPoint(Name = "Transforms.MissingValuesRowDropper", + Desc = NAFilter.Summary, + UserName = NAFilter.FriendlyName, ShortName = NAFilter.ShortName, - XmlInclude = new[] { @"", + XmlInclude = new[] { @"", @""})] public static CommonOutputs.TransformOutput Filter(IHostEnvironment env, NAFilter.Arguments input) { @@ -45,9 +45,9 @@ public static CommonOutputs.TransformOutput Filter(IHostEnvironment env, NAFilte }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValueHandler", - Desc = NAHandleTransform.Summary, - UserName = NAHandleTransform.FriendlyName, + [TlcModule.EntryPoint(Name = "Transforms.MissingValueHandler", + Desc = NAHandleTransform.Summary, + UserName = NAHandleTransform.FriendlyName, ShortName = NAHandleTransform.ShortName, XmlInclude = new[] { @"", @"" })] @@ -62,9 +62,9 @@ public static CommonOutputs.TransformOutput Handle(IHostEnvironment env, NAHandl }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValueIndicator", - Desc = NAIndicatorTransform.Summary, - UserName = NAIndicatorTransform.FriendlyName, + [TlcModule.EntryPoint(Name = 
"Transforms.MissingValueIndicator", + Desc = NAIndicatorTransform.Summary, + UserName = NAIndicatorTransform.FriendlyName, ShortName = NAIndicatorTransform.ShortName, XmlInclude = new[] { @"", @""})] @@ -79,9 +79,9 @@ public static CommonOutputs.TransformOutput Indicator(IHostEnvironment env, NAIn }; } - [TlcModule.EntryPoint(Name = "Transforms.MissingValueSubstitutor", - Desc = NAReplaceTransform.Summary, - UserName = NAReplaceTransform.FriendlyName, + [TlcModule.EntryPoint(Name = "Transforms.MissingValueSubstitutor", + Desc = NAReplaceTransform.Summary, + UserName = NAReplaceTransform.FriendlyName, ShortName = NAReplaceTransform.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index b118266b7b..5117496194 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -471,9 +471,9 @@ private Delegate MakeGetterVec(int length) } } - [TlcModule.EntryPoint(Desc = Summary, - Name = "Transforms.OptionalColumnCreator", - UserName = UserName, + [TlcModule.EntryPoint(Desc = Summary, + Name = "Transforms.OptionalColumnCreator", + UserName = UserName, ShortName = ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs index c66489d386..0934fd0086 100644 --- a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs +++ b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Runtime.Data { /// /// Produces a column with the cursor's ID as a column. This can be useful for diagnostic purposes. - /// + /// /// This class will obviously generate different data given different IDs. So, if you save data to /// some other file, then apply this transform to that dataview, it may of course have a different /// result. 
This is distinct from most transforms that produce results based on data alone. diff --git a/src/Microsoft.ML.Transforms/Properties/Resources.Designer.cs b/src/Microsoft.ML.Transforms/Properties/Resources.Designer.cs index 28bf1cdaaf..772151fc4f 100644 --- a/src/Microsoft.ML.Transforms/Properties/Resources.Designer.cs +++ b/src/Microsoft.ML.Transforms/Properties/Resources.Designer.cs @@ -338,7 +338,8 @@ internal static string Czech { ///nyt ///nær ///næste - ///næsten /// [rest of string was truncated]";. + ///næsten + /// [rest of string was truncated]";. /// internal static string Danish { get { @@ -581,7 +582,8 @@ internal static string English { ///aussi ///donc ///ces - ///toute /// [rest of string was truncated]";. + ///toute + /// [rest of string was truncated]";. /// internal static string French { get { diff --git a/src/Microsoft.ML.Transforms/RffTransform.cs b/src/Microsoft.ML.Transforms/RffTransform.cs index 85c4203512..b7a112e954 100644 --- a/src/Microsoft.ML.Transforms/RffTransform.cs +++ b/src/Microsoft.ML.Transforms/RffTransform.cs @@ -557,7 +557,7 @@ private static void TransformFeatures(IHost host, ref VBuffer src, ref VB } else { - // This overload of MatTimesSrc ignores the values in slots that are not in src.Indices, so there is + // This overload of MatTimesSrc ignores the values in slots that are not in src.Indices, so there is // no need to zero them out. 
featuresAligned.CopyFrom(src.Indices, src.Values, 0, 0, src.Count, zeroItems: false); CpuUtils.MatTimesSrc(false, false, transformInfo.RndFourierVectors, src.Indices, featuresAligned, 0, 0, diff --git a/src/Microsoft.ML.Transforms/TermLookupTransform.cs b/src/Microsoft.ML.Transforms/TermLookupTransform.cs index f10fb40092..2848082157 100644 --- a/src/Microsoft.ML.Transforms/TermLookupTransform.cs +++ b/src/Microsoft.ML.Transforms/TermLookupTransform.cs @@ -283,7 +283,7 @@ private static VersionInfo GetVersionInfo() } // This is the byte array containing the binary .idv file contents for the lookup data. - // This is persisted; the _termMap and _valueMap are constructed from it. + // This is persisted; the _termMap and _valueMap are constructed from it. private readonly byte[] _bytes; // The BinaryLoader over the byte array above. We keep this diff --git a/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs b/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs index 36e55099d9..4a9ef780ca 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs @@ -220,7 +220,7 @@ public List> GetDocTopicVector(int docID) while (currentTopic < _topics[i]) { //use a value to smooth the count so that we get dense output on each topic - //the smooth value is usually set to 0.1 + //the smooth value is usually set to 0.1 topicRet.Add(new KeyValuePair(currentTopic, (float)_alpha)); currentTopic++; } diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 7a9b214063..b5a75a10d1 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -31,14 +31,14 @@ namespace Microsoft.ML.Runtime.TextAnalytics // Previous implementations of LDA such as SparseLDA or AliasLDA allow to achieve massive data and model scales, // for example models with tens of billions of parameters to be inferred from billions of documents. 
// However this requires using a cluster of thousands of machines with all ensuing costs to setup and maintain. - // LightLDA solves this problem in a more cost-effective manner by providing an implementation - // that is efficient enough for modest clusters with at most tens of machines... - // For more details please see original LightLDA paper: + // LightLDA solves this problem in a more cost-effective manner by providing an implementation + // that is efficient enough for modest clusters with at most tens of machines... + // For more details please see original LightLDA paper: // http://arxiv.org/abs/1412.1576 // http://www.www2015.it/documents/proceedings/proceedings/p1351.pdf - // and open source implementation: + // and open source implementation: // https://github.com/Microsoft/LightLDA - // + // // See // for an example on how to use LdaTransform. /// @@ -446,7 +446,7 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) private static string TestType(ColumnType t) { - // LDA consumes term frequency vectors, so I am assuming VBuffer is an appropriate input type. + // LDA consumes term frequency vectors, so I am assuming VBuffer is an appropriate input type. // It must also be of known size for the sake of the LDA trainer initialization. if (t.IsKnownSizeVector && t.ItemType is NumberType) return null; @@ -477,7 +477,7 @@ private void Train(IChannel ch, IDataView trainingData, LdaState[] states) numVocabs[i] = 0; } - //the current lda needs the memory allocation before feedin data, so needs two sweeping of the data, + //the current lda needs the memory allocation before feedin data, so needs two sweeping of the data, //one for the pre-calc memory, one for feedin data really //another solution can be prepare these two value externally and put them in the beginning of the input file. 
long[] corpusSize = new long[Infos.Length]; diff --git a/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs index b90bde76a9..548b80cb8c 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramHashTransform.cs @@ -697,7 +697,7 @@ private Delegate MakeGetter(IChannel ch, IRow input, int iinfo, FinderDecorator var keyCounts = _bindings.Infos[iinfo].SrcTypes.Select( t => t.ItemType.KeyCount > 0 ? (uint)t.ItemType.KeyCount : uint.MaxValue).ToArray(); - // REVIEW: Special casing the srcCount==1 case could potentially improve perf. + // REVIEW: Special casing the srcCount==1 case could potentially improve perf. ValueGetter> del = (ref VBuffer dst) => { @@ -990,7 +990,7 @@ public NgramIdFinder Decorate(int iinfo, NgramIdFinder finder) Contracts.Assert(0 <= icol && icol < srcIndices.Length); Contracts.AssertValue(_srcTextGetters[srcIndices[icol]]); var result = finder(ngram, lim, icol, ref more); - // For the hashing NgramIdFinder, a result of -1 indicates that + // For the hashing NgramIdFinder, a result of -1 indicates that // a slot does not exist for the given ngram. We do not pass ngrams // that do not have a slot to the InvertHash collector. if (result != -1) diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 10e4065d4f..546c46479d 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -376,7 +376,7 @@ private void GetSlotNames(int iinfo, ref VBuffer dst) var unigramNames = new VBuffer(); - // Get the key values of the unigrams. + // Get the key values of the unigrams. 
Source.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, Infos[iinfo].Source, ref unigramNames); Host.Check(unigramNames.Length == keyCount); diff --git a/src/Microsoft.ML.Transforms/Text/NgramUtils.cs b/src/Microsoft.ML.Transforms/Text/NgramUtils.cs index dd32fd6c91..8d76a20041 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramUtils.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramUtils.cs @@ -134,7 +134,7 @@ public void GetResult(ref VBuffer dst) _bldr.GetResult(ref dst); } - // Returns false if there is no need to process more ngrams. + // Returns false if there is no need to process more ngrams. private bool ProcessNgrams(int icol) { Contracts.Assert(_queue.Count > 0); @@ -171,8 +171,8 @@ private bool ProcessNgrams(int icol) return true; } - // Uses DFS. When called with i and skips, it assumes that the - // first i terms in the _ngram array are already populated using "skips" skips, + // Uses DFS. When called with i and skips, it assumes that the + // first i terms in the _ngram array are already populated using "skips" skips, // and it adds the (i+1)st term. It then recursively calls ProcessSkipNgrams // to add the next term. private bool ProcessSkipNgrams(int icol, int i, int skips) diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs index 96bf44b4c2..d559430c81 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs @@ -84,8 +84,10 @@ public enum Language Italian = 6, Spanish = 7, Portuguese = 8, +#pragma warning disable MSML_GeneralName // These names correspond to file names, so this is fine in this case. 
Portuguese_Brazilian = 9, Norwegian_Bokmal = 10, +#pragma warning restore MSML_GeneralName Russian = 11, Polish = 12, Czech = 13, @@ -775,7 +777,7 @@ public CustomStopWordsRemoverTransform(IHostEnvironment env, Arguments args, IDa } /// - /// Public constructor corresponding to SignatureStopWordsRemoverTransform. It accepts arguments of type LoaderArguments, + /// Public constructor corresponding to SignatureStopWordsRemoverTransform. It accepts arguments of type LoaderArguments, /// and a separate array of columns (constructed by the caller -TextTransform- arguments). /// public CustomStopWordsRemoverTransform(IHostEnvironment env, LoaderArguments loaderArgs, IDataView input, OneToOneColumn[] column) diff --git a/src/Microsoft.ML.Transforms/Text/TextTransform.cs b/src/Microsoft.ML.Transforms/Text/TextTransform.cs index 932bf63272..c839e78556 100644 --- a/src/Microsoft.ML.Transforms/Text/TextTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/TextTransform.cs @@ -24,7 +24,7 @@ namespace Microsoft.ML.Runtime.Data using StopWordsLang = StopWordsRemoverTransform.Language; using CaseNormalizationMode = TextNormalizerTransform.CaseNormalizationMode; - // A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are counts + // A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are counts // of (word or character) ngrams in a given text. It offers ngram hashing (finding the ngram token string name to feature // integer index mapping through hashing) as an option. 
/// @@ -271,7 +271,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV var col = new DelimitedTokenizeTransform.Column(); col.Source = textCols[i]; col.Name = GenerateColumnName(view.Schema, textCols[i], "WordTokenizer"); - + xfCols[i] = col; wordTokCols[i] = col.Name; diff --git a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs index c77ddbb21a..fb56fa2e0a 100644 --- a/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordBagTransform.cs @@ -30,7 +30,7 @@ namespace Microsoft.ML.Runtime.Data public delegate void SignatureNgramExtractorFactory(TermLoaderArguments termLoaderArgs); /// - /// A many-to-one column common to both + /// A many-to-one column common to both /// and . /// public sealed class ExtractorColumn : ManyToOneColumn @@ -122,7 +122,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV h.CheckUserArg(args.Tokenizer.IsGood(), nameof(args.Tokenizer), "tokenizer must be specified"); // Compose the WordBagTransform from a tokenize transform, - // followed by a NgramExtractionTransform. + // followed by a NgramExtractionTransform. // Since WordBagTransform is a many-to-one column transform, for each // WordBagTransform.Column with multiple sources, we first apply a ConcatTransform. @@ -180,7 +180,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV } /// - /// A transform that turns a collection of tokenized text (vector of DvText), or vectors of keys into numerical + /// A transform that turns a collection of tokenized text (vector of DvText), or vectors of keys into numerical /// feature vectors. The feature vectors are counts of ngrams (sequences of consecutive *tokens* -words or keys- /// of length 1-n). /// @@ -201,7 +201,7 @@ public sealed class Column : OneToOneColumn public bool? AllLengths; // REVIEW: This argument is actually confusing. 
If you set only one value we will use this value for all ngrams respectfully e.g. - // if we specify 3 ngrams we will have maxNumTerms * 3. And it also pick first value from this array to run term transform, so if you specify + // if we specify 3 ngrams we will have maxNumTerms * 3. And it also pick first value from this array to run term transform, so if you specify // something like 1,1,10000, term transform would be run with limitation of only one term. [Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")] public int[] MaxNumTerms = null; @@ -232,7 +232,7 @@ public bool TryUnparse(StringBuilder sb) } /// - /// This class is a merger of and + /// This class is a merger of and /// , with the allLength option removed. /// public abstract class ArgumentsBase @@ -309,7 +309,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV // If the column types of args.column are text, apply term transform to convert them to keys. // Otherwise, skip term transform and apply ngram transform directly. // This logic allows NgramExtractorTransform to handle both text and key input columns. - // Note: ngram transform handles the validation of the types natively (in case the types + // Note: ngram transform handles the validation of the types natively (in case the types // of args.column are not text nor keys). if (termCols.Count > 0) { @@ -471,7 +471,7 @@ public sealed class TermLoaderArguments public interface INgramExtractorFactory { /// - /// Whether the extractor transform created by this factory uses the hashing trick + /// Whether the extractor transform created by this factory uses the hashing trick /// (by using or , for example). /// bool UseHashingTrick { get; } @@ -569,7 +569,7 @@ public static IDataView ApplyConcatOnSources(IHostEnvironment env, ManyToOneColu } /// - /// Generates and returns unique names for columns source. 
Each element of the returned array is + /// Generates and returns unique names for columns source. Each element of the returned array is /// an array of unique source names per specific column. /// public static string[][] GenerateUniqueSourceNames(IHostEnvironment env, ManyToOneColumn[] columns, ISchema schema) diff --git a/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs b/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs index 9c43836ce1..507607ffdc 100644 --- a/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs @@ -100,8 +100,8 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV // To each input column to the WordHashBagTransform, a tokenize transform is applied, // followed by applying WordHashVectorizeTransform. - // Since WordHashBagTransform is a many-to-one column transform, for each - // WordHashBagTransform.Column we may need to define multiple tokenize transform columns. + // Since WordHashBagTransform is a many-to-one column transform, for each + // WordHashBagTransform.Column we may need to define multiple tokenize transform columns. // NgramHashExtractorTransform may need to define an identical number of HashTransform.Columns. // The intermediate columns are dropped at the end of using a DropColumnsTransform. IDataView view = input; @@ -175,7 +175,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV } /// - /// A transform that turns a collection of tokenized text (vector of DvText) into numerical feature vectors + /// A transform that turns a collection of tokenized text (vector of DvText) into numerical feature vectors /// using the hashing trick. 
/// public static class NgramHashExtractorTransform @@ -264,7 +264,7 @@ public bool TryUnparse(StringBuilder sb) } /// - /// This class is a merger of and + /// This class is a merger of and /// , with the ordered option, /// the rehashUnigrams option and the allLength option removed. /// @@ -331,7 +331,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV h.CheckValue(input, nameof(input)); h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column), "Columns must be specified"); - // To each input column to the NgramHashExtractorArguments, a HashTransform using 31 + // To each input column to the NgramHashExtractorArguments, a HashTransform using 31 // bits (to minimize collisions) is applied first, followed by an NgramHashTransform. IDataView view = input; diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 5afd177763..2e38af34e2 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -173,7 +173,7 @@ public DelimitedTokenizeTransform(IHostEnvironment env, Arguments args, IDataVie } /// - /// Public constructor corresponding to SignatureTokenizeTransform. It accepts arguments of type ArgumentsBase, + /// Public constructor corresponding to SignatureTokenizeTransform. It accepts arguments of type ArgumentsBase, /// and a separate array of columns (constructed from the caller -WordBag/WordHashBag- arguments). 
/// public DelimitedTokenizeTransform(IHostEnvironment env, TokenizeArguments args, IDataView input, OneToOneColumn[] columns) diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index ad6c31ae96..d77cc48bbd 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -23,24 +23,24 @@ namespace Microsoft.ML.Runtime.Data { - // This can be thought of as an inverse of GroupTransform. For all specified vector columns + // This can be thought of as an inverse of GroupTransform. For all specified vector columns // ("pivot" columns), performs the "ungroup" (or "unroll") operation as outlined below. - // - // If the only pivot column is called P, and has size K, then for every row of the input we will produce - // K rows, that are identical in all columns except P. The column P will become a scalar column, and this - // column will hold all the original values of input's P, one value per row, in order. The order of columns + // + // If the only pivot column is called P, and has size K, then for every row of the input we will produce + // K rows, that are identical in all columns except P. The column P will become a scalar column, and this + // column will hold all the original values of input's P, one value per row, in order. The order of columns // will remain the same. - // + // // Variable-length pivot columns are supported (including zero, which will eliminate the row from the result). - // + // // Multiple pivot columns are also supported: - // * A number of output rows is controlled by the 'mode' parameter. + // * A number of output rows is controlled by the 'mode' parameter. // - outer: it is equal to the maximum length of pivot columns, // - inner: it is equal to the minimum length of pivot columns, // - first: it is equal to the length of the first pivot column. 
// * If a particular pivot column has size that is different than the number of output rows, the extra slots will // be ignored, and the missing slots will be 'padded' with default values. - // + // // All metadata is preserved for the retained columns. For 'unrolled' columns, all known metadata // except slot names is preserved. /// @@ -171,8 +171,8 @@ public override ISchema Schema return null; } - // Technically, we could shuffle the ungrouped data if the source can shuffle. However, we want to maintain - // contiguous groups. There's also a question whether we should shuffle inside groups or just shuffle groups + // Technically, we could shuffle the ungrouped data if the source can shuffle. However, we want to maintain + // contiguous groups. There's also a question whether we should shuffle inside groups or just shuffle groups // themselves. With these issues, and no anticipated use for shuffled version, it's safer to not shuffle at all. public override bool CanShuffle { @@ -460,7 +460,7 @@ private sealed class Cursor : LinkedRootCursorBase, IRowCursor // For each pivot column that we care about, these getters return the vector size. private readonly Func[] _sizeGetters; - // As a side effect, getters also populate these actual sizes of the necessary pivot columns on MoveNext. + // As a side effect, getters also populate these actual sizes of the necessary pivot columns on MoveNext. // Parallel to columns. private int[] _colSizes; @@ -610,7 +610,7 @@ private ValueGetter MakeGetter(int col, PrimitiveType itemType) var srcGetter = Input.GetGetter>(col); // The position of the source cursor. Used to extract the source row once. long cachedPosition = -1; - // The position inside the sparse row. If the row is sparse, the invariant is + // The position inside the sparse row. If the row is sparse, the invariant is // cachedIndex == row.Count || _pivotColPosition <= row.Indices[cachedIndex]. 
int cachedIndex = 0; VBuffer row = default(VBuffer); @@ -653,9 +653,9 @@ private ValueGetter MakeGetter(int col, PrimitiveType itemType) public static partial class GroupingOperations { - [TlcModule.EntryPoint(Name = "Transforms.Segregator", - Desc = UngroupTransform.Summary, - UserName = UngroupTransform.UserName, + [TlcModule.EntryPoint(Name = "Transforms.Segregator", + Desc = UngroupTransform.Summary, + UserName = UngroupTransform.UserName, ShortName = UngroupTransform.ShortName, XmlInclude = new[] { @"", @""})] diff --git a/src/Microsoft.ML.Transforms/WhiteningTransform.cs b/src/Microsoft.ML.Transforms/WhiteningTransform.cs index be83e87c89..6854157f31 100644 --- a/src/Microsoft.ML.Transforms/WhiteningTransform.cs +++ b/src/Microsoft.ML.Transforms/WhiteningTransform.cs @@ -40,7 +40,7 @@ public enum WhiteningKind /// 1. Decorrelation of the input data. Input data is assumed to have zero mean. /// 2. Rescale decorrelated features to have unit variance. /// That is, PCA whitening is essentially just a PCA + rescale. - /// ZCA whitening tries to make resulting data to look more like input data by rotating it back to the + /// ZCA whitening tries to make resulting data to look more like input data by rotating it back to the /// original input space. /// More information: http://ufldl.stanford.edu/wiki/index.php/Whitening /// diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index c9b16ad24e..6e89e8a54e 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -20,7 +20,7 @@ public TextLoaderRange() } /// - /// Convenience constructor for the scalar case, when a given column + /// Convenience constructor for the scalar case, when a given column /// in the schema spans only a single column in the dataset. /// and are set to the single value . 
/// @@ -35,7 +35,7 @@ public TextLoaderRange(int ordinal) } /// - /// Convenience constructor for the vector case, when a given column + /// Convenience constructor for the vector case, when a given column /// in the schema spans contiguous columns in the dataset. /// /// Starting column index in the dataset. @@ -58,13 +58,13 @@ public sealed partial class TextLoader /// /// Does the file contains header? /// Column separator character. Default is '\t' - /// Whether the input may include quoted values, + /// Whether the input may include quoted values, /// which can contain separator characters, colons, - /// and distinguish empty values from missing values. When true, consecutive separators - /// denote a missing value and an empty value is denoted by \"\". + /// and distinguish empty values from missing values. When true, consecutive separators + /// denote a missing value and an empty value is denoted by \"\". /// When false, consecutive separators denote an empty value. - /// Whether the input may include sparse representations e.g. - /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero + /// Whether the input may include sparse representations e.g. 
+ /// if one of the row contains "5 2:6 4:3" that's mean there are 5 columns all zero /// except for 3rd and 5th columns which have values 6 and 3 /// Remove trailing whitespace from lines public TextLoader CreateFrom(bool useHeader = false, diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 0637c5e65e..98e95c1319 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -14,7 +14,7 @@ namespace Microsoft.ML { - class ScorerPipelineStep : ILearningPipelineDataStep + public sealed class ScorerPipelineStep : ILearningPipelineDataStep { public ScorerPipelineStep(Var data, Var model) { @@ -26,12 +26,11 @@ public ScorerPipelineStep(Var data, Var model) public Var Model { get; } } - /// /// The class is used to define the steps needed to perform a desired machine learning task. - /// The steps are defined by adding a data loader (e.g. ) followed by zero or more transforms (e.g. ) + /// The steps are defined by adding a data loader (e.g. ) followed by zero or more transforms (e.g. ) /// and at most one trainer/learner (e.g. ) in the pipeline. - /// + /// /// /// /// @@ -41,14 +40,14 @@ public ScorerPipelineStep(Var data, Var model) /// pipeline.Add(new TextLoader <SentimentData> (dataPath, separator: ",")); /// pipeline.Add(new TextFeaturizer("Features", "SentimentText")); /// pipeline.Add(new FastTreeBinaryClassifier()); - /// + /// /// var model = pipeline.Train<SentimentData, SentimentPrediction>(); /// /// [DebuggerTypeProxy(typeof(LearningPipelineDebugProxy))] public class LearningPipeline : ICollection { - private List Items { get; } = new List(); + private List Items { get; } private readonly int? _seed; private readonly int _conc; @@ -56,9 +55,8 @@ public class LearningPipeline : ICollection /// Construct an empty object. 
/// public LearningPipeline() + : this(conc: 0) { - _seed = null; - _conc = 0; } /// @@ -68,6 +66,7 @@ public LearningPipeline() /// Specify concurrency factor (default value - autoselection) internal LearningPipeline(int? seed = null, int conc = 0) { + Items = new List(); _seed = seed; _conc = conc; } @@ -79,7 +78,7 @@ internal LearningPipeline(int? seed = null, int conc = 0) public bool IsReadOnly => false; /// - /// Add a data loader, transform or trainer into the pipeline. + /// Add a data loader, transform or trainer into the pipeline. /// Possible data loader(s), transforms and trainers options are /// /// Data Loader: diff --git a/src/Microsoft.ML/LearningPipelineDebugProxy.cs b/src/Microsoft.ML/LearningPipelineDebugProxy.cs index e9d93425e4..eab1af6386 100644 --- a/src/Microsoft.ML/LearningPipelineDebugProxy.cs +++ b/src/Microsoft.ML/LearningPipelineDebugProxy.cs @@ -225,7 +225,12 @@ internal class PipelineItemDebugColumn { public string Name { get; set; } public string Type { get; set; } - public string SlotNames { get; set; } = string.Empty; + public string SlotNames { get; set; } + + public PipelineItemDebugColumn() + { + SlotNames = string.Empty; + } } [DebuggerDisplay("{Values}")] diff --git a/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs b/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs index 1a670fc854..9d841d1e21 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs @@ -47,7 +47,7 @@ public BinaryClassificationMetrics Evaluate(PredictionModel model, ILearningPipe Output evaluteOutput = experiment.Add(this); experiment.Compile(); - + experiment.SetInput(datasetScorer.TransformModel, model.PredictorModel); testData.SetInput(environment, experiment); @@ -67,7 +67,8 @@ public BinaryClassificationMetrics Evaluate(PredictionModel model, ILearningPipe var metric = BinaryClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); - 
Contracts.Check(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); + if (metric.Count != 1) + throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics"); return metric[0]; } diff --git a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs index f536f30ed0..07d534142f 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs @@ -34,11 +34,11 @@ internal static List FromMetrics(IHostEnvironment e List metrics = new List(); var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); - int Index = 0; + int index = 0; foreach(var metric in metricsEnumerable) { - if (Index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) + if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) { throw env.Except("Confusion matrices didn't have enough matrices."); } diff --git a/src/Microsoft.ML/Models/ClassificationEvaluator.cs b/src/Microsoft.ML/Models/ClassificationEvaluator.cs index bc97a372a0..7adec772e2 100644 --- a/src/Microsoft.ML/Models/ClassificationEvaluator.cs +++ b/src/Microsoft.ML/Models/ClassificationEvaluator.cs @@ -68,7 +68,8 @@ public ClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLo var metric = ClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); - Contracts.Check(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); + if (metric.Count != 1) + throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics"); return metric[0]; } diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index f3a2416bca..3261a0cc60 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ 
b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -18,7 +18,7 @@ private ClassificationMetrics() { } - internal static List FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, + internal static List FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, int confusionMatriceStartIndex = 0) { Contracts.AssertValue(env); @@ -34,14 +34,14 @@ internal static List FromMetrics(IHostEnvironment env, ID List metrics = new List(); var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); - int Index = 0; + int index = 0; foreach (var metric in metricsEnumerable) { - if (Index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) + if (index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) { throw env.Except("Confusion matrices didn't have enough matrices."); } - + metrics.Add( new ClassificationMetrics() { @@ -64,7 +64,7 @@ internal static List FromMetrics(IHostEnvironment env, ID /// /// /// The micro-average is the fraction of instances predicted correctly. - /// + /// /// The micro-average metric weighs each class according to the number of instances that belong /// to it in the dataset. /// @@ -77,7 +77,7 @@ internal static List FromMetrics(IHostEnvironment env, ID /// The macro-average is computed by taking the average over all the classes of the fraction /// of correct predictions in this class (the number of correctly predicted instances in the class, /// divided by the total number of instances in the class). - /// + /// /// The macro-average metric gives the same weight to each class, no matter how many instances from /// that class the dataset contains. 
/// diff --git a/src/Microsoft.ML/Models/ClusterMetrics.cs b/src/Microsoft.ML/Models/ClusterMetrics.cs index 7f88784ef8..aec0264ff0 100644 --- a/src/Microsoft.ML/Models/ClusterMetrics.cs +++ b/src/Microsoft.ML/Models/ClusterMetrics.cs @@ -57,7 +57,7 @@ internal static List FromOverallMetrics(IHostEnvironment env, ID /// /// /// NMI is a measure of the mutual dependence between the true and predicted cluster labels for instances in the dataset. - /// NMI ranges between 0 and 1 where "0" indicates clustering is random and "1" indicates clustering is perfect w.r.t true labels. + /// NMI ranges between 0 and 1 where "0" indicates clustering is random and "1" indicates clustering is perfect w.r.t true labels. /// public double Nmi { get; private set; } @@ -66,7 +66,7 @@ internal static List FromOverallMetrics(IHostEnvironment env, ID /// /// /// AvgMinScore is the average squared-distance of examples from the respective cluster centroids. - /// It is defined as + /// It is defined as /// AvgMinScore = (1/m) * sum ((xi - c(xi))^2) /// where m is the number of instances in the dataset. /// xi is the i'th instance and c(xi) is the centriod of the predicted cluster for xi. 
diff --git a/src/Microsoft.ML/Models/ConfusionMatrix.cs b/src/Microsoft.ML/Models/ConfusionMatrix.cs index 72aa5061dc..9abcd2af9c 100644 --- a/src/Microsoft.ML/Models/ConfusionMatrix.cs +++ b/src/Microsoft.ML/Models/ConfusionMatrix.cs @@ -48,7 +48,7 @@ internal static List Create(IHostEnvironment env, IDataView con if (!confusionMatrix.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Count, out int countColumn)) { - env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column."); + throw env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column."); } IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn); @@ -65,7 +65,7 @@ internal static List Create(IHostEnvironment env, IDataView con ValueGetter> countGetter = cursor.GetGetter>(countColumn); VBuffer countValues = default; List confusionMatrices = new List(); - + int valuesRowIndex = 0; double[,] elements = null; while (cursor.MoveNext()) diff --git a/src/Microsoft.ML/Models/CrossValidator.cs b/src/Microsoft.ML/Models/CrossValidator.cs index 5f6e374e11..c34e2da13b 100644 --- a/src/Microsoft.ML/Models/CrossValidator.cs +++ b/src/Microsoft.ML/Models/CrossValidator.cs @@ -187,7 +187,7 @@ public class CrossValidationOutput public List ClusterMetrics; public PredictionModel[] PredictorModels; - //REVIEW: Add warnings and per instance results and implement + //REVIEW: Add warnings and per instance results and implement //metrics for ranking, clustering and anomaly detection. 
} } diff --git a/src/Microsoft.ML/Models/OneVersusAll.cs b/src/Microsoft.ML/Models/OneVersusAll.cs index b4317cc1fe..c5b20c8550 100644 --- a/src/Microsoft.ML/Models/OneVersusAll.cs +++ b/src/Microsoft.ML/Models/OneVersusAll.cs @@ -13,15 +13,15 @@ namespace Microsoft.ML.Models public sealed partial class OneVersusAll { /// - /// One-versus-all, OvA, learner (also known as One-vs.-rest, "OvR") is a multi-class learner + /// One-versus-all, OvA, learner (also known as One-vs.-rest, "OvR") is a multi-class learner /// with the strategy to fit one binary classifier per class in the dataset. - /// It trains the provided binary classifier for each class against the other classes, where the current + /// It trains the provided binary classifier for each class against the other classes, where the current /// class is treated as the positive labels and examples in other classes are treated as the negative classes. /// See wikipedia page. /// /// /// In order to use it all you need to do is add it to pipeline as regular learner: - /// + /// /// pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier())); /// /// diff --git a/src/Microsoft.ML/Models/OnnxConverter.cs b/src/Microsoft.ML/Models/OnnxConverter.cs index 6a98d9faee..c2b8e2b5c1 100644 --- a/src/Microsoft.ML/Models/OnnxConverter.cs +++ b/src/Microsoft.ML/Models/OnnxConverter.cs @@ -10,27 +10,27 @@ namespace Microsoft.ML.Models public sealed partial class OnnxConverter { /// - /// ONNX is an intermediate representation format - /// for machine learning models. It is used to make models portable such that you can + /// ONNX is an intermediate representation format + /// for machine learning models. It is used to make models portable such that you can /// train a model using a toolkit and run it in another tookit's runtime, for example, - /// you can create a model using ML.NET, export it to an ONNX-ML model file, - /// then load and run that ONNX-ML model in Windows ML, on an UWP Windows 10 app. 
- /// - /// This API converts an ML.NET model to ONNX-ML format by inspecting the transform pipeline - /// from the end, checking for components that know how to save themselves as ONNX. - /// The first item in the transform pipeline that does not know how to save itself - /// as ONNX, is considered the "input" to the ONNX pipeline. (Ideally this would be the - /// original loader itself, but this may not be possible if the user used unsavable - /// transforms in defining the pipe.) All the columns in the source that are a type the - /// ONNX knows how to deal with will be tracked. Intermediate transformations of the - /// data appearing as new columns will appear in the output block of the ONNX, with names - /// derived from the corresponding column names. The ONNX JSON will be serialized to a + /// you can create a model using ML.NET, export it to an ONNX-ML model file, + /// then load and run that ONNX-ML model in Windows ML, on an UWP Windows 10 app. + /// + /// This API converts an ML.NET model to ONNX-ML format by inspecting the transform pipeline + /// from the end, checking for components that know how to save themselves as ONNX. + /// The first item in the transform pipeline that does not know how to save itself + /// as ONNX, is considered the "input" to the ONNX pipeline. (Ideally this would be the + /// original loader itself, but this may not be possible if the user used unsavable + /// transforms in defining the pipe.) All the columns in the source that are a type the + /// ONNX knows how to deal with will be tracked. Intermediate transformations of the + /// data appearing as new columns will appear in the output block of the ONNX, with names + /// derived from the corresponding column names. The ONNX JSON will be serialized to a /// path defined through the Json option. /// /// This API supports the following arguments: /// indicates the file to write the ONNX protocol buffer file to. This is optional. 
/// indicates the file to write the JSON representation of the ONNX model. This is optional. - /// indicates the name property in the ONNX model. If left unspecified, it will + /// indicates the name property in the ONNX model. If left unspecified, it will /// be the extension-less name of the file specified in the onnx indicates the protocol buffer file /// to write the ONNX representation to. /// indicates the domain name of the model. ONNX uses reverse domain name space indicators. @@ -38,10 +38,10 @@ public sealed partial class OnnxConverter /// is a string array of input column names to omit from the input mapping. /// A common scenario might be to drop the label column, for instance, since it may not be practically /// useful for the pipeline. Note that any columns depending on these naturally cannot be saved. - /// is similar, except for the output schema. Note that the pipeline handler + /// is similar, except for the output schema. Note that the pipeline handler /// is currently not intelligent enough to drop intermediate calculations that produce this value: this will /// merely omit that value from the actual output. - /// + /// /// Transforms that can be exported to ONNX /// 1. Concat /// 2. KeyToVector @@ -49,12 +49,12 @@ public sealed partial class OnnxConverter /// 4. Normalize /// 5. Term /// 6. Categorical - /// + /// /// Learners that can be exported to ONNX /// 1. FastTree /// 2. LightGBM /// 3. Logistic Regression - /// + /// /// See /// for an example on how to train a model and then convert that model to ONNX. 
/// diff --git a/src/Microsoft.ML/Models/RegressionEvaluator.cs b/src/Microsoft.ML/Models/RegressionEvaluator.cs index 2cb05ee092..5425c4d91d 100644 --- a/src/Microsoft.ML/Models/RegressionEvaluator.cs +++ b/src/Microsoft.ML/Models/RegressionEvaluator.cs @@ -59,10 +59,11 @@ public RegressionMetrics Evaluate(PredictionModel model, ILearningPipelineLoader { throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(RegressionEvaluator)} Evaluate."); } - + var metric = RegressionMetrics.FromOverallMetrics(environment, overallMetrics); - Contracts.Assert(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); + if (metric.Count != 1) + throw environment.Except($"Exactly one metric set was expected but found {metric.Count} metrics"); return metric[0]; } diff --git a/src/Microsoft.ML/Models/TrainTestEvaluator.cs b/src/Microsoft.ML/Models/TrainTestEvaluator.cs index e3de7a4e50..9e3abbc169 100644 --- a/src/Microsoft.ML/Models/TrainTestEvaluator.cs +++ b/src/Microsoft.ML/Models/TrainTestEvaluator.cs @@ -22,7 +22,7 @@ public sealed partial class TrainTestEvaluator /// /// Class type that represents input schema. /// Class type that represents prediction schema. - /// Machine learning pipeline that contains , + /// Machine learning pipeline that contains , /// transforms and at least one trainer. /// that represents the test dataset. /// Metrics and predictor model. @@ -184,7 +184,7 @@ public class TrainTestEvaluatorOutput public ClusterMetrics ClusterMetrics; public PredictionModel PredictorModels; - //REVIEW: Add warnings and per instance results and implement + //REVIEW: Add warnings and per instance results and implement //metrics for ranking, clustering and anomaly detection. 
} } diff --git a/src/Microsoft.ML/PredictionModel.cs b/src/Microsoft.ML/PredictionModel.cs index c1dded82b8..e11efa9487 100644 --- a/src/Microsoft.ML/PredictionModel.cs +++ b/src/Microsoft.ML/PredictionModel.cs @@ -31,7 +31,7 @@ internal TransformModel PredictorModel } /// - /// Returns labels that correspond to indices of the score array in the case of + /// Returns labels that correspond to indices of the score array in the case of /// multi-class classification problem. /// /// Label to score mapping diff --git a/src/Microsoft.ML/Runtime/EntryPoints/CVSplit.cs b/src/Microsoft.ML/Runtime/EntryPoints/CVSplit.cs index 79c0c5957e..347fb80e7d 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/CVSplit.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/CVSplit.cs @@ -12,7 +12,7 @@ namespace Microsoft.ML.Runtime.EntryPoints { /// - /// The module that splits the input dataset into the specified number of cross-validation folds, and outputs the 'training' + /// The module that splits the input dataset into the specified number of cross-validation folds, and outputs the 'training' /// and 'testing' portion of the input for each fold. /// public static class CVSplit diff --git a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs index cdf52cf076..1b022aaea9 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs @@ -48,12 +48,12 @@ public sealed class Arguments [Argument(ArgumentType.Required, HelpText = "The training subgraph", SortOrder = 3)] public JArray Nodes; - // This is the subgraph input, that shows that the subgraph should only require one + // This is the subgraph input, that shows that the subgraph should only require one // IDataView as input and indicates the variable name (in the subgraph) for it. 
[Argument(ArgumentType.Required, HelpText = "The training subgraph inputs", SortOrder = 4)] public SubGraphInput Inputs = new SubGraphInput(); - // This is the subgraph output, that shows that the subgraph should produce one + // This is the subgraph output, that shows that the subgraph should produce one // IPredictorModel as output and indicates the variable name (in the subgraph) for it. [Argument(ArgumentType.Required, HelpText = "The training subgraph outputs", SortOrder = 5)] public SubGraphOutput Outputs = new SubGraphOutput(); diff --git a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs index 631b7b6946..eb034fe3c2 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs @@ -54,12 +54,12 @@ public sealed class Arguments [Argument(ArgumentType.Required, HelpText = "The training subgraph", SortOrder = 3)] public JArray Nodes; - // This is the subgraph input, that shows that the subgraph should only require one + // This is the subgraph input, that shows that the subgraph should only require one // IDataView as input and indicates the variable name (in the subgraph) for it. [Argument(ArgumentType.Required, HelpText = "The training subgraph inputs", SortOrder = 4)] public SubGraphInput Inputs = new SubGraphInput(); - // This is the subgraph output, that shows that the subgraph should produce one + // This is the subgraph output, that shows that the subgraph should produce one // IPredictorModel as output and indicates the variable name (in the subgraph) for it. [Argument(ArgumentType.Required, HelpText = "The training subgraph outputs", SortOrder = 5)] public SubGraphOutput Outputs = new SubGraphOutput(); @@ -172,7 +172,7 @@ public static CommonOutputs.MacroOutput CrossValidate( // This will be the final resulting list of nodes that is returned from the macro. 
var subGraphNodes = new List(); - //the input transform model + //the input transform model VariableBinding transformModelVarName = null; if (input.TransformModel != null) transformModelVarName = node.GetInputVariable(nameof(input.TransformModel)); diff --git a/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/GraphRunner.cs b/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/GraphRunner.cs index d29b6f10d2..f6349083af 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/GraphRunner.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/GraphRunner.cs @@ -13,7 +13,7 @@ namespace Microsoft.ML.Runtime.EntryPoints.JsonUtils /// The entry point graph is provided as a of graph nodes. The inputs need to be provided separately: /// the graph runner will only compile a list of required inputs, and the calling code is expected to set them prior /// to running the graph. - /// + /// /// REVIEW: currently, the graph is executed synchronously, one node at a time. This is an implementation choice, we /// probably need to consider parallel asynchronous execution, once we agree on an acceptable syntax for it. /// diff --git a/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/JsonManifestUtils.cs b/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/JsonManifestUtils.cs index fd76ad8565..db1ca0968e 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/JsonManifestUtils.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/JsonUtils/JsonManifestUtils.cs @@ -433,7 +433,7 @@ private static JToken BuildValueToken(IExceptionContext ectx, object value, Type /// /// Build a token for component default value. This will look up the component in the catalog, and if it finds an entry, it will /// build a JSON structure that would be parsed into the default value. - /// + /// /// This is an inherently fragile setup in case when the factory is not trivial, but it will work well for 'property bag' factories /// that we are currently using. 
/// @@ -462,7 +462,7 @@ private static JToken BuildComponentToken(IExceptionContext ectx, IComponentFact } catch (MissingMemberException ex) { - // There was no default constructor found. + // There was no default constructor found. // This should never happen, since ModuleCatalog would error out if there is no default ctor. ectx.Assert(false); throw ectx.Except(ex, "Couldn't find default constructor"); diff --git a/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs index 494fe6b225..05688cd2af 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/OneVersusAllMacro.cs @@ -137,7 +137,7 @@ private static int GetNumberOfClasses(IHostEnvironment env, Arguments input, out [TlcModule.EntryPoint(Desc = "One-vs-All macro (OVA)", Name = "Models.OneVersusAll", XmlInclude = new[] { @"" })] - public static CommonOutputs.MacroOutput OVA( + public static CommonOutputs.MacroOutput OneVersusAll( IHostEnvironment env, Arguments input, EntryPointNode node) @@ -160,7 +160,7 @@ public static CommonOutputs.MacroOutput OVA( macroNodes.AddRange(result.Item1); } - // Use OVA model combiner to combine these models into one. + // Use OVA model combiner to combine these models into one. // Takes in array of models that are binary predictor models and // produces single multiclass predictor model. 
var macroExperiment = new Experiment(env); diff --git a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs index 6082e9467b..30a2ee8c4f 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs @@ -305,7 +305,7 @@ public static CommonOutputs.MacroOutput TrainTest( exp.Add(evalNode, evalOutput); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); - // Marks as an atomic unit that can be run in + // Marks as an atomic unit that can be run in // a distributed fashion. foreach (var subGraphNode in subGraphNodes) subGraphNode.StageId = input.PipelineId; diff --git a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs index 40909ad108..5cea768127 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestSplit.cs @@ -77,7 +77,7 @@ public static string CreateStratificationColumn(IHost host, ref IDataView data, int j = 0; while (data.Schema.TryGetColumnIndex(stratCol, out col)) stratCol = string.Format("{0}_{1:000}", stratColName, j++); - // Construct the stratification column. If user-provided stratification column exists, use HashJoin + // Construct the stratification column. If user-provided stratification column exists, use HashJoin // of it to construct the strat column, otherwise generate a random number and use it. 
if (stratificationColumn == null) { diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs index 09dea02cc2..00facba98e 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpGeneratorUtils.cs @@ -351,7 +351,7 @@ public static string GetComponentName(ModuleCatalog.ComponentInfo component) public static void GenerateSummary(IndentingTextWriter writer, string summary, string[] xmlInclude = null) { - // if the class has an XML it should contain the summary and everything else + // if the class has an XML it should contain the summary and everything else if (xmlInclude != null) { foreach (var line in xmlInclude) diff --git a/src/Source.ruleset b/src/Source.ruleset new file mode 100644 index 0000000000..0668aec49c --- /dev/null +++ b/src/Source.ruleset @@ -0,0 +1,245 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 644318c05a..fe46ca26e1 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -21,7 +21,7 @@ Models.FixedPlattCalibrator Apply a Platt calibrator with a fixed slope and offs Models.MultiClassPipelineEnsemble Combine multiclass classifiers into an ensemble Microsoft.ML.Runtime.EntryPoints.EnsembleCreator 
CreateMultiClassPipelineEnsemble Microsoft.ML.Runtime.EntryPoints.EnsembleCreator+PipelineClassifierInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput Models.MultiOutputRegressionEvaluator Evaluates a multi output regression scored dataset. Microsoft.ML.Runtime.Data.Evaluate MultiOutputRegression Microsoft.ML.Runtime.Data.MultiOutputRegressionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.NaiveCalibrator Apply a Naive calibrator to an input model Microsoft.ML.Runtime.Internal.Calibration.Calibrate Naive Microsoft.ML.Runtime.Internal.Calibration.Calibrate+NoArgumentsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CalibratorOutput -Models.OneVersusAll One-vs-All macro (OVA) Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro OVA Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro+Output] +Models.OneVersusAll One-vs-All macro (OVA) Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro OneVersusAll Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro+Output] Models.OnnxConverter Converts the model to ONNX format. 
Microsoft.ML.Runtime.Model.Onnx.SaveOnnxCommand Apply Microsoft.ML.Runtime.Model.Onnx.SaveOnnxCommand+Arguments Microsoft.ML.Runtime.Model.Onnx.SaveOnnxCommand+Output Models.OvaModelCombiner Combines a sequence of PredictorModels into a single model Microsoft.ML.Runtime.Learners.OvaPredictor CombineOvaModels Microsoft.ML.Runtime.EntryPoints.ModelOperations+CombineOvaPredictorModelsInput Microsoft.ML.Runtime.EntryPoints.ModelOperations+PredictorModelOutput Models.PAVCalibrator Apply a PAV calibrator to an input model Microsoft.ML.Runtime.Internal.Calibration.Calibrate Pav Microsoft.ML.Runtime.Internal.Calibration.Calibrate+NoArgumentsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CalibratorOutput diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index adfa42e50d..bb76ab8523 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -23,7 +23,7 @@ public class StochasticDualCoordinateAscentClassifierBench private static IrisData[][] s_batches; private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 }; private readonly Random r = new Random(0); - private readonly static IrisData s_example = new IrisData() + private static readonly IrisData s_example = new IrisData() { SepalLength = 3.3f, SepalWidth = 1.6f, diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/ContractsCheckTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/ContractsCheckTest.cs new file mode 100644 index 0000000000..a004994f4f --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/ContractsCheckTest.cs @@ -0,0 +1,87 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.IO; +using System.Reflection; +using System.Threading; +using Microsoft.ML.CodeAnalyzer.Tests.Helpers; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.CodeAnalyzer.Tests +{ + public sealed class ContractsCheckTest : DiagnosticVerifier + { + private static string _contractsSource; + internal static string Source => TestUtils.EnsureSourceLoaded(ref _contractsSource, "ContractsCheckResource.cs"); + + [Fact] + public void ContractsCheck() + { + var analyzer = GetCSharpDiagnosticAnalyzer(); + var diagName = analyzer.SupportedDiagnostics[0]; + var diagExceptExp = analyzer.SupportedDiagnostics[1]; + var diagMsg = analyzer.SupportedDiagnostics[2]; + var diagDecode = analyzer.SupportedDiagnostics[3]; + + const int basis = 10; + var expected = new DiagnosticResult[] { + diagName.CreateDiagnosticResult(basis + 8, 46, "CheckParam", "paramName", "\"p\""), + diagName.CreateDiagnosticResult(basis + 9, 46, "CheckParam", "paramName", "nameof(p) + nameof(p)"), + diagName.CreateDiagnosticResult(basis + 11, 28, "CheckValue", "paramName", "\"p\""), + diagName.CreateDiagnosticResult(basis + 13, 39, "CheckUserArg", "name", "\"foo\""), + diagExceptExp.CreateDiagnosticResult(basis + 15, 13, "Except"), + diagExceptExp.CreateDiagnosticResult(basis + 16, 13, "ExceptParam"), + diagName.CreateDiagnosticResult(basis + 22, 39, "ExceptParam", "paramName", "\"env\""), + diagMsg.CreateDiagnosticResult(basis + 24, 29, "Check", "$\"Hello {foo} is cool\""), + diagMsg.CreateDiagnosticResult(basis + 28, 29, "Check", "string.Format(\"Hello {0} is cool\", foo)"), + diagMsg.CreateDiagnosticResult(basis + 32, 35, "Check", "\"Less fine: \" + env.GetType().Name"), + diagName.CreateDiagnosticResult(basis + 34, 17, "CheckUserArg", "name", "\"p\""), + diagDecode.CreateDiagnosticResult(basis + 39, 41, "CheckDecode", "\"This message is suspicious\""), + }; + + VerifyCSharpDiagnostic(Source, expected); + } + + [Fact] + public void ContractsCheckDecoy() + { + // Run a test with a 
"decoy" Contacts that has the same signature on the method, + // except the namespace is distinct so it is a different type. We should not find + // diagnostics on a class like this! + const string decoySource = @" +using System; +namespace TestNamespace +{ + public static class Contracts + + { + public static Exception ExceptParam(string paramName, string msg) => null; + } + + public sealed class TypeName + { + public TypeName() + => Contracts.ExceptParam(""myAwesomeParameter"", ""What a great thing""); + } +}"; + VerifyCSharpDiagnostic(decoySource, new DiagnosticResult[0]); + } + } + + public sealed class ContractsCheckFixTest : CodeFixVerifier + { + private static string _preFix; + private static string _postFix; + + [Fact] + public void ContractsCheckFix() + { + string test = TestUtils.EnsureSourceLoaded(ref _preFix, "ContractsCheckBeforeFix.cs"); + string expected = TestUtils.EnsureSourceLoaded(ref _postFix, "ContractsCheckAfterFix.cs"); + + VerifyCSharpFix(test, expected); + } + } +} diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CodeFixVerifier.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CodeFixVerifier.cs new file mode 100644 index 0000000000..489ec5c446 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/CodeFixVerifier.cs @@ -0,0 +1,194 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CodeActions; +using Microsoft.CodeAnalysis.CodeFixes; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Formatting; +using Microsoft.CodeAnalysis.Simplification; +using Xunit; + +namespace Microsoft.ML.CodeAnalyzer.Tests.Helpers +{ + /// + /// Superclass of all Unit tests made for diagnostics with codefixes. 
+ /// Contains methods used to verify correctness of codefixes + /// + public abstract partial class CodeFixVerifier : DiagnosticVerifier + where TDiag : DiagnosticAnalyzer, new() + where TFix : CodeFixProvider, new() + { + /// + /// Returns the codefix being tested (C#) - to be implemented in non-abstract class + /// + /// The CodeFixProvider to be used for CSharp code + protected virtual TFix GetCSharpCodeFixProvider() + => new TFix(); + + /// + /// Returns the codefix being tested (VB) - to be implemented in non-abstract class + /// + /// The CodeFixProvider to be used for VisualBasic code + protected virtual TFix GetBasicCodeFixProvider() + => new TFix(); + + /// + /// Called to test a C# codefix when applied on the input string as a source + /// + /// A class in the form of a string before the CodeFix was applied to it + /// A class in the form of a string after the CodeFix was applied to it + /// Index determining which codefix to apply if there are multiple + /// A bool controlling whether or not the test will fail if the CodeFix introduces other warnings after being applied + protected void VerifyCSharpFix(string oldSource, string newSource, int? codeFixIndex = null, bool allowNewCompilerDiagnostics = false) + { + VerifyFix(LanguageNames.CSharp, GetCSharpDiagnosticAnalyzer(), GetCSharpCodeFixProvider(), oldSource, newSource, codeFixIndex, allowNewCompilerDiagnostics); + } + + /// + /// Called to test a VB codefix when applied on the input string as a source + /// + /// A class in the form of a string before the CodeFix was applied to it + /// A class in the form of a string after the CodeFix was applied to it + /// Index determining which codefix to apply if there are multiple + /// A bool controlling whether or not the test will fail if the CodeFix introduces other warnings after being applied + protected void VerifyBasicFix(string oldSource, string newSource, int? 
codeFixIndex = null, bool allowNewCompilerDiagnostics = false) + { + VerifyFix(LanguageNames.VisualBasic, GetBasicDiagnosticAnalyzer(), GetBasicCodeFixProvider(), oldSource, newSource, codeFixIndex, allowNewCompilerDiagnostics); + } + + /// + /// General verifier for codefixes. + /// Creates a Document from the source string, then gets diagnostics on it and applies the relevant codefixes. + /// Then gets the string after the codefix is applied and compares it with the expected result. + /// Note: If any codefix causes new diagnostics to show up, the test fails unless allowNewCompilerDiagnostics is set to true. + /// + /// The analyzer to be applied to the source code + /// The codefix to be applied to the code wherever the relevant Diagnostic is found + /// A class in the form of a string before the CodeFix was applied to it + /// A class in the form of a string after the CodeFix was applied to it + /// Index determining which codefix to apply if there are multiple + /// A bool controlling whether or not the test will fail if the CodeFix introduces other warnings after being applied + private void VerifyFix(string language, DiagnosticAnalyzer analyzer, CodeFixProvider codeFixProvider, string oldSource, string newSource, int? 
codeFixIndex, bool allowNewCompilerDiagnostics) + { + var document = CreateDocument(oldSource); + var analyzerDiagnostics = GetSortedDiagnosticsFromDocuments(analyzer, new[] { document }); + var compilerDiagnostics = GetCompilerDiagnostics(document); + int attempts = analyzerDiagnostics.Length; + + for (int i = 0; i < attempts; ++i) + { + var actions = new List(); + var context = new CodeFixContext(document, + analyzerDiagnostics[i + analyzerDiagnostics.Length - attempts], (a, d) => actions.Add(a), CancellationToken.None); + codeFixProvider.RegisterCodeFixesAsync(context).Wait(); + + if (actions.Count == 0) + continue; + + if (codeFixIndex != null) + { + document = ApplyFix(document, actions.ElementAt((int)codeFixIndex)); + break; + } + + document = ApplyFix(document, actions.ElementAt(0)); + analyzerDiagnostics = GetSortedDiagnosticsFromDocuments(analyzer, new[] { document }); + + var newCompilerDiagnostics = GetNewDiagnostics(compilerDiagnostics, GetCompilerDiagnostics(document)); + + //check if applying the code fix introduced any new compiler diagnostics + if (!allowNewCompilerDiagnostics && newCompilerDiagnostics.Any()) + { + // Format and get the compiler diagnostics again so that the locations make sense in the output + document = document.WithSyntaxRoot(Formatter.Format(document.GetSyntaxRootAsync().Result, Formatter.Annotation, document.Project.Solution.Workspace)); + newCompilerDiagnostics = GetNewDiagnostics(compilerDiagnostics, GetCompilerDiagnostics(document)); + + string diags = string.Join("\r\n", newCompilerDiagnostics.Select(d => d.ToString())); + string root = document.GetSyntaxRootAsync().Result.ToFullString(); + Assert.True(false, + $"Fix introduced new compiler diagnostics:\r\n{diags}\r\n\r\nNew document:\r\n{root}\r\n"); + } + + //check if there are analyzer diagnostics left after the code fix + if (analyzerDiagnostics.Length == 0) + break; + } + + //after applying all of the code fixes, compare the resulting string to the input one + var 
actual = GetStringFromDocument(document); + Assert.Equal(newSource, actual); + } + + /// + /// Apply the inputted CodeAction to the input document. + /// Meant to be used to apply codefixes. + /// + /// The Document to apply the fix on + /// A CodeAction that will be applied to the Document. + /// A Document with the changes from the CodeAction + private static Document ApplyFix(Document document, CodeAction codeAction) + { + var operations = codeAction.GetOperationsAsync(CancellationToken.None).Result; + var solution = operations.OfType().Single().ChangedSolution; + return solution.GetDocument(document.Id); + } + + /// + /// Compare two collections of Diagnostics, and return a list of any new diagnostics that appear only in the second collection. + /// Note: Considers Diagnostics to be the same if they have the same Ids. In the case of multiple diagnostics with the same Id in a row, + /// this method may not necessarily return the new one. + /// + /// The Diagnostics that existed in the code before the CodeFix was applied + /// The Diagnostics that exist in the code after the CodeFix was applied + /// A list of Diagnostics that only surfaced in the code after the CodeFix was applied + private static IEnumerable GetNewDiagnostics(IEnumerable diagnostics, IEnumerable newDiagnostics) + { + var oldArray = diagnostics.OrderBy(d => d.Location.SourceSpan.Start).ToArray(); + var newArray = newDiagnostics.OrderBy(d => d.Location.SourceSpan.Start).ToArray(); + + int oldIndex = 0; + int newIndex = 0; + + while (newIndex < newArray.Length) + { + if (oldIndex < oldArray.Length && oldArray[oldIndex].Id == newArray[newIndex].Id) + { + ++oldIndex; + ++newIndex; + } + else + { + yield return newArray[newIndex++]; + } + } + } + + /// + /// Get the existing compiler diagnostics on the input document.
+ /// + /// The Document to run the compiler diagnostic analyzers on + /// The compiler diagnostics that were found in the code + private static IEnumerable GetCompilerDiagnostics(Document document) + { + return document.GetSemanticModelAsync().Result.GetDiagnostics(); + } + + /// + /// Given a document, turn it into a string based on the syntax root + /// + /// The Document to be converted to a string + /// A string containing the syntax of the Document after formatting + private static string GetStringFromDocument(Document document) + { + var simplifiedDoc = Simplifier.ReduceAsync(document, Simplifier.Annotation).Result; + var root = simplifiedDoc.GetSyntaxRootAsync().Result; + root = Formatter.Format(root, Formatter.Annotation, simplifiedDoc.Project.Solution.Workspace); + return root.GetText().ToString(); + } + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticResult.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticResult.cs new file mode 100644 index 0000000000..d915ab7cc3 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticResult.cs @@ -0,0 +1,61 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.CodeAnalysis; +using System; + +namespace Microsoft.ML.CodeAnalyzer.Tests.Helpers +{ + /// + /// Location where the diagnostic appears, as determined by path, line number, and column number. 
+ /// + public struct DiagnosticResultLocation + { + public string Path { get; } + public int Line { get; } + public int Column { get; } + + public DiagnosticResultLocation(string path, int line, int column) + { + if (line < -1) + throw new ArgumentOutOfRangeException(nameof(line), "Must be >= -1"); + + if (column < -1) + throw new ArgumentOutOfRangeException(nameof(column), "Must be >= -1"); + + Path = path; + Line = line; + Column = column; + } + } + + /// + /// Struct that stores information about a Diagnostic appearing in a source + /// + public struct DiagnosticResult + { + private DiagnosticResultLocation[] _locations; + + public DiagnosticResultLocation[] Locations { + get => _locations ?? new DiagnosticResultLocation[0]; + set => _locations = value; + } + + public DiagnosticSeverity Severity { get; set; } + public string Id { get; set; } + public string Message { get; set; } + + public DiagnosticResultLocation? Location { + get => Locations.Length > 0 ? Locations[0] : (DiagnosticResultLocation?)null; + set => _locations = value == null ? null : new DiagnosticResultLocation[] { value.Value }; + } + + public string Path => + Locations.Length > 0 ? Locations[0].Path : ""; + public int Line => + Locations.Length > 0 ? Locations[0].Line : -1; + public int Column => + Locations.Length > 0 ? Locations[0].Column : -1; + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs new file mode 100644 index 0000000000..2d51ac6559 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/DiagnosticVerifier.cs @@ -0,0 +1,415 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Reflection; +using System.Text; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Text; +using Xunit; + +namespace Microsoft.ML.CodeAnalyzer.Tests.Helpers +{ + /// + /// Superclass of all Unit Tests for DiagnosticAnalyzers + /// + public abstract partial class DiagnosticVerifier + where TDiag : DiagnosticAnalyzer, new() + { + private protected DiagnosticVerifier() + { + } + + #region To be implemented by Test classes + /// + /// Get the CSharp analyzer being tested - to be implemented in non-abstract class + /// + protected virtual TDiag GetCSharpDiagnosticAnalyzer() + => new TDiag(); + + /// + /// Get the Visual Basic analyzer being tested - to be implemented in non-abstract class + /// + protected virtual DiagnosticAnalyzer GetBasicDiagnosticAnalyzer() + => new TDiag(); + + #endregion + + #region Verifier wrappers + + /// + /// Called to test a C# DiagnosticAnalyzer when applied on the single input string as a source + /// Note: input a DiagnosticResult for each Diagnostic expected + /// + /// A class in the form of a string to run the analyzer on + /// DiagnosticResults that should appear after the analyzer is run on the source + protected void VerifyCSharpDiagnostic(string source, params DiagnosticResult[] expected) + { + VerifyDiagnostics(new[] { source }, GetCSharpDiagnosticAnalyzer(), expected); + } + + /// + /// Called to test a C# DiagnosticAnalyzer when applied on the input strings as a source + /// Note: input a DiagnosticResult for each Diagnostic expected + /// + /// An array of strings to create source documents from to run the analyzers on + /// DiagnosticResults that should appear after the analyzer is run on the sources + protected void VerifyCSharpDiagnostic(string[] sources, params DiagnosticResult[] expected) + { +
VerifyDiagnostics(sources, GetCSharpDiagnosticAnalyzer(), expected); + } + + /// + /// General method that gets a collection of actual diagnostics found in the source after the analyzer is run, + /// then verifies each of them. + /// + /// An array of strings to create source documents from to run the analyzers on + /// The analyzer to be run on the source code + /// DiagnosticResults that should appear after the analyzer is run on the sources + private void VerifyDiagnostics(string[] sources, DiagnosticAnalyzer analyzer, params DiagnosticResult[] expected) + { + var diagnostics = GetSortedDiagnostics(sources, analyzer); + VerifyDiagnosticResults(diagnostics, analyzer, expected); + } + + #endregion + + #region Actual comparisons and verifications + /// + /// Checks each of the actual Diagnostics found and compares them with the corresponding DiagnosticResult in the array of expected results. + /// Diagnostics are considered equal only if the DiagnosticResultLocation, Id, Severity, and Message of the DiagnosticResult match the actual diagnostic. + /// + /// The Diagnostics found by the compiler after running the analyzer on the source code + /// The analyzer that was being run on the sources + /// Diagnostic Results that should have appeared in the code + private static void VerifyDiagnosticResults(IEnumerable actualResults, DiagnosticAnalyzer analyzer, params DiagnosticResult[] expectedResults) + { + int expectedCount = expectedResults.Length; + int actualCount = actualResults.Count(); + + if (expectedCount != actualCount) + { + string diagnosticsOutput = actualResults.Any() ? 
FormatDiagnostics(analyzer, actualResults.ToArray()) : " NONE."; + + Assert.True(false, + $"Mismatch between number of diagnostics returned, expected \"{expectedCount}\" actual \"{actualCount}\"\r\n\r\nDiagnostics:\r\n{diagnosticsOutput}\r\n"); + } + + for (int i = 0; i < expectedResults.Length; i++) + { + var actual = actualResults.ElementAt(i); + var expected = expectedResults[i]; + + if (expected.Line == -1 && expected.Column == -1) + { + if (actual.Location != Location.None) + { + Assert.True(false, + $"Expected:\nA project diagnostic with No location\nActual:\n{FormatDiagnostics(analyzer, actual)}"); + } + } + else + { + VerifyDiagnosticLocation(analyzer, actual, actual.Location, expected.Locations.First()); + var additionalLocations = actual.AdditionalLocations.ToArray(); + + if (additionalLocations.Length != expected.Locations.Length - 1) + { + Assert.True(false, + string.Format("Expected {0} additional locations but got {1} for Diagnostic:\r\n {2}\r\n", + expected.Locations.Length - 1, additionalLocations.Length, + FormatDiagnostics(analyzer, actual))); + } + + for (int j = 0; j < additionalLocations.Length; ++j) + { + VerifyDiagnosticLocation(analyzer, actual, additionalLocations[j], expected.Locations[j + 1]); + } + } + + if (actual.Id != expected.Id) + { + Assert.True(false, + string.Format("Expected diagnostic id to be \"{0}\" was \"{1}\"\r\n\r\nDiagnostic:\r\n {2}\r\n", + expected.Id, actual.Id, FormatDiagnostics(analyzer, actual))); + } + + if (actual.Severity != expected.Severity) + { + Assert.True(false, + string.Format("Expected diagnostic severity to be \"{0}\" was \"{1}\"\r\n\r\nDiagnostic:\r\n {2}\r\n", + expected.Severity, actual.Severity, FormatDiagnostics(analyzer, actual))); + } + + if (actual.GetMessage() != expected.Message) + { + Assert.True(false, + string.Format("Expected diagnostic message to be \"{0}\" was \"{1}\"\r\n\r\nDiagnostic:\r\n {2}\r\n", + expected.Message, actual.GetMessage(), FormatDiagnostics(analyzer, actual))); + } + } + 
} + + /// + /// Helper method to VerifyDiagnosticResult that checks the location of a diagnostic and compares it with the location in the expected DiagnosticResult. + /// + /// The analyzer that was being run on the sources + /// The diagnostic that was found in the code + /// The Location of the Diagnostic found in the code + /// The DiagnosticResultLocation that should have been found + private static void VerifyDiagnosticLocation(DiagnosticAnalyzer analyzer, Diagnostic diagnostic, Location actual, DiagnosticResultLocation expected) + { + var actualSpan = actual.GetLineSpan(); + + Assert.True(actualSpan.Path == expected.Path || (actualSpan.Path != null && actualSpan.Path.Contains("Test0.") && expected.Path.Contains("Test.")), + string.Format("Expected diagnostic to be in file \"{0}\" was actually in file \"{1}\"\r\n\r\nDiagnostic:\r\n {2}\r\n", + expected.Path, actualSpan.Path, FormatDiagnostics(analyzer, diagnostic))); + + var actualLinePosition = actualSpan.StartLinePosition; + + // Only check line position if there is an actual line in the real diagnostic + if (actualLinePosition.Line > 0) + { + if (actualLinePosition.Line + 1 != expected.Line) + { + Assert.True(false, + string.Format("Expected diagnostic to be on line \"{0}\" was actually on line \"{1}\"\r\n\r\nDiagnostic:\r\n {2}\r\n", + expected.Line, actualLinePosition.Line + 1, FormatDiagnostics(analyzer, diagnostic))); + } + } + + // Only check column position if there is an actual column position in the real diagnostic + if (actualLinePosition.Character > 0) + { + if (actualLinePosition.Character + 1 != expected.Column) + { + Assert.True(false, + string.Format("Expected diagnostic to start at column \"{0}\" was actually at column \"{1}\"\r\n\r\nDiagnostic:\r\n {2}\r\n", + expected.Column, actualLinePosition.Character + 1, FormatDiagnostics(analyzer, diagnostic))); + } + } + } + #endregion + + #region Formatting Diagnostics + /// + /// Helper method to format a Diagnostic into an easily readable string + 
/// + /// The analyzer that this verifier tests + /// The Diagnostics to be formatted + /// The Diagnostics formatted as a string + private static string FormatDiagnostics(DiagnosticAnalyzer analyzer, params Diagnostic[] diagnostics) + { + var builder = new StringBuilder(); + for (int i = 0; i < diagnostics.Length; ++i) + { + builder.AppendLine("// " + diagnostics[i].ToString()); + + var analyzerType = analyzer.GetType(); + var rules = analyzer.SupportedDiagnostics; + + foreach (var rule in rules) + { + if (rule != null && rule.Id == diagnostics[i].Id) + { + var location = diagnostics[i].Location; + if (location == Location.None) + { + builder.AppendFormat("GetGlobalResult({0}.{1})", analyzerType.Name, rule.Id); + } + else + { + Assert.True(location.IsInSource, + $"Test base does not currently handle diagnostics in metadata locations. Diagnostic in metadata: {diagnostics[i]}\r\n"); + + string resultMethodName = diagnostics[i].Location.SourceTree.FilePath.EndsWith(".cs") ? "GetCSharpResultAt" : "GetBasicResultAt"; + var linePosition = diagnostics[i].Location.GetLineSpan().StartLinePosition; + + builder.AppendFormat("{0}({1}, {2}, {3}.{4})", + resultMethodName, + linePosition.Line + 1, + linePosition.Character + 1, + analyzerType.Name, + rule.Id); + } + + if (i != diagnostics.Length - 1) + { + builder.Append(','); + } + + builder.AppendLine(); + break; + } + } + } + return builder.ToString(); + } + #endregion + + private static readonly MetadataReference CorlibReference = RefFromType(); + // I'd hope for a better way for these two. 
+ private static readonly MetadataReference StandardReference = MetadataReference.CreateFromFile(Assembly.Load("netstandard, Version=2.0.0.0").Location); + private static readonly MetadataReference RuntimeReference = MetadataReference.CreateFromFile(Assembly.Load("System.Runtime, Version=0.0.0.0").Location); + + private static readonly MetadataReference SystemCoreReference = MetadataReference.CreateFromFile(typeof(Enumerable).Assembly.Location); + private static readonly MetadataReference CSharpSymbolsReference = RefFromType(); + private static readonly MetadataReference CodeAnalysisReference = RefFromType(); + + private static readonly MetadataReference MLNetCoreReference = RefFromType(); + private static readonly MetadataReference MLNetDataReference = RefFromType(); + + private static MetadataReference RefFromType() + => MetadataReference.CreateFromFile(typeof(TType).Assembly.Location); + + internal const string DefaultFilePathPrefix = "Test"; + internal const string CSharpDefaultFileExt = "cs"; + internal const string TestProjectName = "TestProject"; + + #region Get Diagnostics + + /// + /// Given classes in the form of strings and a DiagnosticAnalyzer to apply to them, return the diagnostics found in the strings after converting them to documents. + /// + /// Classes in the form of strings + /// The analyzer to be run on the sources + /// An array of Diagnostics that surfaced in the source code, sorted by Location + private static Diagnostic[] GetSortedDiagnostics(string[] sources, DiagnosticAnalyzer analyzer) + { + return GetSortedDiagnosticsFromDocuments(analyzer, GetDocuments(sources)); + } + + /// + /// Given an analyzer and a document to apply it to, run the analyzer and gather an array of diagnostics found in it. + /// The returned diagnostics are then ordered by location in the source document.
+ /// + /// The analyzer to run on the documents + /// The Documents that the analyzer will be run on + /// An IEnumerable of Diagnostics that surfaced in the source code, sorted by Location + protected static Diagnostic[] GetSortedDiagnosticsFromDocuments(DiagnosticAnalyzer analyzer, Document[] documents) + { + var projects = new HashSet(); + + foreach (var document in documents) + { + projects.Add(document.Project); + } + + var diagnostics = new List(); + foreach (var project in projects) + { + var comp = project.GetCompilationAsync().Result; + var compilationWithAnalyzers = comp.WithAnalyzers(ImmutableArray.Create(analyzer)); + var diags = compilationWithAnalyzers.GetAnalyzerDiagnosticsAsync().Result; + foreach (var diag in diags) + { + if (diag.Location == Location.None || diag.Location.IsInMetadata) + { + diagnostics.Add(diag); + } + else + { + for (int i = 0; i < documents.Length; i++) + { + var document = documents[i]; + var tree = document.GetSyntaxTreeAsync().Result; + if (tree == diag.Location.SourceTree) + { + diagnostics.Add(diag); + } + } + } + } + } + + var results = SortDiagnostics(diagnostics); + diagnostics.Clear(); + return results; + } + + /// + /// Sort diagnostics by location in source document + /// + /// The list of Diagnostics to be sorted + /// An IEnumerable containing the Diagnostics in order of Location + private static Diagnostic[] SortDiagnostics(IEnumerable diagnostics) + { + return diagnostics.OrderBy(d => d.Location.SourceSpan.Start).ToArray(); + } + + #endregion + + #region Set up compilation and documents + /// + /// Given an array of strings as sources and a language, turn them into a project and return the documents and spans of it. 
+ /// + /// Classes in the form of strings + /// An array of the Documents produced from the sources + private static Document[] GetDocuments(string[] sources) + { + var project = CreateProject(sources); + var documents = project.Documents.ToArray(); + + if (sources.Length != documents.Length) + { + throw new SystemException("Amount of sources did not match amount of Documents created"); + } + + return documents; + } + + /// + /// Create a Document from a string through creating a project that contains it. + /// + /// Classes in the form of a string + /// A Document created from the source string + protected static Document CreateDocument(string source) + { + return CreateProject(new[] { source }).Documents.First(); + } + + /// + /// Create a project using the input strings as sources. + /// + /// Classes in the form of strings + /// A Project created out of the Documents created from the source strings + private static Project CreateProject(string[] sources) + { + string fileNamePrefix = DefaultFilePathPrefix; + + ProjectId projectId = ProjectId.CreateNewId(debugName: TestProjectName); + + var solution = new AdhocWorkspace() + .CurrentSolution + .AddProject(projectId, TestProjectName, TestProjectName, LanguageNames.CSharp) + .AddMetadataReference(projectId, CorlibReference) + .AddMetadataReference(projectId, StandardReference) + .AddMetadataReference(projectId, RuntimeReference) + .AddMetadataReference(projectId, SystemCoreReference) + .AddMetadataReference(projectId, CSharpSymbolsReference) + .AddMetadataReference(projectId, CodeAnalysisReference) + .AddMetadataReference(projectId, MLNetCoreReference) + .AddMetadataReference(projectId, MLNetDataReference); + + int count = 0; + foreach (string source in sources) + { + string sourceTrimmed = source.TrimStart('\r', '\n'); + string newFileName = fileNamePrefix + count + "."
+ CSharpDefaultFileExt; + DocumentId documentId = DocumentId.CreateNewId(projectId, debugName: newFileName); + solution = solution.AddDocument(documentId, newFileName, SourceText.From(sourceTrimmed)); + count++; + } + var proj = solution.GetProject(projectId); + proj = proj.WithCompilationOptions(proj.CompilationOptions.WithOutputKind(OutputKind.DynamicallyLinkedLibrary)); + return proj; + } + #endregion + } +} diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/TestUtils.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/TestUtils.cs new file mode 100644 index 0000000000..73430ee488 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Helpers/TestUtils.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; +using System.Reflection; +using System.Threading; +using Microsoft.CodeAnalysis; + +namespace Microsoft.ML.CodeAnalyzer.Tests.Helpers +{ + internal static class TestUtils + { + public static DiagnosticResult CreateDiagnosticResult(this DiagnosticDescriptor desc, int line, int column, params object[] formatArgs) + { + return new DiagnosticResult + { + Id = desc.Id, + Message = string.Format(desc.MessageFormat.ToString(), formatArgs), + Severity = desc.DefaultSeverity, + Location = new DiagnosticResultLocation("Test0.cs", line, column), + }; + } + + public static ref string EnsureSourceLoaded(ref string source, string resourceName) + { + if (source == null) + { + string loadedSource; + using (var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName)) + using (var reader = new StreamReader(stream)) + loadedSource = reader.ReadToEnd(); + Interlocked.CompareExchange(ref source, loadedSource, null); + } + return ref source; + } + } +} diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/InstanceInitializerTest.cs 
b/test/Microsoft.ML.CodeAnalyzer.Tests/InstanceInitializerTest.cs new file mode 100644 index 0000000000..8baed00840 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/InstanceInitializerTest.cs @@ -0,0 +1,44 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.CodeAnalyzer.Tests.Helpers; +using Xunit; + +namespace Microsoft.ML.CodeAnalyzer.Tests +{ + public sealed class InstanceInitializerTest : DiagnosticVerifier + { + [Fact] + public void InstanceInitializer() + { + const string test = @" +namespace TestNamespace +{ + class TypeName + { + private int _foo = 5; + private int _bar; + private const int _bizz = 2; + private static int _muck = 4; + private readonly float _blorg = 3.0f; + private string _fooBacking; + public string Foo { get; set => _fooBacking = value; } + public string Bar { get; } = ""Hello""; + public static string Bizz { get; } = ""Nice""; + } +}"; + + var analyzer = GetCSharpDiagnosticAnalyzer(); + var diag = analyzer.SupportedDiagnostics[0]; + + var expected = new DiagnosticResult[] { + diag.CreateDiagnosticResult(5, 21, "_foo", "field"), + diag.CreateDiagnosticResult(9, 32, "_blorg", "field"), + diag.CreateDiagnosticResult(12, 23, "Bar", "property"), + }; + + VerifyCSharpDiagnostic(test, expected); + } + } +} diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj b/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj new file mode 100644 index 0000000000..341eb710f2 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Microsoft.ML.CodeAnalyzer.Tests.csproj @@ -0,0 +1,20 @@ + + + netcoreapp2.0 + + + + + + %(RecursiveDir)%(Filename)%(Extension) + + + + + + + + + + + \ No newline at end of file diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/NameTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/NameTest.cs 
using Microsoft.ML.CodeAnalyzer.Tests.Helpers;
using Xunit;

namespace Microsoft.ML.CodeAnalyzer.Tests
{
    /// <summary>
    /// Diagnostic tests for identifier naming. Each test feeds an embedded C# source text to the
    /// analyzer obtained from the base class and lists the diagnostics it expects back.
    /// </summary>
    /// <remarks>
    /// The numeric arguments of every expected result locate the named identifier inside the
    /// embedded source text, so the layout of those texts (line breaks and indentation) must not
    /// be changed without updating the expectations.
    /// </remarks>
    public sealed class NameTest : DiagnosticVerifier
    {
        /// <summary>
        /// Expects the analyzer's first supported diagnostic on the badly named fields of
        /// <see cref="PrivateTestSource"/>, and on nothing else in it.
        /// </summary>
        [Fact]
        public void PrivateFieldName()
        {
            var analyzer = GetCSharpDiagnosticAnalyzer();
            var diag = analyzer.SupportedDiagnostics[0];

            var expected = new DiagnosticResult[] {
                diag.CreateDiagnosticResult(5, 21, "foo"),
                diag.CreateDiagnosticResult(7, 24, "_Bubba"),
                diag.CreateDiagnosticResult(8, 22, "_shouldParseHTML"),
                diag.CreateDiagnosticResult(11, 23, "BillyClub"),
                diag.CreateDiagnosticResult(13, 30, "bob"),
                diag.CreateDiagnosticResult(14, 30, "CHAZ"),
                diag.CreateDiagnosticResult(17, 21, "_liveFromNYC"),
                diag.CreateDiagnosticResult(19, 28, "nice"),
            };

            VerifyCSharpDiagnostic(PrivateTestSource, expected);
        }

        // Input for PrivateFieldName: a class with a mix of accepted and rejected field and
        // constant names. Names absent from the expected list above (e.g. _bar, Alice) are the
        // ones that must not be reported.
        internal const string PrivateTestSource = @"
namespace TestNamespace
{
    class TypeName
    {
        private int foo;
        private int _bar;
        private string _Bubba;
        private bool _shouldParseHTML;
        private string _who2Call;
        float _burgers4babies;
        private float BillyClub;
        private const string Alice = ""Hello"";
        private const string bob = ""Hello"";
        private const string CHAZ = ""Hello"";
        private const string DEBora = ""Hello"";
        private const string _yuck = ""Hello"";
        private int _liveFromNYC;
        private int _liveFromNYCity;
        private static int nice;
    }
}";

        /// <summary>
        /// Expects diagnostics on the badly named types, members, events and enum values of
        /// <see cref="TestSource"/>.
        /// </summary>
        [Fact]
        public void MoreNameTests()
        {
            var analyzer = GetCSharpDiagnosticAnalyzer();
            // diagP is the same descriptor PrivateFieldName uses; diagG is the analyzer's second
            // descriptor. Presumably "private" and "general" naming rules -- confirm against the analyzer.
            var diagP = analyzer.SupportedDiagnostics[0];
            var diagG = analyzer.SupportedDiagnostics[1];

            var expected = new DiagnosticResult[] {
                diagG.CreateDiagnosticResult(6, 11, "CLASS"),
                diagG.CreateDiagnosticResult(8, 20, "alice"),
                diagG.CreateDiagnosticResult(9, 21, "_bob"),
                diagG.CreateDiagnosticResult(10, 22, "_chaz"),
                diagG.CreateDiagnosticResult(11, 30, "emily"),
                diagG.CreateDiagnosticResult(11, 37, "_francis"),
                diagG.CreateDiagnosticResult(16, 21, "this_is_not_python"),
                diagG.CreateDiagnosticResult(17, 21, "thisIsNotJava"),
                diagP.CreateDiagnosticResult(21, 30, "BadEvent"),
                diagG.CreateDiagnosticResult(22, 29, "bad_event"),
                diagG.CreateDiagnosticResult(25, 30, "_badEv"),
                diagG.CreateDiagnosticResult(27, 29, "one"),
                diagG.CreateDiagnosticResult(27, 39, "three"),
                diagG.CreateDiagnosticResult(28, 22, "enumb"),
                diagG.CreateDiagnosticResult(28, 35, "Two_Two"),
                diagG.CreateDiagnosticResult(28, 44, "_three"),
                diagG.CreateDiagnosticResult(30, 25, "_m2"),
                diagG.CreateDiagnosticResult(37, 12, "marco"),
                diagG.CreateDiagnosticResult(37, 31, "polo"),
            };

            VerifyCSharpDiagnostic(TestSource, expected);
        }

        // Input for MoreNameTests, and also the "before" text of NameFixTest.NameFix.
        internal const string TestSource = @"
using System;
namespace silly { }
namespace NotSilly { }
namespace foo.bar.Biz
{
    class CLASS
    {
        public int alice { get; }
        private int _bob { get; }
        internal int _chaz;
        internal int Debora, emily, _francis;
        int _george;

        CLASS() { }

        public void this_is_not_python() { }
        public void thisIsNotJava() { }
        public void ThisIsCSharp() { }

        private event Action _goodEvent;
        private event Action BadEvent;
        public event Action bad_event;
        public event Action GoodEvent;
        private event Action GoodEv { add { } remove { } }
        private event Action _badEv { add { } remove { } }

        public enum EnumA { one, Two, three }
        private enum enumb { One, Two_Two, _three }

        protected float _m2;
        protected float M4;
    }

    class A { }
    class BeClass { }

    struct marco { public int polo; }
}";
        /// <summary>
        /// Expects the class and the ordinary method to be reported while the
        /// <c>extern</c> method <c>who_run_bartertown</c> is not: it is absent from the expected list.
        /// </summary>
        [Fact]
        public void ExternName()
        {
            var analyzer = GetCSharpDiagnosticAnalyzer();
            var diagP = analyzer.SupportedDiagnostics[0];
            var diagG = analyzer.SupportedDiagnostics[1];

            const string source = @"
using System;
using System.Runtime.InteropServices;

namespace TestNamespace
{
    class CLASS
    {

        [DllImport(""kernel32.dll"")]
        public static extern IntPtr who_run_bartertown(string libraryPath);

        public void masterBlaster() {}
    }
}
";

            var expected = new DiagnosticResult[] {
                diagG.CreateDiagnosticResult(6, 11, "CLASS"),
                diagG.CreateDiagnosticResult(12, 21, "masterBlaster"),
            };

            VerifyCSharpDiagnostic(source, expected);
        }
    }

    /// <summary>
    /// Code-fix test for identifier naming: applies the fix to <see cref="NameTest.TestSource"/>
    /// and compares the result with <see cref="FixedTestSource"/>.
    /// </summary>
    public sealed class NameFixTest : CodeFixVerifier
    {
        [Fact]
        public void NameFix()
        {
            VerifyCSharpFix(NameTest.TestSource, FixedTestSource);
        }

        // Expected text after the fix. Unlike NameTest.TestSource it starts directly with the
        // using directive (no leading blank line).
        private const string FixedTestSource = @"using System;
namespace silly { }
namespace NotSilly { }
namespace foo.bar.Biz
{
    class Class
    {
        public int Alice { get; }
        private int Bob { get; }
        internal int Chaz;
        internal int Debora, Emily, Francis;
        int _george;

        Class() { }

        public void ThisIsNotPython() { }
        public void ThisIsNotJava() { }
        public void ThisIsCSharp() { }

        private event Action _goodEvent;
        private event Action _badEvent;
        public event Action BadEvent;
        public event Action GoodEvent;
        private event Action GoodEv { add { } remove { } }
        private event Action BadEv { add { } remove { } }

        public enum EnumA { One, Two, Three }
        private enum Enumb { One, TwoTwo, Three }

        protected float M2;
        protected float M4;
    }

    class A { }
    class BeClass { }

    struct Marco { public int Polo; }
}";
    }
}
using Microsoft.ML.CodeAnalyzer.Tests.Helpers;
using Xunit;

namespace Microsoft.ML.CodeAnalyzer.Tests
{
    /// <summary>
    /// Diagnostic test for the naming of parameters and local variables.
    /// </summary>
    public sealed class ParameterVariableNameTest : DiagnosticVerifier
    {
        /// <summary>
        /// Feeds a class with badly named parameters, locals and a lambda parameter to the
        /// analyzer and expects one diagnostic per offending name. Names missing from the
        /// expected list (<c>foo</c>, <c>goodbye</c>, the fields) must not be reported.
        /// </summary>
        [Fact]
        public void ParameterVariableName()
        {
            // The expected positions below index into this text, so its line breaks and
            // indentation are significant.
            const string test = @"
using System.Linq;
namespace TestNamespace
{
    public class A
    {
        public int Albatross, Buttermilk, Coffee;
        public A(int Unlimited, int POWER)
        {
            int foo = -POWER, Tada = Unlimited + POWER;
            string FORMAT = $""{Unlimited} + {POWER}"";
            int _coolSum = FORMAT.ToCharArray().Sum(CHAR => CHAR + POWER + Buttermilk++);
            Albatross = -_coolSum;
        }

        public static void B(string Hello, int goodbye, string HelloAgain)
        {
            int i_think_this_is_python = Hello.Length + goodbye + HelloAgain.Length;
        }
    }
}";
            var analyzer = GetCSharpDiagnosticAnalyzer();
            var diag = analyzer.SupportedDiagnostics[0];

            // Second message argument: the kind of symbol that was badly named.
            const string param = "parameter";
            const string local = "local variable";

            var expected = new DiagnosticResult[] {
                diag.CreateDiagnosticResult(7, 22, "Unlimited", param),
                diag.CreateDiagnosticResult(7, 37, "POWER", param),
                diag.CreateDiagnosticResult(9, 31, "Tada", local),
                diag.CreateDiagnosticResult(10, 20, "FORMAT", local),
                diag.CreateDiagnosticResult(11, 17, "_coolSum", local),
                diag.CreateDiagnosticResult(11, 53, "CHAR", param),
                diag.CreateDiagnosticResult(15, 37, "Hello", param),
                diag.CreateDiagnosticResult(15, 64, "HelloAgain", param),
                diag.CreateDiagnosticResult(17, 17, "i_think_this_is_python", local),
            };

            VerifyCSharpDiagnostic(test, expected);
        }
    }
}
b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckAfterFix.cs @@ -0,0 +1,36 @@ +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using System; +namespace Bubba +{ + public class Foo + { + public Foo(int yuck) { if (false) Contracts.ExceptParam(nameof(yuck)); } + public static void Bar(float tom, Arguments args) + { + string str = "hello"; + Contracts.CheckValue(str, nameof(str)); + Contracts.CheckUserArg(0 <= A.B.Foo && A.B.Foo < 10, nameof(A.B.Foo), "Should be in range[0, 10)"); + Contracts.CheckUserArg(A.B.Bar.Length == 2, nameof(A.B.Bar), "Length must be exactly 2"); + Contracts.CheckUserArg(A.B.Bar.Length == 2, nameof(A), "Length must be exactly 2"); + if (false) throw Contracts.ExceptParam(nameof(A.B.Bar), $"Length should have been 2 but was { A.B.Bar.Length}"); + Func isFive = val => val == 5; + Contracts.CheckParam(!isFive(4), + nameof(isFive)); + Contracts.CheckValue(typeof(X.Y.Z), nameof(X.Y)); + if (false) throw Contracts.ExceptParam(nameof(tom)); + Contracts.CheckValue(str, "noMatch"); + Contracts.CheckUserArg(str.Length == 2, nameof(args.chumble), "Whoa!"); + Contracts.CheckUserArg(str.Length == 2, nameof(args.spuzz), "Git along, little dogies, git along..."); + } + } + public static class A { public static class B { public const int Foo = 5; public const string Bar = "Yo"; } } + public static class X { public static class Y { public static class Z { } } } + public sealed class Arguments + { + [Argument(ArgumentType.AtMostOnce, HelpText = "Yakka foob mog.", ShortName = "chum")] + public int chumble; + [Argument(ArgumentType.AtMostOnce, HelpText = "Grug pubbawup zink wattoom gazork.", ShortName = "spu,sp")] + public int spuzz; + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckBeforeFix.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckBeforeFix.cs new file mode 100644 index 0000000000..2835c06794 --- /dev/null +++ 
b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckBeforeFix.cs @@ -0,0 +1,36 @@ +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using System; +namespace Bubba +{ + public class Foo + { + public Foo(int yuck) { if (false) Contracts.ExceptParam("yuck"); } + public static void Bar(float tom, Arguments args) + { + string str = "hello"; + Contracts.CheckValue(str, "str"); + Contracts.CheckUserArg(0 <= A.B.Foo && A.B.Foo < 10, "Foo", "Should be in range[0, 10)"); + Contracts.CheckUserArg(A.B.Bar.Length == 2, "Bar", "Length must be exactly 2"); + Contracts.CheckUserArg(A.B.Bar.Length == 2, "A", "Length must be exactly 2"); + if (false) throw Contracts.ExceptParam("Bar", $"Length should have been 2 but was { A.B.Bar.Length}"); + Func isFive = val => val == 5; + Contracts.CheckParam(!isFive(4), + "isFive"); + Contracts.CheckValue(typeof(X.Y.Z), "Y"); + if (false) throw Contracts.ExceptParam("tom"); + Contracts.CheckValue(str, "noMatch"); + Contracts.CheckUserArg(str.Length == 2, "chumble", "Whoa!"); + Contracts.CheckUserArg(str.Length == 2, "sp", "Git along, little dogies, git along..."); + } + } + public static class A { public static class B { public const int Foo = 5; public const string Bar = "Yo"; } } + public static class X { public static class Y { public static class Z { } } } + public sealed class Arguments + { + [Argument(ArgumentType.AtMostOnce, HelpText = "Yakka foob mog.", ShortName = "chum")] + public int chumble; + [Argument(ArgumentType.AtMostOnce, HelpText = "Grug pubbawup zink wattoom gazork.", ShortName = "spu,sp")] + public int spuzz; + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckResource.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckResource.cs new file mode 100644 index 0000000000..0c8ca4f332 --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/ContractsCheckResource.cs @@ -0,0 +1,59 @@ +// Note that this is *not* an 
actual source file, it is instead an embedded +// resource for the ContractsCheckTest. It contains both the utilizing test +// code, as well as code for "Contracts" derived from and intended to resemble +// the corresponding code in ML.NET. + +using System; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Model; + +namespace TestNamespace +{ + public sealed class TypeName + { + public TypeName(IHostEnvironment env, float p, int foo) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckParam(0 <= p && p <= 1, nameof(p), "Should be in range [0,1]"); + env.CheckParam(0 <= p && p <= 1, "p"); // Should fail. + env.CheckParam(0 <= p && p <= 1, nameof(p) + nameof(p)); // Should fail. + env.CheckValue(paramName: nameof(p), val: "p"); // Should succeed despite confusing order. + env.CheckValue(paramName: "p", val: nameof(p)); // Should fail despite confusing order. + env.CheckValue("p", nameof(p)); + env.CheckUserArg(foo > 5, "foo", "Nice"); + env.CheckUserArg(foo > 5, nameof(foo), "Nice"); + env.Except(); // Not throwing or doing anything with the exception, so should fail. + Contracts.ExceptParam(nameof(env), "What a silly env"); // Should also fail. + if (false) + throw env.Except(); // Should not fail. + if (false) + throw env.ExceptParam(nameof(env), "What a silly env"); // Should not fail. + if (false) + throw env.ExceptParam("env", "What a silly env"); // Should fail due to name error. 
+ var e = env.Except(); + env.Check(true, $"Hello {foo} is cool"); + env.Check(true, "Hello it is cool"); + string coolMessage = "Hello it is cool"; + env.Check(true, coolMessage); + env.Check(true, string.Format("Hello {0} is cool", foo)); + env.Check(true, Messages.CoolMessage); + env.CheckDecode(true, "Not suspicious, no ModelLoadContext"); + Contracts.Check(true, "Fine: " + nameof(env)); + Contracts.Check(true, "Less fine: " + env.GetType().Name); + Contracts.CheckUserArg(0 <= p && p <= 1, + "p", "On a new line"); + } + + private void Loader(ModelLoadContext ctx) + { + Contracts.CheckDecode(true, "This message is suspicious"); + } + + private Exception CreateException() => Contracts.Except(); // This should be fine, since it's a return value not a standalone. + } + + public static class Messages + { + public const string CoolMessage = "This is super cool"; + } +} diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/SingleVariableDeclarationTest.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/SingleVariableDeclarationTest.cs new file mode 100644 index 0000000000..add947f5dd --- /dev/null +++ b/test/Microsoft.ML.CodeAnalyzer.Tests/SingleVariableDeclarationTest.cs @@ -0,0 +1,49 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
using Microsoft.ML.CodeAnalyzer.Tests.Helpers;
using Xunit;

namespace Microsoft.ML.CodeAnalyzer.Tests
{
    /// <summary>
    /// Diagnostic test for declarations that introduce several variables at once.
    /// </summary>
    public sealed class SingleVariableDeclarationTest : DiagnosticVerifier
    {
        /// <summary>
        /// Expects a diagnostic on the two multi-variable field declarations and on the
        /// multi-variable local declaration. The single-variable declarations and the
        /// <c>for</c> initializer (<c>int j = 0, k = i</c>) are not in the expected list.
        /// </summary>
        [Fact]
        public void SingleVariableDeclaration()
        {
            // The expected positions below index into this text, so its line breaks and
            // indentation are significant.
            const string test = @"
namespace TestNamespace
{
    class TypeName
    {
        int a, b, c;
        int d;
        int e, f;

        public TypeName(int g, int h)
        {
            a = b = g;
            c = d = h;
            int i = 2;
            for (int j = 0, k = i; j < k; ++j)
            {
                int l = j, m = k;
            }
        }
    }
}";

            var analyzer = GetCSharpDiagnosticAnalyzer();
            var diag = analyzer.SupportedDiagnostics[0];

            // The message argument is the variable list already joined with "', '";
            // presumably the message format supplies the outer quotes -- confirm against the analyzer.
            var expected = new DiagnosticResult[] {
                diag.CreateDiagnosticResult(5, 9, "a', 'b', 'c"),
                diag.CreateDiagnosticResult(7, 9, "e', 'f"),
                diag.CreateDiagnosticResult(16, 17, "l', 'm"),
            };

            VerifyCSharpDiagnostic(test, expected);
        }
    }
}
using Microsoft.ML.CodeAnalyzer.Tests.Helpers;
using Xunit;

namespace Microsoft.ML.CodeAnalyzer.Tests
{
    /// <summary>
    /// Diagnostic test for the naming of generic type parameters.
    /// </summary>
    public sealed class TypeParamNameTest : DiagnosticVerifier
    {
        /// <summary>
        /// Feeds interfaces, a class and a method with generic type parameters to the analyzer
        /// and expects a diagnostic on <c>hello</c>, <c>Tom</c>, <c>mytype</c> and
        /// <c>YourType</c>. <c>T1</c> and <c>T</c> are not in the expected list.
        /// </summary>
        [Fact]
        public void TypeParamName()
        {
            // The type-parameter lists are what this test is about: every expected column below
            // points at an identifier inside angle brackets, so the brackets and the exact
            // indentation of this text are significant.
            const string test = @"
namespace TestNamespace
{
    interface IAlice<T1, hello> {}
    interface IBob<T> : IAlice<T, int> {}
    interface IChaz<Tom> : IAlice<IBob<Tom>, Tom> {}

    public class Foo<mytype>
    {
        public static void Bar<YourType>() {}
    }
}";
            var analyzer = GetCSharpDiagnosticAnalyzer();
            var diag = analyzer.SupportedDiagnostics[0];

            var expected = new DiagnosticResult[] {
                diag.CreateDiagnosticResult(3, 26, "hello"),
                diag.CreateDiagnosticResult(5, 21, "Tom"),
                diag.CreateDiagnosticResult(7, 22, "mytype"),
                diag.CreateDiagnosticResult(9, 32, "YourType"),
            };

            VerifyCSharpDiagnostic(test, expected);
        }
    }
}
bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, diff --git a/test/Microsoft.ML.Predictor.Tests/TestDatasetInference.cs b/test/Microsoft.ML.Predictor.Tests/TestDatasetInference.cs index 495f844133..0bde64a2af 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestDatasetInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestDatasetInference.cs @@ -121,7 +121,7 @@ public void InferSchemaCommandTest() [Fact] public void InferRecipesCommandTest() { - var datasets = new Tuple[] + var datasets = new Tuple[] { Tuple.Create( GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv")), diff --git a/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs b/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs index 39a8b4e8cf..ed1780c6d7 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs @@ -179,7 +179,7 @@ public void TransposerTest() Contracts.Assert(result); Assert.True(trueIndex == index, $"Transpose schema had column '{names[i]}' at unexpected index"); } - // Check the contents + // Check the contents Assert.Null(trans.TransposeSchema.GetSlotType(2)); // C check to see that it's not transposable. TransposeCheckHelper(view, 0, trans); // A check. TransposeCheckHelper(view, 1, trans); // B check. 
using System.Collections.Generic;
using System.Collections.Immutable;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Diagnostics;

namespace Microsoft.ML.CodeAnalyzer
{
    /// <summary>
    /// Inspects invocations of the <c>Check*</c> and <c>Except*</c> methods declared on
    /// <c>Microsoft.ML.Runtime.Contracts</c> or <c>Microsoft.ML.Runtime.Internal.Contracts</c>
    /// and reports four kinds of problems: name arguments that are not <c>nameof(...)</c>
    /// expressions, <c>Except*</c> calls whose exception is discarded, <c>Check*</c> messages
    /// that are not simple constants, and decode messages supplied inside a method that takes
    /// a <c>ModelLoadContext</c>.
    /// </summary>
    [DiagnosticAnalyzer(LanguageNames.CSharp)]
    public sealed class ContractsCheckAnalyzer : DiagnosticAnalyzer
    {
        private const string Category = "Contracts";

        internal static class NameofDiagnostic
        {
            public const string Id = "MSML_ContractsNameUsesNameof";
            private const string Title = "Contracts argument for names is not a nameof";
            private const string Format = "Call to '{0}' should use nameof(...) for {1} argument, but instead used '{2}'";
            private const string Description =
                "For Contracts.Checks or Excepts with some form of parameter name, unless that " +
                "argument is a nameof(...) expression there's almost certainly something wrong.";

            internal static readonly DiagnosticDescriptor Rule =
                new DiagnosticDescriptor(Id, Title, Format, Category,
                    DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description);
        }

        internal static class ExceptionDiagnostic
        {
            public const string Id = "MSML_ContractsExceptAsExpression";
            private const string Title = "Contracts.Except used as expression";
            private const string Format = "Something should be done with the exception created by '{0}'";
            private const string Description =
                "Contracts.Except and similar methods do not themselves throw, but provide an " +
                "exception that can be thrown. This call did nothing with the exception.";

            internal static readonly DiagnosticDescriptor Rule =
                new DiagnosticDescriptor(Id, Title, Format, Category,
                    DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description);
        }

        internal static class SimpleMessageDiagnostic
        {
            public const string Id = "MSML_ContractsCheckMessageNotLiteralOrIdentifier";
            private const string Title = "Contracts.Check argument for message may involve formatting";
            private const string Format = "On call to '{0}' message '{1}' could not be identified as being either a string literal or variable";

            internal static readonly DiagnosticDescriptor Rule =
                new DiagnosticDescriptor(Id, Title, Format, Category,
                    DiagnosticSeverity.Warning, isEnabledByDefault: true,
                    description: Descriptions.ContractsCheckMessageNotLiteralOrIdentifier);
        }

        internal static class DecodeMessageWithLoadContextDiagnostic
        {
            public const string Id = "MSML_NoMessagesForLoadContext";
            // This title used to be a verbatim copy of SimpleMessageDiagnostic's, which described
            // a different rule.
            private const string Title = "Contracts decode message supplied where a ModelLoadContext is available";
            private const string Format = "On call to '{0}' message '{1}' was provided, but this method had a ModelLoadContext";

            internal static readonly DiagnosticDescriptor Rule =
                new DiagnosticDescriptor(Id, Title, Format, Category,
                    DiagnosticSeverity.Warning, isEnabledByDefault: true,
                    description: Descriptions.NoMessagesForLoadContext);
        }

        public override ImmutableArray<DiagnosticDescriptor> SupportedDiagnostics =>
            ImmutableArray.Create(
                NameofDiagnostic.Rule, ExceptionDiagnostic.Rule, SimpleMessageDiagnostic.Rule,
                DecodeMessageWithLoadContextDiagnostic.Rule);

        // Resolving the symbol behind an invocation is comparatively expensive, so Analyze first
        // filters on the method name against this set and only then consults the semantic model.
        private static readonly HashSet<string> _targetSet = new HashSet<string>(new[]
        {
            "Check", "CheckUserArg", "CheckParam", "CheckParamValue", "CheckRef", "CheckValue",
            "CheckNonEmpty", "CheckNonWhiteSpace", "CheckDecode", "CheckIO", "CheckAlive", "CheckValueOrNull",
            "Except", "ExceptUserArg", "ExceptParam", "ExceptParamValue", "ExceptValue", "ExceptEmpty",
            "ExceptWhiteSpace", "ExceptDecode", "ExceptIO", "ExceptNotImpl", "ExceptNotSupp",
        });

        public override void Initialize(AnalysisContext context)
        {
            context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None);
            context.RegisterSyntaxNodeAction(Analyze, SyntaxKind.InvocationExpression);
        }

        /// <summary>
        /// Returns an array parallel to <paramref name="parameters"/> that contains
        /// the arguments in <paramref name="invocation"/>. If named parameters are used
        /// then this is not necessarily the same. Note that in the event that there are
        /// more arguments than parameters (e.g., via a <c>params</c> variable length
        /// parameter) only the first match for the parameter is recorded.
        /// </summary>
        /// <returns>
        /// An array with one slot per parameter; a slot is <c>null</c> when no argument
        /// was matched to that parameter.
        /// </returns>
        private static ArgumentSyntax[] ParallelArgs(
            ImmutableArray<IParameterSymbol> parameters,
            InvocationExpressionSyntax invocation)
        {
            ArgumentSyntax[] args = new ArgumentSyntax[parameters.Length];
            var syntaxArgs = invocation.ArgumentList.Arguments;
            for (int i = 0; i < syntaxArgs.Count; ++i)
            {
                var arg = syntaxArgs[i];
                int index = -1;
                if (arg.NameColon == null)
                    index = i;
                else
                {
                    string nameColonText = arg.NameColon.Name.ToString();
                    for (int p = 0; p < parameters.Length; ++p)
                    {
                        if (parameters[p].Name == nameColonText)
                        {
                            index = p;
                            break;
                        }
                    }
                }
                // Out-of-range indices (surplus positional arguments, unknown names) and
                // already-filled slots are ignored.
                if (0 <= index && index < args.Length && args[index] == null)
                    args[index] = arg;
            }
            return args;
        }

        /// <summary>
        /// Whether <paramref name="exp"/> is syntactically an invocation of <c>nameof</c>.
        /// </summary>
        private static bool NameIsNameof(ExpressionSyntax exp)
        {
            return exp is InvocationExpressionSyntax invokeExp && invokeExp.Expression.ToString() == "nameof";
        }

        /// <summary>
        /// Whether <paramref name="exp"/> is an acceptable message expression: a string literal,
        /// an identifier, a <c>nameof(...)</c>, a member access that resolves to a <c>const</c>
        /// field, or a <c>+</c> concatenation whose operands are all acceptable.
        /// </summary>
        private static bool IsGoodMessage(SyntaxNodeAnalysisContext context, ExpressionSyntax exp)
        {
            if (exp.IsKind(SyntaxKind.AddExpression))
            {
                // These sorts of string concatenation things always wind up being compile
                // time constants, from what I can tell from ildasm.
                var binExp = (BinaryExpressionSyntax)exp;
                return IsGoodMessage(context, binExp.Left) && IsGoodMessage(context, binExp.Right);
            }

            if (exp.IsKind(SyntaxKind.SimpleMemberAccessExpression))
            {
                var access = (MemberAccessExpressionSyntax)exp;
                var field = context.SemanticModel.GetSymbolInfo(access, context.CancellationToken).Symbol as IFieldSymbol;
                return field?.IsConst ?? false;
            }

            if (exp.IsKind(SyntaxKind.InvocationExpression))
                return NameIsNameof(exp);

            return exp.IsKind(SyntaxKind.StringLiteralExpression) || exp.IsKind(SyntaxKind.IdentifierName);
        }

        /// <summary>
        /// Whether the method or constructor declaration enclosing <paramref name="node"/> has a
        /// parameter whose type is written as <c>ModelLoadContext</c>. Returns <c>false</c> when
        /// there is no enclosing method or constructor declaration.
        /// </summary>
        private static bool HasModelLoadContext(SyntaxNode node)
        {
            while (node != null && !node.IsKind(SyntaxKind.MethodDeclaration) && !node.IsKind(SyntaxKind.ConstructorDeclaration))
                node = node.Parent;
            if (node == null)
                return false;
            var enclosingParams = ((node as MethodDeclarationSyntax)?.ParameterList
                ?? ((ConstructorDeclarationSyntax)node).ParameterList).Parameters;
            foreach (var param in enclosingParams)
            {
                // It is possible that this may mislead us slightly, since there could be another
                // unrelated type called ModelLoadContext, or someone could have type aliasing, or
                // some other complicating factor that will defeat this simple check. With some
                // additional computational load, we could access the semantic model for this.
                // The type syntax is optional on a parameter node, hence the null-conditional.
                if (param.Type?.ToString() == "ModelLoadContext")
                    return true;
            }
            return false;
        }

        /// <summary>
        /// Syntax-node action for invocation expressions; reports the diagnostics described on
        /// the class for calls that resolve to a Contracts <c>Check*</c>/<c>Except*</c> method.
        /// </summary>
        private static void Analyze(SyntaxNodeAnalysisContext context)
        {
            var invocation = (InvocationExpressionSyntax)context.Node;
            if (!(invocation.Expression is MemberAccessExpressionSyntax access))
                return;
            var name = access.Name.ToString();
            // Do the quick checks first on the name. Identifier prefixes are compared ordinally.
            bool isCheck = name.StartsWith("Check", System.StringComparison.Ordinal);
            bool isExcept = !isCheck && name.StartsWith("Except", System.StringComparison.Ordinal);
            if ((!isCheck && !isExcept) || !_targetSet.Contains(name))
                return;
            // Now that we've verified we're approximately in the right neighborhood, do a more
            // in depth semantic analysis to verify we're targeting the right sort of object.
            var symbolInfo = context.SemanticModel.GetSymbolInfo(invocation, context.CancellationToken);
            if (!(symbolInfo.Symbol is IMethodSymbol methodSymbol))
                return;
            var containingSymbolName = methodSymbol.ContainingSymbol.ToString();
            // The "internal" version is one used by some projects that want to benefit from Contracts,
            // but for some reason cannot reference MLCore.
            if (containingSymbolName != "Microsoft.ML.Runtime.Contracts" &&
                containingSymbolName != "Microsoft.ML.Runtime.Internal.Contracts")
            {
                return;
            }
            // An Except* call that is a whole statement creates an exception and drops it.
            if (isExcept && invocation.Parent.IsKind(SyntaxKind.ExpressionStatement))
            {
                context.ReportDiagnostic(Diagnostic.Create(
                    ExceptionDiagnostic.Rule, invocation.GetLocation(), name));
            }

            var parameters = methodSymbol.Parameters;
            var args = ParallelArgs(parameters, invocation);

            for (int i = 0; i < parameters.Length; ++i)
            {
                if (args[i] == null)
                    continue;
                var arg = args[i];
                var parameter = parameters[i];

                switch (parameter.Name)
                {
                    case "paramName":
                    case "name":
                        if (!NameIsNameof(arg.Expression))
                        {
                            context.ReportDiagnostic(Diagnostic.Create(
                                NameofDiagnostic.Rule, arg.GetLocation(), name, parameter.Name, arg.Expression));
                        }
                        break;
                    case "msg":
                        if (isCheck && !IsGoodMessage(context, arg.Expression))
                        {
                            context.ReportDiagnostic(Diagnostic.Create(
                                SimpleMessageDiagnostic.Rule, arg.GetLocation(), name, arg.Expression));
                        }
                        if ((name == "CheckDecode" || name == "ExceptDecode") && HasModelLoadContext(invocation))
                        {
                            context.ReportDiagnostic(Diagnostic.Create(
                                DecodeMessageWithLoadContextDiagnostic.Rule, arg.GetLocation(), name, arg.Expression));
                        }
                        break;
                    default:
                        break;
                }
            }
        }
    }
}
using System.Collections.Immutable;
using System.Composition;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CodeActions;
using Microsoft.CodeAnalysis.CodeFixes;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;

namespace Microsoft.ML.CodeAnalyzer
{
    using Debug = System.Diagnostics.Debug;

    /// <summary>
    /// Code fix for <see cref="ContractsCheckAnalyzer.NameofDiagnostic"/>: tries to replace a
    /// string-literal name argument with an equivalent <c>nameof(...)</c> expression.
    /// </summary>
    /// <remarks>
    /// Candidates are tried in this order, and the first that matches is offered:
    /// the first argument of the call, any identifier used inside the call's argument list,
    /// a parameter of the enclosing method or constructor, and finally a field (or the short
    /// name of a field) carrying an <c>ArgumentAttribute</c> on an <c>Arguments</c>/<c>Column</c>
    /// typed parameter. Nothing is offered when none of them match.
    /// </remarks>
    [ExportCodeFixProvider(LanguageNames.CSharp, Name = nameof(ContractsCheckNameofFixProvider)), Shared]
    public sealed class ContractsCheckNameofFixProvider : CodeFixProvider
    {
        private const string Title = "Try to introduce nameof";

        /// <summary>
        /// Id of the diagnostic, and equivalence id of the fix.
        /// </summary>
        private string Id => ContractsCheckAnalyzer.NameofDiagnostic.Id;

        public override ImmutableArray<string> FixableDiagnosticIds => ImmutableArray.Create(Id);

        public override FixAllProvider GetFixAllProvider()
            => WellKnownFixAllProviders.BatchFixer;

        public override async Task RegisterCodeFixesAsync(CodeFixContext context)
        {
            var diagnostic = context.Diagnostics.FirstOrDefault(d => d.Id == Id);
            if (diagnostic == null)
                return;
            var root = await context.Document.GetSyntaxRootAsync(context.CancellationToken).ConfigureAwait(false);

            var diagnosticSpan = diagnostic.Location.SourceSpan;

            // Find the name/paramName argument identified by the diagnostic. If the location does
            // not sit inside an argument of an argument list there is nothing we can fix.
            var nameArg = root.FindToken(diagnosticSpan.Start).Parent.FirstAncestorOrSelf<ArgumentSyntax>();
            if (!(nameArg?.Parent is ArgumentListSyntax argList))
                return;
            Debug.Assert(argList.Parent.IsKind(SyntaxKind.InvocationExpression));

            string nameArgValue = (nameArg.Expression as LiteralExpressionSyntax)?.Token.ValueText;
            // If not a string literal, or not a valid identifier, there really is very little we can do. Suggest nothing.
            if (nameArgValue == null || !SyntaxFacts.IsValidIdentifier(nameArgValue))
                return;

            // One of the most common checks are checks for value. Check whether this is the case.
            // If it is, we don't even have to resort to extracting the semantic model.
            if (nameArg.NameColon == null && argList.Arguments.Count >= 2 &&
                argList.Arguments[1] == nameArg && argList.Arguments[0].Expression.ToString() == nameArgValue)
            {
                RegisterStringFix(context, diagnostic, nameArgValue, nameArg);
                return;
            }

            // Check all symbols used in the Check/Except argument. Let's see if there's a match.
            var bestSymbol = FindShortestMatchInArguments(argList, nameArgValue);
            if (bestSymbol != null)
            {
                context.RegisterCodeFix(CodeAction.Create(Title,
                    c => ExpressionReplace(context.Document, bestSymbol, nameArg, c), Id), diagnostic);
                return;
            }

            // No luck within the check statement itself. Next check the parameter list of this method or constructor.
            SyntaxNode temp = nameArg;
            while (temp != null && !temp.IsKind(SyntaxKind.MethodDeclaration) && !temp.IsKind(SyntaxKind.ConstructorDeclaration))
                temp = temp.Parent;

            ParameterSyntax argParam = null;
            if (temp != null)
            {
                var paramList = (temp as MethodDeclarationSyntax)?.ParameterList
                    ?? ((ConstructorDeclarationSyntax)temp).ParameterList;
                foreach (var param in paramList.Parameters)
                {
                    if (param.Identifier.ToString() == nameArgValue)
                    {
                        RegisterStringFix(context, diagnostic, nameArgValue, nameArg);
                        return;
                    }
                    // A hack, but whatever works: remember the first parameter whose type is
                    // written as Arguments or Column for the last-resort lookup below.
                    string paramTypeString = param.Type?.ToString();
                    if (argParam == null && (paramTypeString == "Arguments" || paramTypeString == "Column"))
                        argParam = param;
                }
            }

            // All else has failed. The last is to try to get information from any Arguments object, if present.
            if (argParam == null)
                return;
            var semanticModel = await context.Document.GetSemanticModelAsync(context.CancellationToken).ConfigureAwait(false);
            var type = semanticModel.GetTypeInfo(argParam.Type, context.CancellationToken).Type;
            if (type == null || type is IErrorTypeSymbol)
                return;
            var argName = argParam.Identifier.ToString();
            foreach (var field in type.GetMembers().OfType<IFieldSymbol>())
            {
                if (!field.CanBeReferencedByName)
                    continue;
                // AttributeClass is null for attributes that failed to bind; skip those.
                AttributeData attr = field.GetAttributes().FirstOrDefault(a => a.AttributeClass?.Name == "ArgumentAttribute");
                if (attr == null)
                    continue;
                if (field.Name == nameArgValue || HasShortName(attr, nameArgValue))
                {
                    RegisterStringFix(context, diagnostic, argName + "." + field.Name, nameArg);
                    return;
                }
            }
        }

        /// <summary>
        /// Looks through every identifier inside <paramref name="argList"/> whose text equals
        /// <paramref name="name"/> and returns the expression to put inside <c>nameof</c>, or
        /// <c>null</c> if there is none. In the event of ambiguity the shortest candidate wins,
        /// figuring that the least complex might be the most likely.
        /// </summary>
        private static ExpressionSyntax FindShortestMatchInArguments(ArgumentListSyntax argList, string name)
        {
            int shortestSymbol = int.MaxValue;
            ExpressionSyntax bestSymbol = null;
            var sameNameNodes = argList.DescendantTokens().Where(tok => tok.Text == name)
                .Select(p => p.Parent).Where(n => n.IsKind(SyntaxKind.IdentifierName));
            foreach (var node in sameNameNodes)
            {
                SyntaxNode candidate = node;
                var pk = node.Parent.Kind();
                if (pk == SyntaxKind.SimpleMemberAccessExpression)
                {
                    // For the leftmost identifier of A.B use A; for the member name use A.B.
                    var parentAccess = (MemberAccessExpressionSyntax)node.Parent;
                    candidate = parentAccess.Expression == node ? node : parentAccess;
                }
                else if (pk == SyntaxKind.QualifiedName)
                {
                    // A little weird, but if you have class Z nested in Y, nested in X, then typeof(X.Y.Z) will
                    // be a series of qualified names, but nameof(X.Y.Z) will be a series of simple member accesses.
                    // nameof(X.Y.Z) if phrased as qualified names will not work.
                    // As in the member access case, the qualified form is only right when the
                    // identifier is the right-hand name: nameof(X.Y) evaluates to "Y", not "X".
                    var qualified = (QualifiedNameSyntax)node.Parent;
                    if (qualified.Right == node)
                        candidate = SyntaxFactory.ParseExpression(qualified.ToString());
                }

                if (candidate.Span.Length < shortestSymbol)
                {
                    bestSymbol = (ExpressionSyntax)candidate;
                    shortestSymbol = candidate.Span.Length;
                }
            }
            return bestSymbol;
        }

        /// <summary>
        /// Whether the comma-separated <c>ShortName</c> named argument of <paramref name="attr"/>
        /// contains <paramref name="name"/>. Returns <c>false</c> when there is no string-valued
        /// <c>ShortName</c>.
        /// </summary>
        private static bool HasShortName(AttributeData attr, string name)
        {
            var shortPair = attr.NamedArguments.FirstOrDefault(p => p.Key == "ShortName");
            var shortName = shortPair.Value.Value as string;
            return shortName != null && shortName.Split(',').Contains(name);
        }

        /// <summary>
        /// Registers the fix that replaces the expression of <paramref name="nameArg"/> with
        /// <c>nameof(<paramref name="name"/>)</c>.
        /// </summary>
        private void RegisterStringFix(CodeFixContext context, Diagnostic diagnostic, string name, ArgumentSyntax nameArg)
        {
            context.RegisterCodeFix(CodeAction.Create(Title,
                c => StringReplace(context.Document, name, nameArg, c), Id), diagnostic);
        }

        /// <summary>
        /// Returns <paramref name="document"/> with the expression of <paramref name="nameArg"/>
        /// replaced by <c>nameof(...)</c> over the text <paramref name="name"/>.
        /// </summary>
        private async Task<Document> StringReplace(Document document, string name, ArgumentSyntax nameArg, CancellationToken cancellationToken)
        {
            var nameofExp = SyntaxFactory.ParseExpression($"nameof({name})").WithTriviaFrom(nameArg);
            var tree = await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false);
            var root = await tree.GetRootAsync(cancellationToken).ConfigureAwait(false);
            var newRoot = root.ReplaceNode(nameArg.Expression, nameofExp);
            return document.WithSyntaxRoot(newRoot);
        }

        /// <summary>
        /// Returns <paramref name="document"/> with the expression of <paramref name="nameArg"/>
        /// replaced by <c>nameof(...)</c> over the syntax node <paramref name="exp"/>.
        /// </summary>
        private async Task<Document> ExpressionReplace(Document document, SyntaxNode exp, ArgumentSyntax nameArg, CancellationToken cancellationToken)
        {
            // Parse a placeholder nameof and swap its single argument for the real expression.
            var nameofExp = (InvocationExpressionSyntax)SyntaxFactory.ParseExpression("nameof(a)").WithTriviaFrom(nameArg);
            var newNameofExp = nameofExp.ReplaceNode(nameofExp.ArgumentList.Arguments[0].Expression, exp.WithoutTrivia());

            var tree = await document.GetSyntaxTreeAsync(cancellationToken).ConfigureAwait(false);
            var root = await tree.GetRootAsync(cancellationToken).ConfigureAwait(false);
            var newRoot = root.ReplaceNode(nameArg.Expression, newNameofExp);
            return document.WithSyntaxRoot(newRoot);
        }
    }
}
+ /// + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project. + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "15.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class Descriptions { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal Descriptions() { + } + + /// + /// Returns the cached ResourceManager instance used by this class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Microsoft.ML.CodeAnalyzer.Descriptions", typeof(Descriptions).GetTypeInfo().Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. 
+ /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + + /// + /// Looks up a localized string similar to Since C# has no concept of lazy evaluation of parameters, we prefer Contracts.Check's message arguments to not involve string formatting, or other complex operations, since such operations will happen always, whether the check fails or not. If you want to have detailed messages that's great, but use Contracts.Except instead. That is instead of something like 'Check(c, msg)', prefer 'if (!c) throw Except(msg)'.. + /// + internal static string ContractsCheckMessageNotLiteralOrIdentifier { + get { + return ResourceManager.GetString("ContractsCheckMessageNotLiteralOrIdentifier", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to All instance fields or properties should be initialized in a constructor, not in the field. C# allows yone to specify initializers on instance members in either the constructor, or just on the field itself. However, taking advantage of that capability means that to comprehend the post-constructor state of an object a reader will have to read at least two places. To help keep code more comprehensible we disallow using field initializers, and insist that everything be in the constructor. Obviously this does n [rest of string was truncated]";. + /// + internal static string InstanceInitializerInConstructor { + get { + return ResourceManager.GetString("InstanceInitializerInConstructor", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to CheckDecode and ExceptDecode are used to report badly formatted data. If a message is provided then users expect that to be something actionable. 
So: if it is data the user themselves formatted or provided, then messages here are really useful. However, if in the context of a ModelLoadContext, this is practically always data Microsoft.ML formatted itself, and users inevitably find the specifics here confusing because they suppose these details are something they can do something about. So: generally, we sho [rest of string was truncated]";. + /// + internal static string NoMessagesForLoadContext { + get { + return ResourceManager.GetString("NoMessagesForLoadContext", resourceCulture); + } + } + } +} diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/Descriptions.resx b/tools-local/Microsoft.ML.CodeAnalyzer/Descriptions.resx new file mode 100644 index 0000000000..5b616ee82a --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/Descriptions.resx @@ -0,0 +1,129 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + Since C# has no concept of lazy evaluation of parameters, we prefer Contracts.Check's message arguments to not involve string formatting, or other complex operations, since such operations will happen always, whether the check fails or not. If you want to have detailed messages that's great, but use Contracts.Except instead. That is instead of something like 'Check(c, msg)', prefer 'if (!c) throw Except(msg)'. + + + All instance fields or properties should be initialized in a constructor, not in the field. C# allows yone to specify initializers on instance members in either the constructor, or just on the field itself. 
However, taking advantage of that capability means that to comprehend the post-constructor state of an object a reader will have to read at least two places. To help keep code more comprehensible we disallow using field initializers, and insist that everything be in the constructor. Obviously this does not apply to `const` or `static` members. + + + CheckDecode and ExceptDecode are used to report badly formatted data. If a message is provided then users expect that to be something actionable. So: if it is data the user themselves formatted or provided, then messages here are really useful. However, if in the context of a ModelLoadContext, this is practically always data Microsoft.ML formatted itself, and users inevitably find the specifics here confusing because they suppose these details are something they can do something about. So: generally, we should refrain from specific messages, in this context. + + \ No newline at end of file diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/InstanceInitializerAnalyzer.cs b/tools-local/Microsoft.ML.CodeAnalyzer/InstanceInitializerAnalyzer.cs new file mode 100644 index 0000000000..f15d2c192e --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/InstanceInitializerAnalyzer.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Collections.Immutable; +using System.Linq; +using System.Reflection; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Diagnostics; + +namespace Microsoft.ML.CodeAnalyzer +{ + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class InstanceInitializerAnalyzer : DiagnosticAnalyzer + { + private const string Category = "Declaration"; + internal const string DiagnosticId = "MSML_NoInstanceInitializers"; + + private const string Title = "No initializers on instance fields or properties"; + private const string Format = "Member {0} has a {1} initialier outside the constructor"; + + private static DiagnosticDescriptor Rule = + new DiagnosticDescriptor(DiagnosticId, Title, Format, Category, + DiagnosticSeverity.Warning, isEnabledByDefault: true, + description: Descriptions.InstanceInitializerInConstructor); + + public override ImmutableArray SupportedDiagnostics => + ImmutableArray.Create(Rule); + + public override void Initialize(AnalysisContext context) + { + context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None); + context.RegisterSymbolAction(AnalyzeField, SymbolKind.Field); + context.RegisterSymbolAction(AnalyzeProperty, SymbolKind.Property); + } + + private static void AnalyzeField(SymbolAnalysisContext context) + { + var symbol = (IFieldSymbol)context.Symbol; + // Constant or static field initializers are desirable. If implicitly + // declared, then we can't very well ask the developer to fix. + if (symbol.IsConst || symbol.IsStatic || symbol.IsImplicitlyDeclared) + return; + // Exempt argument attributes from the test. Note that because we cannot + // depend on the Microsoft.ML source itself, we have to identify this class by name. 
+ if (symbol.GetAttributes().Any(i => i.AttributeClass.Name == "ArgumentAttribute")) + return; + + var typeInfo = symbol.GetType().GetTypeInfo(); + var hasInitProp = typeInfo.GetDeclaredProperty("HasInitializer"); + if (hasInitProp?.PropertyType != typeof(bool)) + return; + bool hasInit = (bool)hasInitProp.GetValue(symbol); + if (!hasInit) + return; + var diagnostic = Diagnostic.Create(Rule, symbol.Locations[0], symbol.Name, "field"); + context.ReportDiagnostic(diagnostic); + } + + private static void AnalyzeProperty(SymbolAnalysisContext context) + { + var symbol = (IPropertySymbol)context.Symbol; + if (symbol.IsAbstract || symbol.IsImplicitlyDeclared || symbol.IsStatic) + return; + var syntaxRefs = symbol.DeclaringSyntaxReferences; + if (syntaxRefs.IsEmpty) + return; + var syntax = syntaxRefs[0].GetSyntax(); + if (!syntax.ChildNodes().Any(s => s.IsKind(SyntaxKind.EqualsValueClause))) + return; + + var diagnostic = Diagnostic.Create(Rule, symbol.Locations[0], symbol.Name, "property"); + context.ReportDiagnostic(diagnostic); + } + } +} diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/Microsoft.ML.CodeAnalyzer.csproj b/tools-local/Microsoft.ML.CodeAnalyzer/Microsoft.ML.CodeAnalyzer.csproj new file mode 100644 index 0000000000..46f8e8df15 --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/Microsoft.ML.CodeAnalyzer.csproj @@ -0,0 +1,28 @@ + + + + netstandard1.3 + + + + + + + + + + + True + True + Descriptions.resx + + + + + + ResXFileCodeGenerator + Descriptions.Designer.cs + + + + diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/NameAnalyzer.cs b/tools-local/Microsoft.ML.CodeAnalyzer/NameAnalyzer.cs new file mode 100644 index 0000000000..6e7c77100c --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/NameAnalyzer.cs @@ -0,0 +1,176 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.CSharp.Syntax; +using Microsoft.CodeAnalysis.Diagnostics; + +namespace Microsoft.ML.CodeAnalyzer +{ + internal enum NameType + { + UnderScoreCamelCased, // E.g., _myPrivateField + CamelCased, // E.g., myAwesomeParameter + PascalCased, // E.g., AwesomeClass + IPascalCased, // E.g., IEnumerableStuff + TPascalCased, // E.g., TDictArg + } + + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class NameAnalyzer : DiagnosticAnalyzer + { + internal const string Category = "Naming"; + + internal const string NameProperty = "Original"; + internal const string DesiredNameProperty = "Desired"; + + internal static Diagnostic CreateDiagnostic(DiagnosticDescriptor rule, SyntaxToken identifier, NameType desired, params object[] args) + { + string text = identifier.Text; + var props = ImmutableDictionary.Empty + .Add(NameProperty, text).Add(DesiredNameProperty, desired.ToString()); + if (args == null || args.Length == 0) + return Diagnostic.Create(rule, identifier.GetLocation(), props, text); + var newArgs = new object[args.Length + 1]; + Array.Copy(args, 0, newArgs, 1, args.Length); + newArgs[0] = text; + return Diagnostic.Create(rule, identifier.GetLocation(), props, newArgs); + } + + internal static class PrivateFieldName + { + public const string Id = "MSML_PrivateFieldName"; + private const string Title = "Private field name not _camelCased"; + private const string Format = "Private field name '{0}' not _camelCased"; + private const string Description = + "Private fields should have an _ prefix and be _lowerCamelCased, unless they are const."; + + internal static DiagnosticDescriptor Rule = + new DiagnosticDescriptor(Id, Title, Format, Category, + DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description); + } + + internal static class GeneralName + { + public const string Id 
= "MSML_GeneralName"; + private const string Title = "This name should be PascalCased"; + private const string Format = "Identifier '{0}' not PascalCased"; + private const string Description = + "Identifier names other than parameters, local variables, private non-const fields, interfaces, and type parameters should be PascalCased."; + + internal static DiagnosticDescriptor Rule = + new DiagnosticDescriptor(Id, Title, Format, Category, + DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description); + } + + public override ImmutableArray SupportedDiagnostics => + ImmutableArray.Create(PrivateFieldName.Rule, GeneralName.Rule); + + public override void Initialize(AnalysisContext context) + { + context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None); + context.RegisterSyntaxNodeAction(AnalyzeField, SyntaxKind.FieldDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeField, SyntaxKind.EventFieldDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeClass, SyntaxKind.ClassDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeEnum, SyntaxKind.EnumDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeEnumMember, SyntaxKind.EnumMemberDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeEvent, SyntaxKind.EventDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeMethod, SyntaxKind.MethodDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeNamespace, SyntaxKind.NamespaceDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeProperty, SyntaxKind.PropertyDeclaration); + context.RegisterSyntaxNodeAction(AnalyzeStruct, SyntaxKind.StructDeclaration); + } + + private static void AnalyzeClass(SyntaxNodeAnalysisContext context) + => CheckPascal(((ClassDeclarationSyntax)context.Node).Identifier, context); + + private static void AnalyzeEnum(SyntaxNodeAnalysisContext context) + => CheckPascal(((EnumDeclarationSyntax)context.Node).Identifier, context); + + private static void AnalyzeEnumMember(SyntaxNodeAnalysisContext context) + => 
CheckPascal(((EnumMemberDeclarationSyntax)context.Node).Identifier, context); + + private static void AnalyzeEvent(SyntaxNodeAnalysisContext context) + => CheckPascal(((EventDeclarationSyntax)context.Node).Identifier, context); + + private static void AnalyzeMethod(SyntaxNodeAnalysisContext context) + { + var node = (MethodDeclarationSyntax)context.Node; + if (ModifierContainsExtern(node.Modifiers)) + return; + CheckPascal(node.Identifier, context); + } + + private static void AnalyzeNamespace(SyntaxNodeAnalysisContext context) + { + var node = ((NamespaceDeclarationSyntax)context.Node); + var name = node.Name; + // This is annoying. I can't figure out how to do this. When I get a namespace, + // this seems to trigger for *every* part of a namespace name, not just once for one. + //foreach (var id in name.DescendantTokens().Where(tok => tok.IsKind(SyntaxKind.IdentifierToken))) + // CheckPascal(id, context); + } + + private static void AnalyzeProperty(SyntaxNodeAnalysisContext context) + => CheckPascal(((PropertyDeclarationSyntax)context.Node).Identifier, context); + + private static void AnalyzeStruct(SyntaxNodeAnalysisContext context) + { + CheckPascal(((StructDeclarationSyntax)context.Node).Identifier, context); + } + + private static bool ModifierContainsExtern(SyntaxTokenList modifiers) + { + return modifiers.Any(token => token.IsKind(SyntaxKind.ExternKeyword)); + } + + private static void CheckPascal(SyntaxToken token, SyntaxNodeAnalysisContext context) + { + if (!Utils.NameIsGood(token.Text, 0, true)) + context.ReportDiagnostic(CreateDiagnostic(GeneralName.Rule, token, NameType.PascalCased)); + } + + private static bool CheckUnderscore(string name) + => !string.IsNullOrEmpty(name) && name.StartsWith("_", StringComparison.OrdinalIgnoreCase) && Utils.NameIsGood(name, 1, false); + + private static void AnalyzeField(SyntaxNodeAnalysisContext context) + { + var node = (BaseFieldDeclarationSyntax)context.Node; + + bool isConst = false; + bool isPrivate = true; // 
Fields are private by default. + foreach (var mod in node.Modifiers) + { + if (mod.IsKind(SyntaxKind.ConstKeyword)) + isConst = true; + else if (mod.IsKind(SyntaxKind.PublicKeyword) || mod.IsKind(SyntaxKind.ProtectedKeyword) || mod.IsKind(SyntaxKind.InternalKeyword)) + isPrivate = false; + } + foreach (var variable in node.Declaration.Variables) + { + var identifier = variable.Identifier; + var name = identifier.Text; + if (!isPrivate) + { + CheckPascal(identifier, context); + continue; + } + + // Private consts are a little bit funny. Sometimes it makes sense to have them + // be _camelCased, but often it's good to have them be PascalCased. We have decided + // that going forward they will be standardized as PascalCased, but *at the moment* + // we do not diagnose it as an error if they are _camelCased. At some point we will. + if (CheckUnderscore(name) || (isConst && Utils.NameIsGood(name, 0, true))) + continue; + var diagnostic = Diagnostic.Create(PrivateFieldName.Rule, identifier.GetLocation(), name); + context.ReportDiagnostic(CreateDiagnostic(PrivateFieldName.Rule, identifier, + isConst ? NameType.PascalCased : NameType.UnderScoreCamelCased)); + } + } + } +} diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/NameFixProvider.cs b/tools-local/Microsoft.ML.CodeAnalyzer/NameFixProvider.cs new file mode 100644 index 0000000000..22ff9c383e --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/NameFixProvider.cs @@ -0,0 +1,183 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Composition; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CodeActions; +using Microsoft.CodeAnalysis.CodeFixes; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Rename; + +namespace Microsoft.ML.CodeAnalyzer +{ + // This is somewhat difficult. The trouble is, if a name is in a bad state, it is + // actually rather difficult to come up with a general procedure to "fix" it. We + // instead focus on the common case where a name is right according to *some* sort + // of regular scheme, and focus on that. + + using Debug = System.Diagnostics.Debug; + + [ExportCodeFixProvider(LanguageNames.CSharp, Name = nameof(NameFixProvider)), Shared] + public sealed class NameFixProvider : CodeFixProvider + { + private const string PrivateTitle = "Fix name"; + + private static ImmutableArray _fixable = ImmutableArray.Create( + NameAnalyzer.PrivateFieldName.Id, NameAnalyzer.GeneralName.Id, + ParameterVariableNameAnalyzer.Id, TypeParamNameAnalyzer.Id); + private static ImmutableHashSet _fixableSet = ImmutableHashSet.Empty.Union(_fixable); + + private static Regex _sections = new Regex( + @"(?:\p{Nd}\p{Ll}*)|" + // Numbers we consider a separate token. + @"(?:\p{Lu}+(?!\p{Ll}))|" + // Completely upper case sections. + @"(?:\p{Lu}\p{Ll}+)|" + // Title cased word. + @"(?:\p{Ll}+)"); // Lower case word. 
+ + public override ImmutableArray FixableDiagnosticIds => _fixable; + + public override FixAllProvider GetFixAllProvider() + => WellKnownFixAllProviders.BatchFixer; + + public override async Task RegisterCodeFixesAsync(CodeFixContext context) + { + var diagnostic = context.Diagnostics.FirstOrDefault(d => _fixableSet.Contains(d.Id)); + if (diagnostic == null) + return; + + string originalName = diagnostic.Properties[NameAnalyzer.NameProperty]; + string desiredNameStr = diagnostic.Properties[NameAnalyzer.DesiredNameProperty]; + NameType desiredName; + if (!Enum.TryParse(desiredNameStr, out desiredName)) + return; + + var root = await context.Document.GetSyntaxRootAsync(context.CancellationToken); + var token = root.FindToken(diagnostic.Location.SourceSpan.Start); + if (token.Text != originalName) + return; + + string docName = context.Document.Name; + if (docName.Length - 3 == originalName.Length && docName.EndsWith(".cs", StringComparison.OrdinalIgnoreCase) + && context.Document.Name.StartsWith(originalName, StringComparison.OrdinalIgnoreCase)) + { + // So this is an entity like "FooBarBiz" in a file named "FooBarBiz.cs". + // We can continue to warn on these, but registering a *fix* for them would + // be inappropriate, since while the Roslyn API allows us to rename items like, + // these, we cannot change the file name. 
+ return; + } + + Func renamer = null; + switch (desiredName) + { + case NameType.UnderScoreCamelCased: + renamer = RenameUnderscore; + break; + case NameType.CamelCased: + renamer = RenameCamelCase; + break; + case NameType.PascalCased: + renamer = RenamePascal; + break; + case NameType.IPascalCased: + renamer = RenameInterface; + break; + case NameType.TPascalCased: + renamer = RenameTypeParam; + break; + default: + Debug.Assert(!Enum.IsDefined(typeof(NameType), desiredName)); + break; + } + + context.RegisterCodeFix(CodeAction.Create(PrivateTitle, + c => RenameAsync(context.Document, token.Parent, originalName, renamer, c), diagnostic.Id), diagnostic); + } + + private async Task RenameAsync(Document document, + SyntaxNode identifier, string name, Func renamer, CancellationToken cancellationToken) + { + // Get the symbol representing the type to be renamed. + var semanticModel = await document.GetSemanticModelAsync(cancellationToken); + ISymbol typeSymbol = semanticModel.GetDeclaredSymbol(identifier, cancellationToken); + + string newName = renamer(name); + + // Produce a new solution that has all references to that type renamed, including the declaration. + var originalSolution = document.Project.Solution; + var optionSet = originalSolution.Workspace.Options; + var newSolution = await Renamer.RenameSymbolAsync(document.Project.Solution, typeSymbol, newName, optionSet, cancellationToken).ConfigureAwait(false); + + // Return the new solution with the now-uppercase type name. 
+ return newSolution; + } + + private IEnumerable ExtractSections(string name) + { + foreach (Match match in _sections.Matches(name)) + yield return match.Value; + } + + private string RenameUnderscore(string name) => RenameCamelCore(name, "_"); + private string RenameCamelCase(string name) => RenameCamelCore(name, ""); + private string RenameTypeParam(string name) => RenamePascalPrefixCore(name, "T"); + private string RenameInterface(string name) => RenamePascalPrefixCore(name, "I"); + private string RenamePascal(string name) => RenamePascalPrefixCore(name, ""); + + private string RenameCamelCore(string name, string prefix) + { + if (string.IsNullOrEmpty(name)) + return prefix; + StringBuilder sb = new StringBuilder(prefix); + foreach (var section in ExtractSections(name)) + { + if (sb.Length == prefix.Length) + sb.Append(section.ToLowerInvariant()); + else + AppendTitleCase(sb, section); + } + return sb.ToString(); + } + + private string RenamePascalPrefixCore(string name, string prefix) + { + if (string.IsNullOrEmpty(name)) + return prefix; + StringBuilder sb = new StringBuilder(prefix); + bool first = true; + foreach (var section in ExtractSections(name)) + { + if (first) + { + first = false; + if (prefix == section) + continue; + } + AppendTitleCase(sb, section); + } + return sb.ToString(); + } + + private void AppendTitleCase(StringBuilder builder, string token) + { + if (string.IsNullOrEmpty(token)) + return; + if (token.Length == 2 && char.IsUpper(token[0]) && char.IsUpper(token[1])) + { + builder.Append(token); + return; + } + // Further special casing for things like: IO, UI? 
+ builder.Append(char.ToUpperInvariant(token[0])); + builder.Append(token.Substring(1).ToLowerInvariant()); + } + } +} diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/ParameterVariableNameAnalyzer.cs b/tools-local/Microsoft.ML.CodeAnalyzer/ParameterVariableNameAnalyzer.cs new file mode 100644 index 0000000000..7496609778 --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/ParameterVariableNameAnalyzer.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Immutable; +using System.Linq; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.CSharp.Syntax; +using Microsoft.CodeAnalysis.Diagnostics; + +namespace Microsoft.ML.CodeAnalyzer +{ + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class ParameterVariableNameAnalyzer : DiagnosticAnalyzer + { + private const string Category = "Naming"; + + internal const string Id = "MSML_ParameterLocalVarName"; + private const string Title = "Parameter or local variable name not standard"; + private const string Format = "{1} name '{0}' not standard"; + private const string Description = + "Parameter and local variable names should be lowerCamelCased."; + + private static DiagnosticDescriptor Rule = + new DiagnosticDescriptor(Id, Title, Format, Category, + DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description); + + public override ImmutableArray SupportedDiagnostics => + ImmutableArray.Create(Rule); + + public override void Initialize(AnalysisContext context) + { + context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None); + context.RegisterSyntaxNodeAction(AnalyzeParameter, SyntaxKind.Parameter); + context.RegisterSyntaxNodeAction(AnalyzeLocal, SyntaxKind.LocalDeclarationStatement); + } + + private static void 
AnalyzeParameter(SyntaxNodeAnalysisContext context) + { + var node = (ParameterSyntax)context.Node; + AnalyzeCore(context, node.Identifier, "parameter"); + } + + private static void AnalyzeLocal(SyntaxNodeAnalysisContext context) + { + var node = (LocalDeclarationStatementSyntax)context.Node; + foreach (var dec in node.DescendantNodesAndSelf().Where(s => s.IsKind(SyntaxKind.VariableDeclarator))) + AnalyzeCore(context, ((VariableDeclaratorSyntax)dec).Identifier, "local variable"); + } + + private static void AnalyzeCore(SyntaxNodeAnalysisContext context, SyntaxToken identifier, string type) + { + var name = identifier.Text; + if (name == null || Utils.NameIsGood(name, 0, false)) + return; + context.ReportDiagnostic(NameAnalyzer.CreateDiagnostic(Rule, identifier, NameType.CamelCased, type)); + } + } +} \ No newline at end of file diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/SingleVariableDeclarationAnalyzer.cs b/tools-local/Microsoft.ML.CodeAnalyzer/SingleVariableDeclarationAnalyzer.cs new file mode 100644 index 0000000000..eceb2b3f0d --- /dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/SingleVariableDeclarationAnalyzer.cs @@ -0,0 +1,49 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Collections.Immutable; +using System.Linq; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.CSharp.Syntax; +using Microsoft.CodeAnalysis.Diagnostics; + +namespace Microsoft.ML.CodeAnalyzer +{ + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class SingleVariableDeclarationAnalyzer : DiagnosticAnalyzer + { + private const string Category = "Declaration"; + internal const string DiagnosticId = "MSML_SingleVariableDeclaration"; + + private const string Title = "Have only a single variable present per declaration"; + private const string Format = "Variables '{0}' were all part of a single declaration, and should be broken up"; + private const string Description = + "We prefer to have one variable per declaration."; + + private static DiagnosticDescriptor Rule = + new DiagnosticDescriptor(DiagnosticId, Title, Format, Category, + DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description); + + public override ImmutableArray SupportedDiagnostics => + ImmutableArray.Create(Rule); + + public override void Initialize(AnalysisContext context) + { + context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None); + context.RegisterSyntaxNodeAction(Analyze, SyntaxKind.VariableDeclaration); + } + + private static void Analyze(SyntaxNodeAnalysisContext context) + { + var node = (VariableDeclarationSyntax)context.Node; + var vars = node.Variables; + if (vars.Count <= 1 || node.Parent.IsKind(SyntaxKind.ForStatement)) + return; + string jointVariableNames = string.Join("', '", vars.Select(v => v.Identifier.Text)); + var diagnostic = Diagnostic.Create(Rule, context.Node.GetLocation(), jointVariableNames); + context.ReportDiagnostic(diagnostic); + } + } +} \ No newline at end of file diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/TypeParamNameAnalyzer.cs b/tools-local/Microsoft.ML.CodeAnalyzer/TypeParamNameAnalyzer.cs new file mode 100644 index 0000000000..973c9a7b0b --- 
/dev/null +++ b/tools-local/Microsoft.ML.CodeAnalyzer/TypeParamNameAnalyzer.cs @@ -0,0 +1,47 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Immutable; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.CSharp.Syntax; +using Microsoft.CodeAnalysis.Diagnostics; + +namespace Microsoft.ML.CodeAnalyzer +{ + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class TypeParamNameAnalyzer : DiagnosticAnalyzer + { + private const string Category = "Naming"; + + internal const string Id = "MSML_TypeParamName"; + private const string Title = "Type parameter name not standard"; + private const string Format = "Type parameter name '{0}' not standard"; + private const string Description = + "Type parameter names should start with 'T' and the remainder PascalCased."; + + private static DiagnosticDescriptor Rule = + new DiagnosticDescriptor(Id, Title, Format, Category, + DiagnosticSeverity.Warning, isEnabledByDefault: true, description: Description); + + public override ImmutableArray SupportedDiagnostics => + ImmutableArray.Create(Rule); + + public override void Initialize(AnalysisContext context) + { + context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None); + context.RegisterSyntaxNodeAction(Analyze, SyntaxKind.TypeParameter); + } + + private static void Analyze(SyntaxNodeAnalysisContext context) + { + var node = (TypeParameterSyntax)context.Node; + var identifier = node.Identifier; + var name = identifier.Text; + if (name == null || (name.StartsWith("T") && Utils.NameIsGood(name, 1, true))) + return; + context.ReportDiagnostic(NameAnalyzer.CreateDiagnostic(Rule, identifier, NameType.TPascalCased)); + } + } +} \ No newline at end of file diff --git a/tools-local/Microsoft.ML.CodeAnalyzer/Utils.cs 
b/tools-local/Microsoft.ML.CodeAnalyzer/Utils.cs
new file mode 100644
index 0000000000..0bc941906f
--- /dev/null
+++ b/tools-local/Microsoft.ML.CodeAnalyzer/Utils.cs
@@ -0,0 +1,64 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace Microsoft.ML.CodeAnalyzer
+{
+    internal static class Utils
+    {
+        /// <summary>
+        /// Checks whether a name is properly camelCased or PascalCased from a given position on.
+        /// Runs of more than three upper case letters are rejected, so HTMLStream is
+        /// disallowed while IOStream is accepted.
+        /// </summary>
+        /// <param name="name">The symbol name to analyze</param>
+        /// <param name="min">The position in the name to start</param>
+        /// <param name="upper">Whether it should be PascalCased</param>
+        /// <returns>Whether this name is good</returns>
+        public static bool NameIsGood(string name, int min, bool upper)
+        {
+            // C# naming guidelines say an initialism longer than two characters should not be
+            // all upper cased, so _readIOStream and _readHttpStream are both good. Back-to-back
+            // two-letter initialisms (like _readIOUI) are assumed not to occur. The upshot is:
+            const int maxUpperRun = 3;
+            bool needUpper = upper;
+            // When the first letter must be lower case, start with the run already at the
+            // limit so that an upper case letter in that position fails immediately.
+            int upperRun = needUpper ? 0 : maxUpperRun;
+
+            for (int pos = min; pos < name.Length; pos++)
+            {
+                char ch = name[pos];
+                // Only letters and digits are allowed.
+                if (!char.IsLetterOrDigit(ch))
+                    return false;
+
+                if (char.IsDigit(ch))
+                {
+                    // Digits count as satisfying the upper case requirement and end any run.
+                    needUpper = false;
+                    upperRun = 0;
+                }
+                else if (char.IsUpper(ch))
+                {
+                    needUpper = false;
+                    upperRun++;
+                    if (upperRun > maxUpperRun)
+                        return false;
+                }
+                else if (needUpper)
+                {
+                    // A non-upper-case letter where the name had to open with upper case.
+                    return false;
+                }
+                else
+                {
+                    upperRun = 0;
+                }
+            }
+
+            // A full-length run may not end the name: IOStream is fine, but IOS is not.
+            return upperRun < maxUpperRun;
+        }
+    }
+}