From e19060960cac2b51cadeae7bafc9de9fee23d06b Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Tue, 14 Jan 2020 18:08:25 -0800 Subject: [PATCH 01/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 6e8168041c..b5930ad653 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -479,6 +479,7 @@ public class Options /// /// Whether the data file has a header with feature names. + /// Note: If a TextLoader is created with hasHeader=true but without a dataSample, then the TextLoader will not contain slot names, because the output schema is made when the TextLoader is made, and not when Load is called. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] From a13000740634758bc7d53ab69eba501d5e8805b5 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Fri, 17 Jan 2020 19:50:33 -0500 Subject: [PATCH 02/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index b5930ad653..fbd3c91743 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -479,7 +479,7 @@ public class Options /// /// Whether the data file has a header with feature names. - /// Note: If a TextLoader is created with hasHeader=true but without a dataSample, then the TextLoader will not contain slot names, because the output schema is made when the TextLoader is made, and not when Load is called. + /// Note: If a TextLoader is created with HasHeader=true but without a dataSample, then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] @@ -680,6 +680,9 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, needInputSize = true; } + if (parent.HasHeader && dataSample == null) + throw ch.ExceptNotSupp("Data sample cannot be null if the HasHeader flag is set to True."); + int inputSize = parent._inputSize; ch.Assert(0 <= inputSize & inputSize < SrcLim); List> lines = null; From acfec97227736ae448ce868434c024a70829045f Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Tue, 21 Jan 2020 13:27:46 +0300 Subject: [PATCH 03/11] Update TextLoaderSaverCatalog.cs --- .../DataLoadSave/Text/TextLoaderSaverCatalog.cs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 8962b550bf..590838a2f9 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -21,7 +21,9 @@ public static class TextLoaderSaverCatalog /// The catalog. /// Array of columns defining the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, + /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, + /// and not when TextLoader.Load(IMultiStreamSource source) is called. /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines @@ -67,7 +69,9 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// names and their data types in the schema of the loaded data. /// The catalog. /// Column separator character. Default is '\t' - /// Does the file contains header? + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, + /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, + /// and not when TextLoader.Load(IMultiStreamSource source) is called. /// The optional location of a data sample. The sample can be used to infer information /// about the columns, such as slot names. /// Whether the input may include quoted values, @@ -97,7 +101,9 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat /// The path to the file. /// The columns of the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, + /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, + /// and not when TextLoader.Load(IMultiStreamSource source) is called. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines /// Whether the file can contain numerical vectors in sparse format. @@ -134,7 +140,9 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, /// The catalog. /// The path to the file. /// Column separator character. Default is '\t' - /// Does the file contains header? + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, + /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, + /// and not when TextLoader.Load(IMultiStreamSource source) is called. /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators From 17aba3de10c81d414e382235ec114eae8d25c36f Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Thu, 23 Jan 2020 14:52:25 +0300 Subject: [PATCH 04/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index fbd3c91743..2e6ed5b2ed 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -479,7 +479,10 @@ public class Options /// /// Whether the data file has a header with feature names. - /// Note: If a TextLoader is created with HasHeader=true but without a dataSample, then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// Note: If a TextLoader is created with HasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name + /// annotations (slots being the elements of the given vector column), because the output schema is made when the TextLoader is made, and not when + /// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the + /// loader that when it is given a file when Load is called, it needs to skip the first line. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] @@ -680,9 +683,6 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, needInputSize = true; } - if (parent.HasHeader && dataSample == null) - throw ch.ExceptNotSupp("Data sample cannot be null if the HasHeader flag is set to True."); - int inputSize = parent._inputSize; ch.Assert(0 <= inputSize & inputSize < SrcLim); List> lines = null; From c01c7db2348ba64d7b7738e196874c2dbf0eff3c Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Thu, 30 Jan 2020 13:02:36 +0300 Subject: [PATCH 05/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 2e6ed5b2ed..4b932e1dba 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -482,7 +482,7 @@ public class Options /// Note: If a TextLoader is created with HasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name /// annotations (slots being the elements of the given vector column), because the output schema is made when the TextLoader is made, and not when /// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the - /// loader that when it is given a file when Load is called, it needs to skip the first line. + /// loader that when it is given a file when Load() is called, it needs to skip the first line. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] From 2f6ee2867a1a8c090d7ce33f7671a468c6562a7b Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Thu, 30 Jan 2020 13:02:42 +0300 Subject: [PATCH 06/11] Update TextLoaderSaverCatalog.cs --- .../DataLoadSave/Text/TextLoaderSaverCatalog.cs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 590838a2f9..34c10fe017 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -23,7 +23,8 @@ public static class TextLoaderSaverCatalog /// The character used as separator between data points in a row. By default the tab character is used as separator. /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. + /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the + /// loader that when it is given a file when Load() is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines @@ -71,7 +72,8 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// Column separator character. Default is '\t' /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. + /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the + /// loader that when it is given a file when Load() is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer information /// about the columns, such as slot names. /// Whether the input may include quoted values, @@ -103,7 +105,8 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat /// The character used as separator between data points in a row. By default the tab character is used as separator. /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. + /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the + /// loader that when it is given a file when Load() is called, it needs to skip the first line. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines /// Whether the file can contain numerical vectors in sparse format. @@ -142,7 +145,8 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, /// Column separator character. Default is '\t' /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. + /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the + /// loader that when it is given a file when Load() is called, it needs to skip the first line. /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators From 32ae6c3b9ab940ae85a16599ad75136269234031 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Thu, 30 Jan 2020 21:48:09 +0300 Subject: [PATCH 07/11] Update TextLoaderSaverCatalog.cs --- .../Text/TextLoaderSaverCatalog.cs | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 34c10fe017..f05b6096a2 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -21,10 +21,11 @@ public static class TextLoaderSaverCatalog /// The catalog. /// Array of columns defining the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, - /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the - /// loader that when it is given a file when Load() is called, it needs to skip the first line. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines @@ -70,10 +71,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// names and their data types in the schema of the loaded data. /// The catalog. /// Column separator character. Default is '\t' - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, - /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the - /// loader that when it is given a file when Load() is called, it needs to skip the first line. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer information /// about the columns, such as slot names. /// Whether the input may include quoted values, @@ -103,10 +105,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat /// The path to the file. /// The columns of the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, - /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the - /// loader that when it is given a file when Load() is called, it needs to skip the first line. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines /// Whether the file can contain numerical vectors in sparse format. @@ -143,10 +146,11 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, /// The catalog. /// The path to the file. /// Column separator character. Default is '\t' - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader=true but without a dataSample, - /// then the TextLoader will not contain slot (columns that are chosen for manipulation) names, because the output schema is made when the TextLoader is made, - /// and not when TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the - /// loader that when it is given a file when Load() is called, it needs to skip the first line. + /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a + /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. + /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// is called, it needs to skip the first line. /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators From d72dd2833b00633dca7ad307fd93e9561116253b Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Mon, 3 Feb 2020 12:54:44 +0300 Subject: [PATCH 08/11] Edits --- .../DataLoadSave/Text/TextLoader.cs | 2 +- .../Text/TextLoaderSaverCatalog.cs | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 4b932e1dba..89646fb6a5 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -482,7 +482,7 @@ public class Options /// Note: If a TextLoader is created with HasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name /// annotations (slots being the elements of the given vector column), because the output schema is made when the TextLoader is made, and not when /// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the - /// loader that when it is given a file when Load() is called, it needs to skip the first line. + /// loader that when it is given a file when is called, it needs to skip the first line. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index f05b6096a2..490464c5f3 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -21,10 +21,10 @@ public static class TextLoaderSaverCatalog /// The catalog. /// Array of columns defining the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a - /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), - /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. - /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a + /// , then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() /// is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. /// Whether the file can contain columns defined by a quoted string. @@ -71,10 +71,10 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// names and their data types in the schema of the loaded data. /// The catalog. /// Column separator character. Default is '\t' - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a - /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), - /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. - /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a + /// , then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), + /// because the output schema is made when the TextLoader is made, and not when is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() /// is called, it needs to skip the first line. /// The optional location of a data sample. The sample can be used to infer information /// about the columns, such as slot names. @@ -105,10 +105,10 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat /// The path to the file. /// The columns of the schema. /// The character used as separator between data points in a row. By default the tab character is used as separator. - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), - /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. - /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// because the output schema is made when the TextLoader is made, and not when is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() /// is called, it needs to skip the first line. /// Whether the file can contain columns defined by a quoted string. /// Remove trailing whitespace from lines @@ -146,10 +146,10 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, /// The catalog. /// The path to the file. /// Column separator character. Default is '\t' - /// Whether the file has a header with feature names. Note: If a TextLoader is created with HasHeader = true but without a + /// Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a /// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column), - /// because the output schema is made when the TextLoader is made, and not when TextLoader.Load(IMultiStreamSource source) is called. - /// In addition, the case where dataSample = null and HasHeader = true indicates to the loader that when it is given a file when Load() + /// because the output schema is made when the TextLoader is made, and not when is called. + /// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that when it is given a file when Load() /// is called, it needs to skip the first line. /// Whether the input may include quoted values, /// which can contain separator characters, colons, From 2d78ae4e040f16c06a06bee9f8c0a594e10da66c Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Mon, 3 Feb 2020 21:53:18 +0300 Subject: [PATCH 09/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 89646fb6a5..f939aaa000 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -2,7 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Reflection; @@ -1561,4 +1561,4 @@ public DataViewRowCursor[] GetRowCursorSet(IEnumerable co void ICanSaveModel.Save(ModelSaveContext ctx) => ((ICanSaveModel)_loader).Save(ctx); } } -} \ No newline at end of file +} From c4b1129cc64d553d1cb1f53a3f0fa35e6080dbf9 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Mon, 3 Feb 2020 21:53:48 +0300 Subject: [PATCH 10/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index f939aaa000..d365a0a9f8 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -2,7 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Reflection; From 4e399accedcc5c338db232cbaf1760bff814205e Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Fri, 7 Feb 2020 21:25:53 +0300 Subject: [PATCH 11/11] Update TextLoader.cs --- src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index d365a0a9f8..c83e9e4d9f 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -479,9 +479,9 @@ public class Options /// /// Whether the data file has a header with feature names. - /// Note: If a TextLoader is created with HasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name + /// Note: If a TextLoader is created with hasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name /// annotations (slots being the elements of the given vector column), because the output schema is made when the TextLoader is made, and not when - /// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and HasHeader = true indicates to the + /// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and hasHeader = true indicates to the /// loader that when it is given a file when is called, it needs to skip the first line. /// [Argument(ArgumentType.AtMostOnce, ShortName = "header",