Skip to content

Commit d6e64cb

Browse files
authored
Added documentation regarding TextLoader's hasHeader field (#4655)
* Update TextLoader.cs * Update TextLoader.cs * Update TextLoaderSaverCatalog.cs * Update TextLoader.cs * Update TextLoader.cs * Update TextLoaderSaverCatalog.cs * Update TextLoaderSaverCatalog.cs * Edits * Update TextLoader.cs * Update TextLoader.cs * Update TextLoader.cs
1 parent 2fee4a3 commit d6e64cb

File tree

2 files changed

+25
-5
lines changed

2 files changed

+25
-5
lines changed

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,10 @@ public class Options
479479

480480
/// <summary>
481481
/// Whether the data file has a header with feature names.
482+
/// Note: If a TextLoader is created with hasHeader = true but without a dataSample, then vector columns made by TextLoader will not contain slot name
483+
/// annotations (slots being the elements of the given vector column), because the output schema is made when the TextLoader is made, and not when
484+
/// TextLoader.Load(IMultiStreamSource source) is called. In addition, the case where dataSample = null and hasHeader = true indicates to the
485+
/// loader that it needs to skip the first line of the file it is given when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
482486
/// </summary>
483487
[Argument(ArgumentType.AtMostOnce, ShortName = "header",
484488
HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")]
@@ -1557,4 +1561,4 @@ public DataViewRowCursor[] GetRowCursorSet(IEnumerable<DataViewSchema.Column> co
15571561
void ICanSaveModel.Save(ModelSaveContext ctx) => ((ICanSaveModel)_loader).Save(ctx);
15581562
}
15591563
}
1560-
}
1564+
}

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@ public static class TextLoaderSaverCatalog
2121
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
2222
/// <param name="columns">Array of columns <see cref="TextLoader.Column"/> defining the schema.</param>
2323
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
24-
/// <param name="hasHeader">Whether the file has a header.</param>
24+
/// <param name="hasHeader">Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a
25+
/// <paramref name="dataSample"/>, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column),
26+
/// because the output schema is made when the TextLoader is made, and not when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
27+
/// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that it needs to skip the first line of
28+
/// the file it is given when Load() is called.</param>
2529
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
2630
/// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string.</param>
2731
/// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
@@ -67,7 +71,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
6771
/// names and their data types in the schema of the loaded data.</typeparam>
6872
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
6973
/// <param name="separatorChar">Column separator character. Default is '\t'</param>
70-
/// <param name="hasHeader">Does the file contains header?</param>
74+
/// <param name="hasHeader">Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a
75+
/// <paramref name="dataSample"/>, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column),
76+
/// because the output schema is made when the TextLoader is made, and not when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
77+
/// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that it needs to skip the first line of
78+
/// the file it is given when Load() is called.</param>
7179
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer information
7280
/// about the columns, such as slot names.</param>
7381
/// <param name="allowQuoting">Whether the input may include quoted values,
@@ -97,7 +105,11 @@ public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog cat
97105
/// <param name="path">The path to the file.</param>
98106
/// <param name="columns">The columns of the schema.</param>
99107
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
100-
/// <param name="hasHeader">Whether the file has a header.</param>
108+
/// <param name="hasHeader">Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a
109+
/// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column),
110+
/// because the output schema is made when the TextLoader is made, and not when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
111+
/// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that it needs to skip the first line of
112+
/// the file it is given when Load() is called.</param>
101113
/// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string.</param>
102114
/// <param name="trimWhitespace">Remove trailing whitespace from lines</param>
103115
/// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format.</param>
@@ -138,7 +150,11 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog,
138150
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
139151
/// <param name="path">The path to the file.</param>
140152
/// <param name="separatorChar">Column separator character. Default is '\t'</param>
141-
/// <param name="hasHeader">Does the file contains header?</param>
153+
/// <param name="hasHeader">Whether the file has a header with feature names. Note: If a TextLoader is created with hasHeader = true but without a
154+
/// dataSample, then vector columns made by TextLoader will not contain slot name annotations (slots being the elements of the given vector column),
155+
/// because the output schema is made when the TextLoader is made, and not when <see cref="TextLoader.Load(IMultiStreamSource)"/> is called.
156+
/// In addition, the case where dataSample = null and hasHeader = true indicates to the loader that it needs to skip the first line of
157+
/// the file it is given when Load() is called.</param>
142158
/// <param name="allowQuoting">Whether the input may include quoted values,
143159
/// which can contain separator characters, colons,
144160
/// and distinguish empty values from missing values. When true, consecutive separators

0 commit comments

Comments
 (0)