Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Api/DataViewConstructionUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ protected DataViewBase(IHostEnvironment env, string name, InternalSchemaDefiniti
}
}

public abstract long? GetRowCount(bool lazy = true);
public abstract long? GetRowCount();

public abstract IRowCursor GetRowCursor(Func<int, bool> predicate, IRandom rand = null);

Expand Down Expand Up @@ -555,7 +555,7 @@ public override bool CanShuffle
get { return true; }
}

public override long? GetRowCount(bool lazy = true)
public override long? GetRowCount()
{
return _data.Count;
}
Expand Down Expand Up @@ -654,7 +654,7 @@ public override bool CanShuffle
get { return false; }
}

public override long? GetRowCount(bool lazy = true)
public override long? GetRowCount()
{
return (_data as ICollection<TRow>)?.Count;
}
Expand Down Expand Up @@ -735,7 +735,7 @@ public override bool CanShuffle
get { return false; }
}

public override long? GetRowCount(bool lazy = true)
public override long? GetRowCount()
{
return null;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Api/StatefulFilterTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform<TS

public Schema Schema => _bindings.Schema;

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
// REVIEW: currently stateful map is implemented via filter, and this is sub-optimal.
return null;
Expand Down
16 changes: 7 additions & 9 deletions src/Microsoft.ML.Core/Data/IDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,15 @@ public interface IDataView : ISchematized
bool CanShuffle { get; }

/// <summary>
/// Returns the number of rows if known. Null means unknown. If lazy is true, then
/// this is permitted to return null when it might return a non-null value on a subsequent
/// call. This indicates, that the transform does not YET know the number of rows, but
/// may in the future. If lazy is false, then this is permitted to do some work (no more
/// that it would normally do for cursoring) to determine the number of rows.
/// Returns the number of rows if known. Returning null means that the row count is unknown, but
/// a subsequent call might return a non-null value. This indicates that the transform does
/// not YET know the number of rows, but may in the future. The implementation's computational
/// complexity should be O(1).
///
/// Most components will return the same answer whether lazy is true or false. Some, like
/// a cache, might return null until the cache is fully populated (when lazy is true). When
/// lazy is false, such a cache would block until the cache was populated.
/// Most implementations will return the same answer every time. Some, like a cache, might
/// return null until the cache is fully populated.
/// </summary>
long? GetRowCount(bool lazy = true);
long? GetRowCount();

/// <summary>
/// Get a row cursor. The active column indices are those for which needCol(col) returns true.
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Data/DataViewUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public static string[] GetTempColumnNames(this ISchema schema, int n, string tag
/// </summary>
public static long ComputeRowCount(IDataView view)
{
long? countNullable = view.GetRowCount(lazy: false);
long? countNullable = view.GetRowCount();
if (countNullable != null)
return countNullable.Value;
long count = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Data/RowCursorUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Fun
return new IRowCursor[] { GetRowCursor(needCol, rand) };
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return 1;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ public void GetMetadata<TValue>(string kind, int col, ref TValue value)

private long RowCount { get { return _header.RowCount; } }

public long? GetRowCount(bool lazy = true) { return RowCount; }
public long? GetRowCount() { return RowCount; }

public bool CanShuffle { get { return true; } }

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/DataLoadSave/CompositeDataLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -557,9 +557,9 @@ private static string GenerateTag(int index)
return string.Format("xf{0:00}", index);
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return View.GetRowCount(lazy);
return View.GetRowCount();
}

public bool CanShuffle => View.CanShuffle;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ public void Save(ModelSaveContext ctx)

public Schema Schema { get; }

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return null;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1352,7 +1352,7 @@ public BoundLoader(TextLoader reader, IMultiStreamSource files)
_files = files;
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
// We don't know how many rows there are.
// REVIEW: Should we try to support RowCount?
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ private void WriteDataCore(IChannel ch, TextWriter writer, IDataView data,
if (_outputSchema)
WriteSchemaAsComment(writer, header);

double rowCount = data.GetRowCount(true) ?? double.NaN;
double rowCount = data.GetRowCount() ?? double.NaN;
using (var pch = !_silent ? _host.StartProgressChannel("TextSaver: saving data") : null)
{
long stateCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ public VectorType GetSlotType(int col)
}
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return _header.RowCount;
}
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.Data/DataView/AppendRowsDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ private AppendRowsDataView(IHostEnvironment env, Schema schema, IDataView[] sour
_counts = null;
break;
}
long? count = dv.GetRowCount(true);
long? count = dv.GetRowCount();
if (count == null || count < 0 || count > int.MaxValue)
{
_canShuffle = false;
Expand Down Expand Up @@ -127,12 +127,12 @@ private void CheckSchemaConsistency()
}
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
long sum = 0;
foreach (var source in _sources)
{
var cur = source.GetRowCount(lazy);
var cur = source.GetRowCount();
if (cur == null)
return null;
_host.Check(cur.Value >= 0, "One of the sources returned a negative row count");
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private sealed class DataView : IDataView

public Schema Schema { get { return _schema; } }

public long? GetRowCount(bool lazy = true) { return _rowCount; }
public long? GetRowCount() { return _rowCount; }

public bool CanShuffle { get { return true; } }

Expand Down
17 changes: 6 additions & 11 deletions src/Microsoft.ML.Data/DataView/CacheDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,13 @@ public int MapInputToCacheColumnIndex(int inputIndex)

public Schema Schema => _subsetInput.Schema;

public long? GetRowCount(bool lazy = true)
/// <summary>
/// Return the number of rows if available.
/// </summary>
public long? GetRowCount()
{
if (_rowCount < 0)
{
if (lazy)
return null;
if (_cacheDefaultWaiter == null)
KickoffFiller(new int[0]);
_host.Assert(_cacheDefaultWaiter != null);
_cacheDefaultWaiter.Wait(long.MaxValue);
_host.Assert(_rowCount >= 0);
}
return null;
return _rowCount;
}

Expand Down Expand Up @@ -317,7 +312,7 @@ public IRowSeeker GetSeeker(Func<int, bool> predicate)
_host.CheckValue(predicate, nameof(predicate));
// The seeker needs to know the row count when it validates the row index to move to.
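// The previous GetRowCount(false) call forced a wait here so that _rowCount would have a valid value.
// NOTE(review): the parameterless GetRowCount() returns null without waiting when _rowCount < 0,
// so this call no longer guarantees a valid _rowCount - confirm the assert below still holds.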
GetRowCount(false);
GetRowCount();
_host.Assert(_rowCount >= 0);
var waiter = WaiterWaiter.Create(this, predicate);
if (waiter.IsTrivial)
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/DataView/EmptyDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public EmptyDataView(IHostEnvironment env, Schema schema)
Schema = schema;
}

public long? GetRowCount(bool lazy = true) => 0;
public long? GetRowCount() => 0;

public IRowCursor GetRowCursor(Func<int, bool> needCol, IRandom rand = null)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/DataView/OpaqueDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ public OpaqueDataView(IDataView source)
_source = source;
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return _source.GetRowCount(lazy);
return _source.GetRowCount();
}

public IRowCursor GetRowCursor(Func<int, bool> predicate, IRandom rand = null)
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/DataView/Transposer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Fun
return _view.GetRowCursorSet(out consolidator, predicate, n, rand);
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
// Not a passthrough.
return RowCount;
Expand Down Expand Up @@ -818,9 +818,9 @@ public DataViewSlicer(IHost host, IDataView input, int[] toSlice)
_schema = new SchemaImpl(this, nameToCol);
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return _input.GetRowCount(lazy);
return _input.GetRowCount();
}

/// <summary>
Expand Down Expand Up @@ -1503,7 +1503,7 @@ public SlotDataView(IHostEnvironment env, ITransposeDataView data, int col)
_schemaImpl = new SchemaImpl(this);
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
var type = _data.Schema.GetColumnType(_col);
int valueCount = type.ValueCount;
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/DataView/ZipDataView.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ private ZipDataView(IHost host, IDataView[] sources)

public Schema Schema => _compositeSchema.AsSchema;

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
long min = -1;
foreach (var source in _sources)
{
var cur = source.GetRowCount(lazy);
var cur = source.GetRowCount();
if (cur == null)
return null;
_host.Check(cur.Value >= 0, "One of the sources returned a negative row count");
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,9 @@ public void Save(ModelSaveContext ctx)
_transform.Save(ctx);
}

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return _transform.GetRowCount(lazy);
return _transform.GetRowCount();
}

public IRowCursor GetRowCursor(Func<int, bool> needCol, IRandom rand = null)
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Transforms/NopTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ public bool CanShuffle

public Schema Schema => Source.Schema;

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return Source.GetRowCount(lazy);
return Source.GetRowCount();
}

public IRowCursor GetRowCursor(Func<int, bool> predicate, IRandom rand = null)
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Transforms/PerGroupTransformBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ public virtual void Save(ModelSaveContext ctx)

protected abstract BindingsBase GetBindings();

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return Source.GetRowCount(lazy);
return Source.GetRowCount();
}

public IRowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator, Func<int, bool> predicate, int n, IRandom rand = null)
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Transforms/SelectColumnsTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ public SelectColumnsDataTransform(IHostEnvironment env, SelectColumnsTransform t

Schema ISchematized.Schema => _mapper.Schema;

public long? GetRowCount(bool lazy = true) => Source.GetRowCount(lazy);
public long? GetRowCount() => Source.GetRowCount();

public IRowCursor GetRowCursor(Func<int, bool> needCol, IRandom rand = null)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Transforms/SkipTakeFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,11 @@ public override void Save(ModelSaveContext ctx)
/// Returns the computed count of rows remaining after skip and take operation.
/// Returns null if count is unknown.
/// </summary>
public override long? GetRowCount(bool lazy = true)
public override long? GetRowCount()
{
if (_take == 0)
return 0;
long? count = Source.GetRowCount(lazy);
long? count = Source.GetRowCount();
if (count == null)
return null;

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Transforms/TermTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, stri
{
var header = new ProgressHeader(new[] { "Total Terms" }, new[] { "examples" });
var trainer = Trainer.Create(cursor, colSrc, autoConvert, int.MaxValue, bldr);
double rowCount = termData.GetRowCount(true) ?? double.NaN;
double rowCount = termData.GetRowCount() ?? double.NaN;
long rowCur = 0;
pch.SetHeader(header,
e =>
Expand Down Expand Up @@ -606,7 +606,7 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info
using (var pch = env.StartProgressChannel("Building term dictionary"))
{
long rowCur = 0;
double rowCount = trainingData.GetRowCount(true) ?? double.NaN;
double rowCount = trainingData.GetRowCount() ?? double.NaN;
var header = new ProgressHeader(new[] { "Total Terms" }, new[] { "examples" });

itrainer = 0;
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.Data/Transforms/TransformBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ protected TransformBase(IHost host, IDataView input)

public abstract void Save(ModelSaveContext ctx);

public abstract long? GetRowCount(bool lazy = true);
public abstract long? GetRowCount();

public virtual bool CanShuffle { get { return Source.CanShuffle; } }

Expand Down Expand Up @@ -104,7 +104,7 @@ protected RowToRowTransformBase(IHost host, IDataView input)
{
}

public sealed override long? GetRowCount(bool lazy = true) { return Source.GetRowCount(lazy); }
public sealed override long? GetRowCount() { return Source.GetRowCount(); }
}

/// <summary>
Expand All @@ -124,7 +124,7 @@ private protected FilterBase(IHost host, IDataView input)
{
}

public override long? GetRowCount(bool lazy = true) => null;
public override long? GetRowCount() => null;

public sealed override Schema Schema => Source.Schema;

Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.FastTree/FastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1862,7 +1862,7 @@ private void MakeBoundariesAndCheckLabels(out long missingInstances, out long to
ch.Info("Changing data from row-wise to column-wise");

long pos = 0;
double rowCountDbl = (double?)_data.Data.GetRowCount(lazy: true) ?? Double.NaN;
double rowCountDbl = (double?)_data.Data.GetRowCount() ?? Double.NaN;
pch.SetHeader(new ProgressHeader("examples"),
e => e.SetProgress(0, pos, rowCountDbl));
// REVIEW: Should we ignore rows with bad label, weight, or group? The previous code seemed to let
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Parquet/ParquetLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ private static Stream OpenStream(string filename)

public Schema Schema { get; }

public long? GetRowCount(bool lazy = true)
public long? GetRowCount()
{
return _rowCount;
}
Expand Down
Loading