From 4149cdc77a604d37afa81913908dde3dafe46d42 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 11 Oct 2018 18:42:44 -0500 Subject: [PATCH 01/14] Implement VBuffer master plan WIP #1 --- src/Microsoft.ML.Core/Data/MetadataUtils.cs | 20 +- src/Microsoft.ML.Core/Data/VBuffer.cs | 219 +++++--- src/Microsoft.ML.Core/Utilities/MathUtils.cs | 54 +- src/Microsoft.ML.Core/Utilities/Utils.cs | 64 ++- .../Utilities/VBufferUtils.cs | 531 ++++++++++-------- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 76 +-- .../DataLoadSave/Binary/Codecs.cs | 20 +- .../DataLoadSave/Text/TextLoaderParser.cs | 19 +- .../DataLoadSave/Text/TextSaver.cs | 18 +- .../DataView/CompositeSchema.cs | 2 +- .../Depricated/Vector/VBufferMathUtils.cs | 166 +++--- .../Depricated/Vector/VectorUtils.cs | 145 +++-- .../Transforms/DropSlotsTransform.cs | 3 +- .../Utilities/SlotDropper.cs | 38 +- .../TreeEnsemble/RegressionTree.cs | 19 +- .../KMeansPlusPlusTrainer.cs | 37 +- .../KMeansPredictor.cs | 2 +- .../Optimizer/DifferentiableFunction.cs | 4 +- .../Optimizer/OptimizationMonitor.cs | 2 +- .../Optimizer/Optimizer.cs | 2 +- .../Optimizer/SgdOptimizer.cs | 2 +- .../LogisticRegression/LbfgsPredictorBase.cs | 10 +- .../MulticlassLogisticRegression.cs | 23 +- .../Standard/SdcaBinary.cs | 11 +- .../Standard/SdcaMultiClass.cs | 10 +- 25 files changed, 816 insertions(+), 681 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index ee70bc732a..d60f67fe31 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -318,7 +318,10 @@ public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.Column IReadOnlyList list; if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize)) - slotNames = new VBuffer>(vectorSize, 0, slotNames.Values, slotNames.Indices); + { + VBufferMutationContext.Create(ref slotNames, vectorSize, 0) + 
.Complete(ref slotNames); + } else schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames); } @@ -447,21 +450,22 @@ public static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex, { int previousEndIndex = -1; isValid = true; - for (int i = 0; i < catIndices.Values.Length; i += 2) + var catIndicesValues = catIndices.GetValues(); + for (int i = 0; i < catIndicesValues.Length; i += 2) { - if (catIndices.Values[i] > catIndices.Values[i + 1] || - catIndices.Values[i] <= previousEndIndex || - catIndices.Values[i] >= columnSlotsCount || - catIndices.Values[i + 1] >= columnSlotsCount) + if (catIndicesValues[i] > catIndicesValues[i + 1] || + catIndicesValues[i] <= previousEndIndex || + catIndicesValues[i] >= columnSlotsCount || + catIndicesValues[i + 1] >= columnSlotsCount) { isValid = false; break; } - previousEndIndex = catIndices.Values[i + 1]; + previousEndIndex = catIndicesValues[i + 1]; } if (isValid) - categoricalFeatures = catIndices.Values.Select(val => val).ToArray(); + categoricalFeatures = catIndicesValues.ToArray(); } } diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index b5ef6de0ea..6e900a36a8 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -16,6 +16,9 @@ namespace Microsoft.ML.Runtime.Data /// public readonly struct VBuffer { + private readonly T[] _values; + private readonly int[] _indices; + /// /// The logical length of the buffer. /// @@ -27,17 +30,6 @@ public readonly struct VBuffer /// public readonly int Count; - /// - /// The values. Only the first Count of these are valid. - /// - public readonly T[] Values; - - /// - /// The indices. For a dense representation, this array is not used. For a sparse representation - /// it is parallel to values and specifies the logical indices for the corresponding values. - /// - public readonly int[] Indices; - /// /// The explicitly represented values. 
/// @@ -56,7 +48,8 @@ public readonly struct VBuffer public ReadOnlySpan GetIndices() => IsDense ? default : Indices.AsSpan(0, Count); /// - /// Equivalent to Count == Length. + /// Gets a value indicating whether every logical element is explicitly + /// represented in the buffer. /// public bool IsDense { @@ -78,8 +71,8 @@ public VBuffer(int length, T[] values, int[] indices = null) Length = length; Count = length; - Values = values; - Indices = indices; + _values = values; + _indices = indices; } /// @@ -110,8 +103,8 @@ public VBuffer(int length, int count, T[] values, int[] indices) Length = length; Count = count; - Values = values; - Indices = indices; + _values = values; + _indices = indices; } /// @@ -119,15 +112,13 @@ public VBuffer(int length, int count, T[] values, int[] indices) /// public void CopyToDense(ref VBuffer dst) { - var values = dst.Values; - if (Utils.Size(values) < Length) - values = new T[Length]; + var mutation = VBufferMutationContext.Create(ref dst, Length, Count); if (!IsDense) - CopyTo(values); + CopyTo(mutation.Values); else if (Length > 0) - Array.Copy(Values, values, Length); - dst = new VBuffer(Length, values, dst.Indices); + _values.AsSpan(0, Length).CopyTo(mutation.Values); + mutation.Complete(ref dst); } /// @@ -135,31 +126,24 @@ public void CopyToDense(ref VBuffer dst) /// public void CopyTo(ref VBuffer dst) { - var values = dst.Values; - var indices = dst.Indices; + var mutation = VBufferMutationContext.Create(ref dst, Length, Count); if (IsDense) { if (Length > 0) { - if (Utils.Size(values) < Length) - values = new T[Length]; - Array.Copy(Values, values, Length); + _values.AsSpan(0, Length).CopyTo(mutation.Values); } - dst = new VBuffer(Length, values, indices); + mutation.Complete(ref dst); Contracts.Assert(dst.IsDense); } else { if (Count > 0) { - if (Utils.Size(values) < Count) - values = new T[Count]; - if (Utils.Size(indices) < Count) - indices = new int[Count]; - Array.Copy(Values, values, Count); - Array.Copy(Indices, 
indices, Count); + _values.AsSpan(0, Count).CopyTo(mutation.Values); + _indices.AsSpan(0, Count).CopyTo(mutation.Indices); } - dst = new VBuffer(Length, Count, values, indices); + mutation.Complete(ref dst); } } @@ -170,17 +154,15 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) { Contracts.Check(0 <= srcMin && srcMin <= Length, "srcMin"); Contracts.Check(0 <= length && srcMin <= Length - length, "length"); - var values = dst.Values; - var indices = dst.Indices; + if (IsDense) { + var mutation = VBufferMutationContext.Create(ref dst, length, length); if (length > 0) { - if (Utils.Size(values) < length) - values = new T[length]; - Array.Copy(Values, srcMin, values, 0, length); + _values.AsSpan(srcMin, length).CopyTo(mutation.Values); } - dst = new VBuffer(length, values, indices); + mutation.Complete(ref dst); Contracts.Assert(dst.IsDense); } else @@ -188,29 +170,31 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) int copyCount = 0; if (Count > 0) { - int copyMin = Indices.FindIndexSorted(0, Count, srcMin); - int copyLim = Indices.FindIndexSorted(copyMin, Count, srcMin + length); + int copyMin = _indices.FindIndexSorted(0, Count, srcMin); + int copyLim = _indices.FindIndexSorted(copyMin, Count, srcMin + length); Contracts.Assert(copyMin <= copyLim); copyCount = copyLim - copyMin; + var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); if (copyCount > 0) { - if (Utils.Size(values) < copyCount) - values = new T[copyCount]; - Array.Copy(Values, copyMin, values, 0, copyCount); + _values.AsSpan(copyMin, copyCount).CopyTo(mutation.Values); if (copyCount < length) { - if (Utils.Size(indices) < copyCount) - indices = new int[copyCount]; for (int i = 0; i < copyCount; ++i) - indices[i] = Indices[i + copyMin] - srcMin; + mutation.Indices[i] = _indices[i + copyMin] - srcMin; } } + mutation.Complete(ref dst); + } + else + { + var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); + mutation.Complete(ref dst); } 
- dst = new VBuffer(length, copyCount, values, indices); } } - /// +/* /// /// Copy from this buffer to the given destination, making sure to explicitly include the /// first count indices in indicesInclude. Note that indicesInclude should be sorted /// with each index less than this.Length. Note that this can make the destination be @@ -382,43 +366,43 @@ public void CopyTo(ref VBuffer dst, int[] indicesInclude, int count) Contracts.Assert(size == ii || size == 0); dst = new VBuffer(Length, ii, values, indices); - } + }*/ /// /// Copy from this buffer to the given destination array. This "densifies". /// - public void CopyTo(T[] dst) + public void CopyTo(Span dst) { CopyTo(dst, 0); } - public void CopyTo(T[] dst, int ivDst, T defaultValue = default(T)) + public void CopyTo(Span dst, int ivDst, T defaultValue = default(T)) { - Contracts.CheckParam(0 <= ivDst && ivDst <= Utils.Size(dst) - Length, nameof(dst), "dst is not large enough"); + Contracts.CheckParam(0 <= ivDst && ivDst <= dst.Length - Length, nameof(dst), "dst is not large enough"); if (Length == 0) return; if (IsDense) { - Array.Copy(Values, 0, dst, ivDst, Length); + _values.AsSpan(0, Length).CopyTo(dst.Slice(ivDst)); return; } if (Count == 0) { - Array.Clear(dst, ivDst, Length); + dst.Slice(ivDst, Length).Clear(); return; } int iv = 0; for (int islot = 0; islot < Count; islot++) { - int slot = Indices[islot]; + int slot = _indices[islot]; Contracts.Assert(slot >= iv); while (iv < slot) dst[ivDst + iv++] = defaultValue; Contracts.Assert(iv == slot); - dst[ivDst + iv++] = Values[islot]; + dst[ivDst + iv++] = _values[islot]; } while (iv < Length) dst[ivDst + iv++] = defaultValue; @@ -431,24 +415,22 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); Contracts.CheckParam(0 <= srcIndex && srcIndex <= Utils.Size(src) - length, nameof(srcIndex)); - var values = dst.Values; + var mutation = 
VBufferMutationContext.Create(ref dst, length, length); if (length > 0) { - if (Utils.Size(values) < length) - values = new T[length]; - Array.Copy(src, srcIndex, values, 0, length); + src.AsSpan(srcIndex, length).CopyTo(mutation.Values); } - dst = new VBuffer(length, values, dst.Indices); + mutation.Complete(ref dst); } public IEnumerable> Items(bool all = false) { - return VBufferUtils.Items(Values, Indices, Length, Count, all); + return VBufferUtils.Items(_values, _indices, Length, Count, all); } public IEnumerable DenseValues() { - return VBufferUtils.DenseValues(Values, Indices, Length, Count); + return VBufferUtils.DenseValues(_values, _indices, Length, Count); } public void GetItemOrDefault(int slot, ref T dst) @@ -457,9 +439,9 @@ public void GetItemOrDefault(int slot, ref T dst) int index; if (IsDense) - dst = Values[slot]; - else if (Count > 0 && Indices.TryFindIndexSorted(0, Count, slot, out index)) - dst = Values[index]; + dst = _values[slot]; + else if (Count > 0 && _indices.TryFindIndexSorted(0, Count, slot, out index)) + dst = _values[index]; else dst = default(T); } @@ -470,13 +452,106 @@ public T GetItemOrDefault(int slot) int index; if (IsDense) - return Values[slot]; - if (Count > 0 && Indices.TryFindIndexSorted(0, Count, slot, out index)) - return Values[index]; + return _values[slot]; + if (Count > 0 && _indices.TryFindIndexSorted(0, Count, slot, out index)) + return _values[index]; return default(T); } public override string ToString() => IsDense ? $"Dense vector of size {Length}" : $"Sparse vector of size {Length}, {Count} explicit values"; + + internal VBufferMutationContext GetMutableContext( + int newLogicalLength, + int? valuesCount, + int? 
maxValuesCapacity, + bool keepOldOnResize, + out bool createdNewValues, + out bool createdNewIndices) + { + Contracts.CheckParam(newLogicalLength >= 0, nameof(newLogicalLength)); + Contracts.CheckParam(valuesCount == null || valuesCount.Value <= newLogicalLength, nameof(valuesCount)); + + valuesCount = valuesCount ?? newLogicalLength; + int maxCapacity = maxValuesCapacity ?? newLogicalLength; + + T[] values = _values; + Utils.EnsureSize(ref values, valuesCount.Value, maxCapacity, keepOldOnResize, out createdNewValues); + + int[] indices = _indices; + bool isDense = newLogicalLength == valuesCount.Value; + if (isDense) + { + createdNewIndices = false; + } + else + { + Utils.EnsureSize(ref indices, valuesCount.Value, maxCapacity, keepOldOnResize, out createdNewIndices); + } + + return new VBufferMutationContext(newLogicalLength, valuesCount.Value, values, indices); + } + } + + public static class VBufferMutationContext + { + public static VBufferMutationContext Create( + ref VBuffer destination, + int newLogicalLength, + int? valuesCount = null, + int? maxValuesCapacity = null, + bool keepOldOnResize = false) + { + return destination.GetMutableContext( + newLogicalLength, + valuesCount, + maxValuesCapacity, + keepOldOnResize, + out bool _, + out bool _); + } + + public static VBufferMutationContext Create( + ref VBuffer destination, + int newLogicalLength, + out bool createdNewValues, + out bool createdNewIndices, + int? valuesCount = null, + int? 
maxValuesCapacity = null, + bool keepOldOnResize = false) + { + return destination.GetMutableContext( + newLogicalLength, + valuesCount, + maxValuesCapacity, + keepOldOnResize, + out createdNewValues, + out createdNewIndices); + } + } + + public ref struct VBufferMutationContext + { + private readonly int _logicalLength; + private readonly T[] _values; + private readonly int[] _indices; + + public readonly Span Values; + public readonly Span Indices; + + internal VBufferMutationContext(int logicalLength, int physicalValuesCount, T[] values, int[] indices) + { + _logicalLength = logicalLength; + _values = values; + _indices = indices; + + Values = _values.AsSpan(0, physicalValuesCount); + Indices = _indices.AsSpan(0, physicalValuesCount); + } + + public void Complete(ref VBuffer destintation) + { + destintation = new VBuffer(_logicalLength, Values.Length, _values, _indices); + } } } diff --git a/src/Microsoft.ML.Core/Utilities/MathUtils.cs b/src/Microsoft.ML.Core/Utilities/MathUtils.cs index 7550a949c5..cb5028463e 100644 --- a/src/Microsoft.ML.Core/Utilities/MathUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/MathUtils.cs @@ -133,40 +133,23 @@ public static Float Min(Float[] a) } /// - /// Finds the first index of the max element of the array. + /// Finds the first index of the max element of the span. /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. /// 2) All the elements to consider are NaNs. /// - /// an array - /// the first index of the max element - public static int ArgMax(Float[] a) - { - return ArgMax(a, Utils.Size(a)); - } - - /// - /// Finds the first index of the max element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is - /// returned. 
The caller should distinguish in this case between two - /// possibilities: - /// 1) The number of the element to consider is zero. - /// 2) All the elements to consider are NaNs. - /// - /// an array - /// number of the element in the array to consider + /// The span of floats. /// the first index of the max element - public static int ArgMax(Float[] a, int count) + public static int ArgMax(ReadOnlySpan a) { - Contracts.Assert(0 <= count && count <= Utils.Size(a)); - if (count == 0) + if (a.IsEmpty) return -1; int amax = -1; Float max = Float.NegativeInfinity; - for (int i = count - 1; i >= 0; i--) + for (int i = a.Length - 1; i >= 0; i--) { if (max <= a[i]) { @@ -179,40 +162,23 @@ public static int ArgMax(Float[] a, int count) } /// - /// Finds the first index of the minimum element of the array. + /// Finds the first index of the minimum element of the span. /// NaNs are ignored. If all the elements to consider are NaNs, -1 is /// returned. The caller should distinguish in this case between two /// possibilities: /// 1) The number of the element to consider is zero. /// 2) All the elements to consider are NaNs. /// - /// an array - /// the first index of the minimum element - public static int ArgMin(Float[] a) - { - return ArgMin(a, Utils.Size(a)); - } - - /// - /// Finds the first index of the minimum element of the array. - /// NaNs are ignored. If all the elements to consider are NaNs, -1 is - /// returned. The caller should distinguish in this case between two - /// possibilities: - /// 1) The number of the element to consider is zero. - /// 2) All the elements to consider are NaNs. - /// - /// an array - /// number of the element in the array to consider + /// The span of floats. 
/// the first index of the minimum element - public static int ArgMin(Float[] a, int count) + public static int ArgMin(ReadOnlySpan a) { - Contracts.Assert(0 <= count && count <= Utils.Size(a)); - if (count == 0) + if (a.IsEmpty) return -1; int amin = -1; Float min = Float.PositiveInfinity; - for (int i = count - 1; i >= 0; i--) + for (int i = a.Length - 1; i >= 0; i--) { if (min >= a[i]) { diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index a0b9019d14..f3ed1c942e 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -181,18 +181,6 @@ public static void Push(ref Stack stack, T item) stack.Push(item); } - /// - /// Assumes input is sorted and finds value using BinarySearch. - /// If value is not found, returns the logical index of 'value' in the sorted list i.e index of the first element greater than value. - /// In case of duplicates it returns the index of the first one. - /// It guarantees that items before the returned index are < value, while those at and after the returned index are >= value. - /// - public static int FindIndexSorted(this int[] input, int value) - { - Contracts.AssertValue(input); - return FindIndexSorted(input, 0, input.Length, value); - } - /// /// Assumes input is sorted and finds value using BinarySearch. /// If value is not found, returns the logical index of 'value' in the sorted list i.e index of the first element greater than value. @@ -240,6 +228,17 @@ public static bool TryFindIndexSorted(this int[] input, int min, int lim, int va return index < lim && input[index] == value; } + /// + /// Akin to FindIndexSorted, except stores the found index in the output + /// index parameter, and returns whether that index is a valid index + /// pointing to a value equal to the input parameter value. 
+ /// + public static bool TryFindIndexSorted(ReadOnlySpan input, int min, int lim, int value, out int index) + { + index = FindIndexSorted(input, min, lim, value); + return index < lim && input[index] == value; + } + /// /// Assumes input is sorted and finds value using BinarySearch. /// If value is not found, returns the logical index of 'value' in the sorted list i.e index of the first element greater than value. @@ -466,9 +465,8 @@ public static int[] GetIdentityPermutation(int size) return res; } - public static void FillIdentity(int[] a, int lim) + public static void FillIdentity(Span a, int lim) { - Contracts.AssertValue(a); Contracts.Assert(0 <= lim & lim <= a.Length); for (int i = 0; i < lim; ++i) @@ -857,12 +855,19 @@ public static int EnsureSize(ref T[] array, int min, bool keepOld = true) /// /// The new size, that is no less than and no more that . public static int EnsureSize(ref T[] array, int min, int max, bool keepOld = true) + => EnsureSize(ref array, min, max, keepOld, out bool _); + + public static int EnsureSize(ref T[] array, int min, int max, bool keepOld, out bool resized) { Contracts.CheckParam(min <= max, nameof(max), "min must not exceed max"); // This code adapted from the private method EnsureCapacity code of List. int size = Utils.Size(array); if (size >= min) + { + resized = false; return size; + } + int newSize = size == 0 ? 4 : size * 2; // This constant taken from the internal code of system\array.cs of mscorlib. 
if ((uint)newSize > max) @@ -873,6 +878,8 @@ public static int EnsureSize(ref T[] array, int min, int max, bool keepOld = Array.Resize(ref array, newSize); else array = new T[newSize]; + + resized = true; return newSize; } @@ -1098,5 +1105,34 @@ public static string GetDescription(this Enum value) } return null; } + + public static int Count(this ReadOnlySpan source, Func predicate) + { + Contracts.CheckValue(predicate, nameof(predicate)); + + int result = 0; + for (int i = 0; i < source.Length; i++) + { + if (predicate(source[i])) + { + result++; + } + } + return result; + } + + public static bool All(this ReadOnlySpan source, Func predicate) + { + Contracts.CheckValue(predicate, nameof(predicate)); + + for (int i = 0; i < source.Length; i++) + { + if (!predicate(source[i])) + { + return false; + } + } + return true; + } } } diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index bc1e7f4f7f..c39f7f60d3 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -176,7 +176,7 @@ public static void ForEachDefined(in VBuffer a, Action visitor) /// Applies the to each corresponding pair of elements /// where the item is emplicitly defined in the vector. By explicitly defined, /// we mean that for a given index i, both vectors have an entry in - /// corresponding to that index. + /// corresponding to that index. 
/// /// The first vector /// The second vector @@ -314,9 +314,11 @@ public static void ForEachEitherDefined(in VBuffer a, in VBuffer b, Act /// public static void Clear(ref VBuffer dst) { - if (dst.Count == 0) + int dstValuesCount = dst.GetValues().Length; + if (dstValuesCount == 0) return; - Array.Clear(dst.Values, 0, dst.Count); + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); + mutation.Values.Clear(); } // REVIEW: Look into removing slot in this and other manipulators, so that we @@ -344,15 +346,18 @@ public static void Apply(ref VBuffer dst, SlotValueManipulator manip) { Contracts.CheckValue(manip, nameof(manip)); + int dstValuesCount = dst.GetValues().Length; + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); if (dst.IsDense) { - for (int i = 0; i < dst.Length; i++) - manip(i, ref dst.Values[i]); + for (int i = 0; i < mutation.Values.Length; i++) + manip(i, ref mutation.Values[i]); } else { - for (int i = 0; i < dst.Count; i++) - manip(dst.Indices[i], ref dst.Values[i]); + var dstIndices = dst.GetIndices(); + for (int i = 0; i < mutation.Values.Length; i++) + manip(dstIndices[i], ref mutation.Values[i]); } } @@ -376,17 +381,19 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator Contracts.CheckValue(manip, nameof(manip)); Contracts.CheckValueOrNull(pred); + int dstValuesCount = dst.GetValues().Length; + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); if (dst.IsDense) { // The vector is dense, so we can just do a direct access. - manip(slot, ref dst.Values[slot]); + manip(slot, ref mutation.Values[slot]); return; } int idx = 0; - if (dst.Count > 0 && Utils.TryFindIndexSorted(dst.Indices, 0, dst.Count, slot, out idx)) + if (dstValuesCount > 0 && Utils.TryFindIndexSorted(mutation.Indices, 0, dstValuesCount, slot, out idx)) { // Vector is sparse, but the item exists so we can access it. 
- manip(slot, ref dst.Values[idx]); + manip(slot, ref mutation.Values[idx]); return; } // The vector is sparse and there is no corresponding item, yet. @@ -397,26 +404,24 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator if (pred(ref value)) return; // We have to insert this value, somehow. - int[] indices = dst.Indices; - T[] values = dst.Values; + // There is a modest special case where there is exactly one free slot // we are modifying in the sparse vector, in which case the vector becomes // dense. Then there is no need to do anything with indices. - bool needIndices = dst.Count + 1 < dst.Length; - if (needIndices) - Utils.EnsureSize(ref indices, dst.Count + 1, dst.Length - 1); - Utils.EnsureSize(ref values, dst.Count + 1, dst.Length); - if (idx != dst.Count) + bool needIndices = dstValuesCount + 1 < dst.Length; + mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount + 1); + if (idx != dstValuesCount) { // We have to do some sort of shift copy. 
+ int sliceLength = dstValuesCount - idx; if (needIndices) - Array.Copy(indices, idx, indices, idx + 1, dst.Count - idx); - Array.Copy(values, idx, values, idx + 1, dst.Count - idx); + mutation.Indices.Slice(idx, sliceLength).CopyTo(mutation.Indices.Slice(idx + 1)); + mutation.Values.Slice(idx, sliceLength).CopyTo(mutation.Values.Slice(idx + 1)); } if (needIndices) - indices[idx] = slot; - values[idx] = value; - dst = new VBuffer(dst.Length, dst.Count + 1, values, indices); + mutation.Indices[idx] = slot; + mutation.Values[idx] = value; + mutation.Complete(ref dst); } /// @@ -426,37 +431,42 @@ public static void Densify(ref VBuffer dst) { if (dst.IsDense) return; - var indices = dst.Indices; - var values = dst.Values; - if (Utils.Size(values) >= dst.Length) + + var indices = dst.GetIndices(); + var values = dst.GetValues(); + var mutation = VBufferMutationContext.Create( + ref dst, + dst.Length, + out bool createdNewValues, out bool _); + + if (!createdNewValues) { // Densify in place. - for (int i = dst.Count; --i >= 0; ) + for (int i = values.Length; --i >= 0; ) { Contracts.Assert(i <= indices[i]); - values[indices[i]] = values[i]; + mutation.Values[indices[i]] = values[i]; } - if (dst.Count == 0) - Array.Clear(values, 0, dst.Length); + if (values.Length == 0) + mutation.Values.Clear(); else { int min = 0; - for (int ii = 0; ii < dst.Count; ++ii) + for (int ii = 0; ii < values.Length; ++ii) { - Array.Clear(values, min, indices[ii] - min); + mutation.Values.Slice(min, indices[ii] - min).Clear(); min = indices[ii] + 1; } - Array.Clear(values, min, dst.Length - min); + mutation.Values.Slice(min, dst.Length - min).Clear(); } } else { - T[] newValues = new T[dst.Length]; - for (int i = 0; i < dst.Count; ++i) - newValues[indices[i]] = values[i]; - values = newValues; + // createdNewValues is true, keepOldOnResize is false, so mutation.Values is already cleared + for (int i = 0; i < values.Length; ++i) + mutation.Values[indices[i]] = values[i]; } - dst = new 
VBuffer(dst.Length, values, indices); + mutation.Complete(ref dst); } /// @@ -466,7 +476,9 @@ public static void Densify(ref VBuffer dst) public static void DensifyFirst(ref VBuffer dst, int denseCount) { Contracts.Check(0 <= denseCount && denseCount <= dst.Length); - if (dst.IsDense || denseCount == 0 || (dst.Count >= denseCount && dst.Indices[denseCount - 1] == denseCount - 1)) + var dstValues = dst.GetValues(); + var dstIndices = dst.GetIndices(); + if (dst.IsDense || denseCount == 0 || (dstValues.Length >= denseCount && dstIndices[denseCount - 1] == denseCount - 1)) return; if (denseCount == dst.Length) { @@ -474,37 +486,36 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) return; } - // Densify the first BiasCount entries. - int[] indices = dst.Indices; - T[] values = dst.Values; - if (indices == null) + // Densify the first denseCount entries. + if (dstIndices.IsEmpty) { - Contracts.Assert(dst.Count == 0); - indices = Utils.GetIdentityPermutation(denseCount); - Utils.EnsureSize(ref values, denseCount, dst.Length, keepOld: false); - Array.Clear(values, 0, denseCount); - dst = new VBuffer(dst.Length, denseCount, values, indices); + // no previous values + var newIndicesMutation = VBufferMutationContext.Create(ref dst, dst.Length, denseCount); + Utils.FillIdentity(newIndicesMutation.Indices, denseCount); + newIndicesMutation.Values.Clear(); + newIndicesMutation.Complete(ref dst); return; } - int lim = Utils.FindIndexSorted(indices, 0, dst.Count, denseCount); + int lim = Utils.FindIndexSorted(dstIndices, 0, dstValues.Length, denseCount); Contracts.Assert(lim < denseCount); - int newLen = dst.Count + denseCount - lim; + int newLen = dstValues.Length + denseCount - lim; if (newLen == dst.Length) { Densify(ref dst); return; } - Utils.EnsureSize(ref values, newLen, dst.Length); - Utils.EnsureSize(ref indices, newLen, dst.Length); - Array.Copy(values, lim, values, denseCount, dst.Count - lim); - Array.Copy(indices, lim, indices, denseCount, dst.Count 
- lim); + + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, newLen, keepOldOnResize: true); + int sliceLength = dstValues.Length - lim; + mutation.Values.Slice(lim, sliceLength).CopyTo(mutation.Values.Slice(denseCount)); + mutation.Indices.Slice(lim, sliceLength).CopyTo(mutation.Indices.Slice(denseCount)); int i = lim - 1; for (int ii = denseCount; --ii >= 0; ) { - values[ii] = i >= 0 && indices[i] == ii ? values[i--] : default(T); - indices[ii] = ii; + mutation.Values[ii] = i >= 0 && dstIndices[i] == ii ? dstValues[i--] : default(T); + mutation.Indices[ii] = ii; } - dst = new VBuffer(dst.Length, newLen, values, indices); + mutation.Complete(ref dst); } /// @@ -522,9 +533,10 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds int sparseCount = 0; var sparseCountThreshold = (int)(src.Length * sparsityThreshold); + var srcValues = src.GetValues(); for (int i = 0; i < src.Length; i++) { - if (!isDefaultPredicate(in src.Values[i])) + if (!isDefaultPredicate(in srcValues[i])) sparseCount++; if (sparseCount > sparseCountThreshold) @@ -534,23 +546,17 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds } } - var indices = dst.Indices; - var values = dst.Values; - + var mutation = VBufferMutationContext.Create(ref dst, src.Length, sparseCount); if (sparseCount > 0) { - if (Utils.Size(values) < sparseCount) - values = new T[sparseCount]; - if (Utils.Size(indices) < sparseCount) - indices = new int[sparseCount]; int j = 0; for (int i = 0; i < src.Length; i++) { - if (!isDefaultPredicate(in src.Values[i])) + if (!isDefaultPredicate(in srcValues[i])) { Contracts.Assert(j < sparseCount); - indices[j] = i; - values[j] = src.Values[i]; + mutation.Indices[j] = i; + mutation.Values[j] = srcValues[i]; j++; } } @@ -558,7 +564,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds Contracts.Assert(j == sparseCount); } - dst = new VBuffer(src.Length, sparseCount, values, indices); + 
mutation.Complete(ref dst); } /// @@ -667,10 +673,10 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // of the "outer" parameter. There are nine, top level cases. Each case is // considered in this order. - // 1. src.Count == 0. + // 1. srcValues.Length == 0. // 2. src.Dense. // 3. dst.Dense. - // 4. dst.Count == 0. + // 4. dstValues.Length == 0. // Beyond this point the cases can assume both src/dst are sparse non-empty vectors. // We then calculate the size of the resulting output array, then use that to fall @@ -688,20 +694,24 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // Case 5 does not require special handling, because it falls through to other cases // that do the special handling for them. - if (src.Count == 0) + var srcValues = src.GetValues(); + var dstValues = dst.GetValues(); + var dstIndices = dst.GetIndices(); + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + if (srcValues.Length == 0) { - // Major case 1, with src.Count == 0. + // Major case 1, with srcValues.Length == 0. if (!outer) return; if (dst.IsDense) { for (int i = 0; i < dst.Length; i++) - manip(i, default(TSrc), ref dst.Values[i]); + manip(i, default(TSrc), ref mutation.Values[i]); } else { - for (int i = 0; i < dst.Count; i++) - manip(dst.Indices[i], default(TSrc), ref dst.Values[i]); + for (int i = 0; i < dstValues.Length; i++) + manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); } return; } @@ -712,33 +722,34 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (!dst.IsDense) Densify(ref dst); // Both are now dense. Both cases of outer are covered. - for (int i = 0; i < src.Length; i++) - manip(i, src.Values[i], ref dst.Values[i]); + for (int i = 0; i < srcValues.Length; i++) + manip(i, srcValues[i], ref mutation.Values[i]); return; } + var srcIndices = src.GetIndices(); if (dst.IsDense) { - // Major case 3, with dst.Dense. Note that !a.Dense. + // Major case 3, with dst.Dense. 
Note that !src.Dense. if (outer) { int sI = 0; - int sIndex = src.Indices[sI]; + int sIndex = srcIndices[sI]; for (int i = 0; i < dst.Length; ++i) { if (i == sIndex) { - manip(i, src.Values[sI], ref dst.Values[i]); - sIndex = ++sI == src.Count ? src.Length : src.Indices[sI]; + manip(i, srcValues[sI], ref mutation.Values[i]); + sIndex = ++sI == srcValues.Length ? src.Length : srcIndices[sI]; } else - manip(i, default(TSrc), ref dst.Values[i]); + manip(i, default(TSrc), ref mutation.Values[i]); } } else { for (int i = 0; i < src.Count; i++) - manip(src.Indices[i], src.Values[i], ref dst.Values[src.Indices[i]]); + manip(srcIndices[i], srcValues[i], ref mutation.Values[srcIndices[i]]); } return; } @@ -747,14 +758,14 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { // Major case 4, with dst empty. Note that !src.Dense. // Neither is dense, and dst is empty. Both cases of outer are covered. - var values = dst.Values; - var indices = dst.Indices; - Utils.EnsureSize(ref values, src.Count, src.Length); - Array.Clear(values, 0, src.Count); - Utils.EnsureSize(ref indices, src.Count, src.Length); + mutation = VBufferMutationContext.Create(ref dst, + src.Length, + src.Count, + maxValuesCapacity: src.Length); + mutation.Values.Clear(); for (int i = 0; i < src.Count; i++) - manip(indices[i] = src.Indices[i], src.Values[i], ref values[i]); - dst = new VBuffer(src.Length, src.Count, values, indices); + manip(mutation.Indices[i] = srcIndices[i], srcValues[i], ref mutation.Values[i]); + mutation.Complete(ref dst); return; } @@ -764,15 +775,15 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // Try to find each src index in dst indices, counting how many more we'll add. 
for (int sI = 0; sI < src.Count; sI++) { - int sIndex = src.Indices[sI]; - while (dI < dst.Count && dst.Indices[dI] < sIndex) + int sIndex = srcIndices[sI]; + while (dI < dst.Count && dstIndices[dI] < sIndex) dI++; if (dI == dst.Count) { newCount += src.Count - sI; break; } - if (dst.Indices[dI] == sIndex) + if (dstIndices[dI] == sIndex) dI++; else newCount++; @@ -805,14 +816,16 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // proved to be inefficient so we go to the little bit of extra work // to handle it here. - var indices = dst.Indices; - var values = dst.Values; - Utils.EnsureSize(ref indices, newCount, dst.Length, keepOld: false); - Utils.EnsureSize(ref values, newCount, dst.Length, keepOld: false); + mutation = VBufferMutationContext.Create(ref dst, + src.Length, + newCount, + maxValuesCapacity: dst.Length); + var indices = mutation.Indices; + var values = mutation.Values; int sI = src.Count - 1; dI = dst.Count - 1; - int sIndex = src.Indices[sI]; - int dIndex = dst.Indices[dI]; + int sIndex = srcIndices[sI]; + int dIndex = dstIndices[dI]; // Go from the end, so that even if we're writing over dst's vectors in // place, we do not corrupt the data as we are reorganizing it. @@ -821,17 +834,17 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (sIndex < dIndex) { indices[i] = dIndex; - values[i] = dst.Values[dI]; + values[i] = dstValues[dI]; if (outer) manip(dIndex, default(TSrc), ref values[i]); - dIndex = --dI >= 0 ? dst.Indices[dI] : -1; + dIndex = --dI >= 0 ? dstIndices[dI] : -1; } else if (sIndex > dIndex) { indices[i] = sIndex; values[i] = default(TDst); - manip(sIndex, src.Values[sI], ref values[i]); - sIndex = --sI >= 0 ? src.Indices[sI] : -1; + manip(sIndex, srcValues[sI], ref values[i]); + sIndex = --sI >= 0 ? 
srcIndices[sI] : -1; } else { @@ -839,13 +852,13 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< Contracts.Assert(sIndex >= 0); Contracts.Assert(sIndex == dIndex); indices[i] = dIndex; - values[i] = dst.Values[dI]; - manip(sIndex, src.Values[sI], ref values[i]); - sIndex = --sI >= 0 ? src.Indices[sI] : -1; - dIndex = --dI >= 0 ? dst.Indices[dI] : -1; + values[i] = dstValues[dI]; + manip(sIndex, srcValues[sI], ref values[i]); + sIndex = --sI >= 0 ? srcIndices[sI] : -1; + dIndex = --dI >= 0 ? dstIndices[dI] : -1; } } - dst = new VBuffer(dst.Length, newCount, values, indices); + mutation.Complete(ref dst); return; } @@ -857,8 +870,8 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< Contracts.Assert(src.Count == dst.Count); for (int i = 0; i < src.Count; i++) { - Contracts.Assert(src.Indices[i] == dst.Indices[i]); - manip(src.Indices[i], src.Values[i], ref dst.Values[i]); + Contracts.Assert(srcIndices[i] == dstIndices[i]); + manip(srcIndices[i], srcValues[i], ref mutation.Values[i]); } return; } @@ -868,27 +881,27 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (outer) { int sI = 0; - int sIndex = src.Indices[sI]; + int sIndex = srcIndices[sI]; for (int i = 0; i < dst.Count; ++i) { - if (dst.Indices[i] == sIndex) + if (dstIndices[i] == sIndex) { - manip(sIndex, src.Values[sI], ref dst.Values[i]); - sIndex = ++sI == src.Count ? src.Length : src.Indices[sI]; + manip(sIndex, srcValues[sI], ref mutation.Values[i]); + sIndex = ++sI == src.Count ? 
src.Length : srcIndices[sI]; } else - manip(dst.Indices[i], default(TSrc), ref dst.Values[i]); + manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); } } else { for (int sI = 0; sI < src.Count; sI++) { - int sIndex = src.Indices[sI]; - while (dst.Indices[dI] < sIndex) + int sIndex = srcIndices[sI]; + while (dstIndices[dI] < sIndex) dI++; - Contracts.Assert(dst.Indices[dI] == sIndex); - manip(sIndex, src.Values[sI], ref dst.Values[dI++]); + Contracts.Assert(dstIndices[dI] == sIndex); + manip(sIndex, srcValues[sI], ref mutation.Values[dI++]); } } return; @@ -900,23 +913,27 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // First do a "quasi" densification of dst, by making the indices // of dst correspond to those in src. + mutation = VBufferMutationContext.Create(ref dst, newCount, dst.Count); int sI = 0; for (dI = 0; dI < dst.Count; ++dI) { - int bIndex = dst.Indices[dI]; - while (src.Indices[sI] < bIndex) + int bIndex = dstIndices[dI]; + while (srcIndices[sI] < bIndex) sI++; - Contracts.Assert(src.Indices[sI] == bIndex); - dst.Indices[dI] = sI++; + Contracts.Assert(srcIndices[sI] == bIndex); + mutation.Indices[dI] = sI++; } - dst = new VBuffer(newCount, dst.Count, dst.Values, dst.Indices); + mutation.Complete(ref dst); Densify(ref dst); - int[] indices = dst.Indices; - Utils.EnsureSize(ref indices, src.Count, src.Length, keepOld: false); - Array.Copy(src.Indices, indices, newCount); - dst = new VBuffer(src.Length, newCount, dst.Values, indices); + + mutation = VBufferMutationContext.Create(ref dst, + src.Length, + newCount, + maxValuesCapacity: src.Length); + srcIndices.CopyTo(mutation.Indices); for (sI = 0; sI < src.Count; sI++) - manip(src.Indices[sI], src.Values[sI], ref dst.Values[sI]); + manip(srcIndices[sI], srcValues[sI], ref mutation.Values[sI]); + mutation.Complete(ref dst); return; } @@ -933,64 +950,69 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { Contracts.Check(src.Length == dst.Length, "Vectors must have 
the same dimensionality."); Contracts.CheckValue(manip, nameof(manip)); - Contracts.Assert(Utils.Size(src.Values) >= src.Count); - Contracts.Assert(Utils.Size(dst.Values) >= dst.Count); + int length = src.Length; + var srcValues = src.GetValues(); + var dstValues = dst.GetValues(); + if (dst.Count == 0) { if (src.Count == 0) - res = new VBuffer(length, 0, res.Values, res.Indices); + { + VBufferMutationContext.Create(ref res, length, 0) + .Complete(ref res); + } else if (src.IsDense) { Contracts.Assert(src.Count == src.Length); - TDst[] resValues = Utils.Size(res.Values) >= length ? res.Values : new TDst[length]; + var mutation = VBufferMutationContext.Create(ref res, length); for (int i = 0; i < length; i++) - manip(i, src.Values[i], default(TDst), ref resValues[i]); - res = new VBuffer(length, resValues, res.Indices); + manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); + mutation.Complete(ref res); } else { // src is non-empty sparse. int count = src.Count; Contracts.Assert(0 < count && count < length); - int[] resIndices = Utils.Size(res.Indices) >= count ? res.Indices : new int[count]; - TDst[] resValues = Utils.Size(res.Values) >= count ? res.Values : new TDst[count]; - Array.Copy(src.Indices, resIndices, count); + var mutation = VBufferMutationContext.Create(ref res, length, count); + var srcIndices = src.GetIndices(); + srcIndices.CopyTo(mutation.Indices); for (int ii = 0; ii < count; ii++) { - int i = src.Indices[ii]; - resIndices[ii] = i; - manip(i, src.Values[ii], default(TDst), ref resValues[ii]); + int i = srcIndices[ii]; + mutation.Indices[ii] = i; + manip(i, srcValues[ii], default(TDst), ref mutation.Values[ii]); } - res = new VBuffer(length, count, resValues, resIndices); + mutation.Complete(ref res); } } else if (dst.IsDense) { - TDst[] resValues = Utils.Size(res.Values) >= length ? 
res.Values : new TDst[length]; + var mutation = VBufferMutationContext.Create(ref res, length); if (src.Count == 0) { if (outer) { // Apply manip to all slots, as all slots of dst are defined. for (int j = 0; j < length; j++) - manip(j, default(TSrc), dst.Values[j], ref resValues[j]); + manip(j, default(TSrc), dstValues[j], ref mutation.Values[j]); } else { // Copy only. No slot of src is defined. for (int j = 0; j < length; j++) - resValues[j] = dst.Values[j]; + mutation.Values[j] = dstValues[j]; } - res = new VBuffer(length, resValues, res.Indices); + mutation.Complete(ref res); } else if (src.IsDense) { Contracts.Assert(src.Count == src.Length); for (int i = 0; i < length; i++) - manip(i, src.Values[i], dst.Values[i], ref resValues[i]); - res = new VBuffer(length, resValues, res.Indices); + manip(i, srcValues[i], dstValues[i], ref mutation.Values[i]); + mutation.Complete(ref res); } else { @@ -999,7 +1021,8 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf Contracts.Assert(0 < count && count < length); int ii = 0; - int i = src.Indices[ii]; + var srcIndices = src.GetIndices(); + int i = srcIndices[ii]; if (outer) { // All slots of dst are defined. Always apply manip. @@ -1007,11 +1030,11 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (j == i) { - manip(j, src.Values[ii], dst.Values[j], ref resValues[j]); - i = ++ii == count ? length : src.Indices[ii]; + manip(j, srcValues[ii], dstValues[j], ref mutation.Values[j]); + i = ++ii == count ? length : srcIndices[ii]; } else - manip(j, default(TSrc), dst.Values[j], ref resValues[j]); + manip(j, default(TSrc), dstValues[j], ref mutation.Values[j]); } } else @@ -1021,61 +1044,61 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (j == i) { - manip(j, src.Values[ii], dst.Values[j], ref resValues[j]); - i = ++ii == count ? length : src.Indices[ii]; + manip(j, srcValues[ii], dstValues[j], ref mutation.Values[j]); + i = ++ii == count ? 
length : srcIndices[ii]; } else - resValues[j] = dst.Values[j]; + mutation.Values[j] = dstValues[j]; } } - res = new VBuffer(length, resValues, res.Indices); + mutation.Complete(ref res); } } else { // dst is non-empty sparse int dstCount = dst.Count; + var dstIndices = dst.GetIndices(); Contracts.Assert(dstCount > 0); if (src.Count == 0) { - int[] resIndices = Utils.Size(res.Indices) >= dstCount ? res.Indices : new int[dstCount]; - TDst[] resValues = Utils.Size(res.Values) >= dstCount ? res.Values : new TDst[dstCount]; + var mutation = VBufferMutationContext.Create(ref res, length, dstCount); if (outer) { for (int jj = 0; jj < dstCount; jj++) { - int j = dst.Indices[jj]; - resIndices[jj] = j; - manip(j, default(TSrc), dst.Values[jj], ref resValues[jj]); + int j = dstIndices[jj]; + mutation.Indices[jj] = j; + manip(j, default(TSrc), dstValues[jj], ref mutation.Values[jj]); } } else { for (int jj = 0; jj < dstCount; jj++) { - resIndices[jj] = dst.Indices[jj]; - resValues[jj] = dst.Values[jj]; + mutation.Indices[jj] = dstIndices[jj]; + mutation.Values[jj] = dstValues[jj]; } } - res = new VBuffer(length, dstCount, resValues, resIndices); + mutation.Complete(ref res); } else if (src.IsDense) { // res will be dense. - TDst[] resValues = Utils.Size(res.Values) >= length ? res.Values : new TDst[length]; + var mutation = VBufferMutationContext.Create(ref res, length); int jj = 0; - int j = dst.Indices[jj]; + int j = dstIndices[jj]; for (int i = 0; i < length; i++) { if (i == j) { - manip(i, src.Values[i], dst.Values[jj], ref resValues[i]); - j = ++jj == dstCount ? length : dst.Indices[jj]; + manip(i, srcValues[i], dstValues[jj], ref mutation.Values[i]); + j = ++jj == dstCount ? 
length : dstIndices[jj]; } else - manip(i, src.Values[i], default(TDst), ref resValues[i]); + manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); } - res = new VBuffer(length, resValues, res.Indices); + mutation.Complete(ref res); } else { @@ -1084,17 +1107,18 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf // Find the count of result, which is the size of the union of the indices set of src and dst. int resCount = dstCount; + var srcIndices = src.GetIndices(); for (int ii = 0, jj = 0; ii < src.Count; ii++) { - int i = src.Indices[ii]; - while (jj < dst.Count && dst.Indices[jj] < i) + int i = srcIndices[ii]; + while (jj < dst.Count && dstIndices[jj] < i) jj++; if (jj == dst.Count) { resCount += src.Count - ii; break; } - if (dst.Indices[jj] == i) + if (dstIndices[jj] == i) jj++; else resCount++; @@ -1115,13 +1139,12 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } else { - int[] resIndices = Utils.Size(res.Indices) >= resCount ? res.Indices : new int[resCount]; - TDst[] resValues = Utils.Size(res.Values) >= resCount ? res.Values : new TDst[resCount]; + var mutation = VBufferMutationContext.Create(ref res, length, resCount); int ii = 0; - int i = src.Indices[ii]; + int i = srcIndices[ii]; int jj = 0; - int j = dst.Indices[jj]; + int j = dstIndices[jj]; for (int kk = 0; kk < resCount; kk++) { @@ -1129,35 +1152,35 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf if (i == j) { // Slot (i == j) both defined in src and dst. Apply manip. - resIndices[kk] = i; - manip(i, src.Values[ii], dst.Values[jj], ref resValues[kk]); - i = ++ii == src.Count ? length : src.Indices[ii]; - j = ++jj == dstCount ? length : dst.Indices[jj]; + mutation.Indices[kk] = i; + manip(i, srcValues[ii], dstValues[jj], ref mutation.Values[kk]); + i = ++ii == src.Count ? length : srcIndices[ii]; + j = ++jj == dstCount ? length : dstIndices[jj]; } else if (i < j) { // Slot i defined only in src, but not in dst. Apply manip. 
- resIndices[kk] = i; - manip(i, src.Values[ii], default(TDst), ref resValues[kk]); - i = ++ii == src.Count ? length : src.Indices[ii]; + mutation.Indices[kk] = i; + manip(i, srcValues[ii], default(TDst), ref mutation.Values[kk]); + i = ++ii == src.Count ? length : srcIndices[ii]; } else { // Slot j defined only in dst, but not in src. Apply manip if outer. // Otherwise just copy. - resIndices[kk] = j; + mutation.Indices[kk] = j; // REVIEW: Should we move checking of outer outside the loop? if (outer) - manip(j, default(TSrc), dst.Values[jj], ref resValues[kk]); + manip(j, default(TSrc), dstValues[jj], ref mutation.Values[kk]); else - resValues[kk] = dst.Values[jj]; - j = ++jj == dstCount ? length : dst.Indices[jj]; + mutation.Values[kk] = dstValues[jj]; + j = ++jj == dstCount ? length : dstIndices[jj]; } } Contracts.Assert(ii == src.Count && jj == dstCount); Contracts.Assert(i == length && j == length); - res = new VBuffer(length, resCount, resValues, resIndices); + mutation.Complete(ref res); } } } @@ -1181,25 +1204,30 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref // equal lengths, but I don't care here. 
if (src.Count == 0) { - dst = new VBuffer(src.Length, src.Count, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, src.Length, 0) + .Complete(ref dst); return; } - int[] indices = dst.Indices; - TDst[] values = dst.Values; - Utils.EnsureSize(ref values, src.Count, src.Length, keepOld: false); + var mutation = VBufferMutationContext.Create(ref dst, + src.Length, + src.Count, + maxValuesCapacity: src.Length); + Span values = mutation.Values; + var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < src.Length; ++i) - values[i] = func(i, src.Values[i]); + values[i] = func(i, srcValues[i]); } else { - Utils.EnsureSize(ref indices, src.Count, src.Length, keepOld: false); - Array.Copy(src.Indices, indices, src.Count); - for (int i = 0; i < src.Count; ++i) - values[i] = func(src.Indices[i], src.Values[i]); + Span indices = mutation.Indices; + var srcIndices = src.GetIndices(); + srcIndices.CopyTo(indices); + for (int i = 0; i < srcValues.Length; ++i) + values[i] = func(srcIndices[i], srcValues[i]); } - dst = new VBuffer(src.Length, src.Count, values, indices); + mutation.Complete(ref dst); } /// @@ -1226,54 +1254,62 @@ public static void ApplyInto(in VBuffer a, in VBuffer // 5. b's indices are a subset of a's. // 6. Neither a nor b's indices are a subset of the other. - if (a.Count == 0 && b.Count == 0) + var aValues = a.GetValues(); + var bValues = b.GetValues(); + if (aValues.Length == 0 && bValues.Length == 0) { // Case 1. Output will be empty. - dst = new VBuffer(a.Length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, a.Length, 0) + .Complete(ref dst); return; } int aI = 0; int bI = 0; - TDst[] values = dst.Values; + ReadOnlySpan aIndices; + ReadOnlySpan bIndices; + VBufferMutationContext mutation; if (a.IsDense || b.IsDense) { // Case 2. One of the two inputs is dense. The output will be dense. 
- Utils.EnsureSize(ref values, a.Length, a.Length, keepOld: false); - + mutation = VBufferMutationContext.Create(ref dst, a.Length); if (!a.IsDense) { // a is sparse, b is dense + aIndices = a.GetIndices(); for (int i = 0; i < b.Length; i++) { - TSrc1 aVal = (aI < a.Count && i == a.Indices[aI]) ? a.Values[aI++] : default(TSrc1); - values[i] = func(i, aVal, b.Values[i]); + TSrc1 aVal = (aI < a.Count && i == aIndices[aI]) ? aValues[aI++] : default(TSrc1); + mutation.Values[i] = func(i, aVal, bValues[i]); } } else if (!b.IsDense) { // b is sparse, a is dense + bIndices = b.GetIndices(); for (int i = 0; i < a.Length; i++) { - TSrc2 bVal = (bI < b.Count && i == b.Indices[bI]) ? b.Values[bI++] : default(TSrc2); - values[i] = func(i, a.Values[i], bVal); + TSrc2 bVal = (bI < b.Count && i == bIndices[bI]) ? bValues[bI++] : default(TSrc2); + mutation.Values[i] = func(i, aValues[i], bVal); } } else { // both dense for (int i = 0; i < a.Length; i++) - values[i] = func(i, a.Values[i], b.Values[i]); + mutation.Values[i] = func(i, aValues[i], bValues[i]); } - dst = new VBuffer(a.Length, values, dst.Indices); + mutation.Complete(ref dst); return; } // a, b both sparse. int newCount = 0; + aIndices = a.GetIndices(); + bIndices = b.GetIndices(); while (aI < a.Count && bI < b.Count) { - int aCompB = a.Indices[aI] - b.Indices[bI]; + int aCompB = aIndices[aI] - bIndices[bI]; if (aCompB <= 0) // a is no larger than b. aI++; if (aCompB >= 0) // b is no larger than a. @@ -1289,50 +1325,49 @@ public static void ApplyInto(in VBuffer a, in VBuffer // REVIEW: Worth optimizing the newCount == a.Length case? // Probably not... 
- int[] indices = dst.Indices; - Utils.EnsureSize(ref indices, newCount, a.Length, keepOld: false); - Utils.EnsureSize(ref values, newCount, a.Length, keepOld: false); + mutation = VBufferMutationContext.Create(ref dst, a.Length, newCount); + Span indices = mutation.Indices; if (newCount == b.Count) { if (newCount == a.Count) { // Case 3, a and b actually have the same indices! - Array.Copy(a.Indices, indices, a.Count); + aIndices.CopyTo(indices); for (aI = 0; aI < a.Count; aI++) { - Contracts.Assert(a.Indices[aI] == b.Indices[aI]); - values[aI] = func(a.Indices[aI], a.Values[aI], b.Values[aI]); + Contracts.Assert(aIndices[aI] == bIndices[aI]); + mutation.Values[aI] = func(aIndices[aI], aValues[aI], bValues[aI]); } } else { // Case 4, a's indices are a subset of b's. - Array.Copy(b.Indices, indices, b.Count); + bIndices.CopyTo(indices); aI = 0; for (bI = 0; aI < a.Count && bI < b.Count; bI++) { - Contracts.Assert(a.Indices[aI] >= b.Indices[bI]); - TSrc1 aVal = a.Indices[aI] == b.Indices[bI] ? a.Values[aI++] : default(TSrc1); - values[bI] = func(b.Indices[bI], aVal, b.Values[bI]); + Contracts.Assert(aIndices[aI] >= bIndices[bI]); + TSrc1 aVal = aIndices[aI] == bIndices[bI] ? aValues[aI++] : default(TSrc1); + mutation.Values[bI] = func(bIndices[bI], aVal, bValues[bI]); } for (; bI < b.Count; bI++) - values[bI] = func(b.Indices[bI], default(TSrc1), b.Values[bI]); + mutation.Values[bI] = func(bIndices[bI], default(TSrc1), bValues[bI]); } } else if (newCount == a.Count) { // Case 5, b's indices are a subset of a's. - Array.Copy(a.Indices, indices, a.Count); + aIndices.CopyTo(indices); bI = 0; for (aI = 0; bI < b.Count && aI < a.Count; aI++) { - Contracts.Assert(b.Indices[bI] >= a.Indices[aI]); - TSrc2 bVal = a.Indices[aI] == b.Indices[bI] ? b.Values[bI++] : default(TSrc2); - values[aI] = func(a.Indices[aI], a.Values[aI], bVal); + Contracts.Assert(bIndices[bI] >= aIndices[aI]); + TSrc2 bVal = aIndices[aI] == bIndices[bI] ? 
bValues[bI++] : default(TSrc2); + mutation.Values[aI] = func(aIndices[aI], aValues[aI], bVal); } for (; aI < a.Count; aI++) - values[aI] = func(a.Indices[aI], a.Values[aI], default(TSrc2)); + mutation.Values[aI] = func(aIndices[aI], aValues[aI], default(TSrc2)); } else { @@ -1342,47 +1377,47 @@ public static void ApplyInto(in VBuffer a, in VBuffer TSrc2 bVal = default(TSrc2); while (aI < a.Count && bI < b.Count) { - int aCompB = a.Indices[aI] - b.Indices[bI]; + int aCompB = aIndices[aI] - bIndices[bI]; int index = 0; if (aCompB < 0) { - index = a.Indices[aI]; - aVal = a.Values[aI++]; + index = aIndices[aI]; + aVal = aValues[aI++]; bVal = default(TSrc2); } else if (aCompB > 0) { - index = b.Indices[bI]; + index = bIndices[bI]; aVal = default(TSrc1); - bVal = b.Values[bI++]; + bVal = bValues[bI++]; } else { - index = a.Indices[aI]; - Contracts.Assert(index == b.Indices[bI]); - aVal = a.Values[aI++]; - bVal = b.Values[bI++]; + index = aIndices[aI]; + Contracts.Assert(index == bIndices[bI]); + aVal = aValues[aI++]; + bVal = bValues[bI++]; } - values[newI] = func(index, aVal, bVal); + mutation.Values[newI] = func(index, aVal, bVal); indices[newI++] = index; } for (; aI < a.Count; aI++) { - int index = a.Indices[aI]; - values[newI] = func(index, a.Values[aI], default(TSrc2)); + int index = aIndices[aI]; + mutation.Values[newI] = func(index, aValues[aI], default(TSrc2)); indices[newI++] = index; } for (; bI < b.Count; bI++) { - int index = b.Indices[bI]; - values[newI] = func(index, default(TSrc1), b.Values[bI]); + int index = bIndices[bI]; + mutation.Values[newI] = func(index, default(TSrc1), bValues[bI]); indices[newI++] = index; } } - dst = new VBuffer(a.Length, newCount, values, indices); + mutation.Complete(ref dst); } /// @@ -1391,14 +1426,16 @@ public static void ApplyInto(in VBuffer a, in VBuffer public static void Copy(List src, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); - var values = 
dst.Values; + var mutation = VBufferMutationContext.Create(ref dst, length); if (length > 0) { - if (Utils.Size(values) < length) - values = new T[length]; - src.CopyTo(values); + // List.CopyTo should have an overload for Span - https://github.com/dotnet/corefx/issues/33006 + for (int i = 0; i < length; i++) + { + mutation.Values[i] = src[i]; + } } - dst = new VBuffer(length, values, dst.Indices); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index 5020ae0418..a92cf7b4f8 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -382,45 +382,6 @@ public bool TryGetFeature(int index, out T v) return false; } - private void GetResult(ref T[] values, ref int[] indices, out int count, out int length) - { - if (_count == 0) - { - count = 0; - length = _length; - return; - } - - if (!_dense) - { - if (!_sorted) - SortAndSumDups(); - if (!_dense && _count >= _length / 2) - MakeDense(); - } - - if (_dense) - { - if (Utils.Size(values) < _length) - values = new T[_length]; - Array.Copy(_values, values, _length); - count = _length; - length = _length; - } - else - { - Contracts.Assert(_count < _length); - if (Utils.Size(values) < _count) - values = new T[_count]; - if (Utils.Size(indices) < _count) - indices = new int[_count]; - Array.Copy(_values, values, _count); - Array.Copy(_indices, indices, _count); - count = _count; - length = _length; - } - } - public void Reset(int length, bool dense) { ResetImpl(length, dense); @@ -435,7 +396,7 @@ public void AddFeatures(int index, in VBuffer buffer) if (count == 0) return; - var values = buffer.Values; + var values = buffer.GetValues(); if (buffer.IsDense) { Contracts.Assert(count == buffer.Length); @@ -454,7 +415,7 @@ public void AddFeatures(int index, in VBuffer buffer) else { // REVIEW: Validate indices! 
- var indices = buffer.Indices; + var indices = buffer.GetIndices(); if (_dense) { for (int i = 0; i < count; i++) @@ -471,24 +432,35 @@ public void AddFeatures(int index, in VBuffer buffer) public void GetResult(ref VBuffer buffer) { - var values = buffer.Values; - var indices = buffer.Indices; - if (IsEmpty) { - buffer = new VBuffer(_length, 0, values, indices); + VBufferMutationContext.Create(ref buffer, _length, 0) + .Complete(ref buffer); return; } - int count; - int length; - GetResult(ref values, ref indices, out count, out length); - Contracts.Assert(0 <= count && count <= length); + if (!_dense) + { + if (!_sorted) + SortAndSumDups(); + if (!_dense && _count >= _length / 2) + MakeDense(); + } - if (count == length) - buffer = new VBuffer(length, values, indices); + if (_dense) + { + var mutation = VBufferMutationContext.Create(ref buffer, _length); + _values.AsSpan(0, _length).CopyTo(mutation.Values); + mutation.Complete(ref buffer); + } else - buffer = new VBuffer(length, count, values, indices); + { + Contracts.Assert(_count < _length); + var mutation = VBufferMutationContext.Create(ref buffer, _length, _count); + _values.AsSpan(0, _count).CopyTo(mutation.Values); + _indices.AsSpan(0, _count).CopyTo(mutation.Indices); + mutation.Complete(ref buffer); + } } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index 792ec4f9f3..4472a0206c 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -1109,29 +1109,29 @@ public override void Get(ref VBuffer value) int length = FixedLength ? 
_size : _lengths[_vectorIndex]; int count = _counts[_vectorIndex]; - int[] indices = value.Indices; - T[] values = value.Values; if (count < 0) { // dense + var mutation = VBufferMutationContext.Create(ref value, length); if (length > 0) { - Utils.EnsureSize(ref values, length); - Array.Copy(_values, _valuesOffset, values, 0, length); + _values.AsSpan(_valuesOffset, length) + .CopyTo(mutation.Values); } - value = new VBuffer(length, values, indices); + mutation.Complete(ref value); } else { // sparse + var mutation = VBufferMutationContext.Create(ref value, length, count); if (count > 0) { - Utils.EnsureSize(ref values, count); - Utils.EnsureSize(ref indices, count); - Array.Copy(_values, _valuesOffset, values, 0, count); - Array.Copy(_indices, _indicesOffset, indices, 0, count); + _values.AsSpan(_valuesOffset, count) + .CopyTo(mutation.Values); + _indices.AsSpan(_indicesOffset, count) + .CopyTo(mutation.Indices); } - value = new VBuffer(length, count, values, indices); + mutation.Complete(ref value); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index 130f158d4e..138ccc7a14 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -401,28 +401,23 @@ public void Get(ref VBuffer dst) { AssertValid(); - var values = dst.Values; - var indices = dst.Indices; - if (_count == 0) { - dst = new VBuffer(_size, 0, values, indices); + VBufferMutationContext.Create(ref dst, _size, 0) + .Complete(ref dst); return; } - if (Utils.Size(values) < _count) - values = new TItem[_count]; - Array.Copy(_values, values, _count); + var mutation = VBufferMutationContext.Create(ref dst, _size, _count); + _values.AsSpan(0, _count).CopyTo(mutation.Values); if (_count == _size) { - dst = new VBuffer(_size, values, indices); + mutation.Complete(ref dst); return; } - if (Utils.Size(indices) < _count) - indices = new 
int[_count]; - Array.Copy(_indices, indices, _count); - dst = new VBuffer(_size, _count, values, indices); + _indices.AsSpan(0, _count).CopyTo(mutation.Indices); + mutation.Complete(ref dst); } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs index 8161c8e653..e5dcf5d582 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs @@ -166,20 +166,22 @@ public VecValueWriter(IRowCursor cursor, VectorType type, int source, char sep) public override void WriteData(Action appendItem, out int length) { _getSrc(ref _src); + var srcValues = _src.GetValues(); if (_src.IsDense) { - for (int i = 0; i < _src.Length; i++) + for (int i = 0; i < srcValues.Length; i++) { - Conv(in _src.Values[i], ref Sb); + Conv(in srcValues[i], ref Sb); appendItem(Sb, i); } } else { - for (int i = 0; i < _src.Count; i++) + var srcIndices = _src.GetIndices(); + for (int i = 0; i < srcValues.Length; i++) { - Conv(in _src.Values[i], ref Sb); - appendItem(Sb, _src.Indices[i]); + Conv(in srcValues[i], ref Sb); + appendItem(Sb, srcIndices[i]); } } length = _src.Length; @@ -190,13 +192,15 @@ public override void WriteHeader(Action appendItem, out int length = _slotCount; if (_slotNames.Count == 0) return; + var slotNamesValues = _slotNames.GetValues(); + var slotNamesIndices = _slotNames.GetIndices(); for (int i = 0; i < _slotNames.Count; i++) { - var name = _slotNames.Values[i]; + var name = slotNamesValues[i]; if (name.IsEmpty) continue; MapText(in name, ref Sb); - int index = _slotNames.IsDense ? i : _slotNames.Indices[i]; + int index = _slotNames.IsDense ? 
i : slotNamesIndices[i]; appendItem(Sb, index); } } diff --git a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs index 2a526f152a..d61289b55c 100644 --- a/src/Microsoft.ML.Data/DataView/CompositeSchema.cs +++ b/src/Microsoft.ML.Data/DataView/CompositeSchema.cs @@ -67,7 +67,7 @@ public void CheckColumnInRange(int col) public void GetColumnSource(int col, out int srcIndex, out int srcCol) { CheckColumnInRange(col); - if (!_cumulativeColCounts.TryFindIndexSorted(0, _cumulativeColCounts.Length, col, out srcIndex)) + if (!Utils.TryFindIndexSorted(_cumulativeColCounts, 0, _cumulativeColCounts.Length, col, out srcIndex)) srcIndex--; Contracts.Assert(0 <= srcIndex && srcIndex < _cumulativeColCounts.Length); srcCol = col - _cumulativeColCounts[srcIndex]; diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index 1438cce601..0a0fa3255e 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -22,7 +22,7 @@ public static Float NormSquared(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.SumSq(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.SumSq(a.GetValues()); } /// @@ -50,7 +50,7 @@ public static Float L1Norm(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.SumAbs(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.SumAbs(a.GetValues()); } /// @@ -61,7 +61,7 @@ public static Float MaxNorm(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.MaxAbs(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.MaxAbs(a.GetValues()); } /// @@ -71,7 +71,7 @@ public static Float Sum(in VBuffer a) { if (a.Count == 0) return 0; - return CpuMathUtils.Sum(a.Values.AsSpan(0, a.Count)); + return CpuMathUtils.Sum(a.GetValues()); } /// @@ -83,10 +83,11 @@ public static void ScaleBy(ref VBuffer dst, Float c) { if (c == 1 || 
dst.Count == 0) return; + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); if (c != 0) - CpuMathUtils.Scale(c, dst.Values.AsSpan(0, dst.Count)); + CpuMathUtils.Scale(c, mutation.Values); else // Maintain density of dst. - Array.Clear(dst.Values, 0, dst.Count); + mutation.Values.Clear(); // REVIEW: Any benefit in sparsifying? } @@ -102,30 +103,31 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (count == 0) { // dst is a zero vector. - dst = new VBuffer(length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, length, 0) + .Complete(ref dst); return; } - var dstValues = Utils.Size(dst.Values) >= count ? dst.Values : new Float[count]; if (src.IsDense) { // Maintain the density of src to dst in order to avoid slow down of L-BFGS. + var mutation = VBufferMutationContext.Create(ref dst, length); Contracts.Assert(length == count); if (c == 0) - Array.Clear(dstValues, 0, length); + mutation.Values.Clear(); else - CpuMathUtils.Scale(c, src.Values, dstValues, length); - dst = new VBuffer(length, dstValues, dst.Indices); + CpuMathUtils.Scale(c, src.GetValues(), mutation.Values, length); + mutation.Complete(ref dst); } else { - var dstIndices = Utils.Size(dst.Indices) >= count ? 
dst.Indices : new int[count]; - Array.Copy(src.Indices, dstIndices, count); + var mutation = VBufferMutationContext.Create(ref dst, length, count); + src.GetIndices().CopyTo(mutation.Indices); if (c == 0) - Array.Clear(dstValues, 0, count); + mutation.Values.Clear(); else - CpuMathUtils.Scale(c, src.Values, dstValues, count); - dst = new VBuffer(length, count, dstValues, dstIndices); + CpuMathUtils.Scale(c, src.GetValues(), mutation.Values, count); + mutation.Complete(ref dst); } } @@ -141,10 +143,11 @@ public static void Add(in VBuffer src, ref VBuffer dst) if (dst.IsDense) { + var mutation = VBufferMutationContext.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.Add(src.Values, dst.Values, src.Length); + CpuMathUtils.Add(src.GetValues(), mutation.Values, src.Length); else - CpuMathUtils.Add(src.Values, src.Indices, dst.Values, src.Count); + CpuMathUtils.Add(src.GetValues(), src.GetIndices(), mutation.Values, src.Count); return; } // REVIEW: Should we use SSE for any of these possibilities? @@ -167,10 +170,11 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds if (dst.IsDense) { + var mutation = VBufferMutationContext.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.AddScale(c, src.Values, dst.Values, src.Length); + CpuMathUtils.AddScale(c, src.GetValues(), mutation.Values, src.Length); else - CpuMathUtils.AddScale(c, src.Values, src.Indices, dst.Values, src.Count); + CpuMathUtils.AddScale(c, src.GetValues(), src.GetIndices(), mutation.Values, src.Count); return; } // REVIEW: Should we use SSE for any of these possibilities? @@ -196,9 +200,9 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds Contracts.Assert(length > 0); if (dst.IsDense && src.IsDense) { - Float[] resValues = Utils.Size(res.Values) >= length ? 
res.Values : new Float[length]; - CpuMathUtils.AddScaleCopy(c, src.Values, dst.Values, resValues, length); - res = new VBuffer(length, resValues, res.Indices); + var mutation = VBufferMutationContext.Create(ref res, length); + CpuMathUtils.AddScaleCopy(c, src.GetValues(), dst.GetValues(), mutation.Values, length); + mutation.Complete(ref res); return; } @@ -235,13 +239,17 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer if (src.Count == 0 || c == 0) return; + VBufferMutationContext mutation; + Span values; if (dst.IsDense) { // This is by far the most common case. + mutation = VBufferMutationContext.Create(ref dst, dst.Length); + values = mutation.Values.Slice(offset); if (src.IsDense) - CpuMathUtils.AddScale(c, src.Values, dst.Values.AsSpan(offset), src.Count); + CpuMathUtils.AddScale(c, src.GetValues(), values, src.Count); else - CpuMathUtils.AddScale(c, src.Values, src.Indices, dst.Values.AsSpan(offset), src.Count); + CpuMathUtils.AddScale(c, src.GetValues(), src.GetIndices(), values, src.Count); return; } // REVIEW: Perhaps implementing an ApplyInto with an offset would be more @@ -250,8 +258,9 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer // dst is sparse. I expect this will see limited practical use, since accumulants // are often better off going into a dense vector in all applications of interest to us. // Correspondingly, this implementation will be functional, but not optimized. - int dMin = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dst.Indices, 0, dst.Count, offset); - int dLim = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dst.Indices, dMin, dst.Count, offset + src.Length); + var dstIndices = dst.GetIndices(); + int dMin = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dstIndices, 0, dst.Count, offset); + int dLim = dst.Count == 0 ? 
0 : Utils.FindIndexSorted(dstIndices, dMin, dst.Count, offset + src.Length); Contracts.Assert(dMin - dLim <= src.Length); // First get the number of extra values that we will need to accomodate. int gapCount; @@ -260,9 +269,10 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer else { gapCount = src.Count; + var srcIndices = src.GetIndices(); for (int iS = 0, iD = dMin; iS < src.Count && iD < dLim; ) { - var comp = src.Indices[iS] - dst.Indices[iD] + offset; + var comp = srcIndices[iS] - dstIndices[iD] + offset; if (comp < 0) // dst index is larger. iS++; else if (comp > 0) // src index is larger. @@ -276,23 +286,29 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } } // Extend dst so that it has room for this additional stuff. Shift things over as well. - var indices = dst.Indices; - var values = dst.Values; + mutation = VBufferMutationContext.Create(ref dst, + dst.Length, + dst.Count + gapCount, + keepOldOnResize: true); + var indices = mutation.Indices; + values = mutation.Values; if (gapCount > 0) { - Utils.EnsureSize(ref indices, dst.Count + gapCount, dst.Length); - Utils.EnsureSize(ref values, dst.Count + gapCount, dst.Length); // Shift things over, unless there's nothing to shift over, or no new elements are being introduced anyway. if (dst.Count != dLim) { Contracts.Assert(dLim < dst.Count); - Array.Copy(indices, dLim, indices, dLim + gapCount, dst.Count - dLim); - Array.Copy(values, dLim, values, dLim + gapCount, dst.Count - dLim); + indices.Slice(dLim, dst.Count - dLim) + .CopyTo(indices.Slice(dLim + gapCount)); + values.Slice(dLim, dst.Count - dLim) + .CopyTo(values.Slice(dLim + gapCount)); } } // Now, fill in the stuff in this "gap." Both of these implementations work // backwards from the end, since they can potentially be working in place if // the EnsureSize calls did not actually result in a new array. 
+ var srcValues = src.GetValues(); + var dstValues = dst.GetValues(); if (src.IsDense) { // dst is sparse, src is dense. @@ -303,10 +319,10 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer Contracts.Assert(iDD == iS + dMin); // iDD and iD are the points in where we are writing and reading from. Contracts.Assert(iDD >= iD); - if (iD >= 0 && offset + iS == dst.Indices[iD]) // Collision. - values[iDD] = dst.Values[iD--] + c * src.Values[iS]; + if (iD >= 0 && offset + iS == dstIndices[iD]) // Collision. + values[iDD] = dstValues[iD--] + c * srcValues[iS]; else // Miss. - values[iDD] = c * src.Values[iS]; + values[iDD] = c * srcValues[iS]; indices[iDD] = offset + iS; } } @@ -315,8 +331,9 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer // Both dst and src are sparse. int iD = dLim - 1; int iS = src.Count - 1; - int sIndex = iS < 0 ? -1 : src.Indices[iS]; - int dIndex = iD < 0 ? -1 : dst.Indices[iD] - offset; + var srcIndices = src.GetIndices(); + int sIndex = iS < 0 ? -1 : srcIndices[iS]; + int dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; for (int iDD = dLim + gapCount; --iDD >= dMin; ) { @@ -324,26 +341,26 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer int comp = sIndex - dIndex; if (comp == 0) // Collision on both. { - indices[iDD] = dst.Indices[iD]; - values[iDD] = dst.Values[iD--] + c * src.Values[iS--]; - sIndex = iS < 0 ? -1 : src.Indices[iS]; - dIndex = iD < 0 ? -1 : dst.Indices[iD] - offset; + indices[iDD] = dstIndices[iD]; + values[iDD] = dstValues[iD--] + c * srcValues[iS--]; + sIndex = iS < 0 ? -1 : srcIndices[iS]; + dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; } else if (comp < 0) // Collision on dst. { - indices[iDD] = dst.Indices[iD]; - values[iDD] = dst.Values[iD--]; - dIndex = iD < 0 ? -1 : dst.Indices[iD] - offset; + indices[iDD] = dstIndices[iD]; + values[iDD] = dstValues[iD--]; + dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; } else // Collision on src. 
{ indices[iDD] = sIndex + offset; - values[iDD] = c * src.Values[iS--]; - sIndex = iS < 0 ? -1 : src.Indices[iS]; + values[iDD] = c * srcValues[iS--]; + sIndex = iS < 0 ? -1 : srcIndices[iS]; } } } - dst = new VBuffer(dst.Length, dst.Count + gapCount, values, indices); + mutation.Complete(ref dst); } /// @@ -365,15 +382,20 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer { if (src.Length > 0 && src.IsDense) { - var values = dst.Values; // Due to sparsity preservation from src, dst must be dense, in the same way. - Utils.EnsureSize(ref values, src.Length, src.Length, keepOld: false); - if (values == dst.Values) // We need to clear it. - Array.Clear(values, 0, src.Length); - dst = new VBuffer(src.Length, values, dst.Indices); + var mutation = VBufferMutationContext.Create(ref dst, + src.Length, + out bool createdNewValues, + out bool _); + if (!createdNewValues) // We need to clear it + mutation.Values.Clear(); + mutation.Complete(ref dst); } else - dst = new VBuffer(src.Length, 0, dst.Values, dst.Indices); + { + VBufferMutationContext.Create(ref dst, src.Length, 0) + .Complete(ref dst); + } } else if (c == -1) VBufferUtils.ApplyIntoEitherDefined(in src, ref dst, (i, v) => -v); @@ -388,30 +410,32 @@ public static int ArgMax(in VBuffer src) if (src.Count == 0) return 0; - int ind = MathUtils.ArgMax(src.Values, src.Count); + var srcValues = src.GetValues(); + int ind = MathUtils.ArgMax(srcValues); // ind < 0 iff all explicit values are NaN. Contracts.Assert(-1 <= ind && ind < src.Count); if (src.IsDense) return ind; + var srcIndices = src.GetIndices(); if (ind >= 0) { - Contracts.Assert(src.Indices[ind] >= ind); - if (src.Values[ind] > 0) - return src.Indices[ind]; + Contracts.Assert(srcIndices[ind] >= ind); + if (srcValues[ind] > 0) + return srcIndices[ind]; // This covers the case where there is an explicit zero, and zero is the max, // and the first explicit zero is before any implicit entries. 
- if (src.Values[ind] == 0 && src.Indices[ind] == ind) + if (srcValues[ind] == 0 && srcIndices[ind] == ind) return ind; } // All explicit values are non-positive or NaN, so return the first index not in src.Indices. ind = 0; - while (ind < src.Count && src.Indices[ind] == ind) + while (ind < src.Count && srcIndices[ind] == ind) ind++; Contracts.Assert(ind <= src.Count); - Contracts.Assert(ind == src.Count || ind < src.Indices[ind]); + Contracts.Assert(ind == src.Count || ind < srcIndices[ind]); return ind; } @@ -422,30 +446,32 @@ public static int ArgMin(in VBuffer src) if (src.Count == 0) return 0; - int ind = MathUtils.ArgMin(src.Values, src.Count); + var srcValues = src.GetValues(); + int ind = MathUtils.ArgMin(srcValues); // ind < 0 iff all explicit values are NaN. Contracts.Assert(-1 <= ind && ind < src.Count); if (src.IsDense) return ind; + var srcIndices = src.GetIndices(); if (ind >= 0) { - Contracts.Assert(src.Indices[ind] >= ind); - if (src.Values[ind] < 0) - return src.Indices[ind]; + Contracts.Assert(srcIndices[ind] >= ind); + if (srcValues[ind] < 0) + return srcIndices[ind]; // This covers the case where there is an explicit zero, and zero is the min, // and the first explicit zero is before any implicit entries. - if (src.Values[ind] == 0 && src.Indices[ind] == ind) + if (srcValues[ind] == 0 && srcIndices[ind] == ind) return ind; } - // All explicit values are non-negative or NaN, so return the first index not in src.Indices. + // All explicit values are non-negative or NaN, so return the first index not in srcIndices. 
ind = 0; - while (ind < src.Count && src.Indices[ind] == ind) + while (ind < src.Count && srcIndices[ind] == ind) ind++; Contracts.Assert(ind <= src.Count); - Contracts.Assert(ind == src.Count || ind < src.Indices[ind]); + Contracts.Assert(ind == src.Count || ind < srcIndices[ind]); return ind; } } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index 79af700bcc..84fe2e135f 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -33,8 +33,8 @@ public static Float DotProduct(Float[] a, in VBuffer b) if (b.Count == 0) return 0; if (b.IsDense) - return CpuMathUtils.DotProductDense(a, b.Values, b.Length); - return CpuMathUtils.DotProductSparse(a, b.Values, b.Indices, b.Count); + return CpuMathUtils.DotProductDense(a, b.GetValues(), b.Length); + return CpuMathUtils.DotProductSparse(a, b.GetValues(), b.GetIndices(), b.Count); } public static Float DotProduct(in VBuffer a, in VBuffer b) @@ -47,13 +47,13 @@ public static Float DotProduct(in VBuffer a, in VBuffer b) if (a.IsDense) { if (b.IsDense) - return CpuMathUtils.DotProductDense(a.Values, b.Values, a.Length); - return CpuMathUtils.DotProductSparse(a.Values, b.Values, b.Indices, b.Count); + return CpuMathUtils.DotProductDense(a.GetValues(), b.GetValues(), a.Length); + return CpuMathUtils.DotProductSparse(a.GetValues(), b.GetValues(), b.GetIndices(), b.Count); } if (b.IsDense) - return CpuMathUtils.DotProductSparse(b.Values, a.Values, a.Indices, a.Count); - return DotProductSparse(a.Values, a.Indices, 0, a.Count, b.Values, b.Indices, 0, b.Count, 0); + return CpuMathUtils.DotProductSparse(b.GetValues(), a.GetValues(), a.GetIndices(), a.Count); + return DotProductSparse(a.GetValues(), a.GetIndices(), 0, a.Count, b.GetValues(), b.GetIndices(), 0, b.Count); } /// @@ -75,10 +75,12 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, var bottomHeap = 
new Heap>((left, right) => right.Value > left.Value, bottom + 1); bool isDense = a.IsDense; + var aValues = a.GetValues(); + var aIndices = a.GetIndices(); for (int i = 0; i < a.Count; i++) { - int idx = isDense ? i : a.Indices[i]; - var value = a.Values[i]; + int idx = isDense ? i : aIndices[i]; + var value = aValues[i]; if (value < 0 && bottom > 0) { @@ -108,22 +110,21 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, } var newCount = topHeap.Count + bottomHeap.Count; - var indices = a.Indices; - Utils.EnsureSize(ref indices, newCount); - Contracts.Assert(Utils.Size(a.Values) >= newCount); + var mutation = VBufferMutationContext.Create(ref a, a.Length, newCount); + var indices = mutation.Indices; int count = 0; while (topHeap.Count > 0) { var pair = topHeap.Pop(); indices[count] = pair.Key; - a.Values[count++] = pair.Value; + mutation.Values[count++] = pair.Value; } while (bottomHeap.Count > 0) { var pair = bottomHeap.Pop(); indices[count] = pair.Key; - a.Values[count++] = pair.Value; + mutation.Values[count++] = pair.Value; } Contracts.Assert(count == newCount); @@ -132,7 +133,7 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, { for (var i = 0; i < newCount; i++) { - var value = a.Values[i]; + var value = mutation.Values[i]; var absValue = Math.Abs(value); if (absValue > absMax) absMax = absValue; @@ -142,13 +143,13 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, { var ratio = 1 / absMax; for (var i = 0; i < newCount; i++) - a.Values[i] = ratio * a.Values[i]; + mutation.Values[i] = ratio * mutation.Values[i]; } } if (indices != null) Array.Sort(indices, a.Values, 0, newCount); - a = new VBuffer(a.Length, newCount, a.Values, indices); + mutation.Complete(ref a); } /// @@ -159,27 +160,24 @@ public static void MulElementWise(in VBuffer a, ref VBuffer dst) Contracts.Check(a.Length == dst.Length, "Vectors must have the same dimensionality."); if (a.IsDense && dst.IsDense) - 
CpuMathUtils.MulElementWise(a.Values, dst.Values, dst.Values, a.Length); + { + var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + CpuMathUtils.MulElementWise(a.GetValues(), dst.GetValues(), mutation.Values, a.Length); + } else VBufferUtils.ApplyWithEitherDefined(in a, ref dst, (int ind, Float v1, ref Float v2) => { v2 *= v1; }); } - private static Float L2DistSquaredSparse(Float[] valuesA, int[] indicesA, int countA, Float[] valuesB, int[] indicesB, int countB, int length) + private static Float L2DistSquaredSparse(ReadOnlySpan valuesA, ReadOnlySpan indicesA, ReadOnlySpan valuesB, ReadOnlySpan indicesB) { - Contracts.AssertValueOrNull(valuesA); - Contracts.AssertValueOrNull(indicesA); - Contracts.AssertValueOrNull(valuesB); - Contracts.AssertValueOrNull(indicesB); - Contracts.Assert(0 <= countA && countA <= Utils.Size(indicesA)); - Contracts.Assert(0 <= countB && countB <= Utils.Size(indicesB)); - Contracts.Assert(countA <= Utils.Size(valuesA)); - Contracts.Assert(countB <= Utils.Size(valuesB)); + Contracts.Assert(valuesA.Length == indicesA.Length); + Contracts.Assert(valuesB.Length == indicesB.Length); Float res = 0; int ia = 0; int ib = 0; - while (ia < countA && ib < countB) + while (ia < indicesA.Length && ib < indicesB.Length) { int diff = indicesA[ia] - indicesB[ib]; Float d; @@ -202,14 +200,14 @@ private static Float L2DistSquaredSparse(Float[] valuesA, int[] indicesA, int co res += d * d; } - while (ia < countA) + while (ia < indicesA.Length) { var d = valuesA[ia]; res += d * d; ia++; } - while (ib < countB) + while (ib < indicesB.Length) { var d = valuesB[ib]; res += d * d; @@ -219,30 +217,21 @@ private static Float L2DistSquaredSparse(Float[] valuesA, int[] indicesA, int co return res; } - private static Float L2DistSquaredHalfSparse(Float[] valuesA, int lengthA, Float[] valuesB, int[] indicesB, int countB) + private static Float L2DistSquaredHalfSparse(ReadOnlySpan valuesA, ReadOnlySpan valuesB, ReadOnlySpan indicesB) { - 
Contracts.AssertValueOrNull(valuesA); - Contracts.AssertValueOrNull(valuesB); - Contracts.AssertValueOrNull(indicesB); - Contracts.Assert(0 <= lengthA && lengthA <= Utils.Size(valuesA)); - Contracts.Assert(0 <= countB && countB <= Utils.Size(indicesB)); - Contracts.Assert(countB <= Utils.Size(valuesB)); - - var normA = CpuMathUtils.SumSq(valuesA.AsSpan(0, lengthA)); - if (countB == 0) + var normA = CpuMathUtils.SumSq(valuesA); + if (valuesB.Length == 0) return normA; - var normB = CpuMathUtils.SumSq(valuesB.AsSpan(0, countB)); - var dotP = CpuMathUtils.DotProductSparse(valuesA, valuesB, indicesB, countB); + var normB = CpuMathUtils.SumSq(valuesB); + var dotP = CpuMathUtils.DotProductSparse(valuesA, valuesB, indicesB, valuesB.Length); var res = normA + normB - 2 * dotP; return res < 0 ? 0 : res; } - private static Float L2DiffSquaredDense(Float[] valuesA, Float[] valuesB, int length) + private static Float L2DiffSquaredDense(ReadOnlySpan valuesA, ReadOnlySpan valuesB, int length) { - Contracts.AssertValueOrNull(valuesA); - Contracts.AssertValueOrNull(valuesB); - Contracts.Assert(0 <= length && length <= Utils.Size(valuesA)); - Contracts.Assert(0 <= length && length <= Utils.Size(valuesB)); + Contracts.Assert(0 <= length && length <= valuesA.Length); + Contracts.Assert(0 <= length && length <= valuesB.Length); if (length == 0) return 0; @@ -267,27 +256,31 @@ public static Float DotProductWithOffset(in VBuffer a, int offset, in VBu if (a.IsDense) { if (b.IsDense) - return CpuMathUtils.DotProductDense(a.Values.AsSpan(offset), b.Values, b.Length); - return CpuMathUtils.DotProductSparse(a.Values.AsSpan(offset), b.Values, b.Indices, b.Count); + return CpuMathUtils.DotProductDense(a.GetValues().Slice(offset), b.GetValues(), b.Length); + return CpuMathUtils.DotProductSparse(a.GetValues().Slice(offset), b.GetValues(), b.GetIndices(), b.Count); } else { Float result = 0; - int aMin = Utils.FindIndexSorted(a.Indices, 0, a.Count, offset); - int aLim = 
Utils.FindIndexSorted(a.Indices, 0, a.Count, offset + b.Length); + var aValues = a.GetValues(); + var aIndices = a.GetIndices(); + var bValues = b.GetValues(); + var bIndices = b.GetIndices(); + int aMin = Utils.FindIndexSorted(aIndices, 0, a.Count, offset); + int aLim = Utils.FindIndexSorted(aIndices, 0, a.Count, offset + b.Length); if (b.IsDense) { for (int iA = aMin; iA < aLim; ++iA) - result += a.Values[iA] * b.Values[a.Indices[iA] - offset]; + result += aValues[iA] * bValues[aIndices[iA] - offset]; return result; } for (int iA = aMin, iB = 0; iA < aLim && iB < b.Count; ) { - int aIndex = a.Indices[iA]; - int bIndex = b.Indices[iB]; + int aIndex = aIndices[iA]; + int bIndex = bIndices[iB]; int comp = (aIndex - offset) - bIndex; if (comp == 0) - result += a.Values[iA++] * b.Values[iB++]; + result += aValues[iA++] * bValues[iB++]; else if (comp < 0) iA++; else @@ -314,16 +307,16 @@ public static Float DotProductWithOffset(Float[] a, int offset, in VBuffer aValues, ReadOnlySpan aIndices, int ia, int iaLim, ReadOnlySpan bValues, ReadOnlySpan bIndices, int ib, int ibLim) { - Contracts.AssertValue(aValues); - Contracts.AssertValue(aIndices); - Contracts.AssertValue(bValues); - Contracts.AssertValue(bIndices); + Contracts.AssertNonEmpty(aValues); + Contracts.AssertNonEmpty(aIndices); + Contracts.AssertNonEmpty(bValues); + Contracts.AssertNonEmpty(bIndices); Contracts.Assert(0 <= ia && ia < iaLim && iaLim <= aIndices.Length); Contracts.Assert(0 <= ib && ib < ibLim && ibLim <= bIndices.Length); @@ -334,7 +327,7 @@ private static Float DotProductSparse(Float[] aValues, int[] aIndices, int ia, i for (; ; ) { - int d = aIndices[ia] - offset - bIndices[ib]; + int d = aIndices[ia] - bIndices[ib]; if (d == 0) { res += aValues[ia] * bValues[ib]; @@ -347,7 +340,7 @@ private static Float DotProductSparse(Float[] aValues, int[] aIndices, int ia, i { ia++; if (d < -thresh) - ia = Utils.FindIndexSorted(aIndices, ia, iaLim, bIndices[ib] + offset); + ia = 
Utils.FindIndexSorted(aIndices, ia, iaLim, bIndices[ib]); if (ia >= iaLim) break; } @@ -355,7 +348,7 @@ private static Float DotProductSparse(Float[] aValues, int[] aIndices, int ia, i { ib++; if (d > thresh) - ib = Utils.FindIndexSorted(bIndices, ib, ibLim, aIndices[ia] - offset); + ib = Utils.FindIndexSorted(bIndices, ib, ibLim, aIndices[ia]); if (ib >= ibLim) break; } @@ -401,12 +394,12 @@ public static Float L2DistSquared(in VBuffer a, in VBuffer b) if (a.IsDense) { if (b.IsDense) - return L2DiffSquaredDense(a.Values, b.Values, b.Length); - return L2DistSquaredHalfSparse(a.Values, a.Length, b.Values, b.Indices, b.Count); + return L2DiffSquaredDense(a.GetValues(), b.GetValues(), b.Length); + return L2DistSquaredHalfSparse(a.GetValues(), b.GetValues(), b.GetIndices()); } if (b.IsDense) - return L2DistSquaredHalfSparse(b.Values, b.Length, a.Values, a.Indices, a.Count); - return L2DistSquaredSparse(a.Values, a.Indices, a.Count, b.Values, b.Indices, b.Count, a.Length); + return L2DistSquaredHalfSparse(b.GetValues(), a.GetValues(), a.GetIndices()); + return L2DistSquaredSparse(a.GetValues(), a.GetIndices(), b.GetValues(), b.GetIndices()); } /// @@ -420,8 +413,8 @@ public static Float L2DistSquared(Float[] a, in VBuffer b) Contracts.CheckValue(a, nameof(a)); Contracts.Check(Utils.Size(a) == b.Length, "Vectors must have the same dimensionality."); if (b.IsDense) - return L2DiffSquaredDense(a, b.Values, b.Length); - return L2DistSquaredHalfSparse(a, a.Length, b.Values, b.Indices, b.Count); + return L2DiffSquaredDense(a, b.GetValues(), b.Length); + return L2DistSquaredHalfSparse(a.AsSpan(0, a.Length), b.GetValues(), b.GetIndices()); } /// @@ -451,12 +444,14 @@ public static void AddMult(in VBuffer src, Float[] dst, Float c) if (src.Count == 0 || c == 0) return; + var srcValues = src.GetValues(); if (src.IsDense) - CpuMathUtils.AddScale(c, src.Values, dst, src.Count); + CpuMathUtils.AddScale(c, srcValues, dst, src.Count); else { + var srcIndices = src.GetIndices(); for 
(int i = 0; i < src.Count; i++) - dst[src.Indices[i]] += c * src.Values[i]; + dst[srcIndices[i]] += c * srcValues[i]; } } @@ -477,15 +472,17 @@ public static void AddMultWithOffset(in VBuffer src, Float[] dst, int off if (src.Count == 0 || c == 0) return; + var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < src.Length; i++) - dst[i + offset] += c * src.Values[i]; + dst[i + offset] += c * srcValues[i]; } else { + var srcIndices = src.GetIndices(); for (int i = 0; i < src.Count; i++) - dst[src.Indices[i] + offset] += c * src.Values[i]; + dst[srcIndices[i] + offset] += c * srcValues[i]; } } diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index d644725618..ca28f78d9f 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -701,7 +701,8 @@ private ValueGetter> MakeVecTrivialGetter() // Delegates onto instance methods are more efficient than delegates onto static methods. private void VecTrivialGetter(ref VBuffer value) { - value = new VBuffer(1, 0, value.Values, value.Indices); + VBufferMutationContext.Create(ref value, 1, 0) + .Complete(ref value); } private Delegate MakeVecGetter(IRow input, int iinfo) diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 64b510a655..029dc15d20 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -104,11 +104,11 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) } int newLength = DstLength == 0 ? ComputeLength(src.Length) : DstLength; - var values = dst.Values; if (newLength == 0) { // All slots dropped. 
- dst = new VBuffer(1, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, 1, 0) + .Complete(ref dst); return; } @@ -116,12 +116,11 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) // End of the trivial cases // At this point, we need to drop some slots and keep some slots. + VBufferMutationContext mutation; + var srcValues = src.GetValues(); if (src.IsDense) { - Contracts.Assert(Utils.Size(values) == Utils.Size(src.Values) || src.Values != dst.Values); - - if (Utils.Size(values) < newLength) - values = new TDst[newLength]; + mutation = VBufferMutationContext.Create(ref dst, newLength); int iDst = 0; int iSrc = 0; @@ -131,33 +130,29 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) while (iSrc < lim) { Contracts.Assert(iDst <= iSrc); - values[iDst++] = src.Values[iSrc++]; + mutation.Values[iDst++] = srcValues[iSrc++]; } iSrc = SlotsMax[i] + 1; } while (iSrc < src.Length) { Contracts.Assert(iDst <= iSrc); - values[iDst++] = src.Values[iSrc++]; + mutation.Values[iDst++] = srcValues[iSrc++]; } Contracts.Assert(iDst == newLength); - dst = new VBuffer(newLength, values, dst.Indices); + mutation.Complete(ref dst); return; } // Sparse case. // Approximate new count is min(#indices, newLength). 
var newCount = Math.Min(src.Count, newLength); - var indices = dst.Indices; + var indices = dst.GetIndices(); + var srcIndices = src.GetIndices(); Contracts.Assert(newCount <= src.Length); - Contracts.Assert(Utils.Size(values) == Utils.Size(src.Values) || src.Values != dst.Values); - Contracts.Assert(Utils.Size(indices) == Utils.Size(src.Indices) || src.Indices != dst.Indices); - if (Utils.Size(indices) < newCount) - indices = new int[newCount]; - if (Utils.Size(values) < newCount) - values = new TDst[newCount]; + mutation = VBufferMutationContext.Create(ref dst, newLength, newCount); int iiDst = 0; int iiSrc = 0; @@ -170,12 +165,12 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) while (iiSrc < src.Count) { // Copy (with offset) the elements before the current range. - var index = src.Indices[iiSrc]; + var index = srcIndices[iiSrc]; if (index < min) { Contracts.Assert(iiDst <= iiSrc); - indices[iiDst] = index - iOffset; - values[iiDst++] = src.Values[iiSrc++]; + mutation.Indices[iiDst] = index - iOffset; + mutation.Values[iiDst++] = srcValues[iiSrc++]; continue; } if (index <= max) @@ -211,7 +206,10 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(index <= max); } - dst = new VBuffer(newLength, iiDst, values, indices); + mutation.Complete(ref dst); + // now change the ValuesCount to iiDst to be correct + VBufferMutationContext.Create(ref dst, newLength, iiDst) + .Complete(ref dst); } } } diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs index 05d809f8b4..50e965e1a3 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs @@ -762,7 +762,7 @@ public int GetLeaf(in VBuffer feat) { // REVIEW: This really should validate feat.Length! 
if (feat.IsDense) - return GetLeafCore(feat.Values); + return GetLeafCore(feat.GetValues()); return GetLeafCore(feat.GetIndices(), feat.GetValues()); } @@ -778,7 +778,7 @@ private int GetLeafFrom(in VBuffer feat, int root) } if (feat.IsDense) - return GetLeafCore(feat.Values, root: root); + return GetLeafCore(feat.GetValues(), root: root); return GetLeafCore(feat.GetIndices(), feat.GetValues(), root: root); } @@ -796,8 +796,9 @@ public int GetLeaf(in VBuffer feat, ref List path) path.Clear(); if (feat.IsDense) - return GetLeafCore(feat.Values, path); + return GetLeafCore(feat.GetValues(), path); return GetLeafCore(feat.GetIndices(), feat.GetValues(), path); + } private Float GetFeatureValue(Float x, int node) @@ -816,9 +817,8 @@ private Float GetFeatureValue(Float x, int node) } } - private int GetLeafCore(Float[] nonBinnedInstance, List path = null, int root = 0) + private int GetLeafCore(ReadOnlySpan nonBinnedInstance, List path = null, int root = 0) { - Contracts.AssertValue(nonBinnedInstance); Contracts.Assert(path == null || path.Count == 0); Contracts.Assert(root >= 0); @@ -907,6 +907,7 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV if (NumLeaves == 1) return 0; + int count = featIndices.Length; int node = root; while (node >= 0) @@ -921,13 +922,13 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV //REVIEW: Consider experimenting with bitmap instead of doing log(n) binary search. 
int newNode = LteChild[node]; - int end = featIndices.FindIndexSorted(0, count, CategoricalSplitFeatureRanges[node][1]); - for (int i = featIndices.FindIndexSorted(0, count, CategoricalSplitFeatureRanges[node][0]); + int end = Utils.FindIndexSorted(featIndices, 0, count, CategoricalSplitFeatureRanges[node][1]); + for (int i = Utils.FindIndexSorted(featIndices, 0, count, CategoricalSplitFeatureRanges[node][0]); i < count && i <= end; ++i) { int index = featIndices[i]; - if (CategoricalSplitFeatures[node].TryFindIndexSorted(0, CategoricalSplitFeatures[node].Length, index, out int ii)) + if (Utils.TryFindIndexSorted(CategoricalSplitFeatures[node], 0, CategoricalSplitFeatures[node].Length, index, out int ii)) { Float val = GetFeatureValue(featValues[i], node); if (val > 0.0f) @@ -945,7 +946,7 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV Float val = 0; int ifeat = SplitFeatures[node]; - int ii = featIndices.FindIndexSorted(0, count, ifeat); + int ii = Utils.FindIndexSorted(featIndices, 0, count, ifeat); if (ii < count && featIndices[ii] == ifeat) val = featValues[ii]; val = GetFeatureValue(val, node); diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 6562a1b6b6..853976885a 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -655,7 +655,7 @@ private static void FindBestCluster(in VBuffer point, int pointRowIndex, if (pointRowIndex != -1) // if the space was available for cur in initializationState. { // pointNorm is necessary for using triangle inequality. - float pointNorm = VectorUtils.NormSquared(point); + float pointNorm = VectorUtils.NormSquared(in point); // We have cached distance information for this point. 
bestCluster = initializationState.GetBestCluster(pointRowIndex); float bestWeight = initializationState.GetBestWeight(pointRowIndex); @@ -788,6 +788,8 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl // The final chosen points, to be approximately clustered to determine starting // centroids. VBuffer[] clusters = new VBuffer[totalSamples]; + VBuffer[] readOnlyClusters = null; + // L2s, kept for distance trick. float[] clustersL2s = new float[totalSamples]; @@ -859,6 +861,9 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl clusterCount++; } ch.Assert(clusterCount - clusterPrevCount <= numSamplesPerRound); + + KMeansUtils.UpdateReadOnlyCache(clusters, ref readOnlyClusters); + logicalExternalRounds++; pCh.Checkpoint(logicalExternalRounds, numRounds + 2); } @@ -1316,9 +1321,11 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe Initialize(ch, cursorFactory, totalTrainingInstances, numThreads, k, dimensionality, accelMemBudgetInMb, out state, out workState, out reducedState); float[] centroidL2s = new float[k]; + VBuffer[] readOnlyCentroids = new VBuffer[centroids.Length]; + KMeansUtils.UpdateReadOnlyCache(centroids, ref readOnlyCentroids); for (int i = 0; i < k; i++) - centroidL2s[i] = VectorUtils.NormSquared(centroids[i]); + centroidL2s[i] = VectorUtils.NormSquared(in readOnlyCentroids[i]); using (var pch = host.StartProgressChannel("KMeansTrain")) { @@ -1345,7 +1352,7 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe ops[i] = new Action(() => { using (var cursor = set[chunkId]) - ProcessChunk(cursor, state, workState[chunkId], k, centroids, centroidL2s); + ProcessChunk(cursor, state, workState[chunkId], k, readOnlyCentroids, centroidL2s); }); } @@ -1357,7 +1364,7 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe else { using (var cursor = cursorFactory.Create()) - ProcessChunk(cursor, state, 
reducedState, k, centroids, centroidL2s); + ProcessChunk(cursor, state, reducedState, k, readOnlyCentroids, centroidL2s); } WorkChunkState.Reduce(workState, reducedState); @@ -1394,11 +1401,13 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe } #endif reducedState.UpdateClusters(centroids, centroidL2s, state.Delta, ref state.DeltaMax); + KMeansUtils.UpdateReadOnlyCache(centroids, ref readOnlyCentroids); + isConverged = reducedState.AverageScoreDelta < convergenceThreshold; state.Iteration++; if (state.Iteration % 100 == 0) - KMeansUtils.VerifyModelConsistency(centroids); + KMeansUtils.VerifyModelConsistency(readOnlyCentroids); } } } @@ -1795,5 +1804,23 @@ public static void VerifyModelConsistency(VBuffer[] centroids) foreach (var centroid in centroids) Contracts.Check(centroid.Items().Select(x => x.Value).All(FloatUtils.IsFinite), "Model training failed: non-finite coordinates are generated"); } + + /// + /// Checks that all coordinates of all centroids are finite, and throws otherwise + /// + public static void VerifyModelConsistency(VBuffer[] centroids) + { + for (int i = 0; i < centroids.Length; i++) + Contracts.Check(centroids[i].GetValues().All(FloatUtils.IsFinite), "Model training failed: non-finite coordinates are generated"); + } + + public static void UpdateReadOnlyCache(VBuffer[] source, ref VBuffer[] destination) + { + Utils.EnsureSize(ref destination, source.Length); + for (int i = 0; i < source.Length; i++) + { + destination[i] = source[i]; + } + } } } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index 2fb4d29b59..c40f9bcdd1 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -162,7 +162,7 @@ private void Map(in VBuffer src, Float[] distances) { Host.Assert(Utils.Size(distances) >= _k); - Float instanceL2 = VectorUtils.NormSquared(src); + Float instanceL2 = 
VectorUtils.NormSquared(in src); for (int i = 0; i < _k; i++) { Float distance = Math.Max(0, diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs b/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs index ee928e5cda..3c8ba56724 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/DifferentiableFunction.cs @@ -247,7 +247,7 @@ public static Float Test(DifferentiableFunction f, in VBuffer x, bool qui /// /// /// - public static void TestAllCoords(DifferentiableFunction f, ref VBuffer x) + public static void TestAllCoords(DifferentiableFunction f, in VBuffer x) { // REVIEW: Delete this method? VBuffer grad = default(VBuffer); @@ -286,7 +286,7 @@ public static void TestAllCoords(DifferentiableFunction f, ref VBuffer x) /// Function to test /// Point at which to test /// List of coordinates to test - public static void TestCoords(DifferentiableFunction f, ref VBuffer x, IList coords) + public static void TestCoords(DifferentiableFunction f, in VBuffer x, IList coords) { // REVIEW: Delete this method? 
VBuffer grad = default(VBuffer); diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs b/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs index 7b231bb027..705c9f8477 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/OptimizationMonitor.cs @@ -85,7 +85,7 @@ private Float Check(Optimizer.OptimizerState state) { Console.Error.Write(_checkingMessage); Console.Error.Flush(); - var x = state.X; + VBuffer x = state.X; var lastDir = state.LastDir; Float checkResult = GradientTester.Test(state.Function, in x, ref lastDir, true, ref _newGrad, ref _newX); for (int i = 0; i < _checkingMessage.Length; i++) diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs index 4ec56d0eaa..914924d762 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/Optimizer.cs @@ -645,7 +645,7 @@ public void Minimize(DifferentiableFunction function, ref VBuffer initial double? 
improvement = null; double x; int end; - if (message != null && DoubleParser.TryParse(message.AsMemory().Span, out x, out end)) + if (message != null && DoubleParser.TryParse(message.AsSpan(), out x, out end)) improvement = x; pch.Checkpoint(state.Value, improvement, state.Iter); diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs index 67fcf1c18b..b814ee187e 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs @@ -349,7 +349,7 @@ public void ChangeDir() /// Function to minimize /// Initial point /// Approximate minimum - public void Minimize(DifferentiableFunction function, ref VBuffer initial, ref VBuffer result) + public void Minimize(DifferentiableFunction function, in VBuffer initial, ref VBuffer result) { Contracts.Check(FloatUtils.IsFinite(initial.GetValues()), "The initial vector contains NaNs or infinite values."); LineFunc lineFunc = new LineFunc(function, in initial, UseCG); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs index 37d838d6b3..04f272683c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsPredictorBase.cs @@ -595,11 +595,15 @@ protected virtual float DifferentiableFunction(in VBuffer x, ref VBuffer< Contracts.AssertValueOrNull(progress); float scaleFactor = 1 / (float)WeightSum; - VBuffer xDense = default(VBuffer); + VBuffer xDense = default; if (x.IsDense) xDense = x; else - x.CopyToDense(ref xDense); + { + VBuffer xDenseTemp = default; + x.CopyToDense(ref xDenseTemp); + xDense = xDenseTemp; + } IProgressChannel pch = progress != null ? 
progress.StartProgressChannel("Gradient") : null; float loss; @@ -613,7 +617,7 @@ protected virtual float DifferentiableFunction(in VBuffer x, ref VBuffer< if (L2Weight > 0) { Contracts.Assert(xDense.IsDense); - var values = xDense.Values; + var values = xDense.GetValues(); Double r = 0; for (int i = BiasCount; i < values.Length; i++) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 7d5840a776..d8cbb246f1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -665,11 +665,12 @@ protected override void SaveCore(ModelSaveContext ctx) { if (fw.IsDense) { + var fwValues = fw.GetValues(); for (int i = 0; i < fw.Length; i++) { - if (fw.Values[i] != 0) + if (fwValues[i] != 0) { - ctx.Writer.Write(fw.Values[i]); + ctx.Writer.Write(fwValues[i]); count++; } } @@ -697,21 +698,11 @@ protected override void SaveCore(ModelSaveContext ctx) private static int NonZeroCount(in VBuffer vector) { int count = 0; - if (!vector.IsDense) - { - for (int i = 0; i < vector.Count; i++) - { - if (vector.Values[i] != 0) - count++; - } - } - else + var values = vector.GetValues(); + for (int i = 0; i < values.Length; i++) { - for (int i = 0; i < vector.Length; i++) - { - if (vector.Values[i] != 0) - count++; - } + if (values[i] != 0) + count++; } return count; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index 27e4450f7f..cb10bef433 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -772,7 +772,7 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float while (cursor.MoveNext()) { long idx = 
getIndexFromId(cursor.Id); - var features = cursor.Features; + VBuffer features = cursor.Features; var label = cursor.Label; float invariant; if (invariants != null) @@ -830,9 +830,9 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float } if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(primalUpdate, features.Count, features.Values, l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); + CpuMathUtils.SdcaL1UpdateDense(primalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(primalUpdate, features.Count, features.Values, features.Indices, l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); + CpuMathUtils.SdcaL1UpdateSparse(primalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); } break; @@ -919,6 +919,7 @@ protected virtual bool CheckConvergence( var lossSum = new CompensatedSum(); var dualLossSum = new CompensatedSum(); var biasTotal = biasReg[0] + biasUnreg[0]; + VBuffer firstWeights = weights[0]; using (var cursor = cursorFactory.Create()) { @@ -955,7 +956,7 @@ protected virtual bool CheckConvergence( var dualityGap = metrics[(int)MetricKind.DualityGap] = newLoss - newDualLoss; metrics[(int)MetricKind.BiasUnreg] = biasUnreg[0]; metrics[(int)MetricKind.BiasReg] = biasReg[0]; - metrics[(int)MetricKind.L1Sparsity] = Args.L1Threshold == 0 ? 1 : (Double)weights[0].Values.Count(w => w != 0) / weights.Length; + metrics[(int)MetricKind.L1Sparsity] = Args.L1Threshold == 0 ? 1 : (Double)firstWeights.GetValues().Count(w => w != 0) / weights.Length; bool converged = dualityGap / newLoss < Args.ConvergenceTolerance; @@ -964,7 +965,7 @@ protected virtual bool CheckConvergence( // Maintain a copy of weights and bias with best primal loss thus far. 
// This is some extra work and uses extra memory, but it seems worth doing it. // REVIEW: Sparsify bestWeights? - weights[0].CopyTo(ref bestWeights[0]); + firstWeights.CopyTo(ref bestWeights[0]); bestBiasReg[0] = biasReg[0]; bestBiasUnreg[0] = biasUnreg[0]; bestPrimalLoss = metrics[(int)MetricKind.Loss]; diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index 48a88d6009..37023f42c5 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -167,7 +167,7 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa } else { - normSquared = VectorUtils.NormSquared(features); + normSquared = VectorUtils.NormSquared(in features); if (Args.BiasLearningRate == 0) normSquared += 1; @@ -241,9 +241,9 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa } if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, features.Count, features.Values, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); + CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, features.Count, features.Values, features.Indices, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); + CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); } break; @@ -268,9 +268,9 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa : 0; if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, features.Count, features.Values, l1Threshold, l1IntermediateWeights[label].Values, 
weights[label].Values); + CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, features.Count, features.Values, features.Indices, l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); + CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); } rowCount++; From d031fa1422dcf92d5edb126a99418fb6c6994bd8 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Wed, 7 Nov 2018 10:09:57 -0600 Subject: [PATCH 02/14] Getting everything to build and tests passing --- src/Microsoft.ML.Core/Data/VBuffer.cs | 22 +++++++++--- .../Utilities/VBufferUtils.cs | 4 +++ .../Depricated/Vector/VectorUtils.cs | 21 ++++++------ .../TreeEnsemble/RegressionTree.cs | 10 +++--- .../KMeansPlusPlusTrainer.cs | 34 +++---------------- .../Optimizer/LineSearch.cs | 2 +- 6 files changed, 41 insertions(+), 52 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index 6e900a36a8..f139515f87 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -30,10 +30,21 @@ public readonly struct VBuffer /// public readonly int Count; + /// + /// The values. Only the first Count of these are valid. + /// + public T[] Values => _values; + + /// + /// The indices. For a dense representation, this array is not used. For a sparse representation + /// it is parallel to values and specifies the logical indices for the corresponding values. + /// + public int[] Indices => _indices; + /// /// The explicitly represented values. /// - public ReadOnlySpan GetValues() => Values.AsSpan(0, Count); + public ReadOnlySpan GetValues() => _values.AsSpan(0, Count); /// /// The indices. 
For a dense representation, this array is not used. For a sparse representation @@ -45,7 +56,7 @@ public readonly struct VBuffer /// - non-zeros values 98 and 76 respectively at the 4th and 6th coordinates /// - zeros at all other coordinates /// - public ReadOnlySpan GetIndices() => IsDense ? default : Indices.AsSpan(0, Count); + public ReadOnlySpan GetIndices() => IsDense ? default : _indices.AsSpan(0, Count); /// /// Gets a value indicating whether every logical element is explicitly @@ -112,7 +123,8 @@ public VBuffer(int length, int count, T[] values, int[] indices) /// public void CopyToDense(ref VBuffer dst) { - var mutation = VBufferMutationContext.Create(ref dst, Length, Count); + // create a dense mutation context + var mutation = VBufferMutationContext.Create(ref dst, Length, Length); if (!IsDense) CopyTo(mutation.Values); @@ -545,8 +557,10 @@ internal VBufferMutationContext(int logicalLength, int physicalValuesCount, T[] _values = values; _indices = indices; + bool isDense = logicalLength == physicalValuesCount; + Values = _values.AsSpan(0, physicalValuesCount); - Indices = _indices.AsSpan(0, physicalValuesCount); + Indices = isDense ? default : _indices.AsSpan(0, physicalValuesCount); } public void Complete(ref VBuffer destintation) diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index c39f7f60d3..5a2d3a007b 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -720,7 +720,11 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { // Major case 2, with src.Dense. if (!dst.IsDense) + { Densify(ref dst); + mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + } + // Both are now dense. Both cases of outer are covered. 
for (int i = 0; i < srcValues.Length; i++) manip(i, srcValues[i], ref mutation.Values[i]); diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index 84fe2e135f..ba549be4d3 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -75,12 +75,10 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, var bottomHeap = new Heap>((left, right) => right.Value > left.Value, bottom + 1); bool isDense = a.IsDense; - var aValues = a.GetValues(); - var aIndices = a.GetIndices(); for (int i = 0; i < a.Count; i++) { - int idx = isDense ? i : aIndices[i]; - var value = aValues[i]; + int idx = isDense ? i : a.Indices[i]; + var value = a.Values[i]; if (value < 0 && bottom > 0) { @@ -110,21 +108,22 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, } var newCount = topHeap.Count + bottomHeap.Count; - var mutation = VBufferMutationContext.Create(ref a, a.Length, newCount); - var indices = mutation.Indices; + var indices = a.Indices; + Utils.EnsureSize(ref indices, newCount); + Contracts.Assert(Utils.Size(a.Values) >= newCount); int count = 0; while (topHeap.Count > 0) { var pair = topHeap.Pop(); indices[count] = pair.Key; - mutation.Values[count++] = pair.Value; + a.Values[count++] = pair.Value; } while (bottomHeap.Count > 0) { var pair = bottomHeap.Pop(); indices[count] = pair.Key; - mutation.Values[count++] = pair.Value; + a.Values[count++] = pair.Value; } Contracts.Assert(count == newCount); @@ -133,7 +132,7 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, { for (var i = 0; i < newCount; i++) { - var value = mutation.Values[i]; + var value = a.Values[i]; var absValue = Math.Abs(value); if (absValue > absMax) absMax = absValue; @@ -143,13 +142,13 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, { var ratio = 1 / absMax; for (var i = 0; i < 
newCount; i++) - mutation.Values[i] = ratio * mutation.Values[i]; + a.Values[i] = ratio * a.Values[i]; } } if (indices != null) Array.Sort(indices, a.Values, 0, newCount); - mutation.Complete(ref a); + a = new VBuffer(a.Length, newCount, a.Values, indices); } /// diff --git a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs index 50e965e1a3..834a4188f1 100644 --- a/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs +++ b/src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs @@ -798,7 +798,6 @@ public int GetLeaf(in VBuffer feat, ref List path) if (feat.IsDense) return GetLeafCore(feat.GetValues(), path); return GetLeafCore(feat.GetIndices(), feat.GetValues(), path); - } private Float GetFeatureValue(Float x, int node) @@ -907,7 +906,6 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV if (NumLeaves == 1) return 0; - int count = featIndices.Length; int node = root; while (node >= 0) @@ -922,13 +920,13 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV //REVIEW: Consider experimenting with bitmap instead of doing log(n) binary search. 
int newNode = LteChild[node]; - int end = Utils.FindIndexSorted(featIndices, 0, count, CategoricalSplitFeatureRanges[node][1]); - for (int i = Utils.FindIndexSorted(featIndices, 0, count, CategoricalSplitFeatureRanges[node][0]); + int end = featIndices.FindIndexSorted(0, count, CategoricalSplitFeatureRanges[node][1]); + for (int i = featIndices.FindIndexSorted(0, count, CategoricalSplitFeatureRanges[node][0]); i < count && i <= end; ++i) { int index = featIndices[i]; - if (Utils.TryFindIndexSorted(CategoricalSplitFeatures[node], 0, CategoricalSplitFeatures[node].Length, index, out int ii)) + if (CategoricalSplitFeatures[node].TryFindIndexSorted(0, CategoricalSplitFeatures[node].Length, index, out int ii)) { Float val = GetFeatureValue(featValues[i], node); if (val > 0.0f) @@ -946,7 +944,7 @@ private int GetLeafCore(ReadOnlySpan featIndices, ReadOnlySpan featV Float val = 0; int ifeat = SplitFeatures[node]; - int ii = Utils.FindIndexSorted(featIndices, 0, count, ifeat); + int ii = featIndices.FindIndexSorted(0, count, ifeat); if (ii < count && featIndices[ii] == ifeat) val = featValues[ii]; val = GetFeatureValue(val, node); diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 853976885a..a5a6bd08f2 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -788,7 +788,6 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl // The final chosen points, to be approximately clustered to determine starting // centroids. VBuffer[] clusters = new VBuffer[totalSamples]; - VBuffer[] readOnlyClusters = null; // L2s, kept for distance trick. 
float[] clustersL2s = new float[totalSamples]; @@ -861,9 +860,6 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl clusterCount++; } ch.Assert(clusterCount - clusterPrevCount <= numSamplesPerRound); - - KMeansUtils.UpdateReadOnlyCache(clusters, ref readOnlyClusters); - logicalExternalRounds++; pCh.Checkpoint(logicalExternalRounds, numRounds + 2); } @@ -1321,11 +1317,9 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe Initialize(ch, cursorFactory, totalTrainingInstances, numThreads, k, dimensionality, accelMemBudgetInMb, out state, out workState, out reducedState); float[] centroidL2s = new float[k]; - VBuffer[] readOnlyCentroids = new VBuffer[centroids.Length]; - KMeansUtils.UpdateReadOnlyCache(centroids, ref readOnlyCentroids); for (int i = 0; i < k; i++) - centroidL2s[i] = VectorUtils.NormSquared(in readOnlyCentroids[i]); + centroidL2s[i] = VectorUtils.NormSquared(in centroids[i]); using (var pch = host.StartProgressChannel("KMeansTrain")) { @@ -1352,7 +1346,7 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe ops[i] = new Action(() => { using (var cursor = set[chunkId]) - ProcessChunk(cursor, state, workState[chunkId], k, readOnlyCentroids, centroidL2s); + ProcessChunk(cursor, state, workState[chunkId], k, centroids, centroidL2s); }); } @@ -1364,7 +1358,7 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe else { using (var cursor = cursorFactory.Create()) - ProcessChunk(cursor, state, reducedState, k, readOnlyCentroids, centroidL2s); + ProcessChunk(cursor, state, reducedState, k, centroids, centroidL2s); } WorkChunkState.Reduce(workState, reducedState); @@ -1401,13 +1395,11 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe } #endif reducedState.UpdateClusters(centroids, centroidL2s, state.Delta, ref state.DeltaMax); - KMeansUtils.UpdateReadOnlyCache(centroids, ref readOnlyCentroids); - isConverged 
= reducedState.AverageScoreDelta < convergenceThreshold; state.Iteration++; if (state.Iteration % 100 == 0) - KMeansUtils.VerifyModelConsistency(readOnlyCentroids); + KMeansUtils.VerifyModelConsistency(centroids); } } } @@ -1804,23 +1796,5 @@ public static void VerifyModelConsistency(VBuffer[] centroids) foreach (var centroid in centroids) Contracts.Check(centroid.Items().Select(x => x.Value).All(FloatUtils.IsFinite), "Model training failed: non-finite coordinates are generated"); } - - /// - /// Checks that all coordinates of all centroids are finite, and throws otherwise - /// - public static void VerifyModelConsistency(VBuffer[] centroids) - { - for (int i = 0; i < centroids.Length; i++) - Contracts.Check(centroids[i].GetValues().All(FloatUtils.IsFinite), "Model training failed: non-finite coordinates are generated"); - } - - public static void UpdateReadOnlyCache(VBuffer[] source, ref VBuffer[] destination) - { - Utils.EnsureSize(ref destination, source.Length); - for (int i = 0; i < source.Length; i++) - { - destination[i] = source[i]; - } - } } } diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/LineSearch.cs b/src/Microsoft.ML.StandardLearners/Optimizer/LineSearch.cs index fb8e2a6520..6b905a8ef2 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/LineSearch.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/LineSearch.cs @@ -530,7 +530,7 @@ public static void Main(string[] argv) GDOptimizer gdo = new GDOptimizer(term, null, true); print = true; CreateWrapped(out init, 0, 0); - gdo.Minimize(QuadTest2D, ref init, ref ans); + gdo.Minimize(QuadTest2D, in init, ref ans); QuadTest2D(in ans, ref grad); Console.WriteLine(VectorUtils.Norm(grad)); } From 8adb2af2a3e49e063de6c363359ef0c21d8fba51 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Wed, 7 Nov 2018 23:23:04 -0600 Subject: [PATCH 03/14] Keep moving to the master plan of VBuffer. 
--- src/Microsoft.ML.Core/Data/VBuffer.cs | 33 ++++- .../Utilities/VBufferUtils.cs | 137 +++++++++--------- .../Commands/ShowSchemaCommand.cs | 2 +- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 4 +- .../DataLoadSave/Text/TextSaver.cs | 7 +- src/Microsoft.ML.Data/DataView/Transposer.cs | 51 ++++--- .../Evaluators/EvaluatorUtils.cs | 4 +- .../Transforms/ConcatTransform.cs | 16 +- src/Microsoft.ML.Ensemble/EnsembleUtils.cs | 39 ++--- .../OutputCombiners/BaseMultiAverager.cs | 11 +- .../OutputCombiners/BaseMultiCombiner.cs | 8 +- .../OutputCombiners/BaseScalarStacking.cs | 8 +- .../OutputCombiners/MultiMedian.cs | 8 +- .../OutputCombiners/MultiStacking.cs | 8 +- .../OutputCombiners/MultiVoting.cs | 17 +-- .../OlsLinearRegression.cs | 18 +-- .../SymSgdClassificationTrainer.cs | 12 +- .../ImagePixelExtractorTransform.cs | 26 ++-- .../KMeansPlusPlusTrainer.cs | 6 +- .../KMeansPredictor.cs | 12 +- .../Models/ConfusionMatrix.cs | 5 +- .../Runtime/EntryPoints/FeatureCombiner.cs | 5 +- .../OnnxTransform.cs | 9 +- src/Microsoft.ML.OnnxTransform/OnnxUtils.cs | 21 ++- .../Optimizer/SgdOptimizer.cs | 56 +++---- .../Standard/LinearPredictorUtils.cs | 4 +- .../LogisticRegression/LogisticRegression.cs | 11 +- .../MulticlassLogisticRegression.cs | 13 +- .../Standard/ModelStatistics.cs | 2 +- .../MultiClass/MultiClassNaiveBayesTrainer.cs | 31 ++-- .../Standard/Online/LinearSvm.cs | 7 +- .../PoissonRegression/PoissonRegression.cs | 5 +- .../Standard/SdcaBinary.cs | 7 +- .../Standard/SdcaMultiClass.cs | 14 +- .../TensorFlow/TensorflowUtils.cs | 20 ++- .../TensorflowTransform.cs | 20 +-- 36 files changed, 349 insertions(+), 308 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index f139515f87..f909fba51b 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -501,12 +501,30 @@ internal VBufferMutationContext GetMutableContext( Utils.EnsureSize(ref indices, valuesCount.Value, maxCapacity, 
keepOldOnResize, out createdNewIndices); } - return new VBufferMutationContext(newLogicalLength, valuesCount.Value, values, indices); + return new VBufferMutationContext( + newLogicalLength, + valuesCount.Value, + values, + indices, + createdNewValues, + createdNewIndices); } } public static class VBufferMutationContext { + public static VBufferMutationContext CreateFromBuffer( + ref VBuffer destination) + { + return destination.GetMutableContext( + destination.Length, + destination.Count, + maxValuesCapacity: null, + keepOldOnResize: false, + out bool _, + out bool _); + } + public static VBufferMutationContext Create( ref VBuffer destination, int newLogicalLength, @@ -551,7 +569,15 @@ public ref struct VBufferMutationContext public readonly Span Values; public readonly Span Indices; - internal VBufferMutationContext(int logicalLength, int physicalValuesCount, T[] values, int[] indices) + public bool CreatedNewValues { get;} + public bool CreatedNewIndices { get;} + + internal VBufferMutationContext(int logicalLength, + int physicalValuesCount, + T[] values, + int[] indices, + bool createdNewValues, + bool createdNewIndices) { _logicalLength = logicalLength; _values = values; @@ -561,6 +587,9 @@ internal VBufferMutationContext(int logicalLength, int physicalValuesCount, T[] Values = _values.AsSpan(0, physicalValuesCount); Indices = isDense ? 
default : _indices.AsSpan(0, physicalValuesCount); + + CreatedNewValues = createdNewValues; + CreatedNewIndices = createdNewIndices; } public void Complete(ref VBuffer destintation) diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index 5a2d3a007b..819d421690 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -697,7 +697,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< var srcValues = src.GetValues(); var dstValues = dst.GetValues(); var dstIndices = dst.GetIndices(); - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); if (srcValues.Length == 0) { // Major case 1, with srcValues.Length == 0. @@ -722,7 +722,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (!dst.IsDense) { Densify(ref dst); - mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + mutation = VBufferMutationContext.CreateFromBuffer(ref dst); } // Both are now dense. Both cases of outer are covered. @@ -752,22 +752,22 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< } else { - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) manip(srcIndices[i], srcValues[i], ref mutation.Values[srcIndices[i]]); } return; } - if (dst.Count == 0) + if (dstValues.Length == 0) { // Major case 4, with dst empty. Note that !src.Dense. // Neither is dense, and dst is empty. Both cases of outer are covered. 
mutation = VBufferMutationContext.Create(ref dst, src.Length, - src.Count, + srcValues.Length, maxValuesCapacity: src.Length); mutation.Values.Clear(); - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) manip(mutation.Indices[i] = srcIndices[i], srcValues[i], ref mutation.Values[i]); mutation.Complete(ref dst); return; @@ -775,16 +775,16 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // Beyond this point, we can assume both a and b are sparse with positive count. int dI = 0; - int newCount = dst.Count; + int newCount = dstValues.Length; // Try to find each src index in dst indices, counting how many more we'll add. - for (int sI = 0; sI < src.Count; sI++) + for (int sI = 0; sI < srcValues.Length; sI++) { int sIndex = srcIndices[sI]; - while (dI < dst.Count && dstIndices[dI] < sIndex) + while (dI < dstValues.Length && dstIndices[dI] < sIndex) dI++; - if (dI == dst.Count) + if (dI == dstValues.Length) { - newCount += src.Count - sI; + newCount += srcValues.Length - sI; break; } if (dstIndices[dI] == sIndex) @@ -793,8 +793,8 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< newCount++; } Contracts.Assert(newCount > 0); - Contracts.Assert(0 < src.Count && src.Count <= newCount); - Contracts.Assert(0 < dst.Count && dst.Count <= newCount); + Contracts.Assert(0 < srcValues.Length && srcValues.Length <= newCount); + Contracts.Assert(0 < dstValues.Length && dstValues.Length <= newCount); // REVIEW: Densify above a certain threshold, not just if // the output will necessarily become dense? But then we get into @@ -813,7 +813,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< return; } - if (newCount != src.Count && newCount != dst.Count) + if (newCount != srcValues.Length && newCount != dstValues.Length) { // Major case 6, neither set of indices is a subset of the other. 
// This subcase used to fall through to another subcase, but this @@ -826,8 +826,8 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< maxValuesCapacity: dst.Length); var indices = mutation.Indices; var values = mutation.Values; - int sI = src.Count - 1; - dI = dst.Count - 1; + int sI = srcValues.Length - 1; + dI = dstValues.Length - 1; int sIndex = srcIndices[sI]; int dIndex = dstIndices[dI]; @@ -866,13 +866,13 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< return; } - if (newCount == dst.Count) + if (newCount == dstValues.Length) { - if (newCount == src.Count) + if (newCount == srcValues.Length) { // Major case 7, the set of indices is the same for src and dst. - Contracts.Assert(src.Count == dst.Count); - for (int i = 0; i < src.Count; i++) + Contracts.Assert(srcValues.Length == dstValues.Length); + for (int i = 0; i < srcValues.Length; i++) { Contracts.Assert(srcIndices[i] == dstIndices[i]); manip(srcIndices[i], srcValues[i], ref mutation.Values[i]); @@ -880,18 +880,18 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< return; } // Major case 8, the indices of src must be a subset of dst's indices. - Contracts.Assert(newCount > src.Count); + Contracts.Assert(newCount > srcValues.Length); dI = 0; if (outer) { int sI = 0; int sIndex = srcIndices[sI]; - for (int i = 0; i < dst.Count; ++i) + for (int i = 0; i < dstValues.Length; ++i) { if (dstIndices[i] == sIndex) { manip(sIndex, srcValues[sI], ref mutation.Values[i]); - sIndex = ++sI == src.Count ? src.Length : srcIndices[sI]; + sIndex = ++sI == srcValues.Length ? 
src.Length : srcIndices[sI]; } else manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); @@ -899,7 +899,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< } else { - for (int sI = 0; sI < src.Count; sI++) + for (int sI = 0; sI < srcValues.Length; sI++) { int sIndex = srcIndices[sI]; while (dstIndices[dI] < sIndex) @@ -911,15 +911,15 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< return; } - if (newCount == src.Count) + if (newCount == srcValues.Length) { // Major case 9, the indices of dst must be a subset of src's indices. Both cases of outer are covered. // First do a "quasi" densification of dst, by making the indices // of dst correspond to those in src. - mutation = VBufferMutationContext.Create(ref dst, newCount, dst.Count); + mutation = VBufferMutationContext.Create(ref dst, newCount, dstValues.Length); int sI = 0; - for (dI = 0; dI < dst.Count; ++dI) + for (dI = 0; dI < dstValues.Length; ++dI) { int bIndex = dstIndices[dI]; while (srcIndices[sI] < bIndex) @@ -935,7 +935,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< newCount, maxValuesCapacity: src.Length); srcIndices.CopyTo(mutation.Indices); - for (sI = 0; sI < src.Count; sI++) + for (sI = 0; sI < srcValues.Length; sI++) manip(srcIndices[sI], srcValues[sI], ref mutation.Values[sI]); mutation.Complete(ref dst); return; @@ -960,16 +960,16 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf var srcValues = src.GetValues(); var dstValues = dst.GetValues(); - if (dst.Count == 0) + if (dstValues.Length == 0) { - if (src.Count == 0) + if (srcValues.Length == 0) { VBufferMutationContext.Create(ref res, length, 0) .Complete(ref res); } else if (src.IsDense) { - Contracts.Assert(src.Count == src.Length); + Contracts.Assert(srcValues.Length == src.Length); var mutation = VBufferMutationContext.Create(ref res, length); for (int i = 0; i < length; i++) manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); @@ -978,7 +978,7 @@ 
private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else { // src is non-empty sparse. - int count = src.Count; + int count = srcValues.Length; Contracts.Assert(0 < count && count < length); var mutation = VBufferMutationContext.Create(ref res, length, count); var srcIndices = src.GetIndices(); @@ -995,7 +995,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else if (dst.IsDense) { var mutation = VBufferMutationContext.Create(ref res, length); - if (src.Count == 0) + if (srcValues.Length == 0) { if (outer) { @@ -1013,7 +1013,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } else if (src.IsDense) { - Contracts.Assert(src.Count == src.Length); + Contracts.Assert(srcValues.Length == src.Length); for (int i = 0; i < length; i++) manip(i, srcValues[i], dstValues[i], ref mutation.Values[i]); mutation.Complete(ref res); @@ -1021,7 +1021,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else { // src is sparse and non-empty. - int count = src.Count; + int count = srcValues.Length; Contracts.Assert(0 < count && count < length); int ii = 0; @@ -1061,10 +1061,10 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else { // dst is non-empty sparse - int dstCount = dst.Count; + int dstCount = dstValues.Length; var dstIndices = dst.GetIndices(); Contracts.Assert(dstCount > 0); - if (src.Count == 0) + if (srcValues.Length == 0) { var mutation = VBufferMutationContext.Create(ref res, length, dstCount); if (outer) @@ -1107,19 +1107,19 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else { // Both src and dst are non-empty sparse. - Contracts.Assert(src.Count > 0); + Contracts.Assert(srcValues.Length > 0); // Find the count of result, which is the size of the union of the indices set of src and dst. 
int resCount = dstCount; var srcIndices = src.GetIndices(); - for (int ii = 0, jj = 0; ii < src.Count; ii++) + for (int ii = 0, jj = 0; ii < srcValues.Length; ii++) { int i = srcIndices[ii]; - while (jj < dst.Count && dstIndices[jj] < i) + while (jj < dstValues.Length && dstIndices[jj] < i) jj++; - if (jj == dst.Count) + if (jj == dstValues.Length) { - resCount += src.Count - ii; + resCount += srcValues.Length - ii; break; } if (dstIndices[jj] == i) @@ -1129,8 +1129,8 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } Contracts.Assert(0 < resCount && resCount <= length); - Contracts.Assert(resCount <= src.Count + dstCount); - Contracts.Assert(src.Count <= resCount); + Contracts.Assert(resCount <= srcValues.Length + dstCount); + Contracts.Assert(srcValues.Length <= resCount); Contracts.Assert(dstCount <= resCount); if (resCount == length) @@ -1158,7 +1158,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf // Slot (i == j) both defined in src and dst. Apply manip. mutation.Indices[kk] = i; manip(i, srcValues[ii], dstValues[jj], ref mutation.Values[kk]); - i = ++ii == src.Count ? length : srcIndices[ii]; + i = ++ii == srcValues.Length ? length : srcIndices[ii]; j = ++jj == dstCount ? length : dstIndices[jj]; } else if (i < j) @@ -1166,7 +1166,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf // Slot i defined only in src, but not in dst. Apply manip. mutation.Indices[kk] = i; manip(i, srcValues[ii], default(TDst), ref mutation.Values[kk]); - i = ++ii == src.Count ? length : srcIndices[ii]; + i = ++ii == srcValues.Length ? 
length : srcIndices[ii]; } else { @@ -1182,7 +1182,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } } - Contracts.Assert(ii == src.Count && jj == dstCount); + Contracts.Assert(ii == srcValues.Length && jj == dstCount); Contracts.Assert(i == length && j == length); mutation.Complete(ref res); } @@ -1204,9 +1204,11 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref { Contracts.CheckValue(func, nameof(func)); + var srcValues = src.GetValues(); + // REVIEW: The analogous WritableVector method insisted on // equal lengths, but I don't care here. - if (src.Count == 0) + if (srcValues.Length == 0) { VBufferMutationContext.Create(ref dst, src.Length, 0) .Complete(ref dst); @@ -1214,10 +1216,9 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref } var mutation = VBufferMutationContext.Create(ref dst, src.Length, - src.Count, + srcValues.Length, maxValuesCapacity: src.Length); Span values = mutation.Values; - var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < src.Length; ++i) @@ -1283,7 +1284,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer aIndices = a.GetIndices(); for (int i = 0; i < b.Length; i++) { - TSrc1 aVal = (aI < a.Count && i == aIndices[aI]) ? aValues[aI++] : default(TSrc1); + TSrc1 aVal = (aI < aIndices.Length && i == aIndices[aI]) ? aValues[aI++] : default(TSrc1); mutation.Values[i] = func(i, aVal, bValues[i]); } } @@ -1293,7 +1294,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer bIndices = b.GetIndices(); for (int i = 0; i < a.Length; i++) { - TSrc2 bVal = (bI < b.Count && i == bIndices[bI]) ? bValues[bI++] : default(TSrc2); + TSrc2 bVal = (bI < bIndices.Length && i == bIndices[bI]) ? 
bValues[bI++] : default(TSrc2); mutation.Values[i] = func(i, aValues[i], bVal); } } @@ -1311,7 +1312,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer int newCount = 0; aIndices = a.GetIndices(); bIndices = b.GetIndices(); - while (aI < a.Count && bI < b.Count) + while (aI < aIndices.Length && bI < bIndices.Length) { int aCompB = aIndices[aI] - bIndices[bI]; if (aCompB <= 0) // a is no larger than b. @@ -1321,10 +1322,10 @@ public static void ApplyInto(in VBuffer a, in VBuffer newCount++; } - if (aI < a.Count) - newCount += a.Count - aI; - if (bI < b.Count) - newCount += b.Count - bI; + if (aI < aIndices.Length) + newCount += aIndices.Length - aI; + if (bI < bIndices.Length) + newCount += bIndices.Length - bI; // REVIEW: Worth optimizing the newCount == a.Length case? // Probably not... @@ -1332,13 +1333,13 @@ public static void ApplyInto(in VBuffer a, in VBuffer mutation = VBufferMutationContext.Create(ref dst, a.Length, newCount); Span indices = mutation.Indices; - if (newCount == b.Count) + if (newCount == bValues.Length) { - if (newCount == a.Count) + if (newCount == aValues.Length) { // Case 3, a and b actually have the same indices! aIndices.CopyTo(indices); - for (aI = 0; aI < a.Count; aI++) + for (aI = 0; aI < aValues.Length; aI++) { Contracts.Assert(aIndices[aI] == bIndices[aI]); mutation.Values[aI] = func(aIndices[aI], aValues[aI], bValues[aI]); @@ -1349,28 +1350,28 @@ public static void ApplyInto(in VBuffer a, in VBuffer // Case 4, a's indices are a subset of b's. bIndices.CopyTo(indices); aI = 0; - for (bI = 0; aI < a.Count && bI < b.Count; bI++) + for (bI = 0; aI < aValues.Length && bI < bValues.Length; bI++) { Contracts.Assert(aIndices[aI] >= bIndices[bI]); TSrc1 aVal = aIndices[aI] == bIndices[bI] ? 
aValues[aI++] : default(TSrc1); mutation.Values[bI] = func(bIndices[bI], aVal, bValues[bI]); } - for (; bI < b.Count; bI++) + for (; bI < bValues.Length; bI++) mutation.Values[bI] = func(bIndices[bI], default(TSrc1), bValues[bI]); } } - else if (newCount == a.Count) + else if (newCount == aValues.Length) { // Case 5, b's indices are a subset of a's. aIndices.CopyTo(indices); bI = 0; - for (aI = 0; bI < b.Count && aI < a.Count; aI++) + for (aI = 0; bI < bValues.Length && aI < aValues.Length; aI++) { Contracts.Assert(bIndices[bI] >= aIndices[aI]); TSrc2 bVal = aIndices[aI] == bIndices[bI] ? bValues[bI++] : default(TSrc2); mutation.Values[aI] = func(aIndices[aI], aValues[aI], bVal); } - for (; aI < a.Count; aI++) + for (; aI < aValues.Length; aI++) mutation.Values[aI] = func(aIndices[aI], aValues[aI], default(TSrc2)); } else @@ -1379,7 +1380,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer int newI = aI = bI = 0; TSrc1 aVal = default(TSrc1); TSrc2 bVal = default(TSrc2); - while (aI < a.Count && bI < b.Count) + while (aI < aIndices.Length && bI < bIndices.Length) { int aCompB = aIndices[aI] - bIndices[bI]; int index = 0; @@ -1407,14 +1408,14 @@ public static void ApplyInto(in VBuffer a, in VBuffer indices[newI++] = index; } - for (; aI < a.Count; aI++) + for (; aI < aIndices.Length; aI++) { int index = aIndices[aI]; mutation.Values[newI] = func(index, aValues[aI], default(TSrc2)); indices[newI++] = index; } - for (; bI < b.Count; bI++) + for (; bI < bIndices.Length; bI++) { int index = bIndices[bI]; mutation.Values[newI] = func(index, default(TSrc1), bValues[bI]); diff --git a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs index 4fb0738e58..5fbab5176a 100644 --- a/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs +++ b/src/Microsoft.ML.Data/Commands/ShowSchemaCommand.cs @@ -279,7 +279,7 @@ private static void ShowMetadataValueVec(IndentingTextWriter itw, ISchema sch var value = default(VBuffer); 
schema.GetMetadata(kind, col, ref value); - itw.Write(": Length={0}, Count={0}", value.Length, value.Count); + itw.Write(": Length={0}, Count={0}", value.Length, value.GetValues().Length); using (itw.Nest()) { diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index a92cf7b4f8..d6ad3c1cd0 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -392,11 +392,11 @@ public void AddFeatures(int index, in VBuffer buffer) { Contracts.Check(0 <= index && index <= _length - buffer.Length); - int count = buffer.Count; + var values = buffer.GetValues(); + int count = values.Length; if (count == 0) return; - var values = buffer.GetValues(); if (buffer.IsDense) { Contracts.Assert(count == buffer.Length); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs index e5dcf5d582..9474605079 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs @@ -190,11 +190,12 @@ public override void WriteData(Action appendItem, out int le public override void WriteHeader(Action appendItem, out int length) { length = _slotCount; - if (_slotNames.Count == 0) - return; var slotNamesValues = _slotNames.GetValues(); + if (slotNamesValues.Length == 0) + return; + var slotNamesIndices = _slotNames.GetIndices(); - for (int i = 0; i < _slotNames.Count; i++) + for (int i = 0; i < slotNamesValues.Length; i++) { var name = slotNamesValues[i]; if (name.IsEmpty) diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index ae38fea8b0..87cd01b5ee 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -704,13 +704,16 @@ private void EnsureValid() Utils.EnsureSize(ref _cbuff, vecLen); for (int s = 0; s < vecLen; ++s) { - var temp = new VBuffer(_len, _counts[s], 
_values[s], _indices[s]); - if (temp.Count < _len / 2) + int count = _counts[s]; + T[] values = _values[s]; + int[] indices = _indices[s]; + var temp = new VBuffer(_len, count, values, indices); + if (count < _len / 2) { // Already sparse enough, I guess. Swap out the arrays. Utils.Swap(ref temp, ref _cbuff[s]); - _indices[s] = temp.Indices ?? new int[_len]; - _values[s] = temp.Values ?? new T[_len]; + _indices[s] = indices ?? new int[_len]; + _values[s] = values ?? new T[_len]; Ch.Assert(_indices[s].Length == _len); Ch.Assert(_values[s].Length == _len); } @@ -1273,12 +1276,12 @@ private ValueGetter> CreateGetter(int col) (ref VBuffer value) => { EnsureValid(); - var values = value.Values; + VBufferMutationContext mutation; if (_inputValue.IsDense) { - Utils.EnsureSize(ref values, len); - Array.Copy(_inputValue.Values, min, values, 0, len); - value = new VBuffer(len, values, value.Indices); + mutation = VBufferMutationContext.Create(ref value, len); + _inputValue.GetValues().Slice(min, len).CopyTo(mutation.Values); + mutation.Complete(ref value); return; } // In the sparse case we have ranges on Indices/Values to consider. 
@@ -1287,20 +1290,25 @@ private ValueGetter> CreateGetter(int col) int scount = slim - smin; if (scount == 0) { - value = new VBuffer(len, 0, value.Values, value.Indices); + VBufferMutationContext.Create(ref value, len, 0) + .Complete(ref value); return; } - var indices = value.Indices; - Utils.EnsureSize(ref indices, scount); - Utils.EnsureSize(ref values, scount); - Array.Copy(_inputValue.Indices, smin, indices, 0, scount); - if (min != 0) + + mutation = VBufferMutationContext.Create(ref value, len, scount); + bool isDense = len == scount; + if (!isDense) { - for (int i = 0; i < scount; ++i) - indices[i] -= min; + _inputValue.GetIndices().Slice(smin, scount).CopyTo(mutation.Indices); + + if (min != 0) + { + for (int i = 0; i < scount; ++i) + mutation.Indices[i] -= min; + } } - Array.Copy(_inputValue.Values, smin, values, 0, scount); - value = new VBuffer(len, scount, values, indices); + _inputValue.GetValues().Slice(smin, scount).CopyTo(mutation.Values); + mutation.Complete(ref value); }; } @@ -1314,15 +1322,14 @@ private void EnsureValid() // and end of each slice. if (_inputValue.IsDense) return; - if (_inputValue.Count == 0) + var indices = _inputValue.GetIndices(); + if (indices.Length == 0) { // Handle this separately, since _inputValue.Indices might be null // in this case, and then we may as well short circuit it anyway. Array.Clear(_srcIndicesLims, 0, _srcIndicesLims.Length); return; } - var indices = _inputValue.Indices; - Contracts.AssertValue(indices); int ii = 0; for (int i = 0; i < Lims.Length; ++i) @@ -1331,7 +1338,7 @@ private void EnsureValid() // REVIEW: Would some form of bisection search be better // than this scan? Possibly if the search were to happen across // all lims at the same time, somehow. 
- while (ii < _inputValue.Count && indices[ii] < lim) + while (ii < indices.Length && indices[ii] < lim) ii++; _srcIndicesLims[i] = ii; } diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 646b7b3547..affee40f9d 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -615,7 +615,7 @@ public static void ReconcileKeyValues(IHostEnvironment env, IDataView[] views, s var keyNamesVBuffer = new VBuffer>(keyNames.Count, keyNames.Keys.ToArray()); ValueGetter>> keyValueGetter = (ref VBuffer> dst) => - dst = new VBuffer>(keyNamesVBuffer.Length, keyNamesVBuffer.Count, keyNamesVBuffer.Values, keyNamesVBuffer.Indices); + keyNamesVBuffer.CopyTo(ref dst); // For each input data view, create the reconciled key column by wrapping it in a LambdaColumnMapper. for (int i = 0; i < dvCount; i++) @@ -683,7 +683,7 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi var keyNamesVBuffer = new VBuffer>(keyNames.Count, keyNames.Keys.ToArray()); ValueGetter>> keyValueGetter = (ref VBuffer> dst) => - dst = new VBuffer>(keyNamesVBuffer.Length, keyNamesVBuffer.Count, keyNamesVBuffer.Values, keyNamesVBuffer.Indices); + keyNamesVBuffer.CopyTo(ref dst); for (int i = 0; i < dvCount; i++) { diff --git a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs index 0348fbe40f..f19234c693 100644 --- a/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ConcatTransform.cs @@ -722,7 +722,7 @@ private Delegate MakeGetter(IRow input) .MarkSensitive(MessageSensitivity.Schema); } dstLength = checked(dstLength + tmpBufs[i].Length); - dstCount = checked(dstCount + tmpBufs[i].Count); + dstCount = checked(dstCount + tmpBufs[i].GetValues().Length); } else { @@ -749,22 +749,24 @@ private Delegate MakeGetter(IRow input) if (_srcTypes[j].IsVector) { var buffer 
= tmpBufs[j]; - Contracts.Assert(buffer.Count <= dstCount - count); + var bufferValues = buffer.GetValues(); + Contracts.Assert(bufferValues.Length <= dstCount - count); Contracts.Assert(buffer.Length <= dstLength - offset); if (buffer.IsDense) { - for (int i = 0; i < buffer.Length; i++) + for (int i = 0; i < bufferValues.Length; i++) { - values[count] = buffer.Values[i]; + values[count] = bufferValues[i]; indices[count++] = offset + i; } } else { - for (int i = 0; i < buffer.Count; i++) + var bufferIndices = buffer.GetIndices(); + for (int i = 0; i < bufferValues.Length; i++) { - values[count] = buffer.Values[i]; - indices[count++] = offset + buffer.Indices[i]; + values[count] = bufferValues[i]; + indices[count++] = offset + bufferIndices[i]; } } offset += buffer.Length; diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index 275594e17b..afa70713b4 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -47,27 +47,20 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices Contracts.Assert(cardinality == Utils.GetCardinality(includedIndices)); Contracts.Assert(cardinality < src.Length); - var values = dst.Values; - var indices = dst.Indices; - var srcValues = src.GetValues(); if (src.IsDense) { if (cardinality >= src.Length / 2) { T defaultValue = default; - if (Utils.Size(values) < src.Length) - values = new T[src.Length]; + var mutation = VBufferMutationContext.Create(ref dst, src.Length); for (int i = 0; i < srcValues.Length; i++) - values[i] = !includedIndices[i] ? defaultValue : srcValues[i]; - dst = new VBuffer(src.Length, values, indices); + mutation.Values[i] = !includedIndices[i] ? 
defaultValue : srcValues[i]; + mutation.Complete(ref dst); } else { - if (Utils.Size(values) < cardinality) - values = new T[cardinality]; - if (Utils.Size(indices) < cardinality) - indices = new int[cardinality]; + var mutation = VBufferMutationContext.Create(ref dst, src.Length, cardinality); int count = 0; for (int i = 0; i < srcValues.Length; i++) @@ -75,28 +68,19 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices if (includedIndices[i]) { Contracts.Assert(count < cardinality); - values[count] = srcValues[i]; - indices[count] = i; + mutation.Values[count] = srcValues[i]; + mutation.Indices[count] = i; count++; } } Contracts.Assert(count == cardinality); - dst = new VBuffer(src.Length, count, values, indices); + mutation.Complete(ref dst); } } else { - int valuesSize = Utils.Size(values); - int indicesSize = Utils.Size(indices); - - if (valuesSize < srcValues.Length || indicesSize < srcValues.Length) - { - if (valuesSize < cardinality) - values = new T[cardinality]; - if (indicesSize < cardinality) - indices = new int[cardinality]; - } + var mutation = VBufferMutationContext.Create(ref dst, src.Length, cardinality); int count = 0; var srcIndices = src.GetIndices(); @@ -104,13 +88,14 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices { if (includedIndices[srcIndices[i]]) { - values[count] = srcValues[i]; - indices[count] = srcIndices[i]; + mutation.Values[count] = srcValues[i]; + mutation.Indices[count] = srcIndices[i]; count++; } } - dst = new VBuffer(src.Length, count, values, indices); + // TODO: eerhardt - this should be new VBuffer(src.Length, count); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs index 15985316fc..f240600b10 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs @@ -35,14 
+35,11 @@ protected void CombineCore(ref VBuffer dst, VBuffer[] src, Singl return; } - var values = dst.Values; - if (Utils.Size(values) < len) - values = new Single[len]; - else - Array.Clear(values, 0, len); - + var mutation = VBufferMutationContext.Create(ref dst, len); + if (!mutation.CreatedNewValues) + mutation.Values.Clear(); // Set the output to values. - dst = new VBuffer(len, values, dst.Indices); + mutation.Complete(ref dst); Single weightTotal; if (weights == null) diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs index cea9c698ae..e9f8aa1e9c 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs @@ -98,12 +98,10 @@ protected bool TryNormalize(VBuffer[] values) protected void GetNaNOutput(ref VBuffer dst, int len) { Contracts.Assert(len >= 0); - var values = dst.Values; - if (Utils.Size(values) < len) - values = new Single[len]; + var mutation = VBufferMutationContext.Create(ref dst, len); for (int i = 0; i < len; i++) - values[i] = Single.NaN; - dst = new VBuffer(len, values, dst.Indices); + mutation.Values[i] = Single.NaN; + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs index 9b9900a65f..a8c3388a93 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs @@ -25,11 +25,9 @@ protected override void FillFeatureBuffer(Single[] src, ref VBuffer dst) { Contracts.AssertNonEmpty(src); int len = src.Length; - var values = dst.Values; - if (Utils.Size(values) < len) - values = new Single[len]; - Array.Copy(src, values, len); - dst = new VBuffer(len, values, dst.Indices); + var mutation = VBufferMutationContext.Create(ref dst, len); + src.CopyTo(mutation.Values); + 
mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs index 86312393de..471029c204 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs @@ -81,9 +81,7 @@ public override Combiner> GetCombiner() return; } - var values = dst.Values; - if (Utils.Size(values) < len) - values = new Single[len]; + var mutation = VBufferMutationContext.Create(ref dst, len); int count = src.Length; if (Utils.Size(raw) < count) @@ -92,11 +90,11 @@ public override Combiner> GetCombiner() { for (int j = 0; j < count; j++) raw[j] = i < src[j].Length ? src[j].GetItemOrDefault(i) : 0; - values[i] = MathUtils.GetMedianInPlace(raw, count); + mutation.Values[i] = MathUtils.GetMedianInPlace(raw, count); } // Set the output to values. - dst = new VBuffer(len, values, dst.Indices); + mutation.Complete(ref dst); }; } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs index f02d692f54..c1754d2016 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs @@ -83,19 +83,17 @@ protected override void FillFeatureBuffer(VBuffer[] src, ref VBuffer(len, values, dst.Indices); + var mutation = VBufferMutationContext.Create(ref dst, len); int iv = 0; for (int i = 0; i < src.Length; i++) { - src[i].CopyTo(values, iv); + src[i].CopyTo(mutation.Values, iv); iv += src[i].Length; Contracts.Assert(iv <= len); } Contracts.Assert(iv == len); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs index 75fd4f5222..0cb7f5dfd5 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs @@ -77,16 
+77,15 @@ private void CombineCore(ref VBuffer dst, VBuffer[] src, Single[ int count = Utils.Size(src); if (count == 0) { - dst = new VBuffer(0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, 0) + .Complete(ref dst); return; } int len = GetClassCount(src); - var values = dst.Values; - if (Utils.Size(values) < len) - values = new Single[len]; - else - Array.Clear(values, 0, len); + var mutation = VBufferMutationContext.Create(ref dst, len); + if (!mutation.CreatedNewValues) + mutation.Values.Clear(); int voteCount = 0; for (int i = 0; i < count; i++) @@ -94,17 +93,17 @@ private void CombineCore(ref VBuffer dst, VBuffer[] src, Single[ int index = VectorUtils.ArgMax(in src[i]); if (index >= 0) { - values[index]++; + mutation.Values[index]++; voteCount++; } } // Normalize by dividing by the number of votes. for (int i = 0; i < len; i++) - values[i] /= voteCount; + mutation.Values[i] /= voteCount; // Set the output to values. - dst = new VBuffer(len, values, dst.Indices); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 405310ec50..2d5134fa0a 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -279,8 +279,9 @@ private OlsLinearRegressionPredictor TrainCore(IChannel ch, FloatLabelCursor.Fac ch.Check(FloatUtils.IsFinite(beta[i]), "Non-finite values detected in OLS solution"); var weights = VBufferUtils.CreateDense(beta.Length - 1); + var weightsMutation = VBufferMutationContext.CreateFromBuffer(ref weights); for (int i = 1; i < beta.Length; ++i) - weights.Values[i - 1] = (float)beta[i]; + weightsMutation.Values[i - 1] = (float)beta[i]; var bias = (float)beta[0]; if (!(_l2Weight > 0) && m == n) { @@ -670,8 +671,9 @@ private OlsLinearRegressionPredictor(IHostEnvironment env, ModelLoadContext ctx) _tValues = ctx.Reader.ReadDoubleArray(m); TValueCheckDecode(Bias, 
_tValues[0]); + var weightValues = Weight.GetValues(); for (int i = 1; i < m; ++i) - TValueCheckDecode(Weight.Values[i - 1], _tValues[i]); + TValueCheckDecode(weightValues[i - 1], _tValues[i]); _pValues = ctx.Reader.ReadDoubleArray(m); for (int i = 0; i < m; ++i) @@ -747,7 +749,7 @@ public override void SaveSummary(TextWriter writer, RoleMappedSchema schema) const string format = "{0}\t{1}\t{2}\t{3:g4}\t{4:g4}\t{5:e4}"; writer.WriteLine(format, "", "Bias", Bias, _standardErrors[0], _tValues[0], _pValues[0]); Contracts.Assert(Weight.IsDense); - var coeffs = Weight.Values; + var coeffs = Weight.GetValues(); for (int i = 0; i < coeffs.Length; i++) { var name = names.GetItemOrDefault(i); @@ -762,7 +764,7 @@ public override void SaveSummary(TextWriter writer, RoleMappedSchema schema) const string format = "{0}\t{1}\t{2}"; writer.WriteLine(format, "", "Bias", Bias); Contracts.Assert(Weight.IsDense); - var coeffs = Weight.Values; + var coeffs = Weight.GetValues(); for (int i = 0; i < coeffs.Length; i++) { var name = names.GetItemOrDefault(i); @@ -779,18 +781,16 @@ public override void GetFeatureWeights(ref VBuffer weights) return; } - var values = weights.Values; var size = _pValues.Length - 1; - if (Utils.Size(values) < size) - values = new float[size]; + var mutation = VBufferMutationContext.Create(ref weights, size); for (int i = 0; i < size; i++) { var score = -(float)Math.Log(_pValues[i + 1]); if (score > float.MaxValue) score = float.MaxValue; - values[i] = score; + mutation.Values[i] = score; } - weights = new VBuffer(size, values, weights.Indices); + mutation.Complete(ref weights); } } } diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index 0e2d6720d8..a69cdf9009 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -654,6 +654,8 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData 
data, LinearPredictor p else weights = VBufferUtils.CreateDense(numFeatures); + var weightsMutation = VBufferMutationContext.CreateFromBuffer(ref weights); + // Reference: Parasail. SymSGD. bool tuneLR = _args.LearningRate == null; var lr = _args.LearningRate ?? 1.0f; @@ -688,7 +690,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p pch.SetHeader(new ProgressHeader(new[] { "iterations" }), entry => entry.SetProgress(0, state.PassIteration, _args.NumberOfIterations)); // If fully loaded, call the SymSGDNative and do not come back until learned for all iterations. - Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weights.Values, ref bias, numFeatures, + Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsMutation.Values, ref bias, numFeatures, _args.NumberOfIterations, numThreads, tuneNumLocIter, ref numLocIter, _args.Tolerance, _args.Shuffle, shouldInitialize, stateGCHandle); shouldInitialize = false; } @@ -709,7 +711,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p // If all of this leaves us with 0 passes, then set numPassesForThisBatch to 1 numPassesForThisBatch = Math.Max(1, numPassesForThisBatch); state.PassIteration = iter; - Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weights.Values, ref bias, numFeatures, + Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsMutation.Values, ref bias, numFeatures, numPassesForThisBatch, numThreads, tuneNumLocIter, ref numLocIter, _args.Tolerance, _args.Shuffle, shouldInitialize, stateGCHandle); shouldInitialize = false; @@ -730,7 +732,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p // Maps back the dense features that are mislocated if (numThreads > 1) - Native.MapBackWeightVector(weights.Values, stateGCHandle); + Native.MapBackWeightVector(weightsMutation.Values, stateGCHandle); Native.DeallocateSequentially(stateGCHandle); } } @@ -784,7 
+786,7 @@ private static extern void LearnAll(int totalNumInstances, int* instSizes, int** /// Specifies if this is the first time to run SymSGD /// public static void LearnAll(InputDataManager inputDataManager, bool tuneLR, - ref float lr, float l2Const, float piw, float[] weightVector, ref float bias, int numFeatres, int numPasses, + ref float lr, float l2Const, float piw, Span weightVector, ref float bias, int numFeatres, int numPasses, int numThreads, bool tuneNumLocIter, ref int numLocIter, float tolerance, bool needShuffle, bool shouldInitialize, GCHandle stateGCHandle) { inputDataManager.PrepareCursoring(); @@ -838,7 +840,7 @@ public static void LearnAll(InputDataManager inputDataManager, bool tuneLR, /// /// The weight vector /// - public static void MapBackWeightVector(float[] weightVector, GCHandle stateGCHandle) + public static void MapBackWeightVector(Span weightVector, GCHandle stateGCHandle) { fixed (float* pweightVector = &weightVector[0]) MapBackWeightVector(pweightVector, (State*)stateGCHandle.AddrOfPinnedObject()); diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index 2bb9b912ae..d84e3c5bd6 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Drawing; using System.Linq; +using System.Runtime.InteropServices; using System.Text; using Microsoft.ML.Core.Data; using Microsoft.ML.Runtime; @@ -439,6 +440,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose //REVIEW Rewrite it to where TValue : IConvertible private ValueGetter> GetGetterCore(IRow input, int iinfo, out Action disposer) + where TValue : struct { var type = _types[iinfo]; var dims = type.Dimensions; @@ -476,26 +478,26 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo if (src == null) { - dst = new 
VBuffer(size, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, size, 0) + .Complete(ref dst); return; } Host.Check(src.PixelFormat == System.Drawing.Imaging.PixelFormat.Format32bppArgb); Host.Check(src.Height == height && src.Width == width); - var values = dst.Values; - if (Utils.Size(values) < size) - values = new TValue[size]; + var mutation = VBufferMutationContext.Create(ref dst, size); + var values = mutation.Values; float offset = ex.Offset; float scale = ex.Scale; Contracts.Assert(scale != 0); - var vf = values as float[]; - var vb = values as byte[]; - Contracts.Assert(vf != null || vb != null); + Span vf = typeof(TValue) == typeof(float) ? MemoryMarshal.Cast(mutation.Values) : default; + Span vb = typeof(TValue) == typeof(byte) ? MemoryMarshal.Cast(mutation.Values) : default; + Contracts.Assert(!vf.IsEmpty || !vb.IsEmpty); bool needScale = offset != 0 || scale != 1; - Contracts.Assert(!needScale || vf != null); + Contracts.Assert(!needScale || !vf.IsEmpty); bool a = ex.Alpha; bool r = ex.Red; @@ -512,7 +514,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo for (int y = 0; y < h; ++y) { var pb = src.GetPixel(x, y); - if (vb != null) + if (!vb.IsEmpty) { if (a) { vb[idst++] = pb.A; } if (r) { vb[idst++] = pb.R; } @@ -543,7 +545,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo { // The image only has rgb but we need to supply alpha as well, so fake it up, // assuming that it is 0xFF. - if (vf != null) + if (!vf.IsEmpty) { Single v = (0xFF - offset) * scale; for (int i = 0; i < cpix; i++) @@ -566,7 +568,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo int idstBase = idstMin + y * w; // Note that the bytes are in order BGR[A]. We arrange the layers in order ARGB. 
- if (vb != null) + if (!vb.IsEmpty) { for (int x = 0; x < w; x++, idstBase++) { @@ -605,7 +607,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo } } - dst = new VBuffer(size, values, dst.Indices); + mutation.Complete(ref dst); }; } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index a5a6bd08f2..2682e50301 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -396,7 +396,7 @@ public static void Initialize( "Not enough distinct instances to populate {0} clusters (only found {1} distinct instances)", k, i); } - candidate.CopyTo(centroids[i].Values); + candidate.CopyToDense(ref centroids[i]); centroidL2s[i] = cachedCandidateL2 ?? VectorUtils.NormSquared(candidate); } } @@ -1389,8 +1389,10 @@ public static void Train(IHost host, int numThreads, IChannel ch, FeatureFloatVe for (int i = 0; i < k; i++) { + var reducedStateCacheValues = reducedState.CachedSumDebug[i].GetValues(); + var cachedSumCopyValues = cachedSumCopy[i].GetValues(); for (int j = 0; j < dimensionality; j++) - Contracts.Assert(AlmostEq(reducedState.CachedSumDebug[i].Values[j], cachedSumCopy[i].Values[j])); + Contracts.Assert(AlmostEq(reducedStateCacheValues[j], cachedSumCopyValues[j])); } } #endif diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index c40f9bcdd1..507334dd1c 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -148,19 +148,17 @@ public ValueMapper GetMapper() { if (src.Length != _dimensionality) throw Host.Except($"Incorrect number of features: expected {_dimensionality}, got {src.Length}"); - var values = dst.Values; - if (Utils.Size(values) < _k) - values = new Float[_k]; - Map(in src, values); - dst = new VBuffer(_k, values, dst.Indices); + var mutation = 
VBufferMutationContext.Create(ref dst, _k); + Map(in src, mutation.Values); + mutation.Complete(ref dst); }; return (ValueMapper)(Delegate)del; } - private void Map(in VBuffer src, Float[] distances) + private void Map(in VBuffer src, Span distances) { - Host.Assert(Utils.Size(distances) >= _k); + Host.Assert(distances.Length >= _k); Float instanceL2 = VectorUtils.NormSquared(in src); for (int i = 0; i < _k; i++) diff --git a/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs b/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs index d8bb404d49..04c978ea44 100644 --- a/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs +++ b/src/Microsoft.ML.Legacy/Models/ConfusionMatrix.cs @@ -75,9 +75,10 @@ internal static List Create(IHostEnvironment env, IDataView con elements = new double[type.VectorSize, type.VectorSize]; countGetter(ref countValues); - for (int i = 0; i < countValues.Length; i++) + ReadOnlySpan values = countValues.GetValues(); + for (int i = 0; i < values.Length; i++) { - elements[valuesRowIndex, i] = countValues.Values[i]; + elements[valuesRowIndex, i] = values[i]; } valuesRowIndex++; diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs index f560671fae..50ad0da77a 100644 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs +++ b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/FeatureCombiner.cs @@ -129,10 +129,11 @@ private static string GetTerms(IDataView data, string colName) return null; var sb = new StringBuilder(); var pre = ""; - for (int i = 0; i < metadata.Length; i++) + var metadataValues = metadata.GetValues(); + for (int i = 0; i < metadataValues.Length; i++) { sb.Append(pre); - sb.AppendMemory(metadata.Values[i]); + sb.AppendMemory(metadataValues[i]); pre = ","; } return sb.ToString(); diff --git a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs index 834889b946..fddf019ee1 100644 --- 
a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs @@ -282,12 +282,9 @@ private Delegate MakeGetter(IRow input) var outputTensors = _parent.Model.Run(inputTensors); Contracts.Assert(outputTensors.Count() > 0); - var values = dst.Values; - if (Utils.Size(values) < _outputColType.VectorSize) - values = new T[_outputColType.VectorSize]; - - OnnxUtils.CopyTo(outputTensors[0], values); - dst = new VBuffer(values.Length, values, dst.Indices); + var mutation = VBufferMutationContext.Create(ref dst, _outputColType.VectorSize); + OnnxUtils.CopyTo(outputTensors[0], mutation.Values); + mutation.Complete(ref dst); }; return valueGetter; diff --git a/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs b/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs index 05abdfb40a..92a4188966 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxUtils.cs @@ -99,19 +99,20 @@ private class TensorValueGetterVec : ITensorValueGetter private readonly ValueGetter> _srcgetter; private readonly OnnxShape _tensorShape; private VBuffer _vBuffer; - private VBuffer _vBufferDense; + private T[] _denseData; public TensorValueGetterVec(IRow input, int colIndex, OnnxShape tensorShape) { _srcgetter = input.GetGetter>(colIndex); _tensorShape = tensorShape; _vBuffer = default; - _vBufferDense = default; + _denseData = default; } public Tensor GetTensor() { _srcgetter(ref _vBuffer); - _vBuffer.CopyToDense(ref _vBufferDense); - return OnnxUtils.CreateTensor(_vBufferDense.Values, _tensorShape); + Utils.EnsureSize(ref _denseData, _vBuffer.Length, keepOld: false); + _vBuffer.CopyTo(_denseData); + return OnnxUtils.CreateTensor(_denseData, _tensorShape); } } } @@ -338,12 +339,18 @@ public static Tensor CreateTensor(T[] data, OnnxShape shape) /// Also Tensor.CopyTo(List<T> dst) requires a list input, whereas ML.NET /// provides array buffers to copy values to. This mismatch causes an extra copy. 
/// - public static void CopyTo(Tensor tensor, T[] dst) + public static unsafe void CopyTo(Tensor tensor, Span dst) { if (typeof(T) == typeof(System.Single)) { - var typedDst = (System.Single[])(object)dst; - tensor.CopyTo(typedDst); + DataType dataType = tensor.GetDataType(); + if (dataType != DataType.Type_Float) + { + throw new InvalidOperationException(string.Format("Cannot copy source tensor {0} to managed type System.Single (DataType.Type_Float).", dataType)); + } + + Span tensorSpan = new Span(tensor.UnsafeGetData().ToPointer(), tensor.GetSize()); + tensorSpan.CopyTo(dst); // TODO: the CopyTo() function is susceptible to GC reclaiming tensor // during the method call. Use KeepAlive for now, and remove // after permanent fix in CopyTo(). diff --git a/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs b/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs index b814ee187e..2777cb077d 100644 --- a/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs +++ b/src/Microsoft.ML.StandardLearners/Optimizer/SgdOptimizer.cs @@ -387,96 +387,102 @@ internal static bool ShouldTerminate(in VBuffer x, in VBuffer xpre Contracts.Assert(x.Length == xprev.Length, "Vectors must have the same dimensionality."); Contracts.Assert(FloatUtils.IsFinite(xprev.GetValues())); - if (!FloatUtils.IsFinite(x.GetValues())) + var xValues = x.GetValues(); + if (!FloatUtils.IsFinite(xValues)) return true; + var xprevValues = xprev.GetValues(); if (x.IsDense && xprev.IsDense) { - for (int i = 0; i < x.Length; i++) + for (int i = 0; i < xValues.Length; i++) { - if (x.Values[i] != xprev.Values[i]) + if (xValues[i] != xprevValues[i]) return false; } } else if (xprev.IsDense) { + var xIndices = x.GetIndices(); int j = 0; - for (int ii = 0; ii < x.Count; ii++) + for (int ii = 0; ii < xValues.Length; ii++) { - int i = x.Indices[ii]; + int i = xIndices[ii]; while (j < i) { - if (xprev.Values[j++] != 0) + if (xprevValues[j++] != 0) return false; } Contracts.Assert(i == j); - if 
(x.Values[ii] != xprev.Values[j++]) + if (xValues[ii] != xprevValues[j++]) return false; } - while (j < xprev.Length) + while (j < xprevValues.Length) { - if (xprev.Values[j++] != 0) + if (xprevValues[j++] != 0) return false; } } else if (x.IsDense) { + var xprevIndices = xprev.GetIndices(); int i = 0; - for (int jj = 0; jj < xprev.Count; jj++) + for (int jj = 0; jj < xprevValues.Length; jj++) { - int j = xprev.Indices[jj]; + int j = xprevIndices[jj]; while (i < j) { - if (x.Values[i++] != 0) + if (xValues[i++] != 0) return false; } Contracts.Assert(j == i); - if (x.Values[i++] != xprev.Values[jj]) + if (xValues[i++] != xprevValues[jj]) return false; } - while (i < x.Length) + while (i < xValues.Length) { - if (x.Values[i++] != 0) + if (xValues[i++] != 0) return false; } } else { // Both sparse. + var xIndices = x.GetIndices(); + var xprevIndices = xprev.GetIndices(); int ii = 0; int jj = 0; - while (ii < x.Count && jj < xprev.Count) + while (ii < xValues.Length && jj < xprevValues.Length) { - int i = x.Indices[ii]; - int j = xprev.Indices[jj]; + int i = xIndices[ii]; + int j = xprevIndices[jj]; if (i == j) { - if (x.Values[ii++] != xprev.Values[jj++]) + if (xValues[ii++] != xprevValues[jj++]) return false; } else if (i < j) { - if (x.Values[ii++] != 0) + if (xValues[ii++] != 0) return false; } else { - if (xprev.Values[jj++] != 0) + if (xprevValues[jj++] != 0) return false; } } - while (ii < x.Count) + while (ii < xValues.Length) { - if (x.Values[ii++] != 0) + if (xValues[ii++] != 0) return false; } - while (jj < xprev.Count) + while (jj < xprevValues.Length) { - if (xprev.Values[jj++] != 0) + if (xprevValues[jj++] != 0) return false; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs index 244019bcdd..69740fa7fe 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearPredictorUtils.cs 
@@ -51,7 +51,7 @@ public static void SaveAsCode(TextWriter writer, in VBuffer weights, Floa writer.Write(FloatUtils.ToRoundTripString(value)); writer.Write("*"); - if (featureNames.Count > 0) + if (featureNames.GetValues().Length > 0) writer.Write(FeatureNameAsCode(featureNames.GetItemOrDefault(idx).ToString(), idx)); else writer.Write("f_" + idx); @@ -118,7 +118,7 @@ public static string LinearModelAsIni(in VBuffer weights, Float bias, IPr var name = featureNames.GetItemOrDefault(idx); inputBuilder.AppendLine("[Input:" + numNonZeroWeights + "]"); - inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : name.IsEmpty ? $"f{idx}" : name.ToString())); + inputBuilder.AppendLine("Name=" + (featureNames.GetValues().Length == 0 ? "Feature_" + idx : name.IsEmpty ? $"f{idx}" : name.ToString())); inputBuilder.AppendLine("Transform=linear"); inputBuilder.AppendLine("Slope=1"); inputBuilder.AppendLine("Intercept=0"); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 60c81b0ed1..0492cbc708 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -157,8 +157,9 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab VectorUtils.AddMultWithOffset(in feat, mult, ref grad, 1); // Note that 0th L-BFGS weight is for bias. // Add bias using this strange trick that has advantage of working well for dense and sparse arrays. // Due to the call to EnsureBiases, we know this region is dense. 
- Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1)); - grad.Values[0] += mult; + var mutation = VBufferMutationContext.CreateFromBuffer(ref grad); + Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); + mutation.Values[0] += mult; return weight * datumLoss; } @@ -298,7 +299,7 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. // Increment the first entry of hessian. hessian[0] += variance; - var values = cursor.Features.Values; + var values = cursor.Features.GetValues(); if (cursor.Features.IsDense) { int ioff = 1; @@ -324,8 +325,8 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor. } else { - var indices = cursor.Features.Indices; - for (int ii = 0; ii < cursor.Features.Count; ++ii) + var indices = cursor.Features.GetIndices(); + for (int ii = 0; ii < values.Length; ++ii) { int i = indices[ii]; int wi = i + 1; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index d8cbb246f1..d2273ca251 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -218,8 +218,9 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab float mult = weight * (modelProb - probLabel); VectorUtils.AddMultWithOffset(in feat, mult, ref grad, start); // Due to the call to EnsureBiases, we know this region is dense. 
- Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1)); - grad.Values[c] += mult; + var mutation = VBufferMutationContext.CreateFromBuffer(ref grad); + Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); + mutation.Values[c] += mult; } Contracts.Check(FloatUtils.IsFinite(datumLoss), "Data contain bad values."); @@ -663,10 +664,10 @@ protected override void SaveCore(ModelSaveContext ctx) int count = 0; foreach (var fw in _weights) { + var fwValues = fw.GetValues(); if (fw.IsDense) { - var fwValues = fw.GetValues(); - for (int i = 0; i < fw.Length; i++) + for (int i = 0; i < fwValues.Length; i++) { if (fwValues[i] != 0) { @@ -677,8 +678,8 @@ protected override void SaveCore(ModelSaveContext ctx) } else { - ctx.Writer.WriteSinglesNoCount(fw.GetValues()); - count += fw.Count; + ctx.Writer.WriteSinglesNoCount(fwValues); + count += fwValues.Length; } } Host.Assert(count == numIndices); diff --git a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs index 1eeb043c01..812e114091 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/ModelStatistics.cs @@ -106,7 +106,7 @@ internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, in VBuffer coeffStdError) : this(env, trainingExampleCount, paramCount, deviance, nullDeviance) { - _env.Assert(coeffStdError.Count == _paramCount); + _env.Assert(coeffStdError.GetValues().Length == _paramCount); _coeffStdError = coeffStdError; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs 
index 8de4a4dd8e..970363cf32 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -133,20 +133,22 @@ protected override MultiClassNaiveBayesPredictor TrainModelCore(TrainContext con labelHistogram[cursor.Label] += 1; labelCount = labelCount < size ? size : labelCount; + var featureValues = cursor.Features.GetValues(); if (cursor.Features.IsDense) { - for (int i = 0; i < cursor.Features.Count; i += 1) + for (int i = 0; i < featureValues.Length; i += 1) { - if (cursor.Features.Values[i] > 0) + if (featureValues[i] > 0) featureHistogram[cursor.Label][i] += 1; } } else { - for (int i = 0; i < cursor.Features.Count; i += 1) + var featureIndices = cursor.Features.GetIndices(); + for (int i = 0; i < featureValues.Length; i += 1) { - if (cursor.Features.Values[i] > 0) - featureHistogram[cursor.Label][cursor.Features.Indices[i]] += 1; + if (featureValues[i] > 0) + featureHistogram[cursor.Label][featureIndices[i]] += 1; } } @@ -374,7 +376,12 @@ private void ComputeLabelProbabilityFromFeature(double labelOccurrenceCount, int private void Map(in VBuffer src, ref VBuffer dst) { Host.Check(src.Length == _featureCount, "Invalid number of features passed."); - float[] labelScores = (dst.Length >= _labelCount) ? 
dst.Values : new float[_labelCount]; + + var srcValues = src.GetValues(); + var srcIndices = src.GetIndices(); + + var mutation = VBufferMutationContext.Create(ref dst, _labelCount); + Span labelScores = mutation.Values; for (int iLabel = 0; iLabel < _labelCount; iLabel += 1) { double labelOccurrenceCount = _labelHistogram[iLabel]; @@ -384,18 +391,18 @@ private void Map(in VBuffer src, ref VBuffer dst) { if (src.IsDense) { - for (int iFeature = 0; iFeature < src.Count; iFeature += 1) + for (int iFeature = 0; iFeature < srcValues.Length; iFeature += 1) { ComputeLabelProbabilityFromFeature(labelOccurrenceCount, iLabel, iFeature, - src.Values[iFeature], ref logProb, ref absentFeatureLogProb); + srcValues[iFeature], ref logProb, ref absentFeatureLogProb); } } else { - for (int iFeature = 0; iFeature < src.Count; iFeature += 1) + for (int iFeature = 0; iFeature < srcValues.Length; iFeature += 1) { - ComputeLabelProbabilityFromFeature(labelOccurrenceCount, iLabel, src.Indices[iFeature], - src.Values[iFeature], ref logProb, ref absentFeatureLogProb); + ComputeLabelProbabilityFromFeature(labelOccurrenceCount, iLabel, srcIndices[iFeature], + srcValues[iFeature], ref logProb, ref absentFeatureLogProb); } } } @@ -404,7 +411,7 @@ private void Map(in VBuffer src, ref VBuffer dst) (float)(logProb + (_absentFeaturesLogProb[iLabel] - absentFeatureLogProb)); } - dst = new VBuffer(_labelCount, labelScores, dst.Indices); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index 716bd1c4fc..e6590050f0 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -119,7 +119,8 @@ private void BeginBatch() _batch++; _numBatchExamples = 0; _biasUpdate = 0; - _weightsUpdate = new VBuffer(_weightsUpdate.Length, 0, _weightsUpdate.Values, _weightsUpdate.Indices); + 
VBufferMutationContext.Create(ref _weightsUpdate, _weightsUpdate.Length, 0) + .Complete(ref _weightsUpdate); } private void FinishBatch(in VBuffer weightsUpdate, Float weightsUpdateScale) @@ -147,7 +148,7 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer feat, Fl Float currentBiasUpdate = trueOutput * weight; _biasUpdate += currentBiasUpdate; // Only aggregate in the case where we're handling multiple instances. - if (_weightsUpdate.Count == 0) + if (_weightsUpdate.GetValues().Length == 0) { VectorUtils.ScaleInto(in feat, currentBiasUpdate, ref _weightsUpdate); _weightsUpdateScale = 1; @@ -160,7 +161,7 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer feat, Fl { if (_batchSize == 1 && loss < 0) { - Contracts.Assert(_weightsUpdate.Count == 0); + Contracts.Assert(_weightsUpdate.GetValues().Length == 0); // If we aren't aggregating multiple instances, just use the instance's // vector directly. Float currentBiasUpdate = trueOutput * weight; diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index 606b09b341..14608d37c0 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -138,8 +138,9 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab float mult = -(y - lambda) * weight; VectorUtils.AddMultWithOffset(in feat, mult, ref grad, 1); // Due to the call to EnsureBiases, we know this region is dense. 
- Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1)); - grad.Values[0] += mult; + var mutation = VBufferMutationContext.CreateFromBuffer(ref grad); + Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); + mutation.Values[0] += mult; // From the computer's perspective exp(infinity)==infinity // so inf-inf=nan, but in reality, infinity is just a large // number we can't represent, and exp(X)-X for X=inf is just inf. diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index cb10bef433..b7b8c32804 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -829,10 +829,11 @@ protected virtual void TrainWithoutLock(IProgressChannelProvider progress, Float : 0; } + var featureValues = features.GetValues(); if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(primalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); - else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(primalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); + CpuMathUtils.SdcaL1UpdateDense(primalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); + else if (featureValues.Length > 0) + CpuMathUtils.SdcaL1UpdateSparse(primalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeights[0].Values, weights[0].Values); } break; diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index 37023f42c5..b5158bcce6 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ 
b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -240,10 +240,11 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa : 0; } + var featureValues = features.GetValues(); if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); - else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); + CpuMathUtils.SdcaL1UpdateDense(-primalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); + else if (featureValues.Length > 0) + CpuMathUtils.SdcaL1UpdateSparse(-primalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values); } break; @@ -267,10 +268,11 @@ protected override void TrainWithoutLock(IProgressChannelProvider progress, Floa ? 
intermediateBias - Math.Sign(intermediateBias) * l1Threshold : 0; + var featureValues = features.GetValues(); if (features.IsDense) - CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, features.Count, features.GetValues(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); - else if (features.Count > 0) - CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, features.Count, features.GetValues(), features.GetIndices(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); + CpuMathUtils.SdcaL1UpdateDense(labelPrimalUpdate, featureValues.Length, featureValues, l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); + else if (featureValues.Length > 0) + CpuMathUtils.SdcaL1UpdateSparse(labelPrimalUpdate, featureValues.Length, featureValues, features.GetIndices(), l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values); } rowCount++; diff --git a/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs b/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs index 8ac5e532a3..ce33404e32 100644 --- a/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs +++ b/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs @@ -115,8 +115,12 @@ public static ISchema GetModelSchema(IExceptionContext ectx, string modelFile) Contracts.Assert(metadataType.IsKnownSizeVector && metadataType.ItemType.IsText); schema.GetMetadata(TensorFlowUtils.InputOps, i, ref inputOps); } - yield return (name, opType.ToString(), type, - Utils.Size(inputOps.Values) > 0 ? 
inputOps.Values.Select(input => input.ToString()).ToArray() : new string[0]); + var inputOpsValues = inputOps.GetValues(); + string[] inputOpsResult = new string[inputOpsValues.Length]; + for (int j = 0; j < inputOpsValues.Length; j++) + inputOpsResult[j] = inputOpsValues[j].ToString(); + + yield return (name, opType.ToString(), type, inputOpsResult); } } @@ -328,16 +332,10 @@ internal static TFSession GetSession(IHostEnvironment env, string modelPath) return LoadTFSession(env, bytes, modelPath); } - internal static unsafe void FetchData(IntPtr data, T[] result) + internal static unsafe void FetchData(IntPtr data, Span result) { - var size = result.Length; - - GCHandle handle = GCHandle.Alloc(result, GCHandleType.Pinned); - IntPtr target = handle.AddrOfPinnedObject(); - - Int64 sizeInBytes = size * Marshal.SizeOf((typeof(T))); - Buffer.MemoryCopy(data.ToPointer(), target.ToPointer(), sizeInBytes, sizeInBytes); - handle.Free(); + var dataSpan = new Span(data.ToPointer(), result.Length); + dataSpan.CopyTo(result); } internal static bool IsTypeSupported(TFDataType tfoutput) diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 8c0a079dd8..6717527e7a 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -927,12 +927,9 @@ private Delegate MakeGetter(IRow input, int iinfo, ITensorValueGetter[] srcTe var tensor = outputCache.Outputs[_parent.Outputs[iinfo]]; var tensorSize = tensor.Shape.Where(x => x > 0).Aggregate((x, y) => x * y); - var values = dst.Values; - if (Utils.Size(values) < tensorSize) - values = new T[tensorSize]; - - TensorFlowUtils.FetchData(tensor.Data, values); - dst = new VBuffer(values.Length, values, dst.Indices); + var mutation = VBufferMutationContext.Create(ref dst, (int)tensorSize); + TensorFlowUtils.FetchData(tensor.Data, mutation.Values); + mutation.Complete(ref dst); }; return valuegetter; } @@ -1058,7 +1055,7 
@@ private class TensorValueGetterVec : ITensorValueGetter private readonly ValueGetter> _srcgetter; private readonly TFShape _tfShape; private VBuffer _vBuffer; - private VBuffer _vBufferDense; + private T[] _denseData; private readonly T[] _bufferedData; private int _position; @@ -1067,7 +1064,7 @@ public TensorValueGetterVec(IRow input, int colIndex, TFShape tfShape) _srcgetter = input.GetGetter>(colIndex); _tfShape = tfShape; _vBuffer = default; - _vBufferDense = default; + _denseData = default; long size = 0; _position = 0; @@ -1083,8 +1080,11 @@ public TensorValueGetterVec(IRow input, int colIndex, TFShape tfShape) public TFTensor GetTensor() { _srcgetter(ref _vBuffer); - _vBuffer.CopyToDense(ref _vBufferDense); - return TFTensor.Create(_vBufferDense.Values, _vBufferDense.Length, _tfShape); + + Utils.EnsureSize(ref _denseData, _vBuffer.Length, keepOld: false); + _vBuffer.CopyTo(_denseData); + + return TFTensor.Create(_denseData, _vBuffer.Length, _tfShape); } public void BufferTrainingData() From 2eff656c519ccb4026df606fc6b642f6bda0da91 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Wed, 7 Nov 2018 23:53:33 -0600 Subject: [PATCH 04/14] Remove out bools on VBufferMutationContext.Create. --- src/Microsoft.ML.Core/Data/VBuffer.cs | 41 +++++-------------- .../Utilities/VBufferUtils.cs | 5 +-- .../Depricated/Vector/VBufferMathUtils.cs | 7 +--- 3 files changed, 14 insertions(+), 39 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index f909fba51b..c28abdee43 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -473,13 +473,16 @@ public T GetItemOrDefault(int slot) public override string ToString() => IsDense ? 
$"Dense vector of size {Length}" : $"Sparse vector of size {Length}, {Count} explicit values"; + internal VBufferMutationContext GetMutableContext() + { + return GetMutableContext(Length, Count, null, false); + } + internal VBufferMutationContext GetMutableContext( int newLogicalLength, int? valuesCount, int? maxValuesCapacity, - bool keepOldOnResize, - out bool createdNewValues, - out bool createdNewIndices) + bool keepOldOnResize) { Contracts.CheckParam(newLogicalLength >= 0, nameof(newLogicalLength)); Contracts.CheckParam(valuesCount == null || valuesCount.Value <= newLogicalLength, nameof(valuesCount)); @@ -488,10 +491,12 @@ internal VBufferMutationContext GetMutableContext( int maxCapacity = maxValuesCapacity ?? newLogicalLength; T[] values = _values; + bool createdNewValues; Utils.EnsureSize(ref values, valuesCount.Value, maxCapacity, keepOldOnResize, out createdNewValues); int[] indices = _indices; bool isDense = newLogicalLength == valuesCount.Value; + bool createdNewIndices; if (isDense) { createdNewIndices = false; @@ -516,36 +521,12 @@ public static class VBufferMutationContext public static VBufferMutationContext CreateFromBuffer( ref VBuffer destination) { - return destination.GetMutableContext( - destination.Length, - destination.Count, - maxValuesCapacity: null, - keepOldOnResize: false, - out bool _, - out bool _); - } - - public static VBufferMutationContext Create( - ref VBuffer destination, - int newLogicalLength, - int? valuesCount = null, - int? maxValuesCapacity = null, - bool keepOldOnResize = false) - { - return destination.GetMutableContext( - newLogicalLength, - valuesCount, - maxValuesCapacity, - keepOldOnResize, - out bool _, - out bool _); + return destination.GetMutableContext(); } public static VBufferMutationContext Create( ref VBuffer destination, int newLogicalLength, - out bool createdNewValues, - out bool createdNewIndices, int? valuesCount = null, int? 
maxValuesCapacity = null, bool keepOldOnResize = false) @@ -554,9 +535,7 @@ public static VBufferMutationContext Create( newLogicalLength, valuesCount, maxValuesCapacity, - keepOldOnResize, - out createdNewValues, - out createdNewIndices); + keepOldOnResize); } } diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index 819d421690..ba8a427b48 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -436,10 +436,9 @@ public static void Densify(ref VBuffer dst) var values = dst.GetValues(); var mutation = VBufferMutationContext.Create( ref dst, - dst.Length, - out bool createdNewValues, out bool _); + dst.Length); - if (!createdNewValues) + if (!mutation.CreatedNewValues) { // Densify in place. for (int i = values.Length; --i >= 0; ) diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index 0a0fa3255e..c025fc6c84 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -383,11 +383,8 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer if (src.Length > 0 && src.IsDense) { // Due to sparsity preservation from src, dst must be dense, in the same way. 
- var mutation = VBufferMutationContext.Create(ref dst, - src.Length, - out bool createdNewValues, - out bool _); - if (!createdNewValues) // We need to clear it + var mutation = VBufferMutationContext.Create(ref dst, src.Length); + if (!mutation.CreatedNewValues) // We need to clear it mutation.Values.Clear(); mutation.Complete(ref dst); } From 35a0ccc4a40259278e1874ff14299b586e9c2360 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 8 Nov 2018 02:15:06 -0600 Subject: [PATCH 05/14] Remove the rest of the VBuffer.Count usages in ML.Data --- src/Microsoft.ML.Core/Data/VBuffer.cs | 30 +++-- .../Depricated/Vector/VBufferMathUtils.cs | 110 ++++++++-------- .../Depricated/Vector/VectorUtils.cs | 68 +++++----- .../Evaluators/EvaluatorUtils.cs | 58 +++++---- .../Transforms/HashTransform.cs | 120 ++++++++++-------- .../Transforms/InvertHashUtils.cs | 2 +- .../Transforms/KeyToValueTransform.cs | 24 ++-- .../Transforms/KeyToVectorTransform.cs | 28 ++-- .../Transforms/NormalizeColumnSng.cs | 71 +++++------ .../Transforms/TermTransformImpl.cs | 50 ++++---- .../Utilities/SlotDropper.cs | 15 ++- src/Microsoft.ML.Ensemble/EnsembleUtils.cs | 3 +- 12 files changed, 310 insertions(+), 269 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index c28abdee43..bca9d5fe6b 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -475,14 +475,15 @@ public override string ToString() internal VBufferMutationContext GetMutableContext() { - return GetMutableContext(Length, Count, null, false); + return GetMutableContext(Length, Count, null, false, false); } internal VBufferMutationContext GetMutableContext( int newLogicalLength, int? valuesCount, int? 
maxValuesCapacity, - bool keepOldOnResize) + bool keepOldOnResize, + bool requireIndicesOnDense) { Contracts.CheckParam(newLogicalLength >= 0, nameof(newLogicalLength)); Contracts.CheckParam(valuesCount == null || valuesCount.Value <= newLogicalLength, nameof(valuesCount)); @@ -497,7 +498,7 @@ internal VBufferMutationContext GetMutableContext( int[] indices = _indices; bool isDense = newLogicalLength == valuesCount.Value; bool createdNewIndices; - if (isDense) + if (isDense && !requireIndicesOnDense) { createdNewIndices = false; } @@ -511,6 +512,7 @@ internal VBufferMutationContext GetMutableContext( valuesCount.Value, values, indices, + requireIndicesOnDense, createdNewValues, createdNewIndices); } @@ -529,13 +531,15 @@ public static VBufferMutationContext Create( int newLogicalLength, int? valuesCount = null, int? maxValuesCapacity = null, - bool keepOldOnResize = false) + bool keepOldOnResize = false, + bool requireIndicesOnDense = false) { return destination.GetMutableContext( newLogicalLength, valuesCount, maxValuesCapacity, - keepOldOnResize); + keepOldOnResize, + requireIndicesOnDense); } } @@ -555,6 +559,7 @@ internal VBufferMutationContext(int logicalLength, int physicalValuesCount, T[] values, int[] indices, + bool requireIndicesOnDense, bool createdNewValues, bool createdNewIndices) { @@ -565,15 +570,22 @@ internal VBufferMutationContext(int logicalLength, bool isDense = logicalLength == physicalValuesCount; Values = _values.AsSpan(0, physicalValuesCount); - Indices = isDense ? default : _indices.AsSpan(0, physicalValuesCount); + Indices = !isDense || requireIndicesOnDense ? _indices.AsSpan(0, physicalValuesCount) : default; CreatedNewValues = createdNewValues; CreatedNewIndices = createdNewIndices; } - public void Complete(ref VBuffer destintation) + public void Complete(ref VBuffer destintation, int? 
physicalValuesCount = null) { - destintation = new VBuffer(_logicalLength, Values.Length, _values, _indices); + int count = Values.Length; + if (physicalValuesCount.HasValue) + { + Contracts.Check(physicalValuesCount.Value <= count, "Updating physicalValuesCount during Complete cannot be greater than the original physicalValuesCount value used in Create."); + count = physicalValuesCount.Value; + } + + destintation = new VBuffer(_logicalLength, count, _values, _indices); } } -} +} \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index c025fc6c84..1e4fc4bb67 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -20,9 +20,10 @@ public static partial class VectorUtils /// public static Float NormSquared(in VBuffer a) { - if (a.Count == 0) + var aValues = a.GetValues(); + if (aValues.Length == 0) return 0; - return CpuMathUtils.SumSq(a.GetValues()); + return CpuMathUtils.SumSq(aValues); } /// @@ -48,9 +49,10 @@ public static Float Norm(in VBuffer a) /// L1 norm of the vector public static Float L1Norm(in VBuffer a) { - if (a.Count == 0) + var aValues = a.GetValues(); + if (aValues.Length == 0) return 0; - return CpuMathUtils.SumAbs(a.GetValues()); + return CpuMathUtils.SumAbs(aValues); } /// @@ -59,9 +61,10 @@ public static Float L1Norm(in VBuffer a) /// L-infinity norm of the vector public static Float MaxNorm(in VBuffer a) { - if (a.Count == 0) + var aValues = a.GetValues(); + if (aValues.Length == 0) return 0; - return CpuMathUtils.MaxAbs(a.GetValues()); + return CpuMathUtils.MaxAbs(aValues); } /// @@ -69,9 +72,10 @@ public static Float MaxNorm(in VBuffer a) /// public static Float Sum(in VBuffer a) { - if (a.Count == 0) + var aValues = a.GetValues(); + if (aValues.Length == 0) return 0; - return CpuMathUtils.Sum(a.GetValues()); + return CpuMathUtils.Sum(aValues); } 
/// @@ -81,9 +85,9 @@ public static Float Sum(in VBuffer a) /// Value to multiply vector with public static void ScaleBy(ref VBuffer dst, Float c) { - if (c == 1 || dst.Count == 0) + if (c == 1 || dst.GetValues().Length == 0) return; - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); if (c != 0) CpuMathUtils.Scale(c, mutation.Values); else // Maintain density of dst. @@ -98,7 +102,8 @@ public static void ScaleBy(ref VBuffer dst, Float c) public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float c) { int length = src.Length; - int count = src.Count; + var srcValues = src.GetValues(); + int count = srcValues.Length; if (count == 0) { @@ -116,7 +121,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (c == 0) mutation.Values.Clear(); else - CpuMathUtils.Scale(c, src.GetValues(), mutation.Values, length); + CpuMathUtils.Scale(c, srcValues, mutation.Values, length); mutation.Complete(ref dst); } else @@ -126,7 +131,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (c == 0) mutation.Values.Clear(); else - CpuMathUtils.Scale(c, src.GetValues(), mutation.Values, count); + CpuMathUtils.Scale(c, srcValues, mutation.Values, count); mutation.Complete(ref dst); } } @@ -138,16 +143,17 @@ public static void Add(in VBuffer src, ref VBuffer dst) { Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality."); - if (src.Count == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0) return; if (dst.IsDense) { var mutation = VBufferMutationContext.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.Add(src.GetValues(), mutation.Values, src.Length); + CpuMathUtils.Add(srcValues, mutation.Values, src.Length); else - CpuMathUtils.Add(src.GetValues(), src.GetIndices(), mutation.Values, src.Count); + CpuMathUtils.Add(srcValues, src.GetIndices(), mutation.Values, srcValues.Length); return; } // 
REVIEW: Should we use SSE for any of these possibilities? @@ -165,16 +171,17 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds { Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality."); - if (src.Count == 0 || c == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0 || c == 0) return; if (dst.IsDense) { var mutation = VBufferMutationContext.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.AddScale(c, src.GetValues(), mutation.Values, src.Length); + CpuMathUtils.AddScale(c, srcValues, mutation.Values, src.Length); else - CpuMathUtils.AddScale(c, src.GetValues(), src.GetIndices(), mutation.Values, src.Count); + CpuMathUtils.AddScale(c, srcValues, src.GetIndices(), mutation.Values, srcValues.Length); return; } // REVIEW: Should we use SSE for any of these possibilities? @@ -190,7 +197,8 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds Contracts.Check(src.Length == dst.Length, "Vectors must have the same dimensionality."); int length = src.Length; - if (src.Count == 0 || c == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0 || c == 0) { // src is zero vector, res = dst dst.CopyTo(ref res); @@ -201,7 +209,7 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds if (dst.IsDense && src.IsDense) { var mutation = VBufferMutationContext.Create(ref res, length); - CpuMathUtils.AddScaleCopy(c, src.GetValues(), dst.GetValues(), mutation.Values, length); + CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), mutation.Values, length); mutation.Complete(ref res); return; } @@ -218,9 +226,9 @@ public static void AddMultInto(in VBuffer a, Float c, in VBuffer b { Contracts.Check(a.Length == b.Length, "Vectors must have the same dimensionality."); - if (c == 0 || b.Count == 0) + if (c == 0 || b.GetValues().Length == 0) a.CopyTo(ref dst); - else if (a.Count == 0) + else if (a.GetValues().Length == 0) ScaleInto(in b, c, ref dst); else 
VBufferUtils.ApplyInto(in a, in b, ref dst, (ind, v1, v2) => v1 + c * v2); @@ -237,7 +245,8 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer Contracts.CheckParam(0 <= offset && offset <= dst.Length, nameof(offset)); Contracts.CheckParam(src.Length <= dst.Length - offset, nameof(offset)); - if (src.Count == 0 || c == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0 || c == 0) return; VBufferMutationContext mutation; Span values; @@ -247,9 +256,9 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer mutation = VBufferMutationContext.Create(ref dst, dst.Length); values = mutation.Values.Slice(offset); if (src.IsDense) - CpuMathUtils.AddScale(c, src.GetValues(), values, src.Count); + CpuMathUtils.AddScale(c, srcValues, values, srcValues.Length); else - CpuMathUtils.AddScale(c, src.GetValues(), src.GetIndices(), values, src.Count); + CpuMathUtils.AddScale(c, srcValues, src.GetIndices(), values, srcValues.Length); return; } // REVIEW: Perhaps implementing an ApplyInto with an offset would be more @@ -259,8 +268,8 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer // are often better off going into a dense vector in all applications of interest to us. // Correspondingly, this implementation will be functional, but not optimized. var dstIndices = dst.GetIndices(); - int dMin = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dstIndices, 0, dst.Count, offset); - int dLim = dst.Count == 0 ? 0 : Utils.FindIndexSorted(dstIndices, dMin, dst.Count, offset + src.Length); + int dMin = dstIndices.Length == 0 ? 0 : dstIndices.FindIndexSorted(0, dstIndices.Length, offset); + int dLim = dstIndices.Length == 0 ? 0 : dstIndices.FindIndexSorted(dMin, dstIndices.Length, offset + src.Length); Contracts.Assert(dMin - dLim <= src.Length); // First get the number of extra values that we will need to accomodate. 
int gapCount; @@ -268,9 +277,9 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer gapCount = src.Length - (dLim - dMin); else { - gapCount = src.Count; + gapCount = srcValues.Length; var srcIndices = src.GetIndices(); - for (int iS = 0, iD = dMin; iS < src.Count && iD < dLim; ) + for (int iS = 0, iD = dMin; iS < srcIndices.Length && iD < dLim; ) { var comp = srcIndices[iS] - dstIndices[iD] + offset; if (comp < 0) // dst index is larger. @@ -286,29 +295,28 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } } // Extend dst so that it has room for this additional stuff. Shift things over as well. + var dstValues = dst.GetValues(); mutation = VBufferMutationContext.Create(ref dst, dst.Length, - dst.Count + gapCount, + dstValues.Length + gapCount, keepOldOnResize: true); var indices = mutation.Indices; values = mutation.Values; if (gapCount > 0) { // Shift things over, unless there's nothing to shift over, or no new elements are being introduced anyway. - if (dst.Count != dLim) + if (dstValues.Length != dLim) { - Contracts.Assert(dLim < dst.Count); - indices.Slice(dLim, dst.Count - dLim) + Contracts.Assert(dLim < dstValues.Length); + indices.Slice(dLim, dstValues.Length - dLim) .CopyTo(indices.Slice(dLim + gapCount)); - values.Slice(dLim, dst.Count - dLim) + values.Slice(dLim, dstValues.Length - dLim) .CopyTo(values.Slice(dLim + gapCount)); } } // Now, fill in the stuff in this "gap." Both of these implementations work // backwards from the end, since they can potentially be working in place if // the EnsureSize calls did not actually result in a new array. - var srcValues = src.GetValues(); - var dstValues = dst.GetValues(); if (src.IsDense) { // dst is sparse, src is dense. @@ -330,8 +338,8 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer { // Both dst and src are sparse. 
int iD = dLim - 1; - int iS = src.Count - 1; var srcIndices = src.GetIndices(); + int iS = srcIndices.Length - 1; int sIndex = iS < 0 ? -1 : srcIndices[iS]; int dIndex = iD < 0 ? -1 : dstIndices[iD] - offset; @@ -378,7 +386,7 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer // equal lengths, but I assume I don't care here. if (c == 1) src.CopyTo(ref dst); - else if (src.Count == 0 || c == 0) + else if (src.GetValues().Length == 0 || c == 0) { if (src.Length > 0 && src.IsDense) { @@ -404,13 +412,13 @@ public static int ArgMax(in VBuffer src) { if (src.Length == 0) return -1; - if (src.Count == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0) return 0; - var srcValues = src.GetValues(); int ind = MathUtils.ArgMax(srcValues); // ind < 0 iff all explicit values are NaN. - Contracts.Assert(-1 <= ind && ind < src.Count); + Contracts.Assert(-1 <= ind && ind < srcValues.Length); if (src.IsDense) return ind; @@ -429,10 +437,10 @@ public static int ArgMax(in VBuffer src) // All explicit values are non-positive or NaN, so return the first index not in src.Indices. ind = 0; - while (ind < src.Count && srcIndices[ind] == ind) + while (ind < srcIndices.Length && srcIndices[ind] == ind) ind++; - Contracts.Assert(ind <= src.Count); - Contracts.Assert(ind == src.Count || ind < srcIndices[ind]); + Contracts.Assert(ind <= srcIndices.Length); + Contracts.Assert(ind == srcIndices.Length || ind < srcIndices[ind]); return ind; } @@ -440,13 +448,13 @@ public static int ArgMin(in VBuffer src) { if (src.Length == 0) return -1; - if (src.Count == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0) return 0; - var srcValues = src.GetValues(); int ind = MathUtils.ArgMin(srcValues); // ind < 0 iff all explicit values are NaN. 
- Contracts.Assert(-1 <= ind && ind < src.Count); + Contracts.Assert(-1 <= ind && ind < srcValues.Length); if (src.IsDense) return ind; @@ -465,10 +473,10 @@ public static int ArgMin(in VBuffer src) // All explicit values are non-negative or NaN, so return the first index not in srcIndices. ind = 0; - while (ind < src.Count && srcIndices[ind] == ind) + while (ind < srcIndices.Length && srcIndices[ind] == ind) ind++; - Contracts.Assert(ind <= src.Count); - Contracts.Assert(ind == src.Count || ind < srcIndices[ind]); + Contracts.Assert(ind <= srcIndices.Length); + Contracts.Assert(ind == srcIndices.Length || ind < srcIndices[ind]); return ind; } } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index ba549be4d3..b2ef37a820 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -30,30 +30,33 @@ public static Float DotProduct(Float[] a, Float[] b) public static Float DotProduct(Float[] a, in VBuffer b) { Contracts.Check(Utils.Size(a) == b.Length, "Vectors must have the same dimensionality."); - if (b.Count == 0) + var bValues = b.GetValues(); + if (bValues.Length == 0) return 0; if (b.IsDense) - return CpuMathUtils.DotProductDense(a, b.GetValues(), b.Length); - return CpuMathUtils.DotProductSparse(a, b.GetValues(), b.GetIndices(), b.Count); + return CpuMathUtils.DotProductDense(a, bValues, b.Length); + return CpuMathUtils.DotProductSparse(a, bValues, b.GetIndices(), bValues.Length); } public static Float DotProduct(in VBuffer a, in VBuffer b) { Contracts.Check(a.Length == b.Length, "Vectors must have the same dimensionality."); - if (a.Count == 0 || b.Count == 0) + var aValues = a.GetValues(); + var bValues = b.GetValues(); + if (aValues.Length == 0 || bValues.Length == 0) return 0; if (a.IsDense) { if (b.IsDense) - return CpuMathUtils.DotProductDense(a.GetValues(), b.GetValues(), a.Length); - return 
CpuMathUtils.DotProductSparse(a.GetValues(), b.GetValues(), b.GetIndices(), b.Count); + return CpuMathUtils.DotProductDense(aValues, bValues, a.Length); + return CpuMathUtils.DotProductSparse(aValues, bValues, b.GetIndices(), bValues.Length); } if (b.IsDense) - return CpuMathUtils.DotProductSparse(b.GetValues(), a.GetValues(), a.GetIndices(), a.Count); - return DotProductSparse(a.GetValues(), a.GetIndices(), 0, a.Count, b.GetValues(), b.GetIndices(), 0, b.Count); + return CpuMathUtils.DotProductSparse(bValues, aValues, a.GetIndices(), aValues.Length); + return DotProductSparse(aValues, a.GetIndices(), 0, aValues.Length, bValues, b.GetIndices(), 0, bValues.Length); } /// @@ -75,10 +78,12 @@ public static void SparsifyNormalize(ref VBuffer a, int top, int bottom, var bottomHeap = new Heap>((left, right) => right.Value > left.Value, bottom + 1); bool isDense = a.IsDense; - for (int i = 0; i < a.Count; i++) + var aValues = a.GetValues(); + var aIndices = a.GetIndices(); + for (int i = 0; i < aValues.Length; i++) { - int idx = isDense ? i : a.Indices[i]; - var value = a.Values[i]; + int idx = isDense ? 
i : aIndices[i]; + var value = aValues[i]; if (value < 0 && bottom > 0) { @@ -160,7 +165,7 @@ public static void MulElementWise(in VBuffer a, ref VBuffer dst) if (a.IsDense && dst.IsDense) { - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dst.Count); + var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); CpuMathUtils.MulElementWise(a.GetValues(), dst.GetValues(), mutation.Values, a.Length); } else @@ -250,30 +255,30 @@ public static Float DotProductWithOffset(in VBuffer a, int offset, in VBu Contracts.Check(0 <= offset && offset <= a.Length); Contracts.Check(b.Length <= a.Length - offset, "VBuffer b must be no longer than a.Length - offset."); - if (a.Count == 0 || b.Count == 0) + var aValues = a.GetValues(); + var bValues = b.GetValues(); + if (aValues.Length == 0 || bValues.Length == 0) return 0; if (a.IsDense) { if (b.IsDense) - return CpuMathUtils.DotProductDense(a.GetValues().Slice(offset), b.GetValues(), b.Length); - return CpuMathUtils.DotProductSparse(a.GetValues().Slice(offset), b.GetValues(), b.GetIndices(), b.Count); + return CpuMathUtils.DotProductDense(aValues.Slice(offset), bValues, b.Length); + return CpuMathUtils.DotProductSparse(aValues.Slice(offset), bValues, b.GetIndices(), bValues.Length); } else { Float result = 0; - var aValues = a.GetValues(); var aIndices = a.GetIndices(); - var bValues = b.GetValues(); - var bIndices = b.GetIndices(); - int aMin = Utils.FindIndexSorted(aIndices, 0, a.Count, offset); - int aLim = Utils.FindIndexSorted(aIndices, 0, a.Count, offset + b.Length); + int aMin = Utils.FindIndexSorted(aIndices, 0, aIndices.Length, offset); + int aLim = Utils.FindIndexSorted(aIndices, 0, aIndices.Length, offset + b.Length); if (b.IsDense) { for (int iA = aMin; iA < aLim; ++iA) result += aValues[iA] * bValues[aIndices[iA] - offset]; return result; } - for (int iA = aMin, iB = 0; iA < aLim && iB < b.Count; ) + var bIndices = b.GetIndices(); + for (int iA = aMin, iB = 0; iA < aLim && iB < bIndices.Length; 
) { int aIndex = aIndices[iA]; int bIndex = bIndices[iB]; @@ -302,12 +307,13 @@ public static Float DotProductWithOffset(Float[] a, int offset, in VBuffer aValues, ReadOnlySpan aIndices, int ia, int iaLim, ReadOnlySpan bValues, ReadOnlySpan bIndices, int ib, int ibLim) @@ -440,16 +446,16 @@ public static void AddMult(in VBuffer src, Float[] dst, Float c) Contracts.CheckValue(dst, nameof(dst)); Contracts.CheckParam(src.Length == dst.Length, nameof(dst), "Arrays must have the same dimensionality."); - if (src.Count == 0 || c == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0 || c == 0) return; - var srcValues = src.GetValues(); if (src.IsDense) - CpuMathUtils.AddScale(c, srcValues, dst, src.Count); + CpuMathUtils.AddScale(c, srcValues, dst, srcValues.Length); else { var srcIndices = src.GetIndices(); - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) dst[srcIndices[i]] += c * srcValues[i]; } } @@ -468,10 +474,10 @@ public static void AddMultWithOffset(in VBuffer src, Float[] dst, int off Contracts.Check(0 <= offset && offset <= dst.Length); Contracts.Check(src.Length <= dst.Length - offset, "Vector src must be no longer than dst.Length - offset."); - if (src.Count == 0 || c == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0 || c == 0) return; - var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < src.Length; i++) @@ -480,7 +486,7 @@ public static void AddMultWithOffset(in VBuffer src, Float[] dst, int off else { var srcIndices = src.GetIndices(); - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) dst[srcIndices[i] + offset] += c * srcValues[i]; } } diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index affee40f9d..071d1e02b0 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -691,35 +691,34 @@ public static void 
ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi ValueMapper, VBuffer> mapper = (in VBuffer src, ref VBuffer dst) => { - var values = dst.Values; - if (Utils.Size(values) < src.Count) - values = new uint[src.Count]; + var srcValues = src.GetValues(); + var mutation = VBufferMutationContext.Create( + ref dst, + src.Length, + srcValues.Length); if (src.IsDense) { for (int j = 0; j < src.Length; j++) { - if (src.Values[j] == 0 || src.Values[j] > keyMapperCur.Length) - values[j] = 0; + if (srcValues[j] == 0 || srcValues[j] > keyMapperCur.Length) + mutation.Values[j] = 0; else - values[j] = (uint)keyMapperCur[src.Values[j] - 1] + 1; + mutation.Values[j] = (uint)keyMapperCur[srcValues[j] - 1] + 1; } - dst = new VBuffer(src.Length, values, dst.Indices); } else { - var indices = dst.Indices; - if (Utils.Size(indices) < src.Count) - indices = new int[src.Count]; - for (int j = 0; j < src.Count; j++) + var srcIndices = src.GetIndices(); + for (int j = 0; j < srcValues.Length; j++) { - if (src.Values[j] == 0 || src.Values[j] > keyMapperCur.Length) - values[j] = 0; + if (srcValues[j] == 0 || srcValues[j] > keyMapperCur.Length) + mutation.Values[j] = 0; else - values[j] = (uint)keyMapperCur[src.Values[j] - 1] + 1; - indices[j] = src.Indices[j]; + mutation.Values[j] = (uint)keyMapperCur[srcValues[j] - 1] + 1; + mutation.Indices[j] = srcIndices[j]; } - dst = new VBuffer(src.Length, src.Count, values, indices); } + mutation.Complete(ref dst); }; ValueGetter>> slotNamesGetter = null; @@ -1388,9 +1387,10 @@ public static string GetConfusionTable(IHost host, IDataView confusionDataView, var confusionTable = GetConfusionTableAsArray(confusionDataView, countCol, labelNames.Length, labelIndexToConfIndexMap, numConfusionTableLabels, out precisionSums, out recallSums); + var predictedLabelNames = GetPredictedLabelNames(in labelNames, labelIndexToConfIndexMap); var confusionTableString = GetConfusionTableAsString(confusionTable, recallSums, precisionSums, - 
labelNames.Values.Where((t, i) => labelIndexToConfIndexMap[i] >= 0).ToArray(), - sampled: numConfusionTableLabels < labelNames.Count, binary: binary); + predictedLabelNames, + sampled: numConfusionTableLabels < labelNames.Length, binary: binary); int weightIndex; if (confusionDataView.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.Weight, out weightIndex)) @@ -1398,8 +1398,8 @@ public static string GetConfusionTable(IHost host, IDataView confusionDataView, confusionTable = GetConfusionTableAsArray(confusionDataView, weightIndex, labelNames.Length, labelIndexToConfIndexMap, numConfusionTableLabels, out precisionSums, out recallSums); weightedConfusionTable = GetConfusionTableAsString(confusionTable, recallSums, precisionSums, - labelNames.Values.Where((t, i) => labelIndexToConfIndexMap[i] >= 0).ToArray(), - sampled: numConfusionTableLabels < labelNames.Count, prefix: "Weighted ", binary: binary); + predictedLabelNames, + sampled: numConfusionTableLabels < labelNames.Length, prefix: "Weighted ", binary: binary); } else weightedConfusionTable = null; @@ -1407,6 +1407,20 @@ public static string GetConfusionTable(IHost host, IDataView confusionDataView, return confusionTableString; } + private static List> GetPredictedLabelNames(in VBuffer> labelNames, int[] labelIndexToConfIndexMap) + { + List> result = new List>(); + var values = labelNames.GetValues(); + for (int i = 0; i < values.Length; i++) + { + if (labelIndexToConfIndexMap[i] >= 0) + { + result.Add(values[i]); + } + } + return result; + } + // This methods is given a data view and a column index of the counts, and computes three arrays: the confusion table, // the per class recall and the per class precision. private static double[][] GetConfusionTableAsArray(IDataView confusionDataView, int countIndex, int numClasses, @@ -1537,7 +1551,7 @@ private static string GetFoldMetricsAsString(IHostEnvironment env, IDataView dat // Get a string representation of a confusion table. 
private static string GetConfusionTableAsString(double[][] confusionTable, double[] rowSums, double[] columnSums, - ReadOnlyMemory[] predictedLabelNames, string prefix = "", bool sampled = false, bool binary = true) + List> predictedLabelNames, string prefix = "", bool sampled = false, bool binary = true) { int numLabels = Utils.Size(confusionTable); @@ -1555,7 +1569,7 @@ private static string GetConfusionTableAsString(double[][] confusionTable, doubl { // The row label will also include the index, so a user can easily match against the header. // In such a case, a label like "Foo" would be presented as something like "5. Foo". - rowDigitLen = Math.Max(predictedLabelNames.Length - 1, 0).ToString().Length; + rowDigitLen = Math.Max(predictedLabelNames.Count - 1, 0).ToString().Length; Contracts.Assert(rowDigitLen >= 1); rowLabelLen += rowDigitLen + 2; } diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index 869684d5af..4299dd4904 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -743,36 +743,34 @@ private static ValueGetter> MakeVectorHashGetter(uint se return (ref VBuffer dst) => { srcGetter(ref src); - int[] indices = dst.Indices; - if (src.Count == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0) { - dst = new VBuffer(src.Length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, src.Length, 0) + .Complete(ref dst); return; } + var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); + + for (int i = 0; i < srcValues.Length; ++i) + mutation.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); if (!src.IsDense) - { - Utils.EnsureSize(ref indices, src.Count, keepOld: false); - Array.Copy(src.Indices, 0, indices, 0, src.Count); - } - var values = dst.Values; - Utils.EnsureSize(ref values, src.Count, keepOld: false); - var srcValuesSpan = src.Values.AsSpan(0, 
src.Count); - for (int i = 0; i < srcValuesSpan.Length; ++i) - values[i] = hasher.HashCore(seed, mask, srcValuesSpan[i]); - dst = new VBuffer(src.Length, src.Count, values, indices); + src.GetIndices().CopyTo(mutation.Indices); + + mutation.Complete(ref dst); }; } // It is not sparsity preserving. return (ref VBuffer dst) => { srcGetter(ref src); - uint[] values = dst.Values; - Utils.EnsureSize(ref values, src.Length, keepOld: false); - var srcValuesSpan = src.Values.AsSpan(0, src.Count); + var mutation = VBufferMutationContext.Create(ref dst, src.Length); + + var srcValues = src.GetValues(); if (src.IsDense) { - for (int i = 0; i < srcValuesSpan.Length; ++i) - values[i] = hasher.HashCore(seed, mask, srcValuesSpan[i]); + for (int i = 0; i < srcValues.Length; ++i) + mutation.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); } else { @@ -781,12 +779,13 @@ private static ValueGetter> MakeVectorHashGetter(uint se // values, rather than having complicated logic to do a simultaneous traversal of the // sparse vs. dense array. for (int i = 0; i < src.Length; ++i) - values[i] = defaultHash; + mutation.Values[i] = defaultHash; // Next overwrite the values in the explicit entries. 
- for (int i = 0; i < srcValuesSpan.Length; ++i) - values[src.Indices[i]] = hasher.HashCore(seed, mask, srcValuesSpan[i]); + var srcIndices = src.GetIndices(); + for (int i = 0; i < srcValues.Length; ++i) + mutation.Values[srcIndices[i]] = hasher.HashCore(seed, mask, srcValues[i]); } - dst = new VBuffer(src.Length, values, dst.Indices); + mutation.Complete(ref dst); }; } @@ -807,60 +806,59 @@ private static ValueGetter> MakeVectorOrderedHashGetter( return (ref VBuffer dst) => { srcGetter(ref src); - int[] indices = dst.Indices; - if (src.Count == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0) { - dst = new VBuffer(src.Length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, src.Length, 0) + .Complete(ref dst); return; } - if (!src.IsDense) - { - Utils.EnsureSize(ref indices, src.Count, keepOld: false); - Array.Copy(src.Indices, 0, indices, 0, src.Count); - } - var values = dst.Values; - Utils.EnsureSize(ref values, src.Count, keepOld: false); - var srcValuesSpan = src.Values.AsSpan(0, src.Count); + var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); + if (src.IsDense) { - for (int i = 0; i < srcValuesSpan.Length; ++i) - values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValuesSpan[i]); + for (int i = 0; i < srcValues.Length; ++i) + mutation.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValues[i]); } else { - for (int i = 0; i < srcValuesSpan.Length; ++i) - values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)src.Indices[i]), mask, srcValuesSpan[i]); + var srcIndices = src.GetIndices(); + for (int i = 0; i < srcValues.Length; ++i) + mutation.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)srcIndices[i]), mask, srcValues[i]); + srcIndices.CopyTo(mutation.Indices); + } - dst = new VBuffer(src.Length, src.Count, values, indices); + mutation.Complete(ref dst); }; } // It is not sparsity preserving. 
return (ref VBuffer dst) => { srcGetter(ref src); - uint[] values = dst.Values; - Utils.EnsureSize(ref values, src.Length, keepOld: false); - var srcValuesSpan = src.Values.AsSpan(0, src.Count); + var mutation = VBufferMutationContext.Create(ref dst, src.Length); + + var srcValues = src.GetValues(); if (src.IsDense) { - for (int i = 0; i < srcValuesSpan.Length; ++i) - values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValuesSpan[i]); + for (int i = 0; i < srcValues.Length; ++i) + mutation.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValues[i]); } else { + var srcIndices = src.GetIndices(); int j = 0; for (int i = 0; i < src.Length; i++) { uint indexSeed = Hashing.MurmurRound(seed, (uint)i); - if (src.Count <= j || src.Indices[j] > i) - values[i] = hasher.HashCore(indexSeed, mask, default); - else if (src.Indices[j] == i) - values[i] = hasher.HashCore(indexSeed, mask, srcValuesSpan[j++]); + if (srcIndices.Length <= j || srcIndices[j] > i) + mutation.Values[i] = hasher.HashCore(indexSeed, mask, default); + else if (srcIndices[j] == i) + mutation.Values[i] = hasher.HashCore(indexSeed, mask, srcValues[j++]); else Contracts.Assert(false, "this should have never happened."); } } - dst = new VBuffer(src.Length, values, dst.Indices); + mutation.Complete(ref dst); }; } @@ -1111,12 +1109,17 @@ public override void Process() { _srcGetter(ref _value); _dstGetter(ref _hash); + + var valueValues = _value.GetValues(); + var hashValues = _hash.GetValues(); + // The two arrays should be consistent in their density, length, count, etc. 
Contracts.Assert(_value.IsDense == _hash.IsDense); Contracts.Assert(_value.Length == _hash.Length); - Contracts.Assert(_value.Count == _hash.Count); - for (int i = 0; i < _value.Count; ++i) - Collector.Add(_hash.Values[i], _value.Values[i]); + Contracts.Assert(valueValues.Length == hashValues.Length); + + for (int i = 0; i < valueValues.Length; ++i) + Collector.Add(hashValues[i], valueValues[i]); } } @@ -1151,19 +1154,24 @@ public override void Process() { _srcGetter(ref _value); _dstGetter(ref _hash); + + var valueValues = _value.GetValues(); + var hashValues = _hash.GetValues(); + // The two arrays should be consistent in their density, length, count, etc. Contracts.Assert(_value.IsDense == _hash.IsDense); Contracts.Assert(_value.Length == _hash.Length); - Contracts.Assert(_value.Count == _hash.Count); + Contracts.Assert(valueValues.Length == hashValues.Length); if (_hash.IsDense) { - for (int i = 0; i < _value.Count; ++i) - Collector.Add(_hash.Values[i], new KeyValuePair(i, _value.Values[i])); + for (int i = 0; i < valueValues.Length; ++i) + Collector.Add(hashValues[i], new KeyValuePair(i, valueValues[i])); } else { - for (int i = 0; i < _value.Count; ++i) - Collector.Add(_hash.Values[i], new KeyValuePair(_hash.Indices[i], _value.Values[i])); + var hashIndices = _hash.GetIndices(); + for (int i = 0; i < valueValues.Length; ++i) + Collector.Add(hashValues[i], new KeyValuePair(hashIndices[i], valueValues[i])); } } } diff --git a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs index 2a5f0a3fc1..88f68ab778 100644 --- a/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs +++ b/src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs @@ -412,7 +412,7 @@ private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory ctx.SaveTextStream("Terms.txt", writer => { - writer.WriteLine("# Number of terms = {0} of length {1}", v.Count, v.Length); + writer.WriteLine("# Number of terms = {0} of length {1}", 
v.GetValues().Length, v.Length); foreach (var pair in v.Items()) { var text = pair.Value; diff --git a/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs index c437c17942..759e3bffde 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs @@ -329,7 +329,7 @@ public KeyToValueMap(Mapper parent, KeyType typeKey, PrimitiveType typeVal, TVal _convertToUInt = Runtime.Data.Conversion.Conversions.Instance.GetStandardConversion(typeKey, NumberType.U4, out identity); } - private void MapKey(ref TKey src, ref TValue dst) + private void MapKey(in TKey src, ref TValue dst) { uint uintSrc = 0; _convertToUInt(in src, ref uintSrc); @@ -361,7 +361,7 @@ public override Delegate GetMappingGetter(IRow input) (ref TValue dst) => { getSrc(ref src); - MapKey(ref src, ref dst); + MapKey(in src, ref dst); }; return retVal; } @@ -376,8 +376,8 @@ public override Delegate GetMappingGetter(IRow input) { getSrc(ref src); int srcSize = src.Length; - int srcCount = src.Count; - var srcValues = src.Values; + var srcValues = src.GetValues(); + int srcCount = srcValues.Length; var dstValues = dst.Values; var dstIndices = dst.Indices; @@ -389,7 +389,7 @@ public override Delegate GetMappingGetter(IRow input) for (int slot = 0; slot < srcSize; ++slot) { - MapKey(ref srcValues[slot], ref dstValues[slot]); + MapKey(in srcValues[slot], ref dstValues[slot]); // REVIEW: // The current implementation always maps dense to dense, even if the resulting columns could benefit from @@ -408,17 +408,17 @@ public override Delegate GetMappingGetter(IRow input) // Currently this always maps sparse to dense, as long as the output type's NA does not equal its default value. Utils.EnsureSize(ref dstValues, srcSize, maxSize, keepOld: false); - var srcIndices = src.Indices; - int nextExplicitSlot = src.Count == 0 ? 
srcSize : srcIndices[0]; + var srcIndices = src.GetIndices(); + int nextExplicitSlot = srcCount == 0 ? srcSize : srcIndices[0]; int islot = 0; for (int slot = 0; slot < srcSize; ++slot) { if (nextExplicitSlot == slot) { // Current slot has an explicitly defined value. - Parent.Host.Assert(islot < src.Count); - MapKey(ref srcValues[islot], ref dstValues[slot]); - nextExplicitSlot = ++islot == src.Count ? srcSize : srcIndices[islot]; + Parent.Host.Assert(islot < srcCount); + MapKey(in srcValues[islot], ref dstValues[slot]); + nextExplicitSlot = ++islot == srcCount ? srcSize : srcIndices[islot]; Parent.Host.Assert(slot < nextExplicitSlot); } else @@ -434,12 +434,12 @@ public override Delegate GetMappingGetter(IRow input) // As the default value equals the NA value for the output type, we produce sparse output. Utils.EnsureSize(ref dstValues, srcCount, maxSize, keepOld: false); Utils.EnsureSize(ref dstIndices, srcCount, maxSize, keepOld: false); - var srcIndices = src.Indices; + var srcIndices = src.GetIndices(); for (int islotSrc = 0; islotSrc < srcCount; ++islotSrc) { // Current slot has an explicitly defined value. Parent.Host.Assert(islotSrc < srcCount); - MapKey(ref srcValues[islotSrc], ref dstItem); + MapKey(in srcValues[islotSrc], ref dstItem); if (!_isDefault(in dstItem)) { dstValues[islotDst] = dstItem; diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs index a076dc452b..637f0a7d66 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs @@ -523,8 +523,8 @@ private ValueGetter> MakeGetterBag(IRow input, int iinfo) Host.Check(cv == 0 || src.Length == cv); // The indices are irrelevant in the bagging case. 
- var values = src.Values; - int count = src.Count; + var values = src.GetValues(); + int count = values.Length; for (int slot = 0; slot < count; slot++) { uint key = values[slot] - 1; @@ -564,17 +564,11 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) Host.Check(lenSrc == cv || cv == 0); // Since we generate values in order, no need for a builder. - var valuesDst = dst.Values; - var indicesDst = dst.Indices; - int lenDst = checked(size * lenSrc); - int cntSrc = src.Count; - if (Utils.Size(valuesDst) < cntSrc) - valuesDst = new float[cntSrc]; - if (Utils.Size(indicesDst) < cntSrc) - indicesDst = new int[cntSrc]; + var values = src.GetValues(); + int cntSrc = values.Length; + var mutation = VBufferMutationContext.Create(ref dst, lenDst, cntSrc); - var values = src.Values; int count = 0; if (src.IsDense) { @@ -585,24 +579,24 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) uint key = values[slot] - 1; if (key >= (uint)size) continue; - valuesDst[count] = 1; - indicesDst[count++] = slot * size + (int)key; + mutation.Values[count] = 1; + mutation.Indices[count++] = slot * size + (int)key; } } else { - var indices = src.Indices; + var indices = src.GetIndices(); for (int islot = 0; islot < cntSrc; islot++) { Host.Assert(count < cntSrc); uint key = values[islot] - 1; if (key >= (uint)size) continue; - valuesDst[count] = 1; - indicesDst[count++] = indices[islot] * size + (int)key; + mutation.Values[count] = 1; + mutation.Indices[count++] = indices[islot] * size + (int)key; } } - dst = new VBuffer(lenDst, count, valuesDst, indicesDst); + mutation.Complete(ref dst, count); }; } diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs index 7015814e70..cfe7465480 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs @@ -356,14 +356,14 @@ public void ProcessValue(in VBuffer value) var size = _min.Length; 
Contracts.Check(value.Length == size); _trainCount++; - var count = value.Count; + var values = value.GetValues(); + var count = values.Length; Contracts.Assert(0 <= count & count <= size); if (count == 0) return; if (count == size) { - var values = value.Values; for (int j = 0; j < count; j++) { var val = values[j]; @@ -373,8 +373,7 @@ public void ProcessValue(in VBuffer value) } else { - var indices = value.Indices; - var values = value.Values; + var indices = value.GetIndices(); for (int k = 0; k < count; k++) { var val = values[k]; @@ -459,14 +458,14 @@ public void ProcessValue(in VBuffer value) { _trainCount++; var size = _mean.Length; - var count = value.Count; + var values = value.GetValues(); + var count = values.Length; Contracts.Assert(0 <= count & count <= size); if (count == 0) return; if (count == size) { - var values = value.Values; for (int j = 0; j < count; j++) { var origVal = values[j]; @@ -475,8 +474,7 @@ public void ProcessValue(in VBuffer value) } else { - var indices = value.Indices; - var values = value.Values; + var indices = value.GetIndices(); for (int k = 0; k < count; k++) { var origVal = values[k]; @@ -706,7 +704,8 @@ private static void FillValues(in VBuffer input, BufferBuilder b { Contracts.Assert(input.Length == scale.Length); int size = scale.Length; - int count = input.Count; + var values = input.GetValues(); + int count = values.Length; Contracts.Assert(0 <= count & count <= size); // We always start with sparse, since we may make things sparser than the source. @@ -714,7 +713,6 @@ private static void FillValues(in VBuffer input, BufferBuilder b if (count == 0) return; - var values = input.Values; if (count >= size) { for (int i = 0; i < size; i++) @@ -723,7 +721,7 @@ private static void FillValues(in VBuffer input, BufferBuilder b } // The input is sparse. 
- var indices = input.Indices; + var indices = input.GetIndices(); for (int ii = 0; ii < count; ii++) { int i = indices[ii]; @@ -737,7 +735,8 @@ private static void FillValues(in VBuffer input, BufferBuilder b { Contracts.Assert(input.Length == scale.Length); int size = scale.Length; - int count = input.Count; + var values = input.GetValues(); + int count = values.Length; Contracts.Assert(0 <= count & count <= size); // We always start with sparse, since we may make things sparser than the source. @@ -750,7 +749,6 @@ private static void FillValues(in VBuffer input, BufferBuilder b return; } - var values = input.Values; if (count >= size) { for (int i = 0; i < size; i++) @@ -759,7 +757,7 @@ private static void FillValues(in VBuffer input, BufferBuilder b } // The input is sparse. - var indices = input.Indices; + var indices = input.GetIndices(); int ii = 0; int ivSrc = indices[ii]; Contracts.Assert(ivSrc < size); @@ -783,7 +781,8 @@ private static void FillValues(in VBuffer input, BufferBuilder b Contracts.Assert(input.Length == scale.Length); int size = scale.Length; - int count = input.Count; + var values = input.GetValues(); + int count = values.Length; Contracts.Assert(0 <= count & count <= size); // We always start with sparse, since we may make things sparser than the source. @@ -796,7 +795,6 @@ private static void FillValues(in VBuffer input, BufferBuilder b return; } - var values = input.Values; if (count >= size) { for (int i = 0; i < size; i++) @@ -805,7 +803,7 @@ private static void FillValues(in VBuffer input, BufferBuilder b } // The input is sparse. 
- var indices = input.Indices; + var indices = input.GetIndices(); int ii = 0; int ivSrc = indices[ii]; int inz = 0; @@ -983,7 +981,8 @@ private static void FillValues(in VBuffer input, BufferBuilder b { Contracts.Assert(input.Length == mean.Length); int size = mean.Length; - int count = input.Count; + var values = input.GetValues(); + int count = values.Length; Contracts.Assert(0 <= count & count <= size); // We always start with sparse, since we may make things sparser than the source. @@ -992,7 +991,6 @@ private static void FillValues(in VBuffer input, BufferBuilder b if (count == 0) return; - var values = input.Values; if (count >= size) { for (int i = 0; i < size; i++) @@ -1009,7 +1007,7 @@ private static void FillValues(in VBuffer input, BufferBuilder b } // The input is sparse. - var indices = input.Indices; + var indices = input.GetIndices(); for (int ii = 0; ii < indices.Length; ii++) { var ivDst = indices[ii]; @@ -1101,14 +1099,14 @@ public override Delegate GetGetter(IRow input, int icol) (ref TFloat dst) => { getSrc(ref dst); - GetResult(ref dst, ref dst); + GetResult(dst, ref dst); }; return del; } - private void GetResult(ref TFloat input, ref TFloat value) + private void GetResult(TFloat input, ref TFloat value) { - value = BinUtils.GetValue(ref input, _binUpperBounds, _den, _offset); + value = BinUtils.GetValue(input, _binUpperBounds, _den, _offset); } } @@ -1197,7 +1195,8 @@ private void GetResult(in VBuffer input, ref VBuffer value, Buff { Contracts.Assert(input.Length == _binUpperBounds.Length); int size = _binUpperBounds.Length; - int count = input.Count; + var values = input.GetValues(); + int count = values.Length; Contracts.Assert(0 <= count & count <= size); // We always start with sparse, since we may make things sparser than the source. 
@@ -1208,18 +1207,17 @@ private void GetResult(in VBuffer input, ref VBuffer value, Buff return; } - var values = input.Values; if (count >= size) { if (_offset != null) { for (int i = 0; i < size; i++) - bldr.AddFeature(i, BinUtils.GetValue(ref values[i], _binUpperBounds[i], _den[i], _offset[i])); + bldr.AddFeature(i, BinUtils.GetValue(values[i], _binUpperBounds[i], _den[i], _offset[i])); } else { for (int i = 0; i < size; i++) - bldr.AddFeature(i, BinUtils.GetValue(ref values[i], _binUpperBounds[i], _den[i])); + bldr.AddFeature(i, BinUtils.GetValue(values[i], _binUpperBounds[i], _den[i])); } bldr.GetResult(ref value); return; @@ -1228,7 +1226,7 @@ private void GetResult(in VBuffer input, ref VBuffer value, Buff // The input is sparse. if (_offset != null) { - var indices = input.Indices; + var indices = input.GetIndices(); int ii = 0; int ivSrc = indices[ii]; Contracts.Assert(ivSrc < size); @@ -1239,13 +1237,13 @@ private void GetResult(in VBuffer input, ref VBuffer value, Buff if (ivDst == ivSrc) { bldr.AddFeature(ivDst, - BinUtils.GetValue(ref values[ii], _binUpperBounds[ivDst], _den[ivDst], _offset[ivDst])); + BinUtils.GetValue(values[ii], _binUpperBounds[ivDst], _den[ivDst], _offset[ivDst])); ivSrc = ++ii < count ? 
indices[ii] : size; Contracts.Assert(ii == count || ivSrc < size); } else bldr.AddFeature(ivDst, - BinUtils.GetValue(ref zero, _binUpperBounds[ivDst], _den[ivDst], _offset[ivDst])); + BinUtils.GetValue(zero, _binUpperBounds[ivDst], _den[ivDst], _offset[ivDst])); } } else @@ -1255,7 +1253,7 @@ private void GetResult(in VBuffer input, ref VBuffer value, Buff { int i = indices[ii]; Contracts.Assert(0 <= i & i < size); - bldr.AddFeature(i, BinUtils.GetValue(ref values[ii], _binUpperBounds[i], _den[i])); + bldr.AddFeature(i, BinUtils.GetValue(values[ii], _binUpperBounds[i], _den[i])); } } @@ -1376,7 +1374,7 @@ public static TFloat Cdf(TFloat input, TFloat mean, TFloat stddev) internal static partial class BinUtils { - public static TFloat GetValue(ref TFloat input, TFloat[] binUpperBounds, TFloat den, TFloat offset) + public static TFloat GetValue(TFloat input, TFloat[] binUpperBounds, TFloat den, TFloat offset) { if (TFloat.IsNaN(input)) return input; @@ -1387,7 +1385,7 @@ public static TFloat GetValue(ref TFloat input, TFloat[] binUpperBounds, TFloat return value; } - public static TFloat GetValue(ref TFloat input, TFloat[] binUpperBounds, TFloat den) + public static TFloat GetValue(TFloat input, TFloat[] binUpperBounds, TFloat den) { if (TFloat.IsNaN(input)) return input; @@ -1803,21 +1801,20 @@ protected override bool ProcessValue(in VBuffer buffer) int size = _values.Length; Host.Check(buffer.Length == size); - int count = buffer.Count; + var values = buffer.GetValues(); + int count = values.Length; Host.Assert(0 <= count & count <= size); if (count == 0) return true; if (count == size) { - var values = buffer.Values; for (int j = 0; j < count; j++) _values[j].Add(values[j]); } else { - var indices = buffer.Indices; - var values = buffer.Values; + var indices = buffer.GetIndices(); for (int k = 0; k < count; k++) { var val = values[k]; diff --git a/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs b/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs index 
1b70ff0f2d..49372056e7 100644 --- a/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs +++ b/src/Microsoft.ML.Data/Transforms/TermTransformImpl.cs @@ -106,7 +106,7 @@ public TextImpl(bool sorted) _sorted = sorted; } - public override bool TryAdd(ref ReadOnlyMemory val) + public override bool TryAdd(in ReadOnlyMemory val) { if (val.IsEmpty) return false; @@ -170,7 +170,7 @@ public Impl(PrimitiveType type, InPredicate mapsToMissing, bool sort) _sort = sort; } - public override bool TryAdd(ref T val) + public override bool TryAdd(in T val) { return !_mapsToMissing(in val) && _values.TryAdd(val); } @@ -195,7 +195,7 @@ protected Builder(PrimitiveType type) /// Ensures that the item is in the set. Returns true iff it added the item. /// /// The value to consider - public abstract bool TryAdd(ref T val); + public abstract bool TryAdd(in T val); /// /// Handling for the "terms" arg. @@ -215,7 +215,7 @@ public override void ParseAddTermArg(ref ReadOnlyMemory terms, IChannel ch ch.Warning("Empty strings ignored in 'terms' specification"); else if (!tryParse(in term, out val)) throw ch.Except($"Item '{term}' in 'terms' specification could not be parsed as '{ItemType}'"); - else if (!TryAdd(ref val)) + else if (!TryAdd(in val)) ch.Warning($"Duplicate item '{term}' ignored in 'terms' specification", term); } @@ -240,7 +240,7 @@ public override void ParseAddTermArg(string[] terms, IChannel ch) ch.Warning("Empty strings ignored in 'term' specification"); else if (!tryParse(in term, out val)) ch.Warning("Item '{0}' ignored in 'term' specification since it could not be parsed as '{1}'", term, ItemType); - else if (!TryAdd(ref val)) + else if (!TryAdd(in val)) ch.Warning("Duplicate item '{0}' ignored in 'term' specification", term); } @@ -361,7 +361,7 @@ public sealed override bool ProcessRow() if (_remaining <= 0) return false; _getter(ref _val); - return !_bldr.TryAdd(ref _val) || --_remaining > 0; + return !_bldr.TryAdd(in _val) || --_remaining > 0; } } @@ -381,10 +381,10 @@ 
public ImplVec(ValueGetter> getter, int max, Builder bldr) _bldr = bldr; } - private bool AccumAndDecrement(ref T val) + private bool AccumAndDecrement(in T val) { Contracts.Assert(_remaining > 0); - return !_bldr.TryAdd(ref val) || --_remaining > 0; + return !_bldr.TryAdd(in val) || --_remaining > 0; } public sealed override bool ProcessRow() @@ -393,11 +393,12 @@ public sealed override bool ProcessRow() if (_remaining <= 0) return false; _getter(ref _val); + var values = _val.GetValues(); if (_val.IsDense || _addedDefaultFromSparse) { - for (int i = 0; i < _val.Count; ++i) + for (int i = 0; i < values.Length; ++i) { - if (!AccumAndDecrement(ref _val.Values[i])) + if (!AccumAndDecrement(in values[i])) return false; } return true; @@ -412,21 +413,22 @@ public sealed override bool ProcessRow() // excited about the slight inefficiency of that first if check. Contracts.Assert(!_val.IsDense && !_addedDefaultFromSparse); T def = default(T); - for (int i = 0; i < _val.Count; ++i) + var valIndices = _val.GetIndices(); + for (int i = 0; i < values.Length; ++i) { - if (!_addedDefaultFromSparse && _val.Indices[i] != i) + if (!_addedDefaultFromSparse && valIndices[i] != i) { _addedDefaultFromSparse = true; - if (!AccumAndDecrement(ref def)) + if (!AccumAndDecrement(in def)) return false; } - if (!AccumAndDecrement(ref _val.Values[i])) + if (!AccumAndDecrement(in values[i])) return false; } if (!_addedDefaultFromSparse) { _addedDefaultFromSparse = true; - if (!AccumAndDecrement(ref def)) + if (!AccumAndDecrement(in def)) return false; } return true; @@ -960,15 +962,15 @@ public override Delegate GetMappingGetter(IRow input) bldr.Reset(cval, dense: false); - var values = src.Values; - var indices = !src.IsDense ? 
src.Indices : null; - int count = src.Count; + var values = src.GetValues(); + var indices = src.GetIndices(); + int count = values.Length; for (int islot = 0; islot < count; islot++) { map(in values[islot], ref dstItem); if (dstItem != 0) { - int slot = indices != null ? indices[islot] : islot; + int slot = !src.IsDense ? indices[islot] : islot; bldr.AddFeature(slot, dstItem); } } @@ -998,7 +1000,7 @@ public override Delegate GetMappingGetter(IRow input) // unrecognized items. bldr.Reset(cval, dense: false); - var values = src.Values; + var values = src.GetValues(); if (src.IsDense) { for (int slot = 0; slot < src.Length; ++slot) @@ -1010,19 +1012,19 @@ public override Delegate GetMappingGetter(IRow input) } else { - var indices = src.Indices; - int nextExplicitSlot = src.Count == 0 ? src.Length : indices[0]; + var indices = src.GetIndices(); + int nextExplicitSlot = indices.Length == 0 ? src.Length : indices[0]; int islot = 0; for (int slot = 0; slot < src.Length; ++slot) { if (nextExplicitSlot == slot) { // This was an explicitly defined value. - _host.Assert(islot < src.Count); + _host.Assert(islot < values.Length); map(in values[islot], ref dstItem); if (dstItem != 0) bldr.AddFeature(slot, dstItem); - nextExplicitSlot = ++islot == src.Count ? src.Length : indices[islot]; + nextExplicitSlot = ++islot == indices.Length ? src.Length : indices[islot]; } else { diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 029dc15d20..60e7d6d1a7 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -146,13 +146,17 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) // Sparse case. // Approximate new count is min(#indices, newLength). 
- var newCount = Math.Min(src.Count, newLength); + var newCount = Math.Min(srcValues.Length, newLength); var indices = dst.GetIndices(); var srcIndices = src.GetIndices(); Contracts.Assert(newCount <= src.Length); - mutation = VBufferMutationContext.Create(ref dst, newLength, newCount); + mutation = VBufferMutationContext.Create( + ref dst, + newLength, + newCount, + requireIndicesOnDense: true); int iiDst = 0; int iiSrc = 0; @@ -162,7 +166,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) // REVIEW: Consider using a BitArray with the slots to keep instead of SlotsMax. It would // only make sense when the number of ranges is greater than the number of slots divided by 32. int max = SlotsMax[iRange]; - while (iiSrc < src.Count) + while (iiSrc < srcValues.Length) { // Copy (with offset) the elements before the current range. var index = srcIndices[iiSrc]; @@ -206,10 +210,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(index <= max); } - mutation.Complete(ref dst); - // now change the ValuesCount to iiDst to be correct - VBufferMutationContext.Create(ref dst, newLength, iiDst) - .Complete(ref dst); + mutation.Complete(ref dst, iiDst); } } } diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index afa70713b4..1dadd67baf 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -94,8 +94,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices } } - // TODO: eerhardt - this should be new VBuffer(src.Length, count); - mutation.Complete(ref dst); + mutation.Complete(ref dst, count); } } } From 001f7d8213754010cd4ea830dbda91a27e706632 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 8 Nov 2018 11:22:55 -0600 Subject: [PATCH 06/14] Remove the rest of the VBuffer.Count usages and make VBuffer.Count private. 
--- src/Microsoft.ML.Core/Data/VBuffer.cs | 52 ++++----- src/Microsoft.ML.CpuMath/AlignedArray.cs | 2 +- .../CpuMathUtils.netcoreapp.cs | 2 +- .../CpuMathUtils.netstandard.cs | 2 +- src/Microsoft.ML.CpuMath/Sse.cs | 3 +- src/Microsoft.ML.CpuMath/SseIntrinsics.cs | 2 +- .../BinFile/BinFinder.cs | 38 ++++--- src/Microsoft.ML.FastTree/FastTree.cs | 16 ++- src/Microsoft.ML.Transforms/GcnTransform.cs | 105 +++++++++--------- .../LearnerFeatureSelection.cs | 34 +++--- .../Microsoft.ML.Transforms.csproj | 1 + .../MissingValueDroppingTransformer.cs | 72 ++++++------ .../MissingValueIndicatorTransform.cs | 59 ++++------ .../MissingValueIndicatorTransformer.cs | 8 +- .../MissingValueReplacing.cs | 17 +-- .../MissingValueReplacingUtils.cs | 13 +-- .../MutualInformationFeatureSelection.cs | 84 +++++++------- .../RandomFourierFeaturizing.cs | 8 +- .../Text/CharTokenizeTransform.cs | 59 +++++----- .../Text/LdaSingleBox.cs | 29 +++-- .../Text/LdaTransform.cs | 53 +++++---- .../Text/NgramTransform.cs | 9 +- .../Text/NgramUtils.cs | 8 +- .../Text/StopWordsRemoverTransform.cs | 18 +-- .../Text/TextNormalizerTransform.cs | 9 +- .../Text/WordEmbeddingsTransform.cs | 31 +++--- .../Text/WordTokenizeTransform.cs | 16 ++- .../UngroupTransform.cs | 10 +- .../VectorWhitening.cs | 29 +++-- 29 files changed, 395 insertions(+), 394 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index bca9d5fe6b..d1a30a9e53 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -20,15 +20,15 @@ public readonly struct VBuffer private readonly int[] _indices; /// - /// The logical length of the buffer. + /// The number of items explicitly represented. This is == Length when the representation + /// is dense and < Length when sparse. /// - public readonly int Length; + private readonly int _count; /// - /// The number of items explicitly represented. 
This is == Length when the representation - /// is dense and < Length when sparse. + /// The logical length of the buffer. /// - public readonly int Count; + public readonly int Length; /// /// The values. Only the first Count of these are valid. @@ -44,7 +44,7 @@ public readonly struct VBuffer /// /// The explicitly represented values. /// - public ReadOnlySpan GetValues() => _values.AsSpan(0, Count); + public ReadOnlySpan GetValues() => _values.AsSpan(0, _count); /// /// The indices. For a dense representation, this array is not used. For a sparse representation @@ -56,7 +56,7 @@ public readonly struct VBuffer /// - non-zeros values 98 and 76 respectively at the 4th and 6th coordinates /// - zeros at all other coordinates /// - public ReadOnlySpan GetIndices() => IsDense ? default : _indices.AsSpan(0, Count); + public ReadOnlySpan GetIndices() => IsDense ? default : _indices.AsSpan(0, _count); /// /// Gets a value indicating whether every logical element is explicitly @@ -66,8 +66,8 @@ public bool IsDense { get { - Contracts.Assert(Count <= Length); - return Count == Length; + Contracts.Assert(_count <= Length); + return _count == Length; } } @@ -81,7 +81,7 @@ public VBuffer(int length, T[] values, int[] indices = null) Contracts.CheckValueOrNull(indices); Length = length; - Count = length; + _count = length; _values = values; _indices = indices; } @@ -113,7 +113,7 @@ public VBuffer(int length, int count, T[] values, int[] indices) #endif Length = length; - Count = count; + _count = count; _values = values; _indices = indices; } @@ -138,7 +138,7 @@ public void CopyToDense(ref VBuffer dst) /// public void CopyTo(ref VBuffer dst) { - var mutation = VBufferMutationContext.Create(ref dst, Length, Count); + var mutation = VBufferMutationContext.Create(ref dst, Length, _count); if (IsDense) { if (Length > 0) @@ -150,10 +150,10 @@ public void CopyTo(ref VBuffer dst) } else { - if (Count > 0) + if (_count > 0) { - _values.AsSpan(0, Count).CopyTo(mutation.Values); - 
_indices.AsSpan(0, Count).CopyTo(mutation.Indices); + _values.AsSpan(0, _count).CopyTo(mutation.Values); + _indices.AsSpan(0, _count).CopyTo(mutation.Indices); } mutation.Complete(ref dst); } @@ -180,10 +180,10 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) else { int copyCount = 0; - if (Count > 0) + if (_count > 0) { - int copyMin = _indices.FindIndexSorted(0, Count, srcMin); - int copyLim = _indices.FindIndexSorted(copyMin, Count, srcMin + length); + int copyMin = _indices.FindIndexSorted(0, _count, srcMin); + int copyLim = _indices.FindIndexSorted(copyMin, _count, srcMin + length); Contracts.Assert(copyMin <= copyLim); copyCount = copyLim - copyMin; var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); @@ -400,14 +400,14 @@ public void CopyTo(Span dst) return; } - if (Count == 0) + if (_count == 0) { dst.Slice(ivDst, Length).Clear(); return; } int iv = 0; - for (int islot = 0; islot < Count; islot++) + for (int islot = 0; islot < _count; islot++) { int slot = _indices[islot]; Contracts.Assert(slot >= iv); @@ -437,12 +437,12 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) public IEnumerable> Items(bool all = false) { - return VBufferUtils.Items(_values, _indices, Length, Count, all); + return VBufferUtils.Items(_values, _indices, Length, _count, all); } public IEnumerable DenseValues() { - return VBufferUtils.DenseValues(_values, _indices, Length, Count); + return VBufferUtils.DenseValues(_values, _indices, Length, _count); } public void GetItemOrDefault(int slot, ref T dst) @@ -452,7 +452,7 @@ public void GetItemOrDefault(int slot, ref T dst) int index; if (IsDense) dst = _values[slot]; - else if (Count > 0 && _indices.TryFindIndexSorted(0, Count, slot, out index)) + else if (_count > 0 && _indices.TryFindIndexSorted(0, _count, slot, out index)) dst = _values[index]; else dst = default(T); @@ -465,17 +465,17 @@ public T GetItemOrDefault(int slot) int index; if (IsDense) return _values[slot]; - 
if (Count > 0 && _indices.TryFindIndexSorted(0, Count, slot, out index)) + if (_count > 0 && _indices.TryFindIndexSorted(0, _count, slot, out index)) return _values[index]; return default(T); } public override string ToString() - => IsDense ? $"Dense vector of size {Length}" : $"Sparse vector of size {Length}, {Count} explicit values"; + => IsDense ? $"Dense vector of size {Length}" : $"Sparse vector of size {Length}, {_count} explicit values"; internal VBufferMutationContext GetMutableContext() { - return GetMutableContext(Length, Count, null, false, false); + return GetMutableContext(Length, _count, null, false, false); } internal VBufferMutationContext GetMutableContext( diff --git a/src/Microsoft.ML.CpuMath/AlignedArray.cs b/src/Microsoft.ML.CpuMath/AlignedArray.cs index 87583a8ef6..9902edc4df 100644 --- a/src/Microsoft.ML.CpuMath/AlignedArray.cs +++ b/src/Microsoft.ML.CpuMath/AlignedArray.cs @@ -146,7 +146,7 @@ public void CopyFrom(int start, Float[] src, int index, int count) // valuesSrc contains only the non-zero entries. Those are copied into their logical positions in the dense array. // rgposSrc contains the logical positions + offset of the non-zero entries in the dense array. // rgposSrc runs parallel to the valuesSrc array. 
- public void CopyFrom(int[] rgposSrc, Float[] valuesSrc, int posMin, int iposMin, int iposLim, bool zeroItems) + public void CopyFrom(ReadOnlySpan rgposSrc, ReadOnlySpan valuesSrc, int posMin, int iposMin, int iposLim, bool zeroItems) { Contracts.Assert(rgposSrc != null); Contracts.Assert(valuesSrc != null); diff --git a/src/Microsoft.ML.CpuMath/CpuMathUtils.netcoreapp.cs b/src/Microsoft.ML.CpuMath/CpuMathUtils.netcoreapp.cs index e9d95ccc1d..973b2278a3 100644 --- a/src/Microsoft.ML.CpuMath/CpuMathUtils.netcoreapp.cs +++ b/src/Microsoft.ML.CpuMath/CpuMathUtils.netcoreapp.cs @@ -88,7 +88,7 @@ public static void MatrixTimesSource(bool transpose, AlignedArray matrix, Aligne } } - public static void MatrixTimesSource(AlignedArray matrix, int[] rgposSrc, AlignedArray sourceValues, + public static void MatrixTimesSource(AlignedArray matrix, ReadOnlySpan rgposSrc, AlignedArray sourceValues, int posMin, int iposMin, int iposLimit, AlignedArray destination, int stride) { Contracts.AssertValue(rgposSrc); diff --git a/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs b/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs index bc9569d390..5d54ee6fe0 100644 --- a/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs +++ b/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs @@ -18,7 +18,7 @@ public static int GetVectorAlignment() public static void MatrixTimesSource(bool transpose, AlignedArray matrix, AlignedArray source, AlignedArray destination, int stride) => SseUtils.MatTimesSrc(transpose, matrix, source, destination, stride); - public static void MatrixTimesSource(AlignedArray matrix, int[] rgposSrc, AlignedArray sourceValues, + public static void MatrixTimesSource(AlignedArray matrix, ReadOnlySpan rgposSrc, AlignedArray sourceValues, int posMin, int iposMin, int iposLimit, AlignedArray destination, int stride) => SseUtils.MatTimesSrc(matrix, rgposSrc, sourceValues, posMin, iposMin, iposLimit, destination, stride); public static void Add(float value, Span destination) 
=> SseUtils.Add(value, destination); diff --git a/src/Microsoft.ML.CpuMath/Sse.cs b/src/Microsoft.ML.CpuMath/Sse.cs index 3ff59f2840..5c4ace31cd 100644 --- a/src/Microsoft.ML.CpuMath/Sse.cs +++ b/src/Microsoft.ML.CpuMath/Sse.cs @@ -57,13 +57,12 @@ public static void MatTimesSrc(bool tran, AlignedArray mat, AlignedArray src, Al } } - public static void MatTimesSrc(AlignedArray mat, int[] rgposSrc, AlignedArray srcValues, + public static void MatTimesSrc(AlignedArray mat, ReadOnlySpan rgposSrc, AlignedArray srcValues, int posMin, int iposMin, int iposLim, AlignedArray dst, int crun) { Contracts.Assert(Compat(mat)); Contracts.Assert(Compat(srcValues)); Contracts.Assert(Compat(dst)); - Contracts.AssertValue(rgposSrc); Contracts.Assert(0 <= iposMin && iposMin <= iposLim && iposLim <= rgposSrc.Length); Contracts.Assert(mat.Size == dst.Size * srcValues.Size); diff --git a/src/Microsoft.ML.CpuMath/SseIntrinsics.cs b/src/Microsoft.ML.CpuMath/SseIntrinsics.cs index cf85a98132..8e1755e797 100644 --- a/src/Microsoft.ML.CpuMath/SseIntrinsics.cs +++ b/src/Microsoft.ML.CpuMath/SseIntrinsics.cs @@ -276,7 +276,7 @@ public static unsafe void MatMul(ReadOnlySpan mat, ReadOnlySpan sr } // Partial sparse source vector. 
- public static unsafe void MatMulP(AlignedArray mat, int[] rgposSrc, AlignedArray src, + public static unsafe void MatMulP(AlignedArray mat, ReadOnlySpan rgposSrc, AlignedArray src, int posMin, int iposMin, int iposEnd, AlignedArray dst, int crow, int ccol) { MatMulP(mat.Items, rgposSrc, src.Items, posMin, iposMin, iposEnd, dst.Items, crow, ccol); diff --git a/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs b/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs index 782d85f24a..8deceef6e0 100644 --- a/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs +++ b/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs @@ -17,6 +17,7 @@ internal sealed class BinFinder { private readonly GreedyBinFinder _finder; private double[] _distinctValues; + private double[] _distinctCountsBuffer; private int[] _counts; private static double[] _trivialBinUpperBounds; // Will be initialized to a single element positive infinity array. @@ -43,15 +44,19 @@ public BinFinder() /// The scheme is destructive, because it modifies the arrays within . /// /// The values we are binning + /// A buffer space to work over the values, so the original + /// values aren't modified. 
/// This working array will be filled with a sorted list of the /// distinct values detected within /// This working array will be filled with a sorted list of the distinct /// values detected within /// The logical length of both and /// - private int FindDistinctCounts(in VBuffer values, double[] distinctValues, int[] counts) + private int FindDistinctCounts(in VBuffer values, double[] valueBuffer, double[] distinctValues, int[] counts) { - if (values.Count == 0) + var valueValues = values.GetValues(); + var valuesCount = valueValues.Length; + if (valuesCount == 0) { if (values.Length == 0) return 0; @@ -59,30 +64,31 @@ private int FindDistinctCounts(in VBuffer values, double[] distinctValue counts[0] = values.Length; return 1; } - var valArray = values.Values; // Get histogram of values - Array.Sort(valArray, 0, values.Count); + Contracts.Assert(valueBuffer.Length >= valuesCount); + valueValues.CopyTo(valueBuffer); + Array.Sort(valueBuffer, 0, valuesCount); // Note that Array.Sort will, by MSDN documentation, make NaN be the first item of a sorted // list (that is, NaN is considered to be ordered "below" any other value for the purpose of // a sort, including negative infinity). So when checking if values contains no NaN values, it // suffices to check only the first item. - if (double.IsNaN(valArray[0])) + if (double.IsNaN(valueBuffer[0])) return -1; int idist = 0; // Index into the "distinct" arrays. - if (!values.IsDense && valArray[0] > 0) + if (!values.IsDense && valueBuffer[0] > 0) { // Implicit zeros at the head. 
distinctValues[0] = 0; - counts[0] = values.Length - values.Count; + counts[0] = values.Length - valuesCount; idist = 1; } - double last = distinctValues[idist] = valArray[0]; + double last = distinctValues[idist] = valueBuffer[0]; counts[idist] = 1; - for (int i = 1; i < values.Count; ++i) + for (int i = 1; i < valuesCount; ++i) { - double curr = valArray[i]; + double curr = valueBuffer[i]; if (curr != last) { Contracts.Assert(curr > last); @@ -92,7 +98,7 @@ private int FindDistinctCounts(in VBuffer values, double[] distinctValue { // This boundary is going from negative, to non-negative, and there are "implicit" zeros. distinctValues[idist] = 0; - counts[idist] = values.Length - values.Count; + counts[idist] = values.Length - valuesCount; if (curr == 0) { // No need to do any more work. @@ -117,7 +123,7 @@ private int FindDistinctCounts(in VBuffer values, double[] distinctValue { // Implicit zeros at the tail. distinctValues[++idist] = 0; - counts[idist] = values.Length - values.Count; + counts[idist] = values.Length - valuesCount; } return idist + 1; @@ -224,17 +230,19 @@ public bool FindBins(in VBuffer values, int maxBins, int minPerLeaf, out Contracts.Assert(maxBins > 0); Contracts.Assert(minPerLeaf >= 0); - if (values.Count == 0) + var valuesCount = values.GetValues().Length; + if (valuesCount == 0) { binUpperBounds = TrivialBinUpperBounds; return true; } - int arraySize = values.IsDense ? values.Count : values.Count + 1; + int arraySize = values.IsDense ? 
valuesCount : valuesCount + 1; + Utils.EnsureSize(ref _distinctCountsBuffer, arraySize, arraySize, keepOld: false); Utils.EnsureSize(ref _distinctValues, arraySize, arraySize, keepOld: false); Utils.EnsureSize(ref _counts, arraySize, arraySize, keepOld: false); - int numValues = FindDistinctCounts(in values, _distinctValues, _counts); + int numValues = FindDistinctCounts(in values, _distinctCountsBuffer, _distinctValues, _counts); if (numValues < 0) { binUpperBounds = null; diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 9728598d3b..2238ef334b 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1357,20 +1357,18 @@ private ValueMapper, VBuffer> GetCopier(ColumnType itemT return (in VBuffer src, ref VBuffer dst) => { - var indices = dst.Indices; - var values = dst.Values; - if (src.Count > 0) + var srcValues = src.GetValues(); + var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); + if (srcValues.Length > 0) { if (!src.IsDense) { - Utils.EnsureSize(ref indices, src.Count); - Array.Copy(src.Indices, indices, src.Count); + src.GetIndices().CopyTo(mutation.Indices); } - Utils.EnsureSize(ref values, src.Count); - for (int i = 0; i < src.Count; ++i) - conv(in src.Values[i], ref values[i]); + for (int i = 0; i < srcValues.Length; ++i) + conv(in srcValues[i], ref mutation.Values[i]); } - dst = new VBuffer(src.Length, src.Count, values, indices); + mutation.Complete(ref dst); }; } diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 9c09bf940f..ef52198f1a 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -483,8 +483,9 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var mean = Mean(src.Values, src.Count, src.Length); - var divisor = StdDev(src.Values, 
src.Count, src.Length, mean); + var srcValues = src.GetValues(); + var mean = Mean(srcValues, src.Length); + var divisor = StdDev(srcValues, src.Length, mean); FillValues(Host, in src, ref dst, divisor, scale, mean); }; return del; @@ -493,8 +494,9 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var mean = Mean(src.Values, src.Count, src.Length); - var divisor = L2Norm(src.Values, src.Count, mean); + var srcValues = src.GetValues(); + var mean = Mean(srcValues, src.Length); + var divisor = L2Norm(srcValues, mean); FillValues(Host, in src, ref dst, divisor, scale, mean); }; return del; @@ -503,8 +505,9 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var mean = Mean(src.Values, src.Count, src.Length); - var divisor = L1Norm(src.Values, src.Count, mean); + var srcValues = src.GetValues(); + var mean = Mean(srcValues, src.Length); + var divisor = L1Norm(srcValues, mean); FillValues(Host, in src, ref dst, divisor, scale, mean); }; return del; @@ -513,8 +516,9 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var mean = Mean(src.Values, src.Count, src.Length); - var divisor = LInfNorm(src.Values, src.Count, mean); + var srcValues = src.GetValues(); + var mean = Mean(srcValues, src.Length); + var divisor = LInfNorm(srcValues, mean); FillValues(Host, in src, ref dst, divisor, scale, mean); }; return del; @@ -531,7 +535,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var divisor = StdDev(src.Values, src.Count, src.Length); + var divisor = StdDev(src.GetValues(), src.Length); FillValues(Host, in src, ref dst, divisor, scale); }; return del; @@ -540,7 +544,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { 
getSrc(ref src); - var divisor = L2Norm(src.Values, src.Count); + var divisor = L2Norm(src.GetValues()); FillValues(Host, in src, ref dst, divisor, scale); }; return del; @@ -549,7 +553,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var divisor = L1Norm(src.Values, src.Count); + var divisor = L1Norm(src.GetValues()); FillValues(Host, in src, ref dst, divisor, scale); }; return del; @@ -558,7 +562,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose (ref VBuffer dst) => { getSrc(ref src); - var divisor = LInfNorm(src.Values, src.Count); + var divisor = LInfNorm(src.GetValues()); FillValues(Host, in src, ref dst, divisor, scale); }; return del; @@ -570,14 +574,15 @@ protected override Delegate MakeGetter(IRow input, int iinfo, out Action dispose private static void FillValues(IExceptionContext ectx, in VBuffer src, ref VBuffer dst, float divisor, float scale, float offset = 0) { - int count = src.Count; + var srcValues = src.GetValues(); + int count = srcValues.Length; int length = src.Length; - ectx.Assert(Utils.Size(src.Values) >= count); ectx.Assert(divisor >= 0); if (count == 0) { - dst = new VBuffer(length, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, length, 0) + .Complete(ref dst); return; } ectx.Assert(count > 0); @@ -591,21 +596,18 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re if (normScale < MinScale) normScale = 1; + VBufferMutationContext mutation; if (offset == 0) { - var dstValues = dst.Values; - if (Utils.Size(dstValues) < count) - dstValues = new float[count]; - var dstIndices = dst.Indices; + mutation = VBufferMutationContext.Create(ref dst, length, count); + var dstValues = mutation.Values; if (!src.IsDense) { - if (Utils.Size(dstIndices) < count) - dstIndices = new int[count]; - Array.Copy(src.Indices, dstIndices, count); + src.GetIndices().CopyTo(mutation.Indices); } - 
CpuMathUtils.Scale(normScale, src.Values, dstValues, count); - dst = new VBuffer(length, count, dstValues, dstIndices); + CpuMathUtils.Scale(normScale, src.GetValues(), dstValues, count); + mutation.Complete(ref dst); return; } @@ -613,10 +615,11 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re // Subtracting the mean requires a dense representation. src.CopyToDense(ref dst); + mutation = VBufferMutationContext.CreateFromBuffer(ref dst); if (normScale != 1) - CpuMathUtils.ScaleAdd(normScale, -offset, dst.Values.AsSpan(0, length)); + CpuMathUtils.ScaleAdd(normScale, -offset, mutation.Values); else - CpuMathUtils.Add(-offset, dst.Values.AsSpan(0, length)); + CpuMathUtils.Add(-offset, mutation.Values); } /// @@ -624,21 +627,21 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re /// based on centered values (i.e. after subtracting the mean). But since the centered /// values mean is approximately zero, we can use variance of non-centered values. /// - private static float StdDev(float[] values, int count, int length) + private static float StdDev(ReadOnlySpan values, int length) { - Contracts.Assert(0 <= count && count <= length); - if (count == 0) + Contracts.Assert(0 <= values.Length && values.Length <= length); + if (values.Length == 0) return 0; // We need a mean to compute variance. - var tmpMean = CpuMathUtils.Sum(values.AsSpan(0, count)) / length; + var tmpMean = CpuMathUtils.Sum(values) / length; float sumSq = 0; - if (count != length && tmpMean != 0) + if (values.Length != length && tmpMean != 0) { // Sparse representation. 
float meanSq = tmpMean * tmpMean; - sumSq = (length - count) * meanSq; + sumSq = (length - values.Length) * meanSq; } - sumSq += CpuMathUtils.SumSq(tmpMean, values.AsSpan(0, count)); + sumSq += CpuMathUtils.SumSq(tmpMean, values); return MathUtils.Sqrt(sumSq / length); } @@ -646,19 +649,19 @@ private static float StdDev(float[] values, int count, int length) /// Compute Standard Deviation. /// We have two overloads of StdDev instead of one with mean for perf reasons. /// - private static float StdDev(float[] values, int count, int length, float mean) + private static float StdDev(ReadOnlySpan values, int length, float mean) { - Contracts.Assert(0 <= count && count <= length); - if (count == 0) + Contracts.Assert(0 <= values.Length && values.Length <= length); + if (values.Length == 0) return 0; float sumSq = 0; - if (count != length && mean != 0) + if (values.Length != length && mean != 0) { // Sparse representation. float meanSq = mean * mean; - sumSq = (length - count) * meanSq; + sumSq = (length - values.Length) * meanSq; } - sumSq += CpuMathUtils.SumSq(mean, values.AsSpan(0, count)); + sumSq += CpuMathUtils.SumSq(mean, values); return MathUtils.Sqrt(sumSq / length); } @@ -666,40 +669,40 @@ private static float StdDev(float[] values, int count, int length, float mean) /// Compute L2-norm. L2-norm computation doesn't subtract the mean from the source values. /// However, we substract the mean here in case subMean is true (if subMean is false, mean is zero). /// - private static float L2Norm(float[] values, int count, float mean = 0) + private static float L2Norm(ReadOnlySpan values, float mean = 0) { - if (count == 0) + if (values.Length == 0) return 0; - return MathUtils.Sqrt(CpuMathUtils.SumSq(mean, values.AsSpan(0, count))); + return MathUtils.Sqrt(CpuMathUtils.SumSq(mean, values)); } /// /// Compute L1-norm. L1-norm computation doesn't subtract the mean from the source values. 
/// However, we substract the mean here in case subMean is true (if subMean is false, mean is zero). /// - private static float L1Norm(float[] values, int count, float mean = 0) + private static float L1Norm(ReadOnlySpan values, float mean = 0) { - if (count == 0) + if (values.Length == 0) return 0; - return CpuMathUtils.SumAbs(mean, values.AsSpan(0, count)); + return CpuMathUtils.SumAbs(mean, values); } /// /// Compute LInf-norm. LInf-norm computation doesn't subtract the mean from the source values. /// However, we substract the mean here in case subMean is true (if subMean is false, mean is zero). /// - private static float LInfNorm(float[] values, int count, float mean = 0) + private static float LInfNorm(ReadOnlySpan values, float mean = 0) { - if (count == 0) + if (values.Length == 0) return 0; - return CpuMathUtils.MaxAbsDiff(mean, values.AsSpan(0, count)); + return CpuMathUtils.MaxAbsDiff(mean, values); } - private static float Mean(float[] src, int count, int length) + private static float Mean(ReadOnlySpan src, int length) { - if (length == 0 || count == 0) + if (length == 0 || src.Length == 0) return 0; - return CpuMathUtils.Sum(src.AsSpan(0, count)) / length; + return CpuMathUtils.Sum(src) / length; } } } diff --git a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs index a67816f348..09a054be25 100644 --- a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs @@ -120,9 +120,10 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i var col = new DropSlotsTransform.Column(); col.Source = args.FeatureColumn; selectedCount = 0; + var scoresValues = scores.GetValues(); // Degenerate case, dropping all slots. 
- if (scores.Count == 0) + if (scoresValues.Length == 0) { var range = new DropSlotsTransform.Range(); col.Slots = new DropSlotsTransform.Range[] { range }; @@ -139,13 +140,13 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i else { Contracts.Assert(args.NumSlotsToKeep.HasValue); - threshold = ComputeThreshold(scores.Values, scores.Count, args.NumSlotsToKeep.Value, out tiedScoresToKeep); + threshold = ComputeThreshold(scoresValues, args.NumSlotsToKeep.Value, out tiedScoresToKeep); } var slots = new List(); - for (int i = 0; i < scores.Count; i++) + for (int i = 0; i < scoresValues.Length; i++) { - var score = Math.Abs(scores.Values[i]); + var score = Math.Abs(scoresValues[i]); if (score > threshold) { selectedCount++; @@ -160,9 +161,9 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i var range = new DropSlotsTransform.Range(); range.Min = i; - while (++i < scores.Count) + while (++i < scoresValues.Length) { - score = Math.Abs(scores.Values[i]); + score = Math.Abs(scoresValues[i]); if (score > threshold) { selectedCount++; @@ -181,6 +182,7 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i if (!scores.IsDense) { + var scoresIndices = scores.GetIndices(); int ii = 0; var count = slots.Count; for (int i = 0; i < count; i++) @@ -190,16 +192,16 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i var min = range.Min; var max = range.Max.Value; Contracts.Assert(min <= max); - Contracts.Assert(max < scores.Count); + Contracts.Assert(max < scoresValues.Length); - range.Min = min == 0 ? 0 : scores.Indices[min - 1] + 1; - range.Max = max == scores.Count - 1 ? scores.Length - 1 : scores.Indices[max + 1] - 1; + range.Min = min == 0 ? 0 : scoresIndices[min - 1] + 1; + range.Max = max == scoresIndices.Length - 1 ? scores.Length - 1 : scoresIndices[max + 1] - 1; // Add the gaps before this range. for (; ii < min; ii++) { - var gapMin = ii == 0 ? 
0 : scores.Indices[ii - 1] + 1; - var gapMax = scores.Indices[ii] - 1; + var gapMin = ii == 0 ? 0 : scoresIndices[ii - 1] + 1; + var gapMax = scoresIndices[ii] - 1; if (gapMin <= gapMax) { var gap = new DropSlotsTransform.Range(); @@ -212,10 +214,10 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i } // Add the gaps after the last range. - for (; ii <= scores.Count; ii++) + for (; ii <= scoresIndices.Length; ii++) { - var gapMin = ii == 0 ? 0 : scores.Indices[ii - 1] + 1; - var gapMax = ii == scores.Count ? scores.Length - 1 : scores.Indices[ii] - 1; + var gapMin = ii == 0 ? 0 : scoresIndices[ii - 1] + 1; + var gapMax = ii == scoresIndices.Length ? scores.Length - 1 : scoresIndices[ii] - 1; if (gapMin <= gapMax) { var gap = new DropSlotsTransform.Range(); @@ -240,12 +242,12 @@ private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, i return null; } - private static float ComputeThreshold(float[] scores, int count, int topk, out int tiedScoresToKeep) + private static float ComputeThreshold(ReadOnlySpan scores, int topk, out int tiedScoresToKeep) { // Use a min-heap for the topk elements var heap = new Heap((f1, f2) => f1 > f2, topk); - for (int i = 0; i < count; i++) + for (int i = 0; i < scores.Length; i++) { var score = Math.Abs(scores[i]); if (float.IsNaN(score)) diff --git a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj index 11d96a6cfc..7ab146b21c 100644 --- a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj +++ b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj @@ -4,6 +4,7 @@ netstandard2.0 Microsoft.ML CORECLR + true diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index 910941e527..23d32993b3 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ 
b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -229,110 +229,110 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d { Host.AssertValue(isNA); + var srcValues = src.GetValues(); int newCount = 0; - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) { - if (!isNA(in src.Values[i])) + if (!isNA(in srcValues[i])) newCount++; } - Host.Assert(newCount <= src.Count); + Host.Assert(newCount <= srcValues.Length); if (newCount == 0) { - dst = new VBuffer(0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, 0) + .Complete(ref dst); return; } - if (newCount == src.Count) + if (newCount == srcValues.Length) { Utils.Swap(ref src, ref dst); if (!dst.IsDense) { - Host.Assert(dst.Count == newCount); - dst = new VBuffer(dst.Count, dst.Values, dst.Indices); + Host.Assert(dst.GetValues().Length == newCount); + VBufferMutationContext.Create(ref dst, newCount) + .Complete(ref dst); } return; } int iDst = 0; - var values = dst.Values; - if (Utils.Size(values) < newCount) - values = new TDst[newCount]; // Densifying sparse vectors since default value equals NA and hence should be dropped. 
- for (int i = 0; i < src.Count; i++) + var mutation = VBufferMutationContext.Create(ref dst, newCount); + for (int i = 0; i < srcValues.Length; i++) { - if (!isNA(in src.Values[i])) - values[iDst++] = src.Values[i]; + if (!isNA(in srcValues[i])) + mutation.Values[iDst++] = srcValues[i]; } Host.Assert(iDst == newCount); - dst = new VBuffer(newCount, values, dst.Indices); + mutation.Complete(ref dst); } private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredicate isNA) { Host.AssertValue(isNA); + var srcValues = src.GetValues(); int newCount = 0; - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) { - if (!isNA(in src.Values[i])) + if (!isNA(in srcValues[i])) newCount++; } - Host.Assert(newCount <= src.Count); + Host.Assert(newCount <= srcValues.Length); if (newCount == 0) { - dst = new VBuffer(src.Length - src.Count, 0, dst.Values, dst.Indices); + VBufferMutationContext.Create(ref dst, src.Length - srcValues.Length, 0) + .Complete(ref dst); return; } - if (newCount == src.Count) + if (newCount == srcValues.Length) { Utils.Swap(ref src, ref dst); return; } - var values = dst.Values; - if (Utils.Size(values) < newCount) - values = new TDst[newCount]; - int iDst = 0; if (src.IsDense) { - for (int i = 0; i < src.Count; i++) + var mutation = VBufferMutationContext.Create(ref dst, newCount); + for (int i = 0; i < srcValues.Length; i++) { - if (!isNA(in src.Values[i])) + if (!isNA(in srcValues[i])) { - values[iDst] = src.Values[i]; + mutation.Values[iDst] = srcValues[i]; iDst++; } } Host.Assert(iDst == newCount); - dst = new VBuffer(newCount, values, dst.Indices); + mutation.Complete(ref dst); } else { - var indices = dst.Indices; - if (Utils.Size(indices) < newCount) - indices = new int[newCount]; + var newLength = src.Length - (srcValues.Length - newCount); + var mutation = VBufferMutationContext.Create(ref dst, newLength, newCount); + var srcIndices = src.GetIndices(); int offset = 0; - for (int i = 0; i < src.Count; i++) + for (int i 
= 0; i < srcValues.Length; i++) { - if (!isNA(in src.Values[i])) + if (!isNA(in srcValues[i])) { - values[iDst] = src.Values[i]; - indices[iDst] = src.Indices[i] - offset; + mutation.Values[iDst] = srcValues[i]; + mutation.Indices[iDst] = srcIndices[i] - offset; iDst++; } else offset++; } Host.Assert(iDst == newCount); - Host.Assert(offset == src.Count - newCount); - dst = new VBuffer(src.Length - offset, newCount, values, indices); + Host.Assert(offset == srcValues.Length - newCount); + mutation.Complete(ref dst); } } } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 5e42acfedb..419d09807f 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -274,32 +274,26 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou private static void FillValues(Float input, ref VBuffer result) { - var values = result.Values; - var indices = result.Indices; - if (input == 0) { - result = new VBuffer(2, 0, values, indices); + VBufferMutationContext.Create(ref result, 2, 0) + .Complete(ref result); return; } - if (Utils.Size(values) < 1) - values = new Float[1]; - if (Utils.Size(indices) < 1) - indices = new int[1]; - + var mutation = VBufferMutationContext.Create(ref result, 2, 1); if (Float.IsNaN(input)) { - values[0] = 1; - indices[0] = 1; + mutation.Values[0] = 1; + mutation.Indices[0] = 1; } else { - values[0] = input; - indices[0] = 0; + mutation.Values[0] = input; + mutation.Indices[0] = 0; } - result = new VBuffer(2, 1, values, indices); + mutation.Complete(ref result); } // This converts in place. 
@@ -308,18 +302,14 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer int size = buffer.Length; ectx.Check(0 <= size & size < int.MaxValue / 2); - int count = buffer.Count; - var values = buffer.Values; - var indices = buffer.Indices; + var values = buffer.GetValues(); + var mutation = VBufferMutationContext.Create(ref buffer, size * 2, values.Length); int iivDst = 0; - if (count >= size) + if (buffer.IsDense) { // Currently, it's dense. We always produce sparse. - ectx.Assert(Utils.Size(values) >= size); - if (Utils.Size(indices) < size) - indices = new int[size]; - for (int ivSrc = 0; ivSrc < count; ivSrc++) + for (int ivSrc = 0; ivSrc < values.Length; ivSrc++) { ectx.Assert(iivDst <= ivSrc); var val = values[ivSrc]; @@ -327,13 +317,13 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer continue; if (Float.IsNaN(val)) { - values[iivDst] = 1; - indices[iivDst] = 2 * ivSrc + 1; + mutation.Values[iivDst] = 1; + mutation.Indices[iivDst] = 2 * ivSrc + 1; } else { - values[iivDst] = val; - indices[iivDst] = 2 * ivSrc; + mutation.Values[iivDst] = val; + mutation.Indices[iivDst] = 2 * ivSrc; } iivDst++; } @@ -341,11 +331,10 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer else { // Currently, it's sparse. 
- ectx.Assert(Utils.Size(values) >= count); - ectx.Assert(Utils.Size(indices) >= count); + var indices = buffer.GetIndices(); int ivPrev = -1; - for (int iivSrc = 0; iivSrc < count; iivSrc++) + for (int iivSrc = 0; iivSrc < values.Length; iivSrc++) { ectx.Assert(iivDst <= iivSrc); var val = values[iivSrc]; @@ -356,20 +345,20 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer ivPrev = iv; if (Float.IsNaN(val)) { - values[iivDst] = 1; - indices[iivDst] = 2 * iv + 1; + mutation.Values[iivDst] = 1; + mutation.Indices[iivDst] = 2 * iv + 1; } else { - values[iivDst] = val; - indices[iivDst] = 2 * iv; + mutation.Values[iivDst] = val; + mutation.Indices[iivDst] = 2 * iv; } iivDst++; } } - ectx.Assert(0 <= iivDst & iivDst <= count); - buffer = new VBuffer(size * 2, iivDst, values, indices); + ectx.Assert(0 <= iivDst & iivDst <= values.Length); + mutation.Complete(ref buffer, iivDst); } } } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransformer.cs index 440f71d0be..305196c743 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransformer.cs @@ -294,8 +294,8 @@ private void FindNAs(in VBuffer src, InPredicate isNA, bool defaultIsNA // Find the indices of all of the NAs. 
indices.Clear(); - var srcValues = src.Values; - var srcCount = src.Count; + var srcValues = src.GetValues(); + var srcCount = srcValues.Length; if (src.IsDense) { for (int i = 0; i < srcCount; i++) @@ -307,7 +307,7 @@ private void FindNAs(in VBuffer src, InPredicate isNA, bool defaultIsNA } else if (!defaultIsNA) { - var srcIndices = src.Indices; + var srcIndices = src.GetIndices(); for (int ii = 0; ii < srcCount; ii++) { if (isNA(in srcValues[ii])) @@ -318,7 +318,7 @@ private void FindNAs(in VBuffer src, InPredicate isNA, bool defaultIsNA else { // Note that this adds non-NAs to indices -- this is indicated by sense being false. - var srcIndices = src.Indices; + var srcIndices = src.GetIndices(); for (int ii = 0; ii < srcCount; ii++) { if (!isNA(in srcValues[ii])) diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs index c4e39b877f..b70f02f6ad 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacing.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacing.cs @@ -734,10 +734,8 @@ private void FillValues(in VBuffer src, ref VBuffer dst, InPredicate Host.AssertValue(isNA); int srcSize = src.Length; - int srcCount = src.Count; - var srcValues = src.Values; - Host.Assert(Utils.Size(srcValues) >= srcCount); - var srcIndices = src.Indices; + var srcValues = src.GetValues(); + int srcCount = srcValues.Length; var dstValues = dst.Values; var dstIndices = dst.Indices; @@ -768,8 +766,8 @@ private void FillValues(in VBuffer src, ref VBuffer dst, InPredicate else { // The source vector is sparse. - Host.Assert(Utils.Size(srcIndices) >= srcCount); Host.Assert(srcCount < srcSize); + var srcIndices = src.GetIndices(); // Allocate more space if necessary. 
// REVIEW: One thing that changing the code to simply ensure that there are srcCount indices in the arrays @@ -818,10 +816,8 @@ private void FillValues(in VBuffer src, ref VBuffer dst, InPredicate Host.AssertValue(isNA); int srcSize = src.Length; - int srcCount = src.Count; - var srcValues = src.Values; - Host.Assert(Utils.Size(srcValues) >= srcCount); - var srcIndices = src.Indices; + var srcValues = src.GetValues(); + int srcCount = srcValues.Length; var dstValues = dst.Values; var dstIndices = dst.Indices; @@ -830,7 +826,6 @@ private void FillValues(in VBuffer src, ref VBuffer dst, InPredicate Utils.EnsureSize(ref dstValues, srcCount, srcSize, keepOld: false); int iivDst = 0; - Host.Assert(Utils.Size(srcValues) >= srcCount); if (src.IsDense) { // The source vector is dense. @@ -852,8 +847,8 @@ private void FillValues(in VBuffer src, ref VBuffer dst, InPredicate else { // The source vector is sparse. - Host.Assert(Utils.Size(srcIndices) >= srcCount); Host.Assert(srcCount < srcSize); + var srcIndices = src.GetIndices(); // Allocate more space if necessary. 
// REVIEW: One thing that changing the code to simply ensure that there are srcCount indices in the arrays diff --git a/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs b/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs index 921cd4292e..8466d1b5ef 100644 --- a/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs +++ b/src/Microsoft.ML.Transforms/MissingValueReplacingUtils.cs @@ -185,9 +185,8 @@ protected StatAggregatorAcrossSlots(IChannel ch, IRowCursor cursor, int col) protected sealed override void ProcessRow(in VBuffer src) { - var srcCount = src.Count; - var srcValues = src.Values; - Ch.Assert(Utils.Size(srcValues) >= srcCount); + var srcValues = src.GetValues(); + var srcCount = srcValues.Length; for (int slot = 0; slot < srcCount; slot++) ProcessValue(in srcValues[slot]); @@ -210,9 +209,8 @@ protected StatAggregatorBySlot(IChannel ch, ColumnType type, IRowCursor cursor, protected sealed override void ProcessRow(in VBuffer src) { - var srcCount = src.Count; - var srcValues = src.Values; - Ch.Assert(Utils.Size(srcValues) >= srcCount); + var srcValues = src.GetValues(); + var srcCount = srcValues.Length; if (src.IsDense) { // The src vector is dense. @@ -222,8 +220,7 @@ protected sealed override void ProcessRow(in VBuffer src) else { // The src vector is sparse. 
- var srcIndices = src.Indices; - Ch.Assert(Utils.Size(srcIndices) >= srcCount); + var srcIndices = src.GetIndices(); for (int islot = 0; islot < srcCount; islot++) ProcessValue(in srcValues[islot], srcIndices[islot]); } diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index c98ccfab04..145de15f03 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -406,28 +406,28 @@ private void GetLabels(Transposer trans, ColumnType labelType, int labelCol) { var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); - BinInts(ref tmp, ref labels, _numBins, out min, out lim); + BinInts(in tmp, ref labels, _numBins, out min, out lim); _numLabels = lim - min; } else if (labelType == NumberType.R4) { var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); - BinSingles(ref tmp, ref labels, _numBins, out min, out lim); + BinSingles(in tmp, ref labels, _numBins, out min, out lim); _numLabels = lim - min; } else if (labelType == NumberType.R8) { var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); - BinDoubles(ref tmp, ref labels, _numBins, out min, out lim); + BinDoubles(in tmp, ref labels, _numBins, out min, out lim); _numLabels = lim - min; } else if (labelType.IsBool) { var tmp = default(VBuffer); trans.GetSingleSlotValue(labelCol, ref tmp); - BinBools(ref tmp, ref labels); + BinBools(in tmp, ref labels); _numLabels = 3; min = -1; lim = 2; @@ -485,7 +485,7 @@ private Single[] ComputeMutualInformation(Transposer trans, int col) return ComputeMutualInformation(trans, col, (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => { - BinInts(ref src, ref dst, _numBins, out min, out lim); + BinInts(in src, ref dst, _numBins, out min, out lim); }); } if (type.ItemType == NumberType.R4) @@ -493,7 +493,7 @@ private Single[] 
ComputeMutualInformation(Transposer trans, int col) return ComputeMutualInformation(trans, col, (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => { - BinSingles(ref src, ref dst, _numBins, out min, out lim); + BinSingles(in src, ref dst, _numBins, out min, out lim); }); } if (type.ItemType == NumberType.R8) @@ -501,7 +501,7 @@ private Single[] ComputeMutualInformation(Transposer trans, int col) return ComputeMutualInformation(trans, col, (ref VBuffer src, ref VBuffer dst, out int min, out int lim) => { - BinDoubles(ref src, ref dst, _numBins, out min, out lim); + BinDoubles(in src, ref dst, _numBins, out min, out lim); }); } if (type.ItemType.IsBool) @@ -511,7 +511,7 @@ private Single[] ComputeMutualInformation(Transposer trans, int col) { min = -1; lim = 2; - BinBools(ref src, ref dst); + BinBools(in src, ref dst); }); } Contracts.Assert(0 < type.ItemType.KeyCount && type.ItemType.KeyCount < Utils.ArrayMaxSize); @@ -610,12 +610,13 @@ private Single ComputeMutualInformation(in VBuffer features, int numFeature private void FillTable(in VBuffer features, int offset, int numFeatures) { Contracts.Assert(_labels.Length == features.Length); + var featureValues = features.GetValues(); if (features.IsDense) { for (int i = 0; i < _labels.Length; i++) { var label = _labels[i]; - var feature = features.Values[i] - offset; + var feature = featureValues[i] - offset; Contracts.Assert(0 <= label && label < _numLabels); Contracts.Assert(0 <= feature && feature < numFeatures); _contingencyTable[label][feature]++; @@ -623,23 +624,24 @@ private void FillTable(in VBuffer features, int offset, int numFeatures) return; } + var featureIndices = features.GetIndices(); int ii = 0; for (int i = 0; i < _labels.Length; i++) { var label = _labels[i]; int feature; - if (ii == features.Count || i < features.Indices[ii]) + if (ii == featureIndices.Length || i < featureIndices[ii]) feature = -offset; else { - feature = features.Values[ii] - offset; + feature = featureValues[ii] - 
offset; ii++; } Contracts.Assert(0 <= label && label < _numLabels); Contracts.Assert(0 <= feature && feature < numFeatures); _contingencyTable[label][feature]++; } - Contracts.Assert(ii == features.Count); + Contracts.Assert(ii == featureIndices.Length); } /// @@ -673,12 +675,12 @@ private static ValueMapper, VBuffer> BinKeys(ColumnType colTy /// /// Maps Ints. /// - private void BinInts(ref VBuffer input, ref VBuffer output, + private void BinInts(in VBuffer input, ref VBuffer output, int numBins, out int min, out int lim) { Contracts.Assert(_singles.Count == 0); - var bounds = _binFinder.FindBins(numBins, _singles, input.Length - input.Count); + var bounds = _binFinder.FindBins(numBins, _singles, input.Length - input.GetValues().Length); min = -1 - bounds.FindIndexSorted(0); lim = min + bounds.Length + 1; int offset = min; @@ -692,21 +694,19 @@ private void BinInts(ref VBuffer input, ref VBuffer output, /// /// Maps from Singles to ints. NaNs (and only NaNs) are mapped to the first bin. /// - private void BinSingles(ref VBuffer input, ref VBuffer output, + private void BinSingles(in VBuffer input, ref VBuffer output, int numBins, out int min, out int lim) { Contracts.Assert(_singles.Count == 0); - if (input.Values != null) + var inputValues = input.GetValues(); + for (int i = 0; i < inputValues.Length; i++) { - for (int i = 0; i < input.Count; i++) - { - var val = input.Values[i]; - if (!Single.IsNaN(val)) - _singles.Add(val); - } + var val = inputValues[i]; + if (!Single.IsNaN(val)) + _singles.Add(val); } - var bounds = _binFinder.FindBins(numBins, _singles, input.Length - input.Count); + var bounds = _binFinder.FindBins(numBins, _singles, input.Length - inputValues.Length); min = -1 - bounds.FindIndexSorted(0); lim = min + bounds.Length + 1; int offset = min; @@ -720,21 +720,19 @@ private void BinSingles(ref VBuffer input, ref VBuffer output, /// /// Maps from Doubles to ints. NaNs (and only NaNs) are mapped to the first bin. 
/// - private void BinDoubles(ref VBuffer input, ref VBuffer output, + private void BinDoubles(in VBuffer input, ref VBuffer output, int numBins, out int min, out int lim) { Contracts.Assert(_doubles.Count == 0); - if (input.Values != null) + var inputValues = input.GetValues(); + for (int i = 0; i < inputValues.Length; i++) { - for (int i = 0; i < input.Count; i++) - { - var val = input.Values[i]; - if (!Double.IsNaN(val)) - _doubles.Add(val); - } + var val = inputValues[i]; + if (!Double.IsNaN(val)) + _doubles.Add(val); } - var bounds = _binFinder.FindBins(numBins, _doubles, input.Length - input.Count); + var bounds = _binFinder.FindBins(numBins, _doubles, input.Length - inputValues.Length); var offset = min = -1 - bounds.FindIndexSorted(0); lim = min + bounds.Length + 1; ValueMapper mapper = @@ -744,7 +742,7 @@ private void BinDoubles(ref VBuffer input, ref VBuffer output, _doubles.Clear(); } - private void BinBools(ref VBuffer input, ref VBuffer output) + private void BinBools(in VBuffer input, ref VBuffer output) { if (_boolMapper == null) _boolMapper = CreateVectorMapper(BinOneBool); @@ -775,24 +773,20 @@ private static ValueMapper, VBuffer> CreateVectorMapper(this ValueMapper map, in VBuffer input, ref VBuffer output) { - var values = output.Values; - if (Utils.Size(values) < input.Count) - values = new TDst[input.Count]; - for (int i = 0; i < input.Count; i++) + var inputValues = input.GetValues(); + var mutation = VBufferMutationContext.Create(ref output, input.Length, inputValues.Length); + for (int i = 0; i < inputValues.Length; i++) { - TSrc val = input.Values[i]; - map(in val, ref values[i]); + TSrc val = inputValues[i]; + map(in val, ref mutation.Values[i]); } - var indices = output.Indices; - if (!input.IsDense && input.Count > 0) + if (!input.IsDense && inputValues.Length > 0) { - if (Utils.Size(indices) < input.Count) - indices = new int[input.Count]; - Array.Copy(input.Indices, indices, input.Count); + input.GetIndices().CopyTo(mutation.Indices); 
} - output = new VBuffer(input.Length, input.Count, values, indices); + mutation.Complete(ref output); } } } diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index d3865c303e..fb8207c5fb 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -615,9 +615,11 @@ private void TransformFeatures(in VBuffer src, ref VBuffer dst, Tr { // This overload of MatTimesSrc ignores the values in slots that are not in src.Indices, so there is // no need to zero them out. - featuresAligned.CopyFrom(src.Indices, src.Values, 0, 0, src.Count, zeroItems: false); - CpuMathUtils.MatrixTimesSource(transformInfo.RndFourierVectors, src.Indices, featuresAligned, 0, 0, - src.Count, productAligned, transformInfo.NewDim); + var srcValues = src.GetValues(); + var srcIndices = src.GetIndices(); + featuresAligned.CopyFrom(srcIndices, srcValues, 0, 0, srcValues.Length, zeroItems: false); + CpuMathUtils.MatrixTimesSource(transformInfo.RndFourierVectors, srcIndices, featuresAligned, 0, 0, + srcValues.Length, productAligned, transformInfo.NewDim); } for (int i = 0; i < transformInfo.NewDim; i++) diff --git a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs index 53f99405d7..285d47216a 100644 --- a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs @@ -457,39 +457,37 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) getSrc(ref src); int len = 0; - for (int i = 0; i < src.Count; i++) + var srcValues = src.GetValues(); + for (int i = 0; i < srcValues.Length; i++) { - if (!src.Values[i].IsEmpty) + if (!srcValues[i].IsEmpty) { - len += src.Values[i].Length; + len += srcValues[i].Length; if (_parent._useMarkerChars) len += TextMarkersCount; } } - var values = dst.Values; + var mutation = 
VBufferMutationContext.Create(ref dst, len); if (len > 0) { - if (Utils.Size(values) < len) - values = new ushort[len]; - int index = 0; - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) { - if (src.Values[i].IsEmpty) + if (srcValues[i].IsEmpty) continue; if (_parent._useMarkerChars) - values[index++] = TextStartMarker; - var span = src.Values[i].Span; - for (int ich = 0; ich < src.Values[i].Length; ich++) - values[index++] = span[ich]; + mutation.Values[index++] = TextStartMarker; + var span = srcValues[i].Span; + for (int ich = 0; ich < srcValues[i].Length; ich++) + mutation.Values[index++] = span[ich]; if (_parent._useMarkerChars) - values[index++] = TextEndMarker; + mutation.Values[index++] = TextEndMarker; } Contracts.Assert(index == len); } - dst = new VBuffer(len, values, dst.Indices); + mutation.Complete(ref dst); }; ValueGetter> getterWithUnitSep = (ref VBuffer dst) => @@ -498,11 +496,12 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) int len = 0; - for (int i = 0; i < src.Count; i++) + var srcValues = src.GetValues(); + for (int i = 0; i < srcValues.Length; i++) { - if (!src.Values[i].IsEmpty) + if (!srcValues[i].IsEmpty) { - len += src.Values[i].Length; + len += srcValues[i].Length; if (i > 0) len += 1; // add UnitSeparator character to len that will be added @@ -512,12 +511,9 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) if (_parent._useMarkerChars) len += TextMarkersCount; - var values = dst.Values; + var mutation = VBufferMutationContext.Create(ref dst, len); if (len > 0) { - if (Utils.Size(values) < len) - values = new ushort[len]; - int index = 0; // ReadOnlyMemory can be a result of either concatenating text columns together @@ -527,33 +523,32 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) // Therefore, prepend and append start and end markers only once i.e. at the start and at end of vector. // Insert UnitSeparator after every piece of text in the vector. 
if (_parent._useMarkerChars) - values[index++] = TextStartMarker; + mutation.Values[index++] = TextStartMarker; - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) { - if (src.Values[i].IsEmpty) + if (srcValues[i].IsEmpty) continue; if (i > 0) - values[index++] = UnitSeparator; + mutation.Values[index++] = UnitSeparator; - var span = src.Values[i].Span; - for (int ich = 0; ich < src.Values[i].Length; ich++) - values[index++] = span[ich]; + var span = srcValues[i].Span; + for (int ich = 0; ich < srcValues[i].Length; ich++) + mutation.Values[index++] = span[ich]; } if (_parent._useMarkerChars) - values[index++] = TextEndMarker; + mutation.Values[index++] = TextEndMarker; Contracts.Assert(index == len); } - dst = new VBuffer(len, values, dst.Indices); + mutation.Complete(ref dst); }; return _parent._isSeparatorStartEnd ? getterWithStartEndSep : getterWithUnitSep; } } - } /// diff --git a/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs b/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs index 4a9ef780ca..af643a2907 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaSingleBox.cs @@ -181,7 +181,7 @@ public void SetAlphaSum(float averageDocLength) LdaInterface.SetAlphaSum(_engine, averageDocLength); } - public int LoadDoc(int[] termID, double[] termVal, int termNum, int numVocab) + public int LoadDoc(ReadOnlySpan termID, ReadOnlySpan termVal, int termNum, int numVocab) { Contracts.Check(numVocab == NumVocab); Contracts.Check(termNum > 0); @@ -189,12 +189,14 @@ public int LoadDoc(int[] termID, double[] termVal, int termNum, int numVocab) Contracts.Check(termVal.Length >= termNum); int[] pID = new int[termNum]; - int[] pVal = termVal.Select(item => (int)item).ToArray(); - Array.Copy(termID, pID, termNum); + int[] pVal = new int[termVal.Length]; + for (int i = 0; i < termVal.Length; i++) + pVal[i] = (int)termVal[i]; + termID.Slice(0, termNum).CopyTo(pID); return 
LdaInterface.FeedInData(_engine, pID, pVal, termNum, NumVocab); } - public int LoadDocDense(double[] termVal, int termNum, int numVocab) + public int LoadDocDense(ReadOnlySpan termVal, int termNum, int numVocab) { Contracts.Check(numVocab == NumVocab); Contracts.Check(termNum > 0); @@ -202,9 +204,10 @@ public int LoadDocDense(double[] termVal, int termNum, int numVocab) Contracts.Check(termVal.Length >= termNum); int[] pID = new int[termNum]; - int[] pVal = termVal.Select(item => (int)item).ToArray(); + int[] pVal = new int[termVal.Length]; + for (int i = 0; i < termVal.Length; i++) + pVal[i] = (int)termVal[i]; return LdaInterface.FeedInDataDense(_engine, pVal, termNum, NumVocab); - } public List> GetDocTopicVector(int docID) @@ -244,17 +247,19 @@ public List> GetDocTopicVector(int docID) return topicRet; } - public List> TestDoc(int[] termID, double[] termVal, int termNum, int numBurninIter, bool reset) + public List> TestDoc(ReadOnlySpan termID, ReadOnlySpan termVal, int termNum, int numBurninIter, bool reset) { Contracts.Check(termNum > 0); Contracts.Check(termVal.Length >= termNum); Contracts.Check(termID.Length >= termNum); int[] pID = new int[termNum]; - int[] pVal = termVal.Select(item => (int)item).ToArray(); + int[] pVal = new int[termVal.Length]; + for (int i = 0; i < termVal.Length; i++) + pVal[i] = (int)termVal[i]; int[] pTopic = new int[NumTopic]; int[] pProb = new int[NumTopic]; - Array.Copy(termID, pID, termNum); + termID.Slice(0, termNum).CopyTo(pID); int numTopicReturn = NumTopic; @@ -273,12 +278,14 @@ public List> TestDoc(int[] termID, double[] termVal, in return topicRet; } - public List> TestDocDense(double[] termVal, int termNum, int numBurninIter, bool reset) + public List> TestDocDense(ReadOnlySpan termVal, int termNum, int numBurninIter, bool reset) { Contracts.Check(termNum > 0); Contracts.Check(numBurninIter > 0); Contracts.Check(termVal.Length >= termNum); - int[] pVal = termVal.Select(item => (int)item).ToArray(); + int[] pVal = new 
int[termVal.Length]; + for (int i = 0; i < termVal.Length; i++) + pVal[i] = (int)termVal[i]; int[] pTopic = new int[NumTopic]; int[] pProb = new int[NumTopic]; diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 3f697a8478..399237ac5f 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -505,9 +505,10 @@ private void Train(IChannel ch, IDataView trainingData, LdaState[] states) getters[i](ref src); // compute term, doc instance#. - for (int termID = 0; termID < src.Count; termID++) + var srcValues = src.GetValues(); + for (int termID = 0; termID < srcValues.Length; termID++) { - int termFreq = GetFrequency(src.Values[termID]); + int termFreq = GetFrequency(srcValues[termID]); if (termFreq < 0) { // Ignore this row. @@ -792,9 +793,10 @@ public int FeedTrain(IExceptionContext ectx, in VBuffer input) int docSize = 0; int termNum = 0; - for (int i = 0; i < input.Count; i++) + var inputValues = input.GetValues(); + for (int i = 0; i < inputValues.Length; i++) { - int termFreq = GetFrequency(input.Values[i]); + int termFreq = GetFrequency(inputValues[i]); if (termFreq < 0) { // Ignore this row. @@ -814,9 +816,9 @@ public int FeedTrain(IExceptionContext ectx, in VBuffer input) int actualSize = 0; if (input.IsDense) - actualSize = _ldaTrainer.LoadDocDense(input.Values, termNum, input.Length); + actualSize = _ldaTrainer.LoadDocDense(inputValues, termNum, input.Length); else - actualSize = _ldaTrainer.LoadDoc(input.Indices, input.Values, termNum, input.Length); + actualSize = _ldaTrainer.LoadDoc(input.GetIndices(), inputValues, termNum, input.Length); ectx.Assert(actualSize == 2 * docSize + 1, string.Format("The doc size are distinct. 
Actual: {0}, Expected: {1}", actualSize, 2 * docSize + 1)); return actualSize; @@ -849,30 +851,30 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin } int len = InfoEx.NumTopic; - var values = dst.Values; - var indices = dst.Indices; - if (src.Count == 0) + var srcValues = src.GetValues(); + if (srcValues.Length == 0) { - dst = new VBuffer(len, 0, values, indices); + VBufferMutationContext.Create(ref dst, len, 0) + .Complete(ref dst); return; } + VBufferMutationContext mutation; // Make sure all the frequencies are valid and truncate if the sum gets too large. int docSize = 0; int termNum = 0; - for (int i = 0; i < src.Count; i++) + for (int i = 0; i < srcValues.Length; i++) { - int termFreq = GetFrequency(src.Values[i]); + int termFreq = GetFrequency(srcValues[i]); if (termFreq < 0) { // REVIEW: Should this log a warning message? And what should it produce? // It currently produces a vbuffer of all NA values. // REVIEW: Need a utility method to do this... - if (Utils.Size(values) < len) - values = new Float[len]; + mutation = VBufferMutationContext.Create(ref dst, len); for (int k = 0; k < len; k++) - values[k] = Float.NaN; - dst = new VBuffer(len, values, indices); + mutation.Values[k] = Float.NaN; + mutation.Complete(ref dst); return; } @@ -886,17 +888,14 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin // REVIEW: Too much memory allocation here on each prediction. 
List> retTopics; if (src.IsDense) - retTopics = _ldaTrainer.TestDocDense(src.Values, termNum, numBurninIter, reset); + retTopics = _ldaTrainer.TestDocDense(srcValues, termNum, numBurninIter, reset); else - retTopics = _ldaTrainer.TestDoc(src.Indices.Take(src.Count).ToArray(), src.Values.Take(src.Count).ToArray(), termNum, numBurninIter, reset); + retTopics = _ldaTrainer.TestDoc(src.GetIndices(), srcValues, termNum, numBurninIter, reset); int count = retTopics.Count; Contracts.Assert(count <= len); - if (Utils.Size(values) < count) - values = new Float[count]; - if (count < len && Utils.Size(indices) < count) - indices = new int[count]; + mutation = VBufferMutationContext.Create(ref dst, len, count); double normalizer = 0; for (int i = 0; i < count; i++) { @@ -906,22 +905,22 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin Contracts.Assert(0 <= index && index < len); if (count < len) { - Contracts.Assert(i == 0 || indices[i - 1] < index); - indices[i] = index; + Contracts.Assert(i == 0 || mutation.Indices[i - 1] < index); + mutation.Indices[i] = index; } else Contracts.Assert(index == i); - values[i] = value; + mutation.Values[i] = value; normalizer += value; } if (normalizer > 0) { for (int i = 0; i < count; i++) - values[i] = (Float)(values[i] / normalizer); + mutation.Values[i] = (Float)(mutation.Values[i] / normalizer); } - dst = new VBuffer(len, count, values, indices); + mutation.Complete(ref dst); } public void Dispose() diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index a443b9ecdd..3dcf350302 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -315,14 +315,15 @@ public override void Save(ModelSaveContext ctx) if (_slotNamesTypes[i] != null) { GetSlotNames(i, ref ngramsNames); - Host.Assert(_ngramMaps[i].Count == ngramsNames.Count); + Host.Assert(_ngramMaps[i].Count == 
ngramsNames.GetValues().Length); Host.Assert(ngramsNames.IsDense); ctx.SaveTextStream(string.Format("{0}-ngrams.txt", Infos[i].Name), writer => { - writer.WriteLine("# Number of Ngrams terms = {0}", ngramsNames.Count); - for (int j = 0; j < ngramsNames.Count; j++) - writer.WriteLine("{0}\t{1}", j, ngramsNames.Values[j]); + var ngramNameValues = ngramsNames.GetValues(); + writer.WriteLine("# Number of Ngrams terms = {0}", ngramNameValues.Length); + for (int j = 0; j < ngramNameValues.Length; j++) + writer.WriteLine("{0}\t{1}", j, ngramNameValues[j]); }); } } diff --git a/src/Microsoft.ML.Transforms/Text/NgramUtils.cs b/src/Microsoft.ML.Transforms/Text/NgramUtils.cs index 7a0db6d8bd..38bc6333e8 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramUtils.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramUtils.cs @@ -73,12 +73,13 @@ public bool AddNgrams(in VBuffer src, int icol, uint keyMax) Contracts.Assert(icol >= 0); Contracts.Assert(keyMax > 0); + var srcValues = src.GetValues(); uint curKey = 0; if (src.IsDense) { for (int i = 0; i < src.Length; i++) { - curKey = src.Values[i]; + curKey = srcValues[i]; if (curKey > keyMax) curKey = 0; @@ -92,13 +93,14 @@ public bool AddNgrams(in VBuffer src, int icol, uint keyMax) else { var queueSize = _queue.Capacity; + var srcIndices = src.GetIndices(); int iindex = 0; for (int i = 0; i < src.Length; i++) { - if (iindex < src.Count && i == src.Indices[iindex]) + if (iindex < srcIndices.Length && i == srcIndices[iindex]) { - curKey = src.Values[iindex]; + curKey = srcValues[iindex]; if (curKey > keyMax) curKey = 0; iindex++; diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs index 98300ebb08..625f5503f9 100644 --- a/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemoverTransform.cs @@ -464,16 +464,17 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou 
getSrc(ref src); list.Clear(); - for (int i = 0; i < src.Count; i++) + var srcValues = src.GetValues(); + for (int i = 0; i < srcValues.Length; i++) { - if (src.Values[i].IsEmpty) + if (srcValues[i].IsEmpty) continue; buffer.Clear(); - ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(src.Values[i].Span, buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(srcValues[i].Span, buffer); // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.) if (StopWords[(int)langToUse].Get(buffer) == null) - list.Add(src.Values[i]); + list.Add(srcValues[i]); } VBufferUtils.Copy(list, ref dst, list.Count); @@ -936,16 +937,17 @@ protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, ou getSrc(ref src); list.Clear(); - for (int i = 0; i < src.Count; i++) + var srcValues = src.GetValues(); + for (int i = 0; i < srcValues.Length; i++) { - if (src.Values[i].IsEmpty) + if (srcValues[i].IsEmpty) continue; buffer.Clear(); - ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(src.Values[i].Span, buffer); + ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(srcValues[i].Span, buffer); // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.) 
if (_stopWordsMap.Get(buffer) == null) - list.Add(src.Values[i]); + list.Add(srcValues[i]); } VBufferUtils.Copy(list, ref dst, list.Count); diff --git a/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs b/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs index 60de96701b..6bad1cd87f 100644 --- a/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/TextNormalizerTransform.cs @@ -308,7 +308,7 @@ private ValueGetter> MakeGetterOne(IRow input, int iinfo) (ref ReadOnlyMemory dst) => { getSrc(ref src); - NormalizeSrc(ref src, ref dst, buffer); + NormalizeSrc(in src, ref dst, buffer); }; } @@ -325,9 +325,10 @@ private ValueGetter>> MakeGetterVec(IRow input, int { getSrc(ref src); list.Clear(); - for (int i = 0; i < src.Count; i++) + var srcValues = src.GetValues(); + for (int i = 0; i < srcValues.Length; i++) { - NormalizeSrc(ref src.Values[i], ref temp, buffer); + NormalizeSrc(in srcValues[i], ref temp, buffer); if (!temp.IsEmpty) list.Add(temp); } @@ -336,7 +337,7 @@ private ValueGetter>> MakeGetterVec(IRow input, int }; } - private void NormalizeSrc(ref ReadOnlyMemory src, ref ReadOnlyMemory dst, StringBuilder buffer) + private void NormalizeSrc(in ReadOnlyMemory src, ref ReadOnlyMemory dst, StringBuilder buffer) { Host.AssertValue(buffer); diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs index c32c716e32..b93e487fbd 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs @@ -118,7 +118,7 @@ public void AddWordVector(IChannel ch, string word, float[] wordVector) } } - public bool GetWordVector(ref ReadOnlyMemory word, float[] wordVector) + public bool GetWordVector(in ReadOnlyMemory word, float[] wordVector) { NormStr str = _pool.Get(word); if (str != null) @@ -583,38 +583,37 @@ private ValueGetter> GetGetterVec(IRow input, int iinfo) { 
int deno = 0; srcGetter(ref src); - var values = dst.Values; - if (Utils.Size(values) != 3 * dimension) - values = new float[3 * dimension]; + var mutation = VBufferMutationContext.Create(ref dst, 3 * dimension); int offset = 2 * dimension; for (int i = 0; i < dimension; i++) { - values[i] = float.MaxValue; - values[i + dimension] = 0; - values[i + offset] = float.MinValue; + mutation.Values[i] = float.MaxValue; + mutation.Values[i + dimension] = 0; + mutation.Values[i + offset] = float.MinValue; } - for (int word = 0; word < src.Count; word++) + var srcValues = src.GetValues(); + for (int word = 0; word < srcValues.Length; word++) { - if (_parent._currentVocab.GetWordVector(ref src.Values[word], wordVector)) + if (_parent._currentVocab.GetWordVector(in srcValues[word], wordVector)) { deno++; for (int i = 0; i < dimension; i++) { float currentTerm = wordVector[i]; - if (values[i] > currentTerm) - values[i] = currentTerm; - values[dimension + i] += currentTerm; - if (values[offset + i] < currentTerm) - values[offset + i] = currentTerm; + if (mutation.Values[i] > currentTerm) + mutation.Values[i] = currentTerm; + mutation.Values[dimension + i] += currentTerm; + if (mutation.Values[offset + i] < currentTerm) + mutation.Values[offset + i] = currentTerm; } } } if (deno != 0) for (int index = 0; index < dimension; index++) - values[index + dimension] /= deno; + mutation.Values[index + dimension] /= deno; - dst = new VBuffer(values.Length, values, dst.Indices); + mutation.Complete(ref dst); }; } } diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 05288d7135..066a47b192 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -316,18 +316,16 @@ private ValueGetter>> MakeGetterVec(IRow input, int getSrc(ref src); terms.Clear(); - for (int i = 0; i < src.Count; i++) - AddTerms(src.Values[i], separators, 
terms); + var srcValues = src.GetValues(); + for (int i = 0; i < srcValues.Length; i++) + AddTerms(srcValues[i], separators, terms); - var values = dst.Values; - if (terms.Count > 0) + var mutation = VBufferMutationContext.Create(ref dst, terms.Count); + for (int i = 0; i < terms.Count; i++) { - if (Utils.Size(values) < terms.Count) - values = new ReadOnlyMemory[terms.Count]; - terms.CopyTo(values); + mutation.Values[i] = terms[i]; } - - dst = new VBuffer>(terms.Count, values, dst.Indices); + mutation.Complete(ref dst); }; } diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index c34100da37..e8182215e8 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -631,18 +631,20 @@ private ValueGetter MakeGetter(int col, PrimitiveType itemType) cachedIndex = 0; } + var rowValues = row.GetValues(); if (_pivotColPosition >= row.Length) value = naValue; else if (row.IsDense) - value = row.Values[_pivotColPosition]; + value = rowValues[_pivotColPosition]; else { // The row is sparse. 
- while (cachedIndex < row.Count && _pivotColPosition > row.Indices[cachedIndex]) + var rowIndices = row.GetIndices(); + while (cachedIndex < rowIndices.Length && _pivotColPosition > rowIndices[cachedIndex]) cachedIndex++; - if (cachedIndex < row.Count && _pivotColPosition == row.Indices[cachedIndex]) - value = row.Values[cachedIndex]; + if (cachedIndex < rowIndices.Length && _pivotColPosition == rowIndices[cachedIndex]) + value = rowValues[cachedIndex]; else value = default(T); } diff --git a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.Transforms/VectorWhitening.cs index b60098d572..c9d75bc2af 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.Transforms/VectorWhitening.cs @@ -613,10 +613,19 @@ public enum SvdJob : byte MinOvr = (byte)'O', } + public static unsafe void Gemv(Layout layout, Transpose trans, int m, int n, float alpha, + float[] a, int lda, ReadOnlySpan x, int incx, float beta, Span y, int incy) + { + fixed (float* pA = a) + fixed (float* pX = x) + fixed (float* pY = y) + Gemv(layout, trans, m, n, alpha, pA, lda, pX, incx, beta, pY, incy); + } + // See: https://software.intel.com/en-us/node/520750 [DllImport(DllName, EntryPoint = "cblas_sgemv")] - public static extern void Gemv(Layout layout, Transpose trans, int m, int n, float alpha, - float[] a, int lda, float[] x, int incx, float beta, float[] y, int incy); + private static unsafe extern void Gemv(Layout layout, Transpose trans, int m, int n, float alpha, + float* a, int lda, float* x, int incx, float beta, float* y, int incy); // See: https://software.intel.com/en-us/node/520775 [DllImport(DllName, EntryPoint = "cblas_sgemm")] @@ -715,33 +724,31 @@ private ValueGetter GetSrcGetter(IRow input, int iinfo) private static void FillValues(float[] model, ref VBuffer src, ref VBuffer dst, int cdst) { - int count = src.Count; + var values = src.GetValues(); + int count = values.Length; int length = src.Length; - var values = src.Values; - var indices 
= src.Indices; - Contracts.Assert(Utils.Size(values) >= count); // Since the whitening process produces dense vector, always use dense representation of dst. - var a = Utils.Size(dst.Values) >= cdst ? dst.Values : new float[cdst]; + var mutation = VBufferMutationContext.Create(ref dst, cdst); if (src.IsDense) { Mkl.Gemv(Mkl.Layout.RowMajor, Mkl.Transpose.NoTrans, cdst, length, - 1, model, length, values, 1, 0, a, 1); + 1, model, length, values, 1, 0, mutation.Values, 1); } else { - Contracts.Assert(Utils.Size(indices) >= count); + var indices = src.GetIndices(); int offs = 0; for (int i = 0; i < cdst; i++) { // Returns a dot product of dense vector 'model' starting from offset 'offs' and sparse vector 'values' // with first 'count' valid elements and their corresponding 'indices'. - a[i] = CpuMathUtils.DotProductSparse(model.AsSpan(offs), values, indices, count); + mutation.Values[i] = CpuMathUtils.DotProductSparse(model.AsSpan(offs), values, indices, count); offs += length; } } - dst = new VBuffer(cdst, a, dst.Indices); + mutation.Complete(ref dst); } private static float DotProduct(float[] a, int aOffset, float[] b, int[] indices, int count) From 01654a71d98735f556addf65f18019772c90625c Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 8 Nov 2018 12:33:28 -0600 Subject: [PATCH 07/14] Clean up VBufferMutationContext to match the spec. 
--- src/Microsoft.ML.Core/Data/MetadataUtils.cs | 4 +- src/Microsoft.ML.Core/Data/VBuffer.cs | 268 +----------------- .../Data/VBufferMutationContext.cs | 148 ++++++++++ .../Utilities/VBufferUtils.cs | 54 ++-- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 8 +- .../DataLoadSave/Binary/Codecs.cs | 4 +- .../DataLoadSave/Text/TextLoaderParser.cs | 8 +- src/Microsoft.ML.Data/DataView/Transposer.cs | 8 +- .../Depricated/Vector/VBufferMathUtils.cs | 18 +- .../Evaluators/EvaluatorUtils.cs | 2 +- .../Transforms/DropSlotsTransform.cs | 4 +- .../Transforms/HashTransform.cs | 16 +- .../Transforms/KeyToVectorTransform.cs | 2 +- .../Utilities/SlotDropper.cs | 8 +- src/Microsoft.ML.Ensemble/EnsembleUtils.cs | 6 +- .../OutputCombiners/BaseMultiAverager.cs | 2 +- .../OutputCombiners/BaseMultiCombiner.cs | 2 +- .../OutputCombiners/BaseScalarStacking.cs | 2 +- .../OutputCombiners/MultiMedian.cs | 2 +- .../OutputCombiners/MultiStacking.cs | 2 +- .../OutputCombiners/MultiVoting.cs | 6 +- src/Microsoft.ML.FastTree/FastTree.cs | 2 +- .../OlsLinearRegression.cs | 2 +- .../ImagePixelExtractorTransform.cs | 6 +- .../KMeansPredictor.cs | 2 +- .../OnnxTransform.cs | 2 +- .../MultiClass/MultiClassNaiveBayesTrainer.cs | 2 +- .../Standard/Online/LinearSvm.cs | 4 +- .../TensorflowTransform.cs | 2 +- src/Microsoft.ML.Transforms/GcnTransform.cs | 6 +- .../MissingValueDroppingTransformer.cs | 18 +- .../MissingValueIndicatorTransform.cs | 8 +- .../MutualInformationFeatureSelection.cs | 2 +- .../Text/CharTokenizeTransform.cs | 4 +- .../Text/LdaTransform.cs | 8 +- .../Text/WordEmbeddingsTransform.cs | 2 +- .../Text/WordTokenizeTransform.cs | 2 +- .../VectorWhitening.cs | 2 +- 38 files changed, 275 insertions(+), 373 deletions(-) create mode 100644 src/Microsoft.ML.Core/Data/VBufferMutationContext.cs diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index d60f67fe31..75cf485c4d 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ 
b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -319,8 +319,8 @@ public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.Column IReadOnlyList list; if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize)) { - VBufferMutationContext.Create(ref slotNames, vectorSize, 0) - .Complete(ref slotNames); + slotNames = VBufferMutationContext.Create(ref slotNames, vectorSize, 0) + .CreateBuffer(); } else schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames); diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index d1a30a9e53..504742b1d3 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -130,7 +130,7 @@ public void CopyToDense(ref VBuffer dst) CopyTo(mutation.Values); else if (Length > 0) _values.AsSpan(0, Length).CopyTo(mutation.Values); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -145,7 +145,7 @@ public void CopyTo(ref VBuffer dst) { _values.AsSpan(0, Length).CopyTo(mutation.Values); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); Contracts.Assert(dst.IsDense); } else @@ -155,7 +155,7 @@ public void CopyTo(ref VBuffer dst) _values.AsSpan(0, _count).CopyTo(mutation.Values); _indices.AsSpan(0, _count).CopyTo(mutation.Indices); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } @@ -174,7 +174,7 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) { _values.AsSpan(srcMin, length).CopyTo(mutation.Values); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); Contracts.Assert(dst.IsDense); } else @@ -196,190 +196,16 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) mutation.Indices[i] = _indices[i + copyMin] - srcMin; } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } else { var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); - mutation.Complete(ref dst); + 
dst = mutation.CreateBuffer(); } } } -/* /// - /// Copy from this buffer to the given destination, making sure to explicitly include the - /// first count indices in indicesInclude. Note that indicesInclude should be sorted - /// with each index less than this.Length. Note that this can make the destination be - /// dense even if "this" is sparse. - /// - public void CopyTo(ref VBuffer dst, int[] indicesInclude, int count) - { - Contracts.CheckParam(count >= 0, nameof(count)); - Contracts.CheckParam(Utils.Size(indicesInclude) >= count, nameof(indicesInclude)); - Contracts.CheckParam(Utils.Size(indicesInclude) <= Length, nameof(indicesInclude)); - - // REVIEW: Ideally we should Check that indicesInclude is sorted and in range. Would that - // check be too expensive? -#if DEBUG - int prev = -1; - for (int i = 0; i < count; i++) - { - Contracts.Assert(prev < indicesInclude[i]); - prev = indicesInclude[i]; - } - Contracts.Assert(prev < Length); -#endif - - if (IsDense || count == 0) - { - CopyTo(ref dst); - return; - } - - if (count >= Length / 2 || Count >= Length / 2) - { - CopyToDense(ref dst); - return; - } - - var indices = dst.Indices; - var values = dst.Values; - if (Count == 0) - { - // No values in "this". - if (Utils.Size(indices) < count) - indices = new int[count]; - Array.Copy(indicesInclude, indices, count); - if (Utils.Size(values) < count) - values = new T[count]; - else - Array.Clear(values, 0, count); - dst = new VBuffer(Length, count, values, indices); - return; - } - - int size = 0; - int max = count + Count; - Contracts.Assert(max < Length); - int ii1; - int ii2; - if (max >= Length / 2 || Utils.Size(values) < max || Utils.Size(indices) < max) - { - // Compute the needed size. 
- ii1 = 0; - ii2 = 0; - for (; ; ) - { - Contracts.Assert(ii1 < Count); - Contracts.Assert(ii2 < count); - size++; - int diff = Indices[ii1] - indicesInclude[ii2]; - if (diff == 0) - { - ii1++; - ii2++; - if (ii1 >= Count) - { - size += count - ii2; - break; - } - if (ii2 >= count) - { - size += Count - ii1; - break; - } - } - else if (diff < 0) - { - if (++ii1 >= Count) - { - size += count - ii2; - break; - } - } - else - { - if (++ii2 >= count) - { - size += Count - ii1; - break; - } - } - } - Contracts.Assert(size >= count && size >= Count); - - if (size == Count) - { - CopyTo(ref dst); - return; - } - - if (size >= Length / 2) - { - CopyToDense(ref dst); - return; - } - - if (Utils.Size(values) < size) - values = new T[size]; - if (Utils.Size(indices) < size) - indices = new int[size]; - max = size; - } - - int ii = 0; - ii1 = 0; - ii2 = 0; - for (; ; ) - { - Contracts.Assert(ii < max); - Contracts.Assert(ii1 < Count); - Contracts.Assert(ii2 < count); - int i1 = Indices[ii1]; - int i2 = indicesInclude[ii2]; - if (i1 <= i2) - { - indices[ii] = i1; - values[ii] = Values[ii1]; - ii++; - if (i1 == i2) - ii2++; - if (++ii1 >= Count) - { - if (ii2 >= count) - break; - Array.Clear(values, ii, count - ii2); - Array.Copy(indicesInclude, ii2, indices, ii, count - ii2); - ii += count - ii2; - break; - } - if (ii2 >= count) - { - Array.Copy(Values, ii1, values, ii, Count - ii1); - Array.Copy(Indices, ii1, indices, ii, Count - ii1); - ii += Count - ii1; - break; - } - } - else - { - indices[ii] = i2; - values[ii] = default(T); - ii++; - if (++ii2 >= count) - { - Array.Copy(Values, ii1, values, ii, Count - ii1); - Array.Copy(Indices, ii1, indices, ii, Count - ii1); - ii += Count - ii1; - break; - } - } - } - Contracts.Assert(size == ii || size == 0); - - dst = new VBuffer(Length, ii, values, indices); - }*/ - /// /// Copy from this buffer to the given destination array. This "densifies". 
/// @@ -432,7 +258,7 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) { src.AsSpan(srcIndex, length).CopyTo(mutation.Values); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } public IEnumerable> Items(bool all = false) @@ -475,21 +301,20 @@ public override string ToString() internal VBufferMutationContext GetMutableContext() { - return GetMutableContext(Length, _count, null, false, false); + return GetMutableContext(Length, _count); } internal VBufferMutationContext GetMutableContext( int newLogicalLength, int? valuesCount, - int? maxValuesCapacity, - bool keepOldOnResize, - bool requireIndicesOnDense) + int maxCapacity = Utils.ArrayMaxSize, + bool keepOldOnResize = false, + bool requireIndicesOnDense = false) { Contracts.CheckParam(newLogicalLength >= 0, nameof(newLogicalLength)); Contracts.CheckParam(valuesCount == null || valuesCount.Value <= newLogicalLength, nameof(valuesCount)); valuesCount = valuesCount ?? newLogicalLength; - int maxCapacity = maxValuesCapacity ?? newLogicalLength; T[] values = _values; bool createdNewValues; @@ -517,75 +342,4 @@ internal VBufferMutationContext GetMutableContext( createdNewIndices); } } - - public static class VBufferMutationContext - { - public static VBufferMutationContext CreateFromBuffer( - ref VBuffer destination) - { - return destination.GetMutableContext(); - } - - public static VBufferMutationContext Create( - ref VBuffer destination, - int newLogicalLength, - int? valuesCount = null, - int? 
maxValuesCapacity = null, - bool keepOldOnResize = false, - bool requireIndicesOnDense = false) - { - return destination.GetMutableContext( - newLogicalLength, - valuesCount, - maxValuesCapacity, - keepOldOnResize, - requireIndicesOnDense); - } - } - - public ref struct VBufferMutationContext - { - private readonly int _logicalLength; - private readonly T[] _values; - private readonly int[] _indices; - - public readonly Span Values; - public readonly Span Indices; - - public bool CreatedNewValues { get;} - public bool CreatedNewIndices { get;} - - internal VBufferMutationContext(int logicalLength, - int physicalValuesCount, - T[] values, - int[] indices, - bool requireIndicesOnDense, - bool createdNewValues, - bool createdNewIndices) - { - _logicalLength = logicalLength; - _values = values; - _indices = indices; - - bool isDense = logicalLength == physicalValuesCount; - - Values = _values.AsSpan(0, physicalValuesCount); - Indices = !isDense || requireIndicesOnDense ? _indices.AsSpan(0, physicalValuesCount) : default; - - CreatedNewValues = createdNewValues; - CreatedNewIndices = createdNewIndices; - } - - public void Complete(ref VBuffer destintation, int? physicalValuesCount = null) - { - int count = Values.Length; - if (physicalValuesCount.HasValue) - { - Contracts.Check(physicalValuesCount.Value <= count, "Updating physicalValuesCount during Complete cannot be greater than the original physicalValuesCount value used in Create."); - count = physicalValuesCount.Value; - } - - destintation = new VBuffer(_logicalLength, count, _values, _indices); - } - } } \ No newline at end of file diff --git a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs b/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs new file mode 100644 index 0000000000..cc2d0c32ce --- /dev/null +++ b/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime.Internal.Utilities; +using System; + +namespace Microsoft.ML.Runtime.Data +{ + /// + /// Various methods for creating instances. + /// + public static class VBufferMutationContext + { + /// + /// Creates a mutation context with the same shape (length and density) + /// as the . + /// + public static VBufferMutationContext CreateFromBuffer( + ref VBuffer destination) + { + return destination.GetMutableContext(); + } + + /// + /// Creates a mutation context using 's values + /// and indices buffers. + /// + /// + /// The destination buffer. + /// + /// + /// The new length of the buffer being mutated. + /// + /// + /// The optional number of physical values to be represented in the buffer. + /// The buffer will be dense if is omitted. + /// + /// + /// True means that the old buffer values and indices are preserved, if possible (Array.Resize is called). + /// False means that a new array will be allocated, if necessary. + /// + /// + /// True means to ensure the Indices buffer is available, even if the buffer will be dense. + /// + public static VBufferMutationContext Create( + ref VBuffer destination, + int newLogicalLength, + int? valuesCount = null, + bool keepOldOnResize = false, + bool requireIndicesOnDense = false) + { + return destination.GetMutableContext( + newLogicalLength, + valuesCount, + keepOldOnResize: keepOldOnResize, + requireIndicesOnDense: requireIndicesOnDense); + } + + internal static VBufferMutationContext Create( + ref VBuffer destination, + int newLogicalLength, + int valuesCount, + int maxValuesCapacity) + { + return destination.GetMutableContext( + newLogicalLength, + valuesCount, + maxValuesCapacity); + } + } + + /// + /// An object capable of mutation a by filling out + /// (and if the buffer is not dense). 
+ /// + public ref struct VBufferMutationContext + { + private readonly int _logicalLength; + private readonly T[] _values; + private readonly int[] _indices; + + /// + /// The mutable span of values. + /// + public readonly Span Values; + + /// + /// The mutable span of indices. + /// + public readonly Span Indices; + + /// + /// Gets a value indicating whether a new Values array was allocated. + /// + public bool CreatedNewValues { get; } + + /// + /// Gets a value indicating whether a new Indices array was allocated. + /// + public bool CreatedNewIndices { get; } + + internal VBufferMutationContext(int logicalLength, + int physicalValuesCount, + T[] values, + int[] indices, + bool requireIndicesOnDense, + bool createdNewValues, + bool createdNewIndices) + { + _logicalLength = logicalLength; + _values = values; + _indices = indices; + + bool isDense = logicalLength == physicalValuesCount; + + Values = _values.AsSpan(0, physicalValuesCount); + Indices = !isDense || requireIndicesOnDense ? _indices.AsSpan(0, physicalValuesCount) : default; + + CreatedNewValues = createdNewValues; + CreatedNewIndices = createdNewIndices; + } + + /// + /// Creates a new using the current + /// Values and Indices. + /// + /// + /// An optional size that allows reducing the number of physical values to be + /// represented in the created buffer. + /// This is useful in sparse situations where the mutation context was created + /// with a larger physical value count than was needed + /// because the final value count was not known at creation time. + /// + /// + public VBuffer CreateBuffer(int? 
physicalValuesCount = null) + { + int count = Values.Length; + if (physicalValuesCount.HasValue) + { + Contracts.Check(physicalValuesCount.Value <= count, "Updating physicalValuesCount during Complete cannot be greater than the original physicalValuesCount value used in Create."); + count = physicalValuesCount.Value; + } + + return new VBuffer(_logicalLength, count, _values, _indices); + } + } +} diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index ba8a427b48..b3632d57f8 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -421,7 +421,7 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator if (needIndices) mutation.Indices[idx] = slot; mutation.Values[idx] = value; - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -465,7 +465,7 @@ public static void Densify(ref VBuffer dst) for (int i = 0; i < values.Length; ++i) mutation.Values[indices[i]] = values[i]; } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -492,7 +492,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) var newIndicesMutation = VBufferMutationContext.Create(ref dst, dst.Length, denseCount); Utils.FillIdentity(newIndicesMutation.Indices, denseCount); newIndicesMutation.Values.Clear(); - newIndicesMutation.Complete(ref dst); + dst = newIndicesMutation.CreateBuffer(); return; } int lim = Utils.FindIndexSorted(dstIndices, 0, dstValues.Length, denseCount); @@ -514,7 +514,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) mutation.Values[ii] = i >= 0 && dstIndices[i] == ii ? 
dstValues[i--] : default(T); mutation.Indices[ii] = ii; } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -563,7 +563,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds Contracts.Assert(j == sparseCount); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -768,7 +768,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< mutation.Values.Clear(); for (int i = 0; i < srcValues.Length; i++) manip(mutation.Indices[i] = srcIndices[i], srcValues[i], ref mutation.Values[i]); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } @@ -861,7 +861,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< dIndex = --dI >= 0 ? dstIndices[dI] : -1; } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } @@ -926,7 +926,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< Contracts.Assert(srcIndices[sI] == bIndex); mutation.Indices[dI] = sI++; } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); Densify(ref dst); mutation = VBufferMutationContext.Create(ref dst, @@ -936,7 +936,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< srcIndices.CopyTo(mutation.Indices); for (sI = 0; sI < srcValues.Length; sI++) manip(srcIndices[sI], srcValues[sI], ref mutation.Values[sI]); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } @@ -963,8 +963,8 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (srcValues.Length == 0) { - VBufferMutationContext.Create(ref res, length, 0) - .Complete(ref res); + res = VBufferMutationContext.Create(ref res, length, 0) + .CreateBuffer(); } else if (src.IsDense) { @@ -972,7 +972,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf var mutation = VBufferMutationContext.Create(ref res, length); for (int i = 0; i < length; i++) manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); - mutation.Complete(ref res); + res = 
mutation.CreateBuffer(); } else { @@ -988,7 +988,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf mutation.Indices[ii] = i; manip(i, srcValues[ii], default(TDst), ref mutation.Values[ii]); } - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } } else if (dst.IsDense) @@ -1008,14 +1008,14 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf for (int j = 0; j < length; j++) mutation.Values[j] = dstValues[j]; } - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } else if (src.IsDense) { Contracts.Assert(srcValues.Length == src.Length); for (int i = 0; i < length; i++) manip(i, srcValues[i], dstValues[i], ref mutation.Values[i]); - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } else { @@ -1054,7 +1054,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf mutation.Values[j] = dstValues[j]; } } - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } } else @@ -1083,7 +1083,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf mutation.Values[jj] = dstValues[jj]; } } - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } else if (src.IsDense) { @@ -1101,7 +1101,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); } - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } else { @@ -1183,7 +1183,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf Contracts.Assert(ii == srcValues.Length && jj == dstCount); Contracts.Assert(i == length && j == length); - mutation.Complete(ref res); + res = mutation.CreateBuffer(); } } } @@ -1209,8 +1209,8 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref // equal lengths, but I don't care here. 
if (srcValues.Length == 0) { - VBufferMutationContext.Create(ref dst, src.Length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, src.Length, 0) + .CreateBuffer(); return; } var mutation = VBufferMutationContext.Create(ref dst, @@ -1231,7 +1231,7 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref for (int i = 0; i < srcValues.Length; ++i) values[i] = func(srcIndices[i], srcValues[i]); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -1263,8 +1263,8 @@ public static void ApplyInto(in VBuffer a, in VBuffer if (aValues.Length == 0 && bValues.Length == 0) { // Case 1. Output will be empty. - VBufferMutationContext.Create(ref dst, a.Length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, a.Length, 0) + .CreateBuffer(); return; } @@ -1303,7 +1303,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer for (int i = 0; i < a.Length; i++) mutation.Values[i] = func(i, aValues[i], bValues[i]); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } @@ -1421,7 +1421,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer indices[newI++] = index; } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -1439,7 +1439,7 @@ public static void Copy(List src, ref VBuffer dst, int length) mutation.Values[i] = src[i]; } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index d6ad3c1cd0..d1a647dafd 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -434,8 +434,8 @@ public void GetResult(ref VBuffer buffer) { if (IsEmpty) { - VBufferMutationContext.Create(ref buffer, _length, 0) - .Complete(ref buffer); + buffer = VBufferMutationContext.Create(ref buffer, _length, 0) + .CreateBuffer(); return; } @@ -451,7 +451,7 @@ public void GetResult(ref VBuffer buffer) { var mutation = 
VBufferMutationContext.Create(ref buffer, _length); _values.AsSpan(0, _length).CopyTo(mutation.Values); - mutation.Complete(ref buffer); + buffer = mutation.CreateBuffer(); } else { @@ -459,7 +459,7 @@ public void GetResult(ref VBuffer buffer) var mutation = VBufferMutationContext.Create(ref buffer, _length, _count); _values.AsSpan(0, _count).CopyTo(mutation.Values); _indices.AsSpan(0, _count).CopyTo(mutation.Indices); - mutation.Complete(ref buffer); + buffer = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index 4472a0206c..9e1584fd4d 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -1118,7 +1118,7 @@ public override void Get(ref VBuffer value) _values.AsSpan(_valuesOffset, length) .CopyTo(mutation.Values); } - mutation.Complete(ref value); + value = mutation.CreateBuffer(); } else { @@ -1131,7 +1131,7 @@ public override void Get(ref VBuffer value) _indices.AsSpan(_indicesOffset, count) .CopyTo(mutation.Indices); } - mutation.Complete(ref value); + value = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index 138ccc7a14..29941fb3fd 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -403,8 +403,8 @@ public void Get(ref VBuffer dst) if (_count == 0) { - VBufferMutationContext.Create(ref dst, _size, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, _size, 0) + .CreateBuffer(); return; } @@ -412,12 +412,12 @@ public void Get(ref VBuffer dst) _values.AsSpan(0, _count).CopyTo(mutation.Values); if (_count == _size) { - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } _indices.AsSpan(0, _count).CopyTo(mutation.Indices); - mutation.Complete(ref 
dst); + dst = mutation.CreateBuffer(); } } diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index 87cd01b5ee..cce1b393c8 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -1281,7 +1281,7 @@ private ValueGetter> CreateGetter(int col) { mutation = VBufferMutationContext.Create(ref value, len); _inputValue.GetValues().Slice(min, len).CopyTo(mutation.Values); - mutation.Complete(ref value); + value = mutation.CreateBuffer(); return; } // In the sparse case we have ranges on Indices/Values to consider. @@ -1290,8 +1290,8 @@ private ValueGetter> CreateGetter(int col) int scount = slim - smin; if (scount == 0) { - VBufferMutationContext.Create(ref value, len, 0) - .Complete(ref value); + value = VBufferMutationContext.Create(ref value, len, 0) + .CreateBuffer(); return; } @@ -1308,7 +1308,7 @@ private ValueGetter> CreateGetter(int col) } } _inputValue.GetValues().Slice(smin, scount).CopyTo(mutation.Values); - mutation.Complete(ref value); + value = mutation.CreateBuffer(); }; } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index 1e4fc4bb67..4b07fa87f2 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -108,8 +108,8 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (count == 0) { // dst is a zero vector. 
- VBufferMutationContext.Create(ref dst, length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, length, 0) + .CreateBuffer(); return; } @@ -122,7 +122,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float mutation.Values.Clear(); else CpuMathUtils.Scale(c, srcValues, mutation.Values, length); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } else { @@ -132,7 +132,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float mutation.Values.Clear(); else CpuMathUtils.Scale(c, srcValues, mutation.Values, count); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } @@ -210,7 +210,7 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds { var mutation = VBufferMutationContext.Create(ref res, length); CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), mutation.Values, length); - mutation.Complete(ref res); + res = mutation.CreateBuffer(); return; } @@ -368,7 +368,7 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } /// @@ -394,12 +394,12 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer var mutation = VBufferMutationContext.Create(ref dst, src.Length); if (!mutation.CreatedNewValues) // We need to clear it mutation.Values.Clear(); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } else { - VBufferMutationContext.Create(ref dst, src.Length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, src.Length, 0) + .CreateBuffer(); } } else if (c == -1) diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 071d1e02b0..2f9e652e47 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -718,7 +718,7 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi 
mutation.Indices[j] = srcIndices[j]; } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; ValueGetter>> slotNamesGetter = null; diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index ca28f78d9f..b9363ddf83 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -701,8 +701,8 @@ private ValueGetter> MakeVecTrivialGetter() // Delegates onto instance methods are more efficient than delegates onto static methods. private void VecTrivialGetter(ref VBuffer value) { - VBufferMutationContext.Create(ref value, 1, 0) - .Complete(ref value); + value = VBufferMutationContext.Create(ref value, 1, 0) + .CreateBuffer(); } private Delegate MakeVecGetter(IRow input, int iinfo) diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index 4299dd4904..bda34d6168 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -746,8 +746,8 @@ private static ValueGetter> MakeVectorHashGetter(uint se var srcValues = src.GetValues(); if (srcValues.Length == 0) { - VBufferMutationContext.Create(ref dst, src.Length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, src.Length, 0) + .CreateBuffer(); return; } var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); @@ -757,7 +757,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se if (!src.IsDense) src.GetIndices().CopyTo(mutation.Indices); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } // It is not sparsity preserving. 
@@ -785,7 +785,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se for (int i = 0; i < srcValues.Length; ++i) mutation.Values[srcIndices[i]] = hasher.HashCore(seed, mask, srcValues[i]); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } @@ -809,8 +809,8 @@ private static ValueGetter> MakeVectorOrderedHashGetter( var srcValues = src.GetValues(); if (srcValues.Length == 0) { - VBufferMutationContext.Create(ref dst, src.Length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, src.Length, 0) + .CreateBuffer(); return; } var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); @@ -828,7 +828,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( srcIndices.CopyTo(mutation.Indices); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } // It is not sparsity preserving. @@ -858,7 +858,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( Contracts.Assert(false, "this should have never happened."); } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs index 637f0a7d66..c5b65a4184 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs @@ -596,7 +596,7 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) mutation.Indices[count++] = indices[islot] * size + (int)key; } } - mutation.Complete(ref dst, count); + dst = mutation.CreateBuffer(count); }; } diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 60e7d6d1a7..5e6918b4f7 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -107,8 +107,8 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) if (newLength == 0) { // All slots dropped. 
- VBufferMutationContext.Create(ref dst, 1, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, 1, 0) + .CreateBuffer(); return; } @@ -140,7 +140,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) mutation.Values[iDst++] = srcValues[iSrc++]; } Contracts.Assert(iDst == newLength); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } @@ -210,7 +210,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(index <= max); } - mutation.Complete(ref dst, iiDst); + dst = mutation.CreateBuffer(iiDst); } } } diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index 1dadd67baf..2b90a9ac47 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -56,7 +56,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices var mutation = VBufferMutationContext.Create(ref dst, src.Length); for (int i = 0; i < srcValues.Length; i++) mutation.Values[i] = !includedIndices[i] ? 
defaultValue : srcValues[i]; - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } else { @@ -75,7 +75,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices } Contracts.Assert(count == cardinality); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } else @@ -94,7 +94,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices } } - mutation.Complete(ref dst, count); + dst = mutation.CreateBuffer(count); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs index f240600b10..361417736c 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs @@ -39,7 +39,7 @@ protected void CombineCore(ref VBuffer dst, VBuffer[] src, Singl if (!mutation.CreatedNewValues) mutation.Values.Clear(); // Set the output to values. - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); Single weightTotal; if (weights == null) diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs index e9f8aa1e9c..cc81634746 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs @@ -101,7 +101,7 @@ protected void GetNaNOutput(ref VBuffer dst, int len) var mutation = VBufferMutationContext.Create(ref dst, len); for (int i = 0; i < len; i++) mutation.Values[i] = Single.NaN; - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs index a8c3388a93..a20e8466e7 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs @@ -27,7 +27,7 @@ 
protected override void FillFeatureBuffer(Single[] src, ref VBuffer dst) int len = src.Length; var mutation = VBufferMutationContext.Create(ref dst, len); src.CopyTo(mutation.Values); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs index 471029c204..aefde033b8 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs @@ -94,7 +94,7 @@ public override Combiner> GetCombiner() } // Set the output to values. - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs index c1754d2016..54326c3819 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs @@ -93,7 +93,7 @@ protected override void FillFeatureBuffer(VBuffer[] src, ref VBuffer dst, VBuffer[] src, Single[ int count = Utils.Size(src); if (count == 0) { - VBufferMutationContext.Create(ref dst, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, 0) + .CreateBuffer(); return; } @@ -103,7 +103,7 @@ private void CombineCore(ref VBuffer dst, VBuffer[] src, Single[ mutation.Values[i] /= voteCount; // Set the output to values. 
- mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 2238ef334b..ebd06a16fc 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1368,7 +1368,7 @@ private ValueMapper, VBuffer> GetCopier(ColumnType itemT for (int i = 0; i < srcValues.Length; ++i) conv(in srcValues[i], ref mutation.Values[i]); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 2d5134fa0a..58c4537afc 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -790,7 +790,7 @@ public override void GetFeatureWeights(ref VBuffer weights) score = float.MaxValue; mutation.Values[i] = score; } - mutation.Complete(ref weights); + weights = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index d84e3c5bd6..2e21c6ae95 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -478,8 +478,8 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo if (src == null) { - VBufferMutationContext.Create(ref dst, size, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, size, 0) + .CreateBuffer(); return; } @@ -607,7 +607,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index 507334dd1c..fdd35aed7b 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ 
b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -150,7 +150,7 @@ public ValueMapper GetMapper() throw Host.Except($"Incorrect number of features: expected {_dimensionality}, got {src.Length}"); var mutation = VBufferMutationContext.Create(ref dst, _k); Map(in src, mutation.Values); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; return (ValueMapper)(Delegate)del; diff --git a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs index fddf019ee1..285a142465 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs @@ -284,7 +284,7 @@ private Delegate MakeGetter(IRow input) var mutation = VBufferMutationContext.Create(ref dst, _outputColType.VectorSize); OnnxUtils.CopyTo(outputTensors[0], mutation.Values); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; return valueGetter; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index 970363cf32..2b20b173b4 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -411,7 +411,7 @@ private void Map(in VBuffer src, ref VBuffer dst) (float)(logProb + (_absentFeaturesLogProb[iLabel] - absentFeatureLogProb)); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index e6590050f0..e47e078496 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -119,8 +119,8 @@ private void BeginBatch() _batch++; _numBatchExamples = 0; _biasUpdate = 0; - VBufferMutationContext.Create(ref 
_weightsUpdate, _weightsUpdate.Length, 0) - .Complete(ref _weightsUpdate); + _weightsUpdate = VBufferMutationContext.Create(ref _weightsUpdate, _weightsUpdate.Length, 0) + .CreateBuffer(); } private void FinishBatch(in VBuffer weightsUpdate, Float weightsUpdateScale) diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 6717527e7a..6bd4162b43 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -929,7 +929,7 @@ private Delegate MakeGetter(IRow input, int iinfo, ITensorValueGetter[] srcTe var mutation = VBufferMutationContext.Create(ref dst, (int)tensorSize); TensorFlowUtils.FetchData(tensor.Data, mutation.Values); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; return valuegetter; } diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index ef52198f1a..781285dfac 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -581,8 +581,8 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re if (count == 0) { - VBufferMutationContext.Create(ref dst, length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, length, 0) + .CreateBuffer(); return; } ectx.Assert(count > 0); @@ -607,7 +607,7 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re } CpuMathUtils.Scale(normScale, src.GetValues(), dstValues, count); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index 23d32993b3..b8e03558d6 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -240,8 +240,8 @@ private void DropNAsAndDefaults(ref VBuffer 
src, ref VBuffer d if (newCount == 0) { - VBufferMutationContext.Create(ref dst, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, 0) + .CreateBuffer(); return; } @@ -251,8 +251,8 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d if (!dst.IsDense) { Host.Assert(dst.GetValues().Length == newCount); - VBufferMutationContext.Create(ref dst, newCount) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, newCount) + .CreateBuffer(); } return; } @@ -268,7 +268,7 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d } Host.Assert(iDst == newCount); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredicate isNA) @@ -286,8 +286,8 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi if (newCount == 0) { - VBufferMutationContext.Create(ref dst, src.Length - srcValues.Length, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, src.Length - srcValues.Length, 0) + .CreateBuffer(); return; } @@ -310,7 +310,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi } } Host.Assert(iDst == newCount); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } else { @@ -332,7 +332,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi } Host.Assert(iDst == newCount); Host.Assert(offset == srcValues.Length - newCount); - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 419d09807f..b8607717d2 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -276,8 +276,8 @@ private static void FillValues(Float input, ref VBuffer result) { if (input == 0) { - VBufferMutationContext.Create(ref result, 2, 0) - .Complete(ref result); + result = 
VBufferMutationContext.Create(ref result, 2, 0) + .CreateBuffer(); return; } @@ -293,7 +293,7 @@ private static void FillValues(Float input, ref VBuffer result) mutation.Indices[0] = 0; } - mutation.Complete(ref result); + result = mutation.CreateBuffer(); } // This converts in place. @@ -358,7 +358,7 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer } ectx.Assert(0 <= iivDst & iivDst <= values.Length); - mutation.Complete(ref buffer, iivDst); + buffer = mutation.CreateBuffer(iivDst); } } } diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 145de15f03..37a7372060 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -786,7 +786,7 @@ private static void MapVector(this ValueMapper map, in V input.GetIndices().CopyTo(mutation.Indices); } - mutation.Complete(ref output); + output = mutation.CreateBuffer(); } } } diff --git a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs index 285d47216a..1765d5c152 100644 --- a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs @@ -487,7 +487,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) Contracts.Assert(index == len); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; ValueGetter> getterWithUnitSep = (ref VBuffer dst) => @@ -544,7 +544,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) Contracts.Assert(index == len); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; return _parent._isSeparatorStartEnd ? 
getterWithStartEndSep : getterWithUnitSep; } diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 399237ac5f..ab721bb4e4 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -854,8 +854,8 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin var srcValues = src.GetValues(); if (srcValues.Length == 0) { - VBufferMutationContext.Create(ref dst, len, 0) - .Complete(ref dst); + dst = VBufferMutationContext.Create(ref dst, len, 0) + .CreateBuffer(); return; } @@ -874,7 +874,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin mutation = VBufferMutationContext.Create(ref dst, len); for (int k = 0; k < len; k++) mutation.Values[k] = Float.NaN; - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); return; } @@ -920,7 +920,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin for (int i = 0; i < count; i++) mutation.Values[i] = (Float)(mutation.Values[i] / normalizer); } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } public void Dispose() diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs index b93e487fbd..eb021e3929 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs @@ -613,7 +613,7 @@ private ValueGetter> GetGetterVec(IRow input, int iinfo) for (int index = 0; index < dimension; index++) mutation.Values[index + dimension] /= deno; - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } } diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 066a47b192..ad5d379315 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ 
-325,7 +325,7 @@ private ValueGetter>> MakeGetterVec(IRow input, int { mutation.Values[i] = terms[i]; } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); }; } diff --git a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.Transforms/VectorWhitening.cs index c9d75bc2af..1b47967967 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.Transforms/VectorWhitening.cs @@ -748,7 +748,7 @@ private static void FillValues(float[] model, ref VBuffer src, ref VBuffe offs += length; } } - mutation.Complete(ref dst); + dst = mutation.CreateBuffer(); } private static float DotProduct(float[] a, int aOffset, float[] b, int[] indices, int count) From d9905f323e1a3aaea34d645c7ec7ad575d48bbab Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 8 Nov 2018 17:28:51 -0600 Subject: [PATCH 08/14] Fix two failing tests. --- src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs | 4 ++-- src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs index 3cc3ddb4a6..5755b285be 100644 --- a/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs +++ b/src/Microsoft.ML.Data/Evaluators/RankerEvaluator.cs @@ -573,8 +573,8 @@ internal Result(IExceptionContext ectx, IRow overallResult) { VBuffer Fetch(string name) => Fetch>(ectx, overallResult, name); - Dcg = Fetch(RankerEvaluator.Dcg).Values; - Ndcg = Fetch(RankerEvaluator.Ndcg).Values; + Dcg = Fetch(RankerEvaluator.Dcg).GetValues().ToArray(); + Ndcg = Fetch(RankerEvaluator.Ndcg).GetValues().ToArray(); } } } diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs index 7cc814b4ce..cf29a87feb 100644 --- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs +++ b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs @@ -341,7 +341,7 @@ private 
void GetSlotNames(int iinfo, ref VBuffer> dst) sb.Append('.'); int len = sb.Length; - foreach (var key in bits.Values) + foreach (var key in bits.GetValues()) { sb.Length = len; sb.AppendMemory(key); From 4cafe937b9c8bd1b6de3ab1ca73350c87af16022 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Mon, 12 Nov 2018 17:58:27 -0600 Subject: [PATCH 09/14] Fix FastTreeBinaryClassificationCategoricalSplitTest by remembering the underlying arrays in the column buffer in Transposer. Also enable a Transposer test, since it passes. --- src/Microsoft.ML.Data/DataView/Transposer.cs | 34 ++++++++++++++----- .../TestTransposer.cs | 2 +- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index cce1b393c8..d1f9276b8a 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -504,8 +504,25 @@ private sealed class SlotCursorVec : SlotCursor private T[][] _values; // Working intermediate value buffers. private int[] _counts; // Working intermediate count buffers. - // The transposed contents of _colStored. - private VBuffer[] _cbuff; // Working intermediate column-wise buffer. + private struct ColumnBufferStorage + { + // The transposed contents of _colStored. + public VBuffer Buffer; + + // These two arrays are the "cached" arrays inside of the Buffer + // to be swapped between the _cbuff and _values/_indices. + public readonly T[] Values; + public readonly int[] Indices; + + public ColumnBufferStorage(VBuffer buffer, T[] values, int[] indices) + { + Buffer = buffer; + Values = values; + Indices = indices; + } + } + + private ColumnBufferStorage[] _cbuff; // Working intermediate column-wise buffer. // Variables to track current cursor position. private int _colStored; // The current column of the source data view actually stored in the intermediate buffers. 
@@ -711,16 +728,17 @@ private void EnsureValid() if (count < _len / 2) { // Already sparse enough, I guess. Swap out the arrays. - Utils.Swap(ref temp, ref _cbuff[s]); - _indices[s] = indices ?? new int[_len]; - _values[s] = values ?? new T[_len]; + ColumnBufferStorage existingBuffer = _cbuff[s]; + _cbuff[s] = new ColumnBufferStorage(temp, values, indices); + _indices[s] = existingBuffer.Indices ?? new int[_len]; + _values[s] = existingBuffer.Values ?? new T[_len]; Ch.Assert(_indices[s].Length == _len); Ch.Assert(_values[s].Length == _len); } else { // Not dense enough. Densify temp into _cbuff[s]. Don't swap the arrays. - temp.CopyToDense(ref _cbuff[s]); + temp.CopyToDense(ref _cbuff[s].Buffer); } } _colStored = _colCurr; @@ -743,8 +761,8 @@ private void Getter(ref VBuffer dst) { Ch.Check(IsGood, "Cannot get values in the cursor's current state"); EnsureValid(); - Ch.Assert(0 <= _slotCurr && _slotCurr < Utils.Size(_cbuff) && _cbuff[_slotCurr].Length == _len); - _cbuff[_slotCurr].CopyTo(ref dst); + Ch.Assert(0 <= _slotCurr && _slotCurr < Utils.Size(_cbuff) && _cbuff[_slotCurr].Buffer.Length == _len); + _cbuff[_slotCurr].Buffer.CopyTo(ref dst); } protected override ValueGetter> GetGetterCore() diff --git a/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs b/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs index 9517e82a55..a9f969a7f1 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestTransposer.cs @@ -143,7 +143,7 @@ private static T[] GenerateHelper(int rowCount, Double density, Random rgen, return values; } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] [TestCategory("Transposer")] public void TransposerTest() { From 1d786efad5d2fd1e6d62a4e5e07508c3e9667c7d Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Mon, 12 Nov 2018 20:43:52 -0600 Subject: [PATCH 10/14] Fix up VectorWhitening after rebase. 
--- src/Microsoft.ML.Transforms/VectorWhitening.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.Transforms/VectorWhitening.cs index 1b47967967..4c05af05be 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.Transforms/VectorWhitening.cs @@ -751,7 +751,7 @@ private static void FillValues(float[] model, ref VBuffer src, ref VBuffe dst = mutation.CreateBuffer(); } - private static float DotProduct(float[] a, int aOffset, float[] b, int[] indices, int count) + private static float DotProduct(float[] a, int aOffset, ReadOnlySpan b, ReadOnlySpan indices, int count) { Contracts.Assert(count <= indices.Length); return CpuMathUtils.DotProductSparse(a.AsSpan(aOffset), b, indices, count); From 1cb14112b861c19cb5d088b1a89632dfa0b89543 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 15 Nov 2018 07:38:40 -0600 Subject: [PATCH 11/14] Respond to PR feedback. --- src/Microsoft.ML.Core/Data/MetadataUtils.cs | 3 +- .../Data/VBufferMutationContext.cs | 4 ++- src/Microsoft.ML.Core/Utilities/Utils.cs | 4 --- .../Utilities/VBufferUtils.cs | 31 ++++++++++--------- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 3 +- .../DataLoadSave/Text/TextLoaderParser.cs | 3 +- src/Microsoft.ML.Data/DataView/Transposer.cs | 3 +- .../Depricated/Vector/VBufferMathUtils.cs | 8 ++--- .../Transforms/DropSlotsTransform.cs | 3 +- .../Transforms/HashTransform.cs | 6 ++-- .../Utilities/SlotDropper.cs | 3 +- .../OutputCombiners/MultiVoting.cs | 3 +- .../BinFile/BinFinder.cs | 20 ++++++------ .../OlsLinearRegression.cs | 7 +++-- .../ImagePixelExtractorTransform.cs | 4 +-- .../Standard/Online/LinearSvm.cs | 3 +- .../TensorFlow/TensorflowUtils.cs | 8 ++--- src/Microsoft.ML.Transforms/GcnTransform.cs | 3 +- .../MissingValueDroppingTransformer.cs | 9 ++---- .../MissingValueIndicatorTransform.cs | 3 +- .../Text/LdaTransform.cs | 3 +- .../Text/NgramTransform.cs | 8 ++--- 
.../Text/WordTokenizeTransform.cs | 2 -- 23 files changed, 63 insertions(+), 81 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/MetadataUtils.cs b/src/Microsoft.ML.Core/Data/MetadataUtils.cs index 75cf485c4d..734ce31b23 100644 --- a/src/Microsoft.ML.Core/Data/MetadataUtils.cs +++ b/src/Microsoft.ML.Core/Data/MetadataUtils.cs @@ -319,8 +319,7 @@ public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.Column IReadOnlyList list; if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize)) { - slotNames = VBufferMutationContext.Create(ref slotNames, vectorSize, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref slotNames, vectorSize, 0); } else schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames); diff --git a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs b/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs index cc2d0c32ce..54e90d3695 100644 --- a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs +++ b/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs @@ -132,7 +132,9 @@ internal VBufferMutationContext(int logicalLength, /// with a larger physical value count than was needed /// because the final value count was not known at creation time. /// - /// + /// + /// The newly created . + /// public VBuffer CreateBuffer(int? 
physicalValuesCount = null) { int count = Values.Length; diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index f3ed1c942e..d87b13fd1e 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -1114,9 +1114,7 @@ public static int Count(this ReadOnlySpan source, Func(this ReadOnlySpan source, Func(in VBuffer a, in VBuffer b, Act /// public static void Clear(ref VBuffer dst) { - int dstValuesCount = dst.GetValues().Length; - if (dstValuesCount == 0) - return; - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); + var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); mutation.Values.Clear(); } @@ -346,8 +343,7 @@ public static void Apply(ref VBuffer dst, SlotValueManipulator manip) { Contracts.CheckValue(manip, nameof(manip)); - int dstValuesCount = dst.GetValues().Length; - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); + var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); if (dst.IsDense) { for (int i = 0; i < mutation.Values.Length; i++) @@ -381,8 +377,8 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator Contracts.CheckValue(manip, nameof(manip)); Contracts.CheckValueOrNull(pred); - int dstValuesCount = dst.GetValues().Length; - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount); + var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + int dstValuesCount = mutation.Values.Length; if (dst.IsDense) { // The vector is dense, so we can just do a direct access. 
@@ -963,8 +959,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (srcValues.Length == 0) { - res = VBufferMutationContext.Create(ref res, length, 0) - .CreateBuffer(); + Resize(ref res, length, 0); } else if (src.IsDense) { @@ -1209,8 +1204,7 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref // equal lengths, but I don't care here. if (srcValues.Length == 0) { - dst = VBufferMutationContext.Create(ref dst, src.Length, 0) - .CreateBuffer(); + Resize(ref dst, src.Length, 0); return; } var mutation = VBufferMutationContext.Create(ref dst, @@ -1263,8 +1257,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer if (aValues.Length == 0 && bValues.Length == 0) { // Case 1. Output will be empty. - dst = VBufferMutationContext.Create(ref dst, a.Length, 0) - .CreateBuffer(); + Resize(ref dst, a.Length, 0); return; } @@ -1441,5 +1434,15 @@ public static void Copy(List src, ref VBuffer dst, int length) } dst = mutation.CreateBuffer(); } + + /// + /// Updates the logical length and number of physical values to be represented in + /// , while preserving the underlying buffers. + /// + public static void Resize(ref VBuffer dst, int newLogicalLength, int? 
valuesCount = null) + { + dst = VBufferMutationContext.Create(ref dst, newLogicalLength, valuesCount) + .CreateBuffer(); + } } } diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index d1a647dafd..c4207a83ba 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -434,8 +434,7 @@ public void GetResult(ref VBuffer buffer) { if (IsEmpty) { - buffer = VBufferMutationContext.Create(ref buffer, _length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref buffer, _length, 0); return; } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index 29941fb3fd..abdebf4c87 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -403,8 +403,7 @@ public void Get(ref VBuffer dst) if (_count == 0) { - dst = VBufferMutationContext.Create(ref dst, _size, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, _size, 0); return; } diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index d1f9276b8a..ff8fb4fa19 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -1308,8 +1308,7 @@ private ValueGetter> CreateGetter(int col) int scount = slim - smin; if (scount == 0) { - value = VBufferMutationContext.Create(ref value, len, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref value, len, 0); return; } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index 4b07fa87f2..c1683d5bda 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -108,8 +108,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (count == 0) { // dst is a zero 
vector. - dst = VBufferMutationContext.Create(ref dst, length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, length, 0); return; } @@ -392,14 +391,13 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer { // Due to sparsity preservation from src, dst must be dense, in the same way. var mutation = VBufferMutationContext.Create(ref dst, src.Length); - if (!mutation.CreatedNewValues) // We need to clear it + if (!mutation.CreatedNewValues) // We need to clear it. mutation.Values.Clear(); dst = mutation.CreateBuffer(); } else { - dst = VBufferMutationContext.Create(ref dst, src.Length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, src.Length, 0); } } else if (c == -1) diff --git a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs index b9363ddf83..27a73824b2 100644 --- a/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/DropSlotsTransform.cs @@ -701,8 +701,7 @@ private ValueGetter> MakeVecTrivialGetter() // Delegates onto instance methods are more efficient than delegates onto static methods. 
private void VecTrivialGetter(ref VBuffer value) { - value = VBufferMutationContext.Create(ref value, 1, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref value, 1, 0); } private Delegate MakeVecGetter(IRow input, int iinfo) diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index bda34d6168..0cbffe2ee4 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -746,8 +746,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se var srcValues = src.GetValues(); if (srcValues.Length == 0) { - dst = VBufferMutationContext.Create(ref dst, src.Length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, src.Length, 0); return; } var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); @@ -809,8 +808,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( var srcValues = src.GetValues(); if (srcValues.Length == 0) { - dst = VBufferMutationContext.Create(ref dst, src.Length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, src.Length, 0); return; } var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 5e6918b4f7..7b3da0d28e 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -107,8 +107,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) if (newLength == 0) { // All slots dropped. 
- dst = VBufferMutationContext.Create(ref dst, 1, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, 1, 0); return; } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs index 00a88e8768..000e5c0e56 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiVoting.cs @@ -77,8 +77,7 @@ private void CombineCore(ref VBuffer dst, VBuffer[] src, Single[ int count = Utils.Size(src); if (count == 0) { - dst = VBufferMutationContext.Create(ref dst, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, 0); return; } diff --git a/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs b/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs index 8deceef6e0..eaae7d5448 100644 --- a/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs +++ b/src/Microsoft.ML.FastTree/BinFile/BinFinder.cs @@ -54,9 +54,9 @@ public BinFinder() /// private int FindDistinctCounts(in VBuffer values, double[] valueBuffer, double[] distinctValues, int[] counts) { - var valueValues = values.GetValues(); - var valuesCount = valueValues.Length; - if (valuesCount == 0) + var explicitValues = values.GetValues(); + var explicitValuesCount = explicitValues.Length; + if (explicitValuesCount == 0) { if (values.Length == 0) return 0; @@ -66,9 +66,9 @@ private int FindDistinctCounts(in VBuffer values, double[] valueBuffer, } // Get histogram of values - Contracts.Assert(valueBuffer.Length >= valuesCount); - valueValues.CopyTo(valueBuffer); - Array.Sort(valueBuffer, 0, valuesCount); + Contracts.Assert(valueBuffer.Length >= explicitValuesCount); + explicitValues.CopyTo(valueBuffer); + Array.Sort(valueBuffer, 0, explicitValuesCount); // Note that Array.Sort will, by MSDN documentation, make NaN be the first item of a sorted // list (that is, NaN is considered to be ordered "below" any other value for the purpose of // a sort, including negative infinity). 
So when checking if values contains no NaN values, it @@ -80,13 +80,13 @@ private int FindDistinctCounts(in VBuffer values, double[] valueBuffer, { // Implicit zeros at the head. distinctValues[0] = 0; - counts[0] = values.Length - valuesCount; + counts[0] = values.Length - explicitValuesCount; idist = 1; } double last = distinctValues[idist] = valueBuffer[0]; counts[idist] = 1; - for (int i = 1; i < valuesCount; ++i) + for (int i = 1; i < explicitValuesCount; ++i) { double curr = valueBuffer[i]; if (curr != last) @@ -98,7 +98,7 @@ private int FindDistinctCounts(in VBuffer values, double[] valueBuffer, { // This boundary is going from negative, to non-negative, and there are "implicit" zeros. distinctValues[idist] = 0; - counts[idist] = values.Length - valuesCount; + counts[idist] = values.Length - explicitValuesCount; if (curr == 0) { // No need to do any more work. @@ -123,7 +123,7 @@ private int FindDistinctCounts(in VBuffer values, double[] valueBuffer, { // Implicit zeros at the tail. 
distinctValues[++idist] = 0; - counts[idist] = values.Length - valuesCount; + counts[idist] = values.Length - explicitValuesCount; } return idist + 1; diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 58c4537afc..1159f2ea88 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -278,10 +278,11 @@ private OlsLinearRegressionPredictor TrainCore(IChannel ch, FloatLabelCursor.Fac for (int i = 0; i < beta.Length; ++i) ch.Check(FloatUtils.IsFinite(beta[i]), "Non-finite values detected in OLS solution"); - var weights = VBufferUtils.CreateDense(beta.Length - 1); - var weightsMutation = VBufferMutationContext.CreateFromBuffer(ref weights); + var weightsValues = new float[beta.Length - 1]; for (int i = 1; i < beta.Length; ++i) - weightsMutation.Values[i - 1] = (float)beta[i]; + weightsValues[i - 1] = (float)beta[i]; + var weights = new VBuffer(weightsValues.Length, weightsValues); + var bias = (float)beta[0]; if (!(_l2Weight > 0) && m == n) { diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index 2e21c6ae95..44ea1a4008 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -478,8 +478,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo if (src == null) { - dst = VBufferMutationContext.Create(ref dst, size, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, size, 0); return; } @@ -493,6 +492,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo float scale = ex.Scale; Contracts.Assert(scale != 0); + // REVIEW: split the getter into 2 specialized getters, one for float case and one for byte case. Span vf = typeof(TValue) == typeof(float) ? MemoryMarshal.Cast(mutation.Values) : default; Span vb = typeof(TValue) == typeof(byte) ? 
MemoryMarshal.Cast(mutation.Values) : default; Contracts.Assert(!vf.IsEmpty || !vb.IsEmpty); diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index e47e078496..09700c8184 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -119,8 +119,7 @@ private void BeginBatch() _batch++; _numBatchExamples = 0; _biasUpdate = 0; - _weightsUpdate = VBufferMutationContext.Create(ref _weightsUpdate, _weightsUpdate.Length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref _weightsUpdate, _weightsUpdate.Length, 0); } private void FinishBatch(in VBuffer weightsUpdate, Float weightsUpdateScale) diff --git a/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs b/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs index ce33404e32..dbf080d9a1 100644 --- a/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs +++ b/src/Microsoft.ML.TensorFlow/TensorFlow/TensorflowUtils.cs @@ -115,10 +115,10 @@ public static ISchema GetModelSchema(IExceptionContext ectx, string modelFile) Contracts.Assert(metadataType.IsKnownSizeVector && metadataType.ItemType.IsText); schema.GetMetadata(TensorFlowUtils.InputOps, i, ref inputOps); } - var inputOpsValues = inputOps.GetValues(); - string[] inputOpsResult = new string[inputOpsValues.Length]; - for (int j = 0; j < inputOpsValues.Length; j++) - inputOpsResult[j] = inputOpsValues[j].ToString(); + + string[] inputOpsResult = inputOps.DenseValues() + .Select(input => input.ToString()) + .ToArray(); yield return (name, opType.ToString(), type, inputOpsResult); } diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 781285dfac..afd996a755 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -581,8 +581,7 @@ private static void FillValues(IExceptionContext 
ectx, in VBuffer src, re if (count == 0) { - dst = VBufferMutationContext.Create(ref dst, length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, length, 0); return; } ectx.Assert(count > 0); diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index b8e03558d6..c35afd5bcd 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -240,8 +240,7 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d if (newCount == 0) { - dst = VBufferMutationContext.Create(ref dst, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, 0); return; } @@ -251,8 +250,7 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d if (!dst.IsDense) { Host.Assert(dst.GetValues().Length == newCount); - dst = VBufferMutationContext.Create(ref dst, newCount) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, newCount); } return; } @@ -286,8 +284,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi if (newCount == 0) { - dst = VBufferMutationContext.Create(ref dst, src.Length - srcValues.Length, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, src.Length - srcValues.Length, 0); return; } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index b8607717d2..2b279bae07 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -276,8 +276,7 @@ private static void FillValues(Float input, ref VBuffer result) { if (input == 0) { - result = VBufferMutationContext.Create(ref result, 2, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref result, 2, 0); return; } diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index ab721bb4e4..d1a52baf85 100644 --- 
a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -854,8 +854,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin var srcValues = src.GetValues(); if (srcValues.Length == 0) { - dst = VBufferMutationContext.Create(ref dst, len, 0) - .CreateBuffer(); + VBufferUtils.Resize(ref dst, len, 0); return; } diff --git a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs index 3dcf350302..deaa7855cd 100644 --- a/src/Microsoft.ML.Transforms/Text/NgramTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/NgramTransform.cs @@ -320,10 +320,10 @@ public override void Save(ModelSaveContext ctx) ctx.SaveTextStream(string.Format("{0}-ngrams.txt", Infos[i].Name), writer => { - var ngramNameValues = ngramsNames.GetValues(); - writer.WriteLine("# Number of Ngrams terms = {0}", ngramNameValues.Length); - for (int j = 0; j < ngramNameValues.Length; j++) - writer.WriteLine("{0}\t{1}", j, ngramNameValues[j]); + var explicitNgramNames = ngramsNames.GetValues(); + writer.WriteLine("# Number of Ngrams terms = {0}", explicitNgramNames.Length); + for (int j = 0; j < explicitNgramNames.Length; j++) + writer.WriteLine("{0}\t{1}", j, explicitNgramNames[j]); }); } } diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index ad5d379315..1ccff8ea14 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -322,9 +322,7 @@ private ValueGetter>> MakeGetterVec(IRow input, int var mutation = VBufferMutationContext.Create(ref dst, terms.Count); for (int i = 0; i < terms.Count; i++) - { mutation.Values[i] = terms[i]; - } dst = mutation.CreateBuffer(); }; } From 28d48a11361d61dbe252440ecc7e85daecaa2f1b Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 15 Nov 2018 16:39:51 -0600 Subject: [PATCH 12/14] Rename CreateBuffer 
to Commit and CommitTruncated. --- src/Microsoft.ML.Core/Data/VBuffer.cs | 14 +++--- .../Data/VBufferMutationContext.cs | 45 ++++++++++++------- .../Utilities/VBufferUtils.cs | 44 +++++++++--------- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 4 +- .../DataLoadSave/Binary/Codecs.cs | 4 +- .../DataLoadSave/Text/TextLoaderParser.cs | 4 +- src/Microsoft.ML.Data/DataView/Transposer.cs | 4 +- .../Depricated/Vector/VBufferMathUtils.cs | 10 ++--- .../Evaluators/EvaluatorUtils.cs | 2 +- .../Transforms/HashTransform.cs | 8 ++-- .../Transforms/KeyToVectorTransform.cs | 2 +- .../Utilities/SlotDropper.cs | 4 +- src/Microsoft.ML.Ensemble/EnsembleUtils.cs | 6 +-- .../OutputCombiners/BaseMultiAverager.cs | 2 +- .../OutputCombiners/BaseMultiCombiner.cs | 2 +- .../OutputCombiners/BaseScalarStacking.cs | 2 +- .../OutputCombiners/MultiMedian.cs | 2 +- .../OutputCombiners/MultiStacking.cs | 2 +- .../OutputCombiners/MultiVoting.cs | 2 +- src/Microsoft.ML.FastTree/FastTree.cs | 2 +- .../OlsLinearRegression.cs | 2 +- .../ImagePixelExtractorTransform.cs | 2 +- .../KMeansPredictor.cs | 2 +- .../OnnxTransform.cs | 2 +- .../MultiClass/MultiClassNaiveBayesTrainer.cs | 2 +- .../TensorflowTransform.cs | 2 +- src/Microsoft.ML.Transforms/GcnTransform.cs | 2 +- .../MissingValueDroppingTransformer.cs | 6 +-- .../MissingValueIndicatorTransform.cs | 4 +- .../MutualInformationFeatureSelection.cs | 2 +- .../Text/CharTokenizeTransform.cs | 4 +- .../Text/LdaTransform.cs | 4 +- .../Text/WordEmbeddingsTransform.cs | 2 +- .../Text/WordTokenizeTransform.cs | 2 +- .../VectorWhitening.cs | 2 +- 35 files changed, 108 insertions(+), 97 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index 504742b1d3..a3c4f77f00 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -130,7 +130,7 @@ public void CopyToDense(ref VBuffer dst) CopyTo(mutation.Values); else if (Length > 0) _values.AsSpan(0, 
Length).CopyTo(mutation.Values); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -145,7 +145,7 @@ public void CopyTo(ref VBuffer dst) { _values.AsSpan(0, Length).CopyTo(mutation.Values); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); Contracts.Assert(dst.IsDense); } else @@ -155,7 +155,7 @@ public void CopyTo(ref VBuffer dst) _values.AsSpan(0, _count).CopyTo(mutation.Values); _indices.AsSpan(0, _count).CopyTo(mutation.Indices); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } @@ -174,7 +174,7 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) { _values.AsSpan(srcMin, length).CopyTo(mutation.Values); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); Contracts.Assert(dst.IsDense); } else @@ -196,12 +196,12 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) mutation.Indices[i] = _indices[i + copyMin] - srcMin; } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } else { var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } } @@ -258,7 +258,7 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) { src.AsSpan(srcIndex, length).CopyTo(mutation.Values); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } public IEnumerable> Items(bool all = false) diff --git a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs b/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs index 54e90d3695..433bd5d814 100644 --- a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs +++ b/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs @@ -74,7 +74,7 @@ internal static VBufferMutationContext Create( /// An object capable of mutation a by filling out /// (and if the buffer is not dense). 
/// - public ref struct VBufferMutationContext + public readonly ref struct VBufferMutationContext { private readonly int _logicalLength; private readonly T[] _values; @@ -122,29 +122,40 @@ internal VBufferMutationContext(int logicalLength, } /// - /// Creates a new using the current - /// Values and Indices. + /// Commits the edits and creates a new using + /// the current Values and Indices. + /// + /// + /// The newly created . + /// + public VBuffer Commit() + { + return new VBuffer(_logicalLength, Values.Length, _values, _indices); + } + + /// + /// Commits the edits and creates a new using + /// the current Values and Indices, while allowing to truncate the length + /// of Values and Indices. /// /// - /// An optional size that allows reducing the number of physical values to be - /// represented in the created buffer. - /// This is useful in sparse situations where the mutation context was created - /// with a larger physical value count than was needed - /// because the final value count was not known at creation time. + /// The new number of physical values to be represented in the created buffer. /// /// /// The newly created . /// - public VBuffer CreateBuffer(int? physicalValuesCount = null) + /// + /// CommitTruncated allows to modify the length of the explicitly + /// defined values. + /// This is useful in sparse situations where the + /// was created with a larger physical value count than was needed + /// because the final value count was not known at creation time. 
+ /// + public VBuffer CommitTruncated(int physicalValuesCount) { - int count = Values.Length; - if (physicalValuesCount.HasValue) - { - Contracts.Check(physicalValuesCount.Value <= count, "Updating physicalValuesCount during Complete cannot be greater than the original physicalValuesCount value used in Create."); - count = physicalValuesCount.Value; - } - - return new VBuffer(_logicalLength, count, _values, _indices); + Contracts.CheckParam(physicalValuesCount <= Values.Length, nameof(physicalValuesCount), "Updating physicalValuesCount during CommitTruncated cannot be greater than the original physicalValuesCount value used in Create."); + + return new VBuffer(_logicalLength, physicalValuesCount, _values, _indices); } } } diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index 843144f4c5..390e80727e 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -417,7 +417,7 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator if (needIndices) mutation.Indices[idx] = slot; mutation.Values[idx] = value; - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -461,7 +461,7 @@ public static void Densify(ref VBuffer dst) for (int i = 0; i < values.Length; ++i) mutation.Values[indices[i]] = values[i]; } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -488,7 +488,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) var newIndicesMutation = VBufferMutationContext.Create(ref dst, dst.Length, denseCount); Utils.FillIdentity(newIndicesMutation.Indices, denseCount); newIndicesMutation.Values.Clear(); - dst = newIndicesMutation.CreateBuffer(); + dst = newIndicesMutation.Commit(); return; } int lim = Utils.FindIndexSorted(dstIndices, 0, dstValues.Length, denseCount); @@ -510,7 +510,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) mutation.Values[ii] = i >= 0 && 
dstIndices[i] == ii ? dstValues[i--] : default(T); mutation.Indices[ii] = ii; } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -559,7 +559,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds Contracts.Assert(j == sparseCount); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -764,7 +764,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< mutation.Values.Clear(); for (int i = 0; i < srcValues.Length; i++) manip(mutation.Indices[i] = srcIndices[i], srcValues[i], ref mutation.Values[i]); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } @@ -857,7 +857,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< dIndex = --dI >= 0 ? dstIndices[dI] : -1; } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } @@ -922,7 +922,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< Contracts.Assert(srcIndices[sI] == bIndex); mutation.Indices[dI] = sI++; } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); Densify(ref dst); mutation = VBufferMutationContext.Create(ref dst, @@ -932,7 +932,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< srcIndices.CopyTo(mutation.Indices); for (sI = 0; sI < srcValues.Length; sI++) manip(srcIndices[sI], srcValues[sI], ref mutation.Values[sI]); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } @@ -967,7 +967,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf var mutation = VBufferMutationContext.Create(ref res, length); for (int i = 0; i < length; i++) manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); - res = mutation.CreateBuffer(); + res = mutation.Commit(); } else { @@ -983,7 +983,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf mutation.Indices[ii] = i; manip(i, srcValues[ii], default(TDst), ref mutation.Values[ii]); } - res = mutation.CreateBuffer(); + res = mutation.Commit(); } } else if (dst.IsDense) 
@@ -1003,14 +1003,14 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf for (int j = 0; j < length; j++) mutation.Values[j] = dstValues[j]; } - res = mutation.CreateBuffer(); + res = mutation.Commit(); } else if (src.IsDense) { Contracts.Assert(srcValues.Length == src.Length); for (int i = 0; i < length; i++) manip(i, srcValues[i], dstValues[i], ref mutation.Values[i]); - res = mutation.CreateBuffer(); + res = mutation.Commit(); } else { @@ -1049,7 +1049,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf mutation.Values[j] = dstValues[j]; } } - res = mutation.CreateBuffer(); + res = mutation.Commit(); } } else @@ -1078,7 +1078,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf mutation.Values[jj] = dstValues[jj]; } } - res = mutation.CreateBuffer(); + res = mutation.Commit(); } else if (src.IsDense) { @@ -1096,7 +1096,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); } - res = mutation.CreateBuffer(); + res = mutation.Commit(); } else { @@ -1178,7 +1178,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf Contracts.Assert(ii == srcValues.Length && jj == dstCount); Contracts.Assert(i == length && j == length); - res = mutation.CreateBuffer(); + res = mutation.Commit(); } } } @@ -1225,7 +1225,7 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref for (int i = 0; i < srcValues.Length; ++i) values[i] = func(srcIndices[i], srcValues[i]); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -1296,7 +1296,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer for (int i = 0; i < a.Length; i++) mutation.Values[i] = func(i, aValues[i], bValues[i]); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } @@ -1414,7 +1414,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer indices[newI++] = index; } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ 
-1432,7 +1432,7 @@ public static void Copy(List src, ref VBuffer dst, int length) mutation.Values[i] = src[i]; } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -1442,7 +1442,7 @@ public static void Copy(List src, ref VBuffer dst, int length) public static void Resize(ref VBuffer dst, int newLogicalLength, int? valuesCount = null) { dst = VBufferMutationContext.Create(ref dst, newLogicalLength, valuesCount) - .CreateBuffer(); + .Commit(); } } } diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index c4207a83ba..d066fbf65b 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -450,7 +450,7 @@ public void GetResult(ref VBuffer buffer) { var mutation = VBufferMutationContext.Create(ref buffer, _length); _values.AsSpan(0, _length).CopyTo(mutation.Values); - buffer = mutation.CreateBuffer(); + buffer = mutation.Commit(); } else { @@ -458,7 +458,7 @@ public void GetResult(ref VBuffer buffer) var mutation = VBufferMutationContext.Create(ref buffer, _length, _count); _values.AsSpan(0, _count).CopyTo(mutation.Values); _indices.AsSpan(0, _count).CopyTo(mutation.Indices); - buffer = mutation.CreateBuffer(); + buffer = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index 9e1584fd4d..5d47f4747b 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -1118,7 +1118,7 @@ public override void Get(ref VBuffer value) _values.AsSpan(_valuesOffset, length) .CopyTo(mutation.Values); } - value = mutation.CreateBuffer(); + value = mutation.Commit(); } else { @@ -1131,7 +1131,7 @@ public override void Get(ref VBuffer value) _indices.AsSpan(_indicesOffset, count) .CopyTo(mutation.Indices); } - value = mutation.CreateBuffer(); + value = mutation.Commit(); } } } diff --git 
a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index abdebf4c87..973b882190 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -411,12 +411,12 @@ public void Get(ref VBuffer dst) _values.AsSpan(0, _count).CopyTo(mutation.Values); if (_count == _size) { - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } _indices.AsSpan(0, _count).CopyTo(mutation.Indices); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index ff8fb4fa19..808069561d 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -1299,7 +1299,7 @@ private ValueGetter> CreateGetter(int col) { mutation = VBufferMutationContext.Create(ref value, len); _inputValue.GetValues().Slice(min, len).CopyTo(mutation.Values); - value = mutation.CreateBuffer(); + value = mutation.Commit(); return; } // In the sparse case we have ranges on Indices/Values to consider. 
@@ -1325,7 +1325,7 @@ private ValueGetter> CreateGetter(int col) } } _inputValue.GetValues().Slice(smin, scount).CopyTo(mutation.Values); - value = mutation.CreateBuffer(); + value = mutation.Commit(); }; } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index c1683d5bda..d47cdd2b6d 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -121,7 +121,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float mutation.Values.Clear(); else CpuMathUtils.Scale(c, srcValues, mutation.Values, length); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } else { @@ -131,7 +131,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float mutation.Values.Clear(); else CpuMathUtils.Scale(c, srcValues, mutation.Values, count); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } @@ -209,7 +209,7 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds { var mutation = VBufferMutationContext.Create(ref res, length); CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), mutation.Values, length); - res = mutation.CreateBuffer(); + res = mutation.Commit(); return; } @@ -367,7 +367,7 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } /// @@ -393,7 +393,7 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer var mutation = VBufferMutationContext.Create(ref dst, src.Length); if (!mutation.CreatedNewValues) // We need to clear it. 
mutation.Values.Clear(); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } else { diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 2f9e652e47..9a9782fb8d 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -718,7 +718,7 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi mutation.Indices[j] = srcIndices[j]; } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; ValueGetter>> slotNamesGetter = null; diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index 0cbffe2ee4..f51582d885 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -756,7 +756,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se if (!src.IsDense) src.GetIndices().CopyTo(mutation.Indices); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } // It is not sparsity preserving. @@ -784,7 +784,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se for (int i = 0; i < srcValues.Length; ++i) mutation.Values[srcIndices[i]] = hasher.HashCore(seed, mask, srcValues[i]); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } @@ -826,7 +826,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( srcIndices.CopyTo(mutation.Indices); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } // It is not sparsity preserving. 
@@ -856,7 +856,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( Contracts.Assert(false, "this should have never happened."); } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs index c5b65a4184..131694050e 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs @@ -596,7 +596,7 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) mutation.Indices[count++] = indices[islot] * size + (int)key; } } - dst = mutation.CreateBuffer(count); + dst = mutation.CommitTruncated(count); }; } diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 7b3da0d28e..209b2a1c65 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -139,7 +139,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) mutation.Values[iDst++] = srcValues[iSrc++]; } Contracts.Assert(iDst == newLength); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } @@ -209,7 +209,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(index <= max); } - dst = mutation.CreateBuffer(iiDst); + dst = mutation.CommitTruncated(iiDst); } } } diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index 2b90a9ac47..bbab77f365 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -56,7 +56,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices var mutation = VBufferMutationContext.Create(ref dst, src.Length); for (int i = 0; i < srcValues.Length; i++) mutation.Values[i] = !includedIndices[i] ? 
defaultValue : srcValues[i]; - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } else { @@ -75,7 +75,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices } Contracts.Assert(count == cardinality); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } else @@ -94,7 +94,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices } } - dst = mutation.CreateBuffer(count); + dst = mutation.CommitTruncated(count); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs index 361417736c..368854ae07 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs @@ -39,7 +39,7 @@ protected void CombineCore(ref VBuffer dst, VBuffer[] src, Singl if (!mutation.CreatedNewValues) mutation.Values.Clear(); // Set the output to values. - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); Single weightTotal; if (weights == null) diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs index cc81634746..63f8bcd0e7 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs @@ -101,7 +101,7 @@ protected void GetNaNOutput(ref VBuffer dst, int len) var mutation = VBufferMutationContext.Create(ref dst, len); for (int i = 0; i < len; i++) mutation.Values[i] = Single.NaN; - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs index a20e8466e7..4e279a8556 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs @@ -27,7 +27,7 @@ protected 
override void FillFeatureBuffer(Single[] src, ref VBuffer dst) int len = src.Length; var mutation = VBufferMutationContext.Create(ref dst, len); src.CopyTo(mutation.Values); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs index aefde033b8..6d45ed818e 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs @@ -94,7 +94,7 @@ public override Combiner> GetCombiner() } // Set the output to values. - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs index 54326c3819..793e4643fd 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs @@ -93,7 +93,7 @@ protected override void FillFeatureBuffer(VBuffer[] src, ref VBuffer dst, VBuffer[] src, Single[ mutation.Values[i] /= voteCount; // Set the output to values. 
- dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index ebd06a16fc..aa40726cdf 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1368,7 +1368,7 @@ private ValueMapper, VBuffer> GetCopier(ColumnType itemT for (int i = 0; i < srcValues.Length; ++i) conv(in srcValues[i], ref mutation.Values[i]); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 1159f2ea88..7e310376fd 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -791,7 +791,7 @@ public override void GetFeatureWeights(ref VBuffer weights) score = float.MaxValue; mutation.Values[i] = score; } - weights = mutation.CreateBuffer(); + weights = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index 44ea1a4008..d577519167 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -607,7 +607,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index fdd35aed7b..d55f7bd5da 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -150,7 +150,7 @@ public ValueMapper GetMapper() throw Host.Except($"Incorrect number of features: expected {_dimensionality}, got {src.Length}"); var mutation = VBufferMutationContext.Create(ref dst, _k); Map(in src, mutation.Values); - dst = 
mutation.CreateBuffer(); + dst = mutation.Commit(); }; return (ValueMapper)(Delegate)del; diff --git a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs index 285a142465..b6a4ad70f5 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs @@ -284,7 +284,7 @@ private Delegate MakeGetter(IRow input) var mutation = VBufferMutationContext.Create(ref dst, _outputColType.VectorSize); OnnxUtils.CopyTo(outputTensors[0], mutation.Values); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; return valueGetter; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index 2b20b173b4..e122e118c5 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -411,7 +411,7 @@ private void Map(in VBuffer src, ref VBuffer dst) (float)(logProb + (_absentFeaturesLogProb[iLabel] - absentFeatureLogProb)); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 6bd4162b43..1950925f9f 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -929,7 +929,7 @@ private Delegate MakeGetter(IRow input, int iinfo, ITensorValueGetter[] srcTe var mutation = VBufferMutationContext.Create(ref dst, (int)tensorSize); TensorFlowUtils.FetchData(tensor.Data, mutation.Values); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; return valuegetter; } diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index afd996a755..48b923bfcd 100644 --- 
a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -606,7 +606,7 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re } CpuMathUtils.Scale(normScale, src.GetValues(), dstValues, count); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index c35afd5bcd..345ea9fd1a 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -266,7 +266,7 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d } Host.Assert(iDst == newCount); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredicate isNA) @@ -307,7 +307,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi } } Host.Assert(iDst == newCount); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } else { @@ -329,7 +329,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi } Host.Assert(iDst == newCount); Host.Assert(offset == srcValues.Length - newCount); - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 2b279bae07..5d39d58afd 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -292,7 +292,7 @@ private static void FillValues(Float input, ref VBuffer result) mutation.Indices[0] = 0; } - result = mutation.CreateBuffer(); + result = mutation.Commit(); } // This converts in place. 
@@ -357,7 +357,7 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer } ectx.Assert(0 <= iivDst & iivDst <= values.Length); - buffer = mutation.CreateBuffer(iivDst); + buffer = mutation.CommitTruncated(iivDst); } } } diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 37a7372060..1fdf400818 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -786,7 +786,7 @@ private static void MapVector(this ValueMapper map, in V input.GetIndices().CopyTo(mutation.Indices); } - output = mutation.CreateBuffer(); + output = mutation.Commit(); } } } diff --git a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs index 1765d5c152..76d61e30e4 100644 --- a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs @@ -487,7 +487,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) Contracts.Assert(index == len); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; ValueGetter> getterWithUnitSep = (ref VBuffer dst) => @@ -544,7 +544,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) Contracts.Assert(index == len); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; return _parent._isSeparatorStartEnd ? 
getterWithStartEndSep : getterWithUnitSep; } diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index d1a52baf85..04d419940c 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -873,7 +873,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin mutation = VBufferMutationContext.Create(ref dst, len); for (int k = 0; k < len; k++) mutation.Values[k] = Float.NaN; - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); return; } @@ -919,7 +919,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin for (int i = 0; i < count; i++) mutation.Values[i] = (Float)(mutation.Values[i] / normalizer); } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } public void Dispose() diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs index eb021e3929..e4c6b2c93e 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs @@ -613,7 +613,7 @@ private ValueGetter> GetGetterVec(IRow input, int iinfo) for (int index = 0; index < dimension; index++) mutation.Values[index + dimension] /= deno; - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } } diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 1ccff8ea14..38de9ce896 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -323,7 +323,7 @@ private ValueGetter>> MakeGetterVec(IRow input, int var mutation = VBufferMutationContext.Create(ref dst, terms.Count); for (int i = 0; i < terms.Count; i++) mutation.Values[i] = terms[i]; - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); }; } diff --git 
a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.Transforms/VectorWhitening.cs index 4c05af05be..aadbcffed7 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.Transforms/VectorWhitening.cs @@ -748,7 +748,7 @@ private static void FillValues(float[] model, ref VBuffer src, ref VBuffe offs += length; } } - dst = mutation.CreateBuffer(); + dst = mutation.Commit(); } private static float DotProduct(float[] a, int aOffset, ReadOnlySpan b, ReadOnlySpan indices, int count) From faeb7d423c2be5f665a59b56ef1ccfb7be456147 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 15 Nov 2018 17:36:07 -0600 Subject: [PATCH 13/14] Rename VBufferMutationContext to VBufferEditor. --- src/Microsoft.ML.Core/Data/VBuffer.cs | 20 +++---- ...ferMutationContext.cs => VBufferEditor.cs} | 35 ++++++------- .../Utilities/VBufferUtils.cs | 52 +++++++++---------- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 4 +- .../DataLoadSave/Binary/Codecs.cs | 4 +- .../DataLoadSave/Text/TextLoaderParser.cs | 2 +- src/Microsoft.ML.Data/DataView/Transposer.cs | 6 +-- .../Depricated/Vector/VBufferMathUtils.cs | 20 +++---- .../Depricated/Vector/VectorUtils.cs | 2 +- .../Evaluators/EvaluatorUtils.cs | 2 +- .../Transforms/HashTransform.cs | 8 +-- .../Transforms/KeyToVectorTransform.cs | 2 +- .../Utilities/SlotDropper.cs | 6 +-- src/Microsoft.ML.Ensemble/EnsembleUtils.cs | 6 +-- .../OutputCombiners/BaseMultiAverager.cs | 2 +- .../OutputCombiners/BaseMultiCombiner.cs | 2 +- .../OutputCombiners/BaseScalarStacking.cs | 2 +- .../OutputCombiners/MultiMedian.cs | 2 +- .../OutputCombiners/MultiStacking.cs | 2 +- .../OutputCombiners/MultiVoting.cs | 2 +- src/Microsoft.ML.FastTree/FastTree.cs | 2 +- .../OlsLinearRegression.cs | 2 +- .../SymSgdClassificationTrainer.cs | 2 +- .../ImagePixelExtractorTransform.cs | 2 +- .../KMeansPredictor.cs | 2 +- .../OnnxTransform.cs | 2 +- .../LogisticRegression/LogisticRegression.cs | 2 +- .../MulticlassLogisticRegression.cs | 2 +- 
.../MultiClass/MultiClassNaiveBayesTrainer.cs | 2 +- .../PoissonRegression/PoissonRegression.cs | 2 +- .../TensorflowTransform.cs | 2 +- src/Microsoft.ML.Transforms/GcnTransform.cs | 6 +-- .../MissingValueDroppingTransformer.cs | 6 +-- .../MissingValueIndicatorTransform.cs | 4 +- .../MutualInformationFeatureSelection.cs | 2 +- .../Text/CharTokenizeTransform.cs | 4 +- .../Text/LdaTransform.cs | 6 +-- .../Text/WordEmbeddingsTransform.cs | 2 +- .../Text/WordTokenizeTransform.cs | 2 +- .../VectorWhitening.cs | 2 +- 40 files changed, 118 insertions(+), 119 deletions(-) rename src/Microsoft.ML.Core/Data/{VBufferMutationContext.cs => VBufferEditor.cs} (82%) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index a3c4f77f00..adaa958663 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -124,7 +124,7 @@ public VBuffer(int length, int count, T[] values, int[] indices) public void CopyToDense(ref VBuffer dst) { // create a dense mutation context - var mutation = VBufferMutationContext.Create(ref dst, Length, Length); + var mutation = VBufferEditor.Create(ref dst, Length, Length); if (!IsDense) CopyTo(mutation.Values); @@ -138,7 +138,7 @@ public void CopyToDense(ref VBuffer dst) /// public void CopyTo(ref VBuffer dst) { - var mutation = VBufferMutationContext.Create(ref dst, Length, _count); + var mutation = VBufferEditor.Create(ref dst, Length, _count); if (IsDense) { if (Length > 0) @@ -169,7 +169,7 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) if (IsDense) { - var mutation = VBufferMutationContext.Create(ref dst, length, length); + var mutation = VBufferEditor.Create(ref dst, length, length); if (length > 0) { _values.AsSpan(srcMin, length).CopyTo(mutation.Values); @@ -186,7 +186,7 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) int copyLim = _indices.FindIndexSorted(copyMin, _count, srcMin + length); Contracts.Assert(copyMin <= copyLim); copyCount = 
copyLim - copyMin; - var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); + var mutation = VBufferEditor.Create(ref dst, length, copyCount); if (copyCount > 0) { _values.AsSpan(copyMin, copyCount).CopyTo(mutation.Values); @@ -200,7 +200,7 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) } else { - var mutation = VBufferMutationContext.Create(ref dst, length, copyCount); + var mutation = VBufferEditor.Create(ref dst, length, copyCount); dst = mutation.Commit(); } } @@ -253,7 +253,7 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); Contracts.CheckParam(0 <= srcIndex && srcIndex <= Utils.Size(src) - length, nameof(srcIndex)); - var mutation = VBufferMutationContext.Create(ref dst, length, length); + var mutation = VBufferEditor.Create(ref dst, length, length); if (length > 0) { src.AsSpan(srcIndex, length).CopyTo(mutation.Values); @@ -299,12 +299,12 @@ public T GetItemOrDefault(int slot) public override string ToString() => IsDense ? $"Dense vector of size {Length}" : $"Sparse vector of size {Length}, {_count} explicit values"; - internal VBufferMutationContext GetMutableContext() + internal VBufferEditor GetEditor() { - return GetMutableContext(Length, _count); + return GetEditor(Length, _count); } - internal VBufferMutationContext GetMutableContext( + internal VBufferEditor GetEditor( int newLogicalLength, int? 
valuesCount, int maxCapacity = Utils.ArrayMaxSize, @@ -332,7 +332,7 @@ internal VBufferMutationContext GetMutableContext( Utils.EnsureSize(ref indices, valuesCount.Value, maxCapacity, keepOldOnResize, out createdNewIndices); } - return new VBufferMutationContext( + return new VBufferEditor( newLogicalLength, valuesCount.Value, values, diff --git a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs b/src/Microsoft.ML.Core/Data/VBufferEditor.cs similarity index 82% rename from src/Microsoft.ML.Core/Data/VBufferMutationContext.cs rename to src/Microsoft.ML.Core/Data/VBufferEditor.cs index 433bd5d814..8da19b641f 100644 --- a/src/Microsoft.ML.Core/Data/VBufferMutationContext.cs +++ b/src/Microsoft.ML.Core/Data/VBufferEditor.cs @@ -2,35 +2,34 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.ML.Runtime.Internal.Utilities; using System; namespace Microsoft.ML.Runtime.Data { /// - /// Various methods for creating instances. + /// Various methods for creating instances. /// - public static class VBufferMutationContext + public static class VBufferEditor { /// - /// Creates a mutation context with the same shape (length and density) - /// as the . + /// Creates a with the same shape + /// (length and density) as the . /// - public static VBufferMutationContext CreateFromBuffer( + public static VBufferEditor CreateFromBuffer( ref VBuffer destination) { - return destination.GetMutableContext(); + return destination.GetEditor(); } /// - /// Creates a mutation context using 's values - /// and indices buffers. + /// Creates a using + /// 's values and indices buffers. /// /// /// The destination buffer. /// /// - /// The new length of the buffer being mutated. + /// The logical length of the new buffer being edited. /// /// /// The optional number of physical values to be represented in the buffer. 
@@ -43,27 +42,27 @@ public static VBufferMutationContext CreateFromBuffer( /// /// True means to ensure the Indices buffer is available, even if the buffer will be dense. /// - public static VBufferMutationContext Create( + public static VBufferEditor Create( ref VBuffer destination, int newLogicalLength, int? valuesCount = null, bool keepOldOnResize = false, bool requireIndicesOnDense = false) { - return destination.GetMutableContext( + return destination.GetEditor( newLogicalLength, valuesCount, keepOldOnResize: keepOldOnResize, requireIndicesOnDense: requireIndicesOnDense); } - internal static VBufferMutationContext Create( + internal static VBufferEditor Create( ref VBuffer destination, int newLogicalLength, int valuesCount, int maxValuesCapacity) { - return destination.GetMutableContext( + return destination.GetEditor( newLogicalLength, valuesCount, maxValuesCapacity); @@ -71,10 +70,10 @@ internal static VBufferMutationContext Create( } /// - /// An object capable of mutation a by filling out + /// An object capable of editing a by filling out /// (and if the buffer is not dense). /// - public readonly ref struct VBufferMutationContext + public readonly ref struct VBufferEditor { private readonly int _logicalLength; private readonly T[] _values; @@ -100,7 +99,7 @@ public readonly ref struct VBufferMutationContext /// public bool CreatedNewIndices { get; } - internal VBufferMutationContext(int logicalLength, + internal VBufferEditor(int logicalLength, int physicalValuesCount, T[] values, int[] indices, @@ -147,7 +146,7 @@ public VBuffer Commit() /// /// CommitTruncated allows to modify the length of the explicitly /// defined values. - /// This is useful in sparse situations where the + /// This is useful in sparse situations where the /// was created with a larger physical value count than was needed /// because the final value count was not known at creation time. 
/// diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index 390e80727e..f056a0c5a8 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -314,7 +314,7 @@ public static void ForEachEitherDefined(in VBuffer a, in VBuffer b, Act /// public static void Clear(ref VBuffer dst) { - var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + var mutation = VBufferEditor.CreateFromBuffer(ref dst); mutation.Values.Clear(); } @@ -343,7 +343,7 @@ public static void Apply(ref VBuffer dst, SlotValueManipulator manip) { Contracts.CheckValue(manip, nameof(manip)); - var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + var mutation = VBufferEditor.CreateFromBuffer(ref dst); if (dst.IsDense) { for (int i = 0; i < mutation.Values.Length; i++) @@ -377,7 +377,7 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator Contracts.CheckValue(manip, nameof(manip)); Contracts.CheckValueOrNull(pred); - var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + var mutation = VBufferEditor.CreateFromBuffer(ref dst); int dstValuesCount = mutation.Values.Length; if (dst.IsDense) { @@ -405,7 +405,7 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator // we are modifying in the sparse vector, in which case the vector becomes // dense. Then there is no need to do anything with indices. bool needIndices = dstValuesCount + 1 < dst.Length; - mutation = VBufferMutationContext.Create(ref dst, dst.Length, dstValuesCount + 1); + mutation = VBufferEditor.Create(ref dst, dst.Length, dstValuesCount + 1); if (idx != dstValuesCount) { // We have to do some sort of shift copy. 
@@ -430,7 +430,7 @@ public static void Densify(ref VBuffer dst) var indices = dst.GetIndices(); var values = dst.GetValues(); - var mutation = VBufferMutationContext.Create( + var mutation = VBufferEditor.Create( ref dst, dst.Length); @@ -485,7 +485,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) if (dstIndices.IsEmpty) { // no previous values - var newIndicesMutation = VBufferMutationContext.Create(ref dst, dst.Length, denseCount); + var newIndicesMutation = VBufferEditor.Create(ref dst, dst.Length, denseCount); Utils.FillIdentity(newIndicesMutation.Indices, denseCount); newIndicesMutation.Values.Clear(); dst = newIndicesMutation.Commit(); @@ -500,7 +500,7 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) return; } - var mutation = VBufferMutationContext.Create(ref dst, dst.Length, newLen, keepOldOnResize: true); + var mutation = VBufferEditor.Create(ref dst, dst.Length, newLen, keepOldOnResize: true); int sliceLength = dstValues.Length - lim; mutation.Values.Slice(lim, sliceLength).CopyTo(mutation.Values.Slice(denseCount)); mutation.Indices.Slice(lim, sliceLength).CopyTo(mutation.Indices.Slice(denseCount)); @@ -541,7 +541,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds } } - var mutation = VBufferMutationContext.Create(ref dst, src.Length, sparseCount); + var mutation = VBufferEditor.Create(ref dst, src.Length, sparseCount); if (sparseCount > 0) { int j = 0; @@ -692,7 +692,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< var srcValues = src.GetValues(); var dstValues = dst.GetValues(); var dstIndices = dst.GetIndices(); - var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + var mutation = VBufferEditor.CreateFromBuffer(ref dst); if (srcValues.Length == 0) { // Major case 1, with srcValues.Length == 0. 
@@ -717,7 +717,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (!dst.IsDense) { Densify(ref dst); - mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + mutation = VBufferEditor.CreateFromBuffer(ref dst); } // Both are now dense. Both cases of outer are covered. @@ -757,7 +757,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { // Major case 4, with dst empty. Note that !src.Dense. // Neither is dense, and dst is empty. Both cases of outer are covered. - mutation = VBufferMutationContext.Create(ref dst, + mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length, maxValuesCapacity: src.Length); @@ -815,7 +815,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // proved to be inefficient so we go to the little bit of extra work // to handle it here. - mutation = VBufferMutationContext.Create(ref dst, + mutation = VBufferEditor.Create(ref dst, src.Length, newCount, maxValuesCapacity: dst.Length); @@ -912,7 +912,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // First do a "quasi" densification of dst, by making the indices // of dst correspond to those in src. 
- mutation = VBufferMutationContext.Create(ref dst, newCount, dstValues.Length); + mutation = VBufferEditor.Create(ref dst, newCount, dstValues.Length); int sI = 0; for (dI = 0; dI < dstValues.Length; ++dI) { @@ -925,7 +925,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< dst = mutation.Commit(); Densify(ref dst); - mutation = VBufferMutationContext.Create(ref dst, + mutation = VBufferEditor.Create(ref dst, src.Length, newCount, maxValuesCapacity: src.Length); @@ -964,7 +964,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else if (src.IsDense) { Contracts.Assert(srcValues.Length == src.Length); - var mutation = VBufferMutationContext.Create(ref res, length); + var mutation = VBufferEditor.Create(ref res, length); for (int i = 0; i < length; i++) manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); res = mutation.Commit(); @@ -974,7 +974,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf // src is non-empty sparse. int count = srcValues.Length; Contracts.Assert(0 < count && count < length); - var mutation = VBufferMutationContext.Create(ref res, length, count); + var mutation = VBufferEditor.Create(ref res, length, count); var srcIndices = src.GetIndices(); srcIndices.CopyTo(mutation.Indices); for (int ii = 0; ii < count; ii++) @@ -988,7 +988,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } else if (dst.IsDense) { - var mutation = VBufferMutationContext.Create(ref res, length); + var mutation = VBufferEditor.Create(ref res, length); if (srcValues.Length == 0) { if (outer) @@ -1060,7 +1060,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf Contracts.Assert(dstCount > 0); if (srcValues.Length == 0) { - var mutation = VBufferMutationContext.Create(ref res, length, dstCount); + var mutation = VBufferEditor.Create(ref res, length, dstCount); if (outer) { for (int jj = 0; jj < dstCount; jj++) @@ -1083,7 +1083,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, 
ref VBuf else if (src.IsDense) { // res will be dense. - var mutation = VBufferMutationContext.Create(ref res, length); + var mutation = VBufferEditor.Create(ref res, length); int jj = 0; int j = dstIndices[jj]; for (int i = 0; i < length; i++) @@ -1137,7 +1137,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } else { - var mutation = VBufferMutationContext.Create(ref res, length, resCount); + var mutation = VBufferEditor.Create(ref res, length, resCount); int ii = 0; int i = srcIndices[ii]; @@ -1207,7 +1207,7 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref Resize(ref dst, src.Length, 0); return; } - var mutation = VBufferMutationContext.Create(ref dst, + var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length, maxValuesCapacity: src.Length); @@ -1265,11 +1265,11 @@ public static void ApplyInto(in VBuffer a, in VBuffer int bI = 0; ReadOnlySpan aIndices; ReadOnlySpan bIndices; - VBufferMutationContext mutation; + VBufferEditor mutation; if (a.IsDense || b.IsDense) { // Case 2. One of the two inputs is dense. The output will be dense. - mutation = VBufferMutationContext.Create(ref dst, a.Length); + mutation = VBufferEditor.Create(ref dst, a.Length); if (!a.IsDense) { // a is sparse, b is dense @@ -1322,7 +1322,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer // REVIEW: Worth optimizing the newCount == a.Length case? // Probably not... 
- mutation = VBufferMutationContext.Create(ref dst, a.Length, newCount); + mutation = VBufferEditor.Create(ref dst, a.Length, newCount); Span indices = mutation.Indices; if (newCount == bValues.Length) @@ -1423,7 +1423,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer public static void Copy(List src, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); - var mutation = VBufferMutationContext.Create(ref dst, length); + var mutation = VBufferEditor.Create(ref dst, length); if (length > 0) { // List.CopyTo should have an overload for Span - https://github.com/dotnet/corefx/issues/33006 @@ -1441,7 +1441,7 @@ public static void Copy(List src, ref VBuffer dst, int length) /// public static void Resize(ref VBuffer dst, int newLogicalLength, int? valuesCount = null) { - dst = VBufferMutationContext.Create(ref dst, newLogicalLength, valuesCount) + dst = VBufferEditor.Create(ref dst, newLogicalLength, valuesCount) .Commit(); } } diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index d066fbf65b..6b1003777e 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -448,14 +448,14 @@ public void GetResult(ref VBuffer buffer) if (_dense) { - var mutation = VBufferMutationContext.Create(ref buffer, _length); + var mutation = VBufferEditor.Create(ref buffer, _length); _values.AsSpan(0, _length).CopyTo(mutation.Values); buffer = mutation.Commit(); } else { Contracts.Assert(_count < _length); - var mutation = VBufferMutationContext.Create(ref buffer, _length, _count); + var mutation = VBufferEditor.Create(ref buffer, _length, _count); _values.AsSpan(0, _count).CopyTo(mutation.Values); _indices.AsSpan(0, _count).CopyTo(mutation.Indices); buffer = mutation.Commit(); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index 5d47f4747b..f062c61d98 
100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -1112,7 +1112,7 @@ public override void Get(ref VBuffer value) if (count < 0) { // dense - var mutation = VBufferMutationContext.Create(ref value, length); + var mutation = VBufferEditor.Create(ref value, length); if (length > 0) { _values.AsSpan(_valuesOffset, length) @@ -1123,7 +1123,7 @@ public override void Get(ref VBuffer value) else { // sparse - var mutation = VBufferMutationContext.Create(ref value, length, count); + var mutation = VBufferEditor.Create(ref value, length, count); if (count > 0) { _values.AsSpan(_valuesOffset, count) diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index 973b882190..023986aaf7 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -407,7 +407,7 @@ public void Get(ref VBuffer dst) return; } - var mutation = VBufferMutationContext.Create(ref dst, _size, _count); + var mutation = VBufferEditor.Create(ref dst, _size, _count); _values.AsSpan(0, _count).CopyTo(mutation.Values); if (_count == _size) { diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index 808069561d..4f4c0d4ab5 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -1294,10 +1294,10 @@ private ValueGetter> CreateGetter(int col) (ref VBuffer value) => { EnsureValid(); - VBufferMutationContext mutation; + VBufferEditor mutation; if (_inputValue.IsDense) { - mutation = VBufferMutationContext.Create(ref value, len); + mutation = VBufferEditor.Create(ref value, len); _inputValue.GetValues().Slice(min, len).CopyTo(mutation.Values); value = mutation.Commit(); return; @@ -1312,7 +1312,7 @@ private ValueGetter> CreateGetter(int col) return; } - mutation = 
VBufferMutationContext.Create(ref value, len, scount); + mutation = VBufferEditor.Create(ref value, len, scount); bool isDense = len == scount; if (!isDense) { diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index d47cdd2b6d..b7fb9bd1b7 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -87,7 +87,7 @@ public static void ScaleBy(ref VBuffer dst, Float c) { if (c == 1 || dst.GetValues().Length == 0) return; - var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + var mutation = VBufferEditor.CreateFromBuffer(ref dst); if (c != 0) CpuMathUtils.Scale(c, mutation.Values); else // Maintain density of dst. @@ -115,7 +115,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (src.IsDense) { // Maintain the density of src to dst in order to avoid slow down of L-BFGS. - var mutation = VBufferMutationContext.Create(ref dst, length); + var mutation = VBufferEditor.Create(ref dst, length); Contracts.Assert(length == count); if (c == 0) mutation.Values.Clear(); @@ -125,7 +125,7 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float } else { - var mutation = VBufferMutationContext.Create(ref dst, length, count); + var mutation = VBufferEditor.Create(ref dst, length, count); src.GetIndices().CopyTo(mutation.Indices); if (c == 0) mutation.Values.Clear(); @@ -148,7 +148,7 @@ public static void Add(in VBuffer src, ref VBuffer dst) if (dst.IsDense) { - var mutation = VBufferMutationContext.Create(ref dst, dst.Length); + var mutation = VBufferEditor.Create(ref dst, dst.Length); if (src.IsDense) CpuMathUtils.Add(srcValues, mutation.Values, src.Length); else @@ -176,7 +176,7 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds if (dst.IsDense) { - var mutation = VBufferMutationContext.Create(ref dst, dst.Length); + var mutation = 
VBufferEditor.Create(ref dst, dst.Length); if (src.IsDense) CpuMathUtils.AddScale(c, srcValues, mutation.Values, src.Length); else @@ -207,7 +207,7 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds Contracts.Assert(length > 0); if (dst.IsDense && src.IsDense) { - var mutation = VBufferMutationContext.Create(ref res, length); + var mutation = VBufferEditor.Create(ref res, length); CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), mutation.Values, length); res = mutation.Commit(); return; @@ -247,12 +247,12 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer var srcValues = src.GetValues(); if (srcValues.Length == 0 || c == 0) return; - VBufferMutationContext mutation; + VBufferEditor mutation; Span values; if (dst.IsDense) { // This is by far the most common case. - mutation = VBufferMutationContext.Create(ref dst, dst.Length); + mutation = VBufferEditor.Create(ref dst, dst.Length); values = mutation.Values.Slice(offset); if (src.IsDense) CpuMathUtils.AddScale(c, srcValues, values, srcValues.Length); @@ -295,7 +295,7 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } // Extend dst so that it has room for this additional stuff. Shift things over as well. var dstValues = dst.GetValues(); - mutation = VBufferMutationContext.Create(ref dst, + mutation = VBufferEditor.Create(ref dst, dst.Length, dstValues.Length + gapCount, keepOldOnResize: true); @@ -390,7 +390,7 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer if (src.Length > 0 && src.IsDense) { // Due to sparsity preservation from src, dst must be dense, in the same way. - var mutation = VBufferMutationContext.Create(ref dst, src.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length); if (!mutation.CreatedNewValues) // We need to clear it. 
mutation.Values.Clear(); dst = mutation.Commit(); diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index b2ef37a820..1a3d5fdaa3 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -165,7 +165,7 @@ public static void MulElementWise(in VBuffer a, ref VBuffer dst) if (a.IsDense && dst.IsDense) { - var mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + var mutation = VBufferEditor.CreateFromBuffer(ref dst); CpuMathUtils.MulElementWise(a.GetValues(), dst.GetValues(), mutation.Values, a.Length); } else diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index 9a9782fb8d..dcb0499965 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -692,7 +692,7 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi (in VBuffer src, ref VBuffer dst) => { var srcValues = src.GetValues(); - var mutation = VBufferMutationContext.Create( + var mutation = VBufferEditor.Create( ref dst, src.Length, srcValues.Length); diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index f51582d885..bb4c12989e 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -749,7 +749,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se VBufferUtils.Resize(ref dst, src.Length, 0); return; } - var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); for (int i = 0; i < srcValues.Length; ++i) mutation.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); @@ -763,7 +763,7 @@ private static ValueGetter> MakeVectorHashGetter(uint se return (ref 
VBuffer dst) => { srcGetter(ref src); - var mutation = VBufferMutationContext.Create(ref dst, src.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length); var srcValues = src.GetValues(); if (src.IsDense) @@ -811,7 +811,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( VBufferUtils.Resize(ref dst, src.Length, 0); return; } - var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); if (src.IsDense) { @@ -833,7 +833,7 @@ private static ValueGetter> MakeVectorOrderedHashGetter( return (ref VBuffer dst) => { srcGetter(ref src); - var mutation = VBufferMutationContext.Create(ref dst, src.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length); var srcValues = src.GetValues(); if (src.IsDense) diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs index 131694050e..35a7b7f058 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs @@ -567,7 +567,7 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) int lenDst = checked(size * lenSrc); var values = src.GetValues(); int cntSrc = values.Length; - var mutation = VBufferMutationContext.Create(ref dst, lenDst, cntSrc); + var mutation = VBufferEditor.Create(ref dst, lenDst, cntSrc); int count = 0; if (src.IsDense) diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 209b2a1c65..405253bbca 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -115,11 +115,11 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) // End of the trivial cases // At this point, we need to drop some slots and keep some slots. 
- VBufferMutationContext mutation; + VBufferEditor mutation; var srcValues = src.GetValues(); if (src.IsDense) { - mutation = VBufferMutationContext.Create(ref dst, newLength); + mutation = VBufferEditor.Create(ref dst, newLength); int iDst = 0; int iSrc = 0; @@ -151,7 +151,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(newCount <= src.Length); - mutation = VBufferMutationContext.Create( + mutation = VBufferEditor.Create( ref dst, newLength, newCount, diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index bbab77f365..a4a63c702a 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -53,14 +53,14 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices if (cardinality >= src.Length / 2) { T defaultValue = default; - var mutation = VBufferMutationContext.Create(ref dst, src.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length); for (int i = 0; i < srcValues.Length; i++) mutation.Values[i] = !includedIndices[i] ? 
defaultValue : srcValues[i]; dst = mutation.Commit(); } else { - var mutation = VBufferMutationContext.Create(ref dst, src.Length, cardinality); + var mutation = VBufferEditor.Create(ref dst, src.Length, cardinality); int count = 0; for (int i = 0; i < srcValues.Length; i++) @@ -80,7 +80,7 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices } else { - var mutation = VBufferMutationContext.Create(ref dst, src.Length, cardinality); + var mutation = VBufferEditor.Create(ref dst, src.Length, cardinality); int count = 0; var srcIndices = src.GetIndices(); diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs index 368854ae07..df58a1e58d 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs @@ -35,7 +35,7 @@ protected void CombineCore(ref VBuffer dst, VBuffer[] src, Singl return; } - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); if (!mutation.CreatedNewValues) mutation.Values.Clear(); // Set the output to values. 
diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs index 63f8bcd0e7..6df0b6f28a 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs @@ -98,7 +98,7 @@ protected bool TryNormalize(VBuffer[] values) protected void GetNaNOutput(ref VBuffer dst, int len) { Contracts.Assert(len >= 0); - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); for (int i = 0; i < len; i++) mutation.Values[i] = Single.NaN; dst = mutation.Commit(); diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs index 4e279a8556..33ab0627c3 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs @@ -25,7 +25,7 @@ protected override void FillFeatureBuffer(Single[] src, ref VBuffer dst) { Contracts.AssertNonEmpty(src); int len = src.Length; - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); src.CopyTo(mutation.Values); dst = mutation.Commit(); } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs index 6d45ed818e..0f6bc0b96d 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs @@ -81,7 +81,7 @@ public override Combiner> GetCombiner() return; } - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); int count = src.Length; if (Utils.Size(raw) < count) diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs index 793e4643fd..9649f2b9dd 100644 
--- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs @@ -83,7 +83,7 @@ protected override void FillFeatureBuffer(VBuffer[] src, ref VBuffer dst, VBuffer[] src, Single[ } int len = GetClassCount(src); - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); if (!mutation.CreatedNewValues) mutation.Values.Clear(); diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index aa40726cdf..a34ac677b4 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1358,7 +1358,7 @@ private ValueMapper, VBuffer> GetCopier(ColumnType itemT (in VBuffer src, ref VBuffer dst) => { var srcValues = src.GetValues(); - var mutation = VBufferMutationContext.Create(ref dst, src.Length, srcValues.Length); + var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); if (srcValues.Length > 0) { if (!src.IsDense) diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 7e310376fd..7ca3103365 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -783,7 +783,7 @@ public override void GetFeatureWeights(ref VBuffer weights) } var size = _pValues.Length - 1; - var mutation = VBufferMutationContext.Create(ref weights, size); + var mutation = VBufferEditor.Create(ref weights, size); for (int i = 0; i < size; i++) { var score = -(float)Math.Log(_pValues[i + 1]); diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index a69cdf9009..07481938fb 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -654,7 +654,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData 
data, LinearPredictor p else weights = VBufferUtils.CreateDense(numFeatures); - var weightsMutation = VBufferMutationContext.CreateFromBuffer(ref weights); + var weightsMutation = VBufferEditor.CreateFromBuffer(ref weights); // Reference: Parasail. SymSGD. bool tuneLR = _args.LearningRate == null; diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index d577519167..0e4c2115b3 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -485,7 +485,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo Host.Check(src.PixelFormat == System.Drawing.Imaging.PixelFormat.Format32bppArgb); Host.Check(src.Height == height && src.Width == width); - var mutation = VBufferMutationContext.Create(ref dst, size); + var mutation = VBufferEditor.Create(ref dst, size); var values = mutation.Values; float offset = ex.Offset; diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index d55f7bd5da..af95a34356 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -148,7 +148,7 @@ public ValueMapper GetMapper() { if (src.Length != _dimensionality) throw Host.Except($"Incorrect number of features: expected {_dimensionality}, got {src.Length}"); - var mutation = VBufferMutationContext.Create(ref dst, _k); + var mutation = VBufferEditor.Create(ref dst, _k); Map(in src, mutation.Values); dst = mutation.Commit(); }; diff --git a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs index b6a4ad70f5..59b34229a3 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs @@ -282,7 +282,7 @@ private Delegate MakeGetter(IRow input) var outputTensors = 
_parent.Model.Run(inputTensors); Contracts.Assert(outputTensors.Count() > 0); - var mutation = VBufferMutationContext.Create(ref dst, _outputColType.VectorSize); + var mutation = VBufferEditor.Create(ref dst, _outputColType.VectorSize); OnnxUtils.CopyTo(outputTensors[0], mutation.Values); dst = mutation.Commit(); }; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 0492cbc708..5a7d8ee213 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -157,7 +157,7 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab VectorUtils.AddMultWithOffset(in feat, mult, ref grad, 1); // Note that 0th L-BFGS weight is for bias. // Add bias using this strange trick that has advantage of working well for dense and sparse arrays. // Due to the call to EnsureBiases, we know this region is dense. 
- var mutation = VBufferMutationContext.CreateFromBuffer(ref grad); + var mutation = VBufferEditor.CreateFromBuffer(ref grad); Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); mutation.Values[0] += mult; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index d2273ca251..c7cb012c0e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -218,7 +218,7 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab float mult = weight * (modelProb - probLabel); VectorUtils.AddMultWithOffset(in feat, mult, ref grad, start); // Due to the call to EnsureBiases, we know this region is dense. - var mutation = VBufferMutationContext.CreateFromBuffer(ref grad); + var mutation = VBufferEditor.CreateFromBuffer(ref grad); Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); mutation.Values[c] += mult; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index e122e118c5..470113217b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -380,7 +380,7 @@ private void Map(in VBuffer src, ref VBuffer dst) var srcValues = src.GetValues(); var srcIndices = src.GetIndices(); - var mutation = VBufferMutationContext.Create(ref dst, _labelCount); + var mutation = VBufferEditor.Create(ref dst, _labelCount); Span labelScores = mutation.Values; for (int iLabel = 0; 
iLabel < _labelCount; iLabel += 1) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index 14608d37c0..94a1008e3c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -138,7 +138,7 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab float mult = -(y - lambda) * weight; VectorUtils.AddMultWithOffset(in feat, mult, ref grad, 1); // Due to the call to EnsureBiases, we know this region is dense. - var mutation = VBufferMutationContext.CreateFromBuffer(ref grad); + var mutation = VBufferEditor.CreateFromBuffer(ref grad); Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); mutation.Values[0] += mult; // From the computer's perspective exp(infinity)==infinity diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 1950925f9f..10ed5759b5 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -927,7 +927,7 @@ private Delegate MakeGetter(IRow input, int iinfo, ITensorValueGetter[] srcTe var tensor = outputCache.Outputs[_parent.Outputs[iinfo]]; var tensorSize = tensor.Shape.Where(x => x > 0).Aggregate((x, y) => x * y); - var mutation = VBufferMutationContext.Create(ref dst, (int)tensorSize); + var mutation = VBufferEditor.Create(ref dst, (int)tensorSize); TensorFlowUtils.FetchData(tensor.Data, mutation.Values); dst = mutation.Commit(); }; diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 48b923bfcd..92a1c32391 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -595,10 
+595,10 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re if (normScale < MinScale) normScale = 1; - VBufferMutationContext mutation; + VBufferEditor mutation; if (offset == 0) { - mutation = VBufferMutationContext.Create(ref dst, length, count); + mutation = VBufferEditor.Create(ref dst, length, count); var dstValues = mutation.Values; if (!src.IsDense) { @@ -614,7 +614,7 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re // Subtracting the mean requires a dense representation. src.CopyToDense(ref dst); - mutation = VBufferMutationContext.CreateFromBuffer(ref dst); + mutation = VBufferEditor.CreateFromBuffer(ref dst); if (normScale != 1) CpuMathUtils.ScaleAdd(normScale, -offset, mutation.Values); else diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index 345ea9fd1a..bbe880b1c6 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -258,7 +258,7 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d int iDst = 0; // Densifying sparse vectors since default value equals NA and hence should be dropped. 
- var mutation = VBufferMutationContext.Create(ref dst, newCount); + var mutation = VBufferEditor.Create(ref dst, newCount); for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) @@ -297,7 +297,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi int iDst = 0; if (src.IsDense) { - var mutation = VBufferMutationContext.Create(ref dst, newCount); + var mutation = VBufferEditor.Create(ref dst, newCount); for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) @@ -312,7 +312,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi else { var newLength = src.Length - srcValues.Length - newCount; - var mutation = VBufferMutationContext.Create(ref dst, newLength, newCount); + var mutation = VBufferEditor.Create(ref dst, newLength, newCount); var srcIndices = src.GetIndices(); int offset = 0; diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 5d39d58afd..548c567534 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -280,7 +280,7 @@ private static void FillValues(Float input, ref VBuffer result) return; } - var mutation = VBufferMutationContext.Create(ref result, 2, 1); + var mutation = VBufferEditor.Create(ref result, 2, 1); if (Float.IsNaN(input)) { mutation.Values[0] = 1; @@ -302,7 +302,7 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer ectx.Check(0 <= size & size < int.MaxValue / 2); var values = buffer.GetValues(); - var mutation = VBufferMutationContext.Create(ref buffer, size * 2, values.Length); + var mutation = VBufferEditor.Create(ref buffer, size * 2, values.Length); int iivDst = 0; if (buffer.IsDense) { diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 1fdf400818..1832ef7f8b 100644 --- 
a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -774,7 +774,7 @@ private static ValueMapper, VBuffer> CreateVectorMapper(this ValueMapper map, in VBuffer input, ref VBuffer output) { var inputValues = input.GetValues(); - var mutation = VBufferMutationContext.Create(ref output, input.Length, inputValues.Length); + var mutation = VBufferEditor.Create(ref output, input.Length, inputValues.Length); for (int i = 0; i < inputValues.Length; i++) { TSrc val = inputValues[i]; diff --git a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs index 76d61e30e4..3075ecdd55 100644 --- a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs @@ -468,7 +468,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) } } - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); if (len > 0) { int index = 0; @@ -511,7 +511,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) if (_parent._useMarkerChars) len += TextMarkersCount; - var mutation = VBufferMutationContext.Create(ref dst, len); + var mutation = VBufferEditor.Create(ref dst, len); if (len > 0) { int index = 0; diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index 04d419940c..fac7a7a9e7 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -858,7 +858,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin return; } - VBufferMutationContext mutation; + VBufferEditor mutation; // Make sure all the frequencies are valid and truncate if the sum gets too large. 
int docSize = 0; int termNum = 0; @@ -870,7 +870,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin // REVIEW: Should this log a warning message? And what should it produce? // It currently produces a vbuffer of all NA values. // REVIEW: Need a utility method to do this... - mutation = VBufferMutationContext.Create(ref dst, len); + mutation = VBufferEditor.Create(ref dst, len); for (int k = 0; k < len; k++) mutation.Values[k] = Float.NaN; dst = mutation.Commit(); @@ -894,7 +894,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin int count = retTopics.Count; Contracts.Assert(count <= len); - mutation = VBufferMutationContext.Create(ref dst, len, count); + mutation = VBufferEditor.Create(ref dst, len, count); double normalizer = 0; for (int i = 0; i < count; i++) { diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs index e4c6b2c93e..d7867d16a3 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs @@ -583,7 +583,7 @@ private ValueGetter> GetGetterVec(IRow input, int iinfo) { int deno = 0; srcGetter(ref src); - var mutation = VBufferMutationContext.Create(ref dst, 3 * dimension); + var mutation = VBufferEditor.Create(ref dst, 3 * dimension); int offset = 2 * dimension; for (int i = 0; i < dimension; i++) { diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 38de9ce896..85cf3f2326 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -320,7 +320,7 @@ private ValueGetter>> MakeGetterVec(IRow input, int for (int i = 0; i < srcValues.Length; i++) AddTerms(srcValues[i], separators, terms); - var mutation = VBufferMutationContext.Create(ref dst, terms.Count); + var mutation = VBufferEditor.Create(ref dst, 
terms.Count); for (int i = 0; i < terms.Count; i++) mutation.Values[i] = terms[i]; dst = mutation.Commit(); diff --git a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.Transforms/VectorWhitening.cs index aadbcffed7..8927103422 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.Transforms/VectorWhitening.cs @@ -729,7 +729,7 @@ private static void FillValues(float[] model, ref VBuffer src, ref VBuffe int length = src.Length; // Since the whitening process produces dense vector, always use dense representation of dst. - var mutation = VBufferMutationContext.Create(ref dst, cdst); + var mutation = VBufferEditor.Create(ref dst, cdst); if (src.IsDense) { Mkl.Gemv(Mkl.Layout.RowMajor, Mkl.Transpose.NoTrans, cdst, length, From e16ab689ff69e89e5b27bea04a560aa80ac2a2ac Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Thu, 15 Nov 2018 17:53:03 -0600 Subject: [PATCH 14/14] Rename mutation to editor. --- src/Microsoft.ML.Core/Data/VBuffer.cs | 46 ++-- .../Utilities/VBufferUtils.cs | 260 +++++++++--------- src/Microsoft.ML.Data/Data/BufferBuilder.cs | 14 +- .../DataLoadSave/Binary/Codecs.cs | 14 +- .../DataLoadSave/Text/TextLoaderParser.cs | 10 +- src/Microsoft.ML.Data/DataView/Transposer.cs | 18 +- .../Depricated/Vector/VBufferMathUtils.cs | 64 ++--- .../Depricated/Vector/VectorUtils.cs | 4 +- .../Evaluators/EvaluatorUtils.cs | 14 +- .../Transforms/HashTransform.cs | 38 +-- .../Transforms/KeyToVectorTransform.cs | 12 +- .../Utilities/SlotDropper.cs | 18 +- src/Microsoft.ML.Ensemble/EnsembleUtils.cs | 22 +- .../OutputCombiners/BaseMultiAverager.cs | 8 +- .../OutputCombiners/BaseMultiCombiner.cs | 6 +- .../OutputCombiners/BaseScalarStacking.cs | 6 +- .../OutputCombiners/MultiMedian.cs | 6 +- .../OutputCombiners/MultiStacking.cs | 6 +- .../OutputCombiners/MultiVoting.cs | 12 +- src/Microsoft.ML.FastTree/FastTree.cs | 8 +- .../OlsLinearRegression.cs | 6 +- .../SymSgdClassificationTrainer.cs | 8 +- 
.../ImagePixelExtractorTransform.cs | 10 +- .../KMeansPredictor.cs | 6 +- .../OnnxTransform.cs | 6 +- .../LogisticRegression/LogisticRegression.cs | 6 +- .../MulticlassLogisticRegression.cs | 6 +- .../MultiClass/MultiClassNaiveBayesTrainer.cs | 6 +- .../PoissonRegression/PoissonRegression.cs | 6 +- .../TensorflowTransform.cs | 6 +- src/Microsoft.ML.Transforms/GcnTransform.cs | 16 +- .../MissingValueDroppingTransformer.cs | 20 +- .../MissingValueIndicatorTransform.cs | 32 +-- .../MutualInformationFeatureSelection.cs | 8 +- .../Text/CharTokenizeTransform.cs | 22 +- .../Text/LdaTransform.cs | 20 +- .../Text/WordEmbeddingsTransform.cs | 22 +- .../Text/WordTokenizeTransform.cs | 6 +- .../VectorWhitening.cs | 8 +- 39 files changed, 403 insertions(+), 403 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/VBuffer.cs b/src/Microsoft.ML.Core/Data/VBuffer.cs index adaa958663..217afa746c 100644 --- a/src/Microsoft.ML.Core/Data/VBuffer.cs +++ b/src/Microsoft.ML.Core/Data/VBuffer.cs @@ -123,14 +123,14 @@ public VBuffer(int length, int count, T[] values, int[] indices) /// public void CopyToDense(ref VBuffer dst) { - // create a dense mutation context - var mutation = VBufferEditor.Create(ref dst, Length, Length); + // create a dense editor + var editor = VBufferEditor.Create(ref dst, Length); if (!IsDense) - CopyTo(mutation.Values); + CopyTo(editor.Values); else if (Length > 0) - _values.AsSpan(0, Length).CopyTo(mutation.Values); - dst = mutation.Commit(); + _values.AsSpan(0, Length).CopyTo(editor.Values); + dst = editor.Commit(); } /// @@ -138,24 +138,24 @@ public void CopyToDense(ref VBuffer dst) /// public void CopyTo(ref VBuffer dst) { - var mutation = VBufferEditor.Create(ref dst, Length, _count); + var editor = VBufferEditor.Create(ref dst, Length, _count); if (IsDense) { if (Length > 0) { - _values.AsSpan(0, Length).CopyTo(mutation.Values); + _values.AsSpan(0, Length).CopyTo(editor.Values); } - dst = mutation.Commit(); + dst = editor.Commit(); 
Contracts.Assert(dst.IsDense); } else { if (_count > 0) { - _values.AsSpan(0, _count).CopyTo(mutation.Values); - _indices.AsSpan(0, _count).CopyTo(mutation.Indices); + _values.AsSpan(0, _count).CopyTo(editor.Values); + _indices.AsSpan(0, _count).CopyTo(editor.Indices); } - dst = mutation.Commit(); + dst = editor.Commit(); } } @@ -169,12 +169,12 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) if (IsDense) { - var mutation = VBufferEditor.Create(ref dst, length, length); + var editor = VBufferEditor.Create(ref dst, length, length); if (length > 0) { - _values.AsSpan(srcMin, length).CopyTo(mutation.Values); + _values.AsSpan(srcMin, length).CopyTo(editor.Values); } - dst = mutation.Commit(); + dst = editor.Commit(); Contracts.Assert(dst.IsDense); } else @@ -186,22 +186,22 @@ public void CopyTo(ref VBuffer dst, int srcMin, int length) int copyLim = _indices.FindIndexSorted(copyMin, _count, srcMin + length); Contracts.Assert(copyMin <= copyLim); copyCount = copyLim - copyMin; - var mutation = VBufferEditor.Create(ref dst, length, copyCount); + var editor = VBufferEditor.Create(ref dst, length, copyCount); if (copyCount > 0) { - _values.AsSpan(copyMin, copyCount).CopyTo(mutation.Values); + _values.AsSpan(copyMin, copyCount).CopyTo(editor.Values); if (copyCount < length) { for (int i = 0; i < copyCount; ++i) - mutation.Indices[i] = _indices[i + copyMin] - srcMin; + editor.Indices[i] = _indices[i + copyMin] - srcMin; } } - dst = mutation.Commit(); + dst = editor.Commit(); } else { - var mutation = VBufferEditor.Create(ref dst, length, copyCount); - dst = mutation.Commit(); + var editor = VBufferEditor.Create(ref dst, length, copyCount); + dst = editor.Commit(); } } } @@ -253,12 +253,12 @@ public static void Copy(T[] src, int srcIndex, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); Contracts.CheckParam(0 <= srcIndex && srcIndex <= Utils.Size(src) - length, nameof(srcIndex)); - var mutation = 
VBufferEditor.Create(ref dst, length, length); + var editor = VBufferEditor.Create(ref dst, length, length); if (length > 0) { - src.AsSpan(srcIndex, length).CopyTo(mutation.Values); + src.AsSpan(srcIndex, length).CopyTo(editor.Values); } - dst = mutation.Commit(); + dst = editor.Commit(); } public IEnumerable> Items(bool all = false) diff --git a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs index f056a0c5a8..f730d61724 100644 --- a/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs +++ b/src/Microsoft.ML.Core/Utilities/VBufferUtils.cs @@ -314,8 +314,8 @@ public static void ForEachEitherDefined(in VBuffer a, in VBuffer b, Act /// public static void Clear(ref VBuffer dst) { - var mutation = VBufferEditor.CreateFromBuffer(ref dst); - mutation.Values.Clear(); + var editor = VBufferEditor.CreateFromBuffer(ref dst); + editor.Values.Clear(); } // REVIEW: Look into removing slot in this and other manipulators, so that we @@ -343,17 +343,17 @@ public static void Apply(ref VBuffer dst, SlotValueManipulator manip) { Contracts.CheckValue(manip, nameof(manip)); - var mutation = VBufferEditor.CreateFromBuffer(ref dst); + var editor = VBufferEditor.CreateFromBuffer(ref dst); if (dst.IsDense) { - for (int i = 0; i < mutation.Values.Length; i++) - manip(i, ref mutation.Values[i]); + for (int i = 0; i < editor.Values.Length; i++) + manip(i, ref editor.Values[i]); } else { var dstIndices = dst.GetIndices(); - for (int i = 0; i < mutation.Values.Length; i++) - manip(dstIndices[i], ref mutation.Values[i]); + for (int i = 0; i < editor.Values.Length; i++) + manip(dstIndices[i], ref editor.Values[i]); } } @@ -377,19 +377,19 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator Contracts.CheckValue(manip, nameof(manip)); Contracts.CheckValueOrNull(pred); - var mutation = VBufferEditor.CreateFromBuffer(ref dst); - int dstValuesCount = mutation.Values.Length; + var editor = VBufferEditor.CreateFromBuffer(ref dst); + 
int dstValuesCount = editor.Values.Length; if (dst.IsDense) { // The vector is dense, so we can just do a direct access. - manip(slot, ref mutation.Values[slot]); + manip(slot, ref editor.Values[slot]); return; } int idx = 0; - if (dstValuesCount > 0 && Utils.TryFindIndexSorted(mutation.Indices, 0, dstValuesCount, slot, out idx)) + if (dstValuesCount > 0 && Utils.TryFindIndexSorted(editor.Indices, 0, dstValuesCount, slot, out idx)) { // Vector is sparse, but the item exists so we can access it. - manip(slot, ref mutation.Values[idx]); + manip(slot, ref editor.Values[idx]); return; } // The vector is sparse and there is no corresponding item, yet. @@ -405,19 +405,19 @@ public static void ApplyAt(ref VBuffer dst, int slot, SlotValueManipulator // we are modifying in the sparse vector, in which case the vector becomes // dense. Then there is no need to do anything with indices. bool needIndices = dstValuesCount + 1 < dst.Length; - mutation = VBufferEditor.Create(ref dst, dst.Length, dstValuesCount + 1); + editor = VBufferEditor.Create(ref dst, dst.Length, dstValuesCount + 1); if (idx != dstValuesCount) { // We have to do some sort of shift copy. 
int sliceLength = dstValuesCount - idx; if (needIndices) - mutation.Indices.Slice(idx, sliceLength).CopyTo(mutation.Indices.Slice(idx + 1)); - mutation.Values.Slice(idx, sliceLength).CopyTo(mutation.Values.Slice(idx + 1)); + editor.Indices.Slice(idx, sliceLength).CopyTo(editor.Indices.Slice(idx + 1)); + editor.Values.Slice(idx, sliceLength).CopyTo(editor.Values.Slice(idx + 1)); } if (needIndices) - mutation.Indices[idx] = slot; - mutation.Values[idx] = value; - dst = mutation.Commit(); + editor.Indices[idx] = slot; + editor.Values[idx] = value; + dst = editor.Commit(); } /// @@ -430,38 +430,38 @@ public static void Densify(ref VBuffer dst) var indices = dst.GetIndices(); var values = dst.GetValues(); - var mutation = VBufferEditor.Create( + var editor = VBufferEditor.Create( ref dst, dst.Length); - if (!mutation.CreatedNewValues) + if (!editor.CreatedNewValues) { // Densify in place. for (int i = values.Length; --i >= 0; ) { Contracts.Assert(i <= indices[i]); - mutation.Values[indices[i]] = values[i]; + editor.Values[indices[i]] = values[i]; } if (values.Length == 0) - mutation.Values.Clear(); + editor.Values.Clear(); else { int min = 0; for (int ii = 0; ii < values.Length; ++ii) { - mutation.Values.Slice(min, indices[ii] - min).Clear(); + editor.Values.Slice(min, indices[ii] - min).Clear(); min = indices[ii] + 1; } - mutation.Values.Slice(min, dst.Length - min).Clear(); + editor.Values.Slice(min, dst.Length - min).Clear(); } } else { - // createdNewValues is true, keepOldOnResize is false, so mutation.Values is already cleared + // createdNewValues is true, keepOldOnResize is false, so Values is already cleared for (int i = 0; i < values.Length; ++i) - mutation.Values[indices[i]] = values[i]; + editor.Values[indices[i]] = values[i]; } - dst = mutation.Commit(); + dst = editor.Commit(); } /// @@ -485,10 +485,10 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) if (dstIndices.IsEmpty) { // no previous values - var newIndicesMutation = 
VBufferEditor.Create(ref dst, dst.Length, denseCount); - Utils.FillIdentity(newIndicesMutation.Indices, denseCount); - newIndicesMutation.Values.Clear(); - dst = newIndicesMutation.Commit(); + var newIndicesEditor = VBufferEditor.Create(ref dst, dst.Length, denseCount); + Utils.FillIdentity(newIndicesEditor.Indices, denseCount); + newIndicesEditor.Values.Clear(); + dst = newIndicesEditor.Commit(); return; } int lim = Utils.FindIndexSorted(dstIndices, 0, dstValues.Length, denseCount); @@ -500,17 +500,17 @@ public static void DensifyFirst(ref VBuffer dst, int denseCount) return; } - var mutation = VBufferEditor.Create(ref dst, dst.Length, newLen, keepOldOnResize: true); + var editor = VBufferEditor.Create(ref dst, dst.Length, newLen, keepOldOnResize: true); int sliceLength = dstValues.Length - lim; - mutation.Values.Slice(lim, sliceLength).CopyTo(mutation.Values.Slice(denseCount)); - mutation.Indices.Slice(lim, sliceLength).CopyTo(mutation.Indices.Slice(denseCount)); + editor.Values.Slice(lim, sliceLength).CopyTo(editor.Values.Slice(denseCount)); + editor.Indices.Slice(lim, sliceLength).CopyTo(editor.Indices.Slice(denseCount)); int i = lim - 1; for (int ii = denseCount; --ii >= 0; ) { - mutation.Values[ii] = i >= 0 && dstIndices[i] == ii ? dstValues[i--] : default(T); - mutation.Indices[ii] = ii; + editor.Values[ii] = i >= 0 && dstIndices[i] == ii ? 
dstValues[i--] : default(T); + editor.Indices[ii] = ii; } - dst = mutation.Commit(); + dst = editor.Commit(); } /// @@ -541,7 +541,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds } } - var mutation = VBufferEditor.Create(ref dst, src.Length, sparseCount); + var editor = VBufferEditor.Create(ref dst, src.Length, sparseCount); if (sparseCount > 0) { int j = 0; @@ -550,8 +550,8 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds if (!isDefaultPredicate(in srcValues[i])) { Contracts.Assert(j < sparseCount); - mutation.Indices[j] = i; - mutation.Values[j] = srcValues[i]; + editor.Indices[j] = i; + editor.Values[j] = srcValues[i]; j++; } } @@ -559,7 +559,7 @@ public static void CreateMaybeSparseCopy(in VBuffer src, ref VBuffer ds Contracts.Assert(j == sparseCount); } - dst = mutation.Commit(); + dst = editor.Commit(); } /// @@ -692,7 +692,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< var srcValues = src.GetValues(); var dstValues = dst.GetValues(); var dstIndices = dst.GetIndices(); - var mutation = VBufferEditor.CreateFromBuffer(ref dst); + var editor = VBufferEditor.CreateFromBuffer(ref dst); if (srcValues.Length == 0) { // Major case 1, with srcValues.Length == 0. @@ -701,12 +701,12 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (dst.IsDense) { for (int i = 0; i < dst.Length; i++) - manip(i, default(TSrc), ref mutation.Values[i]); + manip(i, default(TSrc), ref editor.Values[i]); } else { for (int i = 0; i < dstValues.Length; i++) - manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); + manip(dstIndices[i], default(TSrc), ref editor.Values[i]); } return; } @@ -717,12 +717,12 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< if (!dst.IsDense) { Densify(ref dst); - mutation = VBufferEditor.CreateFromBuffer(ref dst); + editor = VBufferEditor.CreateFromBuffer(ref dst); } // Both are now dense. Both cases of outer are covered. 
for (int i = 0; i < srcValues.Length; i++) - manip(i, srcValues[i], ref mutation.Values[i]); + manip(i, srcValues[i], ref editor.Values[i]); return; } @@ -738,17 +738,17 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { if (i == sIndex) { - manip(i, srcValues[sI], ref mutation.Values[i]); + manip(i, srcValues[sI], ref editor.Values[i]); sIndex = ++sI == srcValues.Length ? src.Length : srcIndices[sI]; } else - manip(i, default(TSrc), ref mutation.Values[i]); + manip(i, default(TSrc), ref editor.Values[i]); } } else { for (int i = 0; i < srcValues.Length; i++) - manip(srcIndices[i], srcValues[i], ref mutation.Values[srcIndices[i]]); + manip(srcIndices[i], srcValues[i], ref editor.Values[srcIndices[i]]); } return; } @@ -757,14 +757,14 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { // Major case 4, with dst empty. Note that !src.Dense. // Neither is dense, and dst is empty. Both cases of outer are covered. - mutation = VBufferEditor.Create(ref dst, + editor = VBufferEditor.Create(ref dst, src.Length, srcValues.Length, maxValuesCapacity: src.Length); - mutation.Values.Clear(); + editor.Values.Clear(); for (int i = 0; i < srcValues.Length; i++) - manip(mutation.Indices[i] = srcIndices[i], srcValues[i], ref mutation.Values[i]); - dst = mutation.Commit(); + manip(editor.Indices[i] = srcIndices[i], srcValues[i], ref editor.Values[i]); + dst = editor.Commit(); return; } @@ -815,12 +815,12 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // proved to be inefficient so we go to the little bit of extra work // to handle it here. 
- mutation = VBufferEditor.Create(ref dst, + editor = VBufferEditor.Create(ref dst, src.Length, newCount, maxValuesCapacity: dst.Length); - var indices = mutation.Indices; - var values = mutation.Values; + var indices = editor.Indices; + var values = editor.Values; int sI = srcValues.Length - 1; dI = dstValues.Length - 1; int sIndex = srcIndices[sI]; @@ -857,7 +857,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< dIndex = --dI >= 0 ? dstIndices[dI] : -1; } } - dst = mutation.Commit(); + dst = editor.Commit(); return; } @@ -870,7 +870,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< for (int i = 0; i < srcValues.Length; i++) { Contracts.Assert(srcIndices[i] == dstIndices[i]); - manip(srcIndices[i], srcValues[i], ref mutation.Values[i]); + manip(srcIndices[i], srcValues[i], ref editor.Values[i]); } return; } @@ -885,11 +885,11 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< { if (dstIndices[i] == sIndex) { - manip(sIndex, srcValues[sI], ref mutation.Values[i]); + manip(sIndex, srcValues[sI], ref editor.Values[i]); sIndex = ++sI == srcValues.Length ? src.Length : srcIndices[sI]; } else - manip(dstIndices[i], default(TSrc), ref mutation.Values[i]); + manip(dstIndices[i], default(TSrc), ref editor.Values[i]); } } else @@ -900,7 +900,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< while (dstIndices[dI] < sIndex) dI++; Contracts.Assert(dstIndices[dI] == sIndex); - manip(sIndex, srcValues[sI], ref mutation.Values[dI++]); + manip(sIndex, srcValues[sI], ref editor.Values[dI++]); } } return; @@ -912,7 +912,7 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< // First do a "quasi" densification of dst, by making the indices // of dst correspond to those in src. 
- mutation = VBufferEditor.Create(ref dst, newCount, dstValues.Length); + editor = VBufferEditor.Create(ref dst, newCount, dstValues.Length); int sI = 0; for (dI = 0; dI < dstValues.Length; ++dI) { @@ -920,19 +920,19 @@ private static void ApplyWithCore(in VBuffer src, ref VBuffer< while (srcIndices[sI] < bIndex) sI++; Contracts.Assert(srcIndices[sI] == bIndex); - mutation.Indices[dI] = sI++; + editor.Indices[dI] = sI++; } - dst = mutation.Commit(); + dst = editor.Commit(); Densify(ref dst); - mutation = VBufferEditor.Create(ref dst, + editor = VBufferEditor.Create(ref dst, src.Length, newCount, maxValuesCapacity: src.Length); - srcIndices.CopyTo(mutation.Indices); + srcIndices.CopyTo(editor.Indices); for (sI = 0; sI < srcValues.Length; sI++) - manip(srcIndices[sI], srcValues[sI], ref mutation.Values[sI]); - dst = mutation.Commit(); + manip(srcIndices[sI], srcValues[sI], ref editor.Values[sI]); + dst = editor.Commit(); return; } @@ -964,53 +964,53 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf else if (src.IsDense) { Contracts.Assert(srcValues.Length == src.Length); - var mutation = VBufferEditor.Create(ref res, length); + var editor = VBufferEditor.Create(ref res, length); for (int i = 0; i < length; i++) - manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); - res = mutation.Commit(); + manip(i, srcValues[i], default(TDst), ref editor.Values[i]); + res = editor.Commit(); } else { // src is non-empty sparse. 
int count = srcValues.Length; Contracts.Assert(0 < count && count < length); - var mutation = VBufferEditor.Create(ref res, length, count); + var editor = VBufferEditor.Create(ref res, length, count); var srcIndices = src.GetIndices(); - srcIndices.CopyTo(mutation.Indices); + srcIndices.CopyTo(editor.Indices); for (int ii = 0; ii < count; ii++) { int i = srcIndices[ii]; - mutation.Indices[ii] = i; - manip(i, srcValues[ii], default(TDst), ref mutation.Values[ii]); + editor.Indices[ii] = i; + manip(i, srcValues[ii], default(TDst), ref editor.Values[ii]); } - res = mutation.Commit(); + res = editor.Commit(); } } else if (dst.IsDense) { - var mutation = VBufferEditor.Create(ref res, length); + var editor = VBufferEditor.Create(ref res, length); if (srcValues.Length == 0) { if (outer) { // Apply manip to all slots, as all slots of dst are defined. for (int j = 0; j < length; j++) - manip(j, default(TSrc), dstValues[j], ref mutation.Values[j]); + manip(j, default(TSrc), dstValues[j], ref editor.Values[j]); } else { // Copy only. No slot of src is defined. for (int j = 0; j < length; j++) - mutation.Values[j] = dstValues[j]; + editor.Values[j] = dstValues[j]; } - res = mutation.Commit(); + res = editor.Commit(); } else if (src.IsDense) { Contracts.Assert(srcValues.Length == src.Length); for (int i = 0; i < length; i++) - manip(i, srcValues[i], dstValues[i], ref mutation.Values[i]); - res = mutation.Commit(); + manip(i, srcValues[i], dstValues[i], ref editor.Values[i]); + res = editor.Commit(); } else { @@ -1028,11 +1028,11 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (j == i) { - manip(j, srcValues[ii], dstValues[j], ref mutation.Values[j]); + manip(j, srcValues[ii], dstValues[j], ref editor.Values[j]); i = ++ii == count ? 
length : srcIndices[ii]; } else - manip(j, default(TSrc), dstValues[j], ref mutation.Values[j]); + manip(j, default(TSrc), dstValues[j], ref editor.Values[j]); } } else @@ -1042,14 +1042,14 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf { if (j == i) { - manip(j, srcValues[ii], dstValues[j], ref mutation.Values[j]); + manip(j, srcValues[ii], dstValues[j], ref editor.Values[j]); i = ++ii == count ? length : srcIndices[ii]; } else - mutation.Values[j] = dstValues[j]; + editor.Values[j] = dstValues[j]; } } - res = mutation.Commit(); + res = editor.Commit(); } } else @@ -1060,43 +1060,43 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf Contracts.Assert(dstCount > 0); if (srcValues.Length == 0) { - var mutation = VBufferEditor.Create(ref res, length, dstCount); + var editor = VBufferEditor.Create(ref res, length, dstCount); if (outer) { for (int jj = 0; jj < dstCount; jj++) { int j = dstIndices[jj]; - mutation.Indices[jj] = j; - manip(j, default(TSrc), dstValues[jj], ref mutation.Values[jj]); + editor.Indices[jj] = j; + manip(j, default(TSrc), dstValues[jj], ref editor.Values[jj]); } } else { for (int jj = 0; jj < dstCount; jj++) { - mutation.Indices[jj] = dstIndices[jj]; - mutation.Values[jj] = dstValues[jj]; + editor.Indices[jj] = dstIndices[jj]; + editor.Values[jj] = dstValues[jj]; } } - res = mutation.Commit(); + res = editor.Commit(); } else if (src.IsDense) { // res will be dense. - var mutation = VBufferEditor.Create(ref res, length); + var editor = VBufferEditor.Create(ref res, length); int jj = 0; int j = dstIndices[jj]; for (int i = 0; i < length; i++) { if (i == j) { - manip(i, srcValues[i], dstValues[jj], ref mutation.Values[i]); + manip(i, srcValues[i], dstValues[jj], ref editor.Values[i]); j = ++jj == dstCount ? 
length : dstIndices[jj]; } else - manip(i, srcValues[i], default(TDst), ref mutation.Values[i]); + manip(i, srcValues[i], default(TDst), ref editor.Values[i]); } - res = mutation.Commit(); + res = editor.Commit(); } else { @@ -1137,7 +1137,7 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf } else { - var mutation = VBufferEditor.Create(ref res, length, resCount); + var editor = VBufferEditor.Create(ref res, length, resCount); int ii = 0; int i = srcIndices[ii]; @@ -1150,35 +1150,35 @@ private static void ApplyWithCoreCopy(in VBuffer src, ref VBuf if (i == j) { // Slot (i == j) both defined in src and dst. Apply manip. - mutation.Indices[kk] = i; - manip(i, srcValues[ii], dstValues[jj], ref mutation.Values[kk]); + editor.Indices[kk] = i; + manip(i, srcValues[ii], dstValues[jj], ref editor.Values[kk]); i = ++ii == srcValues.Length ? length : srcIndices[ii]; j = ++jj == dstCount ? length : dstIndices[jj]; } else if (i < j) { // Slot i defined only in src, but not in dst. Apply manip. - mutation.Indices[kk] = i; - manip(i, srcValues[ii], default(TDst), ref mutation.Values[kk]); + editor.Indices[kk] = i; + manip(i, srcValues[ii], default(TDst), ref editor.Values[kk]); i = ++ii == srcValues.Length ? length : srcIndices[ii]; } else { // Slot j defined only in dst, but not in src. Apply manip if outer. // Otherwise just copy. - mutation.Indices[kk] = j; + editor.Indices[kk] = j; // REVIEW: Should we move checking of outer outside the loop? if (outer) - manip(j, default(TSrc), dstValues[jj], ref mutation.Values[kk]); + manip(j, default(TSrc), dstValues[jj], ref editor.Values[kk]); else - mutation.Values[kk] = dstValues[jj]; + editor.Values[kk] = dstValues[jj]; j = ++jj == dstCount ? 
length : dstIndices[jj]; } } Contracts.Assert(ii == srcValues.Length && jj == dstCount); Contracts.Assert(i == length && j == length); - res = mutation.Commit(); + res = editor.Commit(); } } } @@ -1207,11 +1207,11 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref Resize(ref dst, src.Length, 0); return; } - var mutation = VBufferEditor.Create(ref dst, + var editor = VBufferEditor.Create(ref dst, src.Length, srcValues.Length, maxValuesCapacity: src.Length); - Span values = mutation.Values; + Span values = editor.Values; if (src.IsDense) { for (int i = 0; i < src.Length; ++i) @@ -1219,13 +1219,13 @@ public static void ApplyIntoEitherDefined(in VBuffer src, ref } else { - Span indices = mutation.Indices; + Span indices = editor.Indices; var srcIndices = src.GetIndices(); srcIndices.CopyTo(indices); for (int i = 0; i < srcValues.Length; ++i) values[i] = func(srcIndices[i], srcValues[i]); } - dst = mutation.Commit(); + dst = editor.Commit(); } /// @@ -1265,11 +1265,11 @@ public static void ApplyInto(in VBuffer a, in VBuffer int bI = 0; ReadOnlySpan aIndices; ReadOnlySpan bIndices; - VBufferEditor mutation; + VBufferEditor editor; if (a.IsDense || b.IsDense) { // Case 2. One of the two inputs is dense. The output will be dense. - mutation = VBufferEditor.Create(ref dst, a.Length); + editor = VBufferEditor.Create(ref dst, a.Length); if (!a.IsDense) { // a is sparse, b is dense @@ -1277,7 +1277,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer for (int i = 0; i < b.Length; i++) { TSrc1 aVal = (aI < aIndices.Length && i == aIndices[aI]) ? aValues[aI++] : default(TSrc1); - mutation.Values[i] = func(i, aVal, bValues[i]); + editor.Values[i] = func(i, aVal, bValues[i]); } } else if (!b.IsDense) @@ -1287,16 +1287,16 @@ public static void ApplyInto(in VBuffer a, in VBuffer for (int i = 0; i < a.Length; i++) { TSrc2 bVal = (bI < bIndices.Length && i == bIndices[bI]) ? 
bValues[bI++] : default(TSrc2); - mutation.Values[i] = func(i, aValues[i], bVal); + editor.Values[i] = func(i, aValues[i], bVal); } } else { // both dense for (int i = 0; i < a.Length; i++) - mutation.Values[i] = func(i, aValues[i], bValues[i]); + editor.Values[i] = func(i, aValues[i], bValues[i]); } - dst = mutation.Commit(); + dst = editor.Commit(); return; } @@ -1322,8 +1322,8 @@ public static void ApplyInto(in VBuffer a, in VBuffer // REVIEW: Worth optimizing the newCount == a.Length case? // Probably not... - mutation = VBufferEditor.Create(ref dst, a.Length, newCount); - Span indices = mutation.Indices; + editor = VBufferEditor.Create(ref dst, a.Length, newCount); + Span indices = editor.Indices; if (newCount == bValues.Length) { @@ -1334,7 +1334,7 @@ public static void ApplyInto(in VBuffer a, in VBuffer for (aI = 0; aI < aValues.Length; aI++) { Contracts.Assert(aIndices[aI] == bIndices[aI]); - mutation.Values[aI] = func(aIndices[aI], aValues[aI], bValues[aI]); + editor.Values[aI] = func(aIndices[aI], aValues[aI], bValues[aI]); } } else @@ -1346,10 +1346,10 @@ public static void ApplyInto(in VBuffer a, in VBuffer { Contracts.Assert(aIndices[aI] >= bIndices[bI]); TSrc1 aVal = aIndices[aI] == bIndices[bI] ? aValues[aI++] : default(TSrc1); - mutation.Values[bI] = func(bIndices[bI], aVal, bValues[bI]); + editor.Values[bI] = func(bIndices[bI], aVal, bValues[bI]); } for (; bI < bValues.Length; bI++) - mutation.Values[bI] = func(bIndices[bI], default(TSrc1), bValues[bI]); + editor.Values[bI] = func(bIndices[bI], default(TSrc1), bValues[bI]); } } else if (newCount == aValues.Length) @@ -1361,10 +1361,10 @@ public static void ApplyInto(in VBuffer a, in VBuffer { Contracts.Assert(bIndices[bI] >= aIndices[aI]); TSrc2 bVal = aIndices[aI] == bIndices[bI] ? 
bValues[bI++] : default(TSrc2); - mutation.Values[aI] = func(aIndices[aI], aValues[aI], bVal); + editor.Values[aI] = func(aIndices[aI], aValues[aI], bVal); } for (; aI < aValues.Length; aI++) - mutation.Values[aI] = func(aIndices[aI], aValues[aI], default(TSrc2)); + editor.Values[aI] = func(aIndices[aI], aValues[aI], default(TSrc2)); } else { @@ -1396,25 +1396,25 @@ public static void ApplyInto(in VBuffer a, in VBuffer aVal = aValues[aI++]; bVal = bValues[bI++]; } - mutation.Values[newI] = func(index, aVal, bVal); + editor.Values[newI] = func(index, aVal, bVal); indices[newI++] = index; } for (; aI < aIndices.Length; aI++) { int index = aIndices[aI]; - mutation.Values[newI] = func(index, aValues[aI], default(TSrc2)); + editor.Values[newI] = func(index, aValues[aI], default(TSrc2)); indices[newI++] = index; } for (; bI < bIndices.Length; bI++) { int index = bIndices[bI]; - mutation.Values[newI] = func(index, default(TSrc1), bValues[bI]); + editor.Values[newI] = func(index, default(TSrc1), bValues[bI]); indices[newI++] = index; } } - dst = mutation.Commit(); + dst = editor.Commit(); } /// @@ -1423,16 +1423,16 @@ public static void ApplyInto(in VBuffer a, in VBuffer public static void Copy(List src, ref VBuffer dst, int length) { Contracts.CheckParam(0 <= length && length <= Utils.Size(src), nameof(length)); - var mutation = VBufferEditor.Create(ref dst, length); + var editor = VBufferEditor.Create(ref dst, length); if (length > 0) { // List.CopyTo should have an overload for Span - https://github.com/dotnet/corefx/issues/33006 for (int i = 0; i < length; i++) { - mutation.Values[i] = src[i]; + editor.Values[i] = src[i]; } } - dst = mutation.Commit(); + dst = editor.Commit(); } /// diff --git a/src/Microsoft.ML.Data/Data/BufferBuilder.cs b/src/Microsoft.ML.Data/Data/BufferBuilder.cs index 6b1003777e..2f37f4ea81 100644 --- a/src/Microsoft.ML.Data/Data/BufferBuilder.cs +++ b/src/Microsoft.ML.Data/Data/BufferBuilder.cs @@ -448,17 +448,17 @@ public void GetResult(ref 
VBuffer buffer) if (_dense) { - var mutation = VBufferEditor.Create(ref buffer, _length); - _values.AsSpan(0, _length).CopyTo(mutation.Values); - buffer = mutation.Commit(); + var editor = VBufferEditor.Create(ref buffer, _length); + _values.AsSpan(0, _length).CopyTo(editor.Values); + buffer = editor.Commit(); } else { Contracts.Assert(_count < _length); - var mutation = VBufferEditor.Create(ref buffer, _length, _count); - _values.AsSpan(0, _count).CopyTo(mutation.Values); - _indices.AsSpan(0, _count).CopyTo(mutation.Indices); - buffer = mutation.Commit(); + var editor = VBufferEditor.Create(ref buffer, _length, _count); + _values.AsSpan(0, _count).CopyTo(editor.Values); + _indices.AsSpan(0, _count).CopyTo(editor.Indices); + buffer = editor.Commit(); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index f062c61d98..29fa46c1a0 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -1112,26 +1112,26 @@ public override void Get(ref VBuffer value) if (count < 0) { // dense - var mutation = VBufferEditor.Create(ref value, length); + var editor = VBufferEditor.Create(ref value, length); if (length > 0) { _values.AsSpan(_valuesOffset, length) - .CopyTo(mutation.Values); + .CopyTo(editor.Values); } - value = mutation.Commit(); + value = editor.Commit(); } else { // sparse - var mutation = VBufferEditor.Create(ref value, length, count); + var editor = VBufferEditor.Create(ref value, length, count); if (count > 0) { _values.AsSpan(_valuesOffset, count) - .CopyTo(mutation.Values); + .CopyTo(editor.Values); _indices.AsSpan(_indicesOffset, count) - .CopyTo(mutation.Indices); + .CopyTo(editor.Indices); } - value = mutation.Commit(); + value = editor.Commit(); } } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs index 023986aaf7..cdcb507f51 100644 
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderParser.cs @@ -407,16 +407,16 @@ public void Get(ref VBuffer dst) return; } - var mutation = VBufferEditor.Create(ref dst, _size, _count); - _values.AsSpan(0, _count).CopyTo(mutation.Values); + var editor = VBufferEditor.Create(ref dst, _size, _count); + _values.AsSpan(0, _count).CopyTo(editor.Values); if (_count == _size) { - dst = mutation.Commit(); + dst = editor.Commit(); return; } - _indices.AsSpan(0, _count).CopyTo(mutation.Indices); - dst = mutation.Commit(); + _indices.AsSpan(0, _count).CopyTo(editor.Indices); + dst = editor.Commit(); } } diff --git a/src/Microsoft.ML.Data/DataView/Transposer.cs b/src/Microsoft.ML.Data/DataView/Transposer.cs index 4f4c0d4ab5..df0d772a07 100644 --- a/src/Microsoft.ML.Data/DataView/Transposer.cs +++ b/src/Microsoft.ML.Data/DataView/Transposer.cs @@ -1294,12 +1294,12 @@ private ValueGetter> CreateGetter(int col) (ref VBuffer value) => { EnsureValid(); - VBufferEditor mutation; + VBufferEditor editor; if (_inputValue.IsDense) { - mutation = VBufferEditor.Create(ref value, len); - _inputValue.GetValues().Slice(min, len).CopyTo(mutation.Values); - value = mutation.Commit(); + editor = VBufferEditor.Create(ref value, len); + _inputValue.GetValues().Slice(min, len).CopyTo(editor.Values); + value = editor.Commit(); return; } // In the sparse case we have ranges on Indices/Values to consider. 
@@ -1312,20 +1312,20 @@ private ValueGetter> CreateGetter(int col) return; } - mutation = VBufferEditor.Create(ref value, len, scount); + editor = VBufferEditor.Create(ref value, len, scount); bool isDense = len == scount; if (!isDense) { - _inputValue.GetIndices().Slice(smin, scount).CopyTo(mutation.Indices); + _inputValue.GetIndices().Slice(smin, scount).CopyTo(editor.Indices); if (min != 0) { for (int i = 0; i < scount; ++i) - mutation.Indices[i] -= min; + editor.Indices[i] -= min; } } - _inputValue.GetValues().Slice(smin, scount).CopyTo(mutation.Values); - value = mutation.Commit(); + _inputValue.GetValues().Slice(smin, scount).CopyTo(editor.Values); + value = editor.Commit(); }; } diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs index b7fb9bd1b7..8aae1f7c66 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VBufferMathUtils.cs @@ -87,11 +87,11 @@ public static void ScaleBy(ref VBuffer dst, Float c) { if (c == 1 || dst.GetValues().Length == 0) return; - var mutation = VBufferEditor.CreateFromBuffer(ref dst); + var editor = VBufferEditor.CreateFromBuffer(ref dst); if (c != 0) - CpuMathUtils.Scale(c, mutation.Values); + CpuMathUtils.Scale(c, editor.Values); else // Maintain density of dst. - mutation.Values.Clear(); + editor.Values.Clear(); // REVIEW: Any benefit in sparsifying? } @@ -115,23 +115,23 @@ public static void ScaleBy(in VBuffer src, ref VBuffer dst, Float if (src.IsDense) { // Maintain the density of src to dst in order to avoid slow down of L-BFGS. 
- var mutation = VBufferEditor.Create(ref dst, length); + var editor = VBufferEditor.Create(ref dst, length); Contracts.Assert(length == count); if (c == 0) - mutation.Values.Clear(); + editor.Values.Clear(); else - CpuMathUtils.Scale(c, srcValues, mutation.Values, length); - dst = mutation.Commit(); + CpuMathUtils.Scale(c, srcValues, editor.Values, length); + dst = editor.Commit(); } else { - var mutation = VBufferEditor.Create(ref dst, length, count); - src.GetIndices().CopyTo(mutation.Indices); + var editor = VBufferEditor.Create(ref dst, length, count); + src.GetIndices().CopyTo(editor.Indices); if (c == 0) - mutation.Values.Clear(); + editor.Values.Clear(); else - CpuMathUtils.Scale(c, srcValues, mutation.Values, count); - dst = mutation.Commit(); + CpuMathUtils.Scale(c, srcValues, editor.Values, count); + dst = editor.Commit(); } } @@ -148,11 +148,11 @@ public static void Add(in VBuffer src, ref VBuffer dst) if (dst.IsDense) { - var mutation = VBufferEditor.Create(ref dst, dst.Length); + var editor = VBufferEditor.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.Add(srcValues, mutation.Values, src.Length); + CpuMathUtils.Add(srcValues, editor.Values, src.Length); else - CpuMathUtils.Add(srcValues, src.GetIndices(), mutation.Values, srcValues.Length); + CpuMathUtils.Add(srcValues, src.GetIndices(), editor.Values, srcValues.Length); return; } // REVIEW: Should we use SSE for any of these possibilities? 
@@ -176,11 +176,11 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds if (dst.IsDense) { - var mutation = VBufferEditor.Create(ref dst, dst.Length); + var editor = VBufferEditor.Create(ref dst, dst.Length); if (src.IsDense) - CpuMathUtils.AddScale(c, srcValues, mutation.Values, src.Length); + CpuMathUtils.AddScale(c, srcValues, editor.Values, src.Length); else - CpuMathUtils.AddScale(c, srcValues, src.GetIndices(), mutation.Values, srcValues.Length); + CpuMathUtils.AddScale(c, srcValues, src.GetIndices(), editor.Values, srcValues.Length); return; } // REVIEW: Should we use SSE for any of these possibilities? @@ -207,9 +207,9 @@ public static void AddMult(in VBuffer src, Float c, ref VBuffer ds Contracts.Assert(length > 0); if (dst.IsDense && src.IsDense) { - var mutation = VBufferEditor.Create(ref res, length); - CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), mutation.Values, length); - res = mutation.Commit(); + var editor = VBufferEditor.Create(ref res, length); + CpuMathUtils.AddScaleCopy(c, srcValues, dst.GetValues(), editor.Values, length); + res = editor.Commit(); return; } @@ -247,13 +247,13 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer var srcValues = src.GetValues(); if (srcValues.Length == 0 || c == 0) return; - VBufferEditor mutation; + VBufferEditor editor; Span values; if (dst.IsDense) { // This is by far the most common case. - mutation = VBufferEditor.Create(ref dst, dst.Length); - values = mutation.Values.Slice(offset); + editor = VBufferEditor.Create(ref dst, dst.Length); + values = editor.Values.Slice(offset); if (src.IsDense) CpuMathUtils.AddScale(c, srcValues, values, srcValues.Length); else @@ -295,12 +295,12 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } // Extend dst so that it has room for this additional stuff. Shift things over as well. 
var dstValues = dst.GetValues(); - mutation = VBufferEditor.Create(ref dst, + editor = VBufferEditor.Create(ref dst, dst.Length, dstValues.Length + gapCount, keepOldOnResize: true); - var indices = mutation.Indices; - values = mutation.Values; + var indices = editor.Indices; + values = editor.Values; if (gapCount > 0) { // Shift things over, unless there's nothing to shift over, or no new elements are being introduced anyway. @@ -367,7 +367,7 @@ public static void AddMultWithOffset(in VBuffer src, Float c, ref VBuffer } } } - dst = mutation.Commit(); + dst = editor.Commit(); } /// @@ -390,10 +390,10 @@ public static void ScaleInto(in VBuffer src, Float c, ref VBuffer if (src.Length > 0 && src.IsDense) { // Due to sparsity preservation from src, dst must be dense, in the same way. - var mutation = VBufferEditor.Create(ref dst, src.Length); - if (!mutation.CreatedNewValues) // We need to clear it. - mutation.Values.Clear(); - dst = mutation.Commit(); + var editor = VBufferEditor.Create(ref dst, src.Length); + if (!editor.CreatedNewValues) // We need to clear it. 
+ editor.Values.Clear(); + dst = editor.Commit(); } else { diff --git a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs index 1a3d5fdaa3..2926b47f83 100644 --- a/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs +++ b/src/Microsoft.ML.Data/Depricated/Vector/VectorUtils.cs @@ -165,8 +165,8 @@ public static void MulElementWise(in VBuffer a, ref VBuffer dst) if (a.IsDense && dst.IsDense) { - var mutation = VBufferEditor.CreateFromBuffer(ref dst); - CpuMathUtils.MulElementWise(a.GetValues(), dst.GetValues(), mutation.Values, a.Length); + var editor = VBufferEditor.CreateFromBuffer(ref dst); + CpuMathUtils.MulElementWise(a.GetValues(), dst.GetValues(), editor.Values, a.Length); } else VBufferUtils.ApplyWithEitherDefined(in a, ref dst, (int ind, Float v1, ref Float v2) => { v2 *= v1; }); diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs index dcb0499965..6c68b3fa20 100644 --- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs +++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs @@ -692,7 +692,7 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi (in VBuffer src, ref VBuffer dst) => { var srcValues = src.GetValues(); - var mutation = VBufferEditor.Create( + var editor = VBufferEditor.Create( ref dst, src.Length, srcValues.Length); @@ -701,9 +701,9 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi for (int j = 0; j < src.Length; j++) { if (srcValues[j] == 0 || srcValues[j] > keyMapperCur.Length) - mutation.Values[j] = 0; + editor.Values[j] = 0; else - mutation.Values[j] = (uint)keyMapperCur[srcValues[j] - 1] + 1; + editor.Values[j] = (uint)keyMapperCur[srcValues[j] - 1] + 1; } } else @@ -712,13 +712,13 @@ public static void ReconcileVectorKeyValues(IHostEnvironment env, IDataView[] vi for (int j = 0; j < srcValues.Length; j++) { if (srcValues[j] == 0 || srcValues[j] > 
keyMapperCur.Length) - mutation.Values[j] = 0; + editor.Values[j] = 0; else - mutation.Values[j] = (uint)keyMapperCur[srcValues[j] - 1] + 1; - mutation.Indices[j] = srcIndices[j]; + editor.Values[j] = (uint)keyMapperCur[srcValues[j] - 1] + 1; + editor.Indices[j] = srcIndices[j]; } } - dst = mutation.Commit(); + dst = editor.Commit(); }; ValueGetter>> slotNamesGetter = null; diff --git a/src/Microsoft.ML.Data/Transforms/HashTransform.cs b/src/Microsoft.ML.Data/Transforms/HashTransform.cs index bb4c12989e..0481d5e5e7 100644 --- a/src/Microsoft.ML.Data/Transforms/HashTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/HashTransform.cs @@ -749,27 +749,27 @@ private static ValueGetter> MakeVectorHashGetter(uint se VBufferUtils.Resize(ref dst, src.Length, 0); return; } - var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); + var editor = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); for (int i = 0; i < srcValues.Length; ++i) - mutation.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); + editor.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); if (!src.IsDense) - src.GetIndices().CopyTo(mutation.Indices); + src.GetIndices().CopyTo(editor.Indices); - dst = mutation.Commit(); + dst = editor.Commit(); }; } // It is not sparsity preserving. return (ref VBuffer dst) => { srcGetter(ref src); - var mutation = VBufferEditor.Create(ref dst, src.Length); + var editor = VBufferEditor.Create(ref dst, src.Length); var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < srcValues.Length; ++i) - mutation.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); + editor.Values[i] = hasher.HashCore(seed, mask, srcValues[i]); } else { @@ -778,13 +778,13 @@ private static ValueGetter> MakeVectorHashGetter(uint se // values, rather than having complicated logic to do a simultaneous traversal of the // sparse vs. dense array. 
for (int i = 0; i < src.Length; ++i) - mutation.Values[i] = defaultHash; + editor.Values[i] = defaultHash; // Next overwrite the values in the explicit entries. var srcIndices = src.GetIndices(); for (int i = 0; i < srcValues.Length; ++i) - mutation.Values[srcIndices[i]] = hasher.HashCore(seed, mask, srcValues[i]); + editor.Values[srcIndices[i]] = hasher.HashCore(seed, mask, srcValues[i]); } - dst = mutation.Commit(); + dst = editor.Commit(); }; } @@ -811,35 +811,35 @@ private static ValueGetter> MakeVectorOrderedHashGetter( VBufferUtils.Resize(ref dst, src.Length, 0); return; } - var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); + var editor = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); if (src.IsDense) { for (int i = 0; i < srcValues.Length; ++i) - mutation.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValues[i]); + editor.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValues[i]); } else { var srcIndices = src.GetIndices(); for (int i = 0; i < srcValues.Length; ++i) - mutation.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)srcIndices[i]), mask, srcValues[i]); - srcIndices.CopyTo(mutation.Indices); + editor.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)srcIndices[i]), mask, srcValues[i]); + srcIndices.CopyTo(editor.Indices); } - dst = mutation.Commit(); + dst = editor.Commit(); }; } // It is not sparsity preserving. 
return (ref VBuffer dst) => { srcGetter(ref src); - var mutation = VBufferEditor.Create(ref dst, src.Length); + var editor = VBufferEditor.Create(ref dst, src.Length); var srcValues = src.GetValues(); if (src.IsDense) { for (int i = 0; i < srcValues.Length; ++i) - mutation.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValues[i]); + editor.Values[i] = hasher.HashCore(Hashing.MurmurRound(seed, (uint)i), mask, srcValues[i]); } else { @@ -849,14 +849,14 @@ private static ValueGetter> MakeVectorOrderedHashGetter( { uint indexSeed = Hashing.MurmurRound(seed, (uint)i); if (srcIndices.Length <= j || srcIndices[j] > i) - mutation.Values[i] = hasher.HashCore(indexSeed, mask, default); + editor.Values[i] = hasher.HashCore(indexSeed, mask, default); else if (srcIndices[j] == i) - mutation.Values[i] = hasher.HashCore(indexSeed, mask, srcValues[j++]); + editor.Values[i] = hasher.HashCore(indexSeed, mask, srcValues[j++]); else Contracts.Assert(false, "this should have never happened."); } } - dst = mutation.Commit(); + dst = editor.Commit(); }; } diff --git a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs index 35a7b7f058..79500df97e 100644 --- a/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs @@ -567,7 +567,7 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) int lenDst = checked(size * lenSrc); var values = src.GetValues(); int cntSrc = values.Length; - var mutation = VBufferEditor.Create(ref dst, lenDst, cntSrc); + var editor = VBufferEditor.Create(ref dst, lenDst, cntSrc); int count = 0; if (src.IsDense) @@ -579,8 +579,8 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) uint key = values[slot] - 1; if (key >= (uint)size) continue; - mutation.Values[count] = 1; - mutation.Indices[count++] = slot * size + (int)key; + editor.Values[count] = 1; + editor.Indices[count++] = slot * size + 
(int)key; } } else @@ -592,11 +592,11 @@ private ValueGetter> MakeGetterInd(IRow input, int iinfo) uint key = values[islot] - 1; if (key >= (uint)size) continue; - mutation.Values[count] = 1; - mutation.Indices[count++] = indices[islot] * size + (int)key; + editor.Values[count] = 1; + editor.Indices[count++] = indices[islot] * size + (int)key; } } - dst = mutation.CommitTruncated(count); + dst = editor.CommitTruncated(count); }; } diff --git a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs index 405253bbca..188f8e72b5 100644 --- a/src/Microsoft.ML.Data/Utilities/SlotDropper.cs +++ b/src/Microsoft.ML.Data/Utilities/SlotDropper.cs @@ -115,11 +115,11 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) // End of the trivial cases // At this point, we need to drop some slots and keep some slots. - VBufferEditor mutation; + VBufferEditor editor; var srcValues = src.GetValues(); if (src.IsDense) { - mutation = VBufferEditor.Create(ref dst, newLength); + editor = VBufferEditor.Create(ref dst, newLength); int iDst = 0; int iSrc = 0; @@ -129,17 +129,17 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) while (iSrc < lim) { Contracts.Assert(iDst <= iSrc); - mutation.Values[iDst++] = srcValues[iSrc++]; + editor.Values[iDst++] = srcValues[iSrc++]; } iSrc = SlotsMax[i] + 1; } while (iSrc < src.Length) { Contracts.Assert(iDst <= iSrc); - mutation.Values[iDst++] = srcValues[iSrc++]; + editor.Values[iDst++] = srcValues[iSrc++]; } Contracts.Assert(iDst == newLength); - dst = mutation.Commit(); + dst = editor.Commit(); return; } @@ -151,7 +151,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(newCount <= src.Length); - mutation = VBufferEditor.Create( + editor = VBufferEditor.Create( ref dst, newLength, newCount, @@ -172,8 +172,8 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) if (index < min) { Contracts.Assert(iiDst <= iiSrc); - mutation.Indices[iiDst] = index - iOffset; - 
mutation.Values[iiDst++] = srcValues[iiSrc++]; + editor.Indices[iiDst] = index - iOffset; + editor.Values[iiDst++] = srcValues[iiSrc++]; continue; } if (index <= max) @@ -209,7 +209,7 @@ public void DropSlots(ref VBuffer src, ref VBuffer dst) Contracts.Assert(index <= max); } - dst = mutation.CommitTruncated(iiDst); + dst = editor.CommitTruncated(iiDst); } } } diff --git a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs index a4a63c702a..66a6ff165e 100644 --- a/src/Microsoft.ML.Ensemble/EnsembleUtils.cs +++ b/src/Microsoft.ML.Ensemble/EnsembleUtils.cs @@ -53,14 +53,14 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices if (cardinality >= src.Length / 2) { T defaultValue = default; - var mutation = VBufferEditor.Create(ref dst, src.Length); + var editor = VBufferEditor.Create(ref dst, src.Length); for (int i = 0; i < srcValues.Length; i++) - mutation.Values[i] = !includedIndices[i] ? defaultValue : srcValues[i]; - dst = mutation.Commit(); + editor.Values[i] = !includedIndices[i] ? 
defaultValue : srcValues[i]; + dst = editor.Commit(); } else { - var mutation = VBufferEditor.Create(ref dst, src.Length, cardinality); + var editor = VBufferEditor.Create(ref dst, src.Length, cardinality); int count = 0; for (int i = 0; i < srcValues.Length; i++) @@ -68,19 +68,19 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices if (includedIndices[i]) { Contracts.Assert(count < cardinality); - mutation.Values[count] = srcValues[i]; - mutation.Indices[count] = i; + editor.Values[count] = srcValues[i]; + editor.Indices[count] = i; count++; } } Contracts.Assert(count == cardinality); - dst = mutation.Commit(); + dst = editor.Commit(); } } else { - var mutation = VBufferEditor.Create(ref dst, src.Length, cardinality); + var editor = VBufferEditor.Create(ref dst, src.Length, cardinality); int count = 0; var srcIndices = src.GetIndices(); @@ -88,13 +88,13 @@ public static void SelectFeatures(in VBuffer src, BitArray includedIndices { if (includedIndices[srcIndices[i]]) { - mutation.Values[count] = srcValues[i]; - mutation.Indices[count] = srcIndices[i]; + editor.Values[count] = srcValues[i]; + editor.Indices[count] = srcIndices[i]; count++; } } - dst = mutation.CommitTruncated(count); + dst = editor.CommitTruncated(count); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs index df58a1e58d..e7a50c11c3 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiAverager.cs @@ -35,11 +35,11 @@ protected void CombineCore(ref VBuffer dst, VBuffer[] src, Singl return; } - var mutation = VBufferEditor.Create(ref dst, len); - if (!mutation.CreatedNewValues) - mutation.Values.Clear(); + var editor = VBufferEditor.Create(ref dst, len); + if (!editor.CreatedNewValues) + editor.Values.Clear(); // Set the output to values. 
- dst = mutation.Commit(); + dst = editor.Commit(); Single weightTotal; if (weights == null) diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs index 6df0b6f28a..350833aebb 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseMultiCombiner.cs @@ -98,10 +98,10 @@ protected bool TryNormalize(VBuffer[] values) protected void GetNaNOutput(ref VBuffer dst, int len) { Contracts.Assert(len >= 0); - var mutation = VBufferEditor.Create(ref dst, len); + var editor = VBufferEditor.Create(ref dst, len); for (int i = 0; i < len; i++) - mutation.Values[i] = Single.NaN; - dst = mutation.Commit(); + editor.Values[i] = Single.NaN; + dst = editor.Commit(); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs index 33ab0627c3..dbe1517f22 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/BaseScalarStacking.cs @@ -25,9 +25,9 @@ protected override void FillFeatureBuffer(Single[] src, ref VBuffer dst) { Contracts.AssertNonEmpty(src); int len = src.Length; - var mutation = VBufferEditor.Create(ref dst, len); - src.CopyTo(mutation.Values); - dst = mutation.Commit(); + var editor = VBufferEditor.Create(ref dst, len); + src.CopyTo(editor.Values); + dst = editor.Commit(); } } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs index 0f6bc0b96d..3b11146203 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiMedian.cs @@ -81,7 +81,7 @@ public override Combiner> GetCombiner() return; } - var mutation = VBufferEditor.Create(ref dst, len); + var editor = VBufferEditor.Create(ref dst, len); int count = src.Length; if 
(Utils.Size(raw) < count) @@ -90,11 +90,11 @@ public override Combiner> GetCombiner() { for (int j = 0; j < count; j++) raw[j] = i < src[j].Length ? src[j].GetItemOrDefault(i) : 0; - mutation.Values[i] = MathUtils.GetMedianInPlace(raw, count); + editor.Values[i] = MathUtils.GetMedianInPlace(raw, count); } // Set the output to values. - dst = mutation.Commit(); + dst = editor.Commit(); }; } } diff --git a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs index 9649f2b9dd..4e352e8265 100644 --- a/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs +++ b/src/Microsoft.ML.Ensemble/OutputCombiners/MultiStacking.cs @@ -83,17 +83,17 @@ protected override void FillFeatureBuffer(VBuffer[] src, ref VBuffer dst, VBuffer[] src, Single[ } int len = GetClassCount(src); - var mutation = VBufferEditor.Create(ref dst, len); - if (!mutation.CreatedNewValues) - mutation.Values.Clear(); + var editor = VBufferEditor.Create(ref dst, len); + if (!editor.CreatedNewValues) + editor.Values.Clear(); int voteCount = 0; for (int i = 0; i < count; i++) @@ -92,17 +92,17 @@ private void CombineCore(ref VBuffer dst, VBuffer[] src, Single[ int index = VectorUtils.ArgMax(in src[i]); if (index >= 0) { - mutation.Values[index]++; + editor.Values[index]++; voteCount++; } } // Normalize by dividing by the number of votes. for (int i = 0; i < len; i++) - mutation.Values[i] /= voteCount; + editor.Values[i] /= voteCount; // Set the output to values. 
- dst = mutation.Commit(); + dst = editor.Commit(); } } } diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index a34ac677b4..06faf2f292 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -1358,17 +1358,17 @@ private ValueMapper, VBuffer> GetCopier(ColumnType itemT (in VBuffer src, ref VBuffer dst) => { var srcValues = src.GetValues(); - var mutation = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); + var editor = VBufferEditor.Create(ref dst, src.Length, srcValues.Length); if (srcValues.Length > 0) { if (!src.IsDense) { - src.GetIndices().CopyTo(mutation.Indices); + src.GetIndices().CopyTo(editor.Indices); } for (int i = 0; i < srcValues.Length; ++i) - conv(in srcValues[i], ref mutation.Values[i]); + conv(in srcValues[i], ref editor.Values[i]); } - dst = mutation.Commit(); + dst = editor.Commit(); }; } diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 7ca3103365..48ff08b67b 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -783,15 +783,15 @@ public override void GetFeatureWeights(ref VBuffer weights) } var size = _pValues.Length - 1; - var mutation = VBufferEditor.Create(ref weights, size); + var editor = VBufferEditor.Create(ref weights, size); for (int i = 0; i < size; i++) { var score = -(float)Math.Log(_pValues[i + 1]); if (score > float.MaxValue) score = float.MaxValue; - mutation.Values[i] = score; + editor.Values[i] = score; } - weights = mutation.Commit(); + weights = editor.Commit(); } } } diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index 07481938fb..23280c6176 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -654,7 +654,7 @@ 
private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p else weights = VBufferUtils.CreateDense(numFeatures); - var weightsMutation = VBufferEditor.CreateFromBuffer(ref weights); + var weightsEditor = VBufferEditor.CreateFromBuffer(ref weights); // Reference: Parasail. SymSGD. bool tuneLR = _args.LearningRate == null; @@ -690,7 +690,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p pch.SetHeader(new ProgressHeader(new[] { "iterations" }), entry => entry.SetProgress(0, state.PassIteration, _args.NumberOfIterations)); // If fully loaded, call the SymSGDNative and do not come back until learned for all iterations. - Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsMutation.Values, ref bias, numFeatures, + Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsEditor.Values, ref bias, numFeatures, _args.NumberOfIterations, numThreads, tuneNumLocIter, ref numLocIter, _args.Tolerance, _args.Shuffle, shouldInitialize, stateGCHandle); shouldInitialize = false; } @@ -711,7 +711,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p // If all of this leaves us with 0 passes, then set numPassesForThisBatch to 1 numPassesForThisBatch = Math.Max(1, numPassesForThisBatch); state.PassIteration = iter; - Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsMutation.Values, ref bias, numFeatures, + Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weightsEditor.Values, ref bias, numFeatures, numPassesForThisBatch, numThreads, tuneNumLocIter, ref numLocIter, _args.Tolerance, _args.Shuffle, shouldInitialize, stateGCHandle); shouldInitialize = false; @@ -732,7 +732,7 @@ private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor p // Maps back the dense features that are mislocated if (numThreads > 1) - Native.MapBackWeightVector(weightsMutation.Values, stateGCHandle); + 
Native.MapBackWeightVector(weightsEditor.Values, stateGCHandle); Native.DeallocateSequentially(stateGCHandle); } } diff --git a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs index 0e4c2115b3..b5419fa981 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImagePixelExtractorTransform.cs @@ -485,16 +485,16 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo Host.Check(src.PixelFormat == System.Drawing.Imaging.PixelFormat.Format32bppArgb); Host.Check(src.Height == height && src.Width == width); - var mutation = VBufferEditor.Create(ref dst, size); - var values = mutation.Values; + var editor = VBufferEditor.Create(ref dst, size); + var values = editor.Values; float offset = ex.Offset; float scale = ex.Scale; Contracts.Assert(scale != 0); // REVIEW: split the getter into 2 specialized getters, one for float case and one for byte case. - Span vf = typeof(TValue) == typeof(float) ? MemoryMarshal.Cast(mutation.Values) : default; - Span vb = typeof(TValue) == typeof(byte) ? MemoryMarshal.Cast(mutation.Values) : default; + Span vf = typeof(TValue) == typeof(float) ? MemoryMarshal.Cast(editor.Values) : default; + Span vb = typeof(TValue) == typeof(byte) ? 
MemoryMarshal.Cast(editor.Values) : default; Contracts.Assert(!vf.IsEmpty || !vb.IsEmpty); bool needScale = offset != 0 || scale != 1; Contracts.Assert(!needScale || !vf.IsEmpty); @@ -607,7 +607,7 @@ private ValueGetter> GetGetterCore(IRow input, int iinfo } } - dst = mutation.Commit(); + dst = editor.Commit(); }; } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs index af95a34356..38b5116da4 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPredictor.cs @@ -148,9 +148,9 @@ public ValueMapper GetMapper() { if (src.Length != _dimensionality) throw Host.Except($"Incorrect number of features: expected {_dimensionality}, got {src.Length}"); - var mutation = VBufferEditor.Create(ref dst, _k); - Map(in src, mutation.Values); - dst = mutation.Commit(); + var editor = VBufferEditor.Create(ref dst, _k); + Map(in src, editor.Values); + dst = editor.Commit(); }; return (ValueMapper)(Delegate)del; diff --git a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs index 59b34229a3..5496e67994 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxTransform.cs @@ -282,9 +282,9 @@ private Delegate MakeGetter(IRow input) var outputTensors = _parent.Model.Run(inputTensors); Contracts.Assert(outputTensors.Count() > 0); - var mutation = VBufferEditor.Create(ref dst, _outputColType.VectorSize); - OnnxUtils.CopyTo(outputTensors[0], mutation.Values); - dst = mutation.Commit(); + var editor = VBufferEditor.Create(ref dst, _outputColType.VectorSize); + OnnxUtils.CopyTo(outputTensors[0], editor.Values); + dst = editor.Commit(); }; return valueGetter; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 5a7d8ee213..382caf53e3 100644 --- 
a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -157,9 +157,9 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab VectorUtils.AddMultWithOffset(in feat, mult, ref grad, 1); // Note that 0th L-BFGS weight is for bias. // Add bias using this strange trick that has advantage of working well for dense and sparse arrays. // Due to the call to EnsureBiases, we know this region is dense. - var mutation = VBufferEditor.CreateFromBuffer(ref grad); - Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); - mutation.Values[0] += mult; + var editor = VBufferEditor.CreateFromBuffer(ref grad); + Contracts.Assert(editor.Values.Length >= BiasCount && (grad.IsDense || editor.Indices[BiasCount - 1] == BiasCount - 1)); + editor.Values[0] += mult; return weight * datumLoss; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index c7cb012c0e..4e4571b9c1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -218,9 +218,9 @@ protected override float AccumulateOneGradient(in VBuffer feat, float lab float mult = weight * (modelProb - probLabel); VectorUtils.AddMultWithOffset(in feat, mult, ref grad, start); // Due to the call to EnsureBiases, we know this region is dense. 
- var mutation = VBufferEditor.CreateFromBuffer(ref grad); - Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); - mutation.Values[c] += mult; + var editor = VBufferEditor.CreateFromBuffer(ref grad); + Contracts.Assert(editor.Values.Length >= BiasCount && (grad.IsDense || editor.Indices[BiasCount - 1] == BiasCount - 1)); + editor.Values[c] += mult; } Contracts.Check(FloatUtils.IsFinite(datumLoss), "Data contain bad values."); diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index 470113217b..756607deea 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -380,8 +380,8 @@ private void Map(in VBuffer src, ref VBuffer dst) var srcValues = src.GetValues(); var srcIndices = src.GetIndices(); - var mutation = VBufferEditor.Create(ref dst, _labelCount); - Span labelScores = mutation.Values; + var editor = VBufferEditor.Create(ref dst, _labelCount); + Span labelScores = editor.Values; for (int iLabel = 0; iLabel < _labelCount; iLabel += 1) { double labelOccurrenceCount = _labelHistogram[iLabel]; @@ -411,7 +411,7 @@ private void Map(in VBuffer src, ref VBuffer dst) (float)(logProb + (_absentFeaturesLogProb[iLabel] - absentFeatureLogProb)); } - dst = mutation.Commit(); + dst = editor.Commit(); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index 94a1008e3c..cfa56e1e6f 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -138,9 +138,9 @@ protected override float 
AccumulateOneGradient(in VBuffer feat, float lab float mult = -(y - lambda) * weight; VectorUtils.AddMultWithOffset(in feat, mult, ref grad, 1); // Due to the call to EnsureBiases, we know this region is dense. - var mutation = VBufferEditor.CreateFromBuffer(ref grad); - Contracts.Assert(mutation.Values.Length >= BiasCount && (grad.IsDense || mutation.Indices[BiasCount - 1] == BiasCount - 1)); - mutation.Values[0] += mult; + var editor = VBufferEditor.CreateFromBuffer(ref grad); + Contracts.Assert(editor.Values.Length >= BiasCount && (grad.IsDense || editor.Indices[BiasCount - 1] == BiasCount - 1)); + editor.Values[0] += mult; // From the computer's perspective exp(infinity)==infinity // so inf-inf=nan, but in reality, infinity is just a large // number we can't represent, and exp(X)-X for X=inf is just inf. diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 10ed5759b5..39157b0eba 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -927,9 +927,9 @@ private Delegate MakeGetter(IRow input, int iinfo, ITensorValueGetter[] srcTe var tensor = outputCache.Outputs[_parent.Outputs[iinfo]]; var tensorSize = tensor.Shape.Where(x => x > 0).Aggregate((x, y) => x * y); - var mutation = VBufferEditor.Create(ref dst, (int)tensorSize); - TensorFlowUtils.FetchData(tensor.Data, mutation.Values); - dst = mutation.Commit(); + var editor = VBufferEditor.Create(ref dst, (int)tensorSize); + TensorFlowUtils.FetchData(tensor.Data, editor.Values); + dst = editor.Commit(); }; return valuegetter; } diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 92a1c32391..62db703c06 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -595,18 +595,18 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re if (normScale < 
MinScale) normScale = 1; - VBufferEditor mutation; + VBufferEditor editor; if (offset == 0) { - mutation = VBufferEditor.Create(ref dst, length, count); - var dstValues = mutation.Values; + editor = VBufferEditor.Create(ref dst, length, count); + var dstValues = editor.Values; if (!src.IsDense) { - src.GetIndices().CopyTo(mutation.Indices); + src.GetIndices().CopyTo(editor.Indices); } CpuMathUtils.Scale(normScale, src.GetValues(), dstValues, count); - dst = mutation.Commit(); + dst = editor.Commit(); return; } @@ -614,11 +614,11 @@ private static void FillValues(IExceptionContext ectx, in VBuffer src, re // Subtracting the mean requires a dense representation. src.CopyToDense(ref dst); - mutation = VBufferEditor.CreateFromBuffer(ref dst); + editor = VBufferEditor.CreateFromBuffer(ref dst); if (normScale != 1) - CpuMathUtils.ScaleAdd(normScale, -offset, mutation.Values); + CpuMathUtils.ScaleAdd(normScale, -offset, editor.Values); else - CpuMathUtils.Add(-offset, mutation.Values); + CpuMathUtils.Add(-offset, editor.Values); } /// diff --git a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs index bbe880b1c6..360ab5db82 100644 --- a/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs +++ b/src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs @@ -258,15 +258,15 @@ private void DropNAsAndDefaults(ref VBuffer src, ref VBuffer d int iDst = 0; // Densifying sparse vectors since default value equals NA and hence should be dropped. 
- var mutation = VBufferEditor.Create(ref dst, newCount); + var editor = VBufferEditor.Create(ref dst, newCount); for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) - mutation.Values[iDst++] = srcValues[i]; + editor.Values[iDst++] = srcValues[i]; } Host.Assert(iDst == newCount); - dst = mutation.Commit(); + dst = editor.Commit(); } private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredicate isNA) @@ -297,22 +297,22 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi int iDst = 0; if (src.IsDense) { - var mutation = VBufferEditor.Create(ref dst, newCount); + var editor = VBufferEditor.Create(ref dst, newCount); for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) { - mutation.Values[iDst] = srcValues[i]; + editor.Values[iDst] = srcValues[i]; iDst++; } } Host.Assert(iDst == newCount); - dst = mutation.Commit(); + dst = editor.Commit(); } else { - var newLength = src.Length - srcValues.Length - newCount; - var mutation = VBufferEditor.Create(ref dst, newLength, newCount); + var newLength = src.Length - (srcValues.Length - newCount); // Only the dropped NA entries (srcValues.Length - newCount of them) shrink the logical length. + var editor = VBufferEditor.Create(ref dst, newLength, newCount); var srcIndices = src.GetIndices(); int offset = 0; @@ -320,8 +320,8 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi { if (!isNA(in srcValues[i])) { - mutation.Values[iDst] = srcValues[i]; - mutation.Indices[iDst] = srcIndices[i] - offset; + editor.Values[iDst] = srcValues[i]; + editor.Indices[iDst] = srcIndices[i] - offset; iDst++; } else @@ -329,7 +329,7 @@ private void DropNAs(ref VBuffer src, ref VBuffer dst, InPredi } Host.Assert(iDst == newCount); Host.Assert(offset == srcValues.Length - newCount); - dst = mutation.Commit(); + dst = editor.Commit(); } } } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 548c567534..6ebc10459b 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ 
b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -280,19 +280,19 @@ private static void FillValues(Float input, ref VBuffer result) return; } - var mutation = VBufferEditor.Create(ref result, 2, 1); + var editor = VBufferEditor.Create(ref result, 2, 1); if (Float.IsNaN(input)) { - mutation.Values[0] = 1; - mutation.Indices[0] = 1; + editor.Values[0] = 1; + editor.Indices[0] = 1; } else { - mutation.Values[0] = input; - mutation.Indices[0] = 0; + editor.Values[0] = input; + editor.Indices[0] = 0; } - result = mutation.Commit(); + result = editor.Commit(); } // This converts in place. @@ -302,7 +302,7 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer ectx.Check(0 <= size & size < int.MaxValue / 2); var values = buffer.GetValues(); - var mutation = VBufferEditor.Create(ref buffer, size * 2, values.Length); + var editor = VBufferEditor.Create(ref buffer, size * 2, values.Length); int iivDst = 0; if (buffer.IsDense) { @@ -316,13 +316,13 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer continue; if (Float.IsNaN(val)) { - mutation.Values[iivDst] = 1; - mutation.Indices[iivDst] = 2 * ivSrc + 1; + editor.Values[iivDst] = 1; + editor.Indices[iivDst] = 2 * ivSrc + 1; } else { - mutation.Values[iivDst] = val; - mutation.Indices[iivDst] = 2 * ivSrc; + editor.Values[iivDst] = val; + editor.Indices[iivDst] = 2 * ivSrc; } iivDst++; } @@ -344,20 +344,20 @@ private static void FillValues(IExceptionContext ectx, ref VBuffer buffer ivPrev = iv; if (Float.IsNaN(val)) { - mutation.Values[iivDst] = 1; - mutation.Indices[iivDst] = 2 * iv + 1; + editor.Values[iivDst] = 1; + editor.Indices[iivDst] = 2 * iv + 1; } else { - mutation.Values[iivDst] = val; - mutation.Indices[iivDst] = 2 * iv; + editor.Values[iivDst] = val; + editor.Indices[iivDst] = 2 * iv; } iivDst++; } } ectx.Assert(0 <= iivDst & iivDst <= values.Length); - buffer = mutation.CommitTruncated(iivDst); + buffer = editor.CommitTruncated(iivDst); } } } diff 
--git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 1832ef7f8b..906e7447ad 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -774,19 +774,19 @@ private static ValueMapper, VBuffer> CreateVectorMapper(this ValueMapper map, in VBuffer input, ref VBuffer output) { var inputValues = input.GetValues(); - var mutation = VBufferEditor.Create(ref output, input.Length, inputValues.Length); + var editor = VBufferEditor.Create(ref output, input.Length, inputValues.Length); for (int i = 0; i < inputValues.Length; i++) { TSrc val = inputValues[i]; - map(in val, ref mutation.Values[i]); + map(in val, ref editor.Values[i]); } if (!input.IsDense && inputValues.Length > 0) { - input.GetIndices().CopyTo(mutation.Indices); + input.GetIndices().CopyTo(editor.Indices); } - output = mutation.Commit(); + output = editor.Commit(); } } } diff --git a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs index 3075ecdd55..f885d6722d 100644 --- a/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/CharTokenizeTransform.cs @@ -468,7 +468,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) } } - var mutation = VBufferEditor.Create(ref dst, len); + var editor = VBufferEditor.Create(ref dst, len); if (len > 0) { int index = 0; @@ -477,17 +477,17 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) if (srcValues[i].IsEmpty) continue; if (_parent._useMarkerChars) - mutation.Values[index++] = TextStartMarker; + editor.Values[index++] = TextStartMarker; var span = srcValues[i].Span; for (int ich = 0; ich < srcValues[i].Length; ich++) - mutation.Values[index++] = span[ich]; + editor.Values[index++] = span[ich]; if (_parent._useMarkerChars) - mutation.Values[index++] = TextEndMarker; 
+ editor.Values[index++] = TextEndMarker; } Contracts.Assert(index == len); } - dst = mutation.Commit(); + dst = editor.Commit(); }; ValueGetter> getterWithUnitSep = (ref VBuffer dst) => @@ -511,7 +511,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) if (_parent._useMarkerChars) len += TextMarkersCount; - var mutation = VBufferEditor.Create(ref dst, len); + var editor = VBufferEditor.Create(ref dst, len); if (len > 0) { int index = 0; @@ -523,7 +523,7 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) // Therefore, prepend and append start and end markers only once i.e. at the start and at end of vector. // Insert UnitSeparator after every piece of text in the vector. if (_parent._useMarkerChars) - mutation.Values[index++] = TextStartMarker; + editor.Values[index++] = TextStartMarker; for (int i = 0; i < srcValues.Length; i++) { @@ -531,20 +531,20 @@ private ValueGetter> MakeGetterVec(IRow input, int iinfo) continue; if (i > 0) - mutation.Values[index++] = UnitSeparator; + editor.Values[index++] = UnitSeparator; var span = srcValues[i].Span; for (int ich = 0; ich < srcValues[i].Length; ich++) - mutation.Values[index++] = span[ich]; + editor.Values[index++] = span[ich]; } if (_parent._useMarkerChars) - mutation.Values[index++] = TextEndMarker; + editor.Values[index++] = TextEndMarker; Contracts.Assert(index == len); } - dst = mutation.Commit(); + dst = editor.Commit(); }; return _parent._isSeparatorStartEnd ? getterWithStartEndSep : getterWithUnitSep; } diff --git a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs index fac7a7a9e7..0c18f713d3 100644 --- a/src/Microsoft.ML.Transforms/Text/LdaTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/LdaTransform.cs @@ -858,7 +858,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin return; } - VBufferEditor mutation; + VBufferEditor editor; // Make sure all the frequencies are valid and truncate if the sum gets too large. 
int docSize = 0; int termNum = 0; @@ -870,10 +870,10 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin // REVIEW: Should this log a warning message? And what should it produce? // It currently produces a vbuffer of all NA values. // REVIEW: Need a utility method to do this... - mutation = VBufferEditor.Create(ref dst, len); + editor = VBufferEditor.Create(ref dst, len); for (int k = 0; k < len; k++) - mutation.Values[k] = Float.NaN; - dst = mutation.Commit(); + editor.Values[k] = Float.NaN; + dst = editor.Commit(); return; } @@ -894,7 +894,7 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin int count = retTopics.Count; Contracts.Assert(count <= len); - mutation = VBufferEditor.Create(ref dst, len, count); + editor = VBufferEditor.Create(ref dst, len, count); double normalizer = 0; for (int i = 0; i < count; i++) { @@ -904,22 +904,22 @@ public void Output(in VBuffer src, ref VBuffer dst, int numBurnin Contracts.Assert(0 <= index && index < len); if (count < len) { - Contracts.Assert(i == 0 || mutation.Indices[i - 1] < index); - mutation.Indices[i] = index; + Contracts.Assert(i == 0 || editor.Indices[i - 1] < index); + editor.Indices[i] = index; } else Contracts.Assert(index == i); - mutation.Values[i] = value; + editor.Values[i] = value; normalizer += value; } if (normalizer > 0) { for (int i = 0; i < count; i++) - mutation.Values[i] = (Float)(mutation.Values[i] / normalizer); + editor.Values[i] = (Float)(editor.Values[i] / normalizer); } - dst = mutation.Commit(); + dst = editor.Commit(); } public void Dispose() diff --git a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs index d7867d16a3..57d23d3b74 100644 --- a/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordEmbeddingsTransform.cs @@ -583,13 +583,13 @@ private ValueGetter> GetGetterVec(IRow input, int iinfo) { int deno = 0; srcGetter(ref src); - var 
mutation = VBufferEditor.Create(ref dst, 3 * dimension); + var editor = VBufferEditor.Create(ref dst, 3 * dimension); int offset = 2 * dimension; for (int i = 0; i < dimension; i++) { - mutation.Values[i] = float.MaxValue; - mutation.Values[i + dimension] = 0; - mutation.Values[i + offset] = float.MinValue; + editor.Values[i] = float.MaxValue; + editor.Values[i + dimension] = 0; + editor.Values[i + offset] = float.MinValue; } var srcValues = src.GetValues(); for (int word = 0; word < srcValues.Length; word++) @@ -600,20 +600,20 @@ private ValueGetter> GetGetterVec(IRow input, int iinfo) for (int i = 0; i < dimension; i++) { float currentTerm = wordVector[i]; - if (mutation.Values[i] > currentTerm) - mutation.Values[i] = currentTerm; - mutation.Values[dimension + i] += currentTerm; - if (mutation.Values[offset + i] < currentTerm) - mutation.Values[offset + i] = currentTerm; + if (editor.Values[i] > currentTerm) + editor.Values[i] = currentTerm; + editor.Values[dimension + i] += currentTerm; + if (editor.Values[offset + i] < currentTerm) + editor.Values[offset + i] = currentTerm; } } } if (deno != 0) for (int index = 0; index < dimension; index++) - mutation.Values[index + dimension] /= deno; + editor.Values[index + dimension] /= deno; - dst = mutation.Commit(); + dst = editor.Commit(); }; } } diff --git a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs index 85cf3f2326..af3979792b 100644 --- a/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/WordTokenizeTransform.cs @@ -320,10 +320,10 @@ private ValueGetter>> MakeGetterVec(IRow input, int for (int i = 0; i < srcValues.Length; i++) AddTerms(srcValues[i], separators, terms); - var mutation = VBufferEditor.Create(ref dst, terms.Count); + var editor = VBufferEditor.Create(ref dst, terms.Count); for (int i = 0; i < terms.Count; i++) - mutation.Values[i] = terms[i]; - dst = mutation.Commit(); + 
editor.Values[i] = terms[i]; + dst = editor.Commit(); }; } diff --git a/src/Microsoft.ML.Transforms/VectorWhitening.cs b/src/Microsoft.ML.Transforms/VectorWhitening.cs index 8927103422..1bf5cbc861 100644 --- a/src/Microsoft.ML.Transforms/VectorWhitening.cs +++ b/src/Microsoft.ML.Transforms/VectorWhitening.cs @@ -729,11 +729,11 @@ private static void FillValues(float[] model, ref VBuffer src, ref VBuffe int length = src.Length; // Since the whitening process produces dense vector, always use dense representation of dst. - var mutation = VBufferEditor.Create(ref dst, cdst); + var editor = VBufferEditor.Create(ref dst, cdst); if (src.IsDense) { Mkl.Gemv(Mkl.Layout.RowMajor, Mkl.Transpose.NoTrans, cdst, length, - 1, model, length, values, 1, 0, mutation.Values, 1); + 1, model, length, values, 1, 0, editor.Values, 1); } else { @@ -744,11 +744,11 @@ private static void FillValues(float[] model, ref VBuffer src, ref VBuffe { // Returns a dot product of dense vector 'model' starting from offset 'offs' and sparse vector 'values' // with first 'count' valid elements and their corresponding 'indices'. - mutation.Values[i] = CpuMathUtils.DotProductSparse(model.AsSpan(offs), values, indices, count); + editor.Values[i] = CpuMathUtils.DotProductSparse(model.AsSpan(offs), values, indices, count); offs += length; } } - dst = mutation.Commit(); + dst = editor.Commit(); } private static float DotProduct(float[] a, int aOffset, ReadOnlySpan b, ReadOnlySpan indices, int count)