From 3ca267a2460a4c8887c76ac2a3e1eeb9e9183dc2 Mon Sep 17 00:00:00 2001 From: Eric StJohn Date: Thu, 21 Dec 2023 13:49:10 -0800 Subject: [PATCH] Fix assert by only accessing idx Asserting on `_rowCount < Utils.Size(_valueBoundaries)` was catching a case where `_rowCount`'s update was reordered before `_valueBoundaries` This was unnecessary, since this method doesn't need to use `_rowCount`. Instead, make the asserts use only `idx` which will be maintained consistent with the waiter logic in this cache. Ensure we only ever use `_rowCount` from the caching thread, so write reordering won't matter. --- src/Microsoft.ML.Data/DataView/CacheDataView.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.Data/DataView/CacheDataView.cs b/src/Microsoft.ML.Data/DataView/CacheDataView.cs index 4c2491e09a..e6c8755edf 100644 --- a/src/Microsoft.ML.Data/DataView/CacheDataView.cs +++ b/src/Microsoft.ML.Data/DataView/CacheDataView.cs @@ -1320,7 +1320,7 @@ public virtual void Freeze() private sealed class ImplVec : ColumnCache> { - // The number of rows cached. + // The number of rows cached. Only to be accesssed by the Caching thread. private int _rowCount; // For a given row [r], elements at [r] and [r+1] specify the inclusive // and exclusive range of values for the two big arrays. In the case @@ -1384,10 +1384,10 @@ public override void CacheCurrent() public override void Fetch(int idx, ref VBuffer value) { - Ctx.Assert(0 <= idx && idx < _rowCount); - Ctx.Assert(_rowCount < Utils.Size(_indexBoundaries)); - Ctx.Assert(_rowCount < Utils.Size(_valueBoundaries)); - Ctx.Assert(_uniformLength > 0 || _rowCount <= Utils.Size(_lengths)); + Ctx.Assert(0 <= idx); + Ctx.Assert((idx + 1) < Utils.Size(_indexBoundaries)); + Ctx.Assert((idx + 1) < Utils.Size(_valueBoundaries)); + Ctx.Assert(_uniformLength > 0 || idx < Utils.Size(_lengths)); Ctx.Assert(_indexBoundaries[idx + 1] - _indexBoundaries[idx] <= int.MaxValue); int indexCount = (int)(_indexBoundaries[idx + 1] - _indexBoundaries[idx]);