From 7b5fedfb0e3e8b03204a45a5b613266dc379b22a Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Tue, 6 Jun 2023 15:13:10 +0300 Subject: [PATCH 1/3] Provide ability to filter by null value --- .../DataFrameColumn.BinaryOperations.cs | 9 +++ .../DataFrameColumn.BinaryOperations.tt | 9 +++ ...imitiveDataFrameColumn.BinaryOperations.cs | 75 +++++++++++++++++++ ...imitiveDataFrameColumn.BinaryOperations.tt | 13 +++- .../PrimitiveDataFrameColumn.cs | 24 ++++++ .../StringDataFrameColumn.BinaryOperations.cs | 26 +++++++ .../DataFrameTests.cs | 68 +++++++++++++++++ 7 files changed, 223 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs index 8ecd052486..4a3bac6988 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs @@ -316,5 +316,14 @@ public virtual PrimitiveDataFrameColumn ElementwiseLessThan(T value) throw new NotImplementedException(); } + public virtual PrimitiveDataFrameColumn ElementwiseIsNull() + { + throw new NotImplementedException(); + } + + public virtual PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + throw new NotImplementedException(); + } } } diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.tt b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.tt index d2e289195c..a417bbf876 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.tt +++ b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.tt @@ -78,5 +78,14 @@ namespace Microsoft.Data.Analysis <# } #> <# } #> + public virtual PrimitiveDataFrameColumn ElementwiseIsNull() + { + throw new NotImplementedException(); + } + + public virtual PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + throw new NotImplementedException(); + } } } diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs index 2e51f72e63..74506e0904 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs @@ -47,10 +47,12 @@ public override DataFrameColumn Add(DataFrameColumn column, bool inPlace = false return AddImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return AddImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Add(U value, bool inPlace = false) { @@ -61,6 +63,7 @@ public override DataFrameColumn Add(U value, bool inPlace = false) } return AddImplementation(value, inPlace); } + /// public override DataFrameColumn Subtract(DataFrameColumn column, bool inPlace = false) { @@ -94,10 +97,12 @@ public override DataFrameColumn Subtract(DataFrameColumn column, bool inPlace = return SubtractImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return SubtractImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Subtract(U value, bool inPlace = false) { @@ -108,6 +113,7 @@ public override DataFrameColumn Subtract(U value, bool inPlace = false) } return SubtractImplementation(value, inPlace); } + /// public override DataFrameColumn Multiply(DataFrameColumn column, bool inPlace = false) { @@ -141,10 +147,12 @@ public override DataFrameColumn Multiply(DataFrameColumn column, bool inPlace = return MultiplyImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return MultiplyImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Multiply(U value, bool inPlace = false) { @@ -155,6 +163,7 @@ public override DataFrameColumn Multiply(U value, bool inPlace = false) } return MultiplyImplementation(value, inPlace); } + /// public override DataFrameColumn Divide(DataFrameColumn column, bool inPlace = false) { @@ -188,10 +197,12 @@ public override DataFrameColumn Divide(DataFrameColumn column, bool inPlace = fa return DivideImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return DivideImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Divide(U value, bool inPlace = false) { @@ -202,6 +213,7 @@ public override DataFrameColumn Divide(U value, bool inPlace = false) } return DivideImplementation(value, inPlace); } + /// public override DataFrameColumn Modulo(DataFrameColumn column, bool inPlace = false) { @@ -235,10 +247,12 @@ public override DataFrameColumn Modulo(DataFrameColumn column, bool inPlace = fa return ModuloImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return ModuloImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Modulo(U value, bool inPlace = false) { @@ -249,6 +263,7 @@ public override DataFrameColumn Modulo(U value, bool inPlace = false) } return ModuloImplementation(value, inPlace); } + /// public override DataFrameColumn And(DataFrameColumn column, bool inPlace = false) { @@ -282,15 +297,18 @@ public override DataFrameColumn And(DataFrameColumn column, bool inPlace = false return AndImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return AndImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn And(bool value, bool inPlace = false) { return AndImplementation(value, inPlace); } + /// public override DataFrameColumn Or(DataFrameColumn column, bool inPlace = false) { @@ -324,15 +342,18 @@ public override DataFrameColumn Or(DataFrameColumn column, bool inPlace = false) return OrImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return OrImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn Or(bool value, bool inPlace = false) { return OrImplementation(value, inPlace); } + /// public override DataFrameColumn Xor(DataFrameColumn column, bool inPlace = false) { @@ -366,15 +387,18 @@ public override DataFrameColumn Xor(DataFrameColumn column, bool inPlace = false return XorImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return XorImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn Xor(bool value, bool inPlace = false) { return XorImplementation(value, inPlace); } + /// public override DataFrameColumn LeftShift(int value, bool inPlace = false) { @@ -418,10 +442,14 @@ public override PrimitiveDataFrameColumn ElementwiseEquals(DataFrameColumn return ElementwiseEqualsImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseEqualsImplementation(DateTimeColumn); + case null: + return ElementwiseIsNull(); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseEquals(U value) { @@ -432,6 +460,7 @@ public override PrimitiveDataFrameColumn ElementwiseEquals(U value) } return ElementwiseEqualsImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameColumn column) { @@ -465,10 +494,14 @@ public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameCol return ElementwiseNotEqualsImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseNotEqualsImplementation(DateTimeColumn); + case null: + return ElementwiseIsNotNull(); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseNotEquals(U value) { @@ -479,6 +512,7 @@ public override PrimitiveDataFrameColumn ElementwiseNotEquals(U value) } return ElementwiseNotEqualsImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(DataFrameColumn column) { @@ -512,10 +546,12 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(Dat return ElementwiseGreaterThanOrEqualImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseGreaterThanOrEqualImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(U value) { @@ -526,6 +562,7 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual( } return ElementwiseGreaterThanOrEqualImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(DataFrameColumn column) { @@ -559,10 +596,12 @@ public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(DataFr return ElementwiseLessThanOrEqualImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseLessThanOrEqualImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(U value) { @@ -573,6 +612,7 @@ public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(U v } return ElementwiseLessThanOrEqualImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThan(DataFrameColumn column) { @@ -606,10 +646,12 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThan(DataFrameC return ElementwiseGreaterThanImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseGreaterThanImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThan(U value) { @@ -620,6 +662,7 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThan(U value } return ElementwiseGreaterThanImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseLessThan(DataFrameColumn column) { @@ -653,10 +696,12 @@ public override PrimitiveDataFrameColumn ElementwiseLessThan(DataFrameColu return ElementwiseLessThanImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseLessThanImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseLessThan(U value) { @@ -668,6 +713,7 @@ public override PrimitiveDataFrameColumn ElementwiseLessThan(U value) return ElementwiseLessThanImplementation(value); } + internal DataFrameColumn AddImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -750,6 +796,7 @@ internal DataFrameColumn AddImplementation(PrimitiveDataFrameColumn column throw new NotSupportedException(); } } + internal DataFrameColumn AddImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -827,6 +874,7 @@ internal DataFrameColumn AddImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn SubtractImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -909,6 +957,7 @@ internal DataFrameColumn SubtractImplementation(PrimitiveDataFrameColumn c throw new NotSupportedException(); } } + internal DataFrameColumn SubtractImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -986,6 +1035,7 @@ internal DataFrameColumn SubtractImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn MultiplyImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1068,6 +1118,7 @@ internal DataFrameColumn MultiplyImplementation(PrimitiveDataFrameColumn c throw new NotSupportedException(); } } + internal DataFrameColumn MultiplyImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1145,6 +1196,7 @@ internal DataFrameColumn MultiplyImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn DivideImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1227,6 +1279,7 @@ internal DataFrameColumn DivideImplementation(PrimitiveDataFrameColumn col throw new NotSupportedException(); } } + internal DataFrameColumn DivideImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1304,6 +1357,7 @@ internal DataFrameColumn DivideImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn ModuloImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1386,6 +1440,7 @@ internal DataFrameColumn ModuloImplementation(PrimitiveDataFrameColumn col throw new NotSupportedException(); } } + internal DataFrameColumn ModuloImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1463,6 +1518,7 @@ internal DataFrameColumn ModuloImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn AndImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1498,6 +1554,7 @@ internal DataFrameColumn AndImplementation(PrimitiveDataFrameColumn column throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn AndImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1528,6 +1585,7 @@ internal PrimitiveDataFrameColumn AndImplementation(U value, bool inPla throw new NotSupportedException(); } } + internal DataFrameColumn OrImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1563,6 +1621,7 @@ internal DataFrameColumn OrImplementation(PrimitiveDataFrameColumn column, throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn OrImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1593,6 +1652,7 @@ internal PrimitiveDataFrameColumn OrImplementation(U value, bool inPlac throw new NotSupportedException(); } } + internal DataFrameColumn XorImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1628,6 +1688,7 @@ internal DataFrameColumn XorImplementation(PrimitiveDataFrameColumn column throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn XorImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1658,6 +1719,7 @@ internal PrimitiveDataFrameColumn XorImplementation(U value, bool inPla throw new NotSupportedException(); } } + internal DataFrameColumn LeftShiftImplementation(int value, bool inPlace) { switch (typeof(T)) @@ -1721,6 +1783,7 @@ internal DataFrameColumn LeftShiftImplementation(int value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn RightShiftImplementation(int value, bool inPlace) { switch (typeof(T)) @@ -1784,6 +1847,7 @@ internal DataFrameColumn RightShiftImplementation(int value, bool inPlace) throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -1873,6 +1937,7 @@ internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(Primi throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(U value) { switch (typeof(T)) @@ -1957,6 +2022,7 @@ internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(U val throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2046,6 +2112,7 @@ internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(Pr throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(U value) { switch (typeof(T)) @@ -2130,6 +2197,7 @@ internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(U throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2207,6 +2275,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementat throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementation(U value) { switch (typeof(T)) @@ -2279,6 +2348,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementat throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2356,6 +2426,7 @@ internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation(U value) { switch (typeof(T)) @@ -2428,6 +2499,7 @@ internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2505,6 +2577,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation( throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation(U value) { switch (typeof(T)) @@ -2577,6 +2650,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation( throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2654,6 +2728,7 @@ internal PrimitiveDataFrameColumn ElementwiseLessThanImplementation(Pri throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanImplementation(U value) { switch (typeof(T)) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.tt b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.tt index 71b7a87339..83a6c80d49 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.tt +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.tt @@ -40,10 +40,20 @@ namespace Microsoft.Data.Analysis return <#=method.MethodName#>Implementation(<#=type.TypeName#>Column); <# } #> <# } #> +<# if (method.MethodName == "ElementwiseEquals") {#> + case null: + return ElementwiseIsNull(); +<# } #> +<# if (method.MethodName == "ElementwiseNotEquals") {#> + case null: + return ElementwiseIsNotNull(); +<# } #> + default: throw new NotSupportedException(); } } + <# } #> <# if (method.MethodType == MethodType.BinaryScalar || method.MethodType == MethodType.ComparisonScalar) {#> <# if (method.MethodType == MethodType.BinaryScalar) {#> @@ -79,6 +89,7 @@ namespace Microsoft.Data.Analysis return <#=method.MethodName#>Implementation(value); <# } #> } + <# } #> <# if (method.MethodType == MethodType.BinaryInt ) {#> /// @@ -88,8 +99,8 @@ namespace Microsoft.Data.Analysis } <# } #> <# } #> - <# foreach (MethodConfiguration method in methodConfiguration) { #> + <# if (method.MethodType == MethodType.BinaryScalar || method.MethodType == MethodType.ComparisonScalar) {#> <# if (method.MethodType == MethodType.BinaryScalar) {#> <# if (method.IsBitwise == true) { #> diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index 113b67cc1c..a5534a3d4f 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -822,5 +822,29 @@ public override Dictionary> GetGroupedOccurrences(DataFr { return GetGroupedOccurrences(other, out otherColumnNullIndices); } + + public override PrimitiveDataFrameColumn ElementwiseIsNull() + { + var ret = new BooleanDataFrameColumn(Name, Length); + + for (long i = 0; i < Length; i++) + { + ret[i] = !_columnContainer[i].HasValue; + } + + return ret; + } + + public override PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + var ret = new BooleanDataFrameColumn(Name, Length); + + for (long i = 0; i < Length; i++) + { + ret[i] = _columnContainer[i].HasValue; + } + + return ret; + } } } diff --git a/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs index 0bfbd3b6bc..c6ffe4c4cf 100644 --- a/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs @@ -91,6 +91,9 @@ internal static PrimitiveDataFrameColumn ElementwiseEqualsImplementation(D /// public override PrimitiveDataFrameColumn ElementwiseEquals(DataFrameColumn column) { + if (column == null) + return ElementwiseIsNull(); + return ElementwiseEqualsImplementation(this, column); } @@ -128,6 +131,26 @@ internal static PrimitiveDataFrameColumn ElementwiseNotEqualsImplementatio return ret; } + public override PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, Length); + for (long i = 0; i < Length; i++) + { + ret[i] = this[i] != null; + } + return ret; + } + + public override PrimitiveDataFrameColumn ElementwiseIsNull() + { + PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, Length); + for (long i = 0; i < Length; i++) + { + ret[i] = this[i] == null; + } + return ret; + } + public PrimitiveDataFrameColumn ElementwiseNotEquals(string value) { PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, Length); @@ -141,6 +164,9 @@ public PrimitiveDataFrameColumn ElementwiseNotEquals(string value) /// public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameColumn column) { + if (column == null) + return ElementwiseIsNotNull(); + return ElementwiseNotEqualsImplementation(this, column); } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 1695ded747..583203f049 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -3333,5 +3333,73 @@ public void TestMeanMedian() Assert.Equal(4, df["Decimal"].Median()); } + + [Fact] + public void Test_PrimitiveColumnNotEqualsNull() + { + var col = new DoubleDataFrameColumn("col", new double?[] { 1.23, null, 2, 3 }); + var dfTest = new DataFrame(col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseNotEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.Equal(3, filteredNullDf.Columns["col"].Length); + + Assert.Equal(1.23, filteredNullDf.Columns["col"][0]); + Assert.Equal(2.0, filteredNullDf.Columns["col"][1]); + Assert.Equal(3.0, filteredNullDf.Columns["col"][2]); + } + + [Fact] + public void Test_PrimitiveColumnEqualsNull() + { + var index = new Int32DataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); + var col = new DoubleDataFrameColumn("col", new double?[] { 1.23, null, 2, 3, null }); ; + var dfTest = new DataFrame(index, col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.True(filteredNullDf.Columns.IndexOf("index") >= 0); + + Assert.Equal(2, filteredNullDf.Rows.Count); + + Assert.Equal(2, filteredNullDf.Columns["index"][0]); + Assert.Equal(5, filteredNullDf.Columns["index"][1]); + } + + [Fact] + public void Test_StringColumnNotEqualsNull() + { + var col = new StringDataFrameColumn("col", new[] { "One", null, "Two", "Three" }); + var dfTest = new DataFrame(col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseNotEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.Equal(3, filteredNullDf.Columns["col"].Length); + + Assert.Equal("One", filteredNullDf.Columns["col"][0]); + Assert.Equal("Two", filteredNullDf.Columns["col"][1]); + Assert.Equal("Three", filteredNullDf.Columns["col"][2]); + } + + [Fact] + public void Test_StringColumnEqualsNull() + { + var index = new Int32DataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); + var col = new StringDataFrameColumn("col", new[] { "One", null, "Three", "Four", null }); ; + var dfTest = new DataFrame(index, col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.True(filteredNullDf.Columns.IndexOf("index") >= 0); + + Assert.Equal(2, filteredNullDf.Rows.Count); + + Assert.Equal(2, filteredNullDf.Columns["index"][0]); + Assert.Equal(5, filteredNullDf.Columns["index"][1]); + } } } From 55d4fcf234626a351e6596403a09149537c4d017 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Tue, 6 Jun 2023 16:55:14 +0300 Subject: [PATCH 2/3] Add comments --- .../DataFrameColumn.BinaryOperations.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs index 4a3bac6988..1c340575db 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs @@ -316,11 +316,17 @@ public virtual PrimitiveDataFrameColumn ElementwiseLessThan(T value) throw new NotImplementedException(); } + /// + /// Performs an element-wise equal to Null on each value in the column + /// public virtual PrimitiveDataFrameColumn ElementwiseIsNull() { throw new NotImplementedException(); } + /// + /// Performs an element-wise not equal to Null on each value in the column + /// public virtual PrimitiveDataFrameColumn ElementwiseIsNotNull() { throw new NotImplementedException(); From 655f2687058096d82098e4c10766ccb3df82abf5 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 12:37:26 +0300 Subject: [PATCH 3/3] Fix code review findings --- src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index 9fbff16e4b..402509a00e 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -840,7 +840,7 @@ public override PrimitiveDataFrameColumn ElementwiseIsNull() for (long i = 0; i < Length; i++) { - ret[i] = !_columnContainer[i].HasValue; + ret[i] = !IsValid(i); } return ret; @@ -852,7 +852,7 @@ public override PrimitiveDataFrameColumn ElementwiseIsNotNull() for (long i = 0; i < Length; i++) { - ret[i] = _columnContainer[i].HasValue; + ret[i] = IsValid(i); } return ret;