Skip to content

Commit 7b712f8

Browse files
authored
Produce uncorrelated IN for Contains with nullable item (dotnet#32575)
Fixes dotnet#32574
1 parent b5b1abf commit 7b712f8

File tree

31 files changed

+1255
-619
lines changed

31 files changed

+1255
-619
lines changed

src/EFCore.Relational/Query/SqlNullabilityProcessor.cs

Lines changed: 175 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,7 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
699699
return inExpression;
700700

701701
case (true, false):
702-
{
702+
NullableItemWithNonNullableProjection:
703703
// If the item is actually null (not just nullable) and the projection is non-nullable, just return false immediately:
704704
// WHERE NULL IN (SELECT NonNullable FROM foo) -> false
705705
if (IsNull(item))
@@ -714,7 +714,6 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
714714
return allowOptimizedExpansion
715715
? inExpression
716716
: _sqlExpressionFactory.AndAlso(inExpression, _sqlExpressionFactory.IsNotNull(item));
717-
}
718717

719718
case (false, true):
720719
{
@@ -727,6 +726,14 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
727726
return inExpression;
728727
}
729728

729+
// If the subquery happens to be a primitive collection (e.g. OPENJSON), pull out the null values from the parameter.
730+
// Since the item is non-nullable, it can never match those null values, and all they do is cause the IN expression
731+
// to return NULL if the item isn't found. So just remove them.
732+
if (TryMakeNonNullable(subquery, out var nonNullableSubquery, out _))
733+
{
734+
return inExpression.Update(item, nonNullableSubquery);
735+
}
736+
730737
// On SQL Server, EXISTS isn't less efficient than IN, and the additional COALESCE (and CASE/WHEN which it requires)
731738
// add unneeded clutter (and possibly hurt perf). So allow providers to prefer EXISTS.
732739
if (PreferExistsToInWithCoalesce)
@@ -738,13 +745,46 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
738745
}
739746

740747
case (true, true):
741-
TransformToExists:
742-
// Worst case: both sides are nullable; there's no way to distinguish the item was found or not.
743-
// We rewrite to an EXISTS subquery where we can generate a precise predicate to check for what we need. Note that this
744-
// performs (significantly) worse than an IN expression, since it involves a correlated subquery.
748+
// Worst case: both sides are nullable; that means that with IN, there's no way to distinguish between:
749+
// a) The item was NULL and was found (e.g. NULL IN (1, 2, NULL) should yield true), and
750+
// b) The item wasn't found (e.g. 3 IN (1, 2, NULL) should yield false)
751+
752+
// As a last resort, we can rewrite to an EXISTS subquery where we can generate a precise predicate to check for what we
753+
// need. This unfortunately performs (significantly) worse than an IN expression, since it involves a correlated
754+
// subquery, and can cause indexes to not get used.
755+
756+
// But before doing this, we check whether the subquery represents a simple parameterized collection (e.g. a bare
757+
// OPENJSON call over a parameter in SQL Server), and if it is, rewrite the parameter to remove nulls so we can keep
758+
// using IN.
759+
if (TryMakeNonNullable(subquery, out var nonNullableSubquery2, out var foundNull))
760+
{
761+
inExpression = inExpression.Update(item, nonNullableSubquery2);
762+
763+
if (!foundNull.Value)
764+
{
765+
// There weren't any actual nulls inside the parameterized collection - we can jump to the case which handles
766+
// that.
767+
goto NullableItemWithNonNullableProjection;
768+
}
769+
770+
// Nulls were found inside the parameterized collection, and removed. If the item is a null constant, just convert
771+
// the whole thing to true.
772+
if (IsNull(item))
773+
{
774+
return _sqlExpressionFactory.Constant(true, inExpression.TypeMapping);
775+
}
776+
777+
// Otherwise we now need to compensate for the removed nulls outside, by adding OR item IS NULL.
778+
// Note that this is safe in negated (non-optimized) contexts:
779+
// WHERE item NOT IN ("foo", "bar") AND item IS NOT NULL
780+
// When item is NULL, the item IS NOT NULL clause causes the whole thing to return false. Otherwise that clause
781+
// can be ignored, and we have non-null item IN non-null list-of-values.
782+
return _sqlExpressionFactory.OrElse(inExpression, _sqlExpressionFactory.IsNull(item));
783+
}
745784

746-
// We'll need to mutate the subquery to introduce the predicate inside it, but it might be referenced by other places in
747-
// the tree, so we create a copy to work on.
785+
TransformToExists:
786+
// We unfortunately need to rewrite to EXISTS. We'll need to mutate the subquery to introduce the predicate inside it,
787+
// but it might be referenced by other places in the tree, so we create a copy to work on.
748788

749789
// No need for a projection with EXISTS, clear it to get SELECT 1
750790
subquery = subquery.Update(
@@ -1953,6 +1993,133 @@ static bool TryNegate(ExpressionType expressionType, out ExpressionType result)
19531993
}
19541994
}
19551995

1996+
/// <summary>
1997+
/// Attempts to convert the given <paramref name="selectExpression" />, which has a nullable projection, to an identical expression
1998+
/// which does not have a nullable projection. This is used to extract NULLs out of e.g. the parameter argument of SQL Server
1999+
/// OPENJSON, in order to allow a more efficient translation.
2000+
/// </summary>
2001+
[EntityFrameworkInternal]
2002+
protected virtual bool TryMakeNonNullable(
2003+
SelectExpression selectExpression,
2004+
[NotNullWhen(true)] out SelectExpression? rewrittenSelectExpression,
2005+
[NotNullWhen(true)] out bool? foundNull)
2006+
{
2007+
if (selectExpression is
2008+
{
2009+
Tables: [var collectionTable],
2010+
GroupBy: [],
2011+
Having: null,
2012+
Limit: null,
2013+
Offset: null,
2014+
Predicate: null,
2015+
// Note that a orderings and distinct are OK - they don't interact with our null removal.
2016+
// We exclude the predicate since it may actually filter out nulls
2017+
Projection: [{ Expression: ColumnExpression projectedColumn }] projection
2018+
}
2019+
&& projectedColumn.Table == collectionTable
2020+
&& IsCollectionTable(collectionTable, out var collection)
2021+
&& collection is SqlParameterExpression collectionParameter
2022+
&& ParameterValues[collectionParameter.Name] is IList values)
2023+
{
2024+
// We're looking at a parameter beyond its simple nullability, so we can't use the 2nd-level cache for this query.
2025+
DoNotCache();
2026+
2027+
IList? processedValues = null;
2028+
2029+
for (var i = 0; i < values.Count; i++)
2030+
{
2031+
var value = values[i];
2032+
2033+
if (value is null)
2034+
{
2035+
if (processedValues is null)
2036+
{
2037+
var elementClrType = values.GetType().GetSequenceType();
2038+
processedValues = (IList)Activator.CreateInstance(typeof(List<>).MakeGenericType(elementClrType), values.Count)!;
2039+
for (var j = 0; j < i; j++)
2040+
{
2041+
processedValues.Add(values[j]!);
2042+
}
2043+
}
2044+
2045+
// Skip the value
2046+
continue;
2047+
}
2048+
2049+
processedValues?.Add(value);
2050+
}
2051+
2052+
if (processedValues is null)
2053+
{
2054+
// No null was found in the parameter's elements - the select expression is already non-nullable.
2055+
// TODO: We should change the project column to be non-nullable, but it's too closed down for that.
2056+
rewrittenSelectExpression = selectExpression;
2057+
foundNull = false;
2058+
return true;
2059+
}
2060+
2061+
foundNull = true;
2062+
2063+
// TODO: We currently only have read-only access to the parameter values in the nullability processor (and in all of the
2064+
// 2nd-level query pipeline); to need to flow the mutable dictionary in. Note that any modification of parameter values (as
2065+
// here) must immediately entail DoNotCache().
2066+
Check.DebugAssert(ParameterValues is Dictionary<string, object?>, "ParameterValues isn't a Dictionary");
2067+
if (ParameterValues is not Dictionary<string, object?> mutableParameterValues)
2068+
{
2069+
rewrittenSelectExpression = null;
2070+
foundNull = null;
2071+
return false;
2072+
}
2073+
2074+
var rewrittenParameter = new SqlParameterExpression(
2075+
collectionParameter.Name + "_without_nulls", collectionParameter.Type, collectionParameter.TypeMapping);
2076+
mutableParameterValues[rewrittenParameter.Name] = processedValues;
2077+
var rewrittenCollectionTable = UpdateParameterCollection(collectionTable, rewrittenParameter);
2078+
2079+
// We clone the select expression since Update below doesn't create a pure copy, mutating the original as well (because of
2080+
// TableReferenceExpression). TODO: Remove this after #31327.
2081+
#pragma warning disable EF1001
2082+
rewrittenSelectExpression = selectExpression.Clone();
2083+
#pragma warning restore EF1001
2084+
2085+
rewrittenSelectExpression = rewrittenSelectExpression.Update(
2086+
projection, // TODO: We should change the project column to be non-nullable, but it's too closed down for that.
2087+
new[] { rewrittenCollectionTable },
2088+
selectExpression.Predicate,
2089+
selectExpression.GroupBy,
2090+
selectExpression.Having,
2091+
selectExpression.Orderings,
2092+
selectExpression.Limit,
2093+
selectExpression.Offset);
2094+
2095+
return true;
2096+
}
2097+
2098+
rewrittenSelectExpression = null;
2099+
foundNull = null;
2100+
return false;
2101+
}
2102+
2103+
/// <summary>
2104+
/// A provider hook for identifying a <see cref="TableExpressionBase" /> which represents a collection, e.g. OPENJSON on SQL Server.
2105+
/// </summary>
2106+
[EntityFrameworkInternal]
2107+
protected virtual bool IsCollectionTable(TableExpressionBase table, [NotNullWhen(true)] out Expression? collection)
2108+
{
2109+
collection = null;
2110+
return false;
2111+
}
2112+
2113+
/// <summary>
2114+
/// Given a <see cref="TableExpressionBase" /> which was previously identified to be a parameterized collection table (e.g.
2115+
/// OPENJSON on SQL Server, see <see cref="IsCollectionTable" />), replaces the parameter for that table.
2116+
/// </summary>
2117+
[EntityFrameworkInternal]
2118+
protected virtual TableExpressionBase UpdateParameterCollection(
2119+
TableExpressionBase table,
2120+
SqlParameterExpression newCollectionParameter)
2121+
=> throw new InvalidOperationException();
2122+
19562123
private SqlExpression ProcessNullNotNull(SqlUnaryExpression sqlUnaryExpression, bool operandNullable)
19572124
{
19582125
if (!operandNullable)

src/EFCore.SqlServer/Query/Internal/SqlServerSqlNullabilityProcessor.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Collections;
5+
using System.Diagnostics.CodeAnalysis;
46
using Microsoft.EntityFrameworkCore.Query.SqlExpressions;
57

68
namespace Microsoft.EntityFrameworkCore.SqlServer.Query.Internal;
@@ -113,4 +115,36 @@ protected virtual SqlExpression VisitSqlServerAggregateFunction(
113115
/// </summary>
114116
protected override bool PreferExistsToInWithCoalesce
115117
=> true;
118+
119+
#pragma warning disable EF1001
120+
/// <summary>
121+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
122+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
123+
/// any release. You should only use it directly in your code with extreme caution and knowing that
124+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
125+
/// </summary>
126+
protected override bool IsCollectionTable(TableExpressionBase table, [NotNullWhen(true)] out Expression? collection)
127+
{
128+
if (table is SqlServerOpenJsonExpression { Arguments: [var argument] })
129+
{
130+
collection = argument;
131+
return true;
132+
}
133+
134+
return base.IsCollectionTable(table, out collection);
135+
}
136+
137+
/// <summary>
138+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
139+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
140+
/// any release. You should only use it directly in your code with extreme caution and knowing that
141+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
142+
/// </summary>
143+
protected override TableExpressionBase UpdateParameterCollection(
144+
TableExpressionBase table,
145+
SqlParameterExpression newCollectionParameter)
146+
=> table is SqlServerOpenJsonExpression { Arguments: [SqlParameterExpression] } openJsonExpression
147+
? openJsonExpression.Update(newCollectionParameter, path: null)
148+
: base.UpdateParameterCollection(table, newCollectionParameter);
149+
#pragma warning restore EF1001
116150
}

src/EFCore.Sqlite.Core/Query/Internal/SqliteSqlNullabilityProcessor.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Diagnostics.CodeAnalysis;
45
using Microsoft.EntityFrameworkCore.Query.SqlExpressions;
56
using Microsoft.EntityFrameworkCore.Sqlite.Query.SqlExpressions.Internal;
67

@@ -83,4 +84,36 @@ protected virtual SqlExpression VisitRegexp(
8384

8485
return regexpExpression.Update(match, pattern);
8586
}
87+
88+
#pragma warning disable EF1001
89+
/// <summary>
90+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
91+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
92+
/// any release. You should only use it directly in your code with extreme caution and knowing that
93+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
94+
/// </summary>
95+
protected override bool IsCollectionTable(TableExpressionBase table, [NotNullWhen(true)] out Expression? collection)
96+
{
97+
if (table is TableValuedFunctionExpression { Name: "json_each", Schema: null, IsBuiltIn: true, Arguments: [var argument] })
98+
{
99+
collection = argument;
100+
return true;
101+
}
102+
103+
return base.IsCollectionTable(table, out collection);
104+
}
105+
106+
/// <summary>
107+
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
108+
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
109+
/// any release. You should only use it directly in your code with extreme caution and knowing that
110+
/// doing so can result in application failures when updating to a new Entity Framework Core release.
111+
/// </summary>
112+
protected override TableExpressionBase UpdateParameterCollection(
113+
TableExpressionBase table,
114+
SqlParameterExpression newCollectionParameter)
115+
=> table is TableValuedFunctionExpression { Arguments: [SqlParameterExpression] } jsonEachExpression
116+
? jsonEachExpression.Update(new[] { newCollectionParameter })
117+
: base.UpdateParameterCollection(table, newCollectionParameter);
118+
#pragma warning restore EF1001
86119
}

test/EFCore.Relational.Specification.Tests/Query/NullSemanticsQueryTestBase.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,8 @@ await AssertQueryScalar(
12441244

12451245
#endregion Contains with subquery
12461246

1247+
// For more tests on Contains with parameterized collections, see PrimitiveCollectionsqueryTestBase
1248+
12471249
#region Contains with inline collection
12481250

12491251
[ConditionalTheory]
@@ -1260,7 +1262,7 @@ await AssertQueryScalar(
12601262

12611263
[ConditionalTheory]
12621264
[MemberData(nameof(IsAsyncData))]
1263-
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_non_nullable_values_with_null(bool async)
1265+
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_values_with_null(bool async)
12641266
{
12651267
await AssertQueryScalar(
12661268
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1272,7 +1274,7 @@ await AssertQueryScalar(
12721274

12731275
[ConditionalTheory]
12741276
[MemberData(nameof(IsAsyncData))]
1275-
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_nullable_values(bool async)
1277+
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_values_with_nullable_column(bool async)
12761278
{
12771279
await AssertQueryScalar(
12781280
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1284,7 +1286,7 @@ await AssertQueryScalar(
12841286

12851287
[ConditionalTheory]
12861288
[MemberData(nameof(IsAsyncData))]
1287-
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_nullable_values_with_null(bool async)
1289+
public virtual async Task Null_semantics_contains_with_non_nullable_item_and_inline_values_with_nullable_column_and_null(bool async)
12881290
{
12891291
await AssertQueryScalar(
12901292
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1308,7 +1310,7 @@ await AssertQueryScalar(
13081310

13091311
[ConditionalTheory]
13101312
[MemberData(nameof(IsAsyncData))]
1311-
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_non_nullable_values_with_null(bool async)
1313+
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_values_with_null(bool async)
13121314
{
13131315
await AssertQueryScalar(
13141316
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1320,7 +1322,7 @@ await AssertQueryScalar(
13201322

13211323
[ConditionalTheory]
13221324
[MemberData(nameof(IsAsyncData))]
1323-
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_nullable_values(bool async)
1325+
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_values_with_nullable_column(bool async)
13241326
{
13251327
await AssertQueryScalar(
13261328
async, ss => ss.Set<NullSemanticsEntity1>()
@@ -1332,7 +1334,7 @@ await AssertQueryScalar(
13321334

13331335
[ConditionalTheory]
13341336
[MemberData(nameof(IsAsyncData))]
1335-
public virtual async Task Null_semantics_contains_with_nullable_item_and_inline_nullable_values_with_null(bool async)
1337+
public virtual async Task Null_semantics_contains_with_nullable_item_and_values_with_nullable_column_and_null(bool async)
13361338
{
13371339
await AssertQueryScalar(
13381340
async, ss => ss.Set<NullSemanticsEntity1>()

0 commit comments

Comments
 (0)