@@ -699,7 +699,7 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
699699 return inExpression ;
700700
701701 case ( true , false ) :
702- {
702+ NullableItemWithNonNullableProjection :
703703 // If the item is actually null (not just nullable) and the projection is non-nullable, just return false immediately:
704704 // WHERE NULL IN (SELECT NonNullable FROM foo) -> false
705705 if ( IsNull ( item ) )
@@ -714,7 +714,6 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
714714 return allowOptimizedExpansion
715715 ? inExpression
716716 : _sqlExpressionFactory . AndAlso ( inExpression , _sqlExpressionFactory . IsNotNull ( item ) ) ;
717- }
718717
719718 case ( false , true ) :
720719 {
@@ -727,6 +726,14 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
727726 return inExpression ;
728727 }
729728
729+ // If the subquery happens to be a primitive collection (e.g. OPENJSON), pull out the null values from the parameter.
730+ // Since the item is non-nullable, it can never match those null values, and all they do is cause the IN expression
731+ // to return NULL if the item isn't found. So just remove them.
732+ if ( TryMakeNonNullable ( subquery , out var nonNullableSubquery , out _ ) )
733+ {
734+ return inExpression . Update ( item , nonNullableSubquery ) ;
735+ }
736+
730737 // On SQL Server, EXISTS isn't less efficient than IN, and the additional COALESCE (and CASE/WHEN which it requires)
731738 // add unneeded clutter (and possibly hurt perf). So allow providers to prefer EXISTS.
732739 if ( PreferExistsToInWithCoalesce )
@@ -738,13 +745,46 @@ protected virtual SqlExpression VisitIn(InExpression inExpression, bool allowOpt
738745 }
739746
740747 case ( true , true ) :
741- TransformToExists :
742- // Worst case: both sides are nullable; there's no way to distinguish the item was found or not.
743- // We rewrite to an EXISTS subquery where we can generate a precise predicate to check for what we need. Note that this
744- // performs (significantly) worse than an IN expression, since it involves a correlated subquery.
748+ // Worst case: both sides are nullable; that means that with IN, there's no way to distinguish between:
749+ // a) The item was NULL and was found (e.g. NULL IN (1, 2, NULL) should yield true), and
750+ // b) The item wasn't found (e.g. 3 IN (1, 2, NULL) should yield false)
751+
752+ // As a last resort, we can rewrite to an EXISTS subquery where we can generate a precise predicate to check for what we
753+ // need. This unfortunately performs (significantly) worse than an IN expression, since it involves a correlated
754+ // subquery, and can cause indexes to not get used.
755+
756+ // But before doing this, we check whether the subquery represents a simple parameterized collection (e.g. a bare
757+ // OPENJSON call over a parameter in SQL Server), and if it is, rewrite the parameter to remove nulls so we can keep
758+ // using IN.
759+ if ( TryMakeNonNullable ( subquery , out var nonNullableSubquery2 , out var foundNull ) )
760+ {
761+ inExpression = inExpression . Update ( item , nonNullableSubquery2 ) ;
762+
763+ if ( ! foundNull . Value )
764+ {
765+ // There weren't any actual nulls inside the parameterized collection - we can jump to the case which handles
766+ // that.
767+ goto NullableItemWithNonNullableProjection ;
768+ }
769+
770+ // Nulls were found inside the parameterized collection, and removed. If the item is a null constant, just convert
771+ // the whole thing to true.
772+ if ( IsNull ( item ) )
773+ {
774+ return _sqlExpressionFactory . Constant ( true , inExpression . TypeMapping ) ;
775+ }
776+
777+ // Otherwise we now need to compensate for the removed nulls outside, by adding OR item IS NULL.
778+ // Note that this is safe in negated (non-optimized) contexts:
779+ // WHERE item NOT IN ("foo", "bar") AND item IS NOT NULL
780+ // When item is NULL, the item IS NOT NULL clause causes the whole thing to return false. Otherwise that clause
781+ // can be ignored, and we have non-null item IN non-null list-of-values.
782+ return _sqlExpressionFactory . OrElse ( inExpression , _sqlExpressionFactory . IsNull ( item ) ) ;
783+ }
745784
746- // We'll need to mutate the subquery to introduce the predicate inside it, but it might be referenced by other places in
747- // the tree, so we create a copy to work on.
785+ TransformToExists :
786+ // We unfortunately need to rewrite to EXISTS. We'll need to mutate the subquery to introduce the predicate inside it,
787+ // but it might be referenced by other places in the tree, so we create a copy to work on.
748788
749789 // No need for a projection with EXISTS, clear it to get SELECT 1
750790 subquery = subquery . Update (
@@ -1953,6 +1993,133 @@ static bool TryNegate(ExpressionType expressionType, out ExpressionType result)
19531993 }
19541994 }
19551995
1996+ /// <summary>
1997+ /// Attempts to convert the given <paramref name="selectExpression" />, which has a nullable projection, to an identical expression
1998+ /// which does not have a nullable projection. This is used to extract NULLs out of e.g. the parameter argument of SQL Server
1999+ /// OPENJSON, in order to allow a more efficient translation.
2000+ /// </summary>
2001+ [ EntityFrameworkInternal ]
2002+ protected virtual bool TryMakeNonNullable (
2003+ SelectExpression selectExpression ,
2004+ [ NotNullWhen ( true ) ] out SelectExpression ? rewrittenSelectExpression ,
2005+ [ NotNullWhen ( true ) ] out bool ? foundNull )
2006+ {
2007+ if ( selectExpression is
2008+ {
2009+ Tables : [ var collectionTable ] ,
2010+ GroupBy : [ ] ,
2011+ Having : null ,
2012+ Limit : null ,
2013+ Offset : null ,
2014+ Predicate : null ,
2015+ // Note that a orderings and distinct are OK - they don't interact with our null removal.
2016+ // We exclude the predicate since it may actually filter out nulls
2017+ Projection : [ { Expression : ColumnExpression projectedColumn } ] projection
2018+ }
2019+ && projectedColumn . Table == collectionTable
2020+ && IsCollectionTable ( collectionTable , out var collection )
2021+ && collection is SqlParameterExpression collectionParameter
2022+ && ParameterValues [ collectionParameter . Name ] is IList values )
2023+ {
2024+ // We're looking at a parameter beyond its simple nullability, so we can't use the 2nd-level cache for this query.
2025+ DoNotCache ( ) ;
2026+
2027+ IList ? processedValues = null ;
2028+
2029+ for ( var i = 0 ; i < values . Count ; i ++ )
2030+ {
2031+ var value = values [ i ] ;
2032+
2033+ if ( value is null )
2034+ {
2035+ if ( processedValues is null )
2036+ {
2037+ var elementClrType = values . GetType ( ) . GetSequenceType ( ) ;
2038+ processedValues = ( IList ) Activator . CreateInstance ( typeof ( List < > ) . MakeGenericType ( elementClrType ) , values . Count ) ! ;
2039+ for ( var j = 0 ; j < i ; j ++ )
2040+ {
2041+ processedValues . Add ( values [ j ] ! ) ;
2042+ }
2043+ }
2044+
2045+ // Skip the value
2046+ continue ;
2047+ }
2048+
2049+ processedValues ? . Add ( value ) ;
2050+ }
2051+
2052+ if ( processedValues is null )
2053+ {
2054+ // No null was found in the parameter's elements - the select expression is already non-nullable.
2055+ // TODO: We should change the project column to be non-nullable, but it's too closed down for that.
2056+ rewrittenSelectExpression = selectExpression ;
2057+ foundNull = false ;
2058+ return true ;
2059+ }
2060+
2061+ foundNull = true ;
2062+
2063+ // TODO: We currently only have read-only access to the parameter values in the nullability processor (and in all of the
2064+ // 2nd-level query pipeline); to need to flow the mutable dictionary in. Note that any modification of parameter values (as
2065+ // here) must immediately entail DoNotCache().
2066+ Check . DebugAssert ( ParameterValues is Dictionary < string , object ? > , "ParameterValues isn't a Dictionary" ) ;
2067+ if ( ParameterValues is not Dictionary < string , object ? > mutableParameterValues )
2068+ {
2069+ rewrittenSelectExpression = null ;
2070+ foundNull = null ;
2071+ return false ;
2072+ }
2073+
2074+ var rewrittenParameter = new SqlParameterExpression (
2075+ collectionParameter . Name + "_without_nulls" , collectionParameter . Type , collectionParameter . TypeMapping ) ;
2076+ mutableParameterValues [ rewrittenParameter . Name ] = processedValues ;
2077+ var rewrittenCollectionTable = UpdateParameterCollection ( collectionTable , rewrittenParameter ) ;
2078+
2079+ // We clone the select expression since Update below doesn't create a pure copy, mutating the original as well (because of
2080+ // TableReferenceExpression). TODO: Remove this after #31327.
2081+ #pragma warning disable EF1001
2082+ rewrittenSelectExpression = selectExpression . Clone ( ) ;
2083+ #pragma warning restore EF1001
2084+
2085+ rewrittenSelectExpression = rewrittenSelectExpression . Update (
2086+ projection , // TODO: We should change the project column to be non-nullable, but it's too closed down for that.
2087+ new [ ] { rewrittenCollectionTable } ,
2088+ selectExpression . Predicate ,
2089+ selectExpression . GroupBy ,
2090+ selectExpression . Having ,
2091+ selectExpression . Orderings ,
2092+ selectExpression . Limit ,
2093+ selectExpression . Offset ) ;
2094+
2095+ return true ;
2096+ }
2097+
2098+ rewrittenSelectExpression = null ;
2099+ foundNull = null ;
2100+ return false ;
2101+ }
2102+
2103+ /// <summary>
2104+ /// A provider hook for identifying a <see cref="TableExpressionBase" /> which represents a collection, e.g. OPENJSON on SQL Server.
2105+ /// </summary>
2106+ [ EntityFrameworkInternal ]
2107+ protected virtual bool IsCollectionTable ( TableExpressionBase table , [ NotNullWhen ( true ) ] out Expression ? collection )
2108+ {
2109+ collection = null ;
2110+ return false ;
2111+ }
2112+
2113+ /// <summary>
2114+ /// Given a <see cref="TableExpressionBase" /> which was previously identified to be a parameterized collection table (e.g.
2115+ /// OPENJSON on SQL Server, see <see cref="IsCollectionTable" />), replaces the parameter for that table.
2116+ /// </summary>
2117+ [ EntityFrameworkInternal ]
2118+ protected virtual TableExpressionBase UpdateParameterCollection (
2119+ TableExpressionBase table ,
2120+ SqlParameterExpression newCollectionParameter )
2121+ => throw new InvalidOperationException ( ) ;
2122+
19562123 private SqlExpression ProcessNullNotNull ( SqlUnaryExpression sqlUnaryExpression , bool operandNullable )
19572124 {
19582125 if ( ! operandNullable )
0 commit comments