From e7b2a8abe0702c68761913c275c366f27a80fec7 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Thu, 20 Feb 2025 22:20:52 +0530 Subject: [PATCH 1/6] Specialize Diagonal * Adjoint (#1207) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reinstates slightly altered versions of the methods that were removed in https://github.com/JuliaLang/julia/pull/52389. Sort of fixes #1205, although this doesn't recover the full performance. However, this version is more general, and works with the example presented in https://github.com/JuliaLang/julia/pull/52389. There's still a performance regression, but the full performance may only be obtained for mutable matrices, and we may not assume mutability in general. Performance: v1.10: ```julia julia> n = 100 100 julia> A = adjoint(sparse(Float64, I, n, n)); julia> B = Diagonal(ones(n)); julia> @btime $A * $B; 837.119 ns (5 allocations: 2.59 KiB) ``` This PR ```julia julia> @btime $A * $B; 1.106 μs (15 allocations: 5.56 KiB) ``` We need double the allocations here compared to earlier, as we firstly materialize `D' * A'`, and then we again copy the adjoint of this result. I wonder if this may be reduced. --------- Co-authored-by: Daniel Karrasch --- src/adjtrans.jl | 4 ++-- src/diagonal.jl | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/adjtrans.jl b/src/adjtrans.jl index f7caa9bc..d81aa3ae 100644 --- a/src/adjtrans.jl +++ b/src/adjtrans.jl @@ -319,8 +319,8 @@ const AdjointAbsVec{T} = Adjoint{T,<:AbstractVector} const AdjointAbsMat{T} = Adjoint{T,<:AbstractMatrix} const TransposeAbsVec{T} = Transpose{T,<:AbstractVector} const TransposeAbsMat{T} = Transpose{T,<:AbstractMatrix} -const AdjOrTransAbsVec{T} = AdjOrTrans{T,<:AbstractVector} -const AdjOrTransAbsMat{T} = AdjOrTrans{T,<:AbstractMatrix} +const AdjOrTransAbsVec{T,V<:AbstractVector} = AdjOrTrans{T,V} +const AdjOrTransAbsMat{T,M<:AbstractMatrix} = AdjOrTrans{T,M} # for internal use below wrapperop(_) = identity diff --git a/src/diagonal.jl b/src/diagonal.jl index 9f8d54e5..9334f2c6 100644 --- a/src/diagonal.jl +++ b/src/diagonal.jl @@ -332,6 +332,28 @@ function (*)(D::Diagonal, V::AbstractVector) return D.diag .* V end +function _diag_adj_mul(A::AdjOrTransAbsMat, D::Diagonal) + adj = wrapperop(A) + copy(adj(adj(D) * adj(A))) +end +function _diag_adj_mul(A::AdjOrTransAbsMat{<:Number, <:StridedMatrix}, D::Diagonal{<:Number}) + @invoke *(A::AbstractMatrix, D::AbstractMatrix) +end +function _diag_adj_mul(D::Diagonal, A::AdjOrTransAbsMat) + adj = wrapperop(A) + copy(adj(adj(A) * adj(D))) +end +function _diag_adj_mul(D::Diagonal{<:Number}, A::AdjOrTransAbsMat{<:Number, <:StridedMatrix}) + @invoke *(D::AbstractMatrix, A::AbstractMatrix) +end + +function (*)(A::AdjOrTransAbsMat, D::Diagonal) + _diag_adj_mul(A, D) +end +function (*)(D::Diagonal, A::AdjOrTransAbsMat) + _diag_adj_mul(D, A) +end + function rmul!(A::AbstractMatrix, D::Diagonal) matmul_size_check(size(A), size(D)) for I in CartesianIndices(A) From 0745131bb035f018bdb0239a74175e8a8cd883dd Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Wed, 5 Feb 2025 12:06:07 +0100 Subject: [PATCH 2/6] Use `BLAS.trsm!` instead of `LAPACK.trtrs!` in left-triangular solves (#1194) Co-authored-by: Alexis Montoison --- src/blas.jl | 13 ++++++++++++- src/triangular.jl | 8 +++++--- test/testtriag.jl | 2 ++ test/triangular.jl | 7 ++++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/blas.jl b/src/blas.jl index 3c156300..04e590c1 100644 --- a/src/blas.jl +++ b/src/blas.jl @@ -84,7 +84,8 @@ export trsm!, trsm -using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, chkstride1 +using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, + DimensionMismatch, checksquare, chkstride1, SingularException include("lbt.jl") @@ -1369,6 +1370,11 @@ for (fname, elty) in ((:dtrsv_,:Float64), throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))")) end chkstride1(A) + if diag == 'N' + for i in 1:n + iszero(A[i,i]) && throw(SingularException(i)) + end + end px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, @@ -2217,6 +2223,11 @@ for (mmname, smname, elty) in end chkstride1(A) chkstride1(B) + if diag == 'N' + for i in 1:k + iszero(A[i,i]) && throw(SingularException(i)) + end + end ccall((@blasfunc($smname), libblastrampoline), Cvoid, (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, diff --git a/src/triangular.jl b/src/triangular.jl index 5c6b5188..3faa12da 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -1223,11 +1223,13 @@ function generic_mattrimul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, end end # division -function generic_trimatdiv!(C::StridedVecOrMat{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} +generic_trimatdiv!(C::StridedVector{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVector{T}) where {T<:BlasFloat} = + BLAS.trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B)) +function generic_trimatdiv!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} if stride(C,1) == stride(A,1) == 1 - LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B)) + BLAS.trsm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B)) else # incompatible with LAPACK - @invoke generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat) + @invoke generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix) end end function generic_mattridiv!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} diff --git a/test/testtriag.jl b/test/testtriag.jl index 7542d843..6ed7a0d6 100644 --- a/test/testtriag.jl +++ b/test/testtriag.jl @@ -493,6 +493,8 @@ function test_triangular(elty1_types) @test_throws DimensionMismatch transpose(Ann) \ bm if t1 == UpperTriangular || t1 == LowerTriangular @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n)) + @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n, 2)) + @test_throws SingularException rdiv!(fill(eltyB(1), n, n), t1(zeros(elty1, n, n))) end @test B / A1 ≈ B / M1 @test B / transpose(A1) ≈ B / transpose(M1) diff --git a/test/triangular.jl b/test/triangular.jl index a5fa45d0..8eeeb666 100644 --- a/test/triangular.jl +++ b/test/triangular.jl @@ -886,8 +886,13 @@ end end end -@testset "(l/r)mul! and (l/r)div! for non-contiguous matrices" begin +@testset "(l/r)mul! and (l/r)div! for non-contiguous arrays" begin U = UpperTriangular(reshape(collect(3:27.0),5,5)) + b = float.(1:10) + b2 = copy(b); b2v = view(b2, 1:2:9); b2vc = copy(b2v) + @test lmul!(U, b2v) == lmul!(U, b2vc) + b2 = copy(b); b2v = view(b2, 1:2:9); b2vc = copy(b2v) + @test ldiv!(U, b2v) ≈ ldiv!(U, b2vc) B = float.(collect(reshape(1:100, 10,10))) B2 = copy(B); B2v = view(B2, 1:2:9, 1:5); B2vc = copy(B2v) @test lmul!(U, B2v) == lmul!(U, B2vc) From 9b6fe735ca7f32124fa0d7bfad01d103e1ca0517 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 10 Feb 2025 00:08:29 -0500 Subject: [PATCH 3/6] Explicitly declare type constructor imports (#1196) These would otherwise give a warning after https://github.com/JuliaLang/julia/pull/57311, but are a good change even independently. --------- Co-authored-by: Daniel Karrasch (cherry picked from commit 2a1696a93683e947b46991ea454c4ffb1988e794) --- src/LinearAlgebra.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/LinearAlgebra.jl b/src/LinearAlgebra.jl index fc1081e0..2c76dcdc 100644 --- a/src/LinearAlgebra.jl +++ b/src/LinearAlgebra.jl @@ -16,6 +16,7 @@ import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, as permutedims, permuterows!, power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin, sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec, view, zero +import Base: AbstractArray, AbstractMatrix, Array, Matrix using Base: IndexLinear, promote_eltype, promote_op, print_matrix, @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing, splat, BitInteger From 87542a7f88f7ef749d2e1d2460dc33da0d0a8f08 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 14 Feb 2025 03:01:23 +0100 Subject: [PATCH 4/6] Add fast path in generic matmul (#1202) This manually adds the critical optimisation investigated in https://github.com/JuliaLang/julia/issues/56954. While we could rely on LLVM to continue doing this optimisation, it's more robust to add it manually. (cherry picked from commit ed35a377cd3945c91654cabc0bac9cbed2d67a0e) --- src/matmul.jl | 1 + test/matmul.jl | 1 + 2 files changed, 2 insertions(+) diff --git a/src/matmul.jl b/src/matmul.jl index 76719c53..21c6e299 100644 --- a/src/matmul.jl +++ b/src/matmul.jl @@ -1021,6 +1021,7 @@ function _generic_matmatmul_nonadjtrans!(C, A, B, alpha, beta) @inbounds for n in axes(B, 2), k in axes(B, 1) # Balpha = B[k,n] * alpha, but we skip the multiplication in case isone(alpha) Balpha = @stable_muladdmul MulAddMul(alpha, false)(B[k,n]) + !ismissing(Balpha) && iszero(Balpha) && continue @simd for m in axes(A, 1) C[m,n] = muladd(A[m,k], Balpha, C[m,n]) end diff --git a/test/matmul.jl b/test/matmul.jl index 805edeac..afa730f1 100644 --- a/test/matmul.jl +++ b/test/matmul.jl @@ -767,6 +767,7 @@ import LinearAlgebra: Adjoint, Transpose (*)(x::RootInt, y::Integer) = x.i * y adjoint(x::RootInt) = x transpose(x::RootInt) = x +Base.zero(::RootInt) = RootInt(0) @test Base.promote_op(*, RootInt, RootInt) === Int From 37aa43ad700967a6bd8f9fc324705c28f8b695bc Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Mon, 17 Feb 2025 07:01:08 +0530 Subject: [PATCH 5/6] Restrict Diagonal sqrt branch to positive diag (#1203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As noted in https://github.com/JuliaLang/LinearAlgebra.jl/issues/1193, the `sqrt(::Digonal{<:Real})` method requires the diagonal element to be positive, so that `sqrt` is defined for the individual elements. We therefore may restrict the diagonal branch in the dense `sqrt` method to matrices with positive `diag` if the `eltype` is `Real`. Fixes #1193 After this, ```julia julia> A = diagm(0 => [1.0, -1.0]) 2×2 Matrix{Float64}: 1.0 0.0 0.0 -1.0 julia> sqrt(A) 2×2 Matrix{ComplexF64}: 1.0+0.0im 0.0+0.0im 0.0+0.0im 0.0+1.0im ``` (cherry picked from commit 16d9d61d67419939f5528870eff4aacb3dd3afe5) --- src/dense.jl | 3 ++- test/dense.jl | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/dense.jl b/src/dense.jl index ccf6bc80..234391e5 100644 --- a/src/dense.jl +++ b/src/dense.jl @@ -972,7 +972,8 @@ sqrt(::AbstractMatrix) function sqrt(A::AbstractMatrix{T}) where {T<:Union{Real,Complex}} if checksquare(A) == 0 return copy(float(A)) - elseif isdiag(A) + elseif isdiag(A) && (T <: Complex || all(x -> x ≥ zero(x), diagview(A))) + # Real Diagonal sqrt requires each diagonal element to be positive return applydiagonal(sqrt, A) elseif ishermitian(A) sqrtHermA = sqrt(Hermitian(A)) diff --git a/test/dense.jl b/test/dense.jl index 1f953ea9..1d4b330c 100644 --- a/test/dense.jl +++ b/test/dense.jl @@ -984,6 +984,12 @@ end @testset "sqrt for diagonal" begin A = diagm(0 => [1, 2, 3]) @test sqrt(A)^2 ≈ A + + A = diagm(0 => [1.0, -1.0]) + @test sqrt(A) == diagm(0 => [1.0, 1.0im]) + @test sqrt(A)^2 ≈ A + B = im*A + @test sqrt(B)^2 ≈ B end @testset "issue #40141" begin From 6b5bdf2cdb2cae8ce00cd9498c0155481a46be7e Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Thu, 20 Feb 2025 16:50:21 +0530 Subject: [PATCH 6/6] Indirection in matrix multiplication to avoid ambiguities (#1210) (cherry picked from commit 5cf41c438b9014e2b9607b60a20140b4bb0a97da) --- src/diagonal.jl | 30 ++++++++++++++++-------------- src/matmul.jl | 4 +++- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/diagonal.jl b/src/diagonal.jl index 9334f2c6..8ded64ad 100644 --- a/src/diagonal.jl +++ b/src/diagonal.jl @@ -693,22 +693,24 @@ end for Tri in (:UpperTriangular, :LowerTriangular) UTri = Symbol(:Unit, Tri) # 2 args - for (fun, f) in zip((:*, :rmul!, :rdiv!, :/), (:identity, :identity, :inv, :inv)) - @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D)) - @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag)) + for (fun, f) in zip((:mul, :rmul!, :rdiv!, :/), (:identity, :identity, :inv, :inv)) + g = fun == :mul ? :* : fun + @eval $fun(A::$Tri, D::Diagonal) = $Tri($g(A.data, D)) + @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($g(A.data, D), $f, D.diag)) end - @eval *(A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = - @invoke *(A::AbstractMatrix, D::Diagonal) - @eval *(A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = - @invoke *(A::AbstractMatrix, D::Diagonal) - for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv)) - @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data)) - @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag)) + @eval mul(A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = + @invoke mul(A::AbstractMatrix, D::Diagonal) + @eval mul(A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}, D::Diagonal) = + @invoke mul(A::AbstractMatrix, D::Diagonal) + for (fun, f) in zip((:mul, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv)) + g = fun == :mul ? :* : fun + @eval $fun(D::Diagonal, A::$Tri) = $Tri($g(D, A.data)) + @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($g(D, A.data), $f, D.diag)) end - @eval *(D::Diagonal, A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}) = - @invoke *(D::Diagonal, A::AbstractMatrix) - @eval *(D::Diagonal, A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}) = - @invoke *(D::Diagonal, A::AbstractMatrix) + @eval mul(D::Diagonal, A::$Tri{<:Any, <:StridedMaybeAdjOrTransMat}) = + @invoke mul(D::Diagonal, A::AbstractMatrix) + @eval mul(D::Diagonal, A::$UTri{<:Any, <:StridedMaybeAdjOrTransMat}) = + @invoke mul(D::Diagonal, A::AbstractMatrix) # 3-arg ldiv! @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data)) @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag)) diff --git a/src/matmul.jl b/src/matmul.jl index 21c6e299..84a83d3a 100644 --- a/src/matmul.jl +++ b/src/matmul.jl @@ -111,7 +111,9 @@ julia> [1 1; 0 1] * [1 0; 1 1] 1 1 ``` """ -function (*)(A::AbstractMatrix, B::AbstractMatrix) +(*)(A::AbstractMatrix, B::AbstractMatrix) = mul(A, B) +# we add an extra level of indirection to avoid ambiguities in * +function mul(A::AbstractMatrix, B::AbstractMatrix) TS = promote_op(matprod, eltype(A), eltype(B)) mul!(matprod_dest(A, B, TS), A, B) end