diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index ae8b9d461ffc2..82428e6bdbadc 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -17,7 +17,7 @@ import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, as strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec, zero using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, @propagate_inbounds, @pure, reduce, typed_hvcat, typed_vcat, require_one_based_indexing, - splat + splat, has_offset_axes, axes1 using Base.Broadcast: Broadcasted, broadcasted import Libdl diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index 0cbfeaf9ed3ea..04256fed31aa0 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -44,7 +44,7 @@ end function (*)(A::StridedMatrix{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real} TS = promote_op(matprod, T, S) y = isconcretetype(TS) ? convert(AbstractVector{TS}, x) : x - mul!(similar(x, TS, size(A,1)), A, y) + mul!(similar(x, TS, axes(A,1)), A, y) end function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S} TS = promote_op(matprod, T, S) @@ -73,30 +73,30 @@ for elty in (Float32,Float64) end @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector, alpha::Number, beta::Number) = - generic_matvecmul!(y, 'N', A, x, MulAddMul(alpha, beta)) + generic_matvecmul!(y, identity, A, x, alpha, beta) function *(tA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S} TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(tA, 1)), tA, convert(AbstractVector{TS}, x)) + mul!(similar(x, TS, axes(tA, 1)), tA, convert(AbstractVector{TS}, x)) end function *(tA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S} TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(tA, 1)), tA, x) + mul!(similar(x, TS, axes(tA, 1)), tA, x) end @inline mul!(y::StridedVector{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T}, alpha::Number, beta::Number) where {T<:BlasFloat} = gemv!(y, 'T', tA.parent, x, alpha, beta) @inline mul!(y::AbstractVector, tA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector, alpha::Number, beta::Number) = - generic_matvecmul!(y, 'T', tA.parent, x, MulAddMul(alpha, beta)) + generic_matvecmul!(y, transpose, tA.parent, x, alpha, beta) function *(adjA::Adjoint{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S} TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(adjA, 1)), adjA, convert(AbstractVector{TS}, x)) + mul!(similar(x, TS, axes(adjA, 1)), adjA, convert(AbstractVector{TS}, x)) end function *(adjA::Adjoint{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S} TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(adjA, 1)), adjA, x) + mul!(similar(x, TS, axes(adjA, 1)), adjA, x) end @inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T}, @@ -107,7 +107,7 @@ end gemv!(y, 'C', adjA.parent, x, alpha, beta) @inline mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector, alpha::Number, beta::Number) = - generic_matvecmul!(y, 'C', adjA.parent, x, MulAddMul(alpha, beta)) + generic_matvecmul!(y, adjoint, adjA.parent, x, alpha, beta) # Vector-Matrix multiplication (*)(x::AdjointAbsVec, A::AbstractMatrix) = (A'*x')' @@ -130,18 +130,18 @@ julia> [1 1; 0 1] * [1 0; 1 1] """ function (*)(A::AbstractMatrix, B::AbstractMatrix) TS = promote_op(matprod, eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A,1), size(B,2))), A, B) + mul!(similar(B, TS, (axes(A,1), axes(B,2))), A, B) end # optimization for dispatching to BLAS, e.g. *(::Matrix{Float32}, ::Matrix{Float64}) # but avoiding the case *(::Matrix{<:BlasComplex}, ::Matrix{<:BlasReal}) # which is better handled by reinterpreting rather than promotion function (*)(A::StridedMatrix{<:BlasReal}, B::StridedMatrix{<:BlasFloat}) TS = promote_type(eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A,1), size(B,2))), convert(AbstractArray{TS}, A), convert(AbstractArray{TS}, B)) + mul!(similar(B, TS, (axes(A,1), axes(B,2))), convert(AbstractArray{TS}, A), convert(AbstractArray{TS}, B)) end function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMatrix{<:BlasComplex}) TS = promote_type(eltype(A), eltype(B)) - mul!(similar(B, TS, (size(A,1), size(B,2))), convert(AbstractArray{TS}, A), convert(AbstractArray{TS}, B)) + mul!(similar(B, TS, (axes(A,1), axes(B,2))), convert(AbstractArray{TS}, A), convert(AbstractArray{TS}, B)) end @inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, @@ -280,7 +280,7 @@ julia> C """ @inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) + generic_matmatmul!(C, identity, identity, A, B, alpha, beta) """ rmul!(A, B) @@ -359,7 +359,7 @@ lmul!(A, B) end @inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'T', 'N', tA.parent, B, MulAddMul(alpha, beta)) + generic_matmatmul!(C, transpose, identity, tA.parent, B, alpha, beta) @inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, alpha::Number, beta::Number) where {T<:BlasFloat} @@ -385,24 +385,24 @@ end # collapsing the following two defs with C::AbstractVecOrMat yields ambiguities @inline mul!(C::AbstractVector, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, identity, transpose, A, tB.parent, alpha, beta) @inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, identity, transpose, A, tB.parent, alpha, beta) @inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, alpha::Number, beta::Number) where {T<:BlasFloat} = gemm_wrapper!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta)) @inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, transpose, transpose, tA.parent, tB.parent, alpha, beta) @inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}}, alpha::Number, beta::Number) where {T<:BlasFloat} = gemm_wrapper!(C, 'T', 'C', tA.parent, adjB.parent, MulAddMul(alpha, beta)) @inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'T', 'C', tA.parent, tB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, transpose, adjoint, tA.parent, tB.parent, alpha, beta) @inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T}, alpha::Real, beta::Real) where {T<:BlasReal} = @@ -418,7 +418,7 @@ end end @inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'C', 'N', adjA.parent, B, MulAddMul(alpha, beta)) + generic_matmatmul!(C, adjoint, identity, adjA.parent, B, alpha, beta) @inline mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}}, alpha::Number, beta::Number) where {T<:BlasFloat} = @@ -434,21 +434,21 @@ end end @inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'C', A, adjB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, identity, adjoint, A, adjB.parent, alpha, beta) @inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}}, alpha::Number, beta::Number) where {T<:BlasFloat} = gemm_wrapper!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta)) @inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, adjoint, adjoint, adjA.parent, adjB.parent, alpha, beta) @inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, alpha::Number, beta::Number) where {T<:BlasFloat} = gemm_wrapper!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta)) @inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - generic_matmatmul!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta)) + generic_matmatmul!(C, adjoint, transpose, adjA.parent, tB.parent, alpha, beta) # Supporting functions for matrix multiplication @@ -624,6 +624,10 @@ end # cases are handled here lapack_size(t::AbstractChar, M::AbstractVecOrMat) = (size(M, t=='N' ? 1 : 2), size(M, t=='N' ? 2 : 1)) +lapack_axes(t::AbstractChar, M::AbstractVecOrMat) = (axes(M, t=='N' ? 1 : 2), axes(M, t=='N' ? 2 : 1)) +lapack_char(::typeof(identity)) = 'N' +lapack_char(::typeof(transpose)) = 'T' +lapack_char(::typeof(adjoint)) = 'C' function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int}) if tM == 'N' @@ -645,6 +649,10 @@ function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_ B end +# This method helps e.g. OffsetArrays to dispatch on C only, unwrap & call mul! again: +generic_matvecmul!(C::AbstractVector, fA::Function, A::AbstractVecOrMat, B::AbstractVector, alpha=true, beta=false) = + generic_matvecmul!(C, lapack_char(fA), A, B, MulAddMul(alpha, beta)) + # TODO: It will be faster for large matrices to convert to float, # call BLAS, and convert back to required type. @@ -653,7 +661,9 @@ end function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::AbstractVector, _add::MulAddMul = MulAddMul()) where R - require_one_based_indexing(C, A, B) + if has_offset_axes(C, A, B) + return generic_offset_matvecmul!(C, tA, A, B, _add) # avoids linear indexing + end mB = length(B) mA, nA = lapack_size(tA, A) if mB != nA @@ -718,17 +728,84 @@ function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::Ab C end +function generic_offset_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::AbstractVector, + _add::MulAddMul = MulAddMul()) where R + @info "generic_offset_matvecmul!" axes(C) + mB_axis = axes1(B) + mA_axis, nA_axis = lapack_axes(tA, A) + if mB_axis != nA_axis + throw(DimensionMismatch("mul! can't contract axis $(UnitRange(nA_axis)) from A with axes(B) == ($(UnitRange(mB_axis)),)")) + end + if mA_axis != axes1(C) + throw(DimensionMismatch("mul! got axes(C) == ($(UnitRange(axes1(C))),), expected $(UnitRange(mA_axis))")) + end + + @inbounds begin + if tA == 'T' + if isempty(nA_axis) + for k in mA_axis + _modify!(_add, false, C, k) + end + else + for k in mA_axis + s = zero(A[begin, k]*B[begin] + A[begin, k]*B[begin]) + for i in nA_axis + s += transpose(A[i, k]) * B[i] + end + _modify!(_add, s, C, k) + end + end + elseif tA == 'C' + if isempty(nA_axis) + for k in mA_axis + _modify!(_add, false, C, k) + end + else + for k in mA_axis + s = zero(A[begin, k]*B[begin] + A[begin, k]*B[begin]) + for i in nA_axis + s += adjoint(A[i, k]) * B[i] + end + _modify!(_add, s, C, k) + end + end + else # tA == 'N' + for i in mA_axis + if !iszero(_add.beta) + C[i] *= _add.beta + elseif isempty(mB_axis) + C[i] = false + else + C[i] = zero(A[i, begin]*B[begin] + A[i, begin]*B[begin]) + end + end + for k in nA_axis + b = _add(B[k], false) + for i in mA_axis + C[i] += A[i, k] * b + end + end + end + end # @inbounds + C +end + function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S}) where {T,S} - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) + mA, nA = lapack_axes(tA, A) + mB, nB = lapack_axes(tB, B) C = similar(B, promote_op(matprod, T, S), mA, nB) generic_matmatmul!(C, tA, tB, A, B) end const tilebufsize = 10800 # Approximately 32k/3 +# This method helps e.g. OffsetArrays to dispatch on C only, unwrap & call mul! again +generic_matmatmul!(C::AbstractMatrix, fA::Function, fB::Function, A::AbstractMatrix, B::AbstractMatrix, + alpha=true, beta=false) = + generic_matmatmul!(C, lapack_char(fA), lapack_char(fB), A, B, MulAddMul(alpha, beta)) function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, - _add::MulAddMul=MulAddMul()) + _add::MulAddMul) + has_offset_axes(A,B,C) && @info "generic_matmatmul! with offsets" axes(C) mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) mC, nC = size(C) @@ -745,19 +822,26 @@ function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::Abs _generic_matmatmul!(C, tA, tB, A, B, _add) end -generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) = +# Nx1 matrices may be mixed up with with vectors, and cannot be 3x3 +generic_matmatmul!(C::AbstractVecOrMat, fA::Function, fB::Function, A::AbstractVecOrMat, B::AbstractVecOrMat, alpha=true, beta=false) = + _generic_matmatmul!(C, lapack_char(fA), lapack_char(fB), A, B, MulAddMul(alpha, beta)) +generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul = MulAddMul()) = _generic_matmatmul!(C, tA, tB, A, B, _add) function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}, _add::MulAddMul) where {T,S,R} - require_one_based_indexing(C, A, B) + mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) - if mB != nA - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA), matrix B has dimensions ($mB,$nB)")) - end - if size(C,1) != mA || size(C,2) != nB - throw(DimensionMismatch("result C has dimensions $(size(C)), needs ($mA,$nB)")) + mA_axis, nA_axis = lapack_axes(tA, A) + mB_axis, nB_axis = lapack_axes(tB, B) + + if nA_axis != mB_axis + throw(DimensionMismatch("mul! can't contract axis $(UnitRange(nA_axis)) from A with $(UnitRange(mB_axis)) from B")) + elseif mA_axis != axes(C,1) + throw(DimensionMismatch("mul! got axes(C,1) == $(UnitRange(axes(C,1))), expected $(UnitRange(mA_axis)) from A")) + elseif nB_axis != axes(C,2) + throw(DimensionMismatch("mul! got axes(C,2) == $(UnitRange(axes(C,2))), expected $(UnitRange(nB_axis)) from B")) end if iszero(_add.alpha) || isempty(A) || isempty(B) @@ -769,7 +853,7 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat tile_size = floor(Int, sqrt(tilebufsize / max(sizeof(R), sizeof(S), sizeof(T), 1))) end @inbounds begin - if tile_size > 0 + if tile_size > 0 && !has_offset_axes(A, B, C) sz = (tile_size, tile_size) Atile = Array{T}(undef, sz) Btile = Array{S}(undef, sz) @@ -829,28 +913,28 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat # Multiplication for non-plain-data uses the naive algorithm if tA == 'N' if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j]) + for i in mA_axis, j in nB_axis + z2 = zero(A[i, begin]*B[begin, j] + A[i, begin]*B[begin, j]) Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += A[i, k]*B[k, j] end _modify!(_add, Ctmp, C, (i,j)) end elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*transpose(B[j, 1]) + A[i, 1]*transpose(B[j, 1])) + for i in mA_axis, j in nB_axis + z2 = zero(A[i, begin]*transpose(B[j, begin]) + A[i, begin]*transpose(B[j, begin])) Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += A[i, k] * transpose(B[j, k]) end _modify!(_add, Ctmp, C, (i,j)) end else - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*B[j, 1]' + A[i, 1]*B[j, 1]') + for i in mA_axis, j in nB_axis + z2 = zero(A[i, begin]*B[j, begin]' + A[i, begin]*B[j, begin]') Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += A[i, k]*B[j, k]' end _modify!(_add, Ctmp, C, (i,j)) @@ -858,28 +942,28 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat end elseif tA == 'T' if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*B[1, j] + transpose(A[1, i])*B[1, j]) + for i in mA_axis, j in nB_axis + z2 = zero(transpose(A[begin, i])*B[begin, j] + transpose(A[begin, i])*B[begin, j]) Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += transpose(A[k, i]) * B[k, j] end _modify!(_add, Ctmp, C, (i,j)) end elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*transpose(B[j, 1]) + transpose(A[1, i])*transpose(B[j, 1])) + for i in mA_axis, j in nB_axis + z2 = zero(transpose(A[begin, i])*transpose(B[j, begin]) + transpose(A[begin, i])*transpose(B[j, begin])) Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += transpose(A[k, i]) * transpose(B[j, k]) end _modify!(_add, Ctmp, C, (i,j)) end else - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*B[j, 1]' + transpose(A[1, i])*B[j, 1]') + for i in mA_axis, j in nB_axis + z2 = zero(transpose(A[begin, i])*B[j, begin]' + transpose(A[begin, i])*B[j, begin]') Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += transpose(A[k, i]) * adjoint(B[j, k]) end _modify!(_add, Ctmp, C, (i,j)) @@ -887,28 +971,28 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat end else if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*B[1, j] + A[1, i]'*B[1, j]) + for i in mA_axis, j in nB_axis + z2 = zero(A[begin, i]'*B[begin, j] + A[begin, i]'*B[begin, j]) Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += A[k, i]'B[k, j] end _modify!(_add, Ctmp, C, (i,j)) end elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*transpose(B[j, 1]) + A[1, i]'*transpose(B[j, 1])) + for i in mA_axis, j in nB_axis + z2 = zero(A[begin, i]'*transpose(B[j, begin]) + A[begin, i]'*transpose(B[j, begin])) Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += adjoint(A[k, i]) * transpose(B[j, k]) end _modify!(_add, Ctmp, C, (i,j)) end else - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*B[j, 1]' + A[1, i]'*B[j, 1]') + for i in mA_axis, j in nB_axis + z2 = zero(A[begin, i]'*B[j, begin]' + A[begin, i]'*B[j, begin]') Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA + for k in nA_axis Ctmp += A[k, i]'B[j, k]' end _modify!(_add, Ctmp, C, (i,j)) @@ -928,7 +1012,9 @@ end function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, A, B) + if has_offset_axes(C, A, B) + return _generic_matmatmul!(C, tA, tB, A, B, _add) + end if !(size(A) == size(B) == size(C) == (2,2)) throw(DimensionMismatch("A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @@ -966,12 +1052,15 @@ end # Multiply 3x3 matrices function matmul3x3(tA, tB, A::AbstractMatrix{T}, B::AbstractMatrix{S}) where {T,S} + require_one_based_indexing(A, B) # ?? matmul3x3!(similar(B, promote_op(matprod, T, S), 3, 3), tA, tB, A, B) end function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C, A, B) + if has_offset_axes(C, A, B) + return _generic_matmatmul!(C, tA, tB, A, B, _add) + end if !(size(A) == size(B) == size(C) == (3,3)) throw(DimensionMismatch("A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end