From f3340a3acc81b044e05d6ea1358d718b6a80f740 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 2 Apr 2025 10:36:03 +0200 Subject: [PATCH 1/5] Fix gradient with `LowerTriangular` and `UpperTriangular` input --- src/apiutils.jl | 16 ++++++++++++++++ src/gradient.jl | 8 ++++++++ test/GradientTest.jl | 9 +++++++++ 3 files changed, 33 insertions(+) diff --git a/src/apiutils.jl b/src/apiutils.jl index 5a2a1df9..1d617045 100644 --- a/src/apiutils.jl +++ b/src/apiutils.jl @@ -53,6 +53,22 @@ function seed!(duals::AbstractArray{Dual{T,V,N}}, x, return duals end +# Triangular matrices +function _nonzero_indices(x::UpperTriangular) + n = size(x, 1) + return (CartesianIndex(i, j) for j in 1:n for i in 1:j) +end +function _nonzero_indices(x::LowerTriangular) + n = size(x, 1) + return (CartesianIndex(i, j) for j in 1:n for i in j:n) +end +function seed!(duals::Union{LowerTriangular{Dual{T,V,N}},UpperTriangular{Dual{T,V,N}}}, x, seeds::NTuple{N,Partials{N,V}}) where {T,V,N} + for (idx, seed) in zip(_nonzero_indices(duals), seeds) + duals[idx] = Dual{T,V,N}(x[idx], seed) + end + return duals +end + function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index, seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N} offset = index - 1 diff --git a/src/gradient.jl b/src/gradient.jl index 6d4cc791..289ebaa5 100644 --- a/src/gradient.jl +++ b/src/gradient.jl @@ -65,6 +65,14 @@ end extract_gradient!(::Type{T}, result::AbstractArray, y::Real) where {T} = fill!(result, zero(y)) extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T}= copyto!(result, partials(T, dual)) +# Triangular matrices +function extract_gradient!(::Type{T}, result::Union{UpperTriangular,LowerTriangular}, dual::Dual) where {T} + for (idx, p) in zip(_nonzero_indices(result), partials(T, dual)) + result[idx] = p + end + return result +end + function extract_gradient_chunk!(::Type{T}, result, dual, index, chunksize) where {T} offset = index - 1 for i in 1:chunksize diff --git a/test/GradientTest.jl b/test/GradientTest.jl index a386c479..5f6c80dc 100644 --- a/test/GradientTest.jl +++ b/test/GradientTest.jl @@ -226,4 +226,13 @@ end @test dx ≈ sum(a * b) end +# issue #738 +@testset "LowerTriangular and UpperTriangular" begin + M = rand(3, 3) + for T in (LowerTriangular, UpperTriangular) + @test ForwardDiff.gradient(sum, T(randn(3, 3))) == T(ones(3, 3)) + @test ForwardDiff.gradient(x -> dot(M, x), T(randn(3, 3))) == T(M) + end +end + end # module From 715f4bc7469b258a8f18bd70555409d8c9ec3b9b Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 2 Apr 2025 16:17:58 +0200 Subject: [PATCH 2/5] Only seed structurally non-zero entries --- src/apiutils.jl | 61 +++++++++++++++++++++++++++----------------- src/gradient.jl | 16 ++++++------ src/prelude.jl | 9 ++++++- test/GradientTest.jl | 12 +++++---- 4 files changed, 61 insertions(+), 37 deletions(-) diff --git a/src/apiutils.jl b/src/apiutils.jl index 1d617045..362bf63a 100644 --- a/src/apiutils.jl +++ b/src/apiutils.jl @@ -40,48 +40,63 @@ end return Expr(:tuple, [:(single_seed(Partials{N,V}, Val{$i}())) for i in 1:N]...) end -function seed!(duals::AbstractArray{Dual{T,V,N}}, x, - seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N} - duals .= Dual{T,V,N}.(x, Ref(seed)) - return duals -end - -function seed!(duals::AbstractArray{Dual{T,V,N}}, x, - seeds::NTuple{N,Partials{N,V}}) where {T,V,N} - dual_inds = 1:N - duals[dual_inds] .= Dual{T,V,N}.(view(x,dual_inds), seeds) - return duals -end - -# Triangular matrices -function _nonzero_indices(x::UpperTriangular) +# Only seed indices that are structurally non-zero +_structural_nonzero_indices(x::AbstractArray) = eachindex(x) +function _structural_nonzero_indices(x::UpperTriangular) n = size(x, 1) return (CartesianIndex(i, j) for j in 1:n for i in 1:j) end -function _nonzero_indices(x::LowerTriangular) +function _structural_nonzero_indices(x::LowerTriangular) n = size(x, 1) return (CartesianIndex(i, j) for j in 1:n for i in j:n) end -function seed!(duals::Union{LowerTriangular{Dual{T,V,N}},UpperTriangular{Dual{T,V,N}}}, x, seeds::NTuple{N,Partials{N,V}}) where {T,V,N} - for (idx, seed) in zip(_nonzero_indices(duals), seeds) +_structural_nonzero_indices(x::Diagonal) = diagind(x) + +function seed!(duals::AbstractArray{Dual{T,V,N}}, x, + seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N} + if eachindex(duals) != eachindex(x) + throw(ArgumentError("indices of input array and array of duals are not identical")) + end + for idx in _structural_nonzero_indices(duals) duals[idx] = Dual{T,V,N}(x[idx], seed) end return duals end +function seed!(duals::AbstractArray{Dual{T,V,N}}, x, + seeds::NTuple{N,Partials{N,V}}) where {T,V,N} + if eachindex(duals) != eachindex(x) + throw(ArgumentError("indices of input array and array of duals are not identical")) + end + for (i, idx) in enumerate(_structural_nonzero_indices(duals)) + duals[idx] = Dual{T,V,N}(x[idx], seeds[i]) + end + return duals +end + function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index, seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N} + if eachindex(duals) != eachindex(x) + throw(ArgumentError("indices of input array and array of duals are not identical")) + end offset = index - 1 - dual_inds = (1:N) .+ offset - duals[dual_inds] .= Dual{T,V,N}.(view(x, dual_inds), Ref(seed)) + idxs = Iterators.drop(_structural_nonzero_indices(duals), offset) + for idx in idxs + duals[idx] = Dual{T,V,N}(x[idx], seed) + end return duals end function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index, seeds::NTuple{N,Partials{N,V}}, chunksize = N) where {T,V,N} + if eachindex(duals) != eachindex(x) + throw(ArgumentError("indices of input array and array of duals are not identical")) + end offset = index - 1 - seed_inds = 1:chunksize - dual_inds = seed_inds .+ offset - duals[dual_inds] .= Dual{T,V,N}.(view(x, dual_inds), getindex.(Ref(seeds), seed_inds)) + idxs = Iterators.drop(_structural_nonzero_indices(duals), offset) + for (i, idx) in enumerate(idxs) + i > chunksize && break + duals[idx] = Dual{T,V,N}(x[idx], seeds[i]) + end return duals end diff --git a/src/gradient.jl b/src/gradient.jl index 289ebaa5..3000270a 100644 --- a/src/gradient.jl +++ b/src/gradient.jl @@ -63,20 +63,20 @@ function extract_gradient!(::Type{T}, result::DiffResult, dual::Dual) where {T} end extract_gradient!(::Type{T}, result::AbstractArray, y::Real) where {T} = fill!(result, zero(y)) -extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T}= copyto!(result, partials(T, dual)) - -# Triangular matrices -function extract_gradient!(::Type{T}, result::Union{UpperTriangular,LowerTriangular}, dual::Dual) where {T} - for (idx, p) in zip(_nonzero_indices(result), partials(T, dual)) - result[idx] = p +function extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T} + idxs = _structural_nonzero_indices(result) + for (i, idx) in enumerate(idxs) + result[idx] = partials(T, dual, i) end return result end function extract_gradient_chunk!(::Type{T}, result, dual, index, chunksize) where {T} offset = index - 1 - for i in 1:chunksize - result[i + offset] = partials(T, dual, i) + idxs = Iterators.drop(_structural_nonzero_indices(result), offset) + for (i, idx) in enumerate(idxs) + i > chunksize && break + result[idx] = partials(T, dual, i) end return result end diff --git a/src/prelude.jl b/src/prelude.jl index ed4782e9..79cdf75f 100644 --- a/src/prelude.jl +++ b/src/prelude.jl @@ -12,8 +12,15 @@ function Chunk(input_length::Integer, threshold::Integer = DEFAULT_CHUNK_THRESHO Base.@nif 12 d->(N == d) d->(Chunk{d}()) d->(Chunk{N}()) end +_length_structural_nonzero_indices(x::AbstractArray) = length(x) +function _length_structural_nonzero_indices(x::Union{LowerTriangular,UpperTriangular}) + n = size(x, 1) + return (n * (n + 1)) >> 1 +end +_length_structural_nonzero_indices(x::Diagonal) = size(x, 1) + function Chunk(x::AbstractArray, threshold::Integer = DEFAULT_CHUNK_THRESHOLD) - return Chunk(length(x), threshold) + return Chunk(_length_structural_nonzero_indices(x), threshold) end # Constrained to `N <= threshold`, minimize (in order of priority): diff --git a/test/GradientTest.jl b/test/GradientTest.jl index 5f6c80dc..7a6661cb 100644 --- a/test/GradientTest.jl +++ b/test/GradientTest.jl @@ -227,11 +227,13 @@ end end # issue #738 -@testset "LowerTriangular and UpperTriangular" begin - M = rand(3, 3) - for T in (LowerTriangular, UpperTriangular) - @test ForwardDiff.gradient(sum, T(randn(3, 3))) == T(ones(3, 3)) - @test ForwardDiff.gradient(x -> dot(M, x), T(randn(3, 3))) == T(M) +@testset "LowerTriangular, UpperTriangular and Diagonal" begin + for n in (3, 10, 20) + M = rand(n, n) + for T in (LowerTriangular, UpperTriangular, Diagonal) + @test ForwardDiff.gradient(sum, T(randn(n, n))) == T(ones(n, n)) + @test ForwardDiff.gradient(x -> dot(M, x), T(randn(n, n))) == T(M) + end end end From 161d227711a1501686b82d38063b7a121a23bf00 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 2 Apr 2025 17:31:59 +0200 Subject: [PATCH 3/5] Base vector vs chunk mode on structural length --- src/gradient.jl | 8 ++++---- src/jacobian.jl | 12 ++++++------ src/prelude.jl | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/gradient.jl b/src/gradient.jl index 3000270a..7c63f998 100644 --- a/src/gradient.jl +++ b/src/gradient.jl @@ -16,7 +16,7 @@ Set `check` to `Val{false}()` to disable tag checking. This can lead to perturba function gradient(f::F, x::AbstractArray, cfg::GradientConfig{T} = GradientConfig(f, x), ::Val{CHK}=Val{true}()) where {F, T, CHK} require_one_based_indexing(x) CHK && checktag(T, f, x) - if chunksize(cfg) == length(x) + if chunksize(cfg) == structural_length(x) return vector_mode_gradient(f, x, cfg) else return chunk_mode_gradient(f, x, cfg) @@ -35,7 +35,7 @@ This method assumes that `isa(f(x), Real)`. function gradient!(result::Union{AbstractArray,DiffResult}, f::F, x::AbstractArray, cfg::GradientConfig{T} = GradientConfig(f, x), ::Val{CHK}=Val{true}()) where {T, CHK, F} result isa DiffResult ? require_one_based_indexing(x) : require_one_based_indexing(result, x) CHK && checktag(T, f, x) - if chunksize(cfg) == length(x) + if chunksize(cfg) == structural_length(x) vector_mode_gradient!(result, f, x, cfg) else chunk_mode_gradient!(result, f, x, cfg) @@ -114,10 +114,10 @@ end function chunk_mode_gradient_expr(result_definition::Expr) return quote - @assert length(x) >= N "chunk size cannot be greater than length(x) ($(N) > $(length(x)))" + @assert structural_length(x) >= N "chunk size cannot be greater than ForwardDiff.structural_length(x) ($(N) > $(structural_length(x)))" # precalculate loop bounds - xlen = length(x) + xlen = structural_length(x) remainder = xlen % N lastchunksize = ifelse(remainder == 0, N, remainder) lastchunkindex = xlen - lastchunksize + 1 diff --git a/src/jacobian.jl b/src/jacobian.jl index e7ca96f5..d654d38b 100644 --- a/src/jacobian.jl +++ b/src/jacobian.jl @@ -18,7 +18,7 @@ Set `check` to `Val{false}()` to disable tag checking. This can lead to perturba function jacobian(f::F, x::AbstractArray, cfg::JacobianConfig{T} = JacobianConfig(f, x), ::Val{CHK}=Val{true}()) where {F,T,CHK} require_one_based_indexing(x) CHK && checktag(T, f, x) - if chunksize(cfg) == length(x) + if chunksize(cfg) == structural_length(x) return vector_mode_jacobian(f, x, cfg) else return chunk_mode_jacobian(f, x, cfg) @@ -36,7 +36,7 @@ Set `check` to `Val{false}()` to disable tag checking. This can lead to perturba function jacobian(f!::F, y::AbstractArray, x::AbstractArray, cfg::JacobianConfig{T} = JacobianConfig(f!, y, x), ::Val{CHK}=Val{true}()) where {F,T, CHK} require_one_based_indexing(y, x) CHK && checktag(T, f!, x) - if chunksize(cfg) == length(x) + if chunksize(cfg) == structural_length(x) return vector_mode_jacobian(f!, y, x, cfg) else return chunk_mode_jacobian(f!, y, x, cfg) @@ -57,7 +57,7 @@ Set `check` to `Val{false}()` to disable tag checking. This can lead to perturba function jacobian!(result::Union{AbstractArray,DiffResult}, f::F, x::AbstractArray, cfg::JacobianConfig{T} = JacobianConfig(f, x), ::Val{CHK}=Val{true}()) where {F,T, CHK} result isa DiffResult ? require_one_based_indexing(x) : require_one_based_indexing(result, x) CHK && checktag(T, f, x) - if chunksize(cfg) == length(x) + if chunksize(cfg) == structural_length(x) vector_mode_jacobian!(result, f, x, cfg) else chunk_mode_jacobian!(result, f, x, cfg) @@ -78,7 +78,7 @@ Set `check` to `Val{false}()` to disable tag checking. This can lead to perturba function jacobian!(result::Union{AbstractArray,DiffResult}, f!::F, y::AbstractArray, x::AbstractArray, cfg::JacobianConfig{T} = JacobianConfig(f!, y, x), ::Val{CHK}=Val{true}()) where {F,T,CHK} result isa DiffResult ? require_one_based_indexing(y, x) : require_one_based_indexing(result, y, x) CHK && checktag(T, f!, x) - if chunksize(cfg) == length(x) + if chunksize(cfg) == structural_length(x) vector_mode_jacobian!(result, f!, y, x, cfg) else chunk_mode_jacobian!(result, f!, y, x, cfg) @@ -169,10 +169,10 @@ const JACOBIAN_ERROR = DimensionMismatch("jacobian(f, x) expects that f(x) is an function jacobian_chunk_mode_expr(work_array_definition::Expr, compute_ydual::Expr, result_definition::Expr, y_definition::Expr) return quote - @assert length(x) >= N "chunk size cannot be greater than length(x) ($(N) > $(length(x)))" + @assert structural_length(x) >= N "chunk size cannot be greater than ForwardDiff.structural_length(x) ($(N) > $(structural_length(x)))" # precalculate loop bounds - xlen = length(x) + xlen = structural_length(x) remainder = xlen % N lastchunksize = ifelse(remainder == 0, N, remainder) lastchunkindex = xlen - lastchunksize + 1 diff --git a/src/prelude.jl b/src/prelude.jl index 79cdf75f..9e037afa 100644 --- a/src/prelude.jl +++ b/src/prelude.jl @@ -12,15 +12,15 @@ function Chunk(input_length::Integer, threshold::Integer = DEFAULT_CHUNK_THRESHO Base.@nif 12 d->(N == d) d->(Chunk{d}()) d->(Chunk{N}()) end -_length_structural_nonzero_indices(x::AbstractArray) = length(x) -function _length_structural_nonzero_indices(x::Union{LowerTriangular,UpperTriangular}) +structural_length(x::AbstractArray) = length(x) +function structural_length(x::Union{LowerTriangular,UpperTriangular}) n = size(x, 1) return (n * (n + 1)) >> 1 end -_length_structural_nonzero_indices(x::Diagonal) = size(x, 1) +structural_length(x::Diagonal) = size(x, 1) function Chunk(x::AbstractArray, threshold::Integer = DEFAULT_CHUNK_THRESHOLD) - return Chunk(_length_structural_nonzero_indices(x), threshold) + return Chunk(structural_length(x), threshold) end # Constrained to `N <= threshold`, minimize (in order of priority): From cf9f4bb6a1df8e305f8b8a77b1cb329e9d645d57 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 2 Apr 2025 18:55:14 +0200 Subject: [PATCH 4/5] Fix tests --- src/apiutils.jl | 49 +++++++++++++++++++++++++++---------------------- src/gradient.jl | 9 ++++----- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/src/apiutils.jl b/src/apiutils.jl index 362bf63a..39608d6c 100644 --- a/src/apiutils.jl +++ b/src/apiutils.jl @@ -41,23 +41,38 @@ end end # Only seed indices that are structurally non-zero -_structural_nonzero_indices(x::AbstractArray) = eachindex(x) -function _structural_nonzero_indices(x::UpperTriangular) +structural_eachindex(x::AbstractArray) = structural_eachindex(x, x) +function structural_eachindex(x::AbstractArray, y::AbstractArray) + require_one_based_indexing(x, y) + eachindex(x, y) +end +function structural_eachindex(x::UpperTriangular, y::AbstractArray) + require_one_based_indexing(x, y) + if size(x) != size(y) + throw(DimensionMismatch()) + end n = size(x, 1) return (CartesianIndex(i, j) for j in 1:n for i in 1:j) end -function _structural_nonzero_indices(x::LowerTriangular) +function structural_eachindex(x::LowerTriangular, y::AbstractArray) + require_one_based_indexing(x, y) + if size(x) != size(y) + throw(DimensionMismatch()) + end n = size(x, 1) return (CartesianIndex(i, j) for j in 1:n for i in j:n) end -_structural_nonzero_indices(x::Diagonal) = diagind(x) +function structural_eachindex(x::Diagonal, y::AbstractArray) + require_one_based_indexing(x, y) + if size(x) != size(y) + throw(DimensionMismatch()) + end + return diagind(x) +end function seed!(duals::AbstractArray{Dual{T,V,N}}, x, seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N} - if eachindex(duals) != eachindex(x) - throw(ArgumentError("indices of input array and array of duals are not identical")) - end - for idx in _structural_nonzero_indices(duals) + for idx in structural_eachindex(duals, x) duals[idx] = Dual{T,V,N}(x[idx], seed) end return duals @@ -65,10 +80,7 @@ end function seed!(duals::AbstractArray{Dual{T,V,N}}, x, seeds::NTuple{N,Partials{N,V}}) where {T,V,N} - if eachindex(duals) != eachindex(x) - throw(ArgumentError("indices of input array and array of duals are not identical")) - end - for (i, idx) in enumerate(_structural_nonzero_indices(duals)) + for (i, idx) in zip(1:N, structural_eachindex(duals, x)) duals[idx] = Dual{T,V,N}(x[idx], seeds[i]) end return duals @@ -76,11 +88,8 @@ end function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index, seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N} - if eachindex(duals) != eachindex(x) - throw(ArgumentError("indices of input array and array of duals are not identical")) - end offset = index - 1 - idxs = Iterators.drop(_structural_nonzero_indices(duals), offset) + idxs = Iterators.drop(structural_eachindex(duals, x), offset) for idx in idxs duals[idx] = Dual{T,V,N}(x[idx], seed) end @@ -89,13 +98,9 @@ end function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index, seeds::NTuple{N,Partials{N,V}}, chunksize = N) where {T,V,N} - if eachindex(duals) != eachindex(x) - throw(ArgumentError("indices of input array and array of duals are not identical")) - end offset = index - 1 - idxs = Iterators.drop(_structural_nonzero_indices(duals), offset) - for (i, idx) in enumerate(idxs) - i > chunksize && break + idxs = Iterators.drop(structural_eachindex(duals, x), offset) + for (i, idx) in zip(1:chunksize, idxs) duals[idx] = Dual{T,V,N}(x[idx], seeds[i]) end return duals diff --git a/src/gradient.jl b/src/gradient.jl index 7c63f998..98bdb091 100644 --- a/src/gradient.jl +++ b/src/gradient.jl @@ -64,8 +64,8 @@ end extract_gradient!(::Type{T}, result::AbstractArray, y::Real) where {T} = fill!(result, zero(y)) function extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T} - idxs = _structural_nonzero_indices(result) - for (i, idx) in enumerate(idxs) + idxs = structural_eachindex(result) + for (i, idx) in zip(1:npartials(dual), idxs) result[idx] = partials(T, dual, i) end return result @@ -73,9 +73,8 @@ end function extract_gradient_chunk!(::Type{T}, result, dual, index, chunksize) where {T} offset = index - 1 - idxs = Iterators.drop(_structural_nonzero_indices(result), offset) - for (i, idx) in enumerate(idxs) - i > chunksize && break + idxs = Iterators.drop(structural_eachindex(result), offset) + for (i, idx) in zip(1:chunksize, idxs) result[idx] = partials(T, dual, i) end return result From d2a8af7eaa9c43820c686f2abc9384de129da96f Mon Sep 17 00:00:00 2001 From: David Widmann Date: Wed, 2 Apr 2025 22:12:48 +0200 Subject: [PATCH 5/5] Check number of function evaluations and chunk sizes --- test/GradientTest.jl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/GradientTest.jl b/test/GradientTest.jl index 7a6661cb..4f46c167 100644 --- a/test/GradientTest.jl +++ b/test/GradientTest.jl @@ -233,6 +233,24 @@ end for T in (LowerTriangular, UpperTriangular, Diagonal) @test ForwardDiff.gradient(sum, T(randn(n, n))) == T(ones(n, n)) @test ForwardDiff.gradient(x -> dot(M, x), T(randn(n, n))) == T(M) + + # Check number of function evaluations and chunk sizes + fevals = Ref(0) + npartials = Ref(0) + y = ForwardDiff.gradient(T(randn(n, n))) do x + fevals[] += 1 + npartials[] += ForwardDiff.npartials(eltype(x)) + return sum(x) + end + if npartials[] <= ForwardDiff.DEFAULT_CHUNK_THRESHOLD + # Vector mode (single evaluation) + @test fevals[] == 1 + @test npartials[] == sum(y) + else + # Chunk mode (multiple evaluations) + @test fevals[] > 1 + @test sum(y) <= npartials[] < sum(y) + fevals[] + end end end end