From 83e47e9138fcec6df85ae2c25f74ea7593df5f27 Mon Sep 17 00:00:00 2001 From: quinnj Date: Fri, 10 Aug 2018 11:06:13 -0600 Subject: [PATCH] Updates for 1.0 --- REQUIRE | 1 - src/DataArrays.jl | 1 - src/abstractdataarray.jl | 7 +- src/broadcast.jl | 436 ++++++++-------- src/dataarray.jl | 72 ++- src/datamatrix.jl | 3 +- src/datavector.jl | 12 +- src/indexing.jl | 11 +- src/linalg.jl | 8 +- src/literals.jl | 8 +- src/operators.jl | 29 +- src/pooleddataarray.jl | 14 +- src/reduce.jl | 34 +- src/reducedim.jl | 1074 +++++++++++++++++++------------------- test/constructors.jl | 8 +- test/dataarray.jl | 2 +- test/runtests.jl | 1 - 17 files changed, 859 insertions(+), 862 deletions(-) diff --git a/REQUIRE b/REQUIRE index 6c88a77..71caaed 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,5 +1,4 @@ julia 0.7 -Compat 0.43.0 Missings 0.2.1 StatsBase 0.15.0 Reexport diff --git a/src/DataArrays.jl b/src/DataArrays.jl index 9ed5fdb..9a8d969 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -4,7 +4,6 @@ module DataArrays @reexport using StatsBase @reexport using Missings using SpecialFunctions - using Compat: AbstractRange, Nothing, Cvoid, uninitialized, invpermute! using Printf, Dates const DEFAULT_POOLED_REF_TYPE = UInt32 diff --git a/src/abstractdataarray.jl b/src/abstractdataarray.jl index 13c1ddc..3e83121 100644 --- a/src/abstractdataarray.jl +++ b/src/abstractdataarray.jl @@ -24,9 +24,10 @@ Base.eltype(d::AbstractDataArray{T, N}) where {T, N} = Union{T,Missing} # Generic iteration over AbstractDataArray's -Base.start(x::AbstractDataArray) = 1 -Base.next(x::AbstractDataArray, state::Integer) = (x[state], state + 1) -Base.done(x::AbstractDataArray, state::Integer) = state > length(x) +function Base.iterate(x::AbstractDataArray, st=1) + st > length(x) && return nothing + return (x[st], st + 1) +end # FIXME: type piracy """ diff --git a/src/broadcast.jl b/src/broadcast.jl index c32493b..2a0a61b 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -1,218 +1,218 @@ -using Base: @get!, promote_eltype -using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache - -_broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) - -# Get ref for value for a PooledDataArray, adding to the pool if -# necessary -_unsafe_pdaref!(Bpool, Brefdict::Dict, val::Missing) = 0 -function _unsafe_pdaref!(Bpool, Brefdict::Dict, val) - @get! Brefdict val begin - push!(Bpool, val) - length(Bpool) - end -end - -# Generate a branch for each possible combination of missing/not missing. This -# gives good performance at the cost of 2^narrays branches. 
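For reference, the `Base.iterate` method introduced above in abstractdataarray.jl follows the single-function iteration protocol that replaces `start`/`next`/`done` on Julia 0.7/1.0. A minimal sketch of how a `for` loop lowers onto it, assuming a hypothetical indexable container `c` (not part of this package):

    next = iterate(c)             # `nothing` when the container is empty
    while next !== nothing
        (x, state) = next         # current element plus iteration state
        # ... use x ...
        next = iterate(c, state)  # `nothing` once exhausted
    end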
-function gen_na_conds(f, nd, arrtype, outtype, - daidx=findall(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, ismissing=()) - - if pos > length(daidx) - args = Any[Symbol("v_$(k)") for k = 1:length(arrtype)] - for i = 1:length(daidx) - if ismissing[i] - args[daidx[i]] = missing - end - end - - # Needs to be gensymmed so that the compiler won't box it - val = gensym("val") - quote - $val = $(Expr(:call, f, args...)) - $(if outtype <: DataArray - :(@inbounds unsafe_dasetindex!(Bdata, Bc, $val, ind)) - elseif outtype <: PooledDataArray - :(@inbounds (@nref $nd Brefs i) = _unsafe_pdaref!(Bpool, Brefdict, $val)) - end) - end - else - k = daidx[pos] - quote - if $(Symbol("ismissing_$(k)")) - $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(ismissing..., true))) - else - $(if arrtype[k] <: DataArray - :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("data_$(k)"))[$(Symbol("state_$(k)_0"))]) - else - :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("pool_$(k)"))[$(Symbol("r_$(k)"))]) - end) - $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(ismissing..., false))) - end - end - end -end - -# Broadcast implementation for DataArrays -# -# TODO: Fall back on faster implementation for same-sized inputs when -# it is safe to do so. -Base.map!(f::F, B::Union{DataArray, PooledDataArray}, A0::AbstractArray, As::AbstractArray...) where {F} = - broadcast!(f, B, A0, As...) -Base.map!(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) where {F} = - broadcast!(f, B, A0, As...) - -@generated function _broadcast!(f, B::Union{DataArray, PooledDataArray}, As...) - - F = :(f) - nd = ndims(B) - N = length(As) - - dataarrays = find(t -> t <: DataArray, As) - - quote - @boundscheck Base.Broadcast.check_broadcast_indices(indices(B), As...) - # check_broadcast_shape(size(B), As...) - @nexprs $N i->(A_i = As[i]) - - @assert ndims(B) == $nd - - # Set up input DataArray/PooledDataArrays - $(Expr(:block, [ - As[k] <: DataArray ? quote - $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks - $(Symbol("data_$(k)")) = $(Symbol("A_$(k)")).data - $(Symbol("state_$(k)_0")) = $(Symbol("state_$(k)_$(nd)")) = 1 - @nexprs $nd d->($(Symbol("skip_$(k)_d")) = size($(Symbol("data_$(k)")), d) == 1) - end : As[k] <: PooledDataArray ? quote - $(Symbol("refs_$(k)")) = $(Symbol("A_$(k)")).refs - $(Symbol("pool_$(k)")) = $(Symbol("A_$(k)")).pool - end : nothing - for k = 1:N]...)) - - # Set up output DataArray/PooledDataArray - $(if B <: DataArray - quote - Bdata = B.data - # Copy in case aliased - # TODO: check for aliasing? - Bna = falses(size(Bdata)) - Bc = Bna.chunks - ind = 1 - end - elseif B <: PooledDataArray - quote - Bpool = B.pool = similar(B.pool, 0) - Brefs = B.refs - Brefdict = Dict{eltype(Bpool),eltype(Brefs)}() - end - end) - - @nloops($nd, i, $(B <: DataArray ? (:Bdata) : (:Brefs)), - # pre - d->($(Expr(:block, [ - As[k] <: DataArray ? quote - $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); - $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d - end : (As[k] <: AbstractArray ? quote - $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d - end : quote - $(Symbol("j_$(k)_d")) = 1 - end) - for k = 1:N]...))), - - # post - d->($(Expr(:block, [quote - $(Symbol("skip_$(k)_d")) || ($(Symbol("state_$(k)_d")) = $(Symbol("state_$(k)_0"))) - end for k in dataarrays]...))), - - # body - begin - # Advance iterators for DataArray and determine missing status - $(Expr(:block, [ - As[k] <: DataArray ? 
quote - @inbounds $(Symbol("ismissing_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) - end : As[k] <: PooledDataArray ? quote - @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) - $(Symbol("ismissing_$(k)")) = $(Symbol("r_$(k)")) == 0 - end : nothing - for k = 1:N]...)) - - # Extract values for other type - $(Expr(:block, [ - As[k] <: AbstractArray && !(As[k] <: AbstractDataArray) ? quote - # ordinary AbstractArrays - @inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d")) - end : quote - # non AbstractArrays (e.g. Strings and Numbers) - @inbounds $(Symbol("v_$(k)")) = $(Symbol("A_$(k)")) - end - for k = 1:N]...)) - - # Compute and store return value - $(gen_na_conds(F, nd, As, B)) - - # Increment state - $(Expr(:block, [:($(Symbol("state_$(k)_0")) += 1) for k in dataarrays]...)) - $(if B <: DataArray - :(ind += 1) - end) - end) - - $(if B <: DataArray - :(B.na = Bna) - end) - - return B - end -end -Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, ::Type{T}, As...) where T = - _broadcast!((t...) -> f(T, t...), B, As...) -Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0::Number, As::Number...) = - _broadcast!(f, B, A0, As...) -Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0, As...) = - _broadcast!(f, B, A0, As...) - -Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{DataArray}) = DataArray -Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{PooledDataArray}) = PooledDataArray -Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{Array}) = DataArray -Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{Array}) = PooledDataArray -Base.Broadcast.promote_containertype(::Type{Array}, ::Type{DataArray}) = DataArray -Base.Broadcast.promote_containertype(::Type{Array}, ::Type{PooledDataArray}) = PooledDataArray -Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{PooledDataArray}) = DataArray -Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{DataArray}) = DataArray -Base.Broadcast.promote_containertype(::Type{DataArray}, ct) = DataArray -Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ct) = PooledDataArray -Base.Broadcast.promote_containertype(ct, ::Type{DataArray}) = DataArray -Base.Broadcast.promote_containertype(ct, ::Type{PooledDataArray}) = PooledDataArray -Base.Broadcast._containertype(::Type{T}) where T<:DataArray = DataArray -Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray = PooledDataArray -Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A) - -@inline function broadcast_t(f, ::Type{T}, shape, A, Bs...) where {T} - dest = Base.Broadcast.containertype(A, Bs...)(Missings.T(T), Base.index_lengths(shape...)) - return broadcast!(f, dest, A, Bs...) -end - -# This is mainly to handle ismissing.(x) since ismissing is probably the only -# function that can guarantee that missings will never propagate -@inline function broadcast_t(f, ::Type{Bool}, shape, A, Bs...) - dest = similar(BitArray, shape) - return broadcast!(f, dest, A, Bs...) -end - -# This one is almost identical to the version in Base and can hopefully be -# removed at some point. The main issue in Base is that it tests for -# isleaftype(T) which is false for Union{T,Missing}. 
If the test in Base -# can be modified to cover simple unions of leaftypes then this method -# can probably be deleted and the two _t methods adjusted to match the Base -# invokation from Base.Broadcast.broadcast_c -@inline function Base.Broadcast.broadcast_c(f, ::Type{S}, A, Bs...) where {S<:AbstractDataArray} - T = Base.Broadcast._broadcast_eltype(f, A, Bs...) - shape = Base.Broadcast.broadcast_indices(A, Bs...) - return broadcast_t(f, T, shape, A, Bs...) -end - -# This one is much faster than normal broadcasting but the method won't get called -# in fusing operations like (!).(ismissing.(x)) -Base.broadcast(::typeof(ismissing), da::DataArray) = copy(da.na) +# using Base: @get!, promote_eltype +# using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache + +# _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) + +# # Get ref for value for a PooledDataArray, adding to the pool if +# # necessary +# _unsafe_pdaref!(Bpool, Brefdict::Dict, val::Missing) = 0 +# function _unsafe_pdaref!(Bpool, Brefdict::Dict, val) +# @get! Brefdict val begin +# push!(Bpool, val) +# length(Bpool) +# end +# end + +# # Generate a branch for each possible combination of missing/not missing. This +# # gives good performance at the cost of 2^narrays branches. +# function gen_na_conds(f, nd, arrtype, outtype, +# daidx=findall(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, ismissing=()) + +# if pos > length(daidx) +# args = Any[Symbol("v_$(k)") for k = 1:length(arrtype)] +# for i = 1:length(daidx) +# if ismissing[i] +# args[daidx[i]] = missing +# end +# end + +# # Needs to be gensymmed so that the compiler won't box it +# val = gensym("val") +# quote +# $val = $(Expr(:call, f, args...)) +# $(if outtype <: DataArray +# :(@inbounds unsafe_dasetindex!(Bdata, Bc, $val, ind)) +# elseif outtype <: PooledDataArray +# :(@inbounds (@nref $nd Brefs i) = _unsafe_pdaref!(Bpool, Brefdict, $val)) +# end) +# end +# else +# k = daidx[pos] +# quote +# if $(Symbol("ismissing_$(k)")) +# $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(ismissing..., true))) +# else +# $(if arrtype[k] <: DataArray +# :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("data_$(k)"))[$(Symbol("state_$(k)_0"))]) +# else +# :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("pool_$(k)"))[$(Symbol("r_$(k)"))]) +# end) +# $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(ismissing..., false))) +# end +# end +# end +# end + +# # Broadcast implementation for DataArrays +# # +# # TODO: Fall back on faster implementation for same-sized inputs when +# # it is safe to do so. +# Base.map!(f::F, B::Union{DataArray, PooledDataArray}, A0::AbstractArray, As::AbstractArray...) where {F} = +# broadcast!(f, B, A0, As...) +# Base.map!(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) where {F} = +# broadcast!(f, B, A0, As...) + +# @generated function _broadcast!(f, B::Union{DataArray, PooledDataArray}, As...) + +# F = :(f) +# nd = ndims(B) +# N = length(As) + +# dataarrays = find(t -> t <: DataArray, As) + +# quote +# @boundscheck Base.Broadcast.check_broadcast_indices(indices(B), As...) +# # check_broadcast_shape(size(B), As...) +# @nexprs $N i->(A_i = As[i]) + +# @assert ndims(B) == $nd + +# # Set up input DataArray/PooledDataArrays +# $(Expr(:block, [ +# As[k] <: DataArray ? 
quote +# $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks +# $(Symbol("data_$(k)")) = $(Symbol("A_$(k)")).data +# $(Symbol("state_$(k)_0")) = $(Symbol("state_$(k)_$(nd)")) = 1 +# @nexprs $nd d->($(Symbol("skip_$(k)_d")) = size($(Symbol("data_$(k)")), d) == 1) +# end : As[k] <: PooledDataArray ? quote +# $(Symbol("refs_$(k)")) = $(Symbol("A_$(k)")).refs +# $(Symbol("pool_$(k)")) = $(Symbol("A_$(k)")).pool +# end : nothing +# for k = 1:N]...)) + +# # Set up output DataArray/PooledDataArray +# $(if B <: DataArray +# quote +# Bdata = B.data +# # Copy in case aliased +# # TODO: check for aliasing? +# Bna = falses(size(Bdata)) +# Bc = Bna.chunks +# ind = 1 +# end +# elseif B <: PooledDataArray +# quote +# Bpool = B.pool = similar(B.pool, 0) +# Brefs = B.refs +# Brefdict = Dict{eltype(Bpool),eltype(Brefs)}() +# end +# end) + +# @nloops($nd, i, $(B <: DataArray ? (:Bdata) : (:Brefs)), +# # pre +# d->($(Expr(:block, [ +# As[k] <: DataArray ? quote +# $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); +# $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d +# end : (As[k] <: AbstractArray ? quote +# $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d +# end : quote +# $(Symbol("j_$(k)_d")) = 1 +# end) +# for k = 1:N]...))), + +# # post +# d->($(Expr(:block, [quote +# $(Symbol("skip_$(k)_d")) || ($(Symbol("state_$(k)_d")) = $(Symbol("state_$(k)_0"))) +# end for k in dataarrays]...))), + +# # body +# begin +# # Advance iterators for DataArray and determine missing status +# $(Expr(:block, [ +# As[k] <: DataArray ? quote +# @inbounds $(Symbol("ismissing_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) +# end : As[k] <: PooledDataArray ? quote +# @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) +# $(Symbol("ismissing_$(k)")) = $(Symbol("r_$(k)")) == 0 +# end : nothing +# for k = 1:N]...)) + +# # Extract values for other type +# $(Expr(:block, [ +# As[k] <: AbstractArray && !(As[k] <: AbstractDataArray) ? quote +# # ordinary AbstractArrays +# @inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d")) +# end : quote +# # non AbstractArrays (e.g. Strings and Numbers) +# @inbounds $(Symbol("v_$(k)")) = $(Symbol("A_$(k)")) +# end +# for k = 1:N]...)) + +# # Compute and store return value +# $(gen_na_conds(F, nd, As, B)) + +# # Increment state +# $(Expr(:block, [:($(Symbol("state_$(k)_0")) += 1) for k in dataarrays]...)) +# $(if B <: DataArray +# :(ind += 1) +# end) +# end) + +# $(if B <: DataArray +# :(B.na = Bna) +# end) + +# return B +# end +# end +# Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, ::Type{T}, As...) where T = +# _broadcast!((t...) -> f(T, t...), B, As...) +# Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0::Number, As::Number...) = +# _broadcast!(f, B, A0, As...) +# Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0, As...) = +# _broadcast!(f, B, A0, As...) 
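The `promote_containertype`/`_containertype` hooks in the commented-out block below were removed from Base in 0.7. A minimal sketch of the `Broadcast.BroadcastStyle` interface that replaced them, using a hypothetical wrapper type `MyArray` (the name and fields are illustrative only):

    struct MyArray{T,N} <: AbstractArray{T,N}
        data::Array{T,N}
    end
    Base.size(A::MyArray) = size(A.data)
    Base.getindex(A::MyArray, i::Int...) = A.data[i...]

    # Opt the type into its own broadcast style, and tell broadcast how to
    # allocate a destination for that style.
    Base.BroadcastStyle(::Type{<:MyArray}) = Base.Broadcast.ArrayStyle{MyArray}()
    Base.similar(bc::Base.Broadcast.Broadcasted{Base.Broadcast.ArrayStyle{MyArray}}, ::Type{T}) where {T} =
        MyArray(similar(Array{T}, axes(bc)))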
+ +# Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{DataArray}) = DataArray +# Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{PooledDataArray}) = PooledDataArray +# Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{Array}) = DataArray +# Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{Array}) = PooledDataArray +# Base.Broadcast.promote_containertype(::Type{Array}, ::Type{DataArray}) = DataArray +# Base.Broadcast.promote_containertype(::Type{Array}, ::Type{PooledDataArray}) = PooledDataArray +# Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{PooledDataArray}) = DataArray +# Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{DataArray}) = DataArray +# Base.Broadcast.promote_containertype(::Type{DataArray}, ct) = DataArray +# Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ct) = PooledDataArray +# Base.Broadcast.promote_containertype(ct, ::Type{DataArray}) = DataArray +# Base.Broadcast.promote_containertype(ct, ::Type{PooledDataArray}) = PooledDataArray +# Base.Broadcast._containertype(::Type{T}) where T<:DataArray = DataArray +# Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray = PooledDataArray +# Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A) + +# @inline function broadcast_t(f, ::Type{T}, shape, A, Bs...) where {T} +# dest = Base.Broadcast.containertype(A, Bs...)(Missings.T(T), Base.index_lengths(shape...)) +# return broadcast!(f, dest, A, Bs...) +# end + +# # This is mainly to handle ismissing.(x) since ismissing is probably the only +# # function that can guarantee that missings will never propagate +# @inline function broadcast_t(f, ::Type{Bool}, shape, A, Bs...) +# dest = similar(BitArray, shape) +# return broadcast!(f, dest, A, Bs...) +# end + +# # This one is almost identical to the version in Base and can hopefully be +# # removed at some point. The main issue in Base is that it tests for +# # isleaftype(T) which is false for Union{T,Missing}. If the test in Base +# # can be modified to cover simple unions of leaftypes then this method +# # can probably be deleted and the two _t methods adjusted to match the Base +# # invokation from Base.Broadcast.broadcast_c +# @inline function Base.Broadcast.broadcast_c(f, ::Type{S}, A, Bs...) where {S<:AbstractDataArray} +# T = Base.Broadcast._broadcast_eltype(f, A, Bs...) +# shape = Base.Broadcast.broadcast_indices(A, Bs...) +# return broadcast_t(f, T, shape, A, Bs...) +# end + +# # This one is much faster than normal broadcasting but the method won't get called +# # in fusing operations like (!).(ismissing.(x)) +# Base.broadcast(::typeof(ismissing), da::DataArray) = copy(da.na) diff --git a/src/dataarray.jl b/src/dataarray.jl index 18108d7..338a680 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -43,7 +43,7 @@ mutable struct DataArray{T, N} <: AbstractDataArray{T, N} if eltype(d) >: Missing # If the original eltype is wider than the target eltype T, conversion may fail # in the presence of missings: we need to allocate a copy, leaving entries - # corresponding to missings uninitialized + # corresponding to missings undef if eltype(d) <: T @inbounds for i in eachindex(d) if isassigned(d, i) && ismissing(d, i) @@ -86,7 +86,7 @@ function DataArray(d::Array, m::AbstractArray{Bool}) # -> DataArray{T} end function DataArray(T::Type, dims::Integer...) 
# -> DataArray{T} - return DataArray(Array{Missings.T(T)}(uninitialized, dims...), trues(dims...)) + return DataArray(Array{Missings.T(T)}(undef, dims...), trues(dims...)) end function DataArray(T::Type, dims::NTuple{N, Int}) where N # -> DataArray{T} @@ -107,11 +107,11 @@ A 2-dimensional `DataArray` with element type `T`. """ const DataMatrix{T} = DataArray{T, 2} -Base.copy(d::DataArray) = Base.copy!(similar(d), d) # -> DataArray{T} +Base.copy(d::DataArray) = Base.copyto!(similar(d), d) # -> DataArray{T} -function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T} +function Base.copyto!(dest::DataArray, src::DataArray) # -> DataArray{T} if isbits(eltype(src)) && isbits(eltype(dest)) - copy!(dest.data, src.data) + copyto!(dest.data, src.data) else # Elements of src_data are not necessarily initialized, so # only copy initialized elements @@ -125,28 +125,28 @@ function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T} end end end - copy!(dest.na, src.na) + copyto!(dest.na, src.na) dest end -function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray) # -> DataArray{T} - copy!(dest, doffs, src, 1, length(src)) +function Base.copyto!(dest::DataArray, doffs::Integer, src::DataArray) # -> DataArray{T} + copyto!(dest, doffs, src, 1, length(src)) end # redundant on Julia 0.4 -function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer) # -> DataArray{T} +function Base.copyto!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer) # -> DataArray{T} soffs <= length(src) || throw(BoundsError()) - copy!(dest, doffs, src, soffs, length(src)-soffs+1) + copyto!(dest, doffs, src, soffs, length(src)-soffs+1) end -function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer, n::Integer) # -> DataArray{T} +function Base.copyto!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer, n::Integer) # -> DataArray{T} if n == 0 return dest elseif n < 0 throw(ArgumentError("tried to copy n=$n elements, but n should be nonnegative")) end if isbits(eltype(src)) - copy!(dest.data, doffs, src.data, soffs, n) + copyto!(dest.data, doffs, src.data, soffs, n) else # Elements of src_data are not necessarily initialized, so # only copy initialized elements @@ -165,7 +165,7 @@ function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Inte end end end - copy!(dest.na, doffs, src.na, soffs, n) + copyto!(dest.na, doffs, src.na, soffs, n) dest end @@ -185,15 +185,15 @@ function Base.resize!(da::DataArray{T,1}, n::Int) where T end function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T} - return DataArray(Array{Missings.T(T)}(uninitialized, dims), trues(dims)) + return DataArray(Array{Missings.T(T)}(undef, dims), trues(dims)) end Base.size(d::DataArray) = size(d.data) # -> (Int...) 
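A few of the mechanical 0.7 renames applied throughout this hunk, shown on a plain Vector for reference (values illustrative):

    v = Vector{Float64}(undef, 3)   # was Vector{Float64}(uninitialized, 3)
    copyto!(v, [1.0, 2.0, 3.0])     # was copy!(v, ...)
    v[lastindex(v)]                 # was v[endof(v)]; same as v[end]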
Base.ndims(da::DataArray) = ndims(da.data) # -> Int Base.length(d::DataArray) = length(d.data) # -> Int -Base.endof(da::DataArray) = endof(da.data) # -> Int +Base.lastindex(da::DataArray) = lastindex(da.data) # -> Int -function Base.find(da::DataArray{Bool}) # -> Array{Int} +function Base.findall(da::DataArray{Bool}) # -> Array{Int} data = da.data ntrue = 0 @inbounds @bitenumerate da.na i na begin @@ -238,7 +238,7 @@ function Base.convert( ) where {S, T, N} # -> Array{S, N} replacementS = convert(S, replacement) - res = Array{S}(uninitialized, size(da)) + res = Array{S}(undef, size(da)) for i in 1:length(da) if da.na[i] res[i] = replacementS @@ -267,16 +267,12 @@ struct EachFailMissing{T<:DataArray} end Missings.fail(da::DataArray) = EachFailMissing(da) Base.length(itr::EachFailMissing) = length(itr.da) -Base.start(itr::EachFailMissing) = 1 -Base.done(itr::EachFailMissing, ind::Integer) = ind > length(itr) -Base.eltype(itr::EachFailMissing) = Missings.T(eltype(itr.da)) -function Base.next(itr::EachFailMissing, ind::Integer) - if itr.da.na[ind] - throw(MissingException("missing value encountered in Missings.fail")) - else - (itr.da.data[ind], ind + 1) - end +function Base.iterate(itr::EachFailMissing, st=1) + st > length(itr) && return nothing + itr.da.na[st] && throw(MissingException("missing value encountered in Missings.fail")) + return (itr.da.data[st], st + 1) end +Base.eltype(itr::EachFailMissing) = Missings.T(eltype(itr.da)) struct EachDropMissing{T<:DataArray} da::T @@ -290,12 +286,11 @@ function _next_nonna_ind(da::DataArray, ind::Int) ind end Base.length(itr::EachDropMissing) = length(itr.da) - sum(itr.da.na) -Base.start(itr::EachDropMissing) = _next_nonna_ind(itr.da, 0) -Base.done(itr::EachDropMissing, ind::Int) = ind > length(itr.da) -Base.eltype(itr::EachDropMissing) = Missings.T(eltype(itr.da)) -function Base.next(itr::EachDropMissing, ind::Int) - (itr.da.data[ind], _next_nonna_ind(itr.da, ind)) +function Base.iterate(itr::EachDropMissing, st=_next_nonna_ind(itr.da, 0)) + st > length(itr.da) && return nothing + return (itr.da.data[st], _next_nonna_ind(itr.da, ind)) end +Base.eltype(itr::EachDropMissing) = Missings.T(eltype(itr.da)) struct EachReplaceMissing{S<:DataArray, T} da::S @@ -304,13 +299,12 @@ end Missings.replace(da::DataArray, replacement::Any) = EachReplaceMissing(da, replacement) Base.length(itr::EachReplaceMissing) = length(itr.da) -Base.start(itr::EachReplaceMissing) = 1 -Base.done(itr::EachReplaceMissing, ind::Integer) = ind > length(itr) -Base.eltype(itr::EachReplaceMissing) = Missings.T(eltype(itr.da)) -function Base.next(itr::EachReplaceMissing, ind::Integer) - item = itr.da.na[ind] ? itr.replacement : itr.da.data[ind] - (item, ind + 1) +function Base.iterate(itr::EachReplaceMissing, st=1) + st > length(itr) && return nothing + item = itr.da.na[st] ? itr.replacement : itr.da.data[st] + return (item, st + 1) end +Base.eltype(itr::EachReplaceMissing) = Missings.T(eltype(itr.da)) Base.collect(itr::EachDropMissing{<:DataVector}) = itr.da.data[.!itr.da.na] # -> Vector Base.collect(itr::EachFailMissing{<:DataVector}) = copy(itr.da.data) # -> Vector @@ -416,7 +410,7 @@ Get the unique values in `da` as well as the index of the first `missing` value in `da` if present, or 0 otherwise. 
""" function finduniques(da::DataArray{T}) where T # -> Vector{T}, Int - out = Vector{T}(uninitialized, 0) + out = Vector{T}(undef, 0) seen = Set{T}() n = length(da) firstmissing = 0 @@ -439,7 +433,7 @@ function Base.unique(da::DataArray{T}) where T # -> DataVector{T} unique_values, firstmissing = finduniques(da) n = length(unique_values) if firstmissing > 0 - res = DataArray(Vector{T}(uninitialized, n + 1)) + res = DataArray(Vector{T}(undef, n + 1)) i = 1 for val in unique_values if i == firstmissing diff --git a/src/datamatrix.jl b/src/datamatrix.jl index 9313663..6d0fee9 100644 --- a/src/datamatrix.jl +++ b/src/datamatrix.jl @@ -1,2 +1,3 @@ # Extract the matrix diagonal -Base.diag(dm::DataMatrix{T}) where {T} = DataArray(diag(dm.data), diag(dm.na)) +using LinearAlgebra +LinearAlgebra.diag(dm::DataMatrix{T}) where {T} = DataArray(diag(dm.data), diag(dm.na)) diff --git a/src/datavector.jl b/src/datavector.jl index 11e8e78..737c646 100644 --- a/src/datavector.jl +++ b/src/datavector.jl @@ -23,19 +23,19 @@ function Base.pop!(dv::DataVector) end end -function Base.unshift!(dv::DataVector{T}, v::Missing) where T +function Base.pushfirst!(dv::DataVector{T}, v::Missing) where T ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), dv.data, 1) pushfirst!(dv.na, true) return v end -function Base.unshift!(dv::DataVector{S}, v::T) where {S, T} +function Base.pushfirst!(dv::DataVector{S}, v::T) where {S, T} pushfirst!(dv.data, v) pushfirst!(dv.na, false) return v end -function Base.shift!(dv::DataVector{T}) where T +function Base.popfirst!(dv::DataVector{T}) where T d, m = popfirst!(dv.data), popfirst!(dv.na) if m return missing @@ -113,18 +113,18 @@ end Base.pop!(pdv::PooledDataVector) = pdv.pool[pop!(pdv.refs)] -function Base.unshift!(pdv::PooledDataVector{T,R}, v::Missing) where {T,R} +function Base.pushfirst!(pdv::PooledDataVector{T,R}, v::Missing) where {T,R} pushfirst!(pdv.refs, zero(R)) return v end -function Base.unshift!(pdv::PooledDataVector{S,R}, v::T) where {S,R,T} +function Base.pushfirst!(pdv::PooledDataVector{S,R}, v::T) where {S,R,T} v = convert(S,v) pushfirst!(pdv.refs, getpoolidx(pdv, v)) return v end -Base.shift!(pdv::PooledDataVector) = pdv.pool[popfirst!(pdv.refs)] +Base.popfirst!(pdv::PooledDataVector) = pdv.pool[popfirst!(pdv.refs)] Base.reverse(x::AbstractDataVector) = x[end:-1:1] diff --git a/src/indexing.jl b/src/indexing.jl index 4b52e8a..854141b 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -62,7 +62,7 @@ function combine_pools!(pool, newpool) end # Find pool elements in existing array, or add them - poolidx = Vector{Int}(uninitialized, length(newpool)) + poolidx = Vector{Int}(undef, length(newpool)) for j = 1:length(newpool) poolidx[j] = Base.@get!(seen, newpool[j], (push!(pool, newpool[j]); i += 1)) end @@ -129,16 +129,17 @@ end N = length(I) quote $(Expr(:meta, :inline)) - flipbits!(dest.na) # similar initializes with missings + dest.na .= .!(dest.na) # similar initializes with missings @nexprs $N d->(J_d = I[d]) srcextr = daextract(src) destextr = daextract(dest) srcsz = size(src) D = eachindex(dest) - Ds = start(D) + Ds = iterate(D) @nloops $N j d->J_d begin - offset_0 = @ncall $N sub2ind srcsz j - d, Ds = next(D, Ds) + offset_0 = @ncall $N LinearIndices srcsz j + d, dstate = Ds + Ds = iterate(D, dstate) if unsafe_ismissing(src, srcextr, offset_0) unsafe_dasetindex!(dest, destextr, missing, d) else diff --git a/src/linalg.jl b/src/linalg.jl index 7ff703f..8a73ed9 100644 --- a/src/linalg.jl +++ b/src/linalg.jl @@ -63,7 +63,7 @@ function 
na_safe_rowmeans(dm::DataMatrix) end # TODO: Default to failure in the face of missings -function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) +function LinearAlgebra.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) # Make a copy of the data that we can alter in place dm = copy(D) @@ -135,9 +135,9 @@ function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4) # Only return the SVD entries, not the imputation return (U[:, 1:k], D[1:k], V[:, 1:k]) end -Base.svd(dm::DataMatrix) = svd(dm, minimum(size(dm))) +LinearAlgebra.svd(dm::DataMatrix) = svd(dm, minimum(size(dm))) -function Base.eig(dm::DataMatrix) +function LinearAlgebra.eigen(dm::DataMatrix) U, D, V = svd(dm) - return eig(U * diagm(D) * V') + return eigen(U * diagm(D) * V') end diff --git a/src/literals.jl b/src/literals.jl index ab16ea5..d18deda 100644 --- a/src/literals.jl +++ b/src/literals.jl @@ -1,7 +1,7 @@ function fixargs(args::Vector{Any}, stub::Any) n = length(args) - data = Array{Any}(uninitialized, n) - na = BitArray(uninitialized, n) + data = Array{Any}(undef, n) + na = BitArray(undef, n) for i in 1:n if args[i] == :missing || args[i] == :NA data[i] = stub @@ -73,8 +73,8 @@ function parsematrix(ex::Expr) end nrows = length(rows) - datarows = Array{Expr}(uninitialized, nrows) - narows = Array{Expr}(uninitialized, nrows) + datarows = Array{Expr}(undef, nrows) + narows = Array{Expr}(undef, nrows) for irow in 1:nrows data, na = fixargs(ex.args[rows[irow]].args, stub) datarows[irow] = Expr(:row, data...) diff --git a/src/operators.jl b/src/operators.jl index f7ba98f..c0ac4b7 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -1,14 +1,15 @@ -const unary_vector_operators = [:(Base.median), +using Statistics +const unary_vector_operators = [:(Statistics.median), :(StatsBase.mad), - :(Base.norm), + :(LinearAlgebra.norm), :(StatsBase.skewness), :(StatsBase.kurtosis)] # TODO: dist, iqr -const binary_vector_operators = [:(Base.dot), - :(Base.cor), - :(Base.cov), +const binary_vector_operators = [:(LinearAlgebra.dot), + :(Statistics.cor), + :(Statistics.cov), :(StatsBase.corspearman)] const rowwise_operators = [:rowminimums, @@ -163,7 +164,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) data1 = $(atype == :DataArray || atype == :(DataArray{Bool}) ? :(a.data) : :a) data2 = $(btype == :DataArray || btype == :(DataArray{Bool}) ? 
:(b.data) : :b) res = Array{promote_op($vectorfunc, eltype(a), eltype(b))}( - uninitialized, promote_shape(size(a), size(b))) + undef, promote_shape(size(a), size(b))) resna = $narule @bitenumerate resna i na begin if !na @@ -210,8 +211,8 @@ end @dataarray_unary(-, Any, T) @dataarray_unary(!, Bool, T) -# Treat ctranspose and * in a special way -for (f, elf) in ((:(Base.ctranspose), :conj), (:(Base.transpose), :identity)) +# Treat adjoint and * in a special way +for (f, elf) in ((:(Base.adjoint), :conj), (:(Base.transpose), :identity)) @eval begin function $(f)(d::DataMatrix{T}) where T # (c)transpose in Base uses a cache-friendly algorithm for @@ -307,7 +308,7 @@ end for f in (:(Base.acos), :(Base.acosh), :(Base.asin), :(Base.asinh), :(Base.atan), :(Base.atanh), :(Base.sin), :(Base.sinh), :(Base.cos), :(Base.cosh), :(Base.tan), :(Base.tanh), :(Base.exp), :(Base.exp2), :(Base.expm1), :(Base.log), :(Base.log10), :(Base.log1p), - :(Base.log2), :(Base.exponent), :(Base.sqrt), :(Base.gamma), :(Base.lgamma)) + :(Base.log2), :(Base.exponent), :(Base.sqrt), :(SpecialFunctions.gamma), :(SpecialFunctions.lgamma)) @eval begin @dataarray_unary $(f) AbstractFloat T @dataarray_unary $(f) Real Float64 @@ -419,7 +420,7 @@ end if isdefined(Base, :UniformScaling) function (+)(A::DataArray{TA,2},J::UniformScaling{TJ}) where {TA,TJ} - n = LinAlg.checksquare(A) + n = LinearAlgebra.checksquare(A) B = similar(A,promote_type(TA,TJ)) copy!(B,A) @inbounds for i = 1:n @@ -432,7 +433,7 @@ end (+)(J::UniformScaling,A::DataArray{TA,2}) where {TA} = A + J function (-)(A::DataArray{TA,2},J::UniformScaling{TJ}) where {TA,TJ<:Number} - n = LinAlg.checksquare(A) + n = LinearAlgebra.checksquare(A) B = similar(A,promote_type(TA,TJ)) copy!(B,A) @inbounds for i = 1:n @@ -443,7 +444,7 @@ function (-)(A::DataArray{TA,2},J::UniformScaling{TJ}) where {TA,TJ<:Number} B end function (-)(J::UniformScaling{TJ},A::DataArray{TA,2}) where {TA,TJ<:Number} - n = LinAlg.checksquare(A) + n = LinearAlgebra.checksquare(A) B = -A @inbounds for i = 1:n if !B.na[i,i] @@ -514,7 +515,7 @@ end DataArray(Array{T,N}(size(b)), trues(size(b))) @dataarray_binary_scalar(/, /, nothing, false) -function Base.LinAlg.diff(dv::DataVector) +function LinearAlgebra.diff(dv::DataVector) n = length(dv) new_data = diff(dv.data) new_na = falses(n - 1) @@ -681,7 +682,7 @@ function rle(v::AbstractDataVector{T}) where T current_length = 1 values = DataArray(T, n) total_values = 1 - lengths = Vector{Int16}(uninitialized, n) + lengths = Vector{Int16}(undef, n) total_lengths = 1 for i in 2:n if ismissing(v[i]) || ismissing(current_value) diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index 1ee7b1b..9709f57 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -117,7 +117,7 @@ function PooledDataArray(d::AbstractArray{<:Union{T,Missing}, N}, throw(ArgumentError("Cannot construct a PooledDataVector with type $R with a pool of size $(length(pool))")) end - newrefs = Array{R,N}(uninitialized, size(d)) + newrefs = Array{R,N}(undef, size(d)) poolref = Dict{T, R}() # loop through once to fill the poolref dict @@ -149,7 +149,7 @@ end # Construct an all-missing PooledDataVector of a specific type PooledDataArray(t::Type, dims::Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims)) -PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array{t}(uninitialized, dims), trues(dims)) +PooledDataArray(t::Type, dims::Int...) 
= PooledDataArray(Array{t}(undef, dims), trues(dims)) PooledDataArray(t::Type, r::Type{R}, dims::Tuple{Vararg{Int}}) where {R<:Integer} = PooledDataArray(Array{t}(dims), trues(dims), r) PooledDataArray(t::Type, r::Type{R}, dims::Int...) where {R<:Integer} = PooledDataArray(Array(t, dims), trues(dims), r) @@ -198,7 +198,7 @@ end Base.size(pda::PooledDataArray) = size(pda.refs) Base.length(pda::PooledDataArray) = length(pda.refs) -Base.endof(pda::PooledDataArray) = endof(pda.refs) +Base.lastindex(pda::PooledDataArray) = lastindex(pda.refs) ############################################################################## ## @@ -559,11 +559,11 @@ end ############################################################################## ## -## find() +## findall() ## ############################################################################## -Base.find(pdv::PooledDataVector{Bool}) = findall(convert(Vector{Bool}, pdv, false)) +Base.findall(pdv::PooledDataVector{Bool}) = findall(convert(Vector{Bool}, pdv, false)) ############################################################################## ## @@ -859,7 +859,7 @@ Base.convert(::Type{PooledDataArray}, a::AbstractArray) = function Base.convert(::Type{DataArray{S,N}}, pda::PooledDataArray{T,R,N}) where {S,T,R<:Integer,N} - res = DataArray(Array{S}(uninitialized, size(pda)), BitArray(uninitialized, size(pda))) + res = DataArray(Array{S}(undef, size(pda)), BitArray(undef, size(pda))) for i in 1:length(pda) r = pda.refs[i] if r == 0 # TODO: Use zero(R) @@ -909,7 +909,7 @@ function Base.convert( pda::PooledDataArray{T, R, N}, replacement::Any) where {S, T, R, N} - res = Array{S}(uninitialized, size(pda)) + res = Array{S}(undef, size(pda)) replacementS = convert(S, replacement) for i in 1:length(pda) if pda.refs[i] == zero(R) diff --git a/src/reduce.jl b/src/reduce.jl index ce74493..c17f965 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -88,7 +88,7 @@ end # in Base, which is slow because it's type-unstable, but guarantees the # correct semantics const SafeMapFuns = Union{typeof(identity), typeof(abs), typeof(abs2), - typeof(exp), typeof(log), typeof(Base.centralizedabs2fun)} + typeof(exp), typeof(log), typeof(Statistics.centralizedabs2fun)} const SafeReduceFuns = Union{typeof(+), typeof(*), typeof(max), typeof(min)} function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) any(A.na) && return missing @@ -148,12 +148,12 @@ end ## mean -Base.mean(a::DataArray; skipmissing::Bool=false, skipna::Bool=false) = +Statistics.mean(a::DataArray; skipmissing::Bool=false, skipna::Bool=false) = sum(a; skipmissing=skipmissing, skipna=skipna) / (length(a.na)-(skipna || skipmissing ? countnz(a.na) : 0)) ## variance -function Base.varm(A::DataArray{T}, m::Number; +function Statistics.varm(A::DataArray{T}, m::Number; corrected::Bool=true, skipmissing::Bool=false, skipna::Bool=false) where T if skipna || skipmissing if skipna && !skipmissing @@ -162,44 +162,44 @@ function Base.varm(A::DataArray{T}, m::Number; n = length(A) na = A.na - + MT = typeof((zero(T) * zero(T) + zero(T) * zero(T)) / 2) nna = countnz(na) - nna == n && return convert(Base.momenttype(T), NaN) - nna == n-1 && return convert(Base.momenttype(T), + nna == n && return convert(MT, NaN) + nna == n-1 && return convert(MT, abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - corrected)) - /(nna == 0 ? Base.centralize_sumabs2(A.data, m, 1, n) : - mapreduce_impl_skipmissing(Base.centralizedabs2fun(m), +, A), + /(nna == 0 ? 
Statistics.centralize_sumabs2(A.data, m, 1, n) : + mapreduce_impl_skipmissing(Statistics.centralizedabs2fun(m), +, A), n - nna - corrected) else any(A.na) && return missing - Base.varm(A.data, m; corrected=corrected) + Statistics.varm(A.data, m; corrected=corrected) end end -Base.varm(A::DataArray{T}, m::Missing; +Statistics.varm(A::DataArray{T}, m::Missing; corrected::Bool=true, skipmissing::Bool=false, skipna::Bool=false) where {T} = missing -function Base.var(A::DataArray; +function Statistics.var(A::DataArray; corrected::Bool=true, mean=nothing, skipmissing::Bool=false, skipna::Bool=false) - mean == 0 ? Base.varm(A, 0; corrected=corrected, skipmissing=skipmissing, skipna=skipna) : - mean == nothing ? varm(A, Base.mean(A; skipmissing=skipmissing, skipna=skipna); + mean == 0 ? Statistics.varm(A, 0; corrected=corrected, skipmissing=skipmissing, skipna=skipna) : + mean == nothing ? varm(A, Statistics.mean(A; skipmissing=skipmissing, skipna=skipna); corrected=corrected, skipmissing=skipmissing, skipna=skipna) : isa(mean, Union{Number,Missing}) ? varm(A, mean; corrected=corrected, skipmissing=skipmissing, skipna=skipna) : throw(ErrorException("Invalid value of mean.")) end -Base.stdm(A::DataArray, m::Number; +Statistics.stdm(A::DataArray, m::Number; corrected::Bool=true, skipmissing::Bool=false, skipna::Bool=false) = sqrt(varm(A, m; corrected=corrected, skipmissing=skipmissing, skipna=skipna)) -Base.std(A::DataArray; +Statistics.std(A::DataArray; corrected::Bool=true, mean=nothing, skipmissing::Bool=false, skipna::Bool=false) = sqrt(var(A; corrected=corrected, mean=mean, skipmissing=skipmissing, skipna=skipna)) ## weighted mean -function Base.mean(a::DataArray, w::Weights; skipmissing::Bool=false, skipna::Bool=false) +function Statistics.mean(a::DataArray, w::Weights; skipmissing::Bool=false, skipna::Bool=false) if skipna || skipmissing v = a .* w.values sum(v; skipmissing=true) / sum(DataArray(w.values, v.na); skipmissing=true) @@ -208,7 +208,7 @@ function Base.mean(a::DataArray, w::Weights; skipmissing::Bool=false, skipna::Bo end end -function Base.mean(a::DataArray, w::Weights{W,V}; +function Statistics.mean(a::DataArray, w::Weights{W,V}; skipmissing::Bool=false, skipna::Bool=false) where {W,V<:DataArray} if skipna || skipmissing v = a .* w.values diff --git a/src/reducedim.jl b/src/reducedim.jl index ba29732..2c87acc 100644 --- a/src/reducedim.jl +++ b/src/reducedim.jl @@ -1,537 +1,537 @@ -## Utility function - -using Base: check_reducedims - -# Determine if there are any true values in a BitArray in a given -# range. We use this for reductions with skipmissing=false along the first -# dimension. -function _any(B::BitArray, istart::Int, iend::Int) - chunks = B.chunks - startidx, startbit = Base.get_chunks_id(istart) - endidx, endbit = Base.get_chunks_id(iend) - startidx == endidx && return chunks[startidx] >> startbit << (63-endbit+startbit) != 0 - - chunks[startidx] >> startbit != 0 && return true - for i = startidx+1:endidx-1 - chunks[i] != 0 && return true - end - chunks[endidx] << (63-endbit) != 0 && return true - return false -end - -# Counts the number of ones in a given range. We use this for counting -# the values for mean and var with skipmissing=false along the first -# dimension. 
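The qualified names introduced in the linalg.jl, operators.jl, and reduce.jl hunks above assume the 0.7 standard-library split; a minimal sketch of the corresponding imports:

    using Statistics         # mean, var, varm, std, cor, cov moved here from Base
    using LinearAlgebra      # norm, dot, diag, checksquare, UniformScaling moved here
    using SpecialFunctions   # gamma, lgamma are no longer in Base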
-function _count(B::BitArray, istart::Int, iend::Int) - chunks = B.chunks - startidx, startbit = Base.get_chunks_id(istart) - endidx, endbit = Base.get_chunks_id(iend) - startidx == endidx && return count_ones(chunks[startidx] >> startbit << (63-endbit+startbit)) - - n = 0 - n += count_ones(chunks[startidx] >> startbit) - for i = startidx+1:endidx-1 - n += count_ones(chunks[i]) - end - n += count_ones(chunks[endidx] << (63-endbit)) - return n -end - -## missing-preserving -@generated function _mapreducedim!(f::SafeMapFuns, op::SafeReduceFuns, - R::DataArray, A::DataArray{T,N} where {T}) where {N} - quote - data = A.data - na = A.na - - lsiz = check_reducedims(R, data) - isempty(data) && return R - - if lsiz > 16 - # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - extr = daextract(R) - for i = 1:nslices - if _any(na, ibase+1, ibase+lsiz) - unsafe_setmissing!(R, extr, i) - else - v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) - @inbounds unsafe_dasetindex!(R, extr, v, i) - end - ibase += lsiz - end - else - @nextract $N sizeR d->size(R,d) - na_chunks = A.na.chunks - - new_data = R.data - - # If reducing to a DataArray, skip strides with missings. - # In my benchmarks, it is actually faster to compute a new missing - # array and BitArray it than to operate on the BitArray - # directly. - new_na = fill(false, size(new_data)) - - @nexprs 1 d->(state_0 = state_{$N} = 1) - @nexprs $N d->(skip_d = sizeR_d == 1) - k = 1 - @nloops($N, i, A, - d->(state_{d-1} = state_d), - d->(skip_d || (state_d = state_0)), begin - @inbounds vna = new_na[state_0] | Base.unsafe_bitgetindex(na_chunks, k) - if vna - @inbounds new_na[state_0] = true - else - @inbounds x = data[k] - v = f(x) - @inbounds v0 = new_data[state_0] - nv = op(v0, v) - @inbounds new_data[state_0] = nv - end - - state_0 += 1 - k += 1 - end) - - R.na = BitArray(new_na) - end - return R - end -end - -## missing-preserving to array -@generated function _mapreducedim!(f::SafeMapFuns, op::SafeReduceFuns, - R::AbstractArray, A::DataArray{T,N} where {T}) where {N} - quote - data = A.data - na = A.na - - lsiz = check_reducedims(R, data) - isempty(data) && return R - - if lsiz > 16 - # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - extr = daextract(R) - for i = 1:nslices - if _any(na, ibase+1, ibase+lsiz) - throw(MissingException("array contains missing values but output array does not support them")) - else - v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) - @inbounds unsafe_dasetindex!(R, extr, v, i) - end - ibase += lsiz - end - else - @nextract $N sizeR d->size(R,d) - - # If reducing to a non-DataArray, throw an error at the start on missing - any(ismissing, A) && throw(MissingException("array contains missing values: pass skipmissing=true to skip them")) - @nloops $N i data d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds x = (@nref $N data i) - v = f(x) - @inbounds v0 = (@nref $N R j) - nv = op(v0, v) - @inbounds (@nref $N R j) = nv - end - end - return R - end -end -_mapreducedim!(f, op, R, A) = Base._mapreducedim!(f, op, R, A) - -## missing-skipping -_getdata(A) = A -_getdata(A::DataArray) = A.data - -# mapreduce across a dimension. If specified, C contains the number of -# non-missing values reduced into each element of R. 
-@generated function _mapreducedim_skipmissing_impl!(f, op, R::AbstractArray, - C::Union{Array{Int}, Nothing}, - A::DataArray{T,N} where {T}) where {N} - quote - - data = A.data - na = A.na - na_chunks = na.chunks - new_data = _getdata(R) - - C === nothing || size(R) == size(C) || throw(DimensionMismatch("R and C must have same size")) - lsiz = check_reducedims(new_data, data) - isempty(data) && return R - - if lsiz > 16 - # keep the accumulator as a local variable when reducing along the first dimension - nslices = div(length(A), lsiz) - ibase = 0 - for i = 1:nslices - # TODO: use pairwise impl for sum - @inbounds v = new_data[i] - @inbounds C !== nothing && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) - for k = ibase+1:ibase+lsiz - @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue - @inbounds x = data[k] - v = convert(typeof(v), op(f(x), v))::typeof(v) - end - @inbounds new_data[i] = v - ibase += lsiz - end - else - # general implementation - @nextract $N sizeR d->size(new_data,d) - @nexprs 1 d->(state_0 = state_{$N} = 1) - @nexprs $N d->(skip_d = sizeR_d == 1) - k = 1 - C !== nothing && fill!(C, div(length(A), length(R))) - @nloops($N, i, A, - d->(state_{d-1} = state_d), - d->(skip_d || (state_d = state_0)), begin - @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) - if xna - C !== nothing && @inbounds C[state_0] -= 1 - else - @inbounds x = data[k] - v = f(x) - @inbounds v0 = new_data[state_0] - nv = op(v0, v) - @inbounds new_data[state_0] = nv - end - - state_0 += 1 - k += 1 - end) - end - return R - end -end - -_mapreducedim_skipmissing!(f, op, R::AbstractArray, A::DataArray) = - _mapreducedim_skipmissing_impl!(f, op, R, nothing, A) - -# for MinFun/MaxFun, min or max is missing if all values along a dimension are missing -function _mapreducedim_skipmissing!(f, op::Union{typeof(min), typeof(max)}, R::DataArray, A::DataArray) - R.na = BitArray(all!(fill(true, size(R)), A.na)) - _mapreducedim_skipmissing_impl!(f, op, R, nothing, A) -end -function _mapreducedim_skipmissing!(f, op::Union{typeof(min), typeof(max)}, R::AbstractArray, A::DataArray) - if any(all!(fill(true, size(R)), A.na)) - throw(MissingException("some dimensions of the array only contain missing values")) - end - _mapreducedim_skipmissing_impl!(f, op, R, nothing, A) -end - -## general reducedim interface - -for op in (+, *, &, |, min, max) - @eval begin - function Base.initarray!(a::DataArray{T}, op::typeof($op), init::Bool) where T - if init - Base.initarray!(a.data, op, true) - Base.fill!(a.na, false) - end - a - end - end -end - -function Base.reducedim_initarray(A::DataArray, region, v0, ::Type{R}) where R - rd = length.(Base.reduced_indices(A.data, region)) - DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) -end -function Base.reducedim_initarray0(A::DataArray, region, v0, ::Type{R}) where R - rd = length.(Base.reduced_indices0(A,region)) - DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) -end - -function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; - skipmissing::Bool=false, skipna::Bool=false) - if skipna && !skipmissing - Base.depwarn("skipna=$skipna is deprecated, use skipmissing=$skipna instead", :mapreduce) - skipmissing = true - end - skipmissing ? 
_mapreducedim_skipmissing!(f, op, R, A) : _mapreducedim!(f, op, R, A) -end -function Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray; - skipmissing::Bool=false, skipna::Bool=false) - if skipn && !skipmissing - Base.depwarn("skipna=$skipna is deprecated, use skipmissing=$skipna instead", :mapreduce) - skipmissing = true - end - skipmissing ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) -end -Base.reducedim!(op, R::DataArray{RT}, A::AbstractArray; - skipmissing::Bool=false, skipna::Bool=false) where {RT} = - Base.mapreducedim!(identity, op, R, A, zero(RT); skipna=skipna) - -Base.mapreducedim(f, op, A::DataArray, region, v0; - skipmissing::Bool=false, skipna::Bool=false) = - Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A; - skipmissing=skipmissing, skipna=skipna) -Base.mapreducedim(f, op, A::DataArray{T}, region; - skipmissing::Bool=false, skipna::Bool=false) where {T} = - Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A; - skipmissing=skipmissing, skipna=skipna) - -Base.reducedim(op, A::DataArray, region, v0; skipna::Bool=false) = - Base.mapreducedim(identity, op, A, region, v0; skipmissing=skipmissing, skipna=skipna) -Base.reducedim(op, A::DataArray, region; skipna::Bool=false) = - Base.mapreducedim(identity, op, A, region; skipmissing=skipmissing, skipna=skipna) - -## usual reductions - -for (basfn, Op) in [(:sum, +), (:prod, *), - (:maximum, max), (:minimum, min), - (:all, &), (:any, |)] - fname = Expr(:., :Base, Base.Meta.quot(basfn)) - fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) - @eval begin - $(fname!)(f::Union{Function,$(supertype(typeof(abs)))}, r::AbstractArray, A::DataArray; - init::Bool=true, skipmissing::Bool=false, skipna::Bool=false) = - Base.mapreducedim!(f, $(Op), Base.initarray!(r, $(Op), init), A; - skipmissing=skipmissing, skipna=skipna) - $(fname!)(r::AbstractArray, A::DataArray; - init::Bool=true, skipmissing::Bool=false, skipna::Bool=false) = - $(fname!)(identity, r, A; init=init, skipmissing=skipmissing, skipna=skipna) - - $(fname)(f::Union{Function,$(supertype(typeof(abs)))}, A::DataArray, region; - skipmissing::Bool=false, skipna::Bool=false) = - Base.mapreducedim(f, $(Op), A, region; skipmissing=skipmissing, skipna=skipna) - $(fname)(A::DataArray, region; skipmissing::Bool=false, skipna::Bool=false) = - $(fname)(identity, A, region; skipmissing=skipmissing, skipna=skipna) - end -end - -## mean - -function Base.mean!(R::AbstractArray{T}, A::DataArray; - skipmissing::Bool=false, skipna::Bool=false, init::Bool=true) where {T} - init && fill!(R, 0) - if skipna || skipmissing - C = Array{Int}(size(R)) - _mapreducedim_skipmissing_impl!(identity, +, R, C, A) - broadcast!(/, R, R, C) - else - sum!(R, A; skipmissing=false) - broadcast!(/, R, R, convert(T, length(A)/length(R))) - R - end -end - -Base.mean(A::DataArray{T}, region; skipmissing::Bool=false, skipna::Bool=false) where {T} = - mean!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A; - skipmissing=skipmissing, skipna=skipna, init=false) - -## var - -struct MapReduceDim2ArgHelperFun{F,T} - f::F - val::T -end -(f::MapReduceDim2ArgHelperFun)(x) = f.f(x, f.val) - -# A version of _mapreducedim! that accepts an array S of the same size -# as R, the elements of which are passed as a second argument to f. 
-@generated function _mapreducedim_2arg!(f, op, R::DataArray, - A::DataArray{T,N} where {T}, - S::AbstractArray) where {N} - quote - data = A.data - na = A.na - Sextr = daextract(S) - - lsiz = check_reducedims(R, data) - size(R) == size(S) || throw(DimensionMismatch("R and S must have same size")) - isempty(data) && return R - - if lsiz > 16 - # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - extr = daextract(R) - for i = 1:nslices - if unsafe_ismissing(S, Sextr, i) || _any(na, ibase+1, ibase+lsiz) - unsafe_setmissing!(R, extr, i) - else - @inbounds s = unsafe_getindex_notmissing(S, Sextr, i) - v = Base.mapreduce_impl(MapReduceDim2ArgHelperFun(f, s), op, data, ibase+1, ibase+lsiz) - @inbounds unsafe_dasetindex!(R, extr, v, i) - end - ibase += lsiz - end - else - @nextract $N sizeR d->size(R,d) - na_chunks = A.na.chunks - new_data = R.data - new_na = isa(S, DataArray) ? Array(S.na) : fill(false, size(S)) - - @nexprs 1 d->(state_0 = state_{$N} = 1) - @nexprs $N d->(skip_d = sizeR_d == 1) - k = 1 - @nloops($N, i, A, - d->(state_{d-1} = state_d), - d->(skip_d || (state_d = state_0)), begin - @inbounds vna = new_na[state_0] | Base.unsafe_bitgetindex(na_chunks, k) - if vna - @inbounds new_na[state_0] = true - else - @inbounds s = unsafe_getindex_notmissing(S, Sextr, state_0) - @inbounds x = data[k] - v = f(x, s) - @inbounds v0 = new_data[state_0] - nv = op(v0, v) - @inbounds new_data[state_0] = nv - end - - state_0 += 1 - k += 1 - end) - - R.na = BitArray(new_na) - end - return R - end -end - -# A version of _mapreducedim_skipmissing! that accepts an array S of the same size -# as R, the elements of which are passed as a second argument to f. -@generated function _mapreducedim_skipmissing_2arg!(f, op, R::AbstractArray, - C::Union{Array{Int}, Nothing}, - A::DataArray{T,N} where {T}, S::AbstractArray) where {N} - quote - data = A.data - na = A.na - na_chunks = na.chunks - new_data = _getdata(R) - Sextr = daextract(S) - - lsiz = check_reducedims(new_data, data) - C === nothing || size(R) == size(C) || throw(DimensionMismatch("R and C must have same size")) - size(R) == size(S) || throw(DimensionMismatch("R and S must have same size")) - isempty(data) && return R - @nextract $N sizeR d->size(new_data,d) - sizA1 = size(data, 1) - - # If there are any missings in S, assume these will produce missings in R - if isa(S, DataArray) - copy!(R.na, S.na) - end - - if lsiz > 16 - # keep the accumulator as a local variable when reducing along the first dimension - nslices = div(length(A), lsiz) - ibase = 0 - for i = 1:nslices - @inbounds v = new_data[i] - !isa(C, Nothing) && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) - - # If S[i] is missing, skip this iteration - @inbounds sna = unsafe_ismissing(S, Sextr, i) - if !sna - @inbounds s = unsafe_getindex_notmissing(S, Sextr, i) - # TODO: use pairwise impl for sum - for k = ibase+1:ibase+lsiz - @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue - @inbounds x = data[k] - v = convert(typeof(v), op(f(x, s), v))::typeof(v) - end - - @inbounds new_data[i] = v - end - - ibase += lsiz - end - else - # general implementation - @nexprs 1 d->(state_0 = state_{$N} = 1) - @nexprs $N d->(skip_d = sizeR_d == 1) - k = 1 - !isa(C, Nothing) && fill!(C, div(length(A), length(R))) - @nloops($N, i, A, - d->(state_{d-1} = state_d), - d->(skip_d || (state_d = state_0)), begin - @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) | unsafe_ismissing(S, Sextr, state_0) - if xna - !isa(C, 
Nothing) && @inbounds C[state_0] -= 1 - else - @inbounds s = unsafe_getindex_notmissing(S, Sextr, state_0) - @inbounds x = data[k] - v = f(x, s) - @inbounds v0 = new_data[state_0] - nv = op(v0, v) - @inbounds new_data[state_0] = nv - end - - state_0 += 1 - k += 1 - end) - end - return R - end -end - -struct Abs2MinusFun end -(::Abs2MinusFun)(x, m) = abs2(x - m) - -function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected::Bool=true, - skipmissing::Bool=false, skipna::Bool=false, init::Bool=true) - if skipna && !skipmissing - Base.depwarn("skipna=$skipna is deprecated, use skipmissing=$skipna instead", :mapreduce) - skipmissing = true - end - if isempty(A) - fill!(R, convert(eltype(R), NaN)) - else - init && fill!(R, zero(eltype(R))) - if skipmissing - C = Array{Int}(size(R)) - - # Compute R = abs2(A-m) - _mapreducedim_skipmissing_2arg!(Abs2MinusFun(), +, R, C, A, m) - - # Divide by number of non-missing values - if corrected - for i = 1:length(C) - @inbounds C[i] = max(C[i] - 1, 0) - end - end - broadcast!(/, R, R, C) - else - # Compute R = abs2(A-m) - _mapreducedim_2arg!(Abs2MinusFun(), +, R, A, m) - - # Divide by number of values - broadcast!(/, R, R, div(length(A), length(R)) - corrected) - end - end -end - -Base.varm(A::DataArray{T}, m::AbstractArray, region; corrected::Bool=true, - skipmissing::Bool=false, skipna::Bool=false) where {T} = - Base.varm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A, m; - corrected=corrected, skipmissing=skipmissing, skipna=skipna, init=false) - -function Base.var(A::DataArray{T}, region::Union{Integer, AbstractArray, Tuple}; - corrected::Bool=true, mean=nothing, - skipmissing::Bool=false, skipna::Bool=false) where T - if mean == 0 - Base.varm(A, Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), region; - corrected=corrected, skipmissing=skipmissing, skipna=skipna) - elseif mean == nothing - if skipna || skipmissing - # Can reduce mean into ordinary array - m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) - Base.varm(A, Base.mean!(m, A; skipmissing=skipmissing, skipna=skipna), region; - corrected=corrected, skipmissing=skipmissing, skipna=skipna) - else - Base.varm(A, Base.mean(A, region; skipmissing=skipmissing, skipna=skipna), region; - corrected=corrected, skipmissing=skipmissing, skipna=skipna) - end - elseif isa(mean, AbstractArray) - Base.varm(A, mean::AbstractArray, region; - corrected=corrected, skipmissing=skipmissing, skipna=skipna) - else - throw(ErrorException("invalid value of mean")) - end -end +# ## Utility function + +# using Base: check_reducedims + +# # Determine if there are any true values in a BitArray in a given +# # range. We use this for reductions with skipmissing=false along the first +# # dimension. +# function _any(B::BitArray, istart::Int, iend::Int) +# chunks = B.chunks +# startidx, startbit = Base.get_chunks_id(istart) +# endidx, endbit = Base.get_chunks_id(iend) +# startidx == endidx && return chunks[startidx] >> startbit << (63-endbit+startbit) != 0 + +# chunks[startidx] >> startbit != 0 && return true +# for i = startidx+1:endidx-1 +# chunks[i] != 0 && return true +# end +# chunks[endidx] << (63-endbit) != 0 && return true +# return false +# end + +# # Counts the number of ones in a given range. We use this for counting +# # the values for mean and var with skipmissing=false along the first +# # dimension. 
+# function _count(B::BitArray, istart::Int, iend::Int) +# chunks = B.chunks +# startidx, startbit = Base.get_chunks_id(istart) +# endidx, endbit = Base.get_chunks_id(iend) +# startidx == endidx && return count_ones(chunks[startidx] >> startbit << (63-endbit+startbit)) + +# n = 0 +# n += count_ones(chunks[startidx] >> startbit) +# for i = startidx+1:endidx-1 +# n += count_ones(chunks[i]) +# end +# n += count_ones(chunks[endidx] << (63-endbit)) +# return n +# end + +# ## missing-preserving +# @generated function _mapreducedim!(f::SafeMapFuns, op::SafeReduceFuns, +# R::DataArray, A::DataArray{T,N} where {T}) where {N} +# quote +# data = A.data +# na = A.na + +# lsiz = check_reducedims(R, data) +# isempty(data) && return R + +# if lsiz > 16 +# # use mapreduce_impl, which is probably better tuned to achieve higher performance +# nslices = div(length(A), lsiz) +# ibase = 0 +# extr = daextract(R) +# for i = 1:nslices +# if _any(na, ibase+1, ibase+lsiz) +# unsafe_setmissing!(R, extr, i) +# else +# v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) +# @inbounds unsafe_dasetindex!(R, extr, v, i) +# end +# ibase += lsiz +# end +# else +# @nextract $N sizeR d->size(R,d) +# na_chunks = A.na.chunks + +# new_data = R.data + +# # If reducing to a DataArray, skip strides with missings. +# # In my benchmarks, it is actually faster to compute a new missing +# # array and BitArray it than to operate on the BitArray +# # directly. +# new_na = fill(false, size(new_data)) + +# @nexprs 1 d->(state_0 = state_{$N} = 1) +# @nexprs $N d->(skip_d = sizeR_d == 1) +# k = 1 +# @nloops($N, i, A, +# d->(state_{d-1} = state_d), +# d->(skip_d || (state_d = state_0)), begin +# @inbounds vna = new_na[state_0] | Base.unsafe_bitgetindex(na_chunks, k) +# if vna +# @inbounds new_na[state_0] = true +# else +# @inbounds x = data[k] +# v = f(x) +# @inbounds v0 = new_data[state_0] +# nv = op(v0, v) +# @inbounds new_data[state_0] = nv +# end + +# state_0 += 1 +# k += 1 +# end) + +# R.na = BitArray(new_na) +# end +# return R +# end +# end + +# ## missing-preserving to array +# @generated function _mapreducedim!(f::SafeMapFuns, op::SafeReduceFuns, +# R::AbstractArray, A::DataArray{T,N} where {T}) where {N} +# quote +# data = A.data +# na = A.na + +# lsiz = check_reducedims(R, data) +# isempty(data) && return R + +# if lsiz > 16 +# # use mapreduce_impl, which is probably better tuned to achieve higher performance +# nslices = div(length(A), lsiz) +# ibase = 0 +# extr = daextract(R) +# for i = 1:nslices +# if _any(na, ibase+1, ibase+lsiz) +# throw(MissingException("array contains missing values but output array does not support them")) +# else +# v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) +# @inbounds unsafe_dasetindex!(R, extr, v, i) +# end +# ibase += lsiz +# end +# else +# @nextract $N sizeR d->size(R,d) + +# # If reducing to a non-DataArray, throw an error at the start on missing +# any(ismissing, A) && throw(MissingException("array contains missing values: pass skipmissing=true to skip them")) +# @nloops $N i data d->(j_d = sizeR_d==1 ? 1 : i_d) begin +# @inbounds x = (@nref $N data i) +# v = f(x) +# @inbounds v0 = (@nref $N R j) +# nv = op(v0, v) +# @inbounds (@nref $N R j) = nv +# end +# end +# return R +# end +# end +# _mapreducedim!(f, op, R, A) = Base._mapreducedim!(f, op, R, A) + +# ## missing-skipping +# _getdata(A) = A +# _getdata(A::DataArray) = A.data + +# # mapreduce across a dimension. If specified, C contains the number of +# # non-missing values reduced into each element of R. 
+# @generated function _mapreducedim_skipmissing_impl!(f, op, R::AbstractArray, +# C::Union{Array{Int}, Nothing}, +# A::DataArray{T,N} where {T}) where {N} +# quote + +# data = A.data +# na = A.na +# na_chunks = na.chunks +# new_data = _getdata(R) + +# C === nothing || size(R) == size(C) || throw(DimensionMismatch("R and C must have same size")) +# lsiz = check_reducedims(new_data, data) +# isempty(data) && return R + +# if lsiz > 16 +# # keep the accumulator as a local variable when reducing along the first dimension +# nslices = div(length(A), lsiz) +# ibase = 0 +# for i = 1:nslices +# # TODO: use pairwise impl for sum +# @inbounds v = new_data[i] +# @inbounds C !== nothing && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) +# for k = ibase+1:ibase+lsiz +# @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue +# @inbounds x = data[k] +# v = convert(typeof(v), op(f(x), v))::typeof(v) +# end +# @inbounds new_data[i] = v +# ibase += lsiz +# end +# else +# # general implementation +# @nextract $N sizeR d->size(new_data,d) +# @nexprs 1 d->(state_0 = state_{$N} = 1) +# @nexprs $N d->(skip_d = sizeR_d == 1) +# k = 1 +# C !== nothing && fill!(C, div(length(A), length(R))) +# @nloops($N, i, A, +# d->(state_{d-1} = state_d), +# d->(skip_d || (state_d = state_0)), begin +# @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) +# if xna +# C !== nothing && @inbounds C[state_0] -= 1 +# else +# @inbounds x = data[k] +# v = f(x) +# @inbounds v0 = new_data[state_0] +# nv = op(v0, v) +# @inbounds new_data[state_0] = nv +# end + +# state_0 += 1 +# k += 1 +# end) +# end +# return R +# end +# end + +# _mapreducedim_skipmissing!(f, op, R::AbstractArray, A::DataArray) = +# _mapreducedim_skipmissing_impl!(f, op, R, nothing, A) + +# # for MinFun/MaxFun, min or max is missing if all values along a dimension are missing +# function _mapreducedim_skipmissing!(f, op::Union{typeof(min), typeof(max)}, R::DataArray, A::DataArray) +# R.na = BitArray(all!(fill(true, size(R)), A.na)) +# _mapreducedim_skipmissing_impl!(f, op, R, nothing, A) +# end +# function _mapreducedim_skipmissing!(f, op::Union{typeof(min), typeof(max)}, R::AbstractArray, A::DataArray) +# if any(all!(fill(true, size(R)), A.na)) +# throw(MissingException("some dimensions of the array only contain missing values")) +# end +# _mapreducedim_skipmissing_impl!(f, op, R, nothing, A) +# end + +# ## general reducedim interface + +# for op in (+, *, &, |, min, max) +# @eval begin +# function Base.initarray!(a::DataArray{T}, op::typeof($op), init::Bool) where T +# if init +# Base.initarray!(a.data, op, true) +# Base.fill!(a.na, false) +# end +# a +# end +# end +# end + +# function Base.reducedim_initarray(A::DataArray, region, v0, ::Type{R}) where R +# rd = length.(Base.reduced_indices(A.data, region)) +# DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) +# end +# function Base.reducedim_initarray0(A::DataArray, region, v0, ::Type{R}) where R +# rd = length.(Base.reduced_indices0(A,region)) +# DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) +# end + +# function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; +# skipmissing::Bool=false, skipna::Bool=false) +# if skipna && !skipmissing +# Base.depwarn("skipna=$skipna is deprecated, use skipmissing=$skipna instead", :mapreduce) +# skipmissing = true +# end +# skipmissing ? 
_mapreducedim_skipmissing!(f, op, R, A) : _mapreducedim!(f, op, R, A)
+# end
+# function Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray;
+#                              skipmissing::Bool=false, skipna::Bool=false)
+#     if skipna && !skipmissing
+#         Base.depwarn("skipna=$skipna is deprecated, use skipmissing=$skipna instead", :mapreduce)
+#         skipmissing = true
+#     end
+#     skipmissing ? _mapreducedim_skipmissing!(f, op, R, A) : _mapreducedim!(f, op, R, A)
+# end
+# Base.reducedim!(op, R::DataArray{RT}, A::AbstractArray;
+#                  skipmissing::Bool=false, skipna::Bool=false) where {RT} =
+#     Base.mapreducedim!(identity, op, R, A, zero(RT); skipna=skipna)
+
+# Base.mapreducedim(f, op, A::DataArray, region, v0;
+#                    skipmissing::Bool=false, skipna::Bool=false) =
+#     Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A;
+#                         skipmissing=skipmissing, skipna=skipna)
+# Base.mapreducedim(f, op, A::DataArray{T}, region;
+#                    skipmissing::Bool=false, skipna::Bool=false) where {T} =
+#     Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A;
+#                         skipmissing=skipmissing, skipna=skipna)
+
+# Base.reducedim(op, A::DataArray, region, v0; skipna::Bool=false) =
+#     Base.mapreducedim(identity, op, A, region, v0; skipmissing=skipmissing, skipna=skipna)
+# Base.reducedim(op, A::DataArray, region; skipna::Bool=false) =
+#     Base.mapreducedim(identity, op, A, region; skipmissing=skipmissing, skipna=skipna)
+
+# ## usual reductions
+
+# for (basfn, Op) in [(:sum, +), (:prod, *),
+#                      (:maximum, max), (:minimum, min),
+#                      (:all, &), (:any, |)]
+#     fname = Expr(:., :Base, Base.Meta.quot(basfn))
+#     fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!'))))
+#     @eval begin
+#         $(fname!)(f::Union{Function,$(supertype(typeof(abs)))}, r::AbstractArray, A::DataArray;
+#                    init::Bool=true, skipmissing::Bool=false, skipna::Bool=false) =
+#             Base.mapreducedim!(f, $(Op), Base.initarray!(r, $(Op), init), A;
+#                                 skipmissing=skipmissing, skipna=skipna)
+#         $(fname!)(r::AbstractArray, A::DataArray;
+#                    init::Bool=true, skipmissing::Bool=false, skipna::Bool=false) =
+#             $(fname!)(identity, r, A; init=init, skipmissing=skipmissing, skipna=skipna)
+
+#         $(fname)(f::Union{Function,$(supertype(typeof(abs)))}, A::DataArray, region;
+#                   skipmissing::Bool=false, skipna::Bool=false) =
+#             Base.mapreducedim(f, $(Op), A, region; skipmissing=skipmissing, skipna=skipna)
+#         $(fname)(A::DataArray, region; skipmissing::Bool=false, skipna::Bool=false) =
+#             $(fname)(identity, A, region; skipmissing=skipmissing, skipna=skipna)
+#     end
+# end
+
+# ## mean
+
+# function Base.mean!(R::AbstractArray{T}, A::DataArray;
+#                      skipmissing::Bool=false, skipna::Bool=false, init::Bool=true) where {T}
+#     init && fill!(R, 0)
+#     if skipna || skipmissing
+#         C = Array{Int}(size(R))
+#         _mapreducedim_skipmissing_impl!(identity, +, R, C, A)
+#         broadcast!(/, R, R, C)
+#     else
+#         sum!(R, A; skipmissing=false)
+#         broadcast!(/, R, R, convert(T, length(A)/length(R)))
+#         R
+#     end
+# end
+
+# Base.mean(A::DataArray{T}, region; skipmissing::Bool=false, skipna::Bool=false) where {T} =
+#     mean!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A;
+#            skipmissing=skipmissing, skipna=skipna, init=false)
+
+# ## var
+
+# struct MapReduceDim2ArgHelperFun{F,T}
+#     f::F
+#     val::T
+# end
+# (f::MapReduceDim2ArgHelperFun)(x) = f.f(x, f.val)
+
+# # A version of _mapreducedim! that accepts an array S of the same size
+# # as R, the elements of which are passed as a second argument to f.
+# @generated function _mapreducedim_2arg!(f, op, R::DataArray, +# A::DataArray{T,N} where {T}, +# S::AbstractArray) where {N} +# quote +# data = A.data +# na = A.na +# Sextr = daextract(S) + +# lsiz = check_reducedims(R, data) +# size(R) == size(S) || throw(DimensionMismatch("R and S must have same size")) +# isempty(data) && return R + +# if lsiz > 16 +# # use mapreduce_impl, which is probably better tuned to achieve higher performance +# nslices = div(length(A), lsiz) +# ibase = 0 +# extr = daextract(R) +# for i = 1:nslices +# if unsafe_ismissing(S, Sextr, i) || _any(na, ibase+1, ibase+lsiz) +# unsafe_setmissing!(R, extr, i) +# else +# @inbounds s = unsafe_getindex_notmissing(S, Sextr, i) +# v = Base.mapreduce_impl(MapReduceDim2ArgHelperFun(f, s), op, data, ibase+1, ibase+lsiz) +# @inbounds unsafe_dasetindex!(R, extr, v, i) +# end +# ibase += lsiz +# end +# else +# @nextract $N sizeR d->size(R,d) +# na_chunks = A.na.chunks +# new_data = R.data +# new_na = isa(S, DataArray) ? Array(S.na) : fill(false, size(S)) + +# @nexprs 1 d->(state_0 = state_{$N} = 1) +# @nexprs $N d->(skip_d = sizeR_d == 1) +# k = 1 +# @nloops($N, i, A, +# d->(state_{d-1} = state_d), +# d->(skip_d || (state_d = state_0)), begin +# @inbounds vna = new_na[state_0] | Base.unsafe_bitgetindex(na_chunks, k) +# if vna +# @inbounds new_na[state_0] = true +# else +# @inbounds s = unsafe_getindex_notmissing(S, Sextr, state_0) +# @inbounds x = data[k] +# v = f(x, s) +# @inbounds v0 = new_data[state_0] +# nv = op(v0, v) +# @inbounds new_data[state_0] = nv +# end + +# state_0 += 1 +# k += 1 +# end) + +# R.na = BitArray(new_na) +# end +# return R +# end +# end + +# # A version of _mapreducedim_skipmissing! that accepts an array S of the same size +# # as R, the elements of which are passed as a second argument to f. 
+# @generated function _mapreducedim_skipmissing_2arg!(f, op, R::AbstractArray, +# C::Union{Array{Int}, Nothing}, +# A::DataArray{T,N} where {T}, S::AbstractArray) where {N} +# quote +# data = A.data +# na = A.na +# na_chunks = na.chunks +# new_data = _getdata(R) +# Sextr = daextract(S) + +# lsiz = check_reducedims(new_data, data) +# C === nothing || size(R) == size(C) || throw(DimensionMismatch("R and C must have same size")) +# size(R) == size(S) || throw(DimensionMismatch("R and S must have same size")) +# isempty(data) && return R +# @nextract $N sizeR d->size(new_data,d) +# sizA1 = size(data, 1) + +# # If there are any missings in S, assume these will produce missings in R +# if isa(S, DataArray) +# copy!(R.na, S.na) +# end + +# if lsiz > 16 +# # keep the accumulator as a local variable when reducing along the first dimension +# nslices = div(length(A), lsiz) +# ibase = 0 +# for i = 1:nslices +# @inbounds v = new_data[i] +# !isa(C, Nothing) && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) + +# # If S[i] is missing, skip this iteration +# @inbounds sna = unsafe_ismissing(S, Sextr, i) +# if !sna +# @inbounds s = unsafe_getindex_notmissing(S, Sextr, i) +# # TODO: use pairwise impl for sum +# for k = ibase+1:ibase+lsiz +# @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue +# @inbounds x = data[k] +# v = convert(typeof(v), op(f(x, s), v))::typeof(v) +# end + +# @inbounds new_data[i] = v +# end + +# ibase += lsiz +# end +# else +# # general implementation +# @nexprs 1 d->(state_0 = state_{$N} = 1) +# @nexprs $N d->(skip_d = sizeR_d == 1) +# k = 1 +# !isa(C, Nothing) && fill!(C, div(length(A), length(R))) +# @nloops($N, i, A, +# d->(state_{d-1} = state_d), +# d->(skip_d || (state_d = state_0)), begin +# @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) | unsafe_ismissing(S, Sextr, state_0) +# if xna +# !isa(C, Nothing) && @inbounds C[state_0] -= 1 +# else +# @inbounds s = unsafe_getindex_notmissing(S, Sextr, state_0) +# @inbounds x = data[k] +# v = f(x, s) +# @inbounds v0 = new_data[state_0] +# nv = op(v0, v) +# @inbounds new_data[state_0] = nv +# end + +# state_0 += 1 +# k += 1 +# end) +# end +# return R +# end +# end + +# struct Abs2MinusFun end +# (::Abs2MinusFun)(x, m) = abs2(x - m) + +# function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected::Bool=true, +# skipmissing::Bool=false, skipna::Bool=false, init::Bool=true) +# if skipna && !skipmissing +# Base.depwarn("skipna=$skipna is deprecated, use skipmissing=$skipna instead", :mapreduce) +# skipmissing = true +# end +# if isempty(A) +# fill!(R, convert(eltype(R), NaN)) +# else +# init && fill!(R, zero(eltype(R))) +# if skipmissing +# C = Array{Int}(size(R)) + +# # Compute R = abs2(A-m) +# _mapreducedim_skipmissing_2arg!(Abs2MinusFun(), +, R, C, A, m) + +# # Divide by number of non-missing values +# if corrected +# for i = 1:length(C) +# @inbounds C[i] = max(C[i] - 1, 0) +# end +# end +# broadcast!(/, R, R, C) +# else +# # Compute R = abs2(A-m) +# _mapreducedim_2arg!(Abs2MinusFun(), +, R, A, m) + +# # Divide by number of values +# broadcast!(/, R, R, div(length(A), length(R)) - corrected) +# end +# end +# end + +# Base.varm(A::DataArray{T}, m::AbstractArray, region; corrected::Bool=true, +# skipmissing::Bool=false, skipna::Bool=false) where {T} = +# Base.varm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A, m; +# corrected=corrected, skipmissing=skipmissing, skipna=skipna, init=false) + +# function Base.var(A::DataArray{T}, region::Union{Integer, AbstractArray, Tuple}; +# 
corrected::Bool=true, mean=nothing, +# skipmissing::Bool=false, skipna::Bool=false) where T +# if mean == 0 +# Base.varm(A, Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), region; +# corrected=corrected, skipmissing=skipmissing, skipna=skipna) +# elseif mean == nothing +# if skipna || skipmissing +# # Can reduce mean into ordinary array +# m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) +# Base.varm(A, Base.mean!(m, A; skipmissing=skipmissing, skipna=skipna), region; +# corrected=corrected, skipmissing=skipmissing, skipna=skipna) +# else +# Base.varm(A, Base.mean(A, region; skipmissing=skipmissing, skipna=skipna), region; +# corrected=corrected, skipmissing=skipmissing, skipna=skipna) +# end +# elseif isa(mean, AbstractArray) +# Base.varm(A, mean::AbstractArray, region; +# corrected=corrected, skipmissing=skipmissing, skipna=skipna) +# else +# throw(ErrorException("invalid value of mean")) +# end +# end diff --git a/test/constructors.jl b/test/constructors.jl index c6013fc..ffa8cf5 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -1,3 +1,5 @@ +using LinearAlgebra + @testset "Constructors" begin # # missings @@ -130,9 +132,9 @@ @test_nowarn convert(DataArray, falses(2, 2)) @test_nowarn convert(DataArray, trues(2, 2)) - @test_nowarn convert(DataArray, eye(3, 2)) - @test_nowarn convert(DataArray, eye(2)) - @test_nowarn convert(DataArray, diagm(Float64[pi, pi])) + @test_nowarn convert(DataArray, Matrix{Float64}(LinearAlgebra.I, 3, 2)) + @test_nowarn convert(DataArray, Matrix{Float64}(LinearAlgebra.I, 2, 2)) + @test_nowarn convert(DataArray, LinearAlgebra.diagm(0=>Float64[pi, pi])) @test DataArray([1 missing]) isa DataMatrix{Int} @test isequal(DataArray([1 missing]), [1 missing]) diff --git a/test/dataarray.jl b/test/dataarray.jl index dc6aa88..d96f61b 100644 --- a/test/dataarray.jl +++ b/test/dataarray.jl @@ -5,7 +5,7 @@ m = [1 2; 3 4] dm = DataArray(m, falses(size(m))) - t = Array{Int}(uninitialized, 2, 2, 2) + t = Array{Int}(undef, 2, 2, 2) t[1:2, 1:2, 1:2] = 1 dt = DataArray(t, falses(size(t))) diff --git a/test/runtests.jl b/test/runtests.jl index b853063..793e1ae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,7 +2,6 @@ # Correctness Tests # -using Compat: uninitialized using Test, Dates, Printf using DataArrays
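For reference, the Julia 1.0 idioms exercised by the test updates above can be reproduced standalone. This is a minimal sketch, not part of the patch itself, using only the standard library; the comments name the 0.6-era constructs each call replaces:

    using LinearAlgebra

    Matrix{Float64}(I, 3, 2)       # replaces eye(3, 2)
    Matrix{Float64}(I, 2, 2)       # replaces eye(2)
    diagm(0 => Float64[pi, pi])    # replaces diagm(Float64[pi, pi])
    Array{Int}(undef, 2, 2, 2)     # replaces Array{Int}(uninitialized, 2, 2, 2)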