Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
julia 0.7
Compat 0.43.0
Missings 0.2.1
StatsBase 0.15.0
Reexport
Expand Down
1 change: 0 additions & 1 deletion src/DataArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ module DataArrays
@reexport using StatsBase
@reexport using Missings
using SpecialFunctions
using Compat: AbstractRange, Nothing, Cvoid, uninitialized, invpermute!
using Printf, Dates

const DEFAULT_POOLED_REF_TYPE = UInt32
Expand Down
7 changes: 4 additions & 3 deletions src/abstractdataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ Base.eltype(d::AbstractDataArray{T, N}) where {T, N} = Union{T,Missing}

# Generic iteration over AbstractDataArray's

Base.start(x::AbstractDataArray) = 1
Base.next(x::AbstractDataArray, state::Integer) = (x[state], state + 1)
Base.done(x::AbstractDataArray, state::Integer) = state > length(x)
"""
    iterate(x::AbstractDataArray[, st])

Generic linear-index iteration over an `AbstractDataArray`. Returns the
tuple `(x[st], st + 1)`, or `nothing` once `st` is past `length(x)`.
Iteration begins at index 1.
"""
function Base.iterate(x::AbstractDataArray, st=1)
    return st > length(x) ? nothing : (x[st], st + 1)
end

# FIXME: type piracy
"""
Expand Down
436 changes: 218 additions & 218 deletions src/broadcast.jl

Large diffs are not rendered by default.

72 changes: 33 additions & 39 deletions src/dataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ mutable struct DataArray{T, N} <: AbstractDataArray{T, N}
if eltype(d) >: Missing
# If the original eltype is wider than the target eltype T, conversion may fail
# in the presence of missings: we need to allocate a copy, leaving entries
# corresponding to missings uninitialized
# corresponding to missings undefined (`undef`)
if eltype(d) <: T
@inbounds for i in eachindex(d)
if isassigned(d, i) && ismissing(d, i)
Expand Down Expand Up @@ -86,7 +86,7 @@ function DataArray(d::Array, m::AbstractArray{Bool}) # -> DataArray{T}
end

function DataArray(T::Type, dims::Integer...) # -> DataArray{T}
return DataArray(Array{Missings.T(T)}(uninitialized, dims...), trues(dims...))
return DataArray(Array{Missings.T(T)}(undef, dims...), trues(dims...))
end

function DataArray(T::Type, dims::NTuple{N, Int}) where N # -> DataArray{T}
Expand All @@ -107,11 +107,11 @@ A 2-dimensional `DataArray` with element type `T`.
"""
const DataMatrix{T} = DataArray{T, 2}

Base.copy(d::DataArray) = Base.copy!(similar(d), d) # -> DataArray{T}
Base.copy(d::DataArray) = Base.copyto!(similar(d), d) # -> DataArray{T}

function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T}
function Base.copyto!(dest::DataArray, src::DataArray) # -> DataArray{T}
if isbits(eltype(src)) && isbits(eltype(dest))
copy!(dest.data, src.data)
copyto!(dest.data, src.data)
else
# Elements of src_data are not necessarily initialized, so
# only copy initialized elements
Expand All @@ -125,28 +125,28 @@ function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T}
end
end
end
copy!(dest.na, src.na)
copyto!(dest.na, src.na)
dest
end

function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray) # -> DataArray{T}
copy!(dest, doffs, src, 1, length(src))
# Copy every element of `src` into `dest`, starting at destination offset
# `doffs`. Forwards to the five-argument method with a source offset of 1
# and an element count of `length(src)`.
function Base.copyto!(dest::DataArray, doffs::Integer, src::DataArray) # -> DataArray{T}
    nelems = length(src)
    return copyto!(dest, doffs, src, 1, nelems)
end

# redundant on Julia 0.4
function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer) # -> DataArray{T}
function Base.copyto!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer) # -> DataArray{T}
soffs <= length(src) || throw(BoundsError())
copy!(dest, doffs, src, soffs, length(src)-soffs+1)
copyto!(dest, doffs, src, soffs, length(src)-soffs+1)
end

function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer, n::Integer) # -> DataArray{T}
function Base.copyto!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Integer, n::Integer) # -> DataArray{T}
if n == 0
return dest
elseif n < 0
throw(ArgumentError("tried to copy n=$n elements, but n should be nonnegative"))
end
if isbits(eltype(src))
copy!(dest.data, doffs, src.data, soffs, n)
copyto!(dest.data, doffs, src.data, soffs, n)
else
# Elements of src_data are not necessarily initialized, so
# only copy initialized elements
Expand All @@ -165,7 +165,7 @@ function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Inte
end
end
end
copy!(dest.na, doffs, src.na, soffs, n)
copyto!(dest.na, doffs, src.na, soffs, n)
dest
end

Expand All @@ -185,15 +185,15 @@ function Base.resize!(da::DataArray{T,1}, n::Int) where T
end

function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T}
return DataArray(Array{Missings.T(T)}(uninitialized, dims), trues(dims))
return DataArray(Array{Missings.T(T)}(undef, dims), trues(dims))
end

Base.size(d::DataArray) = size(d.data) # -> (Int...)
Base.ndims(da::DataArray) = ndims(da.data) # -> Int
Base.length(d::DataArray) = length(d.data) # -> Int
Base.endof(da::DataArray) = endof(da.data) # -> Int
Base.lastindex(da::DataArray) = lastindex(da.data) # -> Int

function Base.find(da::DataArray{Bool}) # -> Array{Int}
function Base.findall(da::DataArray{Bool}) # -> Array{Int}
data = da.data
ntrue = 0
@inbounds @bitenumerate da.na i na begin
Expand Down Expand Up @@ -238,7 +238,7 @@ function Base.convert(
) where {S, T, N} # -> Array{S, N}

replacementS = convert(S, replacement)
res = Array{S}(uninitialized, size(da))
res = Array{S}(undef, size(da))
for i in 1:length(da)
if da.na[i]
res[i] = replacementS
Expand Down Expand Up @@ -267,16 +267,12 @@ struct EachFailMissing{T<:DataArray}
end
Missings.fail(da::DataArray) = EachFailMissing(da)
Base.length(itr::EachFailMissing) = length(itr.da)
Base.start(itr::EachFailMissing) = 1
Base.done(itr::EachFailMissing, ind::Integer) = ind > length(itr)
Base.eltype(itr::EachFailMissing) = Missings.T(eltype(itr.da))
function Base.next(itr::EachFailMissing, ind::Integer)
if itr.da.na[ind]
throw(MissingException("missing value encountered in Missings.fail"))
else
(itr.da.data[ind], ind + 1)
end
"""
    iterate(itr::EachFailMissing[, st])

Step through the wrapped `DataArray` by linear index, starting at 1.
Returns `(value, st + 1)` for a present entry and `nothing` once `st`
exceeds `length(itr)`.

# Throws
- `MissingException`: if the entry at `st` is flagged in the `na` mask.
"""
function Base.iterate(itr::EachFailMissing, st=1)
    if st > length(itr)
        return nothing
    elseif itr.da.na[st]
        throw(MissingException("missing value encountered in Missings.fail"))
    end
    return (itr.da.data[st], st + 1)
end
Base.eltype(itr::EachFailMissing) = Missings.T(eltype(itr.da))

struct EachDropMissing{T<:DataArray}
da::T
Expand All @@ -290,12 +286,11 @@ function _next_nonna_ind(da::DataArray, ind::Int)
ind
end
Base.length(itr::EachDropMissing) = length(itr.da) - sum(itr.da.na)
Base.start(itr::EachDropMissing) = _next_nonna_ind(itr.da, 0)
Base.done(itr::EachDropMissing, ind::Int) = ind > length(itr.da)
Base.eltype(itr::EachDropMissing) = Missings.T(eltype(itr.da))
function Base.next(itr::EachDropMissing, ind::Int)
(itr.da.data[ind], _next_nonna_ind(itr.da, ind))
"""
    iterate(itr::EachDropMissing[, st])

Iterate over the entries of the wrapped `DataArray`, using
`_next_nonna_ind` to choose each index to visit.

The state `st` is an index into `itr.da.data`. The initial state is
`_next_nonna_ind(itr.da, 0)`. Returns `(itr.da.data[st], nextstate)`, or
`nothing` once `st` exceeds `length(itr.da)`.
"""
function Base.iterate(itr::EachDropMissing, st=_next_nonna_ind(itr.da, 0))
    st > length(itr.da) && return nothing
    # Advance from the current state `st`. The earlier body referenced `ind`,
    # a name left over from the old `next(itr, ind)` signature that is not
    # defined in this method and raised `UndefVarError` on the first element.
    return (itr.da.data[st], _next_nonna_ind(itr.da, st))
end
Base.eltype(itr::EachDropMissing) = Missings.T(eltype(itr.da))

struct EachReplaceMissing{S<:DataArray, T}
da::S
Expand All @@ -304,13 +299,12 @@ end
Missings.replace(da::DataArray, replacement::Any) =
EachReplaceMissing(da, replacement)
Base.length(itr::EachReplaceMissing) = length(itr.da)
Base.start(itr::EachReplaceMissing) = 1
Base.done(itr::EachReplaceMissing, ind::Integer) = ind > length(itr)
Base.eltype(itr::EachReplaceMissing) = Missings.T(eltype(itr.da))
function Base.next(itr::EachReplaceMissing, ind::Integer)
item = itr.da.na[ind] ? itr.replacement : itr.da.data[ind]
(item, ind + 1)
"""
    iterate(itr::EachReplaceMissing[, st])

Step through the wrapped `DataArray` by linear index, starting at 1.
Entries flagged in the `na` mask are yielded as `itr.replacement`; all
others are yielded from the underlying data. Returns `nothing` once `st`
exceeds `length(itr)`.
"""
function Base.iterate(itr::EachReplaceMissing, st=1)
    if st > length(itr)
        return nothing
    end
    value = if itr.da.na[st]
        itr.replacement
    else
        itr.da.data[st]
    end
    return (value, st + 1)
end
Base.eltype(itr::EachReplaceMissing) = Missings.T(eltype(itr.da))

Base.collect(itr::EachDropMissing{<:DataVector}) = itr.da.data[.!itr.da.na] # -> Vector
Base.collect(itr::EachFailMissing{<:DataVector}) = copy(itr.da.data) # -> Vector
Expand Down Expand Up @@ -416,7 +410,7 @@ Get the unique values in `da` as well as the index of the first `missing` value
in `da` if present, or 0 otherwise.
"""
function finduniques(da::DataArray{T}) where T # -> Vector{T}, Int
out = Vector{T}(uninitialized, 0)
out = Vector{T}(undef, 0)
seen = Set{T}()
n = length(da)
firstmissing = 0
Expand All @@ -439,7 +433,7 @@ function Base.unique(da::DataArray{T}) where T # -> DataVector{T}
unique_values, firstmissing = finduniques(da)
n = length(unique_values)
if firstmissing > 0
res = DataArray(Vector{T}(uninitialized, n + 1))
res = DataArray(Vector{T}(undef, n + 1))
i = 1
for val in unique_values
if i == firstmissing
Expand Down
3 changes: 2 additions & 1 deletion src/datamatrix.jl
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Extract the matrix diagonal
Base.diag(dm::DataMatrix{T}) where {T} = DataArray(diag(dm.data), diag(dm.na))
using LinearAlgebra
LinearAlgebra.diag(dm::DataMatrix{T}) where {T} = DataArray(diag(dm.data), diag(dm.na))
12 changes: 6 additions & 6 deletions src/datavector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ function Base.pop!(dv::DataVector)
end
end

function Base.unshift!(dv::DataVector{T}, v::Missing) where T
# Prepend a `missing` entry to `dv` and return `v`.
function Base.pushfirst!(dv::DataVector{T}, v::Missing) where T
# Grow `dv.data` by one slot at the front through the runtime's
# `jl_array_grow_beg`; no value is written into the new slot here.
# NOTE(review): presumably the slot is left uninitialized on purpose because
# the `na` mask below marks it as missing — confirm.
ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), dv.data, 1)
# Flag the new first entry as missing.
pushfirst!(dv.na, true)
return v
end

function Base.unshift!(dv::DataVector{S}, v::T) where {S, T}
"""
    pushfirst!(dv::DataVector, v)

Prepend the value `v` to `dv`: `v` is pushed onto the front of the data
vector and `false` onto the front of the `na` mask. Returns `v`.
"""
function Base.pushfirst!(dv::DataVector{S}, v::T) where {S, T}
    values, mask = dv.data, dv.na
    pushfirst!(values, v)
    pushfirst!(mask, false)
    return v
end

function Base.shift!(dv::DataVector{T}) where T
function Base.popfirst!(dv::DataVector{T}) where T
d, m = popfirst!(dv.data), popfirst!(dv.na)
if m
return missing
Expand Down Expand Up @@ -113,18 +113,18 @@ end

Base.pop!(pdv::PooledDataVector) = pdv.pool[pop!(pdv.refs)]

function Base.unshift!(pdv::PooledDataVector{T,R}, v::Missing) where {T,R}
"""
    pushfirst!(pdv::PooledDataVector, v::Missing)

Prepend a `missing` entry to `pdv` by pushing a zero reference of the ref
type `R` onto the front of `pdv.refs`. Returns `v`.
"""
function Base.pushfirst!(pdv::PooledDataVector{T,R}, v::Missing) where {T,R}
    missingref = zero(R)
    pushfirst!(pdv.refs, missingref)
    return v
end

function Base.unshift!(pdv::PooledDataVector{S,R}, v::T) where {S,R,T}
"""
    pushfirst!(pdv::PooledDataVector, v)

Prepend `v` to `pdv`. The value is first converted to the element type `S`,
its pool index is obtained from `getpoolidx`, and that index is pushed onto
the front of `pdv.refs`. Returns the converted value.
"""
function Base.pushfirst!(pdv::PooledDataVector{S,R}, v::T) where {S,R,T}
    converted = convert(S, v)
    poolref = getpoolidx(pdv, converted)
    pushfirst!(pdv.refs, poolref)
    return converted
end

Base.shift!(pdv::PooledDataVector) = pdv.pool[popfirst!(pdv.refs)]
Base.popfirst!(pdv::PooledDataVector) = pdv.pool[popfirst!(pdv.refs)]

Base.reverse(x::AbstractDataVector) = x[end:-1:1]

Expand Down
11 changes: 6 additions & 5 deletions src/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function combine_pools!(pool, newpool)
end

# Find pool elements in existing array, or add them
poolidx = Vector{Int}(uninitialized, length(newpool))
poolidx = Vector{Int}(undef, length(newpool))
for j = 1:length(newpool)
poolidx[j] = Base.@get!(seen, newpool[j], (push!(pool, newpool[j]); i += 1))
end
Expand Down Expand Up @@ -129,16 +129,17 @@ end
N = length(I)
quote
$(Expr(:meta, :inline))
flipbits!(dest.na) # similar initializes with missings
dest.na .= .!(dest.na) # similar initializes with missings
@nexprs $N d->(J_d = I[d])
srcextr = daextract(src)
destextr = daextract(dest)
srcsz = size(src)
D = eachindex(dest)
Ds = start(D)
Ds = iterate(D)
@nloops $N j d->J_d begin
offset_0 = @ncall $N sub2ind srcsz j
d, Ds = next(D, Ds)
offset_0 = @ncall $N LinearIndices srcsz j
d, dstate = Ds
Ds = iterate(D, dstate)
if unsafe_ismissing(src, srcextr, offset_0)
unsafe_dasetindex!(dest, destextr, missing, d)
else
Expand Down
8 changes: 4 additions & 4 deletions src/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ function na_safe_rowmeans(dm::DataMatrix)
end

# TODO: Default to failure in the face of missings
function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4)
function LinearAlgebra.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4)

# Make a copy of the data that we can alter in place
dm = copy(D)
Expand Down Expand Up @@ -135,9 +135,9 @@ function Base.svd(D::DataMatrix, k::Int; tracing = false, tolerance = 10e-4)
# Only return the SVD entries, not the imputation
return (U[:, 1:k], D[1:k], V[:, 1:k])
end
Base.svd(dm::DataMatrix) = svd(dm, minimum(size(dm)))
LinearAlgebra.svd(dm::DataMatrix) = svd(dm, minimum(size(dm)))

function Base.eig(dm::DataMatrix)
function LinearAlgebra.eigen(dm::DataMatrix)
U, D, V = svd(dm)
return eig(U * diagm(D) * V')
return eigen(U * diagm(D) * V')
end
8 changes: 4 additions & 4 deletions src/literals.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
function fixargs(args::Vector{Any}, stub::Any)
n = length(args)
data = Array{Any}(uninitialized, n)
na = BitArray(uninitialized, n)
data = Array{Any}(undef, n)
na = BitArray(undef, n)
for i in 1:n
if args[i] == :missing || args[i] == :NA
data[i] = stub
Expand Down Expand Up @@ -73,8 +73,8 @@ function parsematrix(ex::Expr)
end

nrows = length(rows)
datarows = Array{Expr}(uninitialized, nrows)
narows = Array{Expr}(uninitialized, nrows)
datarows = Array{Expr}(undef, nrows)
narows = Array{Expr}(undef, nrows)
for irow in 1:nrows
data, na = fixargs(ex.args[rows[irow]].args, stub)
datarows[irow] = Expr(:row, data...)
Expand Down
Loading