diff --git a/benchmark/operators.jl b/benchmark/operators.jl index b7b4c0e..ea4b8cc 100644 --- a/benchmark/operators.jl +++ b/benchmark/operators.jl @@ -35,7 +35,7 @@ macro perf(fn, replications, idx...) quote println($name) gc_disable() - df = compare([()->$fn for i=$idx], $replications) + df = compare([let i=i; ()->$fn; end for i=$idx], $replications) gc_enable() gc() df[:Function] = TEST_NAMES[$idx] @@ -71,7 +71,6 @@ const Bool2 = make_test_types(make_bools, 1000) @perf Bool1[i] $ Bool2[i] 100 # Vector operators -@perf sum(Float1[i]) 250 1:div(length(Float1), 2) @perf diff(Float1[i]) 50 1:div(length(Float1), 2) @perf cumsum(Float1[i]) 50 1:div(length(Float1), 2) end diff --git a/benchmark/reduce.jl b/benchmark/reduce.jl new file mode 100644 index 0000000..eebc227 --- /dev/null +++ b/benchmark/reduce.jl @@ -0,0 +1,49 @@ +module ReduceBenchmark +using DataArrays, Benchmark + +# seed rng for more consistent timings +srand(1776) + +const TEST_NAMES = [ + "Vector", + "DataVector No NA skipna=false", + "DataVector No NA skipna=true", + "DataVector Half NA skipna=false", + "DataVector Half NA skipna=true" +] + +function make_test_types(genfunc, sz) + mat = genfunc(sz) + na = shuffle!([trues(ifloor(sz/2)), falses(iceil(sz/2))]) + ( + mat, + DataArray(mat), + DataArray(mat, na) + ) +end + +const Data = make_test_types(rand, 100000000) + +macro perf(fn, replications) + quote + println($fn) + fns = [()->$fn(Data[1]), + ()->$fn(Data[2]), + ()->$fn(Data[2]; skipna=true), + ()->$fn(Data[3]), + ()->$fn(Data[3]; skipna=true)] + gc_disable() + df = compare(fns, $replications) + gc_enable() + gc() + df[:Function] = TEST_NAMES + df[:Relative] = df[:Average]./df[1, :Average] + println(df) + end +end + +@perf sum 10 +@perf maximum 10 +@perf mean 10 +@perf var 10 +end diff --git a/benchmark/reducedim.jl b/benchmark/reducedim.jl new file mode 100644 index 0000000..29a6406 --- /dev/null +++ b/benchmark/reducedim.jl @@ -0,0 +1,53 @@ +module ReducedimBenchmark +using DataArrays, Benchmark + +# seed rng for more consistent timings +srand(1776) + +const TEST_NAMES = [ + "Matrix", + "DataMatrix No NA skipna=false", + "DataMatrix No NA skipna=true", + "DataMatrix Half NA skipna=false", + "DataMatrix Half NA skipna=true" +] + +function make_test_types(genfunc, sz) + mat = genfunc(abs2(sz)) + na = shuffle!([trues(ifloor(abs2(sz)/2)), falses(iceil(abs2(sz)/2))]) + ( + reshape(mat, sz, sz), + DataArray(reshape(mat, sz, sz)), + DataArray(reshape(mat, sz, sz), reshape(na, sz, sz)) + ) +end + +const Data = make_test_types(rand, 10000) + +macro perf(fn, dim, replications) + quote + println($fn, " (region = ", $dim, ")") + fns = [()->$fn(Data[1], $dim), + ()->$fn(Data[2], $dim), + ()->$fn(Data[2], $dim; skipna=true), + ()->$fn(Data[3], $dim), + ()->$fn(Data[3], $dim; skipna=true)] + gc_disable() + df = compare(fns, $replications) + gc_enable() + gc() + df[:Function] = TEST_NAMES + df[:Relative] = df[:Average]./df[1, :Average] + println(df) + end +end + +@perf sum 1 10 +@perf sum 2 10 +@perf maximum 1 10 +@perf maximum 2 10 +@perf mean 1 10 +@perf mean 2 10 +@perf var 1 10 +@perf var 2 10 +end diff --git a/src/DataArrays.jl b/src/DataArrays.jl index c370ee7..5d678cf 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -63,6 +63,8 @@ module DataArrays include("datamatrix.jl") include("linalg.jl") include("operators.jl") + include("reduce.jl") + include("reducedim.jl") include("broadcast.jl") include("sort.jl") include("extras.jl") diff --git a/src/broadcast.jl b/src/broadcast.jl index 6d229c9..af20330 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -1,4 +1,4 @@ -using DataArrays, Base.Cartesian, Base.@get! +using DataArrays, Base.@get! using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache, promote_eltype, broadcast_shape, eltype_plus, type_minus, type_div, type_pow @@ -103,9 +103,11 @@ function gen_broadcast_dataarray(nd::Int, arrtype::(DataType...), outtype, f::Fu # Set up output DataArray/PooledDataArray $(if outtype == DataArray quote - Bc = B.na.chunks - fill!(Bc, 0) Bdata = B.data + # Copy in case aliased + # TODO: check for aliasing? + Bna = falses(size(Bdata)) + Bc = Bna.chunks ind = 1 end elseif outtype == PooledDataArray @@ -158,6 +160,10 @@ function gen_broadcast_dataarray(nd::Int, arrtype::(DataType...), outtype, f::Fu :(ind += 1) end) end) + + $(if outtype == DataArray + :(B.na = Bna) + end) end _F_ end diff --git a/src/dataarray.jl b/src/dataarray.jl index d625541..8fe16ea 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -152,6 +152,9 @@ function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T} dest end +Base.fill!(A::DataArray, ::NAtype) = (fill!(A.na, true); A) +Base.fill!(A::DataArray, v) = (fill!(A.data, v); fill!(A.na, false); A) + #' @description #' #' Create a deep copy of a DataArray. diff --git a/src/indexing.jl b/src/indexing.jl index d0358bd..e6c8eee 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -12,6 +12,7 @@ daextract(a) = nothing # Check for NA unsafe_isna(da::DataArray, extr, idx::Real) = Base.unsafe_bitgetindex(extr[2], idx) unsafe_isna(pda::PooledDataArray, extr, idx::Real) = extr[1][idx] == 0 +unsafe_isna(a, extr, idx::Real) = false unsafe_getindex_notna(da::DataArray, extr, idx::Real) = getindex(extr[1], idx) unsafe_getindex_notna(pda::PooledDataArray, extr, idx::Real) = getindex(extr[2], extr[1][idx]) unsafe_getindex_notna(a, extr, idx::Real) = Base.unsafe_getindex(a, idx) @@ -43,6 +44,7 @@ unsafe_dasetindex!(da::PooledDataArray, extr, val::NAtype, idx::Real) = nothing unsafe_dasetindex!(da::DataArray, extr, val, idx::Real) = setindex!(extr[1], val, idx) unsafe_dasetindex!(pda::PooledDataArray, extr, val, idx::Real) = setindex!(extr[1], getpoolidx(pda, val), idx) +unsafe_dasetindex!(a::AbstractArray, extr, val, idx::Real) = setindex!(a, val, idx) ## PooledDataArray helper functions diff --git a/src/operators.jl b/src/operators.jl index c1c5efa..980341b 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -9,6 +9,7 @@ const numeric_unary_operators = [:(Base.(:+)), const logical_unary_operators = [:(Base.(:!))] const elementary_functions = [:(Base.abs), + :(Base.abs2), :(Base.sign), :(Base.acos), :(Base.acosh), @@ -146,14 +147,7 @@ const bit_operators = [:(Base.(:&)), :(Base.(:|)), :(Base.(:$))] -const unary_vector_operators = [:(Base.minimum), - :(Base.maximum), - :(Base.prod), - :(Base.sum), - :(Base.mean), - :(Base.median), - :(Base.std), - :(Base.var), +const unary_vector_operators = [:(Base.median), :(StatsBase.mad), :(Base.norm), :(StatsBase.skewness), @@ -460,9 +454,17 @@ end # XXX: The below should be revisited once we have a way to infer what # the proper return type of an array should be. +# One-argument elementary functions that do something different for +# Complex +for f in (:(Base.abs), :(Base.abs2)) + @eval begin + @dataarray_unary $(f) Complex T.parameters[1] + end +end + # One-argument elementary functions that return the same type as their # inputs -for f in (:(Base.abs), :(Base.conj), :(Base.sign)) +for f in (:(Base.abs), :(Base.abs2), :(Base.conj), :(Base.sign)) @eval begin $(f)(::NAtype) = NA @dataarray_unary $(f) Number T @@ -672,7 +674,8 @@ Base.(:.^)(::MathConst{:e}, B::AbstractDataArray) = exp(B) for f in (:(Base.(:+)), :(Base.(:.+)), :(Base.(:-)), :(Base.(:.-)), :(Base.(:*)), :(Base.(:.*)), :(Base.(:.^)), :(Base.div), - :(Base.mod), :(Base.fld), :(Base.rem)) + :(Base.mod), :(Base.fld), :(Base.rem), :(Base.min), + :(Base.max)) @eval begin # Scalar with NA ($f)(::NAtype, ::NAtype) = NA diff --git a/src/reduce.jl b/src/reduce.jl new file mode 100644 index 0000000..50dedf5 --- /dev/null +++ b/src/reduce.jl @@ -0,0 +1,172 @@ +using Base: evaluate + +## mapreduce implementation that skips NA + +function skipna_init(f, op, na::BitArray, data::Array, ifirst::Int, ilast::Int) + # Get first non-NA element + ifirst = Base.findnextnot(na, ifirst) + @inbounds d1 = data[ifirst] + + # Get next non-NA element + ifirst = Base.findnextnot(na, ifirst+1) + @inbounds d2 = data[ifirst] + + # Reduce first two elements + (evaluate(op, evaluate(f, d1), evaluate(f, d2)), ifirst) +end + +function mapreduce_seq_impl_skipna(f, op, T, A::DataArray, ifirst::Int, ilast::Int) + data = A.data + na = A.na + chunks = na.chunks + + v, i = skipna_init(f, op, na, data, ifirst, ilast) + + while i < ilast + i += 1 + @inbounds na = Base.unsafe_bitgetindex(chunks, i) + na && continue + @inbounds d = data[i] + v = evaluate(op, v, evaluate(f, d)) + end + v +end + +# Pairwise map-reduce +function mapreduce_pairwise_impl_skipna{T}(f, op, A::DataArray{T}, bytefirst::Int, bytelast::Int, n_notna::Int, blksize::Int) + if n_notna <= blksize + ifirst = 64*(bytefirst-1)+1 + ilast = min(64*bytelast, length(A)) + # Fall back to Base implementation if no NAs in block + return ilast - ifirst + 1 == n_notna ? Base.mapreduce_seq_impl(f, op, A.data, ifirst, ilast) : + mapreduce_seq_impl_skipna(f, op, T, A, ifirst, ilast) + end + + # Find byte in the middle of range + # The block size is restricted so that there will always be at + # least two non-NA elements in the returned range + chunks = A.na.chunks + nmid = 0 + imid = bytefirst-1 + while nmid < (n_notna >> 1) + imid += 1 + @inbounds nmid += count_zeros(chunks[imid]) + end + + v1 = mapreduce_pairwise_impl_skipna(f, op, A, bytefirst, imid, nmid, blksize) + v2 = mapreduce_pairwise_impl_skipna(f, op, A, imid+1, bytelast, n_notna-nmid, blksize) + evaluate(op, v1, v2) +end + +mapreduce_impl_skipna{T}(f, op, A::DataArray{T}) = + mapreduce_seq_impl_skipna(f, op, T, A, 1, length(A.data)) +mapreduce_impl_skipna(f, op::Base.AddFun, A::DataArray) = + mapreduce_pairwise_impl_skipna(f, op, A, 1, length(A.na.chunks), + length(A.na)-countnz(A.na), + max(128, Base.sum_pairwise_blocksize(f))) + +## general mapreduce interface + +function _mapreduce_skipna{T}(f, op, A::DataArray{T}) + n = length(A) + na = A.na + + nna = countnz(na) + nna == n && return Base.mr_empty(f, op, T) + nna == n-1 && return Base.r_promote(op, evaluate(f, A.data[Base.findnextnot(na, 1)])) + nna == 0 && return Base.mapreduce_impl(f, op, A.data, 1, n) + + mapreduce_impl_skipna(f, op, A) +end + +# This is only safe when we can guarantee that if a function is passed +# NA, it returns NA. Otherwise we will fall back to the implementation +# in Base, which is slow because it's type-unstable, but guarantees the +# correct semantics +typealias SafeMapFuns Union(Base.IdFun, Base.AbsFun, Base.Abs2Fun, + Base.ExpFun, Base.LogFun, Base.CentralizedAbs2Fun) +typealias SafeReduceFuns Union(Base.AddFun, Base.MulFun, Base.MaxFun, Base.MinFun) +function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) + any(A.na) && return NA + Base._mapreduce(f, op, A.data) +end + +function Base.mapreduce(f, op::Function, A::DataArray; skipna::Bool=false) + is(op, +) ? (skipna ? _mapreduce_skipna(f, Base.AddFun(), A) : Base._mapreduce(f, Base.AddFun(), A)) : + is(op, *) ? (skipna ? _mapreduce_skipna(f, Base.MulFun(), A) : Base._mapreduce(f, Base.MulFun(), A)) : + is(op, &) ? (skipna ? _mapreduce_skipna(f, Base.AndFun(), A) : Base._mapreduce(f, Base.AndFun(), A)) : + is(op, |) ? (skipna ? _mapreduce_skipna(f, Base.OrFun(), A) : Base._mapreduce(f, Base.OrFun(), A)) : + skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) +end + +Base.mapreduce(f, op, A::DataArray; skipna::Bool=false) = + skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) + +Base.reduce(op, A::DataArray; skipna::Bool=false) = + mapreduce(Base.IdFun(), op, A; skipna=skipna) + +## usual reductions + +for (fn, op) in ((:(Base.sum), Base.AddFun()), + (:(Base.prod), Base.MulFun()), + (:(Base.minimum), Base.MinFun()), + (:(Base.maximum), Base.MaxFun())) + @eval begin + $fn(f::Union(Function,Base.Func{1}), a::DataArray; skipna::Bool=false) = + mapreduce(f, $op, a; skipna=skipna) + $fn(a::DataArray; skipna::Bool=false) = + mapreduce(Base.IdFun(), $op, a; skipna=skipna) + end +end + +for (fn, f, op) in ((:(Base.sumabs), Base.AbsFun(), Base.AddFun()), + (:(Base.sumabs2), Base.Abs2Fun(), Base.AddFun())) + @eval $fn(a::DataArray; skipna::Bool=false) = mapreduce($f, $op, a; skipna=skipna) +end + +## mean + +Base.mean(a::DataArray; skipna::Bool=false) = + sum(a; skipna=skipna) / (length(a.na)-(skipna ? countnz(a.na) : 0)) + +## variance + +function Base.varm{T}(A::DataArray{T}, m::Number; corrected::Bool=true, skipna::Bool=false) + if skipna + n = length(A) + na = A.na + + nna = countnz(na) + nna == n && return convert(Base.momenttype(T), NaN) + nna == n-1 && return convert(Base.momenttype(T), + abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - int(corrected))) + + /(nna == 0 ? Base.centralize_sumabs2(A.data, m, 1, n) : + mapreduce_impl_skipna(Base.CentralizedAbs2Fun(m), Base.AddFun(), A), + n - nna - int(corrected)) + else + any(A.na) && return NA + Base.varm(A.data, m; corrected=corrected) + end +end +Base.varm{T}(A::DataArray{T}, m::NAtype; corrected::Bool=true, skipna::Bool=false) = NA + +function Base.varzm{T}(A::DataArray{T}; corrected::Bool=true, skipna::Bool=false) + n = length(A) + nna = skipna ? countnz(A.na) : 0 + (n == 0 || n == nna) && return convert(Base.momenttype(T), NaN) + return Base.sumabs2(A; skipna=skipna) / (n - nna - int(corrected)) +end + +function Base.var(A::DataArray; corrected::Bool=true, mean=nothing, skipna::Bool=false) + mean == 0 ? Base.varzm(A; corrected=corrected, skipna=skipna) : + mean == nothing ? varm(A, Base.mean(A; skipna=skipna); corrected=corrected, skipna=skipna) : + isa(mean, Union(Number, NAtype)) ? varm(A, mean; corrected=corrected, skipna=skipna) : + error("Invalid value of mean.") +end + +Base.stdm(A::DataArray, m::Number; corrected::Bool=true, skipna::Bool=false) = + sqrt(varm(A, m; corrected=corrected, skipna=skipna)) + +Base.std(A::DataArray; corrected::Bool=true, mean=nothing, skipna::Bool=false) = + sqrt(var(A; corrected=corrected, mean=mean, skipna=skipna)) diff --git a/src/reducedim.jl b/src/reducedim.jl new file mode 100644 index 0000000..b7239e0 --- /dev/null +++ b/src/reducedim.jl @@ -0,0 +1,566 @@ +## Utility function + +# This is a substantially faster implementation of the "all" reduction +# across dimensions for reducing a BitArray to an Array{Bool}. We use +# this below for implementing MaxFun and MinFun with skipna=true. +@ngenerate N typeof(R) function Base._mapreducedim!{N}(f, op::Base.AndFun, R::Array{Bool}, A::BitArray{N}) + lsiz = Base.check_reducdims(R, A) + isempty(A) && return R + @nextract N sizeR d->size(R, d) + @nexprs 1 d->(state_0 = state_{N} = 1) + @nexprs N d->(skip_d = sizeR_d == 1) + Achunks = A.chunks + k = 1 + @nloops(N, i, A, + d->(state_{d-1} = state_d), + d->(skip_d || (state_d = state_0)), + begin + @inbounds R[state_0] &= evaluate(f, Base.unsafe_bitgetindex(Achunks, k)) + state_0 += 1 + k += 1 + end) + R +end + +# Determine if there are any true values in a BitArray in a given +# range. We use this for reductions with skipna=false along the first +# dimension. +function _any(B::BitArray, istart::Int, iend::Int) + chunks = B.chunks + startidx, startbit = Base.get_chunks_id(istart) + endidx, endbit = Base.get_chunks_id(iend) + startidx == endidx && return chunks[startidx] >> startbit << (63-endbit+startbit) != 0 + + chunks[startidx] >> startbit != 0 && return true + for i = startidx+1:endidx-1 + chunks[i] != 0 && return true + end + chunks[endidx] << (63-endbit) != 0 && return true + return false +end + +# Counts the number of ones in a given range. We use this for counting +# the values for mean and var with skipna=false along the first +# dimension. +function _count(B::BitArray, istart::Int, iend::Int) + chunks = B.chunks + startidx, startbit = Base.get_chunks_id(istart) + endidx, endbit = Base.get_chunks_id(iend) + startidx == endidx && return count_ones(chunks[startidx] >> startbit << (63-endbit+startbit)) + + n = 0 + n += count_ones(chunks[startidx] >> startbit) + for i = startidx+1:endidx-1 + n += count_ones(chunks[i]) + end + n += count_ones(chunks[endidx] << (63-endbit)) + return n +end + +## NA-preserving +@ngenerate N typeof(R) function _mapreducedim!{T,N}(f::SafeMapFuns, op::SafeReduceFuns, + R::DataArray, A::DataArray{T,N}) + data = A.data + na = A.na + + lsiz = Base.check_reducdims(R, data) + isempty(data) && return R + + if lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + extr = daextract(R) + for i = 1:nslices + if _any(na, ibase+1, ibase+lsiz) + unsafe_setna!(R, extr, i) + else + v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) + @inbounds unsafe_dasetindex!(R, extr, v, i) + end + ibase += lsiz + end + else + @nextract N sizeR d->size(R,d) + na_chunks = A.na.chunks + + new_data = R.data + + # If reducing to a DataArray, skip strides with NAs. + # In my benchmarks, it is actually faster to compute a new NA + # array and bitpack it than to operate on the BitArray + # directly. + new_na = fill(false, size(new_data)) + + @nexprs 1 d->(state_0 = state_{N} = 1) + @nexprs N d->(skip_d = sizeR_d == 1) + k = 1 + @nloops(N, i, A, + d->(state_{d-1} = state_d), + d->(skip_d || (state_d = state_0)), begin + @inbounds vna = new_na[state_0] | Base.unsafe_bitgetindex(na_chunks, k) + if vna + @inbounds new_na[state_0] = true + else + @inbounds x = data[k] + v = evaluate(f, x) + @inbounds v0 = new_data[state_0] + nv = evaluate(op, v0, v) + @inbounds new_data[state_0] = nv + end + + state_0 += 1 + k += 1 + end) + + R.na = bitpack(new_na) + end + return R +end + +## NA-preserving to array +@ngenerate N typeof(R) function _mapreducedim!{T,N}(f::SafeMapFuns, op::SafeReduceFuns, + R::AbstractArray, A::DataArray{T,N}) + data = A.data + na = A.na + + lsiz = Base.check_reducdims(R, data) + isempty(data) && return R + + if lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + extr = daextract(R) + for i = 1:nslices + if _any(na, ibase+1, ibase+lsiz) + error("cannot reduce a DataArray containing NAs to an AbstractArray") + else + v = Base.mapreduce_impl(f, op, data, ibase+1, ibase+lsiz) + @inbounds unsafe_dasetindex!(R, extr, v, i) + end + ibase += lsiz + end + else + @nextract N sizeR d->size(R,d) + + # If reducing to a non-DataArray, throw an error at the start on NA + any(isna(A)) && error("cannot reduce a DataArray containing NAs to an AbstractArray") + @nloops N i data d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds x = (@nref N data i) + v = evaluate(f, x) + @inbounds v0 = (@nref N R j) + nv = evaluate(op, v0, v) + @inbounds (@nref N R j) = nv + end + end + return R +end +_mapreducedim!(f, op, R, A) = Base._mapreducedim!(f, op, R, A) + +## NA-skipping +_getdata(A) = A +_getdata(A::DataArray) = A.data + +# mapreduce across a dimension. If specified, C contains the number of +# non-NA values reduced into each element of R. +@ngenerate N typeof(R) function _mapreducedim_skipna_impl!{T,N}(f, op, R::AbstractArray, + C::Union(Array{Int}, Nothing), + A::DataArray{T,N}) + data = A.data + na = A.na + na_chunks = na.chunks + new_data = _getdata(R) + + isa(C, Nothing) || size(R) == size(C) || error("R and C must have same size") + lsiz = Base.check_reducdims(new_data, data) + isempty(data) && return R + + if lsiz > 16 + # keep the accumulator as a local variable when reducing along the first dimension + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + # TODO: use pairwise impl for sum + @inbounds v = new_data[i] + @inbounds !isa(C, Nothing) && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) + for k = ibase+1:ibase+lsiz + @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue + @inbounds x = data[k] + v = convert(typeof(v), evaluate(op, evaluate(f, x), v))::typeof(v) + end + @inbounds new_data[i] = v + ibase += lsiz + end + else + # general implementation + @nextract N sizeR d->size(new_data,d) + @nexprs 1 d->(state_0 = state_{N} = 1) + @nexprs N d->(skip_d = sizeR_d == 1) + k = 1 + !isa(C, Nothing) && fill!(C, div(length(A), length(R))) + @nloops(N, i, A, + d->(state_{d-1} = state_d), + d->(skip_d || (state_d = state_0)), begin + @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) + if xna + !isa(C, Nothing) && @inbounds C[state_0] -= 1 + else + @inbounds x = data[k] + v = evaluate(f, x) + @inbounds v0 = new_data[state_0] + nv = evaluate(op, v0, v) + @inbounds new_data[state_0] = nv + end + + state_0 += 1 + k += 1 + end) + end + return R +end + +_mapreducedim_skipna!(f, op, R::AbstractArray, A::DataArray) = + _mapreducedim_skipna_impl!(f, op, R, nothing, A) + +# for MinFun/MaxFun, min or max is NA if all values along a dimension are NA +function _mapreducedim_skipna!(f, op::Union(Base.MinFun, Base.MaxFun), R::DataArray, A::DataArray) + R.na = bitpack(all!(fill(true, size(R)), A.na)) + _mapreducedim_skipna_impl!(f, op, R, nothing, A) +end +function _mapreducedim_skipna!(f, op::Union(Base.MinFun, Base.MaxFun), R::AbstractArray, A::DataArray) + if any(all!(fill(true, size(R)), A.na)) + error("all values along specified dimension are NA for one element of reduced dimension; cannot reduce to non-DataArray") + end + _mapreducedim_skipna_impl!(f, op, R, nothing, A) +end + +## general reducedim interface + +for op in (Base.AddFun, Base.MulFun, Base.AndFun, Base.OrFun, Base.MinFun, Base.MaxFun) + @eval begin + function Base.initarray!{T}(a::DataArray{T}, op::$op, init::Bool) + if init + Base.initarray!(a.data, op, true) + Base.fill!(a.na, false) + end + a + end + end +end + +function Base.reducedim_initarray{R}(A::DataArray, region, v0, ::Type{R}) + rd = Base.reduced_dims(A.data, region) + DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) +end +function Base.reducedim_initarray0{R}(A::DataArray, region, v0, ::Type{R}) + rd = Base.reduced_dims0(A,region) + DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) +end + +function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; skipna::Bool=false) + is(op, +) ? (skipna ? _mapreducedim_skipna!(f, Base.AddFun(), R, A) : _mapreducedim!(f, Base.AddFun(), R, A)) : + is(op, *) ? (skipna ? _mapreducedim_skipna!(f, Base.MulFun(), R, A) : _mapreducedim!(f, Base.MulFun(), R, A)) : + is(op, &) ? (skipna ? _mapreducedim_skipna!(f, Base.AndFun(), R, A) : _mapreducedim!(f, Base.AndFun(), R, A)) : + is(op, |) ? (skipna ? _mapreducedim_skipna!(f, Base.OrFun(), R, A) : _mapreducedim!(f, Base.OrFun(), R, A)) : + skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) +end +Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray; skipna::Bool=false) = + skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) +Base.reducedim!{RT}(op, R::DataArray{RT}, A::AbstractArray; skipna::Bool=false) = + Base.mapreducedim!(Base.IdFun(), op, R, A, zero(RT); skipna=skipna) + +Base.mapreducedim(f, op, A::DataArray, region, v0; skipna::Bool=false) = + Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A; skipna=skipna) +Base.mapreducedim{T}(f, op, A::DataArray{T}, region; skipna::Bool=false) = + Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A; skipna=skipna) + +Base.reducedim(op, A::DataArray, region, v0; skipna::Bool=false) = + Base.mapreducedim(Base.IdFun(), op, A, region, v0; skipna=skipna) +Base.reducedim(op, A::DataArray, region; skipna::Bool=false) = + Base.mapreducedim(Base.IdFun(), op, A, region; skipna=skipna) + +## usual reductions + +for (basfn, Op) in [(:sum, Base.AddFun), (:prod, Base.MulFun), + (:maximum, Base.MaxFun), (:minimum, Base.MinFun), + (:all, Base.AndFun), (:any, Base.OrFun)] + fname = Expr(:., :Base, Base.Meta.quot(basfn)) + fname! = Expr(:., :Base, Base.Meta.quot(symbol(string(basfn, '!')))) + @eval begin + $(fname!)(f::Union(Function,Base.Func{1}), r::AbstractArray, A::DataArray; + init::Bool=true, skipna::Bool=false) = + Base.mapreducedim!(f, $(Op)(), Base.initarray!(r, $(Op)(), init), A; skipna=skipna) + $(fname!)(r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = + $(fname!)(Base.IdFun(), r, A; init=init, skipna=skipna) + + $(fname)(f::Union(Function,Base.Func{1}), A::DataArray, region; skipna::Bool=false) = + Base.mapreducedim(f, $(Op)(), A, region; skipna=skipna) + $(fname)(A::DataArray, region; skipna::Bool=false) = + $(fname)(Base.IdFun(), A, region; skipna=skipna) + end +end + +for (basfn, fbase, Fun) in [(:sumabs, :sum, Base.AbsFun), + (:sumabs2, :sum, Base.Abs2Fun), + (:maxabs, :maximum, Base.AbsFun), + (:minabs, :minimum, Base.AbsFun)] + fname = Expr(:., :Base, Base.Meta.quot(basfn)) + fname! = Expr(:., :Base, Base.Meta.quot(symbol(string(basfn, '!')))) + fbase! = Expr(:., :Base, Base.Meta.quot(symbol(string(fbase, '!')))) + @eval begin + $(fname!)(r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = + $(fbase!)($(Fun)(), r, A; init=init, skipna=skipna) + $(fname)(A::DataArray, region; skipna::Bool=false) = + $(fbase)($(Fun)(), A, region; skipna=skipna) + end +end + +## mean + +function Base.mean!{T}(R::AbstractArray{T}, A::DataArray; skipna::Bool=false, + init::Bool=true) + init && fill!(R, zero(eltype(R))) + if skipna + C = Array(Int, size(R)) + _mapreducedim_skipna_impl!(Base.IdFun(), Base.AddFun(), R, C, A) + broadcast!(/, R, R, C) + else + sum!(R, A; skipna=false) + broadcast!(/, R, R, convert(T, length(A)/length(R))) + R + end +end + +Base.mean{T}(A::DataArray{T}, region; skipna::Bool=false) = + mean!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A; skipna=skipna, + init=false) + +## var + +immutable MapReduceDim2ArgHelperFun{F,T} + f::F + val::T +end +Base.evaluate(f::MapReduceDim2ArgHelperFun, x) = evaluate(f.f, x, f.val) + +# A version of _mapreducedim! that accepts an array S of the same size +# as R, the elements of which are passed as a second argument to f. +@ngenerate N typeof(R) function _mapreducedim_2arg!{T,N}(f, op, R::DataArray, + A::DataArray{T,N}, + S::AbstractArray) + data = A.data + na = A.na + Sextr = daextract(S) + + lsiz = Base.check_reducdims(R, data) + size(R) == size(S) || error("R and S must have same size") + isempty(data) && return R + + if lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + extr = daextract(R) + for i = 1:nslices + if unsafe_isna(S, Sextr, i) || _any(na, ibase+1, ibase+lsiz) + unsafe_setna!(R, extr, i) + else + @inbounds s = unsafe_getindex_notna(S, Sextr, i) + v = Base.mapreduce_impl(MapReduceDim2ArgHelperFun(f, s), op, data, ibase+1, ibase+lsiz) + @inbounds unsafe_dasetindex!(R, extr, v, i) + end + ibase += lsiz + end + else + @nextract N sizeR d->size(R,d) + na_chunks = A.na.chunks + new_data = R.data + new_na = isa(S, DataArray) ? bitunpack(S.na) : fill(false, size(S)) + + @nexprs 1 d->(state_0 = state_{N} = 1) + @nexprs N d->(skip_d = sizeR_d == 1) + k = 1 + @nloops(N, i, A, + d->(state_{d-1} = state_d), + d->(skip_d || (state_d = state_0)), begin + @inbounds vna = new_na[state_0] | Base.unsafe_bitgetindex(na_chunks, k) + if vna + @inbounds new_na[state_0] = true + else + @inbounds s = unsafe_getindex_notna(S, Sextr, state_0) + @inbounds x = data[k] + v = evaluate(f, x, s) + @inbounds v0 = new_data[state_0] + nv = evaluate(op, v0, v) + @inbounds new_data[state_0] = nv + end + + state_0 += 1 + k += 1 + end) + + R.na = bitpack(new_na) + end + return R +end + +# A version of _mapreducedim_skipna! that accepts an array S of the same size +# as R, the elements of which are passed as a second argument to f. +@ngenerate N typeof(R) function _mapreducedim_skipna_2arg!{T,N}(f, op, R::AbstractArray, + C::Union(Array{Int}, Nothing), + A::DataArray{T,N}, S::AbstractArray) + data = A.data + na = A.na + na_chunks = na.chunks + new_data = _getdata(R) + Sextr = daextract(S) + + lsiz = Base.check_reducdims(new_data, data) + isa(C, Nothing) || size(R) == size(C) || error("R and C must have same size") + size(R) == size(S) || error("R and S must have same size") + isempty(data) && return R + @nextract N sizeR d->size(new_data,d) + sizA1 = size(data, 1) + + # If there are any NAs in S, assume these will produce NAs in R + if isa(S, DataArray) + copy!(R.na, S.na) + end + + if lsiz > 16 + # keep the accumulator as a local variable when reducing along the first dimension + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + @inbounds v = new_data[i] + !isa(C, Nothing) && (C[i] = lsiz - _count(na, ibase+1, ibase+lsiz)) + + # If S[i] is NA, skip this iteration + @inbounds sna = unsafe_isna(S, Sextr, i) + if !sna + @inbounds s = unsafe_getindex_notna(S, Sextr, i) + # TODO: use pairwise impl for sum + for k = ibase+1:ibase+lsiz + @inbounds Base.unsafe_bitgetindex(na_chunks, k) && continue + @inbounds x = data[k] + v = convert(typeof(v), evaluate(op, evaluate(f, x, s), v))::typeof(v) + end + + @inbounds new_data[i] = v + end + + ibase += lsiz + end + else + # general implementation + @nexprs 1 d->(state_0 = state_{N} = 1) + @nexprs N d->(skip_d = sizeR_d == 1) + k = 1 + !isa(C, Nothing) && fill!(C, div(length(A), length(R))) + @nloops(N, i, A, + d->(state_{d-1} = state_d), + d->(skip_d || (state_d = state_0)), begin + @inbounds xna = Base.unsafe_bitgetindex(na_chunks, k) | unsafe_isna(S, Sextr, state_0) + if xna + !isa(C, Nothing) && @inbounds C[state_0] -= 1 + else + @inbounds s = unsafe_getindex_notna(S, Sextr, state_0) + @inbounds x = data[k] + v = evaluate(f, x, s) + @inbounds v0 = new_data[state_0] + nv = evaluate(op, v0, v) + @inbounds new_data[state_0] = nv + end + + state_0 += 1 + k += 1 + end) + end + return R +end + +immutable Abs2MinusFun end +Base.evaluate(f::Abs2MinusFun, x, m) = abs2(x - m) + +function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected::Bool=true, + skipna::Bool=false, init::Bool=true) + if isempty(A) + fill!(R, convert(eltype(R), NaN)) + else + init && fill!(R, zero(eltype(R))) + if skipna + C = Array(Int, size(R)) + + # Compute R = abs2(A-m) + _mapreducedim_skipna_2arg!(Abs2MinusFun(), Base.AddFun(), R, C, A, m) + + # Divide by number of non-NA values + if corrected + for i = 1:length(C) + @inbounds C[i] = max(C[i] - 1, 0) + end + end + broadcast!(/, R, R, C) + else + # Compute R = abs2(A-m) + _mapreducedim_2arg!(Abs2MinusFun(), Base.AddFun(), R, A, m) + + # Divide by number of values + broadcast!(/, R, R, div(length(A), length(R)) - int(corrected)) + end + end +end + +function Base.varzm!(R::AbstractArray, A::DataArray; corrected::Bool=true, + skipna::Bool=false, init::Bool=true) + if isempty(A) + fill!(R, convert(eltype(R), NaN)) + else + init && fill!(R, zero(eltype(R))) + if skipna + C = Array(Int, size(R)) + _mapreducedim_skipna_impl!(Base.Abs2Fun(), Base.AddFun(), R, C, A) + if corrected + for i = 1:length(C) + @inbounds C[i] = max(C[i] - 1, 0) + end + end + broadcast!(/, R, R, C) + else + Base.sumabs2!(R, A; init=true) + broadcast!(/, R, R, div(length(A), length(R)) - int(corrected)) + end + end +end + +Base.varm{T}(A::DataArray{T}, m::AbstractArray, region; corrected::Bool=true, + skipna::Bool=false) = + Base.varm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A, m; + corrected=corrected, skipna=skipna, init=false) + +Base.varzm{T}(A::DataArray{T}, region::Union(Integer, AbstractArray, Tuple); + corrected::Bool=true, skipna::Bool=false) = + Base.varzm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A; + corrected=corrected, skipna=skipna, init=false) + +function Base.var{T}(A::DataArray{T}, region::Union(Integer, AbstractArray, Tuple); + corrected::Bool=true, mean=nothing, skipna::Bool=false) + if mean == 0 + Base.varzm(A, region; corrected=corrected, skipna=skipna) + elseif mean == nothing + if skipna + # Can reduce mean into ordinary array + m = zeros(Base.momenttype(T), Base.reduced_dims(A, region)) + Base.varm(A, Base.mean!(m, A; skipna=skipna), region; + corrected=corrected, skipna=skipna) + else + Base.varm(A, Base.mean(A, region; skipna=skipna), region; + corrected=corrected, skipna=skipna) + end + elseif isa(mean, AbstractArray) + Base.varm(A, mean::AbstractArray, region; corrected=corrected, skipna=skipna) + else + error("invalid value of mean") + end +end diff --git a/test/reduce.jl b/test/reduce.jl new file mode 100644 index 0000000..dac1b1d --- /dev/null +++ b/test/reduce.jl @@ -0,0 +1,133 @@ +module TestReduce +using DataArrays, Base.Test + +## extended test of sum + +for skipna in (true, false) + @test sum(@data(Int8[]); skipna=skipna) === 0 + @test sum(@data(Int[]); skipna=skipna) === 0 + @test sum(@data(Float64[]); skipna=skipna) === 0.0 + + @test sum(@data([int8(3)]); skipna=skipna) === 3 + @test sum(@data([3]); skipna=skipna) === 3 + @test sum(@data([3.0]); skipna=skipna) === 3.0 + + z = DataArray(reshape(1:16, (2,2,2,2))) + fz = convert(DataArray{Float64}, z) + bfz = convert(DataArray{BigFloat}, z) + @test sum(z) === 136 + @test sum(fz) === 136.0 + @test sum(bfz) == 136 +end + +@test sum(@data(Int[NA])) === NA +@test sum(@data(Int[NA]); skipna=true) === 0 +@test sum(@data(Int[NA, NA])) === NA +@test sum(@data(Int[NA, NA]); skipna=true) === 0 +@test sum(@data(Int[NA, NA, 1]); skipna=true) === 1 +@test sum(@data(Int[NA, NA, 1, 2]); skipna=true) === 3 +@test sum(@data(Int[NA, 1, NA, 1, 2]); skipna=true) === 4 + +z = DataArray(reshape(1:16, (2,2,2,2))) +z[6] = NA +fz = convert(DataArray{Float64}, z) +bfz = convert(DataArray{BigFloat}, z) +@test isna(sum(z)) +@test isna(sum(fz)) +@test isna(sum(bfz)) +@test sum(z; skipna=true) === 130 +@test sum(fz; skipna=true) === 130.0 +@test sum(bfz; skipna=true) == 130 + +bs = Base.sum_pairwise_blocksize(Base.IdFun()) +for n in [bs-64, bs-1, bs, bs+1, bs+2, 2*bs-2:2*bs+3, 4*bs-2:4*bs+3] + da = DataArray(randn(n)) + s = sum(da.data) + @test_approx_eq sum(da) s + @test_approx_eq sum(da; skipna=true) s + + da2 = copy(da) + da2[1:2:end] = NA + @test isna(sum(da2)) + @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + + da2 = convert(DataArray{BigFloat}, da2) + @test isna(sum(da2)) + @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + + da2 = copy(da) + da2[2:2:end] = NA + @test isna(sum(da2)) + @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + + da2 = convert(DataArray{BigFloat}, da2) + @test isna(sum(da2)) + @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) +end + +## other reductions + +macro same_behavior(ex1, ex2) + quote + v = try + $ex2 + catch e + e + end + isa(v, Exception) ? @test_throws(typeof(v), $ex1) : @test_approx_eq($ex1, v) + end +end + +_varuc(x; kw...) = var(x; corrected=false, kw...) +_varzm(x; kw...) = var(x; mean=0, kw...) +_varzmuc(x; kw...) = var(x; corrected=false, mean=0, kw...) +_var1m(x; kw...) = var(x; mean=1, kw...) +_var1muc(x; kw...) = var(x; corrected=false, mean=1, kw...) +_std1m(x; kw...) = stdm(x, 1; kw...) + +for fn in (prod, minimum, maximum, mean, + var, _varuc, _varzm, _varzmuc, _var1m, _var1muc, + std, _std1m, Base.sumabs, Base.sumabs2) + for n in [0, 1, 2, 62, 63, 64, 65, 66] + da = DataArray(randn(n)) + @same_behavior fn(da) fn(da.data) + @same_behavior fn(da; skipna=true) fn(da.data) + + da2 = copy(da) + da2[1:2:end] = NA + n > 0 && @test isna(fn(da2)) + @same_behavior fn(da2; skipna=true) fn(dropna(da2)) + + da2 = convert(DataArray{BigFloat}, da2) + n > 0 && @test isna(fn(da2)) + @same_behavior fn(da2; skipna=true) fn(dropna(da2)) + + da2 = copy(da) + da2[2:2:end] = NA + n > 1 && @test isna(fn(da2)) + @same_behavior fn(da2; skipna=true) fn(dropna(da2)) + + da2 = convert(DataArray{BigFloat}, da2) + n > 1 && @test isna(fn(da2)) + @same_behavior fn(da2; skipna=true) fn(dropna(da2)) + end +end + +## reduce and mapreduce drivers + +for fn in (+, *, |, &) + da = convert(DataArray, randbool(10)) + + s = mapreduce(Base.IdFun(), fn, da.data) + @test mapreduce(Base.IdFun(), fn, da) == s + @test mapreduce(Base.IdFun(), fn, da; skipna=true) == s + @test reduce(fn, da) == s + @test reduce(fn, da; skipna=true) == s +end + +# make sure reductions of & and | are still calling Base +@test isna(reduce(&, @data([true, NA]))) +@test !reduce(&, @data([false, NA])) +@test reduce(|, @data([true, NA])) +@test isna(reduce(|, @data([false, NA]))) +end diff --git a/test/reducedim.jl b/test/reducedim.jl new file mode 100644 index 0000000..7beb71e --- /dev/null +++ b/test/reducedim.jl @@ -0,0 +1,177 @@ +module TestReducedim +using DataArrays, Base.Test + +# Test for fast unit stride BitArray functions +function test_any() + bits = randbool(64*5) + for i = 1:length(bits), j = i:length(bits) + v = false + for k = i:j + v |= bits[k] + end + Base.Test.@test DataArrays._any(bits, i, j) == v + end +end +test_any() + +function test_count() + bits = randbool(64*5) + for i = 1:length(bits), j = i:length(bits) + v = 0 + for k = i:j + v += bits[k] + end + Base.Test.@test DataArrays._count(bits, i, j) == v + end +end +test_count() + +# mapslices from Base, hacked to work for these cases +function safe_mapslices{T}(f::Function, A::AbstractArray{T}, region, skipna) + dims = intersect(region, 1:ndims(A)) + if isempty(dims) + if skipna + naval = f(T[], 1) + A = copy(A) + A[isna(A)] = isempty(naval) ? NA : naval[1] + end + return A + end + + if isempty(dims) + return map(f,A) + end + + dimsA = [size(A)...] + ndimsA = ndims(A) + alldims = [1:ndimsA] + + otherdims = setdiff(alldims, dims) + + idx = cell(ndimsA) + fill!(idx, 1) + Asliceshape = tuple(dimsA[dims]...) + itershape = tuple(dimsA[otherdims]...) + for d in dims + idx[d] = 1:size(A,d) + end + + r1 = f(reshape(A[idx...], Asliceshape); skipna=skipna) + + # determine result size and allocate + Rsize = copy(dimsA) + # TODO: maybe support removing dimensions + if !isa(r1, AbstractArray) || ndims(r1) == 0 + r2 = similar(A, T, 1) + r2[1] = r1 + r1 = r2 + end + Rsize[dims] = [size(r1)...; ones(Int,max(0,length(dims)-ndims(r1)))] + R = similar(r1, tuple(Rsize...)) + + ridx = cell(ndims(R)) + fill!(ridx, 1) + for d in dims + ridx[d] = 1:size(R,d) + end + + R[ridx...] = r1 + + first = true + Base.cartesianmap(itershape) do idxs... + if first + first = false + else + ia = [idxs...] + idx[otherdims] = ia + ridx[otherdims] = ia + try + R[ridx...] = f(reshape(A[idx...], Asliceshape); skipna=skipna) + catch e + if isa(e, ErrorException) && e.msg == "Reducing over an empty array is not allowed." + R[ridx...] = NA + else + rethrow(e) + end + end + end + end + + return R +end + +macro test_da_approx_eq(da1, da2) + quote + v1 = $da1 + v2 = $da2 + na = isna(v1) + @test na == isna(v2) + defined = !na + if any(defined) + @test_approx_eq v1[defined] v2[defined] + end + end +end + +myvarzm(x; skipna::Bool=false) = var(x; mean=0, skipna=skipna) +myvar1m(x; skipna::Bool=false) = var(x; mean=1, skipna=skipna) + +for Areduc in (DataArray(rand(3, 4, 5, 6)), + DataArray(rand(3, 4, 5, 6), rand(3, 4, 5, 6) .< 0.2)) + for skipna = (false, true) + for region in { + 1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), + (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)} + # println("region = $region, skipna = $skipna") + + outputs = {DataArray(fill(NaN, Base.reduced_dims(size(Areduc), region)))} + has_na = anyna(Areduc) + if has_na && !skipna + # Should throw an error reducing to non-DataArray + @test_throws ErrorException sum!(outputs[1].data, Areduc; skipna=skipna) + else + # Should be able to reduce to non-DataArray + push!(outputs, outputs[1].data) + end + + for r in outputs + @test_da_approx_eq sum!(r, Areduc; skipna=skipna) safe_mapslices(sum, Areduc, region, skipna) + @test_da_approx_eq prod!(r, Areduc; skipna=skipna) safe_mapslices(prod, Areduc, region, skipna) + if !has_na + @test_da_approx_eq maximum!(r, Areduc; skipna=skipna) safe_mapslices(maximum, Areduc, region, skipna) + @test_da_approx_eq minimum!(r, Areduc; skipna=skipna) safe_mapslices(minimum, Areduc, region, skipna) + end + @test_da_approx_eq Base.sumabs!(r, Areduc; skipna=skipna) safe_mapslices(sum, abs(Areduc), region, skipna) + @test_da_approx_eq Base.sumabs2!(r, Areduc; skipna=skipna) safe_mapslices(sum, abs2(Areduc), region, skipna) + @test_da_approx_eq mean!(r, Areduc; skipna=skipna) safe_mapslices(mean, Areduc, region, skipna) + end + + @test_da_approx_eq sum(Areduc, region; skipna=skipna) safe_mapslices(sum, Areduc, region, skipna) + @test_da_approx_eq prod(Areduc, region; skipna=skipna) safe_mapslices(prod, Areduc, region, skipna) + @test_da_approx_eq maximum(Areduc, region; skipna=skipna) safe_mapslices(maximum, Areduc, region, skipna) + @test_da_approx_eq minimum(Areduc, region; skipna=skipna) safe_mapslices(minimum, Areduc, region, skipna) + @test_da_approx_eq Base.sumabs(Areduc, region; skipna=skipna) safe_mapslices(sum, abs(Areduc), region, skipna) + @test_da_approx_eq Base.sumabs2(Areduc, region; skipna=skipna) safe_mapslices(sum, abs2(Areduc), region, skipna) + @test_da_approx_eq mean(Areduc, region; skipna=skipna) safe_mapslices(mean, Areduc, region, skipna) + + if region != 5 + @test_da_approx_eq var(Areduc, region; skipna=skipna) safe_mapslices(var, Areduc, region, skipna) + @test_da_approx_eq var(Areduc, region; mean=0, skipna=skipna) safe_mapslices(myvarzm, Areduc, region, skipna) + for r in outputs + @test_da_approx_eq var(Areduc, region; mean=fill!(r, 1), skipna=skipna) safe_mapslices(myvar1m, Areduc, region, skipna) + end + end + end + end +end + +# Test NA-skipping behavior for maximum +a = @data([NA NA; 3 4]) +@test isequal(maximum(a, 1; skipna=true), [3 4]) +@test isequal(maximum!(zeros(1, 2), a; skipna=true), [3 4]) + +# Maximum should give an NA in the output if all values along dimension are NA +@test isequal(maximum(a, 2; skipna=true), @data([NA 4])') +# Maximum should refuse to reduce to a non-DataArray +@test_throws ErrorException maximum!(zeros(2, 1), a; skipna=true) +end diff --git a/test/runtests.jl b/test/runtests.jl index f103210..16bb124 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,8 @@ my_tests = ["abstractarray.jl", "linalg.jl", #"test/nas.jl", "operators.jl", + "reduce.jl", + "reducedim.jl", "broadcast.jl", "padding.jl", "pooleddataarray.jl",