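bug fix - report AbstractDatasetColumn as the eltype of eachcol + handle nonmissingtype returning Union{} (e.g. for Missing) + pass threads keyword to byrow in flatten/flatten! (#109)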

sl-solution · web-flow · commit 6c25393f97de · 2023-05-17T06:31:14.000Z
diff --git a/src/abstractdataset/abstractdataset.jl b/src/abstractdataset/abstractdataset.jl
@@ -42,16 +42,18 @@ In broadcasting `AbstractDataset` behavior is similar to a `Matrix`.
 """
 abstract type AbstractDataset end
 
+abstract type AbstractDatasetColumn end
+
 # DatasetColumn is a representation of a column of data set
 # it is wrapped into a new type to make sure that when ever a column is
 # selected, the data set is attached to it
-struct DatasetColumn{T <: AbstractDataset, E}
+struct DatasetColumn{T <: AbstractDataset, E} <: AbstractDatasetColumn
     col::Int
     ds::T
     val::E
 end
 
-struct SubDatasetColumn{T <: AbstractDataset, E}
+struct SubDatasetColumn{T <: AbstractDataset, E} <: AbstractDatasetColumn
     col::Int
     ds::T
     val::E
@@ -308,7 +310,7 @@ function content(ds::AbstractDataset; output = false)
     for i in 1:ncol(ds)
         push!(f_v[1], all_names[i])
         push!(f_v[2], getformat(ds, i))
-        push!(f_v[3], nonmissingtype(eltype(ds[!, i])))
+        push!(f_v[3], our_nonmissingtype(eltype(ds[!, i])))
     end
     format_ds = Dataset(f_v, [:column, :format, :eltype], copycols = false)
     if !output
diff --git a/src/abstractdataset/iteration.jl b/src/abstractdataset/iteration.jl
@@ -234,7 +234,7 @@ Base.ndims(::DatasetColumns) = 1
 Base.ndims(::Type{<:DatasetColumns}) = 1
 
 Base.length(itr::DatasetColumns) = size(itr)[1]
-Base.eltype(::Type{<:DatasetColumns}) = AbstractVector
+Base.eltype(::Type{<:DatasetColumns}) = AbstractDatasetColumn
 
 Base.firstindex(itr::DatasetColumns) = 1
 Base.lastindex(itr::DatasetColumns) = length(itr)
diff --git a/src/abstractdataset/show.jl b/src/abstractdataset/show.jl
@@ -118,7 +118,7 @@ function compacttype(T::Type, maxwidth::Int=8)
     textwidth(sT) ≤ maxwidth && return sT
 
     if T >: Missing
-        T = nonmissingtype(T)
+        T = our_nonmissingtype(T)
         sT = string(T)
         suffix = "?"
         textwidth(sT) ≤ maxwidth && return sT * suffix
@@ -223,7 +223,7 @@ function _show(io::IO,
     alignment_regex_complex = [r"(?<!^)(?<!e)[+-]"]
 
     for i = 1:num_cols
-        type_i = nonmissingtype(types[i])
+        type_i = our_nonmissingtype(types[i])
 
         if type_i <: Complex
             alignment_anchor_regex[i] = alignment_regex_complex
diff --git a/src/byrow/byrow.jl b/src/byrow/byrow.jl
@@ -261,15 +261,15 @@ end
 
 function byrow(ds::AbstractDataset, f::Function, col::ColumnIndex; threads = nrow(ds)>1000, allowmissing::Bool = true)
 	if threads
-		T = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(ds[!, col]))})
+		T = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(ds[!, col]))})
 		if allowmissing
 			res = Vector{Union{Missing, T}}(undef, nrow(ds))
 		else
 			res = Vector{T}(undef, nrow(ds))
 		end
 		_hp_map_a_function!(res, f, _columns(ds)[index(ds)[col]])
 	else
-		T = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(ds[!, col]))})
+		T = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(ds[!, col]))})
 		if allowmissing
 			res = Vector{Union{Missing, T}}(undef, nrow(ds))
 		else
diff --git a/src/byrow/row_functions.jl b/src/byrow/row_functions.jl
@@ -33,7 +33,7 @@ function row_sum(ds::AbstractDataset, f::Function,  cols = names(ds, Union{Missi
     colsidx = multiple_getindex(index(ds), cols)
     CT = mapreduce(eltype, promote_type, view(_columns(ds),colsidx))
     T = Core.Compiler.return_type(f, Tuple{CT})
-	CT = nonmissingtype(T)
+	CT = our_nonmissingtype(T)
 	CT <: Base.SmallSigned ? CT = Int : nothing
 	CT <: Base.SmallUnsigned ? CT = UInt : nothing
 	CT <: Bool ? CT = Int : nothing
@@ -68,7 +68,7 @@ function row_prod(ds::AbstractDataset, f::Function, cols = names(ds, Union{Missi
     colsidx = multiple_getindex(index(ds), cols)
     CT = mapreduce(eltype, promote_type, view(_columns(ds),colsidx))
     T = Core.Compiler.return_type(f, Tuple{CT})
-	CT = nonmissingtype(T)
+	CT = our_nonmissingtype(T)
 	CT <: Base.SmallSigned ? CT = Int : nothing
 	CT <: Base.SmallUnsigned ? CT = UInt : nothing
 	CT <: Bool ? CT = Int : nothing
diff --git a/src/dataset/modify.jl b/src/dataset/modify.jl
@@ -597,7 +597,7 @@ end
 
 # the number of destination can be smaller or greater than the number of elements of Tuple,
 function _modify_multiple_out!(ds, x, dst)
-    !(nonmissingtype(eltype(x)) <: Tuple) && throw(ArgumentError("to use `splitter`, the source column must be a vector of Tuple"))
+    !(our_nonmissingtype(eltype(x)) <: Tuple) && throw(ArgumentError("to use `splitter`, the source column must be a vector of Tuple"))
     tb = Tables.columntable(x)
     for j in 1:length(dst)
         try
diff --git a/src/dataset/other.jl b/src/dataset/other.jl
@@ -492,7 +492,7 @@ function Base.map!(ds::AbstractDataset, f::Vector{<:Function}, cols::MultiColumn
         # Core.Compiler.return_type cannot handle the situations like x->ismissing(x) ? 0 : x when x is missing and float, since the output of Core.Compiler.return_type is Union{Missing, Float64, Int64}
         # we remove missing and then check the result,
         # TODO is there any problem with this?
-        T = Core.Compiler.return_type(f[j], Tuple{nonmissingtype(CT)})
+        T = Core.Compiler.return_type(f[j], Tuple{our_nonmissingtype(CT)})
         T = Union{Missing, T}
         if promote_type(T, CT) <: CT
             if threads && DataAPI.refpool(_columns(ds)[colsidx[j]]) === nothing
diff --git a/src/dataset/transpose.jl b/src/dataset/transpose.jl
@@ -408,7 +408,7 @@ end
 
 function _fill_outputmat_withoutid(T, in_cols, ds, starts, perms, new_col_names, row_names_length, threads; default_fill = missing)
 
-    @assert _check_allocation_limit(nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
+    @assert _check_allocation_limit(our_nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
     CT = promote_type(T, typeof(default_fill))
     # outputmat = [__fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
     outputmat = Vector{typeof(_our_vect_alloc(CT, 0))}(undef, length(new_col_names))
@@ -420,7 +420,7 @@ end
 
 function _fill_outputmat_withid(T, in_cols, ds, starts, perms, ids, new_col_names, row_names_length, threads; default_fill = missing)
 
-    @assert _check_allocation_limit(nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
+    @assert _check_allocation_limit(our_nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
     CT = promote_type(T, typeof(default_fill))
     # outputmat = [fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
     outputmat = Vector{typeof(_our_vect_alloc(CT, 0))}(undef, length(new_col_names))
@@ -787,7 +787,7 @@ function flatten!(ds::Dataset,
         for col in 2:length(idxcols)
              if mapformats
                  f_fmt = getformat(ds, idxcols[col])
-                 push!(all_idxcols, byrow(ds, f_fmt, idxcols[col]), threads = threads)
+                 push!(all_idxcols, byrow(ds, f_fmt, idxcols[col], threads = threads))
              else
                  push!(all_idxcols, _columns(ds)[idxcols[col]])
              end
@@ -854,7 +854,7 @@ function flatten(ds::AbstractDataset,
         for col in 2:length(idxcols)
              if mapformats
                  f_fmt = getformat(ds, idxcols[col])
-                 push!(all_idxcols, byrow(ds, f_fmt, idxcols[col]), threads = threads)
+                 push!(all_idxcols, byrow(ds, f_fmt, idxcols[col], threads = threads))
              else
                  push!(all_idxcols, _columns(ds)[idxcols[col]])
              end
diff --git a/src/join/join_dict.jl b/src/join/join_dict.jl
@@ -60,7 +60,7 @@ function _create_dictionary_for_join(f, v, fl, vl, ::Val{T}) where T
         maxval = hp_maximum(DataAPI.refarray(v))
         rangelen = maxval - minval + 1
         _create_dictionary_for_join_int(identity, DataAPI.refarray(v), minval, rangelen, Val(T))
-    elseif nonmissingtype(return_type(f, v)) <: AbstractVector{<:Union{Missing, INTEGERS}} && nonmissingtype(return_type(fl, vl)) <: AbstractVector{<:Union{Missing, INTEGERS}}
+    elseif our_nonmissingtype(return_type(f, v)) <: AbstractVector{<:Union{Missing, INTEGERS}} && our_nonmissingtype(return_type(fl, vl)) <: AbstractVector{<:Union{Missing, INTEGERS}}
         minval = hp_minimum(f, v)
         # if minval is missing all values are missing
         if ismissing(minval)
@@ -531,8 +531,8 @@ function _update!_dict(dsl, dsr, ranges, onleft, onright, right_cols, ::Val{T};
     for j in 1:length(right_cols)
         if haskey(index(dsl).lookup, _names(dsr)[right_cols[j]])
             left_cols_idx = index(dsl)[_names(dsr)[right_cols[j]]]
-            TL = nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
-            TR = nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
+            TL = our_nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
+            TR = our_nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
             if promote_type(TR, TL) <: TL
                 _update_left_with_right!(_columns(dsl)[left_cols_idx], _columns(dsr)[right_cols[j]], ranges, allowmissing, f_mode, threads = threads, op = op)
             end
diff --git a/src/join/update.jl b/src/join/update.jl
@@ -59,8 +59,8 @@ function _update!(dsl::Dataset, dsr::AbstractDataset, ::Val{T}; onleft, onright,
     for j in 1:length(right_cols)
         if haskey(index(dsl).lookup, _names(dsr)[right_cols[j]])
             left_cols_idx = index(dsl)[_names(dsr)[right_cols[j]]]
-            TL = nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
-            TR = nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
+            TL = our_nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
+            TR = our_nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
             if promote_type(TR, TL) <: TL
                 _update_left_with_right!(_columns(dsl)[left_cols_idx], view(_columns(dsr)[right_cols[j]], idx), ranges, allowmissing, f_mode, threads = threads, op = op)
             end
diff --git a/src/other/utils.jl b/src/other/utils.jl
@@ -1,6 +1,16 @@
 const INTEGERS = Union{Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Bool}
 const FLOATS = Union{Float16, Float32, Float64}
 
+function our_nonmissingtype(x)
+    T = nonmissingtype(x)
+    if T === Union{}
+        Missing
+    else
+        T
+    end
+end
+
+
 # work around slow allocation of type union in julia
 function _our_vect_alloc(T, len)
 	if len > 0
@@ -34,7 +44,7 @@ function return_type(f::Function, x)
     if eltype(x) <: AbstractVector
         return return_type_tuple(f, x)
     end
-    CT = nonmissingtype(eltype(x))
+    CT = our_nonmissingtype(eltype(x))
     T = Core.Compiler.return_type(f, Tuple{Vector{CT}})
     # workaround for SubArray type
     if T <: SubArray
@@ -50,7 +60,7 @@ function return_type(f::Function, x)
 end
 
 function return_type_tuple(f::Function, x)
-    CT = ntuple(i -> nonmissingtype(eltype(x[i])), length(x))
+    CT = ntuple(i -> our_nonmissingtype(eltype(x[i])), length(x))
     T = Core.Compiler.return_type(f, Tuple{ntuple(i->Vector{CT[i]}, length(x))...})
     # workaround for SubArray type
     if T <: SubArray
@@ -454,7 +464,7 @@ function _gather_groups(ds, cols, ::Val{T}; mapformats = false, stable = true, t
         else
             v = _columns(ds)[colidx[j]]
         end
-        if nonmissingtype(Core.Compiler.return_type(_f, Tuple{nonmissingtype(eltype(v))})) <: Union{Missing, INTEGERS}
+        if our_nonmissingtype(Core.Compiler.return_type(_f, Tuple{our_nonmissingtype(eltype(v))})) <: Union{Missing, INTEGERS}
 			if threads
 				_minval = hp_minimum(_f, v)
 			else
diff --git a/src/precompile/precompile.jl b/src/precompile/precompile.jl
@@ -546,6 +546,6 @@ function _precompile()
         Base.precompile(Tuple{Core.kwftype(typeof(transpose)),NamedTuple{(:id, :threads), Tuple{Symbol, Bool}},typeof(transpose),Dataset,Vector{Symbol}})
         Base.precompile(Tuple{Core.kwftype(typeof(transpose)),NamedTuple{(:threads,), Tuple{Bool}},typeof(transpose),Dataset,UnitRange{Int64}})
         Base.precompile(Tuple{Core.kwftype(typeof(transpose)),NamedTuple{(:threads,), Tuple{Bool}},typeof(transpose),GroupBy,Vector{Int64}})
-
+        VERSION >= v"1.9" && IMD.warmup()
     return nothing
 end
diff --git a/src/precompile/warmup.jl b/src/precompile/warmup.jl
@@ -173,7 +173,8 @@ function warmup()
     findall(duplicates(ds, :a, mapformats = true)) == 2:12
     unique(ds) == ds1
     unique(ds, 2:3) == ds1
-
+    ds = Dataset(x=[rand(10) for _ in 1:100])
+    flatten!(ds, 1)
     t2 = now()
     Dataset(x1 = "Finished warmup in", x2 = t2-t1)
 end
diff --git a/src/sort/gatherby.jl b/src/sort/gatherby.jl
@@ -276,8 +276,8 @@ function gatherby_mapreduce(gds::GatherBy, f, op, col::ColumnIndex, nt, init, ::
     res
 end
 
-_gatherby_maximum(gds, col; f = identity, nt = Threads.nthreads(), threads = true) = gatherby_mapreduce(gds, f, _stat_max_fun, col, nt, missing, Val(nonmissingtype(eltype(gds.parent[!, col]))), threads = threads)
-_gatherby_minimum(gds, col; f = identity, nt = Threads.nthreads(), threads = true) = gatherby_mapreduce(gds, f, _stat_min_fun, col, nt, missing, Val(nonmissingtype(eltype(gds.parent[!, col]))), threads = threads)
+_gatherby_maximum(gds, col; f = identity, nt = Threads.nthreads(), threads = true) = gatherby_mapreduce(gds, f, _stat_max_fun, col, nt, missing, Val(our_nonmissingtype(eltype(gds.parent[!, col]))), threads = threads)
+_gatherby_minimum(gds, col; f = identity, nt = Threads.nthreads(), threads = true) = gatherby_mapreduce(gds, f, _stat_min_fun, col, nt, missing, Val(our_nonmissingtype(eltype(gds.parent[!, col]))), threads = threads)
 _gatherby_sum(gds, col; f = identity, nt = Threads.nthreads(), threads = true) = gatherby_mapreduce(gds, f, _stat_add_sum, col, nt, missing, Val(typeof(zero(Core.Compiler.return_type(f, Tuple{eltype(gds.parent[!, col])})))), promotetypes = true, threads = threads)
 _gatherby_n(gds, col; nt = Threads.nthreads(), threads = true) = _gatherby_sum(gds, col, f = _stat_notmissing, nt = nt, threads = threads)
 _gatherby_length(gds, col; nt = Threads.nthreads(), threads = true) = _gatherby_sum(gds, col, f = x->1, nt = nt, threads = threads)
@@ -311,7 +311,7 @@ function _gatherby_mean(gds, col; nt = Threads.nthreads(), threads = true)
 		nval = t2
 	end
 
-	T = Core.Compiler.return_type(/, Tuple{nonmissingtype(eltype(sval)), nonmissingtype(eltype(nval))})
+	T = Core.Compiler.return_type(/, Tuple{our_nonmissingtype(eltype(sval)), our_nonmissingtype(eltype(nval))})
 	res = _our_vect_alloc(Union{Missing, T}, length(nval))
 	_fill_gatherby_mean_barrier!(res, sval, nval)
 	res
@@ -367,7 +367,7 @@ function _gatherby_var(gds, col; dof = true, cal_std = false, threads = true)
 		ss = t3
 		nval = t4
 	end
-	T = Core.Compiler.return_type(/, Tuple{nonmissingtype(eltype(meanval)), nonmissingtype(eltype(nval))})
+	T = Core.Compiler.return_type(/, Tuple{our_nonmissingtype(eltype(meanval)), our_nonmissingtype(eltype(nval))})
 	res = _our_vect_alloc(Union{Missing, T}, length(nval))
 	_fill_gatherby_var_barrier!(res, countnan, meanval, ss, nval, cal_std, dof)
 	res
diff --git a/src/sort/sortperm.jl b/src/sort/sortperm.jl
@@ -133,15 +133,15 @@ end
 function _apply_by_f_barrier(x::AbstractVector{T}, by, rev, threads) where T
     needrev = rev
     missat = :right
-    CT = Core.Compiler.return_type(_date_value∘by, Tuple{nonmissingtype(T)})
+    CT = Core.Compiler.return_type(_date_value∘by, Tuple{our_nonmissingtype(T)})
     if CT == Bool
         CT = Int8
     end
     CT = Union{Missing, CT}
     # _temp = Vector{CT}(undef, length(x))
     _temp = _our_vect_alloc(CT, length(x))
     # we should make sure changing sign doesn't overflow
-    if rev && nonmissingtype(CT) <: Union{Bool, Int8, Int16, Int32, Int64} && isless(typemin(nonmissingtype(CT)), threads ? hp_minimum(_date_value∘by, x) : stat_minimum(_date_value∘by, x))
+    if rev && our_nonmissingtype(CT) <: Union{Bool, Int8, Int16, Int32, Int64} && isless(typemin(our_nonmissingtype(CT)), threads ? hp_minimum(_date_value∘by, x) : stat_minimum(_date_value∘by, x))
         _by = x-> -_date_value(by(x))
         needrev = false
         missat = :left
diff --git a/src/stat/hp_stat.jl b/src/stat/hp_stat.jl
@@ -3,7 +3,7 @@ function hp_maximum(f, x::AbstractVector{T}) where {T}
     nt = Threads.nthreads()
     cz = div(n, nt)
     cz == 0 && return stat_maximum(f, x)
-    CT = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(x))})
+    CT = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(x))})
     if T >: Missing
         CT = Union{Missing,CT}
     end
@@ -22,7 +22,7 @@ function hp_minimum(f, x::AbstractVector{T}) where {T}
     nt = Threads.nthreads()
     cz = div(n, nt)
     cz == 0 && return stat_minimum(f, x)
-    CT = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(x))})
+    CT = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(x))})
     if T >: Missing
         CT = Union{Missing,CT}
     end
@@ -41,7 +41,7 @@ function hp_sum(f, x::AbstractVector{T}) where {T}
     nt = Threads.nthreads()
     cz = div(n, nt)
     cz == 0 && return stat_sum(f, x)
-    CT = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(x))})
+    CT = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(x))})
     CT <: Base.SmallSigned ? CT = Int : nothing
     CT <: Base.SmallUnsigned ? CT = UInt : nothing
     CT <: Bool ? CT = Int : nothing
diff --git a/src/stat/non_hp_stat.jl b/src/stat/non_hp_stat.jl
@@ -137,7 +137,7 @@ end
 # this is manual simd version for max(min) function
 function stat_maximum(f::typeof(identity), x::AbstractArray{T,1}; lo=1, hi=length(x)) where {T}
     all(ismissing, view(x, lo:hi)) && return missing
-    _dmiss(x) = ismissing(x) ? typemin(nonmissingtype(T)) : x
+    _dmiss(x) = ismissing(x) ? typemin(our_nonmissingtype(T)) : x
     Base.mapreduce_impl(_dmiss, max, x, lo, hi)
 end
 function stat_maximum(f::F, x::AbstractArray{T,1}; lo=1, hi=length(x)) where {F,T}
@@ -162,7 +162,7 @@ stat_findmax(x::AbstractArray{T,1}) where {T} = stat_findmax(identity, x)
 
 function stat_minimum(f::typeof(identity), x::AbstractArray{T,1}; lo=1, hi=length(x)) where {T}
     all(ismissing, view(x, lo:hi)) && return missing
-    @inline _dmiss(x) = ismissing(x) ? typemax(nonmissingtype(T)) : x
+    @inline _dmiss(x) = ismissing(x) ? typemax(our_nonmissingtype(T)) : x
     Base.mapreduce_impl(_dmiss, min, x, lo, hi)
 end
 function stat_minimum(f::F, x::AbstractArray{T,1}; lo=1, hi=length(x)) where {F,T}
@@ -331,7 +331,7 @@ stat_std(x::AbstractArray{T,1}, dof=true) where {T} = stat_std(identity, x, dof)
 function stat_median(v::AbstractArray{T,1}) where {T}
     isempty(v) && throw(ArgumentError("median of an empty array is undefined, $(repr(v))"))
     all(ismissing, v) && return missing
-    (nonmissingtype(eltype(v)) <: AbstractFloat || nonmissingtype(eltype(v)) >: AbstractFloat) && any(ISNAN, v) && return convert(eltype(v), NaN)
+    (our_nonmissingtype(eltype(v)) <: AbstractFloat || our_nonmissingtype(eltype(v)) >: AbstractFloat) && any(ISNAN, v) && return convert(eltype(v), NaN)
     nmis::Int = mapreduce(ismissing, +, v)
     n = length(v) - nmis
     mid = div(1 + n, 2)
@@ -346,7 +346,7 @@ end
 function stat_median!(v::AbstractArray{T,1}) where {T}
     isempty(v) && throw(ArgumentError("median of an empty array is undefined, $(repr(v))"))
     all(ismissing, v) && return missing
-    (nonmissingtype(eltype(v)) <: AbstractFloat || nonmissingtype(eltype(v)) >: AbstractFloat) && any(ISNAN, v) && return convert(eltype(v), NaN)
+    (our_nonmissingtype(eltype(v)) <: AbstractFloat || our_nonmissingtype(eltype(v)) >: AbstractFloat) && any(ISNAN, v) && return convert(eltype(v), NaN)
     nmis::Int = mapreduce(ismissing, +, v)
     n = length(v) - nmis
     mid = div(1 + n, 2)
diff --git a/test/broadcasting.jl b/test/broadcasting.jl
@@ -133,7 +133,7 @@ end
     end
     ds4 = (x -> ds[1, 1]).(ds)
     @test names(ds4) == names(ds)
-    @test all(isa.(eachcol(ds4), Ref(CategoricalArray)))
+    @test all(isa.(eachcol(ds4), DatasetColumn{Dataset, CategoricalVector{Union{Missing, String}, UInt32, String, CategoricalValue{String, UInt32}, Missing}}))
     @test all(eachcol(ds4) .== Ref(categorical(["a", "a"])))
 
     ds5 = Dataset(x=Any[1, 2, 3], y=Any[1, 2.0, big(3)])
diff --git a/test/constructors.jl b/test/constructors.jl
diff --git a/test/join.jl b/test/join.jl