Skip to content

Commit 6c25393

Browse files
authored
bug fix - correctly assign typeof(eachcol) + handling nonmissingtype (#109)
1 parent 0b6ed9b commit 6c25393

File tree

20 files changed

+79
-69
lines changed

20 files changed

+79
-69
lines changed

src/abstractdataset/abstractdataset.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,18 @@ In broadcasting `AbstractDataset` behavior is similar to a `Matrix`.
4242
"""
4343
abstract type AbstractDataset end
4444

45+
abstract type AbstractDatasetColumn end
46+
4547
# DatasetColumn is a representation of a column of a data set
4648
# it is wrapped into a new type to make sure that whenever a column is
4749
# selected, the data set is attached to it
48-
struct DatasetColumn{T <: AbstractDataset, E}
50+
struct DatasetColumn{T <: AbstractDataset, E} <: AbstractDatasetColumn
4951
col::Int
5052
ds::T
5153
val::E
5254
end
5355

54-
struct SubDatasetColumn{T <: AbstractDataset, E}
56+
struct SubDatasetColumn{T <: AbstractDataset, E} <: AbstractDatasetColumn
5557
col::Int
5658
ds::T
5759
val::E
@@ -308,7 +310,7 @@ function content(ds::AbstractDataset; output = false)
308310
for i in 1:ncol(ds)
309311
push!(f_v[1], all_names[i])
310312
push!(f_v[2], getformat(ds, i))
311-
push!(f_v[3], nonmissingtype(eltype(ds[!, i])))
313+
push!(f_v[3], our_nonmissingtype(eltype(ds[!, i])))
312314
end
313315
format_ds = Dataset(f_v, [:column, :format, :eltype], copycols = false)
314316
if !output

src/abstractdataset/iteration.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ Base.ndims(::DatasetColumns) = 1
234234
Base.ndims(::Type{<:DatasetColumns}) = 1
235235

236236
Base.length(itr::DatasetColumns) = size(itr)[1]
237-
Base.eltype(::Type{<:DatasetColumns}) = AbstractVector
237+
Base.eltype(::Type{<:DatasetColumns}) = AbstractDatasetColumn
238238

239239
Base.firstindex(itr::DatasetColumns) = 1
240240
Base.lastindex(itr::DatasetColumns) = length(itr)

src/abstractdataset/show.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ function compacttype(T::Type, maxwidth::Int=8)
118118
textwidth(sT) ≤ maxwidth && return sT
119119

120120
if T >: Missing
121-
T = nonmissingtype(T)
121+
T = our_nonmissingtype(T)
122122
sT = string(T)
123123
suffix = "?"
124124
textwidth(sT) ≤ maxwidth && return sT * suffix
@@ -223,7 +223,7 @@ function _show(io::IO,
223223
alignment_regex_complex = [r"(?<!^)(?<!e)[+-]"]
224224

225225
for i = 1:num_cols
226-
type_i = nonmissingtype(types[i])
226+
type_i = our_nonmissingtype(types[i])
227227

228228
if type_i <: Complex
229229
alignment_anchor_regex[i] = alignment_regex_complex

src/byrow/byrow.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,15 +261,15 @@ end
261261

262262
function byrow(ds::AbstractDataset, f::Function, col::ColumnIndex; threads = nrow(ds)>1000, allowmissing::Bool = true)
263263
if threads
264-
T = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(ds[!, col]))})
264+
T = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(ds[!, col]))})
265265
if allowmissing
266266
res = Vector{Union{Missing, T}}(undef, nrow(ds))
267267
else
268268
res = Vector{T}(undef, nrow(ds))
269269
end
270270
_hp_map_a_function!(res, f, _columns(ds)[index(ds)[col]])
271271
else
272-
T = Core.Compiler.return_type(f, Tuple{nonmissingtype(eltype(ds[!, col]))})
272+
T = Core.Compiler.return_type(f, Tuple{our_nonmissingtype(eltype(ds[!, col]))})
273273
if allowmissing
274274
res = Vector{Union{Missing, T}}(undef, nrow(ds))
275275
else

src/byrow/row_functions.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ function row_sum(ds::AbstractDataset, f::Function, cols = names(ds, Union{Missi
3333
colsidx = multiple_getindex(index(ds), cols)
3434
CT = mapreduce(eltype, promote_type, view(_columns(ds),colsidx))
3535
T = Core.Compiler.return_type(f, Tuple{CT})
36-
CT = nonmissingtype(T)
36+
CT = our_nonmissingtype(T)
3737
CT <: Base.SmallSigned ? CT = Int : nothing
3838
CT <: Base.SmallUnsigned ? CT = UInt : nothing
3939
CT <: Bool ? CT = Int : nothing
@@ -68,7 +68,7 @@ function row_prod(ds::AbstractDataset, f::Function, cols = names(ds, Union{Missi
6868
colsidx = multiple_getindex(index(ds), cols)
6969
CT = mapreduce(eltype, promote_type, view(_columns(ds),colsidx))
7070
T = Core.Compiler.return_type(f, Tuple{CT})
71-
CT = nonmissingtype(T)
71+
CT = our_nonmissingtype(T)
7272
CT <: Base.SmallSigned ? CT = Int : nothing
7373
CT <: Base.SmallUnsigned ? CT = UInt : nothing
7474
CT <: Bool ? CT = Int : nothing

src/dataset/modify.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,7 @@ end
597597

598598
# the number of destinations can be smaller or greater than the number of elements of the Tuple
599599
function _modify_multiple_out!(ds, x, dst)
600-
!(nonmissingtype(eltype(x)) <: Tuple) && throw(ArgumentError("to use `splitter`, the source column must be a vector of Tuple"))
600+
!(our_nonmissingtype(eltype(x)) <: Tuple) && throw(ArgumentError("to use `splitter`, the source column must be a vector of Tuple"))
601601
tb = Tables.columntable(x)
602602
for j in 1:length(dst)
603603
try

src/dataset/other.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ function Base.map!(ds::AbstractDataset, f::Vector{<:Function}, cols::MultiColumn
492492
# Core.Compiler.return_type cannot handle the situations like x->ismissing(x) ? 0 : x when x is missing and float, since the output of Core.Compiler.return_type is Union{Missing, Float64, Int64}
493493
# we remove missing and then check the result,
494494
# TODO is there any problem with this?
495-
T = Core.Compiler.return_type(f[j], Tuple{nonmissingtype(CT)})
495+
T = Core.Compiler.return_type(f[j], Tuple{our_nonmissingtype(CT)})
496496
T = Union{Missing, T}
497497
if promote_type(T, CT) <: CT
498498
if threads && DataAPI.refpool(_columns(ds)[colsidx[j]]) === nothing

src/dataset/transpose.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ end
408408

409409
function _fill_outputmat_withoutid(T, in_cols, ds, starts, perms, new_col_names, row_names_length, threads; default_fill = missing)
410410

411-
@assert _check_allocation_limit(nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
411+
@assert _check_allocation_limit(our_nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
412412
CT = promote_type(T, typeof(default_fill))
413413
# outputmat = [__fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
414414
outputmat = Vector{typeof(_our_vect_alloc(CT, 0))}(undef, length(new_col_names))
@@ -420,7 +420,7 @@ end
420420

421421
function _fill_outputmat_withid(T, in_cols, ds, starts, perms, ids, new_col_names, row_names_length, threads; default_fill = missing)
422422

423-
@assert _check_allocation_limit(nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
423+
@assert _check_allocation_limit(our_nonmissingtype(T), row_names_length*_ngroups(ds), length(new_col_names)) < 1.0 "The output data set is huge and there is not enough resource, check the passed arguments."
424424
CT = promote_type(T, typeof(default_fill))
425425
# outputmat = [fill!(_our_vect_alloc(CT, row_names_length*_ngroups(ds)), default_fill) for _ in 1:length(new_col_names)]
426426
outputmat = Vector{typeof(_our_vect_alloc(CT, 0))}(undef, length(new_col_names))
@@ -787,7 +787,7 @@ function flatten!(ds::Dataset,
787787
for col in 2:length(idxcols)
788788
if mapformats
789789
f_fmt = getformat(ds, idxcols[col])
790-
push!(all_idxcols, byrow(ds, f_fmt, idxcols[col]), threads = threads)
790+
push!(all_idxcols, byrow(ds, f_fmt, idxcols[col], threads = threads))
791791
else
792792
push!(all_idxcols, _columns(ds)[idxcols[col]])
793793
end
@@ -854,7 +854,7 @@ function flatten(ds::AbstractDataset,
854854
for col in 2:length(idxcols)
855855
if mapformats
856856
f_fmt = getformat(ds, idxcols[col])
857-
push!(all_idxcols, byrow(ds, f_fmt, idxcols[col]), threads = threads)
857+
push!(all_idxcols, byrow(ds, f_fmt, idxcols[col], threads = threads))
858858
else
859859
push!(all_idxcols, _columns(ds)[idxcols[col]])
860860
end

src/join/join_dict.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ function _create_dictionary_for_join(f, v, fl, vl, ::Val{T}) where T
6060
maxval = hp_maximum(DataAPI.refarray(v))
6161
rangelen = maxval - minval + 1
6262
_create_dictionary_for_join_int(identity, DataAPI.refarray(v), minval, rangelen, Val(T))
63-
elseif nonmissingtype(return_type(f, v)) <: AbstractVector{<:Union{Missing, INTEGERS}} && nonmissingtype(return_type(fl, vl)) <: AbstractVector{<:Union{Missing, INTEGERS}}
63+
elseif our_nonmissingtype(return_type(f, v)) <: AbstractVector{<:Union{Missing, INTEGERS}} && our_nonmissingtype(return_type(fl, vl)) <: AbstractVector{<:Union{Missing, INTEGERS}}
6464
minval = hp_minimum(f, v)
6565
# if minval is missing all values are missing
6666
if ismissing(minval)
@@ -531,8 +531,8 @@ function _update!_dict(dsl, dsr, ranges, onleft, onright, right_cols, ::Val{T};
531531
for j in 1:length(right_cols)
532532
if haskey(index(dsl).lookup, _names(dsr)[right_cols[j]])
533533
left_cols_idx = index(dsl)[_names(dsr)[right_cols[j]]]
534-
TL = nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
535-
TR = nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
534+
TL = our_nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
535+
TR = our_nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
536536
if promote_type(TR, TL) <: TL
537537
_update_left_with_right!(_columns(dsl)[left_cols_idx], _columns(dsr)[right_cols[j]], ranges, allowmissing, f_mode, threads = threads, op = op)
538538
end

src/join/update.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ function _update!(dsl::Dataset, dsr::AbstractDataset, ::Val{T}; onleft, onright,
5959
for j in 1:length(right_cols)
6060
if haskey(index(dsl).lookup, _names(dsr)[right_cols[j]])
6161
left_cols_idx = index(dsl)[_names(dsr)[right_cols[j]]]
62-
TL = nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
63-
TR = nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
62+
TL = our_nonmissingtype(eltype(_columns(dsl)[left_cols_idx]))
63+
TR = our_nonmissingtype(eltype(_columns(dsr)[right_cols[j]]))
6464
if promote_type(TR, TL) <: TL
6565
_update_left_with_right!(_columns(dsl)[left_cols_idx], view(_columns(dsr)[right_cols[j]], idx), ranges, allowmissing, f_mode, threads = threads, op = op)
6666
end

0 commit comments

Comments
 (0)