Skip to content

Commit e92e409

Browse files
committed
some cleanup
1 parent 8353586 commit e92e409

File tree

1 file changed

+3
-20
lines changed

1 file changed

+3
-20
lines changed

src/sort/gatherby.jl

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,6 @@ function compute_indices(groups, ngroups, ::Val{T}; threads = true) where T
115115
idx, starts
116116
end
117117

118-
# fast combine for gatherby data
119-
120118
mutable struct GatherBy
121119
parent
122120
groupcols
@@ -208,29 +206,14 @@ function gatherby(ds::AbstractDataset, cols::MultiColumnIndex; mapformats::Bool
208206
end
209207
gatherby(ds::AbstractDataset, col::ColumnIndex; mapformats = true, stable = true, isgathered = false, eachrow = false, threads = true) = gatherby(ds, [col], mapformats = mapformats, stable = stable, isgathered = isgathered, eachrow = eachrow, threads = threads)
210208

211-
212-
__SPFRMT(x) = x & 1023
213-
__SPFRMT(::Missing) = missing # not needed
214-
215-
# currently not used in gatherby
216-
# use sort and format trick for fast gatherby - hm stands for high memory footprint
217-
function hm_gatherby(ds::AbstractDataset, cols::MultiColumnIndex; mapformats = false, threads = true)
218-
modify!(ds, cols=>byrow(hash; threads = threads, mapformats = mapformats)=>:___tmp___cols8934, :___tmp___cols8934=>identity=>:___tmp___cols8934_2)
219-
setformat!(ds, :___tmp___cols8934_2=>__SPFRMT)
220-
gds = groupby(ds, [:___tmp___cols8934_2, :___tmp___cols8934], stable = false, threads = threads)
221-
grpcols, ranges, last_valid_index = _find_starts_of_groups(view(ds, gds.perm, cols), cols, nrow(ds) < typemax(Int32) ? Val(Int32) : Val(Int64); mapformats = mapformats, threads = threads)
222-
select!(ds, Not([:___tmp___cols8934, :___tmp___cols8934_2]))
223-
GatherBy(ds, grpcols, nothing, last_valid_index, mapformats, gds.perm, ranges, _get_lastmodified(_attributes(ds)))
224-
end
225-
226209
function _fill_mapreduce_col!(x, f, op, y, loc)
227210
@inbounds for i in 1:length(y)
228211
x[loc[i]] = op(x[loc[i]], f(y[i]))
229212
end
230213
end
231214

232215
# only for calculating var - mval is a vector of means
233-
function _fill_mapreduce_col!(x, mval::Vector, op, y, loc)
216+
function _fill_mapreduce_col!(x, mval::AbstractVector, op, y, loc)
234217
@inbounds for i in 1:length(y)
235218
x[loc[i]] = op(x[loc[i]], _abs2mean(y[i], mval[loc[i]]))
236219
end
@@ -249,7 +232,7 @@ function _fill_mapreduce_col_threaded!(x, f, op, y, loc, nt)
249232
end
250233

251234
# only for calculating var - mval is a vector of means
252-
function _fill_mapreduce_col_threaded!(x, mval::Vector, op, y, loc, nt)
235+
function _fill_mapreduce_col_threaded!(x, mval::AbstractVector, op, y, loc, nt)
253236
@sync for thid in 0:nt-1
254237
Threads.@spawn for i in 1:length(y)
255238
@inbounds if loc[i] % nt == thid
@@ -383,7 +366,7 @@ const FAST_GATHERBY_REDUCTION = [sum, length, minimum, maximum, mean, var, std,
383366

384367
function _fast_gatherby_reduction(gds, ms)
385368
!(gds isa GatherBy) && return false
386-
gds.groups == nothing && return false
369+
gds.groups === nothing && return false
387370
for i in 1:length(ms)
388371
if (ms[i].second.first isa Expr) && ms[i].second.first.head == :BYROW
389372
elseif (ms[i].second.first isa Base.Callable)

0 commit comments

Comments
 (0)