@@ -115,8 +115,6 @@ function compute_indices(groups, ngroups, ::Val{T}; threads = true) where T
115115 idx, starts
116116end
117117
118- # fast combine for gatherby data
119-
120118mutable struct GatherBy
121119 parent
122120 groupcols
@@ -208,29 +206,14 @@ function gatherby(ds::AbstractDataset, cols::MultiColumnIndex; mapformats::Bool
208206end
# Single-column convenience method: wraps `col` in a one-element vector and
# forwards every keyword argument unchanged to the multi-column `gatherby`.
function gatherby(ds::AbstractDataset, col::ColumnIndex; mapformats = true, stable = true, isgathered = false, eachrow = false, threads = true)
    return gatherby(
        ds,
        [col];
        mapformats = mapformats,
        stable = stable,
        isgathered = isgathered,
        eachrow = eachrow,
        threads = threads,
    )
end
210208
211-
# Format function that keeps only the ten lowest bits of `x`
# (1023 == 0b1111111111), giving at most 1024 distinct values.
function __SPFRMT(x)
    return x & 1023
end

# `missing` is passed through as-is; the original author notes that this
# method is not needed.
function __SPFRMT(::Missing)
    return missing
end
214-
# Currently not used by gatherby.
# Uses a sort-and-format trick for a fast gatherby; "hm" stands for high memory footprint.
# Builds a `GatherBy` for `cols` of `ds` by grouping on a row-wise hash of the
# columns instead of on the columns themselves.
#
# Two temporary columns are written to `ds` while the function runs and are
# removed again before the result is constructed, so `ds` is mutated in
# between even though the function name carries no `!`.
function hm_gatherby(ds::AbstractDataset, cols::MultiColumnIndex; mapformats = false, threads = true)
    hashcol = :___tmp___cols8934
    bucketcol = :___tmp___cols8934_2

    # Row-wise hash of `cols`, plus an identical copy that receives the
    # `__SPFRMT` format below.
    rowhash = byrow(hash; threads = threads, mapformats = mapformats)
    modify!(ds, cols => rowhash => hashcol, hashcol => identity => bucketcol)
    setformat!(ds, bucketcol => __SPFRMT)

    # Group on the formatted copy first and on the raw hash second.
    gds = groupby(ds, [bucketcol, hashcol], stable = false, threads = threads)

    # Locate the group boundaries on the original columns, read in grouped order.
    permuted = view(ds, gds.perm, cols)
    idxtype = if nrow(ds) < typemax(Int32)
        Val(Int32)
    else
        Val(Int64)
    end
    grpcols, ranges, last_valid_index = _find_starts_of_groups(permuted, cols, idxtype; mapformats = mapformats, threads = threads)

    # Drop the temporary columns before recording the last-modified stamp.
    select!(ds, Not([hashcol, bucketcol]))
    lastmodified = _get_lastmodified(_attributes(ds))
    return GatherBy(ds, grpcols, nothing, last_valid_index, mapformats, gds.perm, ranges, lastmodified)
end
225-
# Folds the values of `y` into the accumulator `x`, in place.
#
# For each position `i` of `y`, the slot `x[loc[i]]` is replaced by
# `op(x[loc[i]], f(y[i]))`. Positions are visited in order from 1 to
# `length(y)`; bounds checks are disabled, so `loc` must hold valid indices
# into `x` for every position of `y`. Returns `nothing`.
function _fill_mapreduce_col!(x, f, op, y, loc)
    n = length(y)
    @inbounds for i in 1:n
        slot = loc[i]
        contribution = f(y[i])
        x[slot] = op(x[slot], contribution)
    end
    return nothing
end
231214
232215# only for calculating var - mval is a vector of means
233- function _fill_mapreduce_col! (x, mval:: Vector , op, y, loc)
216+ function _fill_mapreduce_col! (x, mval:: AbstractVector , op, y, loc)
234217 @inbounds for i in 1 : length (y)
235218 x[loc[i]] = op (x[loc[i]], _abs2mean (y[i], mval[loc[i]]))
236219 end
@@ -249,7 +232,7 @@ function _fill_mapreduce_col_threaded!(x, f, op, y, loc, nt)
249232end
250233
251234# only for calculating var - mval is a vector of means
252- function _fill_mapreduce_col_threaded! (x, mval:: Vector , op, y, loc, nt)
235+ function _fill_mapreduce_col_threaded! (x, mval:: AbstractVector , op, y, loc, nt)
253236 @sync for thid in 0 : nt- 1
254237 Threads. @spawn for i in 1 : length (y)
255238 @inbounds if loc[i] % nt == thid
@@ -383,7 +366,7 @@ const FAST_GATHERBY_REDUCTION = [sum, length, minimum, maximum, mean, var, std,
383366
384367function _fast_gatherby_reduction (gds, ms)
385368 ! (gds isa GatherBy) && return false
386- gds. groups == nothing && return false
369+ gds. groups === nothing && return false
387370 for i in 1 : length (ms)
388371 if (ms[i]. second. first isa Expr) && ms[i]. second. first. head == :BYROW
389372 elseif (ms[i]. second. first isa Base. Callable)
0 commit comments