Skip to content

Commit 67de3e5

Browse files
committed
add a fast path for gatherby
1 parent 050e9fb commit 67de3e5

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

src/other/utils.jl

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,22 @@ function _gather_groups(ds, cols, ::Val{T}; mapformats = false, stable = true, t
429429
colidx = index(ds)[cols]
430430
_max_level = nrow(ds)
431431

432-
432+
# fast path for a common scenario
433+
# A PooledVector is already gathered, so its reference array is used directly as the group ids. This path requires stable = false, since the result is not stable.
434+
if (ds isa Dataset) && length(colidx) == 1 && (_columns(ds)[colidx[1]] isa PooledVector) && !stable
435+
_f = _date_value
436+
if mapformats
437+
_f = _date_valuegetformat(ds, colidx[1])
438+
end
439+
if _f == _date_valueidentity || !mapformats
440+
v = DataAPI.refarray(_columns(ds)[colidx[1]])
441+
else
442+
v = DataAPI.refarray(map(_f, _columns(ds)[colidx[1]]))
443+
end
444+
prev_groups = Vector{T}(undef, nrow(ds))
445+
copy!(prev_groups, v)
446+
return prev_groups, T[], threads ? hp_maximum(prev_groups) : stat_maximum(prev_groups)
447+
end
433448
if nrow(ds) > 2^23 && !stable && 5<length(colidx)<16 # the result is stable anyway
434449
if !mapformats || all(==(identity), getformat.(Ref(ds), colidx))
435450
return _gather_groups_hugeds_multicols(ds, cols, Val(T); threads = threads)

0 commit comments

Comments
 (0)