Skip to content

Commit ff5da80

Browse files
authored
fix #93 (#94)
1 parent 71b4ce7 commit ff5da80

File tree

3 files changed

+61
-47
lines changed

3 files changed

+61
-47
lines changed

src/dataset/constructor.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ Dataset(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
384384

385385
# Create Dataset
386386
Dataset(column_eltypes::AbstractVector{<:Type}, cnames::AbstractVector{<:AbstractString},
387-
nrows::Integer=0; makeunique::Bool=false) where T<:Type =
387+
nrows::Integer=0; makeunique::Bool=false) =
388388
throw(ArgumentError("`Dataset` constructor with passed eltypes is " *
389389
"not supported. Pass explicitly created columns to a " *
390390
"`Dataset` constructor instead."))

src/sort/qsort.jl

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
11

2-
midpoint(lo::T, hi::T) where T<:Integer = lo + ((hi - lo) >>> 0x01)
2+
midpoint(lo::T, hi::T) where {T<:Integer} = lo + ((hi - lo) >>> 0x01)
33
midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...)
4-
struct HeapSortAlg <: Base.Sort.Algorithm end
4+
import Base.Sort.Algorithm
5+
struct HeapSortAlg <: Algorithm end
6+
7+
if VERSION >= v"1.9.0-DEV.1635"
8+
struct QuickSortAlg <: Algorithm end # we define it here because Julia 1.9.0-DEV.1635 dropped it / QuickSort = PartialQuickSort{Missing, Missing}
9+
# QuickSort is now PartialQuickSort{Missing, Missing}; forward it to the QuickSortAlg method
10+
ds_sort!(v, idx::Vector{<:Integer}, lo::Integer, hi::Integer, a::PartialQuickSort{Missing, Missing}, o::Ordering) = ds_sort!(v, idx, lo::Integer, hi, QuickSortAlg(), o)
11+
else
12+
import Base.Sort.QuickSortAlg
13+
end
14+
515
const DEFAULT_UNSTABLE = QuickSort
616
# const DEFAULT_STABLE = MergeSort
7-
const SMALL_ALGORITHM = InsertionSort
8-
const SMALL_THRESHOLD = 20
17+
const SMALL_ALGORITHM = InsertionSort
18+
const SMALL_THRESHOLD = 20
919
const HeapSort = HeapSortAlg()
1020

1121

@@ -65,9 +75,14 @@ function _partition!(v, idx::Vector{<:Integer}, lo::Integer, hi::Integer, o::Ord
6575
# pivot == v[lo], v[hi] > pivot
6676
i, j = lo, hi
6777
@inbounds while true
68-
i += 1; j -= 1
69-
while lt(o, v[i], pivot); i += 1; end;
70-
while lt(o, pivot, v[j]); j -= 1; end;
78+
i += 1
79+
j -= 1
80+
while lt(o, v[i], pivot)
81+
i += 1
82+
end
83+
while lt(o, pivot, v[j])
84+
j -= 1
85+
end
7186
i >= j && break
7287
v[i], v[j] = v[j], v[i]
7388
idx[i], idx[j] = idx[j], idx[i]
@@ -83,17 +98,17 @@ end
8398

8499
function ds_sort!(v, idx::Vector{<:Integer}, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering)
85100
@inbounds while lo < hi
86-
hi-lo <= SMALL_THRESHOLD && return ds_sort!(v, idx, lo, hi, SMALL_ALGORITHM, o)
101+
hi - lo <= SMALL_THRESHOLD && return ds_sort!(v, idx, lo, hi, SMALL_ALGORITHM, o)
87102
j = _partition!(v, idx, lo, hi, o)
88-
if j-lo < hi-j
103+
if j - lo < hi - j
89104
# recurse on the smaller chunk
90105
# this is necessary to preserve O(log(n))
91106
# stack space in the worst case (rather than O(n))
92-
lo < (j-1) && ds_sort!(v, idx, lo, j-1, a, o)
93-
lo = j+1
107+
lo < (j - 1) && ds_sort!(v, idx, lo, j - 1, a, o)
108+
lo = j + 1
94109
else
95-
j+1 < hi && ds_sort!(v, idx, j+1, hi, a, o)
96-
hi = j-1
110+
j + 1 < hi && ds_sort!(v, idx, j + 1, hi, a, o)
111+
hi = j - 1
97112
end
98113
end
99114
end
@@ -102,10 +117,10 @@ end
102117
# the assumption is that x[lo:mid] is sorted and x[mid+1:hi] is also sorted,
103118
# the function uses this information to sort x[lo:hi]
104119
# x_cpy is a copy of x, idx_cpy is a copy of idx
105-
function _sort_two_sorted_half!(x, x_cpy, idx::Vector{<:Integer}, idx_cpy, lo, mid, hi, o; cpy_offset = 0)
120+
function _sort_two_sorted_half!(x, x_cpy, idx::Vector{<:Integer}, idx_cpy, lo, mid, hi, o; cpy_offset=0)
106121
st1 = lo
107122
en1 = mid
108-
st2 = mid+1
123+
st2 = mid + 1
109124
en2 = hi
110125
cnt = lo
111126
@inbounds while true
@@ -141,21 +156,21 @@ function _sort_two_sorted_half!(x, x_cpy, idx::Vector{<:Integer}, idx_cpy, lo, m
141156
end
142157

143158
# to simplify the problem we assume number_of_chunks is 2^n for some n
144-
function _sort_chunks!(x, idx::Vector{<:Integer}, lo, hi, number_of_chunks, a::Base.Sort.Algorithm, o::Ordering)
159+
function _sort_chunks!(x, idx::Vector{<:Integer}, lo, hi, number_of_chunks, a::Algorithm, o::Ordering)
145160
rangelen = hi - lo + 1
146161
st_offset = lo - 1
147162
cz = div(rangelen, number_of_chunks)
148163
en = hi
149164
Threads.@threads for i in 1:number_of_chunks
150-
ds_sort!(x, idx, (i-1)*cz+1+st_offset, i*cz+st_offset, a, o)
165+
ds_sort!(x, idx, (i - 1) * cz + 1 + st_offset, i * cz + st_offset, a, o)
151166
end
152167
# take care of the last few observations
153-
if number_of_chunks*div(rangelen, number_of_chunks)+st_offset < en
154-
ds_sort!(x, idx, number_of_chunks*div(rangelen, number_of_chunks)+1+st_offset, en, a, o)
168+
if number_of_chunks * div(rangelen, number_of_chunks) + st_offset < en
169+
ds_sort!(x, idx, number_of_chunks * div(rangelen, number_of_chunks) + 1 + st_offset, en, a, o)
155170
end
156171
end
157172

158-
function _sort_multi_sorted_chunk!(x, idx::Vector{<:Integer}, lo, hi, number_of_chunks, a::Base.Sort.Algorithm, o::Ordering)
173+
function _sort_multi_sorted_chunk!(x, idx::Vector{<:Integer}, lo, hi, number_of_chunks, a::Algorithm, o::Ordering)
159174
rangelen = hi - lo + 1
160175
st_offset = lo - 1
161176
cz = div(rangelen, number_of_chunks)
@@ -165,29 +180,29 @@ function _sort_multi_sorted_chunk!(x, idx::Vector{<:Integer}, lo, hi, number_of_
165180
idx_cpy = idx[lo:hi]
166181
while true
167182
Threads.@threads for i in 1:2:current_numberof_chunks
168-
_sort_two_sorted_half!(x, x_cpy, idx, idx_cpy, (i-1)*cz+1+st_offset, i*cz+st_offset, (i+1)*cz+st_offset, o; cpy_offset = lo-1)
183+
_sort_two_sorted_half!(x, x_cpy, idx, idx_cpy, (i - 1) * cz + 1 + st_offset, i * cz + st_offset, (i + 1) * cz + st_offset, o; cpy_offset=lo - 1)
169184
end
170185
cz *= 2
171-
current_numberof_chunks = current_numberof_chunks >> 1
186+
current_numberof_chunks = current_numberof_chunks >> 1
172187
current_numberof_chunks < 2 && break
173188
copyto!(x_cpy, 1, x, lo, rangelen)
174189
copyto!(idx_cpy, 1, idx, lo, rangelen)
175190
end
176191
# take care of the last few (fewer than number_of_chunks) observations
177-
if number_of_chunks*div(rangelen, number_of_chunks)+st_offset < en
192+
if number_of_chunks * div(rangelen, number_of_chunks) + st_offset < en
178193
copyto!(x_cpy, 1, x, lo, rangelen)
179194
copyto!(idx_cpy, 1, idx, lo, rangelen)
180-
_sort_two_sorted_half!(x, x_cpy, idx, idx_cpy, lo, number_of_chunks*div(rangelen, number_of_chunks)+st_offset, en, o; cpy_offset = lo-1)
195+
_sort_two_sorted_half!(x, x_cpy, idx, idx_cpy, lo, number_of_chunks * div(rangelen, number_of_chunks) + st_offset, en, o; cpy_offset=lo - 1)
181196
end
182197
end
183198

184199
# sorting a vector using parallel quick sort
185200
# it uses a simple algorithm for doing this; to keep it simple, the number of chunks is the thread count rounded down to a power of two (2^n)
186-
function hp_ds_sort!(x, idx, a::Base.Sort.Algorithm, o::Ordering; lo = 1, hi = length(x))
201+
function hp_ds_sort!(x, idx, a::Algorithm, o::Ordering; lo=1, hi=length(x))
187202
cpucnt = Threads.nthreads()
188203
@assert cpucnt >= 2 "we need at least 2 cpus for parallel sorting"
189-
cpucnt = 2 ^ floor(Int, log2(cpucnt))
190-
_sort_chunks!(x , idx, lo, hi, cpucnt, a, o)
204+
cpucnt = 2^floor(Int, log2(cpucnt))
205+
_sort_chunks!(x, idx, lo, hi, cpucnt, a, o)
191206
_sort_multi_sorted_chunk!(x, idx, lo, hi, cpucnt, a, o)
192207
end
193208

@@ -200,7 +215,7 @@ heapright(i::Integer) = 2i + 1
200215
heapparent(i::Integer) = div(i, 2)
201216

202217
# Binary min-heap percolate down.
203-
function percolate_down!(xs::AbstractArray, idx, i::Integer, x=xs[i], idval = idx[i], o::Ordering=Forward, len::Integer=length(xs))
218+
function percolate_down!(xs::AbstractArray, idx, i::Integer, x=xs[i], idval=idx[i], o::Ordering=Forward, len::Integer=length(xs))
204219
@inbounds while (l = heapleft(i)) <= len
205220
r = heapright(i)
206221
j = r > len || lt(o, xs[l], xs[r]) ? l : r
@@ -226,7 +241,7 @@ function heapify!(xs::AbstractArray, idx, o::Ordering=Forward)
226241
end
227242

228243
function ds_sort!(v::AbstractVector, idx::AbstractVector{<:Integer}, lo::Integer, hi::Integer, a::HeapSortAlg, o::Ordering)
229-
hi-lo <= SMALL_THRESHOLD && return ds_sort!(v, idx, lo, hi, SMALL_ALGORITHM, o)
244+
hi - lo <= SMALL_THRESHOLD && return ds_sort!(v, idx, lo, hi, SMALL_ALGORITHM, o)
230245
if lo > 1 || hi < length(v)
231246
return ds_sort!(view(v, lo:hi), view(idx, lo:hi), 1, length(v), a, o)
232247
end
@@ -240,7 +255,7 @@ function ds_sort!(v::AbstractVector, idx::AbstractVector{<:Integer}, lo::Integer
240255
idx[i] = idx[1]
241256
# The heap portion now ends at position i-1, but needs fixing up
242257
# starting with the root
243-
percolate_down!(v, idx, 1, x, idxval, r, i-1)
258+
percolate_down!(v, idx, 1, x, idxval, r, i - 1)
244259
end
245260
v
246261
end
@@ -269,8 +284,8 @@ function heapify2!(xs::AbstractArray, o::Ordering=Forward)
269284
return xs
270285
end
271286

272-
function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, a::HeapSortAlg, o::Ordering = Forward)
273-
hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
287+
function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, a::HeapSortAlg, o::Ordering=Forward)
288+
hi - lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
274289
if lo > 1 || hi < length(v)
275290
return sort!(view(v, lo:hi), 1, length(v), a, o)
276291
end
@@ -282,7 +297,7 @@ function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, a::HeapSortAlg,
282297
v[i] = v[1]
283298
# The heap portion now ends at position i-1, but needs fixing up
284299
# starting with the root
285-
percolate_down2!(v, 1, x, r, i-1)
300+
percolate_down2!(v, 1, x, r, i - 1)
286301
end
287302
v
288303
end

src/sort/util.jl

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
using .Base.Order
2-
import Base.Sort.QuickSortAlg
2+
# import Base.Sort.QuickSortAlg # julia >= 1.9.0-DEV.1635 dropped QuickSortAlg
33
import Base.Sort.InsertionSortAlg
4-
import Base.Sort.MergeSort
4+
import Base.Sort.MergeSort
55
import Base.Order.lt
6-
76
using .Base: sub_with_overflow, add_with_overflow
87

98
trunc2int(x) = unsafe_trunc(Int, x)
109
trunc2int(::Missing) = missing
1110
_is_intable(x) = (typemin(Int) <= x <= typemax(Int)) && (round(x, RoundToZero) == x)
1211
_is_intable(::Missing) = true
1312
# x is sorted based on o
14-
function _fill_starts!(ranges, x, rangescpy, last_valid_range, o::Ordering, ::Val{T}) where T
13+
function _fill_starts!(ranges, x, rangescpy, last_valid_range, o::Ordering, ::Val{T}) where {T}
1514

1615
cnt = 1
1716
st = 1
@@ -22,13 +21,13 @@ function _fill_starts!(ranges, x, rangescpy, last_valid_range, o::Ordering, ::Va
2221
j == last_valid_range ? hi = length(x) : hi = rangescpy[j+1] - 1
2322
cnt = _find_blocks_sorted!(ranges, x, lo, hi, cnt, o, Val(T))
2423
end
25-
@inbounds for j in 1:(cnt - 1)
24+
@inbounds for j in 1:(cnt-1)
2625
rangescpy[j] = ranges[j]
2726
end
2827
return cnt - 1
2928
end
3029

31-
function _find_blocks_sorted!(ranges, x, lo, hi, cnt, o::Ordering, ::Val{T}) where T
30+
function _find_blocks_sorted!(ranges, x, lo, hi, cnt, o::Ordering, ::Val{T}) where {T}
3231
n = hi - lo + 1
3332
counter = 0
3433
st::T = lo
@@ -49,9 +48,9 @@ function _find_blocks_sorted!(ranges, x, lo, hi, cnt, o::Ordering, ::Val{T}) whe
4948
if counter > div(n, 2)
5049
# ranges[cnt] = st
5150
# cnt += 1
52-
for i in st:hi - 1
51+
for i in st:hi-1
5352
if !isequal(x[i], x[i+1])
54-
# if lt(o, x[i], x[i+1])
53+
# if lt(o, x[i], x[i+1])
5554
ranges[cnt] = i + 1
5655
cnt += 1
5756
end
@@ -62,7 +61,7 @@ function _find_blocks_sorted!(ranges, x, lo, hi, cnt, o::Ordering, ::Val{T}) whe
6261
end
6362

6463
# inbits is zeros(Bool, length(x))
65-
function _fill_starts_v2!(ranges, inbits, x, last_valid_range, o::Ordering, ::Val{T}; threads = true) where T
64+
function _fill_starts_v2!(ranges, inbits, x, last_valid_range, o::Ordering, ::Val{T}; threads=true) where {T}
6665
# first split x to chunks
6766
# if last_valid_range == 1
6867
# @error "not yet implemented"
@@ -90,10 +89,10 @@ function _fill_starts_v2!(ranges, inbits, x, last_valid_range, o::Ordering, ::Va
9089
cnt - 1
9190
end
9291

93-
function _mark_start_of_groups_sorted!(inbits, x, lo, hi, o, ::Val{T}) where T
92+
function _mark_start_of_groups_sorted!(inbits, x, lo, hi, o, ::Val{T}) where {T}
9493
n = hi - lo + 1
9594
n == 1 && return
96-
cp = ceil(Int, n/log2(n))
95+
cp = ceil(Int, n / log2(n))
9796
# cp = div(n,2)
9897
counter = 0
9998
st::T = lo
@@ -111,8 +110,8 @@ function _mark_start_of_groups_sorted!(inbits, x, lo, hi, o, ::Val{T}) where T
111110
if counter > cp
112111
# ranges[cnt] = st
113112
# cnt += 1
114-
for i in st:hi - 1
115-
!isequal(x[i], x[i+1]) ? inbits[i + 1] = true : nothing
113+
for i in st:hi-1
114+
!isequal(x[i], x[i+1]) ? inbits[i+1] = true : nothing
116115
end
117116
break
118117
end

0 commit comments

Comments
 (0)