Skip to content

Commit 80988aa

Browse files
committed
bug fix
1 parent b67f35d commit 80988aa

File tree

1 file changed

+94
-77
lines changed

1 file changed

+94
-77
lines changed

src/subdataset/subdataset.jl

Lines changed: 94 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,105 @@
11
"""
2-
SubDataset{<:AbstractDataset, <:AbstractIndex, <:AbstractVector{Int}} <: AbstractDataset
2+
SubDataset{<:AbstractDataset, <:AbstractIndex, <:AbstractVector{Int}} <: AbstractDataset
33
4-
A view of an `AbstractDataset`. It is returned by a call to the `view` function
5-
on an `AbstractDataset` if a collections of rows and columns are specified.
4+
A view of a `Dataset`. It is returned by a call to the `view` function
5+
on a `Dataset` if a collection of rows and columns is specified.
66
7-
A `SubDataset` is an `AbstractDataset`, so expect that most
8-
Dataset functions should work. Such methods include `describe`,
9-
`summary`, `nrow`, `size`, `by`, `stack`, and `join`.
10-
11-
If the selection of columns in a parent data frame is passed as `:` (a colon)
12-
then `SubDataset` will always have all columns from the parent,
13-
even if they are added or removed after its creation.
7+
A view of a data set preserves the `format` of its columns.
148
159
# Examples
1610
```jldoctest
17-
julia> df = Dataset(a = repeat([1, 2, 3, 4], outer=[2]),
18-
b = repeat([2, 1], outer=[4]),
19-
c = 1:8)
11+
julia> ds = Dataset(a = repeat([1, 2, 3, 4], outer=[2]),
12+
b = repeat([2, 1], outer=[4]),
13+
c = 1:8)
2014
8×3 Dataset
21-
Row │ a b c
22-
│ Int64 Int64 Int64
23-
─────┼─────────────────────
24-
1 │ 1 2 1
25-
2 │ 2 1 2
26-
3 │ 3 2 3
27-
4 │ 4 1 4
28-
5 │ 1 2 5
29-
6 │ 2 1 6
30-
7 │ 3 2 7
31-
8 │ 4 1 8
32-
33-
julia> sdf1 = view(df, :, 2:3) # column subsetting
15+
Row │ a b c
16+
│ identity identity identity
17+
│ Int64? Int64? Int64?
18+
─────┼──────────────────────────────
19+
1 │ 1 2 1
20+
2 │ 2 1 2
21+
3 │ 3 2 3
22+
4 │ 4 1 4
23+
5 │ 1 2 5
24+
6 │ 2 1 6
25+
7 │ 3 2 7
26+
8 │ 4 1 8
27+
28+
julia> sds1 = view(ds, :, 2:3) # column subsetting
29+
8×2 SubDataset
30+
Row │ b c
31+
│ identity identity
32+
│ Int64? Int64?
33+
────┼────────────────────
34+
1 │ 2 1
35+
2 │ 1 2
36+
3 │ 2 3
37+
4 │ 1 4
38+
5 │ 2 5
39+
6 │ 1 6
40+
7 │ 2 7
41+
8 │ 1 8
42+
43+
julia> sds2 = @view ds[end:-1:1, [1, 3]]
3444
8×2 SubDataset
35-
Row │ b c
36-
│ Int64 Int64
37-
─────┼──────────────
38-
1 │ 2 1
39-
2 │ 1 2
40-
3 │ 2 3
41-
4 │ 1 4
42-
5 │ 2 5
43-
6 │ 1 6
44-
7 │ 2 7
45-
8 │ 1 8
46-
47-
julia> sdf2 = @view df[end:-1:1, [1, 3]] # row and column subsetting
45+
Row │ a c
46+
│ identity identity
47+
│ Int64? Int64?
48+
────┼────────────────────
49+
1 │ 4 8
50+
2 │ 3 7
51+
3 │ 2 6
52+
4 │ 1 5
53+
5 │ 4 4
54+
6 │ 3 3
55+
7 │ 2 2
56+
8 │ 1 1
57+
58+
julia> setformat!(ds, 1=>iseven)
59+
8×3 Dataset
60+
Row │ a b c
61+
│ iseven identity identity
62+
│ Int64? Int64? Int64?
63+
─────┼────────────────────────────
64+
1 │ false 2 1
65+
2 │ true 1 2
66+
3 │ false 2 3
67+
4 │ true 1 4
68+
5 │ false 2 5
69+
6 │ true 1 6
70+
7 │ false 2 7
71+
8 │ true 1 8
72+
73+
julia> view(ds, 8:-1:1, [1,3])
4874
8×2 SubDataset
49-
Row │ a c
50-
│ Int64 Int64
51-
─────┼──────────────
52-
1 │ 4 8
53-
2 │ 3 7
54-
3 │ 2 6
55-
4 │ 1 5
56-
5 │ 4 4
57-
6 │ 3 3
58-
7 │ 2 2
59-
8 │ 1 1
60-
61-
julia> sdf3 = groupby(df, :a)[1] # indexing a GroupedDataset returns a SubDataset
62-
2×3 SubDataset
63-
Row │ a b c
64-
│ Int64 Int64 Int64
65-
─────┼─────────────────────
66-
1 │ 1 2 1
67-
2 │ 1 2 5
75+
Row │ a c
76+
│ iseven identity
77+
│ Int64? Int64?
78+
─────┼──────────────────
79+
1 │ true 8
80+
2 │ false 7
81+
3 │ true 6
82+
4 │ false 5
83+
5 │ true 4
84+
6 │ false 3
85+
7 │ true 2
86+
8 │ false 1
6887
```
6988
"""
7089
struct SubDataset{D<:AbstractDataset, S<:AbstractIndex, T<:AbstractVector{Int}} <: AbstractDataset
7190
parent::D
7291
colindex::S
73-
rows::T # maps from subdf row indexes to parent row indexes
92+
rows::T # maps from subds row indexes to parent row indexes
7493
end
7594

7695
_attributes(sds::SubDataset) = getfield(parent(sds), :attributes)
7796

7897
# Experimental
7998
function _columns(sds::SubDataset)
8099
allcols = AbstractArray[]
81-
for j in 1:ncol(parent(sds))
82-
push!(allcols, view(_columns(parent(sds))[j], rows(sds)))
100+
colsidx = parentcols(index(sds))
101+
for j in 1:length(colsidx)
102+
push!(allcols, view(_columns(parent(sds))[colsidx[j]], rows(sds)))
83103
end
84104
allcols
85105
end
@@ -148,9 +168,9 @@ function _getformats_for_show(ds::SubDataset)
148168
end
149169

150170

151-
rows(sdf::SubDataset) = getfield(sdf, :rows)
152-
Base.parent(sdf::SubDataset) = getfield(sdf, :parent)
153-
Base.parentindices(sdf::SubDataset) = (rows(sdf), parentcols(index(sdf)))
171+
rows(sds::SubDataset) = getfield(sds, :rows)
172+
Base.parent(sds::SubDataset) = getfield(sds, :parent)
173+
Base.parentindices(sds::SubDataset) = (rows(sds), parentcols(index(sds)))
154174

155175
function Base.view(ds::Dataset, rowinds, colind::ColumnIndex)
156176
idx = index(ds)[colind]
@@ -184,7 +204,7 @@ Base.@propagate_inbounds Base.view(ads::AbstractDataset, rowinds::typeof(!),
184204
SubDataset(ads, :, colinds)
185205
Base.@propagate_inbounds Base.view(ads::AbstractDataset, rowinds::Not,
186206
colinds::MultiColumnIndex) =
187-
SubDataset(ads, axes(adf, 1)[rowinds], colinds)
207+
SubDataset(ads, axes(ads, 1)[rowinds], colinds)
188208

189209
##############################################################################
190210
##
@@ -216,9 +236,9 @@ Base.@propagate_inbounds Base.getindex(sds::SubDataset, rowinds::Union{AbstractV
216236
Base.@propagate_inbounds Base.getindex(sds::SubDataset, ::Colon,
217237
colinds::MultiColumnIndex) =
218238
parent(sds)[rows(sds), parentcols(index(sds), colinds)]
219-
Base.@propagate_inbounds Base.getindex(df::SubDataset, row_ind::typeof(!),
239+
Base.@propagate_inbounds Base.getindex(ds::SubDataset, row_ind::typeof(!),
220240
col_inds::MultiColumnIndex) =
221-
view(df, :, col_inds)
241+
view(ds, :, col_inds)
222242

223243

224244
Base.@propagate_inbounds function Base.setindex!(sds::SubDataset, val::Any, idx::CartesianIndex{2})
@@ -240,11 +260,11 @@ Base.@propagate_inbounds Base.setindex!(sds::SubDataset, val::Any, rowinds::Bool
240260

241261
Base.setproperty!(::SubDataset, ::Symbol, ::Any) =
242262
throw(ArgumentError("Replacing or adding of columns of a SubDataset is not allowed. " *
243-
"Instead use `df[:, col_ind] = v` or `df[:, col_ind] .= v` " *
263+
"Instead use `ds[:, col_ind] = v` or `ds[:, col_ind] .= v` " *
244264
"to perform an in-place assignment."))
245265
Base.setproperty!(::SubDataset, ::AbstractString, ::Any) =
246266
throw(ArgumentError("Replacing or adding of columns of a SubDataset is not allowed. " *
247-
"Instead use `df[:, col_ind] = v` or `df[:, col_ind] .= v` " *
267+
"Instead use `ds[:, col_ind] = v` or `ds[:, col_ind] .= v` " *
248268
"to perform an in-place assignment."))
249269

250270
##############################################################################
@@ -258,14 +278,11 @@ Base.copy(sds::SubDataset) = parent(sds)[rows(sds), parentcols(index(sds), :)]
258278
Base.delete!(ds::SubDataset, ind) =
259279
throw(ArgumentError("SubDataset does not support deleting rows"))
260280

261-
function Dataset(sds::SubDataset; copycols::Bool=true)
262-
if copycols
263-
sds[:, :]
264-
else
265-
newds = Dataset(collect(eachcol(sds)), Index(parent(index(sds)).lookup, parent(index(sds)).names, parent(index(sds)).format), copycols=false)
266-
setinfo!(newds, _attributes(sds).meta.info[])
267-
newds
268-
end
281+
function Dataset(sds::SubDataset)
282+
283+
newds = sds[:, :]
284+
setinfo!(newds, _attributes(sds).meta.info[])
285+
newds
269286
end
270287

271288
Base.convert(::Type{Dataset}, sds::SubDataset) = Dataset(sds)

0 commit comments

Comments
 (0)