11"""
2- SubDataset{<:AbstractDataset , <:AbstractIndex, <:AbstractVector{Int}} <: AbstractDataset
2+ SubDataset{<:Dataset , <:AbstractIndex, <:AbstractVector{Int}} <: Dataset
33
4- A view of an `AbstractDataset `. It is returned by a call to the `view` function
5- on an `AbstractDataset ` if a collections of rows and columns are specified.
4+ A view of a `Dataset`. It is returned by a call to the `view` function
5+ on a `Dataset` if a collection of rows and columns is specified.
66
7- A `SubDataset` is an `AbstractDataset`, so expect that most
8- Dataset functions should work. Such methods include `describe`,
9- `summary`, `nrow`, `size`, `by`, `stack`, and `join`.
10-
11- If the selection of columns in a parent data frame is passed as `:` (a colon)
12- then `SubDataset` will always have all columns from the parent,
13- even if they are added or removed after its creation.
7+ A view of a data set preserves the `format` of its columns.
148
159# Examples
1610```jldoctest
17- julia> df = Dataset(a = repeat([1, 2, 3, 4], outer=[2]),
18- b = repeat([2, 1], outer=[4]),
19- c = 1:8)
11+ julia> ds = Dataset(a = repeat([1, 2, 3, 4], outer=[2]),
12+ b = repeat([2, 1], outer=[4]),
13+ c = 1:8)
20148×3 Dataset
21- Row │ a b c
22- │ Int64 Int64 Int64
23- ─────┼─────────────────────
24- 1 │ 1 2 1
25- 2 │ 2 1 2
26- 3 │ 3 2 3
27- 4 │ 4 1 4
28- 5 │ 1 2 5
29- 6 │ 2 1 6
30- 7 │ 3 2 7
31- 8 │ 4 1 8
32-
33- julia> sdf1 = view(df, :, 2:3) # column subsetting
15+ Row │ a b c
16+ │ identity identity identity
17+ │ Int64? Int64? Int64?
18+ ─────┼──────────────────────────────
19+ 1 │ 1 2 1
20+ 2 │ 2 1 2
21+ 3 │ 3 2 3
22+ 4 │ 4 1 4
23+ 5 │ 1 2 5
24+ 6 │ 2 1 6
25+ 7 │ 3 2 7
26+ 8 │ 4 1 8
27+
28+ julia> sds1 = view(ds, :, 2:3) # column subsetting
29+ 8×2 SubDataset
30+ Row │ b c
31+ │ identity identity
32+ │ Int64? Int64?
33+ ────┼────────────────────
34+ 1 │ 2 1
35+ 2 │ 1 2
36+ 3 │ 2 3
37+ 4 │ 1 4
38+ 5 │ 2 5
39+ 6 │ 1 6
40+ 7 │ 2 7
41+ 8 │ 1 8
42+
43+ julia> sds2 = @view ds[end:-1:1, [1, 3]]
34448×2 SubDataset
35- Row │ b c
36- │ Int64 Int64
37- ─────┼──────────────
38- 1 │ 2 1
39- 2 │ 1 2
40- 3 │ 2 3
41- 4 │ 1 4
42- 5 │ 2 5
43- 6 │ 1 6
44- 7 │ 2 7
45- 8 │ 1 8
46-
47- julia> sdf2 = @view df[end:-1:1, [1, 3]] # row and column subsetting
45+ Row │ a c
46+ │ identity identity
47+ │ Int64? Int64?
48+ ────┼────────────────────
49+ 1 │ 4 8
50+ 2 │ 3 7
51+ 3 │ 2 6
52+ 4 │ 1 5
53+ 5 │ 4 4
54+ 6 │ 3 3
55+ 7 │ 2 2
56+ 8 │ 1 1
57+
58+ julia> setformat!(ds, 1=>iseven)
59+ 8×3 Dataset
60+ Row │ a b c
61+ │ iseven identity identity
62+ │ Int64? Int64? Int64?
63+ ─────┼────────────────────────────
64+ 1 │ false 2 1
65+ 2 │ true 1 2
66+ 3 │ false 2 3
67+ 4 │ true 1 4
68+ 5 │ false 2 5
69+ 6 │ true 1 6
70+ 7 │ false 2 7
71+ 8 │ true 1 8
72+
73+ julia> view(ds, 8:-1:1, [1,3])
48748×2 SubDataset
49- Row │ a c
50- │ Int64 Int64
51- ─────┼──────────────
52- 1 │ 4 8
53- 2 │ 3 7
54- 3 │ 2 6
55- 4 │ 1 5
56- 5 │ 4 4
57- 6 │ 3 3
58- 7 │ 2 2
59- 8 │ 1 1
60-
61- julia> sdf3 = groupby(df, :a)[1] # indexing a GroupedDataset returns a SubDataset
62- 2×3 SubDataset
63- Row │ a b c
64- │ Int64 Int64 Int64
65- ─────┼─────────────────────
66- 1 │ 1 2 1
67- 2 │ 1 2 5
75+ Row │ a c
76+ │ iseven identity
77+ │ Int64? Int64?
78+ ─────┼──────────────────
79+ 1 │ true 8
80+ 2 │ false 7
81+ 3 │ true 6
82+ 4 │ false 5
83+ 5 │ true 4
84+ 6 │ false 3
85+ 7 │ true 2
86+ 8 │ false 1
6887```
6988"""
7089struct SubDataset{D<: AbstractDataset , S<: AbstractIndex , T<: AbstractVector{Int} } <: AbstractDataset
7190 parent:: D
7291 colindex:: S
73- rows:: T # maps from subdf row indexes to parent row indexes
92+ rows:: T # maps from subds row indexes to parent row indexes
7493end
7594
7695_attributes (sds:: SubDataset ) = getfield (parent (sds), :attributes )
7796
7897# Experimental
7998function _columns (sds:: SubDataset )
8099 allcols = AbstractArray[]
81- for j in 1 : ncol (parent (sds))
82- push! (allcols, view (_columns (parent (sds))[j], rows (sds)))
100+ colsidx = parentcols (index (sds))
101+ for j in 1 : length (colsidx)
102+ push! (allcols, view (_columns (parent (sds))[colsidx[j]], rows (sds)))
83103 end
84104 allcols
85105end
@@ -148,9 +168,9 @@ function _getformats_for_show(ds::SubDataset)
148168end
149169
150170
151- rows (sdf :: SubDataset ) = getfield (sdf , :rows )
152- Base. parent (sdf :: SubDataset ) = getfield (sdf , :parent )
153- Base. parentindices (sdf :: SubDataset ) = (rows (sdf ), parentcols (index (sdf )))
171+ rows (sds :: SubDataset ) = getfield (sds , :rows )
172+ Base. parent (sds :: SubDataset ) = getfield (sds , :parent )
173+ Base. parentindices (sds :: SubDataset ) = (rows (sds ), parentcols (index (sds )))
154174
155175function Base. view (ds:: Dataset , rowinds, colind:: ColumnIndex )
156176 idx = index (ds)[colind]
@@ -184,7 +204,7 @@ Base.@propagate_inbounds Base.view(ads::AbstractDataset, rowinds::typeof(!),
184204 SubDataset (ads, :, colinds)
185205Base. @propagate_inbounds Base. view (ads:: AbstractDataset , rowinds:: Not ,
186206 colinds:: MultiColumnIndex ) =
187- SubDataset (ads, axes (adf , 1 )[rowinds], colinds)
207+ SubDataset (ads, axes (ads , 1 )[rowinds], colinds)
188208
189209# #############################################################################
190210# #
@@ -216,9 +236,9 @@ Base.@propagate_inbounds Base.getindex(sds::SubDataset, rowinds::Union{AbstractV
216236Base. @propagate_inbounds Base. getindex (sds:: SubDataset , :: Colon ,
217237 colinds:: MultiColumnIndex ) =
218238 parent (sds)[rows (sds), parentcols (index (sds), colinds)]
219- Base. @propagate_inbounds Base. getindex (df :: SubDataset , row_ind:: typeof (! ),
239+ Base. @propagate_inbounds Base. getindex (ds :: SubDataset , row_ind:: typeof (! ),
220240 col_inds:: MultiColumnIndex ) =
221- view (df , :, col_inds)
241+ view (ds , :, col_inds)
222242
223243
224244Base. @propagate_inbounds function Base. setindex! (sds:: SubDataset , val:: Any , idx:: CartesianIndex{2} )
@@ -240,11 +260,11 @@ Base.@propagate_inbounds Base.setindex!(sds::SubDataset, val::Any, rowinds::Bool
240260
241261Base. setproperty! (:: SubDataset , :: Symbol , :: Any ) =
242262 throw (ArgumentError (" Replacing or adding of columns of a SubDataset is not allowed. " *
243- " Instead use `df [:, col_ind] = v` or `df [:, col_ind] .= v` " *
263+ " Instead use `ds [:, col_ind] = v` or `ds [:, col_ind] .= v` " *
244264 " to perform an in-place assignment." ))
245265Base. setproperty! (:: SubDataset , :: AbstractString , :: Any ) =
246266 throw (ArgumentError (" Replacing or adding of columns of a SubDataset is not allowed. " *
247- " Instead use `df [:, col_ind] = v` or `df [:, col_ind] .= v` " *
267+ " Instead use `ds [:, col_ind] = v` or `ds [:, col_ind] .= v` " *
248268 " to perform an in-place assignment." ))
249269
250270# #############################################################################
@@ -258,14 +278,11 @@ Base.copy(sds::SubDataset) = parent(sds)[rows(sds), parentcols(index(sds), :)]
258278Base. delete! (ds:: SubDataset , ind) =
259279 throw (ArgumentError (" SubDataset does not support deleting rows" ))
260280
261- function Dataset (sds:: SubDataset ; copycols:: Bool = true )
262- if copycols
263- sds[:, :]
264- else
265- newds = Dataset (collect (eachcol (sds)), Index (parent (index (sds)). lookup, parent (index (sds)). names, parent (index (sds)). format), copycols= false )
266- setinfo! (newds, _attributes (sds). meta. info[])
267- newds
268- end
281+ function Dataset (sds:: SubDataset )
282+
283+ newds = sds[:, :]
284+ setinfo! (newds, _attributes (sds). meta. info[])
285+ newds
269286end
270287
271288Base. convert (:: Type{Dataset} , sds:: SubDataset ) = Dataset (sds)
0 commit comments