Skip to content

Commit 32ee4fa

Browse files
committed
fix #102 check consistency of grouped and sub datasets
1 parent a066c07 commit 32ee4fa

File tree

5 files changed

+42
-15
lines changed

5 files changed

+42
-15
lines changed

src/dataset/getindex.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ function _check_consistency(ds::Dataset)
6161
end
6262

6363
function _check_consistency(ds::AbstractDataset)
64+
# FIXME: We should check the creation date of the sub-data; however, this does not work in some situations, e.g. modify!(sds, ...)
65+
# TODO: Add this check whenever it becomes possible: getfield(ds, :created) == _get_lastmodified(_attributes(parent(ds)))
6466
if ds isa SubDataset
6567
@assert length(index(ds).remap) == length(index(parent(ds))) "The parent data set which this view is based on, has been modified. To fix the issue recreate the view"
6668
end

src/other/broadcasting.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ function Base.Broadcast.broadcast_unalias(dest, src::AbstractDataset)
226226
if src isa SubDataset
227227
if !wascopied
228228
src = SubDataset(_our_copy(parent(src), copycols=false),
229-
index(src), rows(src))
229+
index(src), rows(src), _get_lastmodified(_attributes(parent(src))))
230230
end
231231
parentidx = parentcols(index(src), i)
232232
parent(src)[!, parentidx] = Base.unaliascopy(_columns(parent(src))[parentidx])
@@ -254,7 +254,7 @@ function _broadcast_unalias_helper(dest::AbstractDataset, scol::AbstractVector,
254254
if src isa SubDataset
255255
if !wascopied
256256
src =SubDataset(_our_copy(parent(src), copycols=false),
257-
index(src), rows(src))
257+
index(src), rows(src), _get_lastmodified(_attributes(parent(src))))
258258
end
259259
parentidx = parentcols(index(src), col2)
260260
parent(src)[!, parentidx] = Base.unaliascopy(_columns(parent(src))[parentidx])

src/sort/gatherby.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,10 @@ mutable struct GatherBy
127127
starts
128128
created::DateTime
129129
end
130-
Base.copy(gds::GatherBy) = GatherBy(copy(gds.parent), copy(gds.groupcols), copy(gds.groups), gds.lastvalid, gds.mapformats, gds.perm === nothing ? nothing : copy(gds.perm), gds.starts === nothing ? nothing : copy(gds.starts), gds.created)
130+
# Copy a `GatherBy`.
#
# The parent is copied exactly once and that same copy is both stored as the new
# parent and used as the source of the `created` stamp, which is read from the
# copy's last-modified attribute. Copying the parent a second time would store a
# different object than the one the stamp was taken from, and would waste a copy.
# `perm` and `starts` may be `nothing`, in which case they are passed through as-is.
function Base.copy(gds::GatherBy)
    ds_cpy = copy(gds.parent)
    perm_cpy = gds.perm === nothing ? nothing : copy(gds.perm)
    starts_cpy = gds.starts === nothing ? nothing : copy(gds.starts)
    return GatherBy(
        ds_cpy,
        copy(gds.groupcols),
        copy(gds.groups),
        gds.lastvalid,
        gds.mapformats,
        perm_cpy,
        starts_cpy,
        _get_lastmodified(_attributes(ds_cpy)),
    )
end
131134

132135

133136
# The row count of a `GatherBy` is the row count of its parent.
function nrow(ds::GatherBy)
    return nrow(ds.parent)
end
@@ -149,6 +152,7 @@ Base.summary(gds::GatherBy) =
149152
function Base.show(io::IO, gds::GatherBy;
150153

151154
kwargs...)
155+
_check_consistency(gds)
152156
if length(_get_perms(gds)) > 200
153157
_show(io, view(gds.parent, [first(gds.perm, 100);last(gds.perm, 100)], :); title = summary(gds), show_omitted_cell_summary=false, show_row_number = false, kwargs...)
154158
else

src/sort/groupby.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,10 @@ mutable struct GroupBy
170170
created::DateTime
171171
end
172172

173-
Base.copy(gds::GroupBy) = GroupBy(copy(gds.parent), copy(gds.groupcols), copy(gds.rev), copy(gds.perm), copy(gds.starts), gds.lastvalid, gds.mapformats, gds.created)
173+
# Copy a `GroupBy`.
#
# The parent is copied once; the `created` stamp of the result is read from the
# last-modified attribute of that copy, so the stamp refers to the stored parent.
function Base.copy(gds::GroupBy)
    parent_copy = copy(gds.parent)
    stamp = _get_lastmodified(_attributes(parent_copy))
    return GroupBy(
        parent_copy,
        copy(gds.groupcols),
        copy(gds.rev),
        copy(gds.perm),
        copy(gds.starts),
        gds.lastvalid,
        gds.mapformats,
        stamp,
    )
end
174177

175178
# The row count of a `GroupBy` is the row count of its parent.
function nrow(ds::GroupBy)
    return nrow(ds.parent)
end
176179
# The column count of a `GroupBy` is the column count of its parent.
function ncol(ds::GroupBy)
    return ncol(ds.parent)
end
@@ -234,6 +237,7 @@ end
234237

235238
# Non-mutating counterpart of `modify!` for grouped data sets: `modify!` is applied
# to a copy of `original_gds`, with `args` and `threads` forwarded unchanged.
function modify(
    original_gds::Union{GroupBy, GatherBy}, @nospecialize(args...); threads::Bool = true
)
    return modify!(copy(original_gds), args..., threads = threads)
end
236239
function modify!(gds::Union{GroupBy, GatherBy}, @nospecialize(args...); threads::Bool = true)
240+
_check_consistency(gds)
237241
if parent(gds) isa SubDataset
238242
idx_cpy = copy(index(parent(gds)))
239243
else
@@ -316,6 +320,7 @@ end
316320

317321

318322
function combine(gds::Union{GroupBy, GatherBy}, @nospecialize(args...); dropgroupcols = false, threads = true)
323+
_check_consistency(gds)
319324
idx_cpy::Index = Index(Dict{Symbol, Int}(), Symbol[], Dict{Int, Function}())
320325
if !dropgroupcols
321326
for i in gds.groupcols
@@ -442,6 +447,7 @@ Base.summary(gds::GroupBy) =
442447
function Base.show(io::IO, gds::GroupBy;
443448

444449
kwargs...)
450+
_check_consistency(gds)
445451
#TODO pretty_table is very slow for large views, temporary workaround, later we should fix this
446452
if length(gds.perm) > 200
447453
_show(io, view(gds.parent, [first(gds.perm, 100);last(gds.perm, 100)], :); title = summary(gds), show_omitted_cell_summary=false, show_row_number = false, kwargs...)

src/subdataset/subdataset.jl

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
SubDataset{<:Dataset, <:AbstractIndex, <:AbstractVector{Int}} <: Dataset
2+
SubDataset{<:Dataset, <:AbstractIndex, <:AbstractVector{Int}, DateTime} <: Dataset
33
44
A view of a `Dataset`. It is returned by a call to the `view` function
55
on a `Dataset` if a collection of rows and columns is specified.
@@ -90,6 +90,7 @@ struct SubDataset{D<:AbstractDataset, S<:AbstractIndex, T<:AbstractVector{Int}}
9090
parent::D
9191
colindex::S
9292
rows::T # maps from subds row indexes to parent row indexes
93+
created::DateTime
9394
end
9495

9596
# Attributes of a view: reads the `attributes` field of the view's parent.
function _attributes(sds::SubDataset)
    return getfield(parent(sds), :attributes)
end
@@ -111,11 +112,25 @@ Base.@propagate_inbounds function SubDataset(parent::Dataset, rows::AbstractVect
111112
sindex = SubIndex(index(parent), cols)
112113
# SubDataset without columns should not have any row
113114
if all(==(0), sindex.remap)
114-
SubDataset(parent, sindex, Int[])
115+
SubDataset(parent, sindex, Int[], _get_lastmodified(_attributes(parent)))
115116
else
116-
SubDataset(parent,sindex , rows)
117+
SubDataset(parent,sindex , rows, _get_lastmodified(_attributes(parent)))
117118
end
118119
end
120+
121+
# Build a view of `parent` restricted to `rows` and `cols`, storing the
# caller-supplied `created` value in the resulting `SubDataset`.
# Throws `BoundsError` when `rows` is not within the first axis of `parent`
# (unless bounds checking is elided by the caller).
Base.@propagate_inbounds function SubDataset(parent::Dataset, rows::AbstractVector{Int}, cols, created)
    @boundscheck checkindex(Bool, axes(parent, 1), rows) ||
        throw(BoundsError(parent, (rows, cols)))
    sindex = SubIndex(index(parent), cols)
    # SubDataset without columns should not have any row
    if all(==(0), sindex.remap)
        return SubDataset(parent, sindex, Int[], created)
    end
    return SubDataset(parent, sindex, rows, created)
end
133+
119134
# A `:` row selector is expanded to the full first axis of `parent`.
Base.@propagate_inbounds function SubDataset(parent::Dataset, ::Colon, cols)
    return SubDataset(parent, axes(parent, 1), cols)
end
121136
@inline SubDataset(parent::Dataset, row::Integer, cols) =
@@ -144,7 +159,7 @@ Base.@propagate_inbounds function SubDataset(parent::Dataset, rows::AbstractVect
144159
end
145160

146161
Base.@propagate_inbounds SubDataset(sds::SubDataset, rowind, cols) =
147-
SubDataset(parent(sds), rows(sds)[rowind], parentcols(index(sds), cols))
162+
SubDataset(parent(sds), rows(sds)[rowind], parentcols(index(sds), cols), getfield(sds, :created))
148163
# A `Bool` is rejected as a row selector when taking a view of a view.
Base.@propagate_inbounds function SubDataset(sds::SubDataset, rowind::Bool, cols)
    throw(ArgumentError("invalid row index of type Bool"))
end
150165

@@ -158,7 +173,7 @@ Base.@propagate_inbounds SubDataset(sds::SubDataset, rowind::Bool, cols) =
158173
# A `Bool` is rejected as a row selector, also when all columns (`:`) are requested.
Base.@propagate_inbounds function SubDataset(sds::SubDataset, rowind::Bool, ::Colon)
    throw(ArgumentError("invalid row index of type Bool"))
end
160175
Base.@propagate_inbounds SubDataset(sds::SubDataset, ::Colon, cols) =
161-
SubDataset(parent(sds), rows(sds), parentcols(index(sds), cols))
176+
SubDataset(parent(sds), rows(sds), parentcols(index(sds), cols), getfield(sds, :created))
162177
# Selecting all rows and all columns of a view returns the view itself.
@inline function SubDataset(sds::SubDataset, ::Colon, ::Colon)
    return sds
end
163178

164179
# just for showing SubDataset
@@ -202,15 +217,15 @@ Base.@propagate_inbounds Base.view(ads::AbstractDataset, ::typeof(!), colind::Co
202217

203218
# A `Bool` is rejected as a column selector for `view`.
@inline function Base.view(ads::AbstractDataset, rowinds, colind::Bool)
    throw(ArgumentError("invalid column index $colind of type `Bool`"))
end
205-
Base.@propagate_inbounds Base.view(ads::AbstractDataset, rowinds,
220+
Base.@propagate_inbounds Base.view(parent::AbstractDataset, rowinds,
206221
colinds::MultiColumnIndex) =
207-
SubDataset(ads, rowinds, colinds)
208-
Base.@propagate_inbounds Base.view(ads::AbstractDataset, rowinds::typeof(!),
222+
SubDataset(parent, rowinds, colinds)
223+
Base.@propagate_inbounds Base.view(parent::AbstractDataset, rowinds::typeof(!),
209224
colinds::MultiColumnIndex) =
210-
SubDataset(ads, :, colinds)
211-
Base.@propagate_inbounds Base.view(ads::AbstractDataset, rowinds::Not,
225+
SubDataset(parent, :, colinds)
226+
Base.@propagate_inbounds Base.view(parent::AbstractDataset, rowinds::Not,
212227
colinds::MultiColumnIndex) =
213-
SubDataset(ads, axes(ads, 1)[rowinds], colinds)
228+
SubDataset(parent, axes(parent, 1)[rowinds], colinds)
214229

215230
##############################################################################
216231
##

0 commit comments

Comments
 (0)