Skip to content

Commit 19002e8

Browse files
committed
improve broadcastings
1 parent 093b134 commit 19002e8

File tree

5 files changed

+2064
-20
lines changed

5 files changed

+2064
-20
lines changed

src/dataset/setindex.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ function Base.setindex!(ds::Dataset, v::AbstractVector, ::typeof(!), col_ind::Co
6969
return ds
7070
end
7171

72+
function Base.setindex!(ds::Dataset, v::DatasetColumn, ::typeof(!), col_ind::ColumnIndex)
73+
insert_single_column!(ds, __!(v), col_ind)
74+
return ds
75+
end
76+
7277
# ds.col = AbstractVector
7378
# separate methods are needed due to dispatch ambiguity
7479

@@ -85,6 +90,16 @@ function Base.setproperty!(ds::Dataset, col_ind::AbstractString, v::AbstractVect
8590
return ds
8691
end
8792

93+
function Base.setproperty!(ds::Dataset, col_ind::Symbol, v::DatasetColumn)
94+
insert_single_column!(ds, __!(v), col_ind)
95+
return ds
96+
end
97+
98+
function Base.setproperty!(ds::Dataset, col_ind::AbstractString, v::DatasetColumn)
99+
insert_single_column!(ds, __!(v), col_ind)
100+
return ds
101+
end
102+
88103
# Modify Dataset
89104
Base.setproperty!(::Dataset, col_ind::Symbol, v::Any) =
90105
throw(ArgumentError("It is only allowed to pass a vector as a column of a Dataset. " *

src/other/broadcasting.jl

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,17 @@ Base.setindex!(ds::AbstractDataset, val, idx::CartesianIndex{2}) =
66
(ds[idx[1], idx[2]] = val)
77

88
Base.broadcastable(ds::AbstractDataset) = ds
9+
Base.broadcastable(col::DatasetColumn) = __!(col)
10+
Base.broadcastable(col::SubDatasetColumn) = __!(col)
11+
12+
13+
function _our_copy(ds::Dataset; copycols = true)
14+
# TODO: currently, if an observation is mutable, copying the data set doesn't protect it
15+
# Create Dataset
16+
newds = Dataset(copy(_columns(ds)), copy(index(ds)); copycols = copycols)
17+
setinfo!(newds, _attributes(ds).meta.info[])
18+
return newds
19+
end
920

1021
struct DatasetStyle <: Base.Broadcast.BroadcastStyle end
1122

@@ -38,26 +49,26 @@ end
3849
function getcolbc(bcf::Base.Broadcast.Broadcasted{Style}, colind) where {Style}
3950
# we assume that bcf is already flattened and unaliased
4051
newargs = map(bcf.args) do x
41-
Base.Broadcast.extrude(x isa AbstractDataset ? x[!, colind] : x)
52+
Base.Broadcast.extrude(x isa AbstractDataset ? _columns(x)[colind] : x)
4253
end
4354
return Base.Broadcast.Broadcasted{Style}(bcf.f, newargs, bcf.axes)
4455
end
4556

4657
function Base.copy(bc::Base.Broadcast.Broadcasted{DatasetStyle})
4758
ndim = length(axes(bc))
4859
if ndim != 2
49-
throw(DimensionMismatch("cannot broadcast a data frame into $ndim dimensions"))
60+
throw(DimensionMismatch("cannot broadcast a data set into $ndim dimensions"))
5061
end
5162
bcf = Base.Broadcast.flatten(bc)
5263
colnames = unique!(Any[_names(ds) for ds in bcf.args if ds isa AbstractDataset])
5364
if length(colnames) != 1
5465
wrongnames = setdiff(union(colnames...), intersect(colnames...))
5566
if isempty(wrongnames)
56-
throw(ArgumentError("Column names in broadcasted data frames " *
67+
throw(ArgumentError("Column names in broadcasted data sets " *
5768
"must have the same order"))
5869
else
5970
msg = join(wrongnames, ", ", " and ")
60-
throw(ArgumentError("Column names in broadcasted data frames must match. " *
71+
throw(ArgumentError("Column names in broadcasted data sets must match. " *
6172
"Non matching column names are $msg"))
6273
end
6374
end
@@ -152,7 +163,7 @@ function Base.copyto!(lazyds::LazyNewColDataset, bc::Base.Broadcast.Broadcasted{
152163
if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}
153164
bc_tmp = Base.Broadcast.Broadcasted{T}(bc.f, bc.args, ())
154165
v = Base.Broadcast.materialize(bc_tmp)
155-
col = similar(Vector{typeof(v)}, nrow(lazyds.ds))
166+
col = similar(Vector{Union{typeof(v), Missing}}, nrow(lazyds.ds))
156167
copyto!(col, bc)
157168
else
158169
col = Base.Broadcast.materialize(bc)
@@ -164,7 +175,7 @@ function Base.copyto!(col1::SubDatasetColumn, bc::Base.Broadcast.Broadcasted{T})
164175
if bc isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}
165176
bc_tmp = Base.Broadcast.Broadcasted{T}(bc.f, bc.args, ())
166177
v = Base.Broadcast.materialize(bc_tmp)
167-
col = similar(Vector{typeof(v)}, length(col1))
178+
col = similar(Vector{Union{Missing, typeof(v)}}, length(col1))
168179
copyto!(col, bc)
169180
else
170181
col = Base.Broadcast.materialize(bc)
@@ -202,53 +213,55 @@ function _copyto_helper!(dscol::Union{SubDatasetColumn, DatasetColumn}, bc::Base
202213
end
203214

204215
function Base.Broadcast.broadcast_unalias(dest::AbstractDataset, src)
205-
for col in eachcol(dest)
206-
src = Base.Broadcast.unalias(col, src)
216+
for i in 1:ncol(dest)
217+
src = Base.Broadcast.unalias(_columns(dest)[i], src)
207218
end
208219
return src
209220
end
210221

211222
function Base.Broadcast.broadcast_unalias(dest, src::AbstractDataset)
212223
wascopied = false
213224
for (i, col) in enumerate(eachcol(src))
214-
if Base.mightalias(dest, col)
225+
if Base.mightalias(dest, __!(col))
215226
if src isa SubDataset
216227
if !wascopied
217-
src = SubDataset(copy(parent(src), copycols=false),
228+
src = SubDataset(_our_copy(parent(src), copycols=false),
218229
index(src), rows(src))
219230
end
220231
parentidx = parentcols(index(src), i)
221-
parent(src)[!, parentidx] = Base.unaliascopy(parent(src)[!, parentidx])
232+
parent(src)[!, parentidx] = Base.unaliascopy(_columns(parent(src))[parentidx])
222233
else
223234
if !wascopied
224-
src = copy(src, copycols=false)
235+
src = _our_copy(src, copycols=false)
225236
end
226-
src[!, i] = Base.unaliascopy(col)
237+
src[!, i] = Base.unaliascopy(__!(col))
227238
end
228239
wascopied = true
229240
end
230241
end
231242
return src
232243
end
233244

245+
#TODO for views of data sets, parent columns are copied, e.g. view(ds, 1:10, :) .+= 1
234246
function _broadcast_unalias_helper(dest::AbstractDataset, scol::AbstractVector,
235247
src::AbstractDataset, col2::Int, wascopied::Bool)
236248
# col1 only needs to be checked up to col2, as we are writing broadcasting
237249
# results from 1 to ncol
238250
# we go downwards because aliasing when col1 == col2 is most probable
239251
for col1 in col2:-1:1
240-
dcol = dest[!, col1]
252+
dcol = _columns(dest)[col1] #dest[!, col1]
241253
if Base.mightalias(dcol, scol)
242254
if src isa SubDataset
243255
if !wascopied
244-
src =SubDataset(copy(parent(src), copycols=false),
256+
src =SubDataset(_our_copy(parent(src), copycols=false),
245257
index(src), rows(src))
246258
end
247259
parentidx = parentcols(index(src), col2)
248-
parent(src)[!, parentidx] = Base.unaliascopy(parent(src)[!, parentidx])
260+
parent(src)[!, parentidx] = Base.unaliascopy(_columns(parent(src))[parentidx])
261+
# parent(src)[!, parentidx] = Base.unaliascopy(parent(src)[!, parentidx])
249262
else
250263
if !wascopied
251-
src = copy(src, copycols=false)
264+
src = _our_copy(src, copycols=false)
252265
end
253266
src[!, col2] = Base.unaliascopy(scol)
254267
end
@@ -261,6 +274,9 @@ end
261274
_broadcast_unalias_helper(dest::AbstractDataset, scol::DatasetColumn,
262275
src::AbstractDataset, col2::Int, wascopied::Bool) = _broadcast_unalias_helper(dest, scol.val,
263276
src, col2, wascopied)
277+
_broadcast_unalias_helper(dest::AbstractDataset, scol::SubDatasetColumn,
278+
src::AbstractDataset, col2::Int, wascopied::Bool) = _broadcast_unalias_helper(dest, __!(scol),
279+
src, col2, wascopied)
264280

265281
function Base.Broadcast.broadcast_unalias(dest::AbstractDataset, src::AbstractDataset)
266282
if size(dest, 2) != size(src, 2)
@@ -271,6 +287,7 @@ function Base.Broadcast.broadcast_unalias(dest::AbstractDataset, src::AbstractDa
271287
scol = src[!, col2]
272288
src, wascopied = _broadcast_unalias_helper(dest, scol, src, col2, wascopied)
273289
end
290+
_modified(_attributes(dest))
274291
return src
275292
end
276293

@@ -302,8 +319,9 @@ function Base.copyto!(ds::AbstractDataset,
302319
# special case of fast approach when bc is providing an untransformed scalar
303320
if bc.f === identity && bc.args isa Tuple{Any} && Base.Broadcast.isflat(bc)
304321
for col in axes(ds, 2)
305-
fill!(ds[!, col], bc.args[1][])
322+
fill!(_columns(ds)[col], bc.args[1][])
306323
end
324+
_modified(_attributes(ds))
307325
return ds
308326
else
309327
return copyto!(ds, convert(Base.Broadcast.Broadcasted{Nothing}, bc))
@@ -337,14 +355,14 @@ function Base.copyto!(crds::ColReplaceDataset, bc::Base.Broadcast.Broadcasted)
337355
if bcf′_col isa Base.Broadcast.Broadcasted{<:Base.Broadcast.AbstractArrayStyle{0}}
338356
bc_tmp = create_bc_tmp(bcf′_col)
339357
v = Base.Broadcast.materialize(bc_tmp)
340-
newcol = similar(Vector{typeof(v)}, nrow(crds.ds))
358+
newcol = similar(Vector{Union{Missing, typeof(v)}}, nrow(crds.ds))
341359
copyto!(newcol, bc)
342360
else
343361
if nrows == 0
344362
newcol = Any[]
345363
else
346364
v1 = bcf′_col[CartesianIndex(1, i)]
347-
startcol = similar(Vector{typeof(v1)}, nrows)
365+
startcol = similar(Vector{Union{Missing, typeof(v1)}}, nrows)
348366
startcol[1] = v1
349367
newcol = copyto_widen!(startcol, bcf′_col, 2, i)
350368
end

src/other/index.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ _modified(x::Attributes) = x.meta.modified[] = now()
5656
# x.ngroups[] = 1
5757
# end
5858

59+
_get_lastmodified(x) = x.meta.modified[]
60+
5961
function Index(names::AbstractVector{Symbol}; makeunique::Bool=false)
6062
u = make_unique(names, makeunique=makeunique)
6163
lookup = Dict{Symbol, Int}(zip(u, 1:length(u)))

0 commit comments

Comments
 (0)