@@ -6,6 +6,17 @@ Base.setindex!(ds::AbstractDataset, val, idx::CartesianIndex{2}) =
66 (ds[idx[1 ], idx[2 ]] = val)
77
# Broadcasting entry points: a data set participates in broadcasting as itself,
# while column wrappers are first passed through `__!`.
# NOTE(review): `__!` is defined elsewhere; presumably it returns the wrapped
# vector of the column — confirm.
function Base.broadcastable(ds::AbstractDataset)
    return ds
end

function Base.broadcastable(col::DatasetColumn)
    return __!(col)
end

function Base.broadcastable(col::SubDatasetColumn)
    return __!(col)
end
11+
12+
"""
    _our_copy(ds::Dataset; copycols = true)

Construct a new `Dataset` from copies of the column container and the index of
`ds`, forwarding `copycols` to the `Dataset` constructor, and then transfer the
`info` metadata of `ds` onto the result with `setinfo!`.
"""
function _our_copy(ds::Dataset; copycols = true)
    # TODO: when an observation is itself mutable, the copy still shares it with `ds`
    cols = copy(_columns(ds))
    idx = copy(index(ds))
    result = Dataset(cols, idx; copycols = copycols)
    info = _attributes(ds).meta.info[]
    setinfo!(result, info)
    return result
end
920
"""
    DatasetStyle

Field-less subtype of `Base.Broadcast.BroadcastStyle`, used as the type
parameter of `Broadcasted` objects that this file dispatches on.
"""
struct DatasetStyle <: Base.Broadcast.BroadcastStyle
end
1122
"""
    getcolbc(bcf::Base.Broadcast.Broadcasted{Style}, colind) where {Style}

Return a `Broadcasted{Style}` with the same function and axes as `bcf`, in which
every `AbstractDataset` argument is replaced by entry `colind` of its
`_columns(...)` container. Every argument (replaced or not) is passed through
`Base.Broadcast.extrude`.

`bcf` is assumed to be already flattened and unaliased.
"""
function getcolbc(bcf::Base.Broadcast.Broadcasted{Style}, colind) where {Style}
    # select the requested column for data sets, leave everything else untouched
    pick(arg) = arg isa AbstractDataset ? _columns(arg)[colind] : arg
    colargs = map(arg -> Base.Broadcast.extrude(pick(arg)), bcf.args)
    return Base.Broadcast.Broadcasted{Style}(bcf.f, colargs, bcf.axes)
end
4556
4657function Base. copy (bc:: Base.Broadcast.Broadcasted{DatasetStyle} )
4758 ndim = length (axes (bc))
4859 if ndim != 2
49- throw (DimensionMismatch (" cannot broadcast a data frame into $ndim dimensions" ))
60+ throw (DimensionMismatch (" cannot broadcast a data set into $ndim dimensions" ))
5061 end
5162 bcf = Base. Broadcast. flatten (bc)
5263 colnames = unique! (Any[_names (ds) for ds in bcf. args if ds isa AbstractDataset])
5364 if length (colnames) != 1
5465 wrongnames = setdiff (union (colnames... ), intersect (colnames... ))
5566 if isempty (wrongnames)
56- throw (ArgumentError (" Column names in broadcasted data frames " *
67+ throw (ArgumentError (" Column names in broadcasted data sets " *
5768 " must have the same order" ))
5869 else
5970 msg = join (wrongnames, " , " , " and " )
60- throw (ArgumentError (" Column names in broadcasted data frames must match. " *
71+ throw (ArgumentError (" Column names in broadcasted data sets must match. " *
6172 " Non matching column names are $msg " ))
6273 end
6374 end
@@ -152,7 +163,7 @@ function Base.copyto!(lazyds::LazyNewColDataset, bc::Base.Broadcast.Broadcasted{
152163 if bc isa Base. Broadcast. Broadcasted{<: Base.Broadcast.AbstractArrayStyle{0} }
153164 bc_tmp = Base. Broadcast. Broadcasted {T} (bc. f, bc. args, ())
154165 v = Base. Broadcast. materialize (bc_tmp)
155- col = similar (Vector{typeof (v)}, nrow (lazyds. ds))
166+ col = similar (Vector{Union{ typeof (v), Missing} }, nrow (lazyds. ds))
156167 copyto! (col, bc)
157168 else
158169 col = Base. Broadcast. materialize (bc)
@@ -164,7 +175,7 @@ function Base.copyto!(col1::SubDatasetColumn, bc::Base.Broadcast.Broadcasted{T})
164175 if bc isa Base. Broadcast. Broadcasted{<: Base.Broadcast.AbstractArrayStyle{0} }
165176 bc_tmp = Base. Broadcast. Broadcasted {T} (bc. f, bc. args, ())
166177 v = Base. Broadcast. materialize (bc_tmp)
167- col = similar (Vector{typeof (v)}, length (col1))
178+ col = similar (Vector{Union{Missing, typeof (v)} }, length (col1))
168179 copyto! (col, bc)
169180 else
170181 col = Base. Broadcast. materialize (bc)
@@ -202,53 +213,55 @@ function _copyto_helper!(dscol::Union{SubDatasetColumn, DatasetColumn}, bc::Base
202213end
203214
"""
    Base.Broadcast.broadcast_unalias(dest::AbstractDataset, src)

Unalias `src` against every column of `dest` in turn (columns `1:ncol(dest)` of
`_columns(dest)`), threading the result of each `Base.Broadcast.unalias` call
into the next one, and return the final value. When `dest` has no columns,
`src` is returned unchanged.
"""
function Base.Broadcast.broadcast_unalias(dest::AbstractDataset, src)
    return foldl(1:ncol(dest); init = src) do acc, i
        Base.Broadcast.unalias(_columns(dest)[i], acc)
    end
end
210221
"""
    Base.Broadcast.broadcast_unalias(dest, src::AbstractDataset)

Walk over the columns of `src` and, for each one that `Base.mightalias` reports
as possibly aliasing `dest`, substitute it with `Base.unaliascopy` of the
column. Before the first substitution `src` is rebound to a copy made with
`_our_copy(...; copycols=false)` (for a `SubDataset`, the parent is copied and a
new `SubDataset` is built over it), so the substitutions are written into that
copy rather than into the object passed in. Returns the possibly rebound `src`.
"""
function Base.Broadcast.broadcast_unalias(dest, src::AbstractDataset)
    wascopied = false
    # the iterator is created from the incoming `src`; rebinding `src` below
    # does not change which columns are visited
    for (i, col) in enumerate(eachcol(src))
        Base.mightalias(dest, __!(col)) || continue
        if src isa SubDataset
            if !wascopied
                newparent = _our_copy(parent(src), copycols=false)
                src = SubDataset(newparent, index(src), rows(src))
            end
            # translate the view's column position into the parent's
            parentidx = parentcols(index(src), i)
            fresh = Base.unaliascopy(_columns(parent(src))[parentidx])
            parent(src)[!, parentidx] = fresh
        else
            if !wascopied
                src = _our_copy(src, copycols=false)
            end
            src[!, i] = Base.unaliascopy(__!(col))
        end
        wascopied = true
    end
    return src
end
233244
245+ # TODO: for views of data sets the parent columns are copied, e.g. view(ds, 1:10, :) .+= 1
234246function _broadcast_unalias_helper (dest:: AbstractDataset , scol:: AbstractVector ,
235247 src:: AbstractDataset , col2:: Int , wascopied:: Bool )
236248 # col1 can be checked till col2 point as we are writing broadcasting
237249 # results from 1 to ncol
238250 # we go downwards because aliasing when col1 == col2 is most probable
239251 for col1 in col2: - 1 : 1
240- dcol = dest[! , col1]
252+ dcol = _columns (dest)[col1] # dest[!, col1]
241253 if Base. mightalias (dcol, scol)
242254 if src isa SubDataset
243255 if ! wascopied
244- src = SubDataset (copy (parent (src), copycols= false ),
256+ src = SubDataset (_our_copy (parent (src), copycols= false ),
245257 index (src), rows (src))
246258 end
247259 parentidx = parentcols (index (src), col2)
248- parent (src)[! , parentidx] = Base. unaliascopy (parent (src)[! , parentidx])
260+ parent (src)[! , parentidx] = Base. unaliascopy (_columns (parent (src))[parentidx])
261+ # parent(src)[!, parentidx] = Base.unaliascopy(parent(src)[!, parentidx])
249262 else
250263 if ! wascopied
251- src = copy (src, copycols= false )
264+ src = _our_copy (src, copycols= false )
252265 end
253266 src[! , col2] = Base. unaliascopy (scol)
254267 end
# `DatasetColumn` source: forward to the `AbstractVector` method using the
# column's `val` field.
function _broadcast_unalias_helper(dest::AbstractDataset, scol::DatasetColumn,
                                   src::AbstractDataset, col2::Int, wascopied::Bool)
    return _broadcast_unalias_helper(dest, scol.val, src, col2, wascopied)
end
# `SubDatasetColumn` source: pass the column through `__!` and forward the
# result to the matching `_broadcast_unalias_helper` method.
function _broadcast_unalias_helper(dest::AbstractDataset, scol::SubDatasetColumn,
                                   src::AbstractDataset, col2::Int, wascopied::Bool)
    return _broadcast_unalias_helper(dest, __!(scol), src, col2, wascopied)
end
264280
265281function Base. Broadcast. broadcast_unalias (dest:: AbstractDataset , src:: AbstractDataset )
266282 if size (dest, 2 ) != size (src, 2 )
@@ -271,6 +287,7 @@ function Base.Broadcast.broadcast_unalias(dest::AbstractDataset, src::AbstractDa
271287 scol = src[! , col2]
272288 src, wascopied = _broadcast_unalias_helper (dest, scol, src, col2, wascopied)
273289 end
290+ _modified (_attributes (dest))
274291 return src
275292end
276293
@@ -302,8 +319,9 @@ function Base.copyto!(ds::AbstractDataset,
302319 # special case of fast approach when bc is providing an untransformed scalar
303320 if bc. f === identity && bc. args isa Tuple{Any} && Base. Broadcast. isflat (bc)
304321 for col in axes (ds, 2 )
305- fill! (ds[ ! , col], bc. args[1 ][])
322+ fill! (_columns (ds)[ col], bc. args[1 ][])
306323 end
324+ _modified (_attributes (ds))
307325 return ds
308326 else
309327 return copyto! (ds, convert (Base. Broadcast. Broadcasted{Nothing}, bc))
@@ -337,14 +355,14 @@ function Base.copyto!(crds::ColReplaceDataset, bc::Base.Broadcast.Broadcasted)
337355 if bcf′_col isa Base. Broadcast. Broadcasted{<: Base.Broadcast.AbstractArrayStyle{0} }
338356 bc_tmp = create_bc_tmp (bcf′_col)
339357 v = Base. Broadcast. materialize (bc_tmp)
340- newcol = similar (Vector{typeof (v)}, nrow (crds. ds))
358+ newcol = similar (Vector{Union{Missing, typeof (v)} }, nrow (crds. ds))
341359 copyto! (newcol, bc)
342360 else
343361 if nrows == 0
344362 newcol = Any[]
345363 else
346364 v1 = bcf′_col[CartesianIndex (1 , i)]
347- startcol = similar (Vector{typeof (v1)}, nrows)
365+ startcol = similar (Vector{Union{Missing, typeof (v1)} }, nrows)
348366 startcol[1 ] = v1
349367 newcol = copyto_widen! (startcol, bcf′_col, 2 , i)
350368 end
0 commit comments