Skip to content

Commit 905a853

Browse files
authored
Add HTML table display for GroupBy and GatherBy (#89)
Update the _show() and getmaxwidths() functions so that they can be used to display an HTML table for GroupBy and GatherBy. Add a size() function for GroupBy. Add size() and getformat() functions for GatherBy.
1 parent ef1e32f commit 905a853

File tree

3 files changed

+30
-13
lines changed

3 files changed

+30
-13
lines changed

src/abstractdataset/io.jl

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ implicit row ID column contained in every `AbstractDataset`.
4242
- `mapformats`: Whether to calculate the max widths after mapping format
4343
for each column.
4444
"""
45-
function getmaxwidths(ds::AbstractDataset,
45+
function getmaxwidths(ds::Union{AbstractDataset, GroupBy, GatherBy},
4646
io::IO,
47-
rowindices1::AbstractVector{Int},
48-
rowindices2::AbstractVector{Int},
47+
rowindices1::Union{AbstractVector{Int64}, AbstractVector{Int32}},
48+
rowindices2::Union{AbstractVector{Int64}, AbstractVector{Int32}},
4949
rowlabel::Symbol,
5050
rowid::Union{Integer, Nothing},
5151
show_eltype::Bool,
@@ -57,11 +57,12 @@ function getmaxwidths(ds::AbstractDataset,
5757

5858
undefstrwidth = ourstrwidth(io, "#undef", buffer, truncstring)
5959

60-
ct = show_eltype ? batch_compacttype(Any[eltype(c) for c in eachcol(ds)]) : String[]
60+
ct = show_eltype ? batch_compacttype(ds isa AbstractDataset ? Any[eltype(c) for c in eachcol(ds)] : Any[eltype(c) for c in eachcol(ds.parent)]) : String[]
6161
tty_cols = displaysize(io)[2]
6262
maxwidthsum = 0
6363
j = 1
64-
for (col_idx, (name, col)) in enumerate(pairs(eachcol(ds)))
64+
cols = ds isa AbstractDataset ? eachcol(ds) : eachcol(ds.parent)
65+
for (col_idx, (name, col)) in enumerate(pairs(cols))
6566
# (1) Consider length of column name
6667
# do not truncate column name
6768
maxwidth = ourstrwidth(io, name, buffer, 0)
@@ -144,7 +145,7 @@ julia> show(stdout, MIME("text/csv"), Dataset(A = 1:3, B = ["x", "y", "z"]))
144145
```
145146
"""
146147
Base.show(io::IO, mime::MIME, ds::AbstractDataset)
147-
Base.show(io::IO, mime::MIME"text/html", ds::AbstractDataset;
148+
Base.show(io::IO, mime::MIME"text/html", ds::Union{AbstractDataset, GroupBy, GatherBy};
148149
summary::Bool = true, eltypes::Bool = true, mapformats = true) =
149150
_show(io, mime, ds, summary = summary, eltypes = eltypes, mapformats = mapformats)
150151
Base.show(io::IO, mime::MIME"text/latex", ds::AbstractDataset; eltypes::Bool = true, mapformats = true, formats = true) =
@@ -182,9 +183,14 @@ function html_escape(cell::AbstractString)
182183
return cell
183184
end
184185

185-
function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
186+
function _show(io::IO, ::MIME"text/html", ds::Union{AbstractDataset, GroupBy, GatherBy};
186187
summary::Bool=true, eltypes::Bool=true, rowid::Union{Int, Nothing}=nothing, mapformats = true)
187-
_check_consistency(ds)
188+
# Cache whether ds is an AbstractDataset in a Boolean, since this check is needed many times below
189+
isadataset = false
190+
if ds isa AbstractDataset
191+
_check_consistency(ds)
192+
isadataset = true
193+
end
188194

189195
# we will pass around this buffer to avoid its reallocation in ourstrwidth
190196
buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true)
@@ -201,7 +207,7 @@ function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
201207
if get(io, :limit, false)
202208
tty_rows, tty_cols = displaysize(io)
203209
mxrow = min(mxrow, tty_rows)
204-
maxwidths = getmaxwidths(ds, io, 1:mxrow, 0:-1, :X, nothing, true, buffer, 0, mapformats = mapformats) .+ 2
210+
maxwidths = getmaxwidths(ds, io, _get_perms(ds)[1:mxrow], 0:-1, :X, nothing, true, buffer, 0, mapformats = mapformats) .+ 2
205211
mxcol = min(mxcol, searchsortedfirst(cumsum(maxwidths), tty_cols))
206212
end
207213

@@ -225,7 +231,7 @@ function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
225231
# We put a longer string for the type into the title argument of the <th> element,
226232
# which the users can hover over. The limit of 256 characters is arbitrary, but
227233
# we want some maximum limit, since the types can sometimes get really-really long.
228-
types = Any[eltype(ds[!, idx]) for idx in 1:mxcol]
234+
types = isadataset ? Any[eltype(ds[!, idx]) for idx in 1:mxcol] : Any[eltype(ds.parent[!, idx]) for idx in 1:mxcol]
229235
ct, ct_title = batch_compacttype(types), batch_compacttype(types, 256)
230236
for j in 1:mxcol
231237
s = html_escape(ct[j])
@@ -244,6 +250,12 @@ function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
244250
end
245251
if ds isa SubDataset
246252
mainmsg = "<p>$(digitsep(nrow(ds))) rows × $(digitsep(ncol(ds))) columns$omitmsg</p><p><b> SubDataset (view of Dataset)</p>"
253+
elseif !isadataset
254+
mainmsg = if ds isa GroupBy
255+
"<p>$(digitsep(nrow(ds))) rows × $(digitsep(ncol(ds))) columns$omitmsg</p><p><b> View of Grouped Dataset </p><p> Grouped by: $(join(_names(ds)[ds.groupcols],", ")) </p>"
256+
elseif ds isa GatherBy
257+
"<p>$(digitsep(nrow(ds))) rows × $(digitsep(ncol(ds))) columns$omitmsg</p><p><b> View of GatherBy Dataset </p><p> Gathered by: $(join(_names(ds)[ds.groupcols],", ")) </p>"
258+
end
247259
else
248260
mainmsg = if !isempty(index(ds).sortedcols) && index(ds).grouped[]
249261
"<p>$(digitsep(nrow(ds))) rows × $(digitsep(ncol(ds))) columns$omitmsg</p><p><b> Grouped Dataset with $(index(ds).ngroups[]) groups </p><p> Grouped by: $(join(_names(ds)[index(ds).sortedcols],", ")) </p>"
@@ -256,16 +268,16 @@ function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
256268
write(io, mainmsg)
257269

258270
end
259-
for row in 1:mxrow
271+
for row in _get_perms(ds)[1:mxrow]
260272
write(io, "<tr>")
261273
if rowid === nothing
262274
write(io, "<th>$row</th>")
263275
else
264276
write(io, "<th>$rowid</th>")
265277
end
266278
for column_name in cnames
267-
if isassigned(ds[!, column_name], row)
268-
cell_val = getformat(ds, column_name)(ds[row, column_name])
279+
if isassigned(isadataset ? ds[!, column_name] : ds.parent[!, column_name], row)
280+
cell_val = isadataset ? getformat(ds, column_name)(ds[row, column_name]) : getformat(ds.parent, column_name)(ds.parent[row, column_name])
269281
if ismissing(cell_val)
270282
write(io, "<td><em>missing</em></td>")
271283
elseif cell_val isa Markdown.MD

src/sort/gatherby.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ _names(ds::GatherBy) = _names(ds.parent)
5050
# Forward the column lookup of a GatherBy to its parent dataset.
function _columns(ds::GatherBy)
    return _columns(ds.parent)
end
5151
# Forward the index lookup of a GatherBy to its parent dataset.
function index(ds::GatherBy)
    return index(ds.parent)
end
5252
# Return the object stored in the `parent` field of a GatherBy.
function Base.parent(ds::GatherBy)
    return ds.parent
end
53+
# The size of a GatherBy is reported as the size of its parent.
function Base.size(ds::GatherBy)
    return size(ds.parent)
end
54+
# Size of a GatherBy along dimension `i`, delegated to its parent.
function Base.size(ds::GatherBy, i::Integer)
    return size(ds.parent, i)
end
55+
# Look up the format for column `i` on the parent of a GatherBy.
function getformat(ds::GatherBy, i)
    return getformat(ds.parent, i)
end
5356

5457

5558
Base.summary(gds::GatherBy) =

src/sort/groupby.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ _names(ds::GroupBy) = _names(ds.parent)
2929
# Forward the column lookup of a GroupBy to its parent dataset.
function _columns(ds::GroupBy)
    return _columns(ds.parent)
end
3030
# Forward the index lookup of a GroupBy to its parent dataset.
function index(ds::GroupBy)
    return index(ds.parent)
end
3131
# Return the object stored in the `parent` field of a GroupBy.
function Base.parent(ds::GroupBy)
    return ds.parent
end
32+
# The size of a GroupBy is reported as the size of its parent.
function Base.size(ds::GroupBy)
    return size(ds.parent)
end
33+
# Size of a GroupBy along dimension `i`, delegated to its parent.
function Base.size(ds::GroupBy, i::Integer)
    return size(ds.parent, i)
end
3234

3335
function groupby(ds::Dataset, cols::MultiColumnIndex; alg = HeapSortAlg(), rev = false, mapformats::Bool = true, stable = true, threads = true)
3436
_check_consistency(ds)

0 commit comments

Comments
 (0)