Skip to content

Commit ef1e32f

Browse files
authored
Fix the issue that the maximum width of the column does not change when a format is applied to the column (#86)
* Improve the speed of displaying large datasets in Jupyter Notebook: Fix the issue that Jupyter Notebook takes too much time when displaying large datasets with many columns. The getmaxwidths() function now only calculates the maximum width of the columns that will be displayed, not of all columns. Please see the issue for more details. * Modify the initialization code of maxwidths: The loop may now break before all the values of maxwidths are filled, so the initialization code of maxwidths has been modified. * Fix the issue that the maximum width of the column does not change when a format is applied to the column: When the getmaxwidths() function calculated the maximum width of a column, the format was not applied to the column. When the user set a format, the maximum number of columns that the Dataset can display was still calculated according to the original data. However, the maximum number of displayed columns may change after setting the format. Now the format of each column is applied before calculating the maximum width of the column, so the maximum number of columns that can be displayed is updated correctly. * Add mapformats keyword argument to getmaxwidths() and _show(): The FOR loop is duplicated so that mapformats only needs to be determined once. * Update io.jl: Simplify part of the code of getmaxwidths(). * Update io.jl: Make mapformats a keyword argument and update the docstring of the getmaxwidths() function.
1 parent b4a33dc commit ef1e32f

File tree

1 file changed

+23
-11
lines changed

1 file changed

+23
-11
lines changed

src/abstractdataset/io.jl

100755100644
Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
rowlabel::Symbol,
77
rowid::Union{Integer, Nothing},
88
show_eltype::Bool,
9-
buffer::IOBuffer)
9+
buffer::IOBuffer,
10+
truncstring::Int;
11+
mapformats::Bool = true)
1012
1113
Calculate, for each column of an AbstractDataset, the maximum
1214
string width used to render the name of that column, its type, and the
1315
longest entry in that column -- among the rows of the data frame that
14-
will be rendered to IO. The widths for all columns are returned as a
15-
vector.
16+
will be rendered to IO. The widths for the columns that will be displayed
17+
are returned as a vector.
1618
1719
Return a `Vector{Int}` giving the maximum string widths required to render
1820
each column, including that column's name and type.
@@ -35,6 +37,10 @@ implicit row ID column contained in every `AbstractDataset`.
3537
- `show_eltype`: Whether to print the column type
3638
under the column name in the heading.
3739
- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth`
40+
- `truncstring`: The length of the string to be truncated. The string will
41+
not be truncated if `truncstring` is equal to 0.
42+
- `mapformats`: Whether to calculate the max widths after mapping format
43+
for each column.
3844
"""
3945
function getmaxwidths(ds::AbstractDataset,
4046
io::IO,
@@ -44,7 +50,9 @@ function getmaxwidths(ds::AbstractDataset,
4450
rowid::Union{Integer, Nothing},
4551
show_eltype::Bool,
4652
buffer::IOBuffer,
47-
truncstring::Int)
53+
truncstring::Int;
54+
mapformats::Bool = true)
55+
4856
maxwidths = zeros(Int, size(ds, 2) + 1)
4957

5058
undefstrwidth = ourstrwidth(io, "#undef", buffer, truncstring)
@@ -57,7 +65,11 @@ function getmaxwidths(ds::AbstractDataset,
5765
# (1) Consider length of column name
5866
# do not truncate column name
5967
maxwidth = ourstrwidth(io, name, buffer, 0)
60-
68+
# Calculate max widths after mapping formats if mapformats = true, because formats may affect the max widths
69+
if mapformats
70+
f = getformat(ds, col_idx)
71+
col = f.(col)
72+
end
6173
# (2) Consider length of longest entry in that column
6274
for indices in (rowindices1, rowindices2), i in indices
6375
if isassigned(col, i)
@@ -133,10 +145,10 @@ julia> show(stdout, MIME("text/csv"), Dataset(A = 1:3, B = ["x", "y", "z"]))
133145
"""
134146
Base.show(io::IO, mime::MIME, ds::AbstractDataset)
135147
Base.show(io::IO, mime::MIME"text/html", ds::AbstractDataset;
136-
summary::Bool=true, eltypes::Bool=true) =
137-
_show(io, mime, ds, summary=summary, eltypes=eltypes)
138-
Base.show(io::IO, mime::MIME"text/latex", ds::AbstractDataset; eltypes::Bool=true, mapformats = true, formats = true) =
139-
_show(io, mime, ds, eltypes=eltypes, mapformats = mapformats, formats = formats)
148+
summary::Bool = true, eltypes::Bool = true, mapformats = true) =
149+
_show(io, mime, ds, summary = summary, eltypes = eltypes, mapformats = mapformats)
150+
Base.show(io::IO, mime::MIME"text/latex", ds::AbstractDataset; eltypes::Bool = true, mapformats = true, formats = true) =
151+
_show(io, mime, ds, eltypes = eltypes, mapformats = mapformats, formats = formats)
140152
Base.show(io::IO, mime::MIME"text/csv", ds::AbstractDataset; mapformats = true) =
141153
printtable(io, ds, header = true, separator = ',', mapformats = mapformats)
142154
Base.show(io::IO, mime::MIME"text/tab-separated-values", ds::AbstractDataset; mapformats = true) =
@@ -171,7 +183,7 @@ function html_escape(cell::AbstractString)
171183
end
172184

173185
function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
174-
summary::Bool=true, eltypes::Bool=true, rowid::Union{Int, Nothing}=nothing)
186+
summary::Bool=true, eltypes::Bool=true, rowid::Union{Int, Nothing}=nothing, mapformats = true)
175187
_check_consistency(ds)
176188

177189
# we will pass around this buffer to avoid its reallocation in ourstrwidth
@@ -189,7 +201,7 @@ function _show(io::IO, ::MIME"text/html", ds::AbstractDataset;
189201
if get(io, :limit, false)
190202
tty_rows, tty_cols = displaysize(io)
191203
mxrow = min(mxrow, tty_rows)
192-
maxwidths = getmaxwidths(ds, io, 1:mxrow, 0:-1, :X, nothing, true, buffer, 0) .+ 2
204+
maxwidths = getmaxwidths(ds, io, 1:mxrow, 0:-1, :X, nothing, true, buffer, 0, mapformats = mapformats) .+ 2
193205
mxcol = min(mxcol, searchsortedfirst(cumsum(maxwidths), tty_cols))
194206
end
195207

0 commit comments

Comments
 (0)