diff --git a/Project.toml b/Project.toml
index 68ecaaa..ee12dfb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,12 +5,10 @@ version = "1.0.1-DEV"
[deps]
DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-ExcelReaders = "c04bee98-12a5-510c-87df-2a230cb6e075"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
IterableTables = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d"
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
TableShowUtils = "5e66a065-1f0a-5976-b372-e0b8c017ca10"
TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
TableTraitsUtils = "382cd787-c1b6-5bf2-a167-d5b971a19bda"
@@ -18,15 +16,13 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0"
[compat]
DataValues = "0.4.11"
-ExcelReaders = "0.11"
FileIO = "1"
IterableTables = "0.8.3, 0.9, 0.10, 0.11, 1"
IteratorInterfaceExtensions = "0.1.1, 1"
-PyCall = "1.90"
TableShowUtils = "0.2"
TableTraits = "0.3.1, 0.4, 1"
TableTraitsUtils = "0.3, 0.4, 1"
-XLSX = "0.4.1, 0.5, 0.6, 0.7, 0.8, 0.9"
+XLSX = "0.10, 0.11"
julia = "1"
[extras]
diff --git a/README.md b/README.md
index f175400..6f037d6 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,18 @@
## Overview
-This package provides load support for Excel files under the
+This package provides support for Excel files under the
[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package.
+It provides functionality to read simple tabular data from
+an Excel (.xlsx) file and to save simple tabular data to an
+Excel file.
+
+For more extensive functionality when reading and writing Excel files,
+consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/).
+Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable`
+and `writetable`.
+
## Installation
Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencies.
@@ -18,17 +27,17 @@ Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencie
### Load an Excel file
-To read a Excel file into a ``DataFrame``, use the following julia code:
+To read an Excel file into a `DataFrame`, use the following julia code:
-````julia
+```julia
using ExcelFiles, DataFrames
df = DataFrame(load("data.xlsx", "Sheet1"))
-````
+```
-The call to ``load`` returns a ``struct`` that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a ``DataFrame``:
+The call to `load` returns an object that is an iterable table as defined by [IterableTables.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`:
-````julia
+```julia
using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly
# Load into a DataTable
@@ -45,46 +54,83 @@ ts = TS(load("data.xlsx", "Sheet1"))
# Plot directly with Gadfly
plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line)
-````
+```
+
+The `load` function takes a number of arguments and keywords:
+
+```julia
+ FileIO.load(
+ source::String,
+ [sheet::String,
+ [columns::String]];
+ [first_row::Int],
+ [column_labels::Vector{String}],
+ [header::Bool],
+ [normalizenames::Bool]
+ )
+```
-The ``load`` function also takes a number of parameters:
-
-````julia
-function load(f::FileIO.File{FileIO.format"Excel"}, range; keywords...)
-````
#### Arguments:
-* ``range``: either the name of the sheet in the Excel file to read, or a full Excel range specification (i.e. "Sheetname!A1:B2").
-* The ``keywords`` arguments are the same as in [ExcelReaders.jl](https://github.com/queryverse/ExcelReaders.jl) (which is used under the hood to read Excel files). When ``range`` is a sheet name, the keyword arguments for the ``readxlsheet`` function from ExcelReaders.jl apply, if ``range`` is a range specification, the keyword arguments for the ``readxl`` function apply.
+* `source`: The name of the file to be loaded.
+* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used.
+* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns.
+
+#### Keywords:
+
+* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet.
+* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`.
+* `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored.
+* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`
### Save an Excel file
The following code saves any iterable table as an excel file:
-````julia
+
+```julia
using ExcelFiles
save("output.xlsx", it)
-````
-This will work as long as it is any of the types supported as sources in IterableTables.jl.
+```
+This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`).
+
+The `save` function takes a number of arguments and keywords:
+
+```julia
+ FileIO.save(
+ source::String, data;
+ [sheetname::String],
+ [overwrite::Bool]
+ )
+```
+
+#### Arguments:
+
+* `source`: The name of the file to be created on save.
+
+#### Keywords:
+
+* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`.
+* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false`.
### Using the pipe syntax
-``load`` also support the pipe syntax. For example, to load an Excel file into a ``DataFrame``, one can use the following code:
+The `load` and `save` functions also support the pipe syntax. For example, to load an Excel file into a `DataFrame`, one can use the following code:
-````julia
+```julia
using ExcelFiles, DataFrame
df = load("data.xlsx", "Sheet1") |> DataFrame
-````
+```
To save an iterable table, one can use the following form:
-````julia
+```julia
using ExcelFiles, DataFrame
df = # Aquire a DataFrame somehow
df |> save("output.xlsx")
-````
+```
-The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the ``save`` function to store the results in a new file.
+The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries. For example, one can easily load an Excel file, pipe it into a query, and then pipe the result to the `save` function to store it in a new file.
diff --git a/data/TestData.xlsx b/data/TestData.xlsx
new file mode 100644
index 0000000..d188f4e
Binary files /dev/null and b/data/TestData.xlsx differ
diff --git a/docs/src/index.md b/docs/src/index.md
index e10b99d..1a79d63 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1 +1,124 @@
# Introduction
+
+This package provides support for Excel files under the
+[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package.
+
+It provides functionality to read simple tabular data from
+an Excel (.xlsx) file and to save simple tabular data to an
+Excel file.
+
+For more extensive functionality when reading and writing Excel files,
+consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/).
+Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable`
+and `writetable`.
+
+# Usage
+
+## Load an Excel file
+
+To read an Excel file into a `DataFrame`, use the following julia code:
+
+```julia
+using ExcelFiles, DataFrames
+
+df = DataFrame(load("data.xlsx", "Sheet1"))
+```
+
+The call to `load` returns an object that is an iterable table as defined by [IterableTables.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`:
+
+```julia
+using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly
+
+# Load into a DataTable
+dt = DataTable(load("data.xlsx", "Sheet1"))
+
+# Load into an IndexedTable
+it = IndexedTable(load("data.xlsx", "Sheet1"))
+
+# Load into a TimeArray
+ta = TimeArray(load("data.xlsx", "Sheet1"))
+
+# Load into a TS
+ts = TS(load("data.xlsx", "Sheet1"))
+
+# Plot directly with Gadfly
+plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line)
+```
+
+The `load` function takes a number of arguments and keywords:
+
+```julia
+ FileIO.load(
+ source::String,
+ [sheet::String,
+ [columns::String]];
+ [first_row::Int],
+ [column_labels::Vector{String}],
+ [header::Bool],
+ [normalizenames::Bool]
+ )
+```
+
+### Arguments:
+
+* `source`: The name of the file to be loaded.
+* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used.
+* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns.
+
+### Keywords:
+
+* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet.
+* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`.
+* `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored.
+* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`.
+
+## Save an Excel file
+
+The following code saves any iterable table as an excel file:
+```julia
+using ExcelFiles
+
+save("output.xlsx", it)
+```
+This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`).
+
+The `save` function takes a number of arguments and keywords:
+
+```julia
+ FileIO.save(
+ source::String, data;
+ [sheetname::String],
+ [overwrite::Bool]
+ )
+```
+
+### Arguments:
+
+* `source`: The name of the file to be created on save.
+
+### Keywords:
+
+* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`.
+* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false`.
+
+## Using the pipe syntax
+
+The `load` and `save` functions also support the pipe syntax. For example, to load an Excel file into a `DataFrame`, one can use the following code:
+
+```julia
+using ExcelFiles, DataFrames
+
+df = load("data.xlsx", "Sheet1") |> DataFrame
+```
+
+To save an iterable table, one can use the following form:
+
+```julia
+using ExcelFiles, DataFrames
+
+df = # Acquire a DataFrame somehow
+
+df |> save("output.xlsx")
+```
+
+The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries. For example, one can easily load an Excel file, pipe it into a query, and then pipe the result to the `save` function to store it in a new file.
diff --git a/src/ExcelFiles.jl b/src/ExcelFiles.jl
index 9b7eb6a..bce2a06 100644
--- a/src/ExcelFiles.jl
+++ b/src/ExcelFiles.jl
@@ -1,7 +1,7 @@
module ExcelFiles
-using ExcelReaders, XLSX, IteratorInterfaceExtensions, TableTraits, DataValues
+using XLSX, IteratorInterfaceExtensions, TableTraits, DataValues
using TableTraitsUtils, FileIO, TableShowUtils, Dates, Printf
import IterableTables
@@ -9,7 +9,8 @@ export load, save, File, @format_str
struct ExcelFile
filename::String
- range::String
+ sheet::Union{Nothing,String}
+ columns::Union{Nothing,String}
keywords
end
@@ -29,100 +30,43 @@ end
Base.Multimedia.showable(::MIME"application/vnd.dataresource+json", source::ExcelFile) = true
-function fileio_load(f::FileIO.File{FileIO.format"Excel"}, range; keywords...)
- return ExcelFile(f.filename, range, keywords)
+function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet, columns; kw...)
+ return ExcelFile(f.filename, sheet, columns, kw)
+end
+function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet; kw...)
+ return ExcelFile(f.filename, sheet, nothing, kw)
+end
+function fileio_load(f::FileIO.File{FileIO.format"Excel", String}; kw...)
+ return ExcelFile(f.filename, nothing, nothing, kw)
end
-function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; sheetname::AbstractString="")
+function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; kw...)
cols, colnames = TableTraitsUtils.create_columns_from_iterabletable(data, na_representation=:missing)
- return XLSX.writetable(f.filename, cols, colnames; sheetname=sheetname)
+ return XLSX.writetable(f.filename, cols, colnames; kw...)
end
IteratorInterfaceExtensions.isiterable(x::ExcelFile) = true
TableTraits.isiterabletable(x::ExcelFile) = true
-function gennames(n::Integer)
- res = Vector{Symbol}(undef, n)
- for i in 1:n
- res[i] = Symbol(@sprintf "x%d" i)
- end
- return res
-end
-
-function _readxl(file::ExcelReaders.ExcelFile, sheetname::AbstractString, startrow::Integer, startcol::Integer, endrow::Integer, endcol::Integer; header::Bool=true, colnames::Vector{Symbol}=Symbol[])
- data = ExcelReaders.readxl_internal(file, sheetname, startrow, startcol, endrow, endcol)
-
- nrow, ncol = size(data)
-
- if length(colnames) == 0
- if header
- headervec = data[1, :]
- NAcol = map(i -> isa(i, DataValues.DataValue) && DataValues.isna(i), headervec)
- headervec[NAcol] = gennames(count(!iszero, NAcol))
-
- # This somewhat complicated conditional makes sure that column names
- # that are integer numbers end up without an extra ".0" as their name
- colnames = [isa(i, AbstractFloat) ? ( modf(i)[1] == 0.0 ? Symbol(Int(i)) : Symbol(string(i)) ) : Symbol(i) for i in vec(headervec)]
+function _readxl(file::ExcelFile) # reads via XLSX.readtable; returns (table.data, column names as Symbols)
+ if isnothing(file.columns)
+ if isnothing(file.sheet)
+ table=XLSX.readtable(file.filename, 1; file.keywords...) # no sheet given: use sheet index 1 (the first sheet), not a sheet that happens to be named "Sheet1"
else
- colnames = gennames(ncol)
+ table=XLSX.readtable(file.filename, file.sheet; file.keywords...)
end
- elseif length(colnames) != ncol
- error("Length of colnames must equal number of columns in selected range")
+ else
+ table=XLSX.readtable(file.filename, file.sheet, file.columns; file.keywords...)
end
-
- columns = Array{Any}(undef, ncol)
-
- for i = 1:ncol
- if header
- vals = data[2:end,i]
- else
- vals = data[:,i]
- end
-
- # Check whether all non-NA values in this column
- # are of the same type
- type_of_el = length(vals) > 0 ? typeof(vals[1]) : Any
- for val = vals
- type_of_el = promote_type(type_of_el, typeof(val))
- end
-
- if type_of_el <: DataValue
- columns[i] = convert(DataValueArray{eltype(type_of_el)}, vals)
-
- # TODO Check wether this hack is correct
- for (j, v) in enumerate(columns[i])
- if v isa DataValue && !DataValues.isna(v) && v[] isa DataValue
- columns[i][j] = v[]
- end
- end
- else
- columns[i] = convert(Array{type_of_el}, vals)
- end
+ colnames=Vector{Symbol}(undef, length(table.data))
+ for (k, v) in table.column_label_index # k is the label, v its column position
+ colnames[v] = Symbol(k)
end
-
- return columns, colnames
+ return table.data, colnames
end
function IteratorInterfaceExtensions.getiterator(file::ExcelFile)
- column_data, col_names = if occursin("!", file.range)
- excelfile = openxl(file.filename)
-
- sheetname, startrow, startcol, endrow, endcol = ExcelReaders.convert_ref_to_sheet_row_col(file.range)
-
- _readxl(excelfile, sheetname, startrow, startcol, endrow, endcol; file.keywords...)
- else
- excelfile = openxl(file.filename)
- sheet = excelfile.workbook.sheet_by_name(file.range)
-
- keywords = filter(i -> !(i[1] in (:header, :colnames)), file.keywords)
- startrow, startcol, endrow, endcol = ExcelReaders.convert_args_to_row_col(sheet; keywords...)
-
- keywords2 = copy(file.keywords)
- keywords2 = filter(i -> !(i[1] in (:skipstartrows, :skipstartcols, :nrows, :ncols)), file.keywords)
-
- _readxl(excelfile, file.range, startrow, startcol, endrow, endcol; keywords2...)
- end
-
+ column_data, col_names = _readxl(file)
return create_tableiterator(column_data, col_names)
end
diff --git a/test/runtests.jl b/test/runtests.jl
index d1d0372..0415b98 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,24 +1,36 @@
using ExcelFiles
-using ExcelReaders
using IteratorInterfaceExtensions
using TableTraits
using TableTraitsUtils
using Dates
+using XLSX
using DataValues
using DataFrames
using Test
+data_directory = joinpath(dirname(pathof(ExcelFiles)), "..", "data")
+@assert isdir(data_directory)
+
@testset "ExcelFiles" begin
- filename = normpath(dirname(pathof(ExcelReaders)), "..", "test", "TestData.xlsx")
+ filename = joinpath(data_directory, "TestData.xlsx")
efile = load(filename, "Sheet1")
- @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
| Some Float64s | Some Strings | Some Bools | Mixed column | Mixed with NA | Float64 with NA | String with NA | Bool with NA | Some dates | Dates with NA | Some errors | Errors with NA | Column with NULL and then mixed |
|---|
| 1.0 | "A" | true | 2.0 | 9.0 | 3.0 | "FF" | #NA | 2015-03-03T00:00:00 | 1965-04-03T00:00:00 | #DIV/0! | #DIV/0! | #NA |
| 1.5 | "BB" | false | "EEEEE" | "III" | #NA | #NA | true | 2015-02-04T10:14:00 | 1950-08-09T18:40:00 | #N/A | #N/A | 3.4 |
| 2.0 | "CCC" | false | false | #NA | 3.5 | "GGG" | #NA | 1988-04-09T00:00:00 | 19:00:00 | #REF! | #NAME? | "HKEJW" |
| 2.5 | "DDDD" | true | 1.5 | true | 4.0 | "HHHH" | false | 15:02:00 | #NA | #NAME? | #NA | #NA |
"
+ # XLSX.jl v0.10.4
+ @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "| Some Float64s | Some Strings | Some Bools | Mixed column | Mixed with NA | Float64 with NA | String with NA | Bool with NA | Some dates | Dates with NA | Some errors | Errors with NA | Column with NULL and then mixed |
|---|
| 1 | "A" | true | 2 | 9 | 3 | "FF" | #NA | Date("2015-03-03") | Date("1965-04-03") | #NA | #NA | #NA |
| 1.5 | "BB" | false | "EEEEE" | "III" | #NA | #NA | true | 2015-02-04T10:14:00 | 1950-08-09T18:40:00 | #NA | #NA | 3.4 |
| 2 | "CCC" | false | false | #NA | 3.5 | "GGG" | #NA | Date("1988-04-09") | 19:00:00 | #NA | #NA | "HKEJW" |
| 2.5 | "DDDD" | true | 1.5 | true | 4 | "HHHH" | false | 15:02:00 | #NA | #NA | #NA | #NA |
"
+
+ # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly)
+# @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "| Some Float64s | Some Strings | Some Bools | Mixed column | Mixed with NA | Float64 with NA | String with NA | Bool with NA | Some dates | Dates with NA | Some errors | Errors with NA | Column with NULL and then mixed |
|---|
| 1.0 | "A" | true | 2 | 9 | 3.0 | "FF" | #NA | 2015-03-03 | Date("1965-04-03") | #NA | #NA | #NA |
| 1.5 | "BB" | false | "EEEEE" | "III" | #NA | #NA | true | 2015-02-04T10:14:00 | 1950-08-09T18:40:00 | #NA | #NA | 3.4 |
| 2.0 | "CCC" | false | false | #NA | 3.5 | "GGG" | #NA | 1988-04-09 | 19:00:00 | #NA | #NA | "HKEJW" |
| 2.5 | "DDDD" | true | 1.5 | true | 4.0 | "HHHH" | false | 15:02:00 | #NA | #NA | #NA | #NA |
"
+
+ # XLSX.jl v0.10.4
+ @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"string\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"string\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"string\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"string\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some 
dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}"
- @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2.0,\"Mixed with NA\":9.0,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03T00:00:00\",\"Dates with NA\":\"1965-04-03T00:00:00\",\"Some errors\":{\"errorcode\":7},\"Errors with NA\":{\"errorcode\":7},\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":{\"errorcode\":42},\"Errors with NA\":{\"errorcode\":42},\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09T00:00:00\",\"Dates with NA\":\"19:00:00\",\"Some errors\":{\"errorcode\":23},\"Errors with NA\":{\"errorcode\":29},\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some 
Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":{\"errorcode\":29},\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}"
+ # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly)
+# @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with 
NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}"
- @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2.0 │ 9.0 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed"
+# This test is truncated (... with 8 more columns:) so probably isn't robust - although it passes locally.
+# @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2 │ 9 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed"
@test TableTraits.isiterabletable(efile) == true
@test IteratorInterfaceExtensions.isiterable(efile) == true
@@ -27,7 +39,7 @@ using Test
@test isiterable(efile) == true
- full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1!C3:O7")), create_columns_from_iterabletable(load(filename, "Sheet1"))]
+ full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=3)), create_columns_from_iterabletable(load(filename, "Sheet1"))]
for (df, names) in full_dfs
@test length(df) == 13
@test length(df[1]) == 4
@@ -42,16 +54,13 @@ using Test
@test df[8] == [NA, true, NA, false]
@test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)]
@test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA]
- @test eltype(df[11]) == ExcelReaders.ExcelErrorCell
- @test df[12][1][] isa ExcelReaders.ExcelErrorCell
- @test df[12][2][] isa ExcelReaders.ExcelErrorCell
- @test df[12][3][] isa ExcelReaders.ExcelErrorCell
- @test df[12][4] == NA
+ @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()]
+ @test df[12] == [DataValue(), DataValue(), DataValue(), NA]
@test df[13] == [NA, 3.4, "HKEJW", NA]
end
- df, names = create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=false))
- @test names == [:x1,:x2,:x3,:x4,:x5,:x6,:x7,:x8,:x9,:x10,:x11,:x12,:x13]
+ df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false))
+ @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O]
@test length(df[1]) == 4
@test length(df) == 13
@test df[1] == [1., 1.5, 2., 2.5]
@@ -64,19 +73,14 @@ using Test
@test df[8] == [NA, true, NA, false]
@test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)]
@test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA]
- @test isa(df[11][1], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][2], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][3], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][4], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][1][], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][2][], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][3][], ExcelReaders.ExcelErrorCell)
- @test DataValues.isna(df[12][4])
+ @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()]
+ @test df[12] == [DataValue(), DataValue(), DataValue(), NA]
@test df[13] == [NA, 3.4, "HKEJW", NA]
+ @test DataValues.isna(df[12][4])
good_colnames = [:c1, :c2, :c3, :c4, :c5, :c6, :c7, :c8, :c9, :c10, :c11, :c12, :c13]
- df, names = create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=false, colnames=good_colnames))
+ df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false, column_labels=good_colnames))
@test names == good_colnames
@test length(df[1]) == 4
@test length(df) == 13
@@ -90,15 +94,10 @@ using Test
@test df[8] == [NA, true, NA, false]
@test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)]
@test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA]
- @test isa(df[11][1], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][2], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][3], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][4], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][1][], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][2][], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][3][], ExcelReaders.ExcelErrorCell)
- @test DataValues.isna(df[12][4])
+ @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()]
+ @test df[12] == [DataValue(), DataValue(), DataValue(), NA]
@test df[13] == [NA, 3.4, "HKEJW", NA]
+ @test DataValues.isna(df[12][4])
# Test for saving DataFrame to XLSX
input = (Day = ["Nov. 27","Nov. 28","Nov. 29"], Highest = [78,79,75]) |> DataFrame
@@ -114,7 +113,7 @@ using Test
@test input == output
rm("file.xlsx")
- df, names = create_columns_from_iterabletable(load(filename, "Sheet1", colnames=good_colnames))
+ df, names = create_columns_from_iterabletable(load(filename, "Sheet1"; column_labels=good_colnames))
@test names == good_colnames
@test length(df[1]) == 4
@test length(df) == 13
@@ -128,22 +127,25 @@ using Test
@test df[8] == [NA, true, NA, false]
@test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)]
@test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA]
- @test isa(df[11][1], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][2], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][3], ExcelReaders.ExcelErrorCell)
- @test isa(df[11][4], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][1][], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][2][], ExcelReaders.ExcelErrorCell)
- @test isa(df[12][3][], ExcelReaders.ExcelErrorCell)
- @test DataValues.isna(df[12][4])
+ @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()]
+ @test df[12] == [DataValue(), DataValue(), DataValue(), NA]
@test df[13] == [NA, 3.4, "HKEJW", NA]
+ @test DataValues.isna(df[12][4])
-# Too few colnames
- @test_throws ErrorException create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=true, colnames=[:c1, :c2, :c3, :c4]))
+# Too few column labels
+ # XLSX.jl v0.10.4
+ @test_throws AssertionError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4]))
+
+ # XLSX.jl v0.11.0
+# @test_throws XLSX.XLSXError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4]))
# Test for constructing DataFrame with empty header cell
- data, names = create_columns_from_iterabletable(load(filename, "Sheet2!C5:E7"))
- @test names == [:Col1, :x1, :Col3]
+ data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E"))
+ @test names == [:Col1, Symbol("#Empty"), :Col3]
+
+ # XLSX.jl v0.11.0. The `normalizenames` keyword not available in 0.10.4
+# data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E"; normalizenames=true))
+# @test names == [:Col1, :_Empty, :Col3]
end