From fa11174c1a1c5ad5eea9705d765698c434eeec31 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 24 Sep 2023 16:37:39 -0600 Subject: [PATCH 01/55] Group: turn on testing --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index c01e615..e0e43f7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using Test # include("example.jl") include("object.jl") include("raw.jl") - # include("group.jl") + include("group.jl") # include("file.jl") # include("dataset.jl") # include("attr.jl") From f830c73ab5a63047a0719dad79989813f97bcef6 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 24 Sep 2023 16:37:53 -0600 Subject: [PATCH 02/55] Group: fix import in test --- test/group.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/group.jl b/test/group.jl index 1c05e7a..86f6d19 100644 --- a/test/group.jl +++ b/test/group.jl @@ -1,12 +1,14 @@ using Exdir using Test +import Exdir + include("support.jl") @testset "group_init" begin fx = setup_teardown_folder() - grp = Group( + grp = Exdir.Group( root_directory = fx.testdir, parent_path = "", object_name = "test_object", From 8532c7863baecc5332f49b46bbac38cd00dbdfd1 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 24 Sep 2023 18:43:44 -0600 Subject: [PATCH 03/55] Group: WIP on iteration over group contents --- src/Exdir.jl | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 1a5c5a5..7322c38 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -292,12 +292,37 @@ function Base.getindex(grp::AbstractGroup, name::AbstractString) end end -function Base.iterate(grp::AbstractGroup) - () +mutable struct GroupIteratorState + base + root + current_base + next_name + itr +end + +# Iterate over all the objects in the group. +function Base.iterate(grp::AbstractGroup, state=nothing) + # assert_file_open(grp.file) + if isnothing(state) + itr = walkdir(grp.root_directory) + # The first object we want to return will be the first element of + # `dirs` and not "this" directory (the passed-in group). + (root, dirs, files) = first(itr) + next_name = first(dirs) + state = GroupIteratorState(grp, root, root, next_name, itr) + end + try + (root, dirs, files) = first(state.itr) + @assert files == [META_FILENAME] + obj = getindex(grp, this_name) + (obj, state) + catch + nothing + end end function Base.length(grp::AbstractGroup) - 0 + length(collect(grp)) end function delete!(grp::AbstractGroup, name::AbstractString) From be4dadf6361f77489625435f0e8ccfcd1542cff2 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 26 Sep 2023 16:52:45 -0600 Subject: [PATCH 04/55] Group: WIP on iteration over group contents --- src/Exdir.jl | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 7322c38..a727d23 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -293,7 +293,7 @@ function Base.getindex(grp::AbstractGroup, name::AbstractString) end mutable struct GroupIteratorState - base + base_grp root current_base next_name @@ -308,21 +308,31 @@ function Base.iterate(grp::AbstractGroup, state=nothing) # The first object we want to return will be the first element of # `dirs` and not "this" directory (the passed-in group). (root, dirs, files) = first(itr) - next_name = first(dirs) + if !isempty(dirs) + next_name = first(dirs) + else + return nothing + end state = GroupIteratorState(grp, root, root, next_name, itr) end - try + # `try` is to attempt getting the next item from the walkdir channel, + # which itself follows the iteration protocol. + if !isempty(state.itr) (root, dirs, files) = first(state.itr) @assert files == [META_FILENAME] - obj = getindex(grp, this_name) - (obj, state) - catch - nothing + obj = getindex(state.base_grp, state.next_name) + # TODO state.root = ? + # state.current_base + state.next_name = first(dirs) + return (obj, state) + else + return nothing end end function Base.length(grp::AbstractGroup) - length(collect(grp)) + # length(collect(grp)) + 0 end function delete!(grp::AbstractGroup, name::AbstractString) @@ -394,14 +404,6 @@ function Base.convert(::Type{Group}, file::File) ) end -function Base.iterate(::File) - ("hello", "world") -end - -function Base.iterate(::File, ::String) - nothing -end - function Base.print(io::IO, file::File) msg = "" print(io, msg) From a9672574e0acf5fd6d75aa1d555f57ee2406301d Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 5 Oct 2023 21:04:39 -0400 Subject: [PATCH 05/55] Group: iteration is shallow, not deep --- src/Exdir.jl | 131 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 101 insertions(+), 30 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index a727d23..b433659 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -248,6 +248,24 @@ end # nothing # end +function unsafe_dataset(grp::AbstractGroup, name) + Dataset( + root_directory = grp.root_directory, + parent_path = grp.relative_path, + object_name = name, + file = grp.file + ) +end + +function unsafe_group(grp::AbstractGroup, name) + Group( + root_directory = grp.root_directory, + parent_path = grp.relative_path, + object_name = name, + file = grp.file + ) +end + function Base.getindex(grp::AbstractGroup, name::AbstractString) assert_file_open(grp.file) path = name_to_asserted_group_path(name) @@ -283,50 +301,103 @@ function Base.getindex(grp::AbstractGroup, name::AbstractString) meta_data = YAML.load_file(meta_filename) typename = meta_data[EXDIR_METANAME][TYPE_METANAME] if typename == DATASET_TYPENAME - return _dataset(grp, name) + return unsafe_dataset(grp, name) elseif typename == GROUP_TYPENAME - return _group(grp, name) + return unsafe_group(grp, name) else error_string = "Object $name has data type $typename.\nWe cannot open objects of this type." throw(ArgumentError(error_string)) end end -mutable struct GroupIteratorState +struct GroupIteratorState + "Keep track of the base Group originally passed in" base_grp + "Unused" root + "Unused" current_base - next_name + "The current object (group, dset) name we are looking at" + current_obj_name + "Result of collect(walkdir(grp.root_directory))" itr + "Current index into itr" + index + "Fully-typed object" + obj +end + +# This is work on fully recursive iteration. + +# # Iterate over all the objects in the group. +# function Base.iterate(grp::AbstractGroup) +# itr = collect(walkdir(grp.root_directory)) +# # "This" directory (the passed-in group) will always be the first result +# # and we want to ignore it. +# if length(itr) < 2 +# return nothing +# end +# index = 2 +# (root, dirs, files) = itr[index] +# @assert startswith(root, grp.root_directory) +# @assert files == [META_FILENAME] +# len_prefix = length(grp.root_directory) +# current_obj_name = root[len_prefix + 1 : end] +# state = GroupIteratorState( +# grp, +# root, +# root, +# current_obj_name, +# itr, +# index + 1, +# getindex(grp, current_obj_name) +# ) +# item = state.obj.name +# (item, state) +# end + +# # Iterate over all the objects in the group. +# function Base.iterate(grp::AbstractGroup, state) +# # assert_file_open(grp.file) +# if state.index <= length(state.itr) +# (root, dirs, files) = state.itr[state.index] +# @assert startswith(root, grp.root_directory) +# @assert files == [META_FILENAME] +# len_prefix = length(grp.root_directory) +# current_obj_name = root[len_prefix + 1 : end] +# new_state = GroupIteratorState( +# state.base_grp, +# state.root, +# state.root, +# current_obj_name, +# state.itr, +# state.index + 1, +# getindex(state.base_grp, current_obj_name) +# ) +# item = new_state.obj.name +# (item, new_state) +# else +# nothing +# end +# end + +function Base.iterate(grp::AbstractGroup) + itr = walkdir(grp.root_directory) + (root, dirs, files) = first(itr) + @assert root == grp.root_directory + @assert files == [META_FILENAME] + if isempty(dirs) + nothing + else + (dirs[1], dirs[2:end]) + end end -# Iterate over all the objects in the group. -function Base.iterate(grp::AbstractGroup, state=nothing) - # assert_file_open(grp.file) - if isnothing(state) - itr = walkdir(grp.root_directory) - # The first object we want to return will be the first element of - # `dirs` and not "this" directory (the passed-in group). - (root, dirs, files) = first(itr) - if !isempty(dirs) - next_name = first(dirs) - else - return nothing - end - state = GroupIteratorState(grp, root, root, next_name, itr) - end - # `try` is to attempt getting the next item from the walkdir channel, - # which itself follows the iteration protocol. - if !isempty(state.itr) - (root, dirs, files) = first(state.itr) - @assert files == [META_FILENAME] - obj = getindex(state.base_grp, state.next_name) - # TODO state.root = ? - # state.current_base - state.next_name = first(dirs) - return (obj, state) +function Base.iterate(grp::AbstractGroup, dirs) + if isempty(dirs) + nothing else - return nothing + (dirs[1], dirs[2:end]) end end From 1ca48ba71d2943143a8592d0aa7ef68ba580046a Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 5 Oct 2023 23:11:50 -0400 Subject: [PATCH 06/55] Group: fix iterate and length --- src/Exdir.jl | 30 +++++++++--------------------- test/group.jl | 2 +- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index b433659..ca09de9 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -381,30 +381,18 @@ end # end # end -function Base.iterate(grp::AbstractGroup) - itr = walkdir(grp.root_directory) - (root, dirs, files) = first(itr) - @assert root == grp.root_directory - @assert files == [META_FILENAME] - if isempty(dirs) - nothing - else - (dirs[1], dirs[2:end]) - end -end - -function Base.iterate(grp::AbstractGroup, dirs) - if isempty(dirs) - nothing - else - (dirs[1], dirs[2:end]) +function Base.iterate(grp::AbstractGroup, dirs=nothing) + if isnothing(dirs) + grp_root = joinpath(grp.root_directory, grp.relative_path) + itr = walkdir(grp_root) + (root, dirs, files) = first(itr) + @assert root == grp_root + @assert files == [META_FILENAME] end + isempty(dirs) ? nothing : (dirs[1], dirs[2:end]) end -function Base.length(grp::AbstractGroup) - # length(collect(grp)) - 0 -end +Base.length(grp::AbstractGroup) = length(first(walkdir(joinpath(grp.root_directory, grp.relative_path)))[2]) function delete!(grp::AbstractGroup, name::AbstractString) nothing diff --git a/test/group.jl b/test/group.jl index 86f6d19..133be1a 100644 --- a/test/group.jl +++ b/test/group.jl @@ -47,7 +47,7 @@ end grp2 = create_group(grp, "a") - grp3 = create_group(grp2, "b") + grp3 = create_group(grp, "b") @test length(grp) == 2 @test length(grp2) == 0 From 6a306a3b271b6f680cab22d6afb9e8e44cd0252e Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Fri, 6 Oct 2023 14:29:32 -0400 Subject: [PATCH 07/55] file for testing --- test/dummy.jl | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 test/dummy.jl diff --git a/test/dummy.jl b/test/dummy.jl new file mode 100644 index 0000000..fcff6d7 --- /dev/null +++ b/test/dummy.jl @@ -0,0 +1,8 @@ +include("support.jl") +f = exdir_tmpfile() +g1 = create_group(f, "g1") +g2 = create_group(f, "g2") +g3 = create_group(f, "g3") +g4 = create_group(g2, "g4") +g5 = create_group(g2, "g5") +g6 = create_group(g3, "g6") From f9b8c251c7bf8cc314345b989fbc725caba48169 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 7 Oct 2023 10:54:10 -0400 Subject: [PATCH 08/55] Group: get --- src/Exdir.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index ca09de9..5709513 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -244,9 +244,13 @@ function Base.in(name::AbstractString, grp::AbstractGroup) end end -# function Base.get(grp::AbstractGroup, name::AbstractString) -# nothing -# end +function Base.get(grp::AbstractGroup, name::AbstractString, default=nothing) + if name in grp + grp[name] + else + default + end +end function unsafe_dataset(grp::AbstractGroup, name) Dataset( From ebb1da1ddf14aa3df9d6b73f257e5b58a7398c91 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 7 Oct 2023 17:10:55 -0400 Subject: [PATCH 09/55] Group: fix path formation --- src/Exdir.jl | 64 +++++++++++++++++++----------------------------- src/path.jl | 18 +++++++++++--- test/group.jl | 20 +++++++++++++++ test/path.jl | 38 ++++++++++++++++++++++++++++ test/runtests.jl | 1 + 5 files changed, 99 insertions(+), 42 deletions(-) create mode 100644 test/path.jl diff --git a/src/Exdir.jl b/src/Exdir.jl index 5709513..ee2f003 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -8,7 +8,6 @@ export create_dataset, create_group, create_raw, - delete!, exdiropen, IOError, is_nonraw_object_directory, @@ -36,8 +35,7 @@ struct Object <: AbstractObject name::String function Object(; root_directory, parent_path, object_name, file) - relative_path = joinpath(parent_path, object_name) - relative_path = if relative_path == "." "" else relative_path end + relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path new( root_directory, @@ -159,8 +157,7 @@ struct Raw <: AbstractObject name::String function Raw(; root_directory, parent_path, object_name, file=nothing) - relative_path = joinpath(parent_path, object_name) - relative_path = if relative_path == "." "" else relative_path end + relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path new( root_directory, @@ -215,8 +212,7 @@ struct Group <: AbstractGroup name::String function Group(; root_directory, parent_path, object_name, file=nothing) - relative_path = joinpath(parent_path, object_name) - relative_path = if relative_path == "." "" else relative_path end + relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path new( root_directory, @@ -398,7 +394,7 @@ end Base.length(grp::AbstractGroup) = length(first(walkdir(joinpath(grp.root_directory, grp.relative_path)))[2]) -function delete!(grp::AbstractGroup, name::AbstractString) +function Base.delete!(grp::AbstractGroup, name::AbstractString) nothing end @@ -419,8 +415,7 @@ struct File <: AbstractGroup user_mode::String function File(; root_directory, parent_path, object_name, file, user_mode) - relative_path = joinpath(parent_path, object_name) - relative_path = if relative_path == "." "" else relative_path end + relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path new( root_directory, @@ -472,7 +467,7 @@ function Base.print(io::IO, file::File) print(io, msg) end -function delete!(file::File, name::AbstractString) +function Base.delete!(file::File, name::AbstractString) nothing end @@ -645,41 +640,17 @@ end """ -function create_group(file::File, name::AbstractString) - path = remove_root(name) - _create_group(file, name) -end +create_group(file::File, name::AbstractString) = _create_group(file, remove_root(name)) """ create_group(grp, name) """ -function create_group(grp::Group, name::AbstractString) - _create_group(grp, name) -end - -# """ -# require_group(file, name) - +create_group(grp::Group, name::AbstractString) = _create_group(grp, name) -# """ -# function require_group(file::File, name::AbstractString) -# path = remove_root(name) -# Group( -# root_directory =, -# parent_path = , -# object_name = , -# file =, -# ) -# end - -""" - require_group(grp, name) - - -""" -function require_group(grp::Group, name::AbstractString) +function _require_group(grp, name::AbstractString) + # assert_file_open path = name_to_asserted_group_path(name) if length(splitpath(path)) > 1 @@ -704,6 +675,21 @@ function require_group(grp::Group, name::AbstractString) create_group(grp, name) end +""" + require_group(file, name) + + +""" +require_group(file::File, name::AbstractString) = _require_group(file, remove_root(name)) + +""" + require_group(grp, name) + + +""" +require_group(grp::Group, name::AbstractString) = _require_group(grp, name) + + function Base.write(dset::Dataset, data) error("unimplemented") end diff --git a/src/path.jl b/src/path.jl index 1aa8e5b..4de3f6f 100644 --- a/src/path.jl +++ b/src/path.jl @@ -1,5 +1,14 @@ +function clean_path(path::AbstractString) + path = normpath(path) + if isdirpath(path) + dirname(path) + else + path + end +end + function name_to_asserted_group_path(name::AbstractString) - path = name + path = clean_path(name) if isabspath(path) throw(ArgumentError("Absolute paths are currently not supported and unlikely to be implemented.")) elseif splitpath(path) == [""] @@ -9,11 +18,14 @@ function name_to_asserted_group_path(name::AbstractString) end function remove_root(path::AbstractString) + path = clean_path(path) components = splitpath(path) - rel = if components[1] == "/" + if components[1] == "/" joinpath(components[2:length(components)]) else path end - rel end + +form_relative_path(parent_path::AbstractString, object_name::AbstractString) = + joinpath(parent_path, object_name) |> clean_path diff --git a/test/group.jl b/test/group.jl index 133be1a..d35de43 100644 --- a/test/group.jl +++ b/test/group.jl @@ -25,6 +25,26 @@ include("support.jl") cleanup_fixture(fx) end +@testset "group_init_trailing" begin + fx = setup_teardown_folder() + + grp = Exdir.Group( + root_directory = fx.testdir, + parent_path = "", + object_name = "test_object2/", + file = nothing + ) + + @test grp.root_directory == fx.testdir + @test grp.object_name == "test_object2/" + @test grp.parent_path == "" + @test isnothing(grp.file) + @test grp.relative_path == "test_object2" + @test grp.name == "/test_object2" + + cleanup_fixture(fx) +end + @testset "group_create" begin (fx, f) = setup_teardown_file() diff --git a/test/path.jl b/test/path.jl new file mode 100644 index 0000000..78b68a8 --- /dev/null +++ b/test/path.jl @@ -0,0 +1,38 @@ +using Test + +import Exdir: + clean_path, + name_to_asserted_group_path, + remove_root, + form_relative_path + +@testset "path" begin + @testset "clean_path" begin + @test clean_path("/hello") == "/hello" + @test clean_path("/hello/") == "/hello" + @test clean_path("/hello///////") == "/hello" + @test clean_path("/hello////world///") == "/hello/world" + @test clean_path("./hello////world///") == "hello/world" + end + + # @testset "name_to_asserted_group_path" begin + + # end + + @testset "remove_root" begin + @test remove_root("hello") == "hello" + @test remove_root("/hello") == "hello" + @test remove_root("///hello") == "hello" + end + + @testset "form_relative_path" begin + @test form_relative_path(".", "citrus") == "citrus" + @test form_relative_path("./citrus", "") == "citrus" + @test form_relative_path("citrus", "lemon") == "citrus/lemon" + @test form_relative_path("./citrus", "lemon") == "citrus/lemon" + @test form_relative_path("./citrus", "lemon/") == "citrus/lemon" + @test form_relative_path("./citrus", "lemon/meyer/") == "citrus/lemon/meyer" + @test form_relative_path(".", "") == "" + @test form_relative_path("./", "") == "" + end +end diff --git a/test/runtests.jl b/test/runtests.jl index e0e43f7..ac75924 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using Test @testset "Exdir.jl" begin # include("example_hdf5.jl") # include("example.jl") + include("path.jl") include("object.jl") include("raw.jl") include("group.jl") From f7f472182a1ee052aa9f7c44c9750d0cf54c9cfd Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Mon, 9 Oct 2023 21:13:05 -0400 Subject: [PATCH 10/55] delete from groups --- src/Exdir.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index ee2f003..58b9d42 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -395,7 +395,11 @@ end Base.length(grp::AbstractGroup) = length(first(walkdir(joinpath(grp.root_directory, grp.relative_path)))[2]) function Base.delete!(grp::AbstractGroup, name::AbstractString) - nothing + @assert name in grp + @assert !isabspath(name) + path = joinpath(grp.root_directory, name) + @assert isdir(path) + rm(path, recursive=true) end struct IOError <: Exception @@ -467,10 +471,6 @@ function Base.print(io::IO, file::File) print(io, msg) end -function Base.delete!(file::File, name::AbstractString) - nothing -end - const EXTENSION = ".exdir" """ From 4cfd07b09566a2a4cf4e803ec15c7ebb1c7b02e6 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 10 Oct 2023 07:45:36 -0400 Subject: [PATCH 11/55] Group: fix forming inner path to delete --- src/Exdir.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 58b9d42..94d831a 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -397,7 +397,7 @@ Base.length(grp::AbstractGroup) = length(first(walkdir(joinpath(grp.root_directo function Base.delete!(grp::AbstractGroup, name::AbstractString) @assert name in grp @assert !isabspath(name) - path = joinpath(grp.root_directory, name) + path = joinpath(grp.root_directory, grp.relative_path, name) @assert isdir(path) rm(path, recursive=true) end From 94d6b251bca3ffc97ff4791018929e3e27e0c23d Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 10 Oct 2023 09:43:31 -0400 Subject: [PATCH 12/55] Group: keys and values --- src/Exdir.jl | 12 ++++++++---- test/group.jl | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 94d831a..5b00cef 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -8,6 +8,7 @@ export create_dataset, create_group, create_raw, + delete_object, exdiropen, IOError, is_nonraw_object_directory, @@ -395,12 +396,16 @@ end Base.length(grp::AbstractGroup) = length(first(walkdir(joinpath(grp.root_directory, grp.relative_path)))[2]) function Base.delete!(grp::AbstractGroup, name::AbstractString) - @assert name in grp + if !in(name, grp) + # throw(KeyError("No such object '$(name)' in path '$(grp.name)'")) + throw(KeyError(name)) + end @assert !isabspath(name) path = joinpath(grp.root_directory, grp.relative_path, name) @assert isdir(path) rm(path, recursive=true) end +delete_object(grp::AbstractGroup, name::AbstractString) = delete!(grp, name) struct IOError <: Exception msg::String @@ -568,10 +573,9 @@ function Base.close(file::File) nothing end -function Base.keys(grp::AbstractGroup) -end - +Base.keys(grp::AbstractGroup) = collect(grp) Base.haskey(grp::AbstractGroup, name::AbstractString) = in(name, grp) +Base.values(grp::AbstractGroup) = [getindex(grp, key) for key in keys(grp)] function Base.setindex!(attrs::Attribute, value, name::AbstractString) end diff --git a/test/group.jl b/test/group.jl index d35de43..57ea016 100644 --- a/test/group.jl +++ b/test/group.jl @@ -93,6 +93,7 @@ end cleanup_fixture(fx) end +# Starting .create_group argument with /. @testset "group_create_absolute" begin (fx, f) = setup_teardown_file() @@ -111,6 +112,7 @@ end end @testset "group_create_intermediate" begin + # intermediate groups can be created automatically. (fx, f) = setup_teardown_file() grp = create_group(f, "test") @@ -129,6 +131,7 @@ end cleanup_fixture(fx) end +# Name conflic causes group creation to fail with ArgumentError. @testset "group_create_exception" begin (fx, f) = setup_teardown_file() @@ -142,6 +145,8 @@ end cleanup_fixture(fx) end +# Feature: Groups can be auto-created, or opened via .require_group +# Existing group is opened and returned. @testset "group_open_existing" begin (fx, f) = setup_teardown_file() @@ -158,19 +163,47 @@ end cleanup_fixture(fx) end +# Group is created if it doesn't exist. @testset "group_create" begin (fx, f) = setup_teardown_file() + grp = create_group(f, "test") + + grp2 = require_group(grp, "foo") + @test isa(grp2, Exdir.Group) + @test grp2.name == "/test/foo" + cleanup_fixture(fx) end +# Opening conflicting object results in TODOError. @testset "group_require_exception" begin (fx, f) = setup_teardown_file() + grp = create_group(f, "test") + + # grp.create_dataset("foo", (1,)) + + # with pytest.raises(TypeError): + # grp.require_group("foo") + cleanup_fixture(fx) end -# set_item_intermediatex +# TODO +# @testset "group_set_item_intermediate" begin +# (_, f) = setup_teardown_file() + +# group1 = create_group(f, "group1") +# group2 = create_group(group1, "group2") +# group3 = create_group(group2, "group3") +# f["group1/group2/group3/dataset"] = [1, 2, 3] + +# @test_ isa(f["group1/group2/group3/dataset"], Exdir.Dataset) +# @test f["group1/group2/group3/dataset"].data == [1, 2, 3] + +# cleanup_fixture(fx) +# end @testset "group_delete" begin (fx, f) = setup_teardown_file() @@ -182,6 +215,8 @@ end delete!(grp, "foo") @test !in("foo", grp) + # alias delete_object as in HDF5.jl + create_group(grp, "bar") @test in("bar", grp) @@ -200,6 +235,8 @@ end delete!(f, "test") @test !in("test", f) + # alias delete_object as in HDF5.jl + create_group(f, "test2") @test in("test2", f) @@ -216,25 +253,28 @@ end create_raw(grp, "foo") @test in("foo", grp) - # Julia dicts delete!(grp, "foo") @test !in("foo", grp) + # alias delete_object as in HDF5.jl + create_raw(grp, "bar") @test in("bar", grp) - # HDF5.jl delete_object(grp, "bar") @test !in("bar", grp) cleanup_fixture(fx) end +# Deleting non-existent object raises TODOError @testset "group_nonexisting" begin (fx, f) = setup_teardown_file() - # TODO - match = "No such object: 'foo' in path *" + grp = create_group(f, "test") + + # @test_throws "KeyError: No such object: 'foo' in path *" delete!(grp, "foo") + @test_throws KeyError delete!(grp, "foo") cleanup_fixture(fx) end From aed17d0c8671f9651a8507aeeec53f96dc3bbfee Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 10 Oct 2023 09:47:20 -0400 Subject: [PATCH 13/55] Nested testsets --- test/attr.jl | 3 +++ test/dataset.jl | 4 ++++ test/file.jl | 4 ++++ test/group.jl | 4 ++++ test/object.jl | 4 ++++ test/raw.jl | 4 ++++ 6 files changed, 23 insertions(+) diff --git a/test/attr.jl b/test/attr.jl index e69de29..24c8063 100644 --- a/test/attr.jl +++ b/test/attr.jl @@ -0,0 +1,3 @@ +@@testset "attr" begin + +end diff --git a/test/dataset.jl b/test/dataset.jl index ed5c735..e53b592 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -1,6 +1,8 @@ using Exdir using Test +@testset "dataset" begin + # Create a scalar dataset. @testset "dataset_create_scalar" begin # TODO fixture @@ -68,3 +70,5 @@ end # @testset "dataset_intermediate_group" begin # end + +end diff --git a/test/file.jl b/test/file.jl index 1d6c781..c0e8d80 100644 --- a/test/file.jl +++ b/test/file.jl @@ -21,6 +21,8 @@ function remove(name) end end +@testset "file" begin + @testset "form_location" begin @test form_location("/hello.exdir") == "/hello.exdir" @test form_location("/hello") == "/hello.exdir" @@ -478,3 +480,5 @@ end # assert isinstance(f, File) # assert not f + +end diff --git a/test/group.jl b/test/group.jl index 57ea016..d9a596b 100644 --- a/test/group.jl +++ b/test/group.jl @@ -5,6 +5,8 @@ import Exdir include("support.jl") +@testset "group" begin + @testset "group_init" begin fx = setup_teardown_folder() @@ -510,3 +512,5 @@ end cleanup_fixture(fx) end + +end diff --git a/test/object.jl b/test/object.jl index 338e0b9..a15a683 100644 --- a/test/object.jl +++ b/test/object.jl @@ -5,6 +5,8 @@ import Exdir: Object, open_object, ATTRIBUTES_FILENAME, META_FILENAME include("support.jl") +@testset "object" begin + @testset "object_init" begin fx = setup_teardown_folder() @@ -89,3 +91,5 @@ end cleanup_fixture(fx) end + +end diff --git a/test/raw.jl b/test/raw.jl index cc1bee4..7371830 100644 --- a/test/raw.jl +++ b/test/raw.jl @@ -5,6 +5,8 @@ import Exdir include("support.jl") +@testset "raw" begin + @testset "raw_init" begin fx = setup_teardown_folder() @@ -75,3 +77,5 @@ end @test ispath(joinpath(f.directory, "group", "dataset", "raw")) end + +end From 94a20b3c6dabff18401c4265231ec17548c39829 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Wed, 11 Oct 2023 22:18:31 -0400 Subject: [PATCH 14/55] Group: iteration over pairs --- test/group.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/group.jl b/test/group.jl index d9a596b..f49b3ae 100644 --- a/test/group.jl +++ b/test/group.jl @@ -451,10 +451,13 @@ end grpd = create_group(grp, "d") grpc = create_group(grp, "c") - # TODO - # for i, (key, value) in enumerate(grp.items()): - # assert key == names[i] - # assert value == groups[i] + names = ["a", "b", "c", "d"] + groups = [grpa, grpb, grpc, grpd] + + for (i, (k, v)) in enumerate(pairs(grp)) + @test k == names[i] + @test v == groups[i] + end cleanup_fixture(fx) end From 504b0ee81f1d84856c35c481850be4db624fe4f1 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Wed, 11 Oct 2023 22:47:26 -0400 Subject: [PATCH 15/55] Dataset: start testing --- src/Exdir.jl | 21 ++++++++------- test/dataset.jl | 67 +++++++++++++++++++++++++++++------------------- test/runtests.jl | 2 +- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 5b00cef..c7b7f32 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -179,7 +179,9 @@ struct Dataset <: AbstractObject relative_path::String name::String - function Dataset(; root_directory, parent_path, object_name, file) + data + + function Dataset(; root_directory, parent_path, object_name, file, data) relative_path = joinpath(parent_path, object_name) relative_path = if relative_path == "." "" else relative_path end name = "/" * relative_path @@ -189,20 +191,16 @@ struct Dataset <: AbstractObject object_name, file, relative_path, - name + name, + data ) end end -function Base.iterate(dset::Dataset) - () -end - +Base.iterate(dset::Dataset) = iterate(dset.data) +Base.iterate(dset::Dataset, state) = iterate(dset.data, state) Base.length(dset::Dataset) = prod(size(dset)) - -function Base.size(dset::Dataset) - () -end +Base.size(dset::Dataset) = size(dset.data) struct Group <: AbstractGroup root_directory::String @@ -763,7 +761,8 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; root_directory = grp.root_directory, parent_path = grp.relative_path, object_name = name, - file = grp.file + file = grp.file, + data = prepared_data ) # dataset._reset_data(prepared_data, attrs, None) # meta already set above dataset diff --git a/test/dataset.jl b/test/dataset.jl index e53b592..7f22c78 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -5,51 +5,66 @@ using Test # Create a scalar dataset. @testset "dataset_create_scalar" begin - # TODO fixture - f = exdiropen("dataset_create_scalar.exdir", "w") + (fx, f) = setup_teardown_file() + grp = create_group(f, "test") dset = create_dataset(grp, "foo"; shape=()) @test size(dset) == () # @test collect(dset) == 0 + + cleanup_fixture(fx) end -# # Create a size-1 dataset. +# Create a size-1 dataset. @testset "dataset_create_simple" begin - f = exdiropen("dataset_create_simple.exdir", "w") + (fx, f) = setup_teardown_file() + grp = create_group(f, "test") dset = create_dataset(grp, "foo"; shape=(1,)) @test size(dset) == (1,) + + cleanup_fixture(fx) end -# # Create an extended dataset. -# @testset "dataset_create_extended" begin -# f = exdiropen("dataset_create_extended.exdir", "w") -# grp = create_group(f, "test") +# Create an extended dataset. +@testset "dataset_create_extended" begin + (fx, f) = setup_teardown_file() -# dset = create_dataset(grp, "foo"; shape=(63,)) -# @test shape(dset) == (63,) -# @test length(dset) == 63 + grp = create_group(f, "test") -# dset = create_dataset(grp, "bar"; shape=(6, 10)) -# @test shape(dset) == (6, 10) -# @test length(dset) == 60 -# end + dset = create_dataset(grp, "foo"; shape=(63,)) + @test size(dset) == (63,) + @test length(dset) == 63 -# # Confirm that the default dtype is Float64. -# @testset "dataset_default_dtype" begin -# f = exdiropen("dataset_default_dtype.exdir", "w") -# grp = create_group(f, "test") + dset = create_dataset(grp, "bar"; shape=(6, 10)) + @test size(dset) == (6, 10) + @test length(dset) == 60 -# dset = create_dataset(grp, "foo"; shape=(63,)) -# @test isa(collect(dset), AbstractArray{Float64}) -# end + cleanup_fixture(fx) +end -# # Missing shape raises TypeError. -# @testset "dataset_missing_shape" begin - -# end +# Confirm that the default dtype is Float64. +@testset "dataset_default_dtype" begin + (fx, f) = setup_teardown_file() + + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; shape=(63,)) + @test isa(collect(dset), AbstractArray{Float64}) + + cleanup_fixture(fx) +end + +# Missing shape raises TypeError. +@testset "dataset_missing_shape" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end # # Confirm that an alternate dtype can be specified. # @testset "dataset_short_int" begin diff --git a/test/runtests.jl b/test/runtests.jl index ac75924..9edf654 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,6 @@ using Test include("raw.jl") include("group.jl") # include("file.jl") - # include("dataset.jl") + include("dataset.jl") # include("attr.jl") end From e049740384405f368c746d920d879d9112f29de4 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 12 Oct 2023 08:50:15 -0400 Subject: [PATCH 16/55] Dataset: stubs for remaining tests --- test/dataset.jl | 362 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 349 insertions(+), 13 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index 7f22c78..a1508fc 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -61,29 +61,365 @@ end @testset "dataset_missing_shape" begin (fx, f) = setup_teardown_file() + @test_throws TypeError create_dataset(f, "foo") + + cleanup_fixture(fx) +end + +# Confirm that an alternate dtype can be specified. +@testset "dataset_short_int" begin + (fx, f) = setup_teardown_file() + + dset = create_dataset(f, "foo"; shape=(63,), dtype=Int16) + @test isa(collect(dset), AbstractArray{Int16}) + + cleanup_fixture(fx) +end + +# Create a scalar dataset from existing array. +@testset "dataset_create_scalar_data" begin + (fx, f) = setup_teardown_file() + + grp = create_group(f, "test") + + data = ones() + dset = create_dataset("foo"; data=data) + @test size(dset) == size(data) + + cleanup_fixture(fx) +end + +# Create an extended dataset from existing data. +@testset "dataset_create_extended_data" begin + (fx, f) = setup_teardown_file() + + grp = create_group(f, "test") + + data = ones(63) + dset = create_dataset("foo"; data=data) + @test size(dset) == size(data) + + cleanup_fixture(fx) +end + +@testset "dataset_intermediate_group" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_reshape" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_create" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_create_existing" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_shape_conflict" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_type_conflict" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_dtype_conflict" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_dtype_close" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_create_fillval" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_compound" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_exc" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_string" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_dtype" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_len" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_len_scalar" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_iter" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_iter_scalar" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_trailing_slash" begin + (fx, f) = setup_teardown_file() + # TODO - cleanup_fixture(fx) + cleanup_fixture(fx) +end + +@testset "dataset_compound" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_assign" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_set_data" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_eq_false" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_eq" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_mmap" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) end -# # Confirm that an alternate dtype can be specified. -# @testset "dataset_short_int" begin -# f = exdiropen("dataset_short_int.exdir", "w") +@testset "dataset_modify_view" begin + (fx, f) = setup_teardown_file() -# dset = create_dataset(f, "foo"; shape=(63,), dtype=Int16) -# @test isa(collect(dset), AbstractArray{Int16}) -# end + # TODO -# @testset "dataset_create_scalar_data" begin + cleanup_fixture(fx) +end -# end +@testset "dataset_single_index" begin + (fx, f) = setup_teardown_file() -# @testset "dataset_create_extended_data" begin + # TODO -# end + cleanup_fixture(fx) +end -# @testset "dataset_intermediate_group" begin +@testset "dataset_single_null" begin + (fx, f) = setup_teardown_file() -# end + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_scalar_index" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_scalar_null" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_compound_index" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_negative_stop" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_read" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_write_broadcast" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_write_element" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_write_slices" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_roundtrip" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_slice_zero_length_dimension" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_slice_other_dimension" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_slice_of_length_zero" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end + +@testset "dataset_modify_all" begin + (fx, f) = setup_teardown_file() + + # TODO + + cleanup_fixture(fx) +end end From 58fb6392bad5259959d1fbcec38d48225191edb2 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 12 Oct 2023 08:59:20 -0400 Subject: [PATCH 17/55] Dataset: add test bodies --- test/dataset.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index a1508fc..53a3837 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -11,6 +11,7 @@ using Test dset = create_dataset(grp, "foo"; shape=()) @test size(dset) == () + # TODO # @test collect(dset) == 0 cleanup_fixture(fx) @@ -102,22 +103,37 @@ end cleanup_fixture(fx) end +# Create dataset with missing intermediate groups. @testset "dataset_intermediate_group" begin (fx, f) = setup_teardown_file() - # TODO + # Trying to create intermediate groups that are absolute should fail just + # like when creating them on groups. + @test_throws NotImplementedError create_dataset(f, "/foo/bar/baz"; shape=(10, 10), dtype=Int32) + + ds = create_dataset(f, "foo/bar/baz"; shape=(10, 10), dtype=Int32) + @test isa(ds, Exdir.Dataset) + @test "/foo/bar/baz" in f cleanup_fixture(fx) end +# Create from existing data, and make it fit a new shape. @testset "dataset_reshape" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + data = collect(Float64, 1:30) + dset = create_dataset(grp, "foo"; shape=(10, 3), data=data) + @test size(dset) == (10, 3) + @test dset.data == reshape(data, (10, 3)) cleanup_fixture(fx) end +# Feature: Datasets can be created only if they don't exist in the file +# Create new dataset with no conflicts. @testset "dataset_create" begin (fx, f) = setup_teardown_file() From 76c066676bce7106f9a085664cf513d728cc2cd6 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 12 Oct 2023 12:26:47 -0400 Subject: [PATCH 18/55] Dataset: add test bodies --- test/dataset.jl | 56 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index 53a3837..3e359ca 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -137,55 +137,97 @@ end @testset "dataset_create" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = require_dataset(grp, "foo"; shape=(10, 3)) + @test isa(dset, Exdir.Dataset) + @test size(dset) == (10, 3) + + dset2 = require_dataset(grp, "bar"; data=(3, 10)) + dset3 = require_dataset(grp, "bar"; data=(4, 11)) + @test isa(dset2, Exdir.Dataset) + @test dset2[:] == (3, 10) + @test dset3[:] == (3, 10) + @test dset2 == dset3 cleanup_fixture(fx) end +# require_dataset yields existing dataset. @testset "dataset_create_existing" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = require_dataset(grp, "foo"; shape=(10, 3), dtype=Float32) + dset2 = require_dataset(grp, "foo"; shape=(10, 3), dtype=Float32) + + @test dset == dset2 cleanup_fixture(fx) end +# require_dataset with shape conflict yields TypeError. @testset "dataset_shape_conflict" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + create_dataset(grp, "foo"; shape=(10, 3)) + @test_throws TypeError require_dataset(grp, "foo"; shape=(10, 4)) cleanup_fixture(fx) end +# require_dataset with object type conflict yields TypeError. @testset "dataset_type_conflict" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + create_group(grp, "foo") + @test_throws TypeError require_dataset(grp, "foo"; shape=(10, 3)) cleanup_fixture(fx) end +# require_dataset with dtype conflict (strict mode) yields TypeError. @testset "dataset_dtype_conflict" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + create_dataset(grp, "foo"; shape=(10, 3), dtype=Float64) + @test_throws TypeError require_dataset(grp, "foo"; shape=(10, 3), dtype=UInt8) cleanup_fixture(fx) end +# require_dataset with convertible type succeeds (non-strict mode)- @testset "dataset_dtype_close" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int32) + dset2 = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int16, exact=false) + @test dset == dset2 + # TODO look at dset2.dtype? + @test eltype(dset2) == Int32 cleanup_fixture(fx) end +# Feature: Datasets can be created with fill value +# Fill value is reflected in dataset contents. @testset "dataset_create_fillval" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; shape=(10,), fillvalue=4.0) + @test dset[1] == 4.0 + @test dset[8] == 4.0 cleanup_fixture(fx) end From c98a9b9c50fb769f802321db862d1afcf21b14d1 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 12 Oct 2023 12:40:03 -0400 Subject: [PATCH 19/55] Dataset: add test bodies --- test/dataset.jl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index 3e359ca..dea1750 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -232,7 +232,8 @@ end cleanup_fixture(fx) end -@testset "dataset_compound" begin +# Fill value works with compound types. +@testset "dataset_compound_fill" begin (fx, f) = setup_teardown_file() # TODO @@ -240,22 +241,31 @@ end cleanup_fixture(fx) end +# Bogus fill value raises TypeError. @testset "dataset_exc" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + @test_throws TypeError create_dataset(grp, "foo"; shape=(10,), dtype=Float32, fillvalue=Dict("a" => 2)) cleanup_fixture(fx) end +# Assignment of fixed-length byte string produces a fixed-length ASCII dataset @testset "dataset_string" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; data="string") + # TODO assert dset.data == "string" cleanup_fixture(fx) end +# Feature: Dataset dtype is available as .dtype property +# Retrieve dtype from dataset. @testset "dataset_dtype" begin (fx, f) = setup_teardown_file() From f28f42f0e0a674da7465758de60fc39b7445d99c Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Fri, 13 Oct 2023 17:56:25 -0400 Subject: [PATCH 20/55] Dataset: start implementing fillvalue in method signatures --- src/Exdir.jl | 13 ++++++++----- test/dataset.jl | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index c7b7f32..9b1f244 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -197,6 +197,7 @@ struct Dataset <: AbstractObject end end +Base.collect(dset::Dataset) = collect(dset.data) Base.iterate(dset::Dataset) = iterate(dset.data) Base.iterate(dset::Dataset, state) = iterate(dset.data, state) Base.length(dset::Dataset) = prod(size(dset)) @@ -710,7 +711,8 @@ end function create_dataset(grp::AbstractGroup, name::AbstractString; shape=nothing, dtype=nothing, - data=nothing) + data=nothing, + fillvalue=nothing) # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/group.py#L72 path = name_to_asserted_group_path(name) @@ -718,13 +720,13 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; (parent, pname) = splitdir(path) subgroup = require_group(grp, parent) return create_dataset(subgroup, pname, - shape=shape, dtype=dtype, data=data) + shape=shape, dtype=dtype, data=data, fillvalue=fillvalue) end _assert_valid_name(name, grp) if isnothing(data) && isnothing(shape) - error("Cannot create dataset. Missing shape or data keyword.") + throw(ArgumentError("Cannot create dataset. Missing shape or data keyword.")) end (prepared_data, attrs, meta) = _prepare_write( @@ -745,8 +747,9 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; if isnothing(shape) prepared_data = nothing else - # TODO fillvalue as kwarg - fillvalue = 0.0 + if isnothing(fillvalue) + fillvalue = 0.0 + end prepared_data = fill(dtype(fillvalue), shape) end end diff --git a/test/dataset.jl b/test/dataset.jl index dea1750..6958e30 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -58,11 +58,11 @@ end cleanup_fixture(fx) end -# Missing shape raises TypeError. +# Missing shape raises TypeError in Python, ArgumentError in Julia. @testset "dataset_missing_shape" begin (fx, f) = setup_teardown_file() - @test_throws TypeError create_dataset(f, "foo") + @test_throws ArgumentError create_dataset(f, "foo") cleanup_fixture(fx) end @@ -84,7 +84,7 @@ end grp = create_group(f, "test") data = ones() - dset = create_dataset("foo"; data=data) + dset = create_dataset(grp, "foo"; data=data) @test size(dset) == size(data) cleanup_fixture(fx) @@ -97,7 +97,7 @@ end grp = create_group(f, "test") data = ones(63) - dset = create_dataset("foo"; data=data) + dset = create_dataset(grp, "foo"; data=data) @test size(dset) == size(data) cleanup_fixture(fx) From 3e52a89b289101aa18bfc6b691eb9ed1724853fb Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Fri, 13 Oct 2023 19:37:10 -0400 Subject: [PATCH 21/55] Dataset: add test bodies --- src/Exdir.jl | 81 ++++++++++++++++++++++++++++++++++++++++--------- test/dataset.jl | 37 ++++++++++++++++++---- 2 files changed, 97 insertions(+), 21 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 9b1f244..7aa1ffd 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -12,6 +12,7 @@ export exdiropen, IOError, is_nonraw_object_directory, + require_dataset, require_group, require_raw, setattrs! @@ -182,8 +183,7 @@ struct Dataset <: AbstractObject data function Dataset(; root_directory, parent_path, object_name, file, data) - relative_path = joinpath(parent_path, object_name) - relative_path = if relative_path == "." "" else relative_path end + relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path new( root_directory, @@ -202,6 +202,8 @@ Base.iterate(dset::Dataset) = iterate(dset.data) Base.iterate(dset::Dataset, state) = iterate(dset.data, state) Base.length(dset::Dataset) = prod(size(dset)) Base.size(dset::Dataset) = size(dset.data) +Base.getindex(dset::Dataset, inds...) = getindex(dset.data, inds...) +Base.eltype(dset::Dataset) = eltype(dset.data) struct Group <: AbstractGroup root_directory::String @@ -771,21 +773,70 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; dataset end -# function create_dataset(grp::AbstractGroup, name::AbstractString; -# shape::Dims, -# dtype::DataType) -# create_dataset(grp, name; shape=shape, dtype=dtype, data=nothing) -# end +function require_dataset(grp::AbstractGroup, name::AbstractString; + shape=nothing, + dtype=nothing, + exact::Bool=false, + data=nothing, + fillvalue=nothing) + assert_file_open(grp.file) + if !in(name, grp) + return create_dataset(grp, name, + shape=shape, dtype=dtype, data=data, fillvalue=fillvalue) + end -# function create_dataset(grp::AbstractGroup, name::AbstractString; -# data) -# create_dataset(grp, name; shape=size(data), dtype=eltype(data), data=data) -# end + current_object = grp[name] -# function create_dataset(grp::AbstractGroup, name::AbstractString; -# shape::Dims) -# create_dataset(grp, name; shape=shape, dtype=Float64, data=nothing) -# end + if !isa(current_object, Dataset) + throw( + TypeError( + require_dataset, + "Incompatible object already exists", + Dataset, + typeof(current_object) + ) + ) + end + + (data, attrs, meta) = _prepare_write(data, attrs=Dict(), meta=Dict()) + + # TODO verify proper attributes + + _assert_data_shape_dtype_match(data, shape, dtype) + (shape, dtype) = _data_to_shape_and_dtype(data, shape, dtype) + + shape_exist = size(current_object) + if shape != shape_exist + throw( + DimensionMismatch( + "Shapes do not match: existing $(shape_exist) vs. new $(shape)" + ) + ) + end + + dtype_exist = eltype(current_object) + if dtype != dtype_exist + if exact + throw( + TypeError( + require_dataset, + "Datatypes do not exactly match", + dtype_exist, + dtype + ) + ) + # if not np.can_cast(dtype, current_object.dtype): + # raise TypeError( + # "Cannot safely cast from {} to {}".format( + # dtype, + # current_object.dtype + # ) + # ) + end + end + + current_object +end function root_directory(path::AbstractString) # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L128 diff --git a/test/dataset.jl b/test/dataset.jl index 6958e30..3d6e10a 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -259,25 +259,34 @@ end grp = create_group(f, "test") dset = create_dataset(grp, "foo"; data="string") - # TODO assert dset.data == "string" + @test dset.data == "string" cleanup_fixture(fx) end -# Feature: Dataset dtype is available as .dtype property +# Feature: Dataset dtype is available as .dtype property in Python, eltype in Julia # Retrieve dtype from dataset. @testset "dataset_dtype" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; shape=(5,), dtype=UInt8) + @test eltype(dset) == UInt8 cleanup_fixture(fx) end +# Feature: Size of first axis is available via Python's len; +# For Julia, size(...) gives the full shape and length(...) gives the total number of elements. @testset "dataset_len" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; shape=(312, 15)) + @test size(dset) == (312, 15) + @test length(dset) == 312 * 15 cleanup_fixture(fx) end @@ -285,15 +294,31 @@ end @testset "dataset_len_scalar" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; data=1) + @test size(dset) == () + @test length(dset) == 1 cleanup_fixture(fx) end +# Feature: Iterating over a dataset yields rows in Python, which is idiomatic +# for NumPy, but yields scalars in Julia. @testset "dataset_iter" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dtype = Float64 + data = reshape(collect(dtype, 1:30), (10, 3)) + dset = create_dataset(grp, "foo"; data=data) + for (x, y) in zip(dset, data) + @test isa(x, dtype) + @test length(x) == 1 + @test size(x) == () + @test x == y + end cleanup_fixture(fx) end From a1bb5d2d072ef14f41b881f7311d8d2b7aa3755d Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 14 Oct 2023 13:13:05 -0400 Subject: [PATCH 22/55] Start separate file for custom-defined exceptions --- src/Exdir.jl | 4 ++-- src/exceptions.jl | 3 +++ src/path.jl | 4 ++-- test/dataset.jl | 3 +++ test/group.jl | 12 +++++------- test/object.jl | 2 +- test/raw.jl | 2 +- 7 files changed, 17 insertions(+), 13 deletions(-) create mode 100644 src/exceptions.jl diff --git a/src/Exdir.jl b/src/Exdir.jl index 7aa1ffd..7f10d94 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -10,7 +10,6 @@ export create_raw, delete_object, exdiropen, - IOError, is_nonraw_object_directory, require_dataset, require_group, @@ -19,6 +18,7 @@ export include("consistency.jl") include("constants.jl") +include("exceptions.jl") include("mode.jl") include("path.jl") @@ -586,7 +586,7 @@ function defaultmetadata(typename::String) end makemetadata(typename::String) = YAML.write(defaultmetadata(typename)) -makemetadata(_::Object) = error("makemetadata not implemented for Exdir.Object") +makemetadata(_::Object) = throw(NotImplementedError("makemetadata not implemented for Exdir.Object")) makemetadata(_::Dataset) = makemetadata(DATASET_TYPENAME) makemetadata(_::Group) = makemetadata(GROUP_TYPENAME) makemetadata(_::File) = makemetadata(FILE_TYPENAME) diff --git a/src/exceptions.jl b/src/exceptions.jl new file mode 100644 index 0000000..ffe41c9 --- /dev/null +++ b/src/exceptions.jl @@ -0,0 +1,3 @@ +struct NotImplementedError <: Exception + msg::String +end diff --git a/src/path.jl b/src/path.jl index 4de3f6f..38eb5f7 100644 --- a/src/path.jl +++ b/src/path.jl @@ -10,9 +10,9 @@ end function name_to_asserted_group_path(name::AbstractString) path = clean_path(name) if isabspath(path) - throw(ArgumentError("Absolute paths are currently not supported and unlikely to be implemented.")) + throw(NotImplementedError("Absolute paths are currently not supported and unlikely to be implemented.")) elseif splitpath(path) == [""] - throw(ArgumentError("Getting an item on a group with path '$name' is not supported and unlikely to be implemented.")) + throw(NotImplementedError("Getting an item on a group with path '$(name)' is not supported and unlikely to be implemented.")) end path end diff --git a/test/dataset.jl b/test/dataset.jl index 3d6e10a..41f0c92 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -1,6 +1,8 @@ using Exdir using Test +import Exdir: NotImplementedError + @testset "dataset" begin # Create a scalar dataset. @@ -113,6 +115,7 @@ end ds = create_dataset(f, "foo/bar/baz"; shape=(10, 10), dtype=Int32) @test isa(ds, Exdir.Dataset) + # Checking for an absolute path in a file should work, though. @test "/foo/bar/baz" in f cleanup_fixture(fx) diff --git a/test/group.jl b/test/group.jl index f49b3ae..8492826 100644 --- a/test/group.jl +++ b/test/group.jl @@ -1,7 +1,7 @@ using Exdir using Test -import Exdir +import Exdir: NotImplementedError include("support.jl") @@ -101,7 +101,7 @@ end grp = create_group(f, "/a") - @test_throws ArgumentError create_group(grp, "/b") + @test_throws NotImplementedError create_group(grp, "/b") cleanup_fixture(fx) end @@ -313,7 +313,7 @@ end @test grp2.name == grp4.name @test grp2 == grp4 - @test_throws ArgumentError grp["/test"] + @test_throws NotImplementedError grp["/test"] cleanup_fixture(fx) end @@ -350,9 +350,7 @@ end @test in("b", grp) @test !in("c", grp) - # TODO - # @test_throws ArgumentError in("/b", grp) - @test_throws ArgumentError "/b" in grp + @test_throws NotImplementedError "/b" in grp cleanup_fixture(fx) end @@ -393,7 +391,7 @@ end grp = create_group(f, "test") - @test_throws ArgumentError "/" in grp + @test_throws NotImplementedError "/" in grp cleanup_fixture(fx) end diff --git a/test/object.jl b/test/object.jl index a15a683..0fb65a9 100644 --- a/test/object.jl +++ b/test/object.jl @@ -1,7 +1,7 @@ using Exdir using Test -import Exdir: Object, open_object, ATTRIBUTES_FILENAME, META_FILENAME +import Exdir: IOError, Object, open_object, ATTRIBUTES_FILENAME, META_FILENAME include("support.jl") diff --git a/test/raw.jl b/test/raw.jl index 7371830..f958d76 100644 --- a/test/raw.jl +++ b/test/raw.jl @@ -1,7 +1,7 @@ using Exdir using Test -import Exdir +import Exdir: IOError include("support.jl") From da345077dcafe06dfb6e85817d4cebbd31ef0fd7 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 14 Oct 2023 13:33:03 -0400 Subject: [PATCH 23/55] File: specialize in --- src/Exdir.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 7f10d94..7a117dd 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -456,9 +456,7 @@ function Base.getindex(file::File, name::AbstractString) end end -# function Base.in(name::AbstractString, file::File) -# false -# end +Base.in(name::AbstractString, file::File) = in(remove_root(name), convert(Group, file)) # MethodError: Cannot `convert` an object of type Exdir.File to an object of type Exdir.Group # Closest candidates are: From e0ddef215e9cd4a065d3db3378fde196a9e9df9e Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 15 Oct 2023 23:25:17 -0400 Subject: [PATCH 24/55] Path: fix 'cleaning' and forming relative paths --- src/path.jl | 12 +++++------- test/path.jl | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/path.jl b/src/path.jl index 38eb5f7..68bae25 100644 --- a/src/path.jl +++ b/src/path.jl @@ -1,7 +1,7 @@ function clean_path(path::AbstractString) path = normpath(path) - if isdirpath(path) - dirname(path) + if path[end] == '/' + path = path[1:end-1] else path end @@ -19,12 +19,10 @@ end function remove_root(path::AbstractString) path = clean_path(path) - components = splitpath(path) - if components[1] == "/" - joinpath(components[2:length(components)]) - else - path + if isabspath(path) + path = relpath(path, "/") end + path end form_relative_path(parent_path::AbstractString, object_name::AbstractString) = diff --git a/test/path.jl b/test/path.jl index 78b68a8..3c9b710 100644 --- a/test/path.jl +++ b/test/path.jl @@ -32,7 +32,7 @@ import Exdir: @test form_relative_path("./citrus", "lemon") == "citrus/lemon" @test form_relative_path("./citrus", "lemon/") == "citrus/lemon" @test form_relative_path("./citrus", "lemon/meyer/") == "citrus/lemon/meyer" - @test form_relative_path(".", "") == "" - @test form_relative_path("./", "") == "" + @test form_relative_path(".", "") == "." + @test form_relative_path("./", "") == "." end end From 7f5d55cc4b025c799b5192914377983babd820ea Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 15 Oct 2023 23:59:57 -0400 Subject: [PATCH 25/55] Path: test name_to_asserted_group_path --- test/path.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/path.jl b/test/path.jl index 3c9b710..33a2c86 100644 --- a/test/path.jl +++ b/test/path.jl @@ -3,6 +3,7 @@ using Test import Exdir: clean_path, name_to_asserted_group_path, + NotImplementedError, remove_root, form_relative_path @@ -15,9 +16,12 @@ import Exdir: @test clean_path("./hello////world///") == "hello/world" end - # @testset "name_to_asserted_group_path" begin - - # end + @testset "name_to_asserted_group_path" begin + @test name_to_asserted_group_path("mything") == "mything" + @test name_to_asserted_group_path("mynested/thing") == "mynested/thing" + @test name_to_asserted_group_path("") == "." + @test_throws NotImplementedError name_to_asserted_group_path("/mynested/thing") + end @testset "remove_root" begin @test remove_root("hello") == "hello" From cd81cc7454e6b7688b570671e74b18ba9bdaabd6 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Mon, 16 Oct 2023 00:34:25 -0400 Subject: [PATCH 26/55] Dataset: add test bodies --- src/Exdir.jl | 8 ++++++++ test/dataset.jl | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 7a117dd..00580aa 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -197,6 +197,14 @@ struct Dataset <: AbstractObject end end +function Base.getproperty(dset::Dataset, sym::Symbol) + if sym == :dtype + return eltype(dset) + else + return getproperty(convert(Object, dset), sym) + end +end + Base.collect(dset::Dataset) = collect(dset.data) Base.iterate(dset::Dataset) = iterate(dset.data) Base.iterate(dset::Dataset, state) = iterate(dset.data, state) diff --git a/test/dataset.jl b/test/dataset.jl index 41f0c92..90cc3f1 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -27,6 +27,8 @@ end dset = create_dataset(grp, "foo"; shape=(1,)) @test size(dset) == (1,) + # TODO + # @test collect(dset) cleanup_fixture(fx) end @@ -215,8 +217,8 @@ end dset = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int32) dset2 = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int16, exact=false) @test dset == dset2 - # TODO look at dset2.dtype? @test eltype(dset2) == Int32 + @test dset2.dtype == Int32 cleanup_fixture(fx) end @@ -239,7 +241,12 @@ end @testset "dataset_compound_fill" begin (fx, f) = setup_teardown_file() + grp = create_group(f, "test") + # TODO + # dt = np.dtype([('a', 'f4'), ('b', 'i8')]) + # v = np.ones((1,), dtype=dt)[0] + # dset = grp.create_dataset('foo', (10,), dtype=dt, fillvalue=v) cleanup_fixture(fx) end @@ -326,25 +333,45 @@ end cleanup_fixture(fx) end +# Iterating over scalar dataset raises TypeError. @testset "dataset_iter_scalar" begin (fx, f) = setup_teardown_file() - # TODO + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; shape=()) + @test_throws TypeError [x for x in dset] cleanup_fixture(fx) end +# Trailing slashes are unconditionally ignored. @testset "dataset_trailing_slash" begin (fx, f) = setup_teardown_file() - # TODO + f["dataset"] = 42 + @test "dataset/" in f cleanup_fixture(fx) end +# Feature: Compound types correctly round-trip +# Compound types are read back in correct order. @testset "dataset_compound" begin (fx, f) = setup_teardown_file() + grp = create_group(f, "test") + + struct dt + weight::Float64 + cputime::Float64 + walltime::Float64 + parents_offset::UInt32 + n_parents::UInt32 + status::UInt8 + endpoint_type::UInt8 + end + # TODO cleanup_fixture(fx) From 689c38744574cdb4291a94cc756fd9d003633db0 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 21 Oct 2023 17:48:12 -0400 Subject: [PATCH 27/55] Dataset: "fix" getting attributes --- src/Exdir.jl | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 00580aa..9328094 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -197,12 +197,46 @@ struct Dataset <: AbstractObject end end +function Base.convert(::Type{Object}, dset::Dataset) + Object(; + root_directory = dset.root_directory, + parent_path = dset.parent_path, + object_name = dset.object_name, + file = dset.file, + ) +end + function Base.getproperty(dset::Dataset, sym::Symbol) if sym == :dtype return eltype(dset) + # TODO + elseif sym == :directory + return joinpath(dset.root_directory, dset.relative_path) + elseif sym == :attrs + return Attribute() + elseif sym == :meta + return Attribute() + elseif sym == :attributes_filename + return joinpath(dset.directory, ATTRIBUTES_FILENAME) + elseif sym == :meta_filename + return joinpath(dset.directory, META_FILENAME) + elseif sym == :parent + if length(splitpath(dset.parent_path)) < 1 + return nothing + end + (parent_parent_path, parent_name) = splitdir(dset.parent_path) + return Group( + root_directory = dset.root_directory, + parent_path = parent_parent_path, + dsetect_name = parent_name, + file = dset.file, + ) else - return getproperty(convert(Object, dset), sym) + return getfield(dset, sym) end + # else + # return getproperty(convert(Object, dset), sym) + # end end Base.collect(dset::Dataset) = collect(dset.data) @@ -466,9 +500,6 @@ end Base.in(name::AbstractString, file::File) = in(remove_root(name), convert(Group, file)) -# MethodError: Cannot `convert` an object of type Exdir.File to an object of type Exdir.Group -# Closest candidates are: -# convert(::Type{T}, ::T) where T function Base.convert(::Type{Group}, file::File) Group(; root_directory = file.root_directory, From 82dd40dd35966c1303a161bb2d9a65b7c8d73cc7 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 22 Oct 2023 15:08:26 -0400 Subject: [PATCH 28/55] TypeError takes a symbol, not a function --- src/Exdir.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 9328094..e433d90 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -827,7 +827,7 @@ function require_dataset(grp::AbstractGroup, name::AbstractString; if !isa(current_object, Dataset) throw( TypeError( - require_dataset, + :require_dataset, "Incompatible object already exists", Dataset, typeof(current_object) @@ -856,7 +856,7 @@ function require_dataset(grp::AbstractGroup, name::AbstractString; if exact throw( TypeError( - require_dataset, + :require_dataset, "Datatypes do not exactly match", dtype_exist, dtype From c38b02f23a9035cd5df9c4696d69dca894af9ea5 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 22 Oct 2023 15:55:06 -0400 Subject: [PATCH 29/55] _assert_data_shape_dtype_match: all args allowed to be nothing --- src/Exdir.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index e433d90..65f6cb8 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -992,14 +992,14 @@ function _dataset(grp::AbstractGroup, name::AbstractString) ) end -function _assert_data_shape_dtype_match(data, shape::Dims, dtype) +function _assert_data_shape_dtype_match(data, shape::Union{Dims, Nothing}, dtype) if !isnothing(data) sz = size(data) - if prod(sz) != prod(shape) + if !isnothing(shape) && (prod(sz) != prod(shape)) error("Provided shape and size(data) do not match: $shape vs $sz") end et = eltype(data) - if et != dtype + if !isnothing(dtype) && (et != dtype) error("Provided dtype and eltype(data) do not match: $dtype vs $et") end end From accd6121760e946ef7b7c21ad9d1e6ba941afc64 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 29 Oct 2023 17:56:32 -0400 Subject: [PATCH 30/55] AbstractGroup: implement setindex! --- src/Exdir.jl | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 65f6cb8..a3b9457 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -247,6 +247,27 @@ Base.size(dset::Dataset) = size(dset.data) Base.getindex(dset::Dataset, inds...) = getindex(dset.data, inds...) Base.eltype(dset::Dataset) = eltype(dset.data) +# TODO this may fail in a gross way if value is a group/file/other Exdir type. +# Can we restrict to scalars and arrays? +function Base.setindex!(grp::AbstractGroup, value, name::AbstractString) + assert_file_open(grp.file) + path = name_to_asserted_group_path(name) + parts = splitpath(path) + if length(parts) > 1 + grp[dirname(path)][basename(path)] = value + return nothing + end + if !in(name, grp) + create_dataset(grp, name; data=value) + return nothing + end + if !isa(grp[name], Dataset) + error("Unable to assign value, $(name) already exists") + end + grp[name].value = value + nothing +end + struct Group <: AbstractGroup root_directory::String parent_path::String @@ -297,7 +318,8 @@ function unsafe_dataset(grp::AbstractGroup, name) root_directory = grp.root_directory, parent_path = grp.relative_path, object_name = name, - file = grp.file + file = grp.file, + data = nothing, ) end @@ -750,6 +772,7 @@ end function create_dataset(grp::AbstractGroup, name::AbstractString; shape=nothing, dtype=nothing, + exact::Bool=false, data=nothing, fillvalue=nothing) # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/group.py#L72 @@ -819,7 +842,7 @@ function require_dataset(grp::AbstractGroup, name::AbstractString; assert_file_open(grp.file) if !in(name, grp) return create_dataset(grp, name, - shape=shape, dtype=dtype, data=data, fillvalue=fillvalue) + shape=shape, dtype=dtype, exact=exact, data=data, fillvalue=fillvalue) end current_object = grp[name] @@ -835,7 +858,7 @@ function require_dataset(grp::AbstractGroup, name::AbstractString; ) end - (data, attrs, meta) = _prepare_write(data, attrs=Dict(), meta=Dict()) + (data, attrs, meta) = _prepare_write(data, Dict(), Dict()) # TODO verify proper attributes From f6854d5f0a036e2100612e398afbe9131873f0a1 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 29 Oct 2023 22:15:45 -0400 Subject: [PATCH 31/55] Test _data_to_shape_and_dtype --- src/consistency.jl | 1 + test/consistency.jl | 20 ++++++++++++++++++++ test/runtests.jl | 1 + 3 files changed, 22 insertions(+) create mode 100644 test/consistency.jl diff --git a/src/consistency.jl b/src/consistency.jl index 67577d4..7a0a452 100644 --- a/src/consistency.jl +++ b/src/consistency.jl @@ -7,6 +7,7 @@ function _data_to_shape_and_dtype(data, shape, dtype) if isnothing(dtype) dtype = eltype(data) end + _assert_data_shape_dtype_match(data, shape, dtype) return (shape, dtype) end if isnothing(dtype) diff --git a/test/consistency.jl b/test/consistency.jl new file mode 100644 index 0000000..c92ab58 --- /dev/null +++ b/test/consistency.jl @@ -0,0 +1,20 @@ +using Test + +import Exdir: + _data_to_shape_and_dtype + +@testset "consistency" begin + @testset "data_to_shape_and_dtype" begin + default_dtype = Float64 + dim = (2, 3) + x = rand(default_dtype, dim...) + z = x * (1 + 1im) + + @test _data_to_shape_and_dtype(nothing, nothing, nothing) == (nothing, default_dtype) + @test _data_to_shape_and_dtype(nothing, nothing, Int32) == (nothing, Int32) + @test _data_to_shape_and_dtype(nothing, dim, nothing) == (dim, default_dtype) + @test _data_to_shape_and_dtype(nothing, dim, ComplexF16) == (dim, ComplexF16) + @test _data_to_shape_and_dtype(x, nothing, nothing) == (dim, default_dtype) + @test _data_to_shape_and_dtype(z, nothing, nothing) == (dim, ComplexF64) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 9edf654..8c94878 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using Test # include("example_hdf5.jl") # include("example.jl") include("path.jl") + include("consistency.jl") include("object.jl") include("raw.jl") include("group.jl") From a9fe948a30a5f796ae39c4486469ce1e0ca7ca5f Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 29 Oct 2023 22:40:43 -0400 Subject: [PATCH 32/55] Test for _data_to_shape_and_dtype arguments that should throw --- src/consistency.jl | 4 ++-- test/consistency.jl | 3 +++ test/group.jl | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/consistency.jl b/src/consistency.jl index 7a0a452..6f7a0cf 100644 --- a/src/consistency.jl +++ b/src/consistency.jl @@ -20,10 +20,10 @@ function _assert_data_shape_dtype_match(data, shape, dtype) # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/group.py#L39 if !isnothing(data) if !isnothing(shape) && (prod(shape) != prod(size(data))) - error("Provided shape and size(data) do not match") + throw(ArgumentError("Provided shape and size(data) do not match")) end if !isnothing(dtype) && (dtype != eltype(data)) - error("Provided dtype and eltype(data) do not match") + throw(ArgumentError("Provided dtype and eltype(data) do not match")) end end end diff --git a/test/consistency.jl b/test/consistency.jl index c92ab58..c4cf3a5 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -16,5 +16,8 @@ import Exdir: @test _data_to_shape_and_dtype(nothing, dim, ComplexF16) == (dim, ComplexF16) @test _data_to_shape_and_dtype(x, nothing, nothing) == (dim, default_dtype) @test _data_to_shape_and_dtype(z, nothing, nothing) == (dim, ComplexF64) + + @test_throws ArgumentError _data_to_shape_and_dtype(x, (5, 9), nothing) + @test_throws ArgumentError _data_to_shape_and_dtype(x, nothing, Float16) end end diff --git a/test/group.jl b/test/group.jl index 8492826..ed077df 100644 --- a/test/group.jl +++ b/test/group.jl @@ -133,7 +133,7 @@ end cleanup_fixture(fx) end -# Name conflic causes group creation to fail with ArgumentError. +# Name conflict causes group creation to fail with ArgumentError. @testset "group_create_exception" begin (fx, f) = setup_teardown_file() From 1256fb597008c796d765626cfbd3ce0a32aa54f7 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sun, 29 Oct 2023 23:16:13 -0400 Subject: [PATCH 33/55] Remove duplicate function --- src/Exdir.jl | 14 -------------- src/consistency.jl | 13 ++++++++----- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index a3b9457..495b23a 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -1015,18 +1015,4 @@ function _dataset(grp::AbstractGroup, name::AbstractString) ) end -function _assert_data_shape_dtype_match(data, shape::Union{Dims, Nothing}, dtype) - if !isnothing(data) - sz = size(data) - if !isnothing(shape) && (prod(sz) != prod(shape)) - error("Provided shape and size(data) do not match: $shape vs $sz") - end - et = eltype(data) - if !isnothing(dtype) && (et != dtype) - error("Provided dtype and eltype(data) do not match: $dtype vs $et") - end - end - nothing -end - end diff --git a/src/consistency.jl b/src/consistency.jl index 6f7a0cf..6bfb413 100644 --- a/src/consistency.jl +++ b/src/consistency.jl @@ -16,14 +16,17 @@ function _data_to_shape_and_dtype(data, shape, dtype) (shape, dtype) end -function _assert_data_shape_dtype_match(data, shape, dtype) +function _assert_data_shape_dtype_match(data, shape::Union{Dims, Nothing}, dtype) # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/group.py#L39 if !isnothing(data) - if !isnothing(shape) && (prod(shape) != prod(size(data))) - throw(ArgumentError("Provided shape and size(data) do not match")) + sz = size(data) + if !isnothing(shape) && (prod(sz) != prod(shape)) + throw(ArgumentError("Provided shape and size(data) do not match: $shape vs $sz")) end - if !isnothing(dtype) && (dtype != eltype(data)) - throw(ArgumentError("Provided dtype and eltype(data) do not match")) + et = eltype(data) + if !isnothing(dtype) && (et != dtype) + throw(ArgumentError("Provided dtype and eltype(data) do not match: $dtype vs $et")) end end + nothing end From 2f48f976b48d4b13ac46be787a62c4d8ac8e62b3 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 31 Oct 2023 17:49:53 -0400 Subject: [PATCH 34/55] Strings should not be 'collect'ed on write --- src/Exdir.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 495b23a..3ce1019 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -761,7 +761,7 @@ end function _prepare_write(data, attrs::AbstractDict, meta::AbstractDict) if isnothing(data) data = nothing - elseif !isa(data, AbstractArray) + elseif !isa(data, AbstractArray) && !isa(data, AbstractString) data = collect(data) end # If plugins were implemented, they would have been applied to attrs and From b860176f0d85b6cddb0bdcfaa2a9cba8ba0de2f5 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Wed, 8 Nov 2023 00:11:45 -0500 Subject: [PATCH 35/55] Dataset: first attempt at reading from and writing to disk --- src/Exdir.jl | 22 ++++++++++++++-------- src/constants.jl | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 3ce1019..791568f 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -1,5 +1,6 @@ module Exdir +import NPZ import YAML export @@ -180,19 +181,19 @@ struct Dataset <: AbstractObject relative_path::String name::String - data - function Dataset(; root_directory, parent_path, object_name, file, data) relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path + @assert !isnothing(data) + data_filename = _dataset_filename(_directory(root_directory, relative_path)) + NPZ.npzwrite(data_filename, data) new( root_directory, parent_path, object_name, file, relative_path, - name, - data + name ) end end @@ -206,12 +207,18 @@ function Base.convert(::Type{Object}, dset::Dataset) ) end +_directory(root_directory, relative_path) = joinpath(root_directory, relative_path) +_directory(dset::Dataset) = _directory(dset.root_directory, dset.relative_path) +_dataset_filename(dset_directory::AbstractString) = joinpath(dset_directory, DSET_FILENAME) + function Base.getproperty(dset::Dataset, sym::Symbol) if sym == :dtype return eltype(dset) + elseif sym == :data + return NPZ.npzread(_dataset_filename(_directory(dset))) # TODO elseif sym == :directory - return joinpath(dset.root_directory, dset.relative_path) + return _directory(dset) elseif sym == :attrs return Attribute() elseif sym == :meta @@ -822,15 +829,14 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; create_object_directory(joinpath(grp.directory, name), meta) - dataset = Dataset( + # TODO pass attrs, meta + return Dataset( root_directory = grp.root_directory, parent_path = grp.relative_path, object_name = name, file = grp.file, data = prepared_data ) - # dataset._reset_data(prepared_data, attrs, None) # meta already set above - dataset end function require_dataset(grp::AbstractGroup, name::AbstractString; diff --git a/src/constants.jl b/src/constants.jl index eaec39c..2e2186b 100644 --- a/src/constants.jl +++ b/src/constants.jl @@ -7,6 +7,7 @@ const VERSION_METANAME = "version" const META_FILENAME = "exdir.yaml" const ATTRIBUTES_FILENAME = "attributes.yaml" const RAW_FOLDER_NAME = "__raw__" +const DSET_FILENAME = "data.npy" # typenames const DATASET_TYPENAME = "dataset" From fdadd0d6fdca5f71023f38a1c98fae2c3abd1d85 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Thu, 9 Nov 2023 23:27:49 -0500 Subject: [PATCH 36/55] Dataset: fix reading/writing --- src/Exdir.jl | 37 +++++++++++++++++++------------------ test/dataset.jl | 8 ++++---- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 791568f..20b247d 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -181,12 +181,9 @@ struct Dataset <: AbstractObject relative_path::String name::String - function Dataset(; root_directory, parent_path, object_name, file, data) + function Dataset(; root_directory, parent_path, object_name, file) relative_path = form_relative_path(parent_path, object_name) name = "/" * relative_path - @assert !isnothing(data) - data_filename = _dataset_filename(_directory(root_directory, relative_path)) - NPZ.npzwrite(data_filename, data) new( root_directory, parent_path, @@ -215,7 +212,9 @@ function Base.getproperty(dset::Dataset, sym::Symbol) if sym == :dtype return eltype(dset) elseif sym == :data - return NPZ.npzread(_dataset_filename(_directory(dset))) + return NPZ.npzread(dset.data_filename) + elseif sym == :data_filename + return _dataset_filename(_directory(dset)) # TODO elseif sym == :directory return _directory(dset) @@ -246,12 +245,23 @@ function Base.getproperty(dset::Dataset, sym::Symbol) # end end +function Base.setproperty!(dset::Dataset, name::Symbol, value) + if name == :data + assert_file_open(dset.file) + (data, _, _) = _prepare_write(value, Dict(), Dict()) + NPZ.npzwrite(dset.data_filename, data) + else + error("Cannot set property $(name) on Datasets") + end +end + Base.collect(dset::Dataset) = collect(dset.data) Base.iterate(dset::Dataset) = iterate(dset.data) Base.iterate(dset::Dataset, state) = iterate(dset.data, state) Base.length(dset::Dataset) = prod(size(dset)) Base.size(dset::Dataset) = size(dset.data) Base.getindex(dset::Dataset, inds...) = getindex(dset.data, inds...) +Base.setindex!(dset::Dataset, val, inds...) = setindex!(dset.data, val, inds...) Base.eltype(dset::Dataset) = eltype(dset.data) # TODO this may fail in a gross way if value is a group/file/other Exdir type. @@ -326,7 +336,6 @@ function unsafe_dataset(grp::AbstractGroup, name) parent_path = grp.relative_path, object_name = name, file = grp.file, - data = nothing, ) end @@ -830,13 +839,14 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; create_object_directory(joinpath(grp.directory, name), meta) # TODO pass attrs, meta - return Dataset( + dset = Dataset( root_directory = grp.root_directory, parent_path = grp.relative_path, object_name = name, - file = grp.file, - data = prepared_data + file = grp.file ) + dset.data = prepared_data + return dset end function require_dataset(grp::AbstractGroup, name::AbstractString; @@ -1012,13 +1022,4 @@ function _assert_valid_name(name::AbstractString, container) # container.file.name_validation(container.directory, name) end -function _dataset(grp::AbstractGroup, name::AbstractString) - Dataset( - root_directory = grp.root_directory, - parent_path = grp.relative_path, - object_name = name, - file = grp.file, - ) -end - end diff --git a/test/dataset.jl b/test/dataset.jl index 90cc3f1..05d2c39 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -151,8 +151,8 @@ end dset2 = require_dataset(grp, "bar"; data=(3, 10)) dset3 = require_dataset(grp, "bar"; data=(4, 11)) @test isa(dset2, Exdir.Dataset) - @test dset2[:] == (3, 10) - @test dset3[:] == (3, 10) + @test dset2[:] == [3, 10] + @test dset3[:] == [3, 10] @test dset2 == dset3 cleanup_fixture(fx) @@ -172,14 +172,14 @@ end cleanup_fixture(fx) end -# require_dataset with shape conflict yields TypeError. +# require_dataset with shape conflict yields TypeError in Python. @testset "dataset_shape_conflict" begin (fx, f) = setup_teardown_file() grp = create_group(f, "test") create_dataset(grp, "foo"; shape=(10, 3)) - @test_throws TypeError require_dataset(grp, "foo"; shape=(10, 4)) + @test_throws DimensionMismatch require_dataset(grp, "foo"; shape=(10, 4)) cleanup_fixture(fx) end From 3bea6670ac527b03f5876b384956b383cbd1dcbf Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Fri, 22 Dec 2023 16:59:20 -0500 Subject: [PATCH 37/55] debugging --- test/dataset.jl | 282 ++++++++++++++++++++++++------------------------ 1 file changed, 141 insertions(+), 141 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index 05d2c39..0cf1604 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -208,20 +208,20 @@ end cleanup_fixture(fx) end -# require_dataset with convertible type succeeds (non-strict mode)- -@testset "dataset_dtype_close" begin - (fx, f) = setup_teardown_file() +# # require_dataset with convertible type succeeds (non-strict mode)- +# @testset "dataset_dtype_close" begin +# (fx, f) = setup_teardown_file() - grp = create_group(f, "test") +# grp = create_group(f, "test") - dset = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int32) - dset2 = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int16, exact=false) - @test dset == dset2 - @test eltype(dset2) == Int32 - @test dset2.dtype == Int32 +# dset = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int32) +# dset2 = create_dataset(grp, "foo"; shape=(10, 3), dtype=Int16, exact=false) +# @test dset == dset2 +# @test eltype(dset2) == Int32 +# @test dset2.dtype == Int32 - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end # Feature: Datasets can be created with fill value # Fill value is reflected in dataset contents. @@ -333,17 +333,17 @@ end cleanup_fixture(fx) end -# Iterating over scalar dataset raises TypeError. -@testset "dataset_iter_scalar" begin - (fx, f) = setup_teardown_file() +# # Iterating over scalar dataset raises TypeError. +# @testset "dataset_iter_scalar" begin +# (fx, f) = setup_teardown_file() - grp = create_group(f, "test") +# grp = create_group(f, "test") - dset = create_dataset(grp, "foo"; shape=()) - @test_throws TypeError [x for x in dset] +# dset = create_dataset(grp, "foo"; shape=()) +# @test_throws TypeError [x for x in dset] - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end # Trailing slashes are unconditionally ignored. @testset "dataset_trailing_slash" begin @@ -355,194 +355,194 @@ end cleanup_fixture(fx) end -# Feature: Compound types correctly round-trip -# Compound types are read back in correct order. -@testset "dataset_compound" begin - (fx, f) = setup_teardown_file() +# # Feature: Compound types correctly round-trip +# # Compound types are read back in correct order. +# @testset "dataset_compound" begin +# (fx, f) = setup_teardown_file() - grp = create_group(f, "test") +# grp = create_group(f, "test") - struct dt - weight::Float64 - cputime::Float64 - walltime::Float64 - parents_offset::UInt32 - n_parents::UInt32 - status::UInt8 - endpoint_type::UInt8 - end +# struct dt +# weight::Float64 +# cputime::Float64 +# walltime::Float64 +# parents_offset::UInt32 +# n_parents::UInt32 +# status::UInt8 +# endpoint_type::UInt8 +# end - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_assign" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_assign" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_set_data" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_set_data" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_eq_false" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_eq_false" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_eq" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_eq" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_mmap" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_mmap" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_modify_view" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_modify_view" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_single_index" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_single_index" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_single_null" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_single_null" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_scalar_index" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_scalar_index" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_scalar_null" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_scalar_null" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_compound_index" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_compound_index" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_negative_stop" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_negative_stop" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_read" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_read" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_write_broadcast" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_write_broadcast" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_write_element" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_write_element" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_write_slices" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_write_slices" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_roundtrip" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_roundtrip" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_slice_zero_length_dimension" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_slice_zero_length_dimension" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_slice_other_dimension" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_slice_other_dimension" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_slice_of_length_zero" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_slice_of_length_zero" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end -@testset "dataset_modify_all" begin - (fx, f) = setup_teardown_file() +# @testset "dataset_modify_all" begin +# (fx, f) = setup_teardown_file() - # TODO +# # TODO - cleanup_fixture(fx) -end +# cleanup_fixture(fx) +# end end From 787448130fc2f180969275c13f9b975e767b1c94 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 4 May 2024 18:15:55 -0400 Subject: [PATCH 38/55] change support include --- test/file.jl | 2 +- test/group.jl | 2 +- test/object.jl | 2 +- test/raw.jl | 2 +- test/runtests.jl | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/file.jl b/test/file.jl index c0e8d80..d4c9174 100644 --- a/test/file.jl +++ b/test/file.jl @@ -7,7 +7,7 @@ import Exdir: create_object_directory, DATASET_TYPENAME, FILE_TYPENAME -include("support.jl") +# include("support.jl") """ remove(name) diff --git a/test/group.jl b/test/group.jl index ed077df..df60067 100644 --- a/test/group.jl +++ b/test/group.jl @@ -3,7 +3,7 @@ using Test import Exdir: NotImplementedError -include("support.jl") +# include("support.jl") @testset "group" begin diff --git a/test/object.jl b/test/object.jl index 0fb65a9..2530c43 100644 --- a/test/object.jl +++ b/test/object.jl @@ -3,7 +3,7 @@ using Test import Exdir: IOError, Object, open_object, ATTRIBUTES_FILENAME, META_FILENAME -include("support.jl") +# include("support.jl") @testset "object" begin diff --git a/test/raw.jl b/test/raw.jl index f958d76..affc000 100644 --- a/test/raw.jl +++ b/test/raw.jl @@ -3,7 +3,7 @@ using Test import Exdir: IOError -include("support.jl") +# include("support.jl") @testset "raw" begin diff --git a/test/runtests.jl b/test/runtests.jl index 8c94878..ac38e41 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Test @testset "Exdir.jl" begin + include("support.jl") # include("example_hdf5.jl") # include("example.jl") include("path.jl") From 45a63c38e793eb292158a77fce1846fc161f50f5 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:30:37 -0400 Subject: [PATCH 39/55] initial implementation of can_cast --- src/Exdir.jl | 1 + src/can_cast.jl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ test/can_cast.jl | 19 ++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 77 insertions(+) create mode 100644 src/can_cast.jl create mode 100644 test/can_cast.jl diff --git a/src/Exdir.jl b/src/Exdir.jl index 20b247d..f64fd9e 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -17,6 +17,7 @@ export require_raw, setattrs! +include("can_cast.jl") include("consistency.jl") include("constants.jl") include("exceptions.jl") diff --git a/src/can_cast.jl b/src/can_cast.jl new file mode 100644 index 0000000..cc78bca --- /dev/null +++ b/src/can_cast.jl @@ -0,0 +1,56 @@ +# Returns True if cast between data types can occur according to the +# casting rule. If from is a scalar or array scalar, also returns +# True if the scalar value can be cast without overflow or truncation +# to an integer. +# +# Parameters +# ---------- +# from_ : dtype, dtype specifier, scalar, or array +# Data type, scalar, or array to cast from. +# to : dtype or dtype specifier +# Data type to cast to. +# casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional +# Controls what kind of data casting may occur. +# +# * 'no' means the data types should not be cast at all. +# * 'equiv' means only byte-order changes are allowed. +# * 'safe' means only casts which can preserve values are allowed. +# * 'same_kind' means only safe casts or casts within a kind, +# like float64 to float32, are allowed. +# * 'unsafe' means any data conversions may be done. +# +# Returns +# ------- +# out : bool +# True if cast can occur according to the casting rule. +# +# Notes +# ----- +# .. versionchanged:: 1.17.0 +# Casting between a simple data type and a structured one is possible only +# for "unsafe" casting. Casting to multiple fields is allowed, but +# casting from multiple fields is not. +# +# .. versionchanged:: 1.9.0 +# Casting from numeric to string types in 'safe' casting mode requires +# that the string dtype length is long enough to store the maximum +# integer/float value converted. +# +# See also +# -------- +# dtype, result_type +function can_cast(dtype_from::DataType, dtype_to::DataType, casting="safe") + if casting != "safe" + throw( + ArgumentError( + "can't handle anything other than safe casting for now" + ) + ) + end + if dtype_from == dtype_to + return true + elseif supertype(dtype_from) == supertype(dtype_to) + return sizeof(dtype_from) <= sizeof(dtype_to) + end + return false +end diff --git a/test/can_cast.jl b/test/can_cast.jl new file mode 100644 index 0000000..994605b --- /dev/null +++ b/test/can_cast.jl @@ -0,0 +1,19 @@ +using Test + +import Exdir: + can_cast + +@testset "can_cast" begin + # safe + @test can_cast(Int32, Int32) + @test can_cast(Int32, Int64) + @test !can_cast(Int64, Int32) + + @test can_cast(Float32, Float32) + @test can_cast(Float32, Float64) + @test !can_cast(Float64, Float32) + + @test can_cast(UInt32, UInt32) + @test can_cast(UInt32, UInt64) + @test !can_cast(UInt64, UInt32) +end diff --git a/test/runtests.jl b/test/runtests.jl index ac38e41..92cd08c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using Test @testset "Exdir.jl" begin include("support.jl") + include("can_cast.jl") # include("example_hdf5.jl") # include("example.jl") include("path.jl") From 002ef8184e8372ef7ade03468ca5883e981d5f36 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:31:01 -0400 Subject: [PATCH 40/55] use can_cast in require_dataset --- src/Exdir.jl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index f64fd9e..250fba6 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -902,13 +902,16 @@ function require_dataset(grp::AbstractGroup, name::AbstractString; dtype ) ) - # if not np.can_cast(dtype, current_object.dtype): - # raise TypeError( - # "Cannot safely cast from {} to {}".format( - # dtype, - # current_object.dtype - # ) - # ) + end + if !can_cast(dtype, dtype_exist) + throw( + TypeError( + :require_dataset, + "Cannot safely cast", + dtype_exist, + dtype + ) + ) end end From 660e8ad83db53a96bca54d5798f4313440577948 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:32:38 -0400 Subject: [PATCH 41/55] add EllipsisNotation and StructArrays for testing --- Project.toml | 4 +++- test/dataset.jl | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5db1f04..dc3cc47 100644 --- a/Project.toml +++ b/Project.toml @@ -11,8 +11,10 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" julia = "1" [extras] +EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["HDF5", "Test"] +test = ["EllipsisNotation", "HDF5", "StructArrays", "Test"] diff --git a/test/dataset.jl b/test/dataset.jl index 0cf1604..9529a75 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -1,4 +1,6 @@ +using EllipsisNotation using Exdir +using StructArrays using Test import Exdir: NotImplementedError From f486e45a1752229030502d36503f520c2aa731c0 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:34:27 -0400 Subject: [PATCH 42/55] Dataset tests --- src/Exdir.jl | 9 +- test/dataset.jl | 241 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 189 insertions(+), 61 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 250fba6..4b61a5f 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -264,6 +264,9 @@ Base.size(dset::Dataset) = size(dset.data) Base.getindex(dset::Dataset, inds...) = getindex(dset.data, inds...) Base.setindex!(dset::Dataset, val, inds...) = setindex!(dset.data, val, inds...) Base.eltype(dset::Dataset) = eltype(dset.data) +Base.firstindex(::Dataset) = 1 +# FIXME +Base.lastindex(::Dataset) = 1 # TODO this may fail in a gross way if value is a group/file/other Exdir type. # Can we restrict to scalars and arrays? @@ -829,7 +832,7 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; if isnothing(fillvalue) fillvalue = 0.0 end - prepared_data = fill(dtype(fillvalue), shape) + prepared_data = fill(convert(dtype, fillvalue), shape) end end @@ -850,6 +853,10 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; return dset end +""" + +Open an existing dataset or create it if it does not exist. +""" function require_dataset(grp::AbstractGroup, name::AbstractString; shape=nothing, dtype=nothing, diff --git a/test/dataset.jl b/test/dataset.jl index 9529a75..621d0a7 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -146,6 +146,8 @@ end grp = create_group(f, "test") + # def require_dataset(self, name, shape=None, dtype=None, exact=False, + # data=None, fillvalue=None): dset = require_dataset(grp, "foo"; shape=(10, 3)) @test isa(dset, Exdir.Dataset) @test size(dset) == (10, 3) @@ -245,10 +247,12 @@ end grp = create_group(f, "test") - # TODO - # dt = np.dtype([('a', 'f4'), ('b', 'i8')]) - # v = np.ones((1,), dtype=dt)[0] - # dset = grp.create_dataset('foo', (10,), dtype=dt, fillvalue=v) + struct dt + a::Float32 + b::Int64 + end + v = StructArray{dt}((a = ones(1), b = ones(1)))[1] + dset = create_dataset(grp, "foo"; shape=(10,), dtype=dt, fillvalue=v) cleanup_fixture(fx) end @@ -374,7 +378,23 @@ end # endpoint_type::UInt8 # end -# # TODO + # TODO + # lo = 0 + # hi = 100 + # d = MappedDistribution(dt, + # Uniform(lo, hi), + # Uniform(lo, hi), + # Uniform(lo, hi), + # Uniform(lo, hi), + # Uniform(lo, hi), + # Uniform(lo, hi), + # Uniform(lo, hi) + # ) + + # dim = 16 + # testdata = StructArray{dt}(undef, dim) + # Random.rand!(testdata) + # testdata *= 100 # cleanup_fixture(fx) # end @@ -387,29 +407,44 @@ end # cleanup_fixture(fx) # end -# @testset "dataset_set_data" begin -# (fx, f) = setup_teardown_file() +# Set data works correctly. +@testset "dataset_set_data" begin + (fx, f) = setup_teardown_file() -# # TODO + # TODO + # grp = create_group(f, "test") -# cleanup_fixture(fx) -# end + # testdata = ones(10, 2) + # grp["testdata"] = testdata -# @testset "dataset_eq_false" begin -# (fx, f) = setup_teardown_file() + cleanup_fixture(fx) +end -# # TODO +@testset "dataset_eq_false" begin + (fx, f) = setup_teardown_file() -# cleanup_fixture(fx) -# end + grp = create_group(f, "test") -# @testset "dataset_eq" begin -# (fx, f) = setup_teardown_file() + dset = create_dataset(grp, "foo"; data=1) + dset2 = create_dataset(grp, "foobar"; shape=(2, 2)) -# # TODO + @test dset != dset2 + @test dset != 2 -# cleanup_fixture(fx) -# end + cleanup_fixture(fx) +end + +@testset "dataset_eq" begin + (fx, f) = setup_teardown_file() + + grp = create_group(f, "test") + + dset = create_dataset(grp, "foo"; data=ones(2, 2)) + + @test dset == dset + + cleanup_fixture(fx) +end # @testset "dataset_mmap" begin # (fx, f) = setup_teardown_file() @@ -483,68 +518,154 @@ end # cleanup_fixture(fx) # end -# @testset "dataset_write_broadcast" begin -# (fx, f) = setup_teardown_file() +# Array fill from constant is supported. +@testset "dataset_write_broadcast" begin + (fx, f) = setup_teardown_file() -# # TODO + dt = Int8 + shape = (10,) + c = 42 -# cleanup_fixture(fx) -# end + dset = create_dataset(f, "x"; shape=shape, dtype=dt) + dset[..] .= c -# @testset "dataset_write_element" begin -# (fx, f) = setup_teardown_file() + data = ones(dt, shape...) * c -# # TODO + @test eltype(dset) == eltype(data) + @test isequal(dset.data, data) -# cleanup_fixture(fx) -# end + cleanup_fixture(fx) +end -# @testset "dataset_write_slices" begin -# (fx, f) = setup_teardown_file() +# Write a single element to the array. +@testset "dataset_write_element" begin + (fx, f) = setup_teardown_file() -# # TODO + dt = Float16 + dset = create_dataset(f, "x"; shape=(10, 3), dtype=dt) -# cleanup_fixture(fx) -# end + data = dt.([1, 2, 3.0]) + dset[5] = data -# @testset "dataset_roundtrip" begin -# (fx, f) = setup_teardown_file() + out = dset[5] + @test eltype(out) == eltype(data) + @test isequal(out, data) -# # TODO + cleanup_fixture(fx) +end -# cleanup_fixture(fx) -# end +# Write slices to array type. +@testset "dataset_write_slices" begin + (fx, f) = setup_teardown_file() -# @testset "dataset_slice_zero_length_dimension" begin -# (fx, f) = setup_teardown_file() + dt = Int32 + data1 = ones(dt, 2, 3) + data2 = ones(dt, 4, 5, 3) -# # TODO + dset = create_dataset(f, "x"; shape=(10, 9, 11), dtype=dt) -# cleanup_fixture(fx) -# end + dset[1, 1, 3:4] = data1 + @test eltype(dset[1, 1, 3:4]) == eltype(data1) + @test isequal(dset[1, 1, 3:4], data1) -# @testset "dataset_slice_other_dimension" begin -# (fx, f) = setup_teardown_file() + dset[4, 2:5, 7:11] = data2 + @test eltype(dset[4, 2:5, 7:11]) == eltype(data2) + @test isequal(dset[4, 2:5, 7:11], data2) -# # TODO + cleanup_fixture(fx) +end -# cleanup_fixture(fx) -# end +# Read the contents of an array and write them back. +# +# The initialization is not the same as in Python, since NumPy allows for +# fancy dtypes where Julia could resort to structs without the array having a +# dtype of object. Use the third-party package StructArrays to efficiently +# emulate this. +@testset "dataset_roundtrip" begin + (fx, f) = setup_teardown_file() -# @testset "dataset_slice_of_length_zero" begin -# (fx, f) = setup_teardown_file() + data = rand(10) + dset = create_dataset(f, "x"; data=data) -# # TODO + out = dset[..] + @test out == data + dset[..] = out + @test dset[..] == out + @test dset[..] == data -# cleanup_fixture(fx) -# end + cleanup_fixture(fx) +end -# @testset "dataset_modify_all" begin -# (fx, f) = setup_teardown_file() +# Slice a dataset with a zero in its shape vector along the zero-length +# dimension. +@testset "dataset_slice_zero_length_dimension" begin + (fx, f) = setup_teardown_file() -# # TODO + shapes = [(0,), (0, 3), (0, 2, 1)] + for (i, shape) in enumerate(shapes) + dset = create_dataset(f, "x$(i)"; shape=shape, dtype=Int32) + @test size(dset) == shape + out = dset[..] + # not AbstractArray, which Dataset obeys TODO + @test isa(out, Array) + @test size(out) == shape + out = dset[:] + @test isa(out, Array) + @test size(out) == shape + if length(shape) > 1 + out = dset[:, :2] + @test isa(out, Array) + @test size(out) == (0, 1) + end + end -# cleanup_fixture(fx) -# end + cleanup_fixture(fx) +end + +# Slice a dataset with a zero in its shape vector along a non-zero-length +# dimension. +@testset "dataset_slice_other_dimension" begin + (fx, f) = setup_teardown_file() + + shapes = [(3, 0), (1, 2, 0), (2, 0, 1)] + for (i, shape) in enumerate(shapes) + dset = create_dataset(f, "x$(i)"; shape=shape, dtype=Int32) + @test size(dset) == shape + out = dset[begin:2] + # not AbstractArray, which Dataset obeys TODO + @test isa(out, Array) + @test size(out) == (1, shape...) + end + + cleanup_fixture(fx) +end + +# Get a slice of length zero from a non-empty dataset. +@testset "dataset_slice_of_length_zero" begin + (fx, f) = setup_teardown_file() + + shapes = [(3,), (2, 2,), (2, 1, 5)] + for (i, shape) in enumerate(shapes) + dset = create_dataset(f, "x$(i)"; data=zeros(Int32, shape)) + @test size(dset) == shape + out = dset[2:2] + # not AbstractArray, which Dataset obeys TODO + @test isa(out, Array) + @test size(out) == (0, shape...) + end + + cleanup_fixture(fx) +end + +@testset "dataset_modify_all" begin + (fx, f) = setup_teardown_file() + + dset = create_dataset(f, "test"; data=1:10) + n = 4 + dset.data = ones(n) + @test dset.data == ones(n) + + cleanup_fixture(fx) +end end From a8107cd1bd7fa443969f3253f5480a2ed3be88a7 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:36:06 -0400 Subject: [PATCH 43/55] fix usage of Base.firstindex and Base.lastindex --- src/Exdir.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 4b61a5f..f4079db 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -264,9 +264,8 @@ Base.size(dset::Dataset) = size(dset.data) Base.getindex(dset::Dataset, inds...) = getindex(dset.data, inds...) Base.setindex!(dset::Dataset, val, inds...) = setindex!(dset.data, val, inds...) Base.eltype(dset::Dataset) = eltype(dset.data) -Base.firstindex(::Dataset) = 1 -# FIXME -Base.lastindex(::Dataset) = 1 +Base.firstindex(dset::Dataset) = firstindex(dset.data) +Base.lastindex(dset::Dataset) = lastindex(dset.data) # TODO this may fail in a gross way if value is a group/file/other Exdir type. # Can we restrict to scalars and arrays? From 4c82610b0d766b9044def021c92983f6758e0ae4 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:36:41 -0400 Subject: [PATCH 44/55] disallow certain fillvalues --- src/Exdir.jl | 2 ++ src/consistency.jl | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/Exdir.jl b/src/Exdir.jl index f4079db..6a57864 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -810,6 +810,8 @@ function create_dataset(grp::AbstractGroup, name::AbstractString; throw(ArgumentError("Cannot create dataset. Missing shape or data keyword.")) end + _assert_allowed_fillvalue(fillvalue) + (prepared_data, attrs, meta) = _prepare_write( data, Dict(), diff --git a/src/consistency.jl b/src/consistency.jl index 6bfb413..413f728 100644 --- a/src/consistency.jl +++ b/src/consistency.jl @@ -30,3 +30,20 @@ function _assert_data_shape_dtype_match(data, shape::Union{Dims, Nothing}, dtype end nothing end + +"""Only scalars and arrays of scalars are allowed as fill values.""" +function _assert_allowed_fillvalue(fillvalue) + if !isnothing(fillvalue) + # TODO + if isa(fillvalue, AbstractDict) + throw( + TypeError( + :allowed_fillvalue, + "fillvalue type is not supported", + AbstractArray, + typeof(fillvalue) + ) + ) + end + end +end From 17136dd18297072b4a0b4b411fe8a69d14559167 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:36:55 -0400 Subject: [PATCH 45/55] fix equality bug --- src/Exdir.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 6a57864..d924c3a 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -129,7 +129,7 @@ function Base.:(==)(obj::AbstractObject, other) # if obj.file.io_mode == OpenMode::FILE_CLOSED # return false # end - if !isa(obj, AbstractObject) + if !isa(other, AbstractObject) false else obj.relative_path == other.relative_path && From f32b83ca59cb06db8d8edef922e58e9286108aed Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:37:21 -0400 Subject: [PATCH 46/55] indexing differences from Python --- test/dataset.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index 621d0a7..a9b28be 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -545,9 +545,10 @@ end dset = create_dataset(f, "x"; shape=(10, 3), dtype=dt) data = dt.([1, 2, 3.0]) - dset[5] = data + # TODO Python is dset[5] + dset[5, :] = data - out = dset[5] + out = dset[5, :] @test eltype(out) == eltype(data) @test isequal(out, data) @@ -648,10 +649,15 @@ end for (i, shape) in enumerate(shapes) dset = create_dataset(f, "x$(i)"; data=zeros(Int32, shape)) @test size(dset) == shape - out = dset[2:2] + # Python + # rng = 2:2 + rng = 2:2:0 + out = dset[rng] # not AbstractArray, which Dataset obeys TODO @test isa(out, Array) - @test size(out) == (0, shape...) + # TODO implications of this being different from Python? non-zero on all other dimensions? + # @test size(out) == (0, shape[2:end]) + @test size(out) == (0,) end cleanup_fixture(fx) From 197c40d2daedc224d0402b38e7be8d66ca2cd4b5 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:39:01 -0400 Subject: [PATCH 47/55] test _prepare_write --- test/prepare_write.jl | 17 +++++++++++++++++ test/runtests.jl | 1 + 2 files changed, 18 insertions(+) create mode 100644 test/prepare_write.jl diff --git a/test/prepare_write.jl b/test/prepare_write.jl new file mode 100644 index 0000000..3d5dedc --- /dev/null +++ b/test/prepare_write.jl @@ -0,0 +1,17 @@ +using Exdir +using Test + +import Exdir: _prepare_write + +@testset "prepare_write" begin + # Julia: _prepare_write(data, attrs::AbstractDict, meta::AbstractDict) + # Python: def _prepare_write(data, plugins, attrs, meta): + + ret = _prepare_write(42, Dict(), Dict()) + ref1 = collect(42) + @test ret[1] == ref1 + + ret = _prepare_write("string", Dict(), Dict()) + ref1 = collect("string") + @test ret[1] == ref1 +end diff --git a/test/runtests.jl b/test/runtests.jl index 92cd08c..117d5b1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,6 +11,7 @@ using Test include("raw.jl") include("group.jl") # include("file.jl") + include("prepare_write.jl") include("dataset.jl") # include("attr.jl") end From 65bbf3a92b6e17e8a656ea431382aa4e6e10711a Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:39:28 -0400 Subject: [PATCH 48/55] ensure strings are arrays of characters --- src/Exdir.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index d924c3a..4a96dff 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -780,7 +780,7 @@ end function _prepare_write(data, attrs::AbstractDict, meta::AbstractDict) if isnothing(data) data = nothing - elseif !isa(data, AbstractArray) && !isa(data, AbstractString) + elseif !isa(data, AbstractArray) data = collect(data) end # If plugins were implemented, they would have been applied to attrs and From 34cf097a45234f195cfe4c4482763d0aff37891a Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:39:52 -0400 Subject: [PATCH 49/55] underlying NPZ library can't write (array of) characters --- test/dataset.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/dataset.jl b/test/dataset.jl index a9b28be..c90f879 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -274,8 +274,10 @@ end grp = create_group(f, "test") - dset = create_dataset(grp, "foo"; data="string") - @test dset.data == "string" + # TODO problem with underlying NPZ library + # dset = create_dataset(grp, "foo"; data="string") + # @test dset.data == "string" + @test_throws "unsupported type Char" create_dataset(grp, "foo"; data="string") cleanup_fixture(fx) end From 11a637d910dbe08dd03d02f5a1632b0b3288f931 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:27:01 -0400 Subject: [PATCH 50/55] start fixing use of Base.setindex! --- src/Exdir.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 4a96dff..5b84d9f 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -262,7 +262,13 @@ Base.iterate(dset::Dataset, state) = iterate(dset.data, state) Base.length(dset::Dataset) = prod(size(dset)) Base.size(dset::Dataset) = size(dset.data) Base.getindex(dset::Dataset, inds...) = getindex(dset.data, inds...) -Base.setindex!(dset::Dataset, val, inds...) = setindex!(dset.data, val, inds...) +# TODO +# Base.setindex!(dset::Dataset, val, inds...) = setindex!(dset.data, val, inds...) +function Base.setindex!(dset::Dataset, val, inds...) + tmp = dset.data + setindex!(tmp, val, inds...) + dset.data = tmp +end Base.eltype(dset::Dataset) = eltype(dset.data) Base.firstindex(dset::Dataset) = firstindex(dset.data) Base.lastindex(dset::Dataset) = lastindex(dset.data) From e96a0a1d41cbeed816677a81fb7e6d00e60de913 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Sat, 18 May 2024 16:54:05 -0400 Subject: [PATCH 51/55] add remaining files to mirror Python testing implementation --- test/attr.jl | 2 +- test/help_functions.jl | 3 +++ test/plugins.jl | 3 +++ test/quantities.jl | 3 +++ test/runtests.jl | 5 ++++- 5 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 test/help_functions.jl create mode 100644 test/plugins.jl create mode 100644 test/quantities.jl diff --git a/test/attr.jl b/test/attr.jl index 24c8063..035d51f 100644 --- a/test/attr.jl +++ b/test/attr.jl @@ -1,3 +1,3 @@ -@@testset "attr" begin +@testset "attr" begin end diff --git a/test/help_functions.jl b/test/help_functions.jl new file mode 100644 index 0000000..d26779c --- /dev/null +++ b/test/help_functions.jl @@ -0,0 +1,3 @@ +@testset "help_functions" begin + +end diff --git a/test/plugins.jl b/test/plugins.jl new file mode 100644 index 0000000..40d0382 --- /dev/null +++ b/test/plugins.jl @@ -0,0 +1,3 @@ +@testset "plugins" begin + +end diff --git a/test/quantities.jl b/test/quantities.jl new file mode 100644 index 0000000..98dc121 --- /dev/null +++ b/test/quantities.jl @@ -0,0 +1,3 @@ +@testset "quantities" begin + +end diff --git a/test/runtests.jl b/test/runtests.jl index 117d5b1..17ae937 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,10 +8,13 @@ using Test include("path.jl") include("consistency.jl") include("object.jl") + include("help_functions.jl") include("raw.jl") include("group.jl") # include("file.jl") include("prepare_write.jl") include("dataset.jl") - # include("attr.jl") + include("attr.jl") + include("plugins.jl") + include("quantities.jl") end From c0d7b4cb67e84fa055ddf57a88aacd556d741cda Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Mon, 27 May 2024 13:49:07 -0400 Subject: [PATCH 52/55] move IOError out to file --- src/Exdir.jl | 6 ------ src/exceptions.jl | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Exdir.jl b/src/Exdir.jl index 5b84d9f..595067e 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -497,12 +497,6 @@ function Base.delete!(grp::AbstractGroup, name::AbstractString) end delete_object(grp::AbstractGroup, name::AbstractString) = delete!(grp, name) -struct IOError <: Exception - msg::String -end - -Base.showerror(io::IO, e::IOError) = print(io, "IOError: $(e.msg)") - struct File <: AbstractGroup root_directory::String parent_path::String diff --git a/src/exceptions.jl b/src/exceptions.jl index ffe41c9..1c2b27a 100644 --- a/src/exceptions.jl +++ b/src/exceptions.jl @@ -1,3 +1,9 @@ struct NotImplementedError <: Exception msg::String end + +struct IOError <: Exception + msg::String +end + +Base.showerror(io::IO, e::IOError) = print(io, "IOError: $(e.msg)") From 580581badbb5491a549218f0dd9dcdc9a1544b25 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Mon, 27 May 2024 13:55:33 -0400 Subject: [PATCH 53/55] start object file --- src/Exdir.jl | 46 +--------------------------------------------- src/object.jl | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 45 deletions(-) create mode 100644 src/object.jl diff --git a/src/Exdir.jl b/src/Exdir.jl index 595067e..af5227b 100644 --- a/src/Exdir.jl +++ b/src/Exdir.jl @@ -23,6 +23,7 @@ include("constants.jl") include("exceptions.jl") include("mode.jl") include("path.jl") +include("object.jl") abstract type AbstractObject end abstract type AbstractGroup <: AbstractObject end @@ -926,51 +927,6 @@ function require_dataset(grp::AbstractGroup, name::AbstractString; current_object end -function root_directory(path::AbstractString) - # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L128 - path = realpath(path) - found = false - while !found - (parent, pname) = splitdir(path) - if parent == path - return nothing - end - if !is_nonraw_object_directory(path) - path = parent - continue - end - meta_data = YAML.load_file(joinpath(path, META_FILENAME)) - if !haskey(meta_data, EXDIR_METANAME) - path = parent - continue - end - exdir_meta = meta_data[EXDIR_METANAME] - if !haskey(exdir_meta, TYPE_METANAME) - path = parent - continue - end - if FILE_TYPENAME != exdir_meta[TYPE_METANAME] - path = parent - continue - end - found = true - end - path -end - -function is_inside_exdir(path::AbstractString) - # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L161 - path = realpath(path) - !isnothing(root_directory(path)) -end - -function assert_inside_exdir(path::AbstractString) - # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L166 - if !is_inside_exdir(path) - error("Path " + path + " is not inside an Exdir repository.") - end -end - function open_object(directory::AbstractString) # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L172 path = realpath(directory) diff --git a/src/object.jl b/src/object.jl new file mode 100644 index 0000000..0f6952c --- /dev/null +++ b/src/object.jl @@ -0,0 +1,44 @@ +function root_directory(path::AbstractString) + # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L128 + path = realpath(path) + found = false + while !found + (parent, pname) = splitdir(path) + if parent == path + return nothing + end + if !is_nonraw_object_directory(path) + path = parent + continue + end + meta_data = YAML.load_file(joinpath(path, META_FILENAME)) + if !haskey(meta_data, EXDIR_METANAME) + path = parent + continue + end + exdir_meta = meta_data[EXDIR_METANAME] + if !haskey(exdir_meta, TYPE_METANAME) + path = parent + continue + end + if FILE_TYPENAME != exdir_meta[TYPE_METANAME] + path = parent + continue + end + found = true + end + path +end + +function is_inside_exdir(path::AbstractString) + # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L161 + path = realpath(path) + !isnothing(root_directory(path)) +end + +function assert_inside_exdir(path::AbstractString) + # https://github.com/CINPLA/exdir/blob/89c1d34a5ce65fefc09b6fe1c5e8fef68c494e75/exdir/core/exdir_object.py#L166 + if !is_inside_exdir(path) + error("Path " + path + " is not inside an Exdir repository.") + end +end From e93cd0e9ef3682e7ef848698ebc9c8a370efc547 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 28 May 2024 09:57:06 -0400 Subject: [PATCH 54/55] remove is useful in multiple test files --- test/file.jl | 12 ------------ test/support.jl | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/file.jl b/test/file.jl index d4c9174..137987f 100644 --- a/test/file.jl +++ b/test/file.jl @@ -9,18 +9,6 @@ import Exdir: create_object_directory, # include("support.jl") -""" - remove(name) - -If name is a path or directory tree, recursively delete it. -Otherwise, do nothing. -""" -function remove(name) - if ispath(name) - rm(name, recursive=true) - end -end - @testset "file" begin @testset "form_location" begin diff --git a/test/support.jl b/test/support.jl index ea556f2..e449cd7 100644 --- a/test/support.jl +++ b/test/support.jl @@ -60,3 +60,15 @@ function exdir_tmpfile() # close(f) # rm(tmpdir, recursive=true) end + +""" + remove(name) + +If name is a path or directory tree, recursively delete it. +Otherwise, do nothing. +""" +function remove(name) + if ispath(name) + rm(name, recursive=true) + end +end From 1ebd6183d74302ffbdddf10a384b5a453a209531 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Tue, 28 May 2024 09:57:59 -0400 Subject: [PATCH 55/55] .git-blame-ignore-revs --- .git-blame-ignore-revs | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..b8d4e92 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,6 @@ +# move IOError out to file +c0d7b4cb67e84fa055ddf57a88aacd556d741cda +# start object file +580581badbb5491a549218f0dd9dcdc9a1544b25 +# remove is useful in multiple test files +e93cd0e9ef3682e7ef848698ebc9c8a370efc547