Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MIMEs = "6c6e2e6c-3030-632d-7369-2d6c69616d65"
ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Expand Down Expand Up @@ -55,6 +56,7 @@ InteractiveUtils = "1.11"
JSON = "1.0.0"
LieGroups = "0.1"
LinearAlgebra = "1.11"
MIMEs = "1.1"
ManifoldsBase = "1, 2"
OrderedCollections = "1.4"
Pkg = "1.4, 1.5"
Expand Down
7 changes: 4 additions & 3 deletions ext/BlobArrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@ module BlobArrow

using Arrow
using DistributedFactorGraphs
using DistributedFactorGraphs: _MIMETypes
using DistributedFactorGraphs: _MIMEOverrides, getMimetype

function __init__()
@info "Including Arrow blobs support in DFG."
return push!(_MIMETypes, MIME("application/vnd.apache.arrow.file") => format"Arrow") # see issue #507
push!(_MIMEOverrides, format"Arrow" => MIME("application/vnd.apache.arrow.file"))
return nothing
end

# kwargs: compress = :lz4,
function DFG.packBlob(::Type{format"Arrow"}, data; kwargs...)
io = IOBuffer()
Arrow.write(io, data; kwargs...)
blob = take!(io)
mimetype = findfirst(==(format"Arrow"), _MIMETypes)
mimetype = getMimetype(format"Arrow")
return blob, mimetype
end

Expand Down
2 changes: 1 addition & 1 deletion src/Common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ function Timestamp(epoch::Val{:unix}, t::Float64, zone = tz"UTC")
end
Timestamp(t::Float64, zone = tz"UTC") = Timestamp(Val(:unix), t, zone)
function Timestamp(epoch::Val{:rata}, t::Float64, zone = tz"UTC")
return TimeDateZone(convert(DateTime, Millisecond(t*10^3)), zone)
return TimeDateZone(convert(DateTime, Millisecond(t * 10^3)), zone)
end

function now_tdz(zone = tz"UTC")
Expand Down
14 changes: 7 additions & 7 deletions src/DataBlobs/entities/BlobEntry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,23 @@ StructUtils.@kwarg struct Blobentry
""" (Optional) crc32c hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
crchash::Union{UInt32, Nothing} =
nothing & (
json=(
lower = h->isnothing(h) ? nothing : string(h, base = 16),
lift = s->isnothing(s) ? nothing : parse(UInt32, s; base = 16),
json = (
lower = h -> isnothing(h) ? nothing : string(h; base = 16),
lift = s -> isnothing(s) ? nothing : parse(UInt32, s; base = 16),
)
)
""" (Optional) sha256 hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
shahash::Union{Vector{UInt8}, Nothing} =
nothing & (
json=(
lower = h->isnothing(h) ? nothing : bytes2hex(h),
lift = s->isnothing(s) ? nothing : hex2bytes(s),
json = (
lower = h -> isnothing(h) ? nothing : bytes2hex(h),
lift = s -> isnothing(s) ? nothing : hex2bytes(s),
)
)
""" Source system or application where the blob was created (e.g., webapp, sdk, robot)"""
origin::String = ""
"""Number of bytes in blob serialized as a string"""
size::Int64 = -1 & (json=(lower = string, lift = x->parse(Int64, x)))
size::Int64 = -1 & (json = (lower = string, lift = x -> parse(Int64, x)))
""" Additional information that can help a different user of the Blob. """
description::String = ""
""" MIME description describing the format of binary data in the `Blob`, e.g. 'image/png' or 'application/json'. """
Expand Down
127 changes: 80 additions & 47 deletions src/DataBlobs/services/BlobPacking.jl
Original file line number Diff line number Diff line change
@@ -1,49 +1,91 @@
# using FileIO
# using ImageIO
# using LasIO
# using BSON
# using OrderedCollections

# 2 types for now with MIME type
# 1. JSON - application/octet-stream/json
# 2. FileIO - application/octet-stream
# - application/bson
# - image/jpeg
# - image/png
# - application/vnd.apache.arrow.file

const _MIMETypes = OrderedDict{MIME, DataType}()
push!(_MIMETypes, MIME("application/octet-stream/json") => format"JSON")
push!(_MIMETypes, MIME("application/bson") => format"BSON")
push!(_MIMETypes, MIME("image/png") => format"PNG")
push!(_MIMETypes, MIME("image/jpeg") => format"JPG")
push!(_MIMETypes, MIME("application/vnd.las") => format"LAS")
push!(_MIMETypes, MIME("application/vnd.apache.parque") => format"Parquet") # Provided by FileIO with ParquetFiles
##==============================================================================
## BlobPacking: format <-> MIME type bridging and blob serialization
##==============================================================================

# Override dictionary for formats not covered by MIMEs.jl + FileIO auto-detection.
# Standard types (PNG, JPEG, CSV, etc.) are auto-detected and don't need entries here.
# Keys are FileIO `DataFormat` types; values are the MIME type associated with that format.
# `getMimetype` and `getDataFormat` consult this table before attempting any auto-detection.
# The table is mutable on purpose: package extensions can `push!` extra
# `format => MIME` pairs at load time (the Arrow extension does this in its `__init__`).
const _MIMEOverrides = OrderedDict{DataType, MIME}(
format"JSON" => MIME("application/json"),
format"BSON" => MIME("application/bson"),
format"LAS" => MIME("application/vnd.las"),
format"Parquet" => MIME("application/vnd.apache.parquet"),
)

"""
getMimetype(::Type{DataFormat{S}}) -> MIME

Get the MIME type for a FileIO `DataFormat`. Entries in `_MIMEOverrides` take
precedence; otherwise the MIME type is derived from FileIO's extension registry
and MIMEs.jl, defaulting to `application/octet-stream` when nothing matches.

# Examples
```julia
getMimetype(format"PNG") # MIME("image/png")
getMimetype(format"JSON") # MIME("application/json")
```
"""
function getMimetype(::Type{DataFormat{S}}) where {S}
    T = DataFormat{S}
    # Explicit overrides always win over auto-detection.
    haskey(_MIMEOverrides, T) && return _MIMEOverrides[T]
    try
        # The second element of FileIO's registry record holds the extension(s):
        # either a single string or a vector of strings.
        exts = FileIO.info(T)[2]
        # Try every registered extension in order, not just the first one, so a
        # format still resolves when only a later extension is known to MIMEs.jl.
        for ext in (exts isa AbstractVector ? exts : (exts,))
            m = mime_from_extension(ext)
            isnothing(m) || return m
        end
    catch
        # Deliberate best-effort: a format that is not registered with FileIO
        # (or any other lookup failure) falls through to the generic type below.
    end
    return MIME("application/octet-stream")
end

"""
getDataFormat(::MIME) -> Union{Type{DataFormat{S}}, Nothing}

Get the FileIO `DataFormat` for a MIME type. Checks `_MIMEOverrides` first, then
falls back to MIMEs.jl and FileIO's extension registry.

Returns `nothing` if no matching format is found.

# Examples
```julia
getDataFormat(MIME("image/png")) # format"PNG"
getDataFormat(MIME("application/json")) # format"JSON"
```
"""
function getDataFormat(m::MIME)
    # Explicit overrides take precedence over the extension-based lookup.
    for (fmt, mime) in _MIMEOverrides
        mime == m && return fmt
    end
    # Map MIME -> file extension (MIMEs.jl) -> FileIO format symbol.
    ext = extension_from_mime(m)
    registered = get(FileIO.ext2sym, ext, nothing)
    isnothing(registered) && return nothing
    # FileIO stores a Vector{Symbol} when several formats share one extension;
    # `DataFormat{...}` only accepts a single Symbol, so pick the first registered.
    if registered isa AbstractVector
        isempty(registered) && return nothing
        return DataFormat{first(registered)}
    end
    return DataFormat{registered}
end

"""
packBlob
Convert a file (JSON, JPG, PNG, BSON, LAS) to Vector{UInt8} for use as a Blob.
Returns the blob and MIME type.
Convert data to `Vector{UInt8}` for use as a Blob. Returns `(blob, mimetype)`.
The MIME type is automatically determined from the DataFormat.
"""
function packBlob end

"""
unpackBlob
Convert a Blob back to the origanal typ using the MIME type or DataFormat type.
Convert a Blob back to the original type using the MIME type or DataFormat type.
"""
function unpackBlob end

unpackBlob(mime::String, blob) = unpackBlob(MIME(mime), blob)

function unpackBlob(T::MIME, blob)
dataformat = get(_MIMETypes, T, nothing)
dataformat = getDataFormat(T)
isnothing(dataformat) && error("Format not found for MIME type $(T)")
return unpackBlob(dataformat, blob)
end

# 1. JSON strings are saved as is
function packBlob(::Type{format"JSON"}, json_str::String)
mimetype = findfirst(==(format"JSON"), _MIMETypes)
# blob = codeunits(json_str)
mimetype = getMimetype(format"JSON")
blob = Vector{UInt8}(json_str)
return blob, mimetype
end
Expand All @@ -55,16 +97,12 @@ end
unpackBlob(entry::Blobentry, blob::Vector{UInt8}) = unpackBlob(entry.mimetype, blob)
unpackBlob(eb::Pair{<:Blobentry, Vector{UInt8}}) = unpackBlob(eb[1], eb[2])

# 2/ FileIO
# 2. FileIO formats (PNG, JPEG, BSON, LAS, Parquet, etc.)
function packBlob(::Type{T}, data::Any; kwargs...) where {T <: DataFormat}
io = IOBuffer()
save(Stream{T}(io), data; kwargs...)
blob = take!(io)
mimetype = findfirst(==(T), _MIMETypes)
if isnothing(mimetype)
@warn "No MIME type found for format $T"
mimetype = MIME"application/octet-stream"
end
mimetype = getMimetype(T)
return blob, mimetype
end

Expand All @@ -73,18 +111,13 @@ function unpackBlob(::Type{T}, blob::Vector{UInt8}) where {T <: DataFormat}
return load(Stream{T}(io))
end

# if false
# json_str = "{\"name\":\"John\"}"
# blob, mimetype = packBlob(format"JSON", json_str)
# @assert json_str == unpackBlob(format"JSON", blob)
# @assert json_str == unpackBlob(MIME("application/octet-stream/json"), blob)
# @assert json_str == unpackBlob("application/octet-stream/json", blob)

# blob,mime = packBlob(format"PNG", img)
# up_img = unpackBlob(format"PNG", blob)

# #TODO BSON does not work yet, can extend [un]packBlob(::Type{format"BSON"}, ...)
# packBlob(format"BSON", Dict("name"=>"John"))
# unpackBlob(format"BSON", Dict("name"=>"John"))
"""
getMimetype(io::IO) -> MIME

# end
Detect the MIME type of data in an IO stream using FileIO's format detection.
"""
function getMimetype(io::IO)
    # Local helper: pull the DataFormat type parameter out of a FileIO stream.
    formatof(::FileIO.Stream{F}) where {F} = F
    # Let FileIO detect the format of the stream, then reuse the
    # DataFormat-based method to translate it into a MIME type.
    detected = FileIO.query(io)
    fmt = formatof(detected)
    return getMimetype(fmt)
end
71 changes: 64 additions & 7 deletions src/DataBlobs/services/BlobWrappers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ $(METHODLIST)
"""
function deleteBlob_Agent! end

"""
Convenience wrapper to load the Blobentry with the given label from a factor together with its Blob; returns `(entry, blob)`.

$(METHODLIST)
"""
function loadBlob_Factor end

"""
Convenience wrapper to add a Blobentry to a factor and save its Blob to a Blobstore; returns the Blobentry.

$(METHODLIST)
"""
function saveBlob_Factor! end

"""
Convenience wrapper to delete a Blobentry from a factor and its Blob from the Blobstore.

$(METHODLIST)
"""
function deleteBlob_Factor! end

function loadBlob_Variable(
dfg::AbstractDFG,
variable_label::Symbol,
Expand Down Expand Up @@ -166,6 +187,42 @@ function deleteBlob_Agent!(dfg::AbstractDFG, entry_label::Symbol)
return 2
end

function loadBlob_Factor(dfg::AbstractDFG, factor_label::Symbol, entry_label::Symbol)
    # Resolve the Blobentry on the factor first; it is needed to locate the blob.
    blobentry = getFactorBlobentry(dfg, factor_label, entry_label)
    # Hand back the entry together with the blob it refers to.
    return blobentry, getBlob(dfg, blobentry)
end

function saveBlob_Factor!(
    dfg::AbstractDFG,
    factor_label::Symbol,
    blob::Vector{UInt8},
    entry::Blobentry,
)
    # Register the entry on the factor before the blob itself is stored.
    addFactorBlobentry!(dfg, factor_label, entry)

    # Store the raw bytes under the same entry.
    addBlob!(dfg, entry, blob)

    # The entry that was passed in is returned to the caller.
    return entry
end

function saveBlob_Factor!(
    dfg::AbstractDFG,
    factor_label::Symbol,
    blob::Vector{UInt8},
    entry_label::Symbol,
    blobstore::Symbol = :default;
    blobentry_kwargs...,
)
    # Build the Blobentry from the label, the blobstore and any extra keyword
    # fields, then delegate to the method that takes a ready-made entry.
    return saveBlob_Factor!(
        dfg,
        factor_label,
        blob,
        Blobentry(entry_label, blobstore; blobentry_kwargs...),
    )
end

function deleteBlob_Factor!(dfg::AbstractDFG, factor_label::Symbol, entry_label::Symbol)
    # Fetch the entry up front: it is still needed to delete the blob after the
    # entry itself has been removed from the factor.
    blobentry = getFactorBlobentry(dfg, factor_label, entry_label)

    deleteFactorBlobentry!(dfg, factor_label, entry_label)
    deleteBlob!(dfg, blobentry)

    # Always returns 2 — presumably the number of removed items (entry + blob),
    # mirroring `deleteBlob_Agent!`; confirm against callers.
    return 2
end

function saveImage_Variable!(
dfg::AbstractDFG,
variable_label::Symbol,
Expand All @@ -174,19 +231,19 @@ function saveImage_Variable!(
blobstore::Symbol = :default;
entry_kwargs...,
)
mimeType = get(entry_kwargs, :mimeType, MIME("image/png"))
format = _MIMETypes[mimeType]

blob, mimeType = packBlob(format, img)
mimetype = get(entry_kwargs, :mimeType, MIME("image/png"))
format = getDataFormat(mimetype)
isnothing(format) &&
throw(ArgumentError("Unsupported MIME type for image: $(mimetype)"))
blob, mimetype = packBlob(format, img)

size = string(length(blob))
entry = Blobentry(
entry_label,
blobstore;
blobid = uuid4(),
entry_kwargs...,
size,
mimeType = string(mimeType),
size = length(blob),
mimetype,
)

return saveBlob_Variable!(dfg, variable_label, blob, entry)
Expand Down
Loading
Loading