Skip to content

Validity map with nullable datetime columns #866

@cgiachalis

Description

@cgiachalis

Datetime columns (attributes) declared as nullable should return NA for missing values when the array is read back.

Reprex

library(tiledb)

uri <- tempfile()


domain <- tiledb_domain(tiledb_dim("row", c(0L, 100L), 100L, "INT32"))

attrib <- c(tiledb_attr("date",   type = "DATETIME_DAY", nullable = TRUE),
            tiledb_attr("datetime",   type = "DATETIME_MS", nullable = TRUE),
            tiledb_attr("nanosecs",   type = "DATETIME_NS", nullable = TRUE),
            tiledb_attr("float64",  type = "FLOAT64", nullable = TRUE))

schema <- tiledb_array_schema(domain, attrib, sparse=TRUE)
res <- tiledb_array_create(uri, schema)


df <- data.frame(row     =  1:2,
                 date    =  c(as.Date("1990-01-01"), as.Date(NA)),
                 datetime   =  c(as.POSIXct("1990-01-01"), as.POSIXct(NA)),
                 nanosecs   =  nanotime::as.nanotime(c(100, NA)),
                 float64 =  c(1, NA))
df
#>   row       date       datetime        nanosecs                            float64
#> 1   1 1990-01-01   1990-01-01    1970-01-01T00:00:00.000000100+00:00       1
#> 2   2       <NA>       <NA>            <NA>                                NA
arr <- tiledb_array(uri, return_as="data.table")
arr[] <- df
arr[]
#>      row       date                  datetime      nanosecs                             float64
#>    <int>     <Date>                    <POSc>      <nanotime>                            <num>
#> 1:     1 1990-01-01       1990-01-01 00:00:00       1970-01-01T00:00:00.000000100+00:00     1
#> 2:     2 1970-01-01      -292275055-05-16 18:21:56      <NA>                               NA

Check

 all.equal(target = df, current = arr[],  check.attributes=FALSE)
[1] "Component “date”: 'is.NA' value mismatch: 0 in current 1 in target"    
[2] "Component “datetime”: 'is.NA' value mismatch: 0 in current 1 in target"

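The buffer conversion in `libtiledb_query_get_buffer_ptr` does not apply the validity map to datetime attributes that are declared nullable,
as it does for other types, so null cells are returned as converted raw values instead of NA (see row 2 above):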

TileDB-R/src/libtiledb.cpp

Lines 3713 to 3719 in 3760c68

} else if (dtype == "DATETIME_HR" || dtype == "DATETIME_MIN" ||
dtype == "DATETIME_SEC" || dtype == "DATETIME_MS" ||
dtype == "DATETIME_US") {
std::vector<int64_t> v(buf->ncells);
std::memcpy(&(v[0]), (void *)buf->vec.data(), buf->ncells * buf->size);
DatetimeVector dv =
int64_to_datetimes(v, _string_to_tiledb_datatype(dtype));

TileDB-R/src/libtiledb.cpp

Lines 3718 to 3719 in 3760c68

DatetimeVector dv =
int64_to_datetimes(v, _string_to_tiledb_datatype(dtype));

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions