diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h index df9209a342..724724e7f5 100644 --- a/c/driver/postgresql/copy/reader.h +++ b/c/driver/postgresql/copy/reader.h @@ -425,6 +425,70 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader { static const uint16_t kNumericNinf = 0xF000; }; +template +class PostgresCopyTimeOfDayFieldReader : public PostgresCopyFieldReader { + public: + // Microseconds per day (24h) + static inline constexpr int64_t kUsecsPerDay = 86400LL * 1000000LL; + // Nanoseconds per day (24h) + static inline constexpr int64_t kNsecsPerDay = 86400LL * 1000000000LL; + + ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array, + ArrowError* error) override { + if (field_size_bytes <= 0) { + return ArrowArrayAppendNull(array, 1); + } + + // PostgreSQL TIME binary payload is int64 microseconds since midnight. https://www.postgresql.org/docs/current/datatype-datetime.html + if (field_size_bytes != static_cast(sizeof(int64_t))) { + ArrowErrorSet(error, "Expected field with %d bytes but found field with %d bytes", + static_cast(sizeof(int64_t)), + static_cast(field_size_bytes)); // NOLINT(runtime/int) + return EINVAL; + } + + const int64_t time_usec = ReadUnsafe(data); + + // PostgreSQL time_recv validates microseconds since midnight (0..USECS_PER_DAY). + // Keep this validation here so we don't produce nonsensical Arrow values. + if (time_usec < 0 || time_usec > kUsecsPerDay) { + ArrowErrorSet(error, + "[libpq] TIME value %" PRId64 + " usec is out of range [0, %" PRId64 "]", + time_usec, kUsecsPerDay); + return EINVAL; + } + + // Convert to Arrow representation requested by schema: + // Arrow TIME32 uses int32 in seconds or milliseconds; TIME64 uses int64 in microseconds or nanoseconds. 
+ int64_t out64 = 0; + switch (TU) { + case NANOARROW_TIME_UNIT_SECOND: + out64 = time_usec / 1000000LL; + break; + case NANOARROW_TIME_UNIT_MILLI: + out64 = time_usec / 1000LL; + break; + case NANOARROW_TIME_UNIT_MICRO: + out64 = time_usec; + break; + case NANOARROW_TIME_UNIT_NANO: + out64 = time_usec * 1000LL; + break; + } + + // TIME32 -> int32: second/milli values from the range-checked input cannot overflow. + if constexpr (std::is_same::value) { + const int32_t out32 = static_cast(out64); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &out32, sizeof(out32))); + } else { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &out64, sizeof(out64))); + } + + return AppendValid(array); + } +}; + // Reader for Pg->Arrow conversions whose Arrow representation is simply the // bytes of the field representation. This can be used with binary and string // Arrow types and any Postgres type. @@ -935,11 +999,43 @@ static inline ArrowErrorCode MakeCopyFieldReader( return NANOARROW_OK; } + case NANOARROW_TYPE_TIME32: { + switch (pg_type.type_id()) { + case PostgresTypeId::kTime: + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + *out = std::make_unique< + PostgresCopyTimeOfDayFieldReader>(); + return NANOARROW_OK; + case NANOARROW_TIME_UNIT_MILLI: + *out = std::make_unique< + PostgresCopyTimeOfDayFieldReader>(); + return NANOARROW_OK; + default: + // TIME32 only supports second/milli in Arrow. 
See https://arrow.apache.org/docs/cpp/api/datatype.html + return ErrorCantConvert(error, pg_type, schema_view); + } + default: + return ErrorCantConvert(error, pg_type, schema_view); + } + } + case NANOARROW_TYPE_TIME64: { switch (pg_type.type_id()) { case PostgresTypeId::kTime: - *out = std::make_unique>(); - return NANOARROW_OK; + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_MICRO: + *out = std::make_unique< + PostgresCopyTimeOfDayFieldReader>(); + return NANOARROW_OK; + case NANOARROW_TIME_UNIT_NANO: + *out = std::make_unique< + PostgresCopyTimeOfDayFieldReader>(); + return NANOARROW_OK; + default: + // TIME64 only supports micro/nano in Arrow. See https://arrow.apache.org/docs/cpp/api/datatype.html + return ErrorCantConvert(error, pg_type, schema_view); + } default: return ErrorCantConvert(error, pg_type, schema_view); } diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index 2b31310e70..40be78d4c6 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -735,6 +735,71 @@ class PostgresCopyTimestampFieldWriter : public PostgresCopyFieldWriter { } }; + +template +class PostgresCopyTimeFieldWriter : public PostgresCopyFieldWriter { + public: + // Microseconds per day (24h) + static inline constexpr int64_t kUsecsPerDay = 86400LL * 1000000LL; + + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { + // PostgreSQL TIME binary format is an int64 microseconds-since-midnight + // and the COPY binary field length must be 8 bytes. 
https://www.postgresql.org/docs/current/datatype-datetime.html + constexpr int32_t field_size_bytes = sizeof(int64_t); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); + + const int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index); + int64_t micros = 0; + + bool overflow_safe = true; + switch (TU) { + case NANOARROW_TIME_UNIT_SECOND: + overflow_safe = + raw_value <= kMaxSafeSecondsToMicros && + raw_value >= kMinSafeSecondsToMicros; + if (overflow_safe) { + micros = raw_value * 1000000LL; + } + break; + case NANOARROW_TIME_UNIT_MILLI: + overflow_safe = + raw_value <= kMaxSafeMillisToMicros && + raw_value >= kMinSafeMillisToMicros; + if (overflow_safe) { + micros = raw_value * 1000LL; + } + break; + case NANOARROW_TIME_UNIT_MICRO: + micros = raw_value; + break; + case NANOARROW_TIME_UNIT_NANO: + micros = raw_value / 1000LL; + break; + } + + if (!overflow_safe) { + ArrowErrorSet( + error, + "[libpq] Row %" PRId64 " time value %" PRId64 + " with unit %d would overflow", + index, raw_value, TU); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + if (micros < 0 || micros > kUsecsPerDay) { + ArrowErrorSet(error, + "[libpq] Row %" PRId64 + " time value %" PRId64 " (unit %d) -> %" PRId64 + " microseconds is out of range [0, %" PRId64 "]", + index, raw_value, TU, micros, kUsecsPerDay); + return ADBC_STATUS_INVALID_ARGUMENT; + } + + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, micros, error)); + return ADBC_STATUS_OK; + } +}; + static inline ArrowErrorCode MakeCopyFieldWriter( struct ArrowSchema* schema, struct ArrowArrayView* array_view, const PostgresTypeResolver& type_resolver, @@ -773,12 +838,34 @@ static inline ArrowErrorCode MakeCopyFieldWriter( *out = T::Create(array_view); return NANOARROW_OK; } + case NANOARROW_TYPE_TIME32: { + switch (schema_view.time_unit) { + case NANOARROW_TIME_UNIT_SECOND: { + using T = PostgresCopyTimeFieldWriter; + *out = T::Create(array_view); + return NANOARROW_OK; + } + case NANOARROW_TIME_UNIT_MILLI: { + 
using T = PostgresCopyTimeFieldWriter; + *out = T::Create(array_view); + return NANOARROW_OK; + } + default: + return ADBC_STATUS_NOT_IMPLEMENTED; + } + } case NANOARROW_TYPE_TIME64: { switch (schema_view.time_unit) { - case NANOARROW_TIME_UNIT_MICRO: - using T = PostgresCopyNetworkEndianFieldWriter; + case NANOARROW_TIME_UNIT_MICRO: { + using T = PostgresCopyTimeFieldWriter; *out = T::Create(array_view); return NANOARROW_OK; + } + case NANOARROW_TIME_UNIT_NANO: { + using T = PostgresCopyTimeFieldWriter; + *out = T::Create(array_view); + return NANOARROW_OK; + } default: return ADBC_STATUS_NOT_IMPLEMENTED; } diff --git a/c/driver/postgresql/validation/queries/ingest/time_ms.txtcase b/c/driver/postgresql/validation/queries/ingest/time_ms.txtcase index 30dfb7d9b3..c102f85a05 100644 --- a/c/driver/postgresql/validation/queries/ingest/time_ms.txtcase +++ b/c/driver/postgresql/validation/queries/ingest/time_ms.txtcase @@ -16,6 +16,29 @@ // under the License. -// part: metadata +// part: expected_schema -skip = "COPY Writer not implemented" +{ + "format": "+s", + "children": [ + { + "name": "idx", + "format": "l", + "flags": ["nullable"] + }, + { + "name": "value", + "format": "ttm", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"idx": 0, "value": null} +{"idx": 1, "value": 0} +{"idx": 2, "value": 1} +{"idx": 3, "value": 3723123} +{"idx": 4, "value": 86399999} +{"idx": 5, "value": 86400000} diff --git a/c/driver/postgresql/validation/queries/ingest/time_ns.txtcase b/c/driver/postgresql/validation/queries/ingest/time_ns.txtcase index 30dfb7d9b3..c826ff2dbe 100644 --- a/c/driver/postgresql/validation/queries/ingest/time_ns.txtcase +++ b/c/driver/postgresql/validation/queries/ingest/time_ns.txtcase @@ -16,6 +16,29 @@ // under the License. 
-// part: metadata +// part: expected_schema -skip = "COPY Writer not implemented" +{ + "format": "+s", + "children": [ + { + "name": "idx", + "format": "l", + "flags": ["nullable"] + }, + { + "name": "value", + "format": "ttn", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"idx": 0, "value": null} +{"idx": 1, "value": 0} +{"idx": 2, "value": 1} +{"idx": 3, "value": 3723123456000} +{"idx": 4, "value": 86399999999999} +{"idx": 5, "value": 86400000000000} diff --git a/c/driver/postgresql/validation/queries/ingest/time_s.txtcase b/c/driver/postgresql/validation/queries/ingest/time_s.txtcase index 30dfb7d9b3..db2fb4e912 100644 --- a/c/driver/postgresql/validation/queries/ingest/time_s.txtcase +++ b/c/driver/postgresql/validation/queries/ingest/time_s.txtcase @@ -16,6 +16,29 @@ // under the License. -// part: metadata +// part: expected_schema -skip = "COPY Writer not implemented" +{ + "format": "+s", + "children": [ + { + "name": "idx", + "format": "l", + "flags": ["nullable"] + }, + { + "name": "value", + "format": "tts", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"idx": 0, "value": null} +{"idx": 1, "value": 0} +{"idx": 2, "value": 1} +{"idx": 3, "value": 3723} +{"idx": 4, "value": 86399} +{"idx": 5, "value": 86400} diff --git a/c/driver/postgresql/validation/queries/ingest/time_us.txtcase b/c/driver/postgresql/validation/queries/ingest/time_us.txtcase new file mode 100644 index 0000000000..3724d0e4cc --- /dev/null +++ b/c/driver/postgresql/validation/queries/ingest/time_us.txtcase @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +// part: expected_schema + +{ + "format": "+s", + "children": [ + { + "name": "idx", + "format": "l", + "flags": ["nullable"] + }, + { + "name": "value", + "format": "ttu", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"idx": 0, "value": null} +{"idx": 1, "value": 0} +{"idx": 2, "value": 1} +{"idx": 3, "value": 3723123456} +{"idx": 4, "value": 86399999999} +{"idx": 5, "value": 86400000000} diff --git a/c/driver/postgresql/validation/queries/type/bind/time_ms.txtcase b/c/driver/postgresql/validation/queries/type/bind/time_ms.txtcase index 30dfb7d9b3..cb21b23fb5 100644 --- a/c/driver/postgresql/validation/queries/type/bind/time_ms.txtcase +++ b/c/driver/postgresql/validation/queries/type/bind/time_ms.txtcase @@ -16,6 +16,24 @@ // under the License. -// part: metadata +// part: expected_schema -skip = "COPY Writer not implemented" +{ + "format": "+s", + "children": [ + { + "name": "value", + "format": "ttm", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"value": null} +{"value": 0} +{"value": 0} +{"value": 3723123} +{"value": 86399999} +{"value": 86400000} diff --git a/c/driver/postgresql/validation/queries/type/bind/time_ns.txtcase b/c/driver/postgresql/validation/queries/type/bind/time_ns.txtcase index 30dfb7d9b3..4f82b044d9 100644 --- a/c/driver/postgresql/validation/queries/type/bind/time_ns.txtcase +++ b/c/driver/postgresql/validation/queries/type/bind/time_ns.txtcase @@ -16,6 +16,24 @@ // under the License. 
-// part: metadata +// part: expected_schema -skip = "COPY Writer not implemented" +{ + "format": "+s", + "children": [ + { + "name": "value", + "format": "ttn", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"value": null} +{"value": 0} +{"value": 1000} +{"value": 3723123456000} +{"value": 86399999999000} +{"value": 86400000000000} diff --git a/c/driver/postgresql/validation/queries/type/bind/time_s.txtcase b/c/driver/postgresql/validation/queries/type/bind/time_s.txtcase index 30dfb7d9b3..06b725ad28 100644 --- a/c/driver/postgresql/validation/queries/type/bind/time_s.txtcase +++ b/c/driver/postgresql/validation/queries/type/bind/time_s.txtcase @@ -16,6 +16,24 @@ // under the License. -// part: metadata +// part: expected_schema -skip = "COPY Writer not implemented" +{ + "format": "+s", + "children": [ + { + "name": "value", + "format": "tts", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"value": null} +{"value": 0} +{"value": 0} +{"value": 3723} +{"value": 86399} +{"value": 86400} diff --git a/c/driver/postgresql/validation/queries/type/bind/time_us.txtcase b/c/driver/postgresql/validation/queries/type/bind/time_us.txtcase new file mode 100644 index 0000000000..062ef9e94b --- /dev/null +++ b/c/driver/postgresql/validation/queries/type/bind/time_us.txtcase @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +// part: expected_schema + +{ + "format": "+s", + "children": [ + { + "name": "value", + "format": "ttu", + "flags": ["nullable"] + } + ] +} + +// part: expected + +{"value": null} +{"value": 0} +{"value": 1} +{"value": 3723123456} +{"value": 86399999999} +{"value": 86400000000}