Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions datafusion/common/src/types/canonical_extensions/bool8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::error::_internal_err;
use crate::types::extension::DFExtensionType;
use arrow::array::{Array, Int8Array};
use arrow::datatypes::DataType;
use arrow::util::display::{ArrayFormatter, DisplayIndex, FormatOptions, FormatResult};
use std::fmt::Write;

/// Defines the extension type logic for the canonical `arrow.bool8` extension type.
///
/// Bool8 values are displayed as `true` or `false`, where `0` maps to `false` and
/// any non-zero value maps to `true`.
///
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
impl DFExtensionType for arrow_schema::extension::Bool8 {
fn storage_type(&self) -> DataType {
DataType::Int8
}

fn serialize_metadata(&self) -> Option<String> {
// Bool8 metadata is an empty string per the Arrow spec.
Some(String::new())
}

fn create_array_formatter<'fmt>(
&self,
array: &'fmt dyn Array,
options: &FormatOptions<'fmt>,
) -> crate::Result<Option<ArrayFormatter<'fmt>>> {
if array.data_type() != &DataType::Int8 {
return _internal_err!("Wrong array type for Bool8");
}

let display_index = Bool8ValueDisplayIndex {
array: array.as_any().downcast_ref().unwrap(),
null_str: options.null(),
};
Ok(Some(ArrayFormatter::new(
Box::new(display_index),
options.safe(),
)))
}
}

/// Display adapter that renders 8-bit Boolean values as text.
///
/// A stored zero is shown as `false`; every other stored value is shown as `true`.
#[derive(Clone, Copy, Debug)]
struct Bool8ValueDisplayIndex<'a> {
    /// The `Int8` storage array whose elements are rendered.
    array: &'a Int8Array,
    /// Text emitted in place of a null element.
    null_str: &'a str,
}

impl DisplayIndex for Bool8ValueDisplayIndex<'_> {
    /// Writes the textual form of the element at `idx` into `f`.
    ///
    /// Nulls are written as the configured null string, zero as `false`, and any
    /// other value as `true`.
    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
        // Pick the text first so there is exactly one write to the sink.
        let rendered = if self.array.is_null(idx) {
            self.null_str
        } else if self.array.value(idx) == 0 {
            "false"
        } else {
            "true"
        };

        f.write_str(rendered)?;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ScalarValue;

    /// Formats a one-element Bool8 array holding `value` and asserts the rendered text.
    ///
    /// The null string is set to `NULL` so that a rendered null is distinguishable
    /// from an empty string.
    fn assert_renders(value: Option<i8>, expected: &str) {
        let bool8 = ScalarValue::Int8(value).to_array_of_size(1).unwrap();
        let options = FormatOptions::default().with_null("NULL");

        let extension_type = arrow_schema::extension::Bool8 {};
        let formatter = extension_type
            .create_array_formatter(bool8.as_ref(), &options)
            .unwrap()
            .unwrap();

        assert_eq!(formatter.value(0).to_string(), expected);
    }

    #[test]
    fn test_pretty_print_bool8_false() {
        assert_renders(Some(0), "false");
    }

    #[test]
    fn test_pretty_print_bool8_true() {
        assert_renders(Some(1), "true");
    }

    #[test]
    fn test_pretty_print_bool8_nonzero_is_true() {
        // Any non-zero value should display as "true", including negative ones.
        assert_renders(Some(42), "true");
        assert_renders(Some(-1), "true");
    }

    #[test]
    fn test_pretty_print_bool8_null() {
        // Null entries must use the configured null string rather than true/false.
        assert_renders(None, "NULL");
    }

    #[test]
    fn test_bool8_formatter_rejects_non_int8_storage() {
        // Bool8 is only defined over Int8 storage; other arrays must yield an error.
        let not_int8 = ScalarValue::Int32(Some(1)).to_array_of_size(1).unwrap();

        let extension_type = arrow_schema::extension::Bool8 {};
        let result = extension_type
            .create_array_formatter(not_int8.as_ref(), &FormatOptions::default());

        assert!(result.is_err());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::types::extension::DFExtensionType;
use arrow::datatypes::DataType;
use arrow_schema::extension::ExtensionType;

/// Implements DataFusion's extension type hooks for the canonical
/// `arrow.fixed_shape_tensor` extension type.
///
/// The storage type is a `FixedSizeList`; no custom formatter is provided here, so
/// the default Arrow formatter is used for display.
///
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
impl DFExtensionType for arrow_schema::extension::FixedShapeTensor {
    /// Returns a non-nullable `FixedSizeList` of the tensor's value type.
    ///
    /// # Panics
    ///
    /// Panics if the tensor's list size does not fit in an `i32`.
    fn storage_type(&self) -> DataType {
        let list_size = i32::try_from(self.list_size()).expect("list size overflow");
        let value_type = self.value_type().clone();
        DataType::new_fixed_size_list(value_type, list_size, false)
    }

    /// Delegates to the Arrow [`ExtensionType`] metadata serialization.
    fn serialize_metadata(&self) -> Option<String> {
        <Self as ExtensionType>::serialize_metadata(self)
    }
}
37 changes: 37 additions & 0 deletions datafusion/common/src/types/canonical_extensions/json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::types::extension::DFExtensionType;
use arrow::datatypes::DataType;
use arrow_schema::extension::ExtensionType;

/// Implements DataFusion's extension type hooks for the canonical `arrow.json`
/// extension type.
///
/// JSON values are stored as UTF-8 strings, so no custom formatter is provided and
/// the default Arrow string formatter is used for display.
///
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
impl DFExtensionType for arrow_schema::extension::Json {
    /// Returns `Utf8` as the storage type.
    fn storage_type(&self) -> DataType {
        // JSON may be stored as Utf8, LargeUtf8, or Utf8View; Utf8 is reported here
        // as the most common default.
        DataType::Utf8
    }

    /// Delegates to the Arrow [`ExtensionType`] metadata serialization.
    fn serialize_metadata(&self) -> Option<String> {
        <Self as ExtensionType>::serialize_metadata(self)
    }
}
6 changes: 6 additions & 0 deletions datafusion/common/src/types/canonical_extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,10 @@
// specific language governing permissions and limitations
// under the License.

mod bool8;
mod fixed_shape_tensor;
mod json;
mod opaque;
mod timestamp_with_offset;
mod uuid;
mod variable_shape_tensor;
37 changes: 37 additions & 0 deletions datafusion/common/src/types/canonical_extensions/opaque.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::types::extension::DFExtensionType;
use arrow::datatypes::DataType;
use arrow_schema::extension::ExtensionType;

/// Implements DataFusion's extension type hooks for the canonical `arrow.opaque`
/// extension type.
///
/// Opaque stands for a type received from an external system that cannot be
/// interpreted. No custom formatter is provided, so the default Arrow formatter is
/// used for display.
///
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
impl DFExtensionType for arrow_schema::extension::Opaque {
    /// Returns `Null` as the storage type.
    fn storage_type(&self) -> DataType {
        // Opaque accepts any storage type; Null is the recommended choice when there
        // is no underlying data.
        DataType::Null
    }

    /// Delegates to the Arrow [`ExtensionType`] metadata serialization.
    fn serialize_metadata(&self) -> Option<String> {
        <Self as ExtensionType>::serialize_metadata(self)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::types::extension::DFExtensionType;
use arrow::datatypes::DataType;

/// Defines the extension type logic for the canonical `arrow.timestamp_with_offset` extension type.
///
/// Timestamp with offset values are stored as `Struct` arrays containing a UTC timestamp
/// and an offset in minutes. The default Arrow formatter is used for display.
///
/// Note that [`DFExtensionType::storage_type`] below does not return that `Struct` type;
/// it returns a `Null` placeholder.
///
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
impl DFExtensionType for arrow_schema::extension::TimestampWithOffset {
/// Returns `DataType::Null` as a placeholder rather than the concrete storage type.
fn storage_type(&self) -> DataType {
// TimestampWithOffset stores no internal state to determine the timestamp precision.
// The actual storage type depends on the time unit chosen by the producer.
// Returning Null here is a placeholder; the actual DataType is expected to be validated
// at registration time via ExtensionType::supports_data_type.
// NOTE(review): that validation is not visible in this file — confirm against the
// registration code.
DataType::Null
}

/// Returns `None`: no metadata is serialized for this extension type.
fn serialize_metadata(&self) -> Option<String> {
// TimestampWithOffset has no metadata.
None
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::types::extension::DFExtensionType;
use arrow::datatypes::{DataType, Field, Fields};
use arrow_schema::extension::ExtensionType;

/// Implements DataFusion's extension type hooks for the canonical
/// `arrow.variable_shape_tensor` extension type.
///
/// The storage type is a `Struct` with two fields: `data` (a list of elements) and
/// `shape` (a fixed-size list of `Int32` dimensions). No custom formatter is
/// provided, so the default Arrow formatter is used for display.
///
/// See [`DFExtensionType`] for information on DataFusion's extension type mechanism.
impl DFExtensionType for arrow_schema::extension::VariableShapeTensor {
    /// Returns the `Struct { data, shape }` storage type for this tensor.
    ///
    /// # Panics
    ///
    /// Panics if the number of dimensions does not fit in an `i32`.
    fn storage_type(&self) -> DataType {
        let rank = i32::try_from(self.dimensions()).expect("dimensions overflow");

        // `data`: non-nullable list of non-nullable tensor elements.
        let element = Field::new_list_field(self.value_type().clone(), false);
        let data = Field::new_list("data", element, false);

        // `shape`: one non-nullable Int32 entry per dimension.
        let shape_type = DataType::new_fixed_size_list(DataType::Int32, rank, false);
        let shape = Field::new("shape", shape_type, false);

        DataType::Struct(Fields::from_iter([data, shape]))
    }

    /// Delegates to the Arrow [`ExtensionType`] metadata serialization.
    fn serialize_metadata(&self) -> Option<String> {
        <Self as ExtensionType>::serialize_metadata(self)
    }
}
Loading
Loading