diff --git a/clickhouse/columns/factory.cpp b/clickhouse/columns/factory.cpp index a01304f8..2bec7dd8 100644 --- a/clickhouse/columns/factory.cpp +++ b/clickhouse/columns/factory.cpp @@ -246,8 +246,16 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti std::make_shared() ) ); - default: - throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported"); + default: { + // Generic LowCardinality(T): build the inner column and + // wrap it. Works for any fixed-size dictionary type that + // AppendToDictionary supports. + auto inner = CreateColumnFromAst(nested, settings); + if (!inner) { + throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported"); + } + return std::make_shared(std::move(inner)); + } } } } diff --git a/clickhouse/columns/lowcardinality.cpp b/clickhouse/columns/lowcardinality.cpp index 0722ea8f..45a9006e 100644 --- a/clickhouse/columns/lowcardinality.cpp +++ b/clickhouse/columns/lowcardinality.cpp @@ -2,6 +2,13 @@ #include "string.h" #include "nullable.h" +#include "numeric.h" +#include "enum.h" +#include "date.h" +#include "ip4.h" +#include "ip6.h" +#include "uuid.h" +#include "../base/socket.h" // for htonl/ntohl and in_addr/in6_addr #include "../base/wire_format.h" #include @@ -10,6 +17,7 @@ #include #include #include +#include #include @@ -95,13 +103,46 @@ inline auto VisitIndexColumn(Vizitor && vizitor, ColumnType && col) { } } +// Number of bytes an ItemView holds for a fixed-size dictionary type, or 0 for +// variable-size (String/FixedString) or unsupported types. Used to build a +// correctly-sized zero value for the default/null dictionary item. +inline size_t FixedSizeForDictionaryType(Type::Code code) { + switch (code) { + case Type::Int8: case Type::UInt8: case Type::Enum8: + return 1; + case Type::Int16: case Type::UInt16: case Type::Enum16: case Type::Date: + return 2; + case Type::Int32: case Type::UInt32: case Type::Float32: + case Type::DateTime: case Type::Date32: case Type::IPv4: + return 4; + case Type::Int64: case Type::UInt64: case Type::Float64: + case Type::DateTime64: + return 8; + case Type::Int128: case Type::UInt128: case Type::IPv6: case Type::UUID: + return 16; + default: + return 0; + } +} + +// A zero-filled, correctly-sized ItemView for a fixed-size dictionary type. The +// backing buffer is static so the non-owning view stays valid. +inline ItemView ZeroItemForDictionary(Type::Code code) { + if (const auto size = FixedSizeForDictionaryType(code)) { + static const char zeros[16] = {}; + return ItemView{code, std::string_view{zeros, size}}; + } + // Variable-size types (String/FixedString) accept an empty value. + return ItemView{code, std::string_view{}}; +} + // A special NULL-item, which is expected at pos(0) in dictionary, // note that we distinguish empty string from NULL-value. inline auto GetNullItemForDictionary(const ColumnRef dictionary) { if (auto n = dictionary->As()) { return ItemView {}; } else { - return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + return ZeroItemForDictionary(dictionary->Type()->GetCode()); } } @@ -111,7 +152,7 @@ inline ItemView GetDefaultItemForDictionary(const ColumnRef dictionary) { if (auto n = dictionary->As()) { return GetDefaultItemForDictionary(n->Nested()); } else { - return ItemView{dictionary->Type()->GetCode(), std::string_view{}}; + return ZeroItemForDictionary(dictionary->Type()->GetCode()); } } @@ -147,6 +188,81 @@ inline void AppendToDictionary(Column& dictionary, const ItemView & item) { case Type::Nullable: AppendNullableToDictionary(column_down_cast(dictionary), item); return; + // Fixed-size dictionary types. The ItemView holds the raw stored bytes + // (see the matching ColumnXxx::GetItem), so we re-append the raw value. + case Type::Int8: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Int16: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Int32: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Int64: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::UInt8: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::UInt16: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::UInt32: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::UInt64: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Int128: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::UInt128: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Float32: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Float64: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Enum8: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Enum16: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::Date: + column_down_cast(dictionary).AppendRaw(item.get()); + return; + case Type::Date32: + column_down_cast(dictionary).AppendRaw(item.get()); + return; + case Type::DateTime: + column_down_cast(dictionary).AppendRaw(item.get()); + return; + case Type::DateTime64: + column_down_cast(dictionary).Append(item.get()); + return; + case Type::IPv4: + // ColumnIPv4::Append applies htonl, and GetItem returns the stored + // (already byte-swapped) value, so undo the swap to re-store as-is. + column_down_cast(dictionary).Append(ntohl(item.get())); + return; + case Type::IPv6: { + in6_addr addr; + std::memcpy(&addr, item.data.data(), sizeof(addr)); + column_down_cast(dictionary).Append(addr); + return; + } + case Type::UUID: { + UUID value; + std::memcpy(&value.first, item.data.data(), sizeof(value.first)); + std::memcpy(&value.second, item.data.data() + sizeof(value.first), + sizeof(value.second)); + column_down_cast(dictionary).Append(value); + return; + } default: throw ValidationError("Unexpected dictionary column type: " + dictionary.GetType().GetName()); } diff --git a/ut/CreateColumnByType_ut.cpp b/ut/CreateColumnByType_ut.cpp index e312c116..1de80fb9 100644 --- a/ut/CreateColumnByType_ut.cpp +++ b/ut/CreateColumnByType_ut.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,28 @@ TEST(CreateColumnByType, LowCardinalityAsWrappedColumn) { ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->As()->GetType().GetCode()); } +TEST(CreateColumnByType, LowCardinalityGeneralInnerTypes) { + // LowCardinality used to be supported only over String/FixedString. The + // factory now builds a generic ColumnLowCardinality for any fixed-size inner + // type. + for (const auto* type_name : { + "LowCardinality(Int8)", + "LowCardinality(Int64)", + "LowCardinality(UInt64)", + "LowCardinality(Float64)", + "LowCardinality(Date)", + "LowCardinality(DateTime)", + "LowCardinality(Nullable(Int64))", + "LowCardinality(Nullable(Float64))", + }) { + auto col = CreateColumnByType(type_name); + ASSERT_NE(nullptr, col) << type_name; + ASSERT_EQ(Type::LowCardinality, col->GetType().GetCode()) << type_name; + ASSERT_NE(nullptr, col->As()) << type_name; + EXPECT_EQ(std::string{type_name}, col->GetType().GetName()) << type_name; + } +} + TEST(CreateColumnByType, DateTime) { ASSERT_NE(nullptr, CreateColumnByType("DateTime")); ASSERT_NE(nullptr, CreateColumnByType("DateTime('Europe/Moscow')")); diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 13a1731c..419cb359 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -1189,3 +1189,73 @@ TEST(ColumnsCase, ColumnMapT_Wrap) { EXPECT_EQ("123", map_view.At(1)); EXPECT_EQ("abc", map_view.At(2)); } + +// Regression tests for general LowCardinality support over non-String inner +// types (previously only String/FixedString were supported). +TEST(ColumnLowCardinality, AppendAndReadNumeric) { + auto col = std::make_shared>(); + col->Append(7); + col->Append(7); + col->Append(9); + col->Append(7); + + ASSERT_EQ(4u, col->Size()); + EXPECT_EQ(7, col->At(0)); + EXPECT_EQ(7, col->At(1)); + EXPECT_EQ(9, col->At(2)); + EXPECT_EQ(7, col->At(3)); + // Dictionary holds the default item plus the two distinct values {7, 9}. + EXPECT_EQ(3u, col->GetDictionarySize()); + + // GetItem returns the raw value with the correct type code. + const auto item = col->GetItem(2); + EXPECT_EQ(Type::Int64, item.type); + EXPECT_EQ(9, item.get()); +} + +TEST(ColumnLowCardinality, AppendAndReadNullableNumeric) { + auto col + = std::make_shared>>(); + col->Append(7); + col->Append(std::nullopt); + col->Append(7); + col->Append(9); + col->Append(std::nullopt); + + ASSERT_EQ(5u, col->Size()); + EXPECT_EQ(std::optional{7}, col->At(0)); + EXPECT_EQ(std::nullopt, col->At(1)); + EXPECT_EQ(std::optional{7}, col->At(2)); + EXPECT_EQ(std::optional{9}, col->At(3)); + EXPECT_EQ(std::nullopt, col->At(4)); + + // Null rows are represented by a Void ItemView. + EXPECT_EQ(Type::Void, col->GetItem(1).type); + EXPECT_EQ(Type::Int64, col->GetItem(0).type); +} + +TEST(ColumnLowCardinality, NumericLoadAndSave) { + auto column_A = std::make_shared>(); + for (auto v : {1u, 2u, 1u, 3u, 2u, 1u}) { + column_A->Append(v); + } + + const auto BufferSize = 64 * 1024; + std::unique_ptr buffer = std::make_unique(BufferSize); + memset(buffer.get(), 0, BufferSize); + { + ArrayOutput output(buffer.get(), BufferSize); + ASSERT_NO_THROW(column_A->Save(&output)); + } + + auto column_B = std::make_shared>(); + { + ArrayInput input(buffer.get(), BufferSize); + ASSERT_TRUE(column_B->Load(&input, column_A->Size())); + } + + ASSERT_EQ(column_A->Size(), column_B->Size()); + for (size_t i = 0; i < column_A->Size(); ++i) { + EXPECT_EQ(column_A->At(i), column_B->At(i)) << "row " << i; + } +} diff --git a/ut/roundtrip_column.cpp b/ut/roundtrip_column.cpp index 19b18bb0..c6a325c3 100644 --- a/ut/roundtrip_column.cpp +++ b/ut/roundtrip_column.cpp @@ -38,7 +38,8 @@ ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) { client.Execute("DROP TEMPORARY TABLE IF EXISTS temporary_roundtrip_table;"); // id column is to have the same order of rows on SELECT client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (id UInt32, col " + type_name + ") " - "ENGINE = Memory SETTINGS enable_time_time64_type = 1"); + "ENGINE = Memory SETTINGS enable_time_time64_type = 1, " + "allow_suspicious_low_cardinality_types = 1"); { Block block; block.AppendColumn("col", expected); diff --git a/ut/roundtrip_tests.cpp b/ut/roundtrip_tests.cpp index 9ff4edf3..9476d2b1 100644 --- a/ut/roundtrip_tests.cpp +++ b/ut/roundtrip_tests.cpp @@ -233,6 +233,34 @@ TEST_P(RoundtripCase, LowCardinalityTNullableString) { EXPECT_TRUE(CompareRecursive(*col, *result_typed)); } +TEST_P(RoundtripCase, LowCardinalityTUInt64) { + using TestColumn = ColumnLowCardinalityT; + auto col = std::make_shared(); + + col->Append(7); + col->Append(42); + col->Append(7); + col->Append(7); + + auto result_typed = RoundtripColumnValues(*client_, col)->As(); + EXPECT_TRUE(CompareRecursive(*col, *result_typed)); +} + +TEST_P(RoundtripCase, LowCardinalityTNullableUInt64) { + using TestColumn = ColumnLowCardinalityT>; + auto col = std::make_shared(); + + col->Append(7); + col->Append(42); + col->Append(std::nullopt); + col->Append(7); + col->Append(std::nullopt); + col->Append(7); + + auto result_typed = RoundtripColumnValues(*client_, col)->As(); + EXPECT_TRUE(CompareRecursive(*col, *result_typed)); +} + TEST_P(RoundtripCase, ArrayTNullableString) { using TestColumn = ColumnArrayT>; auto col = std::make_shared();