Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions clickhouse/columns/factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,16 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti
std::make_shared<ColumnUInt8>()
)
);
default:
throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported");
default: {
// Generic LowCardinality(T): build the inner column and
// wrap it. Works for any fixed-size dictionary type that
// AppendToDictionary supports.
auto inner = CreateColumnFromAst(nested, settings);
if (!inner) {
throw UnimplementedError("LowCardinality(" + nested.name + ") is not supported");
}
return std::make_shared<ColumnLowCardinality>(std::move(inner));
}
}
}
}
Expand Down
120 changes: 118 additions & 2 deletions clickhouse/columns/lowcardinality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

#include "string.h"
#include "nullable.h"
#include "numeric.h"
#include "enum.h"
#include "date.h"
#include "ip4.h"
#include "ip6.h"
#include "uuid.h"
#include "../base/socket.h" // for htonl/ntohl and in_addr/in6_addr
#include "../base/wire_format.h"

#include <city.h>
Expand All @@ -10,6 +17,7 @@
#include <string_view>
#include <type_traits>
#include <cmath>
#include <cstring>

#include <cassert>

Expand Down Expand Up @@ -95,13 +103,46 @@ inline auto VisitIndexColumn(Vizitor && vizitor, ColumnType && col) {
}
}

// Byte width of the value an ItemView carries for the given dictionary type
// code. A result of 0 means "no fixed width here": the type is either
// variable-size (String/FixedString) or not handled by this table. The width is
// used to build a correctly-sized zero value for the default/null dictionary
// item.
inline size_t FixedSizeForDictionaryType(Type::Code code) {
    size_t width = 0;

    switch (code) {
        case Type::Int8:
        case Type::UInt8:
        case Type::Enum8:
            width = 1;
            break;

        case Type::Int16:
        case Type::UInt16:
        case Type::Enum16:
        case Type::Date:
            width = 2;
            break;

        case Type::Int32:
        case Type::UInt32:
        case Type::Float32:
        case Type::DateTime:
        case Type::Date32:
        case Type::IPv4:
            width = 4;
            break;

        case Type::Int64:
        case Type::UInt64:
        case Type::Float64:
        case Type::DateTime64:
            width = 8;
            break;

        case Type::Int128:
        case Type::UInt128:
        case Type::IPv6:
        case Type::UUID:
            width = 16;
            break;

        default:
            // Variable-size or unsupported type: leave the width at 0.
            break;
    }

    return width;
}

// Builds a zero-filled ItemView whose width is the one reported by
// FixedSizeForDictionaryType(code). Types reported as width 0
// (String/FixedString, or anything that table does not handle) get an empty
// value instead.
//
// ItemView is non-owning, so the bytes live in a function-local static buffer
// that outlives every returned view. The buffer is over-aligned to 16 bytes
// because consumers in this file read the item back through typed accessors
// such as item.get<Int128>(), while a plain char array only guarantees 1-byte
// alignment.
// NOTE(review): assumes ItemView::get<T>() may read through a T-typed pointer
// — confirm against its definition.
inline ItemView ZeroItemForDictionary(Type::Code code) {
    alignas(16) static const char zeros[16] = {};

    const size_t size = FixedSizeForDictionaryType(code);
    if (size == 0) {
        // Variable-size types (String/FixedString) accept an empty value.
        return ItemView{code, std::string_view{}};
    }

    // Guard against a future fixed-size type wider than the backing buffer.
    assert(size <= sizeof(zeros));
    return ItemView{code, std::string_view{zeros, size}};
}

// A special NULL-item, which is expected at pos(0) in dictionary,
// note that we distinguish empty string from NULL-value.
//
// For a Nullable dictionary the NULL-item is a default-constructed ItemView.
// For any other dictionary it is the zero value produced by
// ZeroItemForDictionary for the dictionary's type code, so fixed-size types get
// a correctly-sized value rather than an empty one.
inline auto GetNullItemForDictionary(const ColumnRef dictionary) {
    if (dictionary->As<ColumnNullable>()) {
        return ItemView{};
    }
    return ZeroItemForDictionary(dictionary->Type()->GetCode());
}

Expand All @@ -111,7 +152,7 @@ inline ItemView GetDefaultItemForDictionary(const ColumnRef dictionary) {
if (auto n = dictionary->As<ColumnNullable>()) {
return GetDefaultItemForDictionary(n->Nested());
} else {
return ItemView{dictionary->Type()->GetCode(), std::string_view{}};
return ZeroItemForDictionary(dictionary->Type()->GetCode());
}
}

Expand Down Expand Up @@ -147,6 +188,81 @@ inline void AppendToDictionary(Column& dictionary, const ItemView & item) {
case Type::Nullable:
AppendNullableToDictionary(column_down_cast<ColumnNullable>(dictionary), item);
return;
// Fixed-size dictionary types. The ItemView holds the raw stored bytes
// (see the matching ColumnXxx::GetItem), so we re-append the raw value.
case Type::Int8:
column_down_cast<ColumnInt8>(dictionary).Append(item.get<int8_t>());
return;
case Type::Int16:
column_down_cast<ColumnInt16>(dictionary).Append(item.get<int16_t>());
return;
case Type::Int32:
column_down_cast<ColumnInt32>(dictionary).Append(item.get<int32_t>());
return;
case Type::Int64:
column_down_cast<ColumnInt64>(dictionary).Append(item.get<int64_t>());
return;
case Type::UInt8:
column_down_cast<ColumnUInt8>(dictionary).Append(item.get<uint8_t>());
return;
case Type::UInt16:
column_down_cast<ColumnUInt16>(dictionary).Append(item.get<uint16_t>());
return;
case Type::UInt32:
column_down_cast<ColumnUInt32>(dictionary).Append(item.get<uint32_t>());
return;
case Type::UInt64:
column_down_cast<ColumnUInt64>(dictionary).Append(item.get<uint64_t>());
return;
case Type::Int128:
column_down_cast<ColumnInt128>(dictionary).Append(item.get<Int128>());
return;
case Type::UInt128:
column_down_cast<ColumnUInt128>(dictionary).Append(item.get<UInt128>());
return;
case Type::Float32:
column_down_cast<ColumnFloat32>(dictionary).Append(item.get<float>());
return;
case Type::Float64:
column_down_cast<ColumnFloat64>(dictionary).Append(item.get<double>());
return;
case Type::Enum8:
column_down_cast<ColumnEnum8>(dictionary).Append(item.get<int8_t>());
return;
case Type::Enum16:
column_down_cast<ColumnEnum16>(dictionary).Append(item.get<int16_t>());
return;
case Type::Date:
column_down_cast<ColumnDate>(dictionary).AppendRaw(item.get<uint16_t>());
return;
case Type::Date32:
column_down_cast<ColumnDate32>(dictionary).AppendRaw(item.get<int32_t>());
return;
case Type::DateTime:
column_down_cast<ColumnDateTime>(dictionary).AppendRaw(item.get<uint32_t>());
return;
case Type::DateTime64:
column_down_cast<ColumnDateTime64>(dictionary).Append(item.get<int64_t>());
return;
case Type::IPv4:
// ColumnIPv4::Append applies htonl, and GetItem returns the stored
// (already byte-swapped) value, so undo the swap to re-store as-is.
column_down_cast<ColumnIPv4>(dictionary).Append(ntohl(item.get<uint32_t>()));
return;
case Type::IPv6: {
in6_addr addr;
std::memcpy(&addr, item.data.data(), sizeof(addr));
column_down_cast<ColumnIPv6>(dictionary).Append(addr);
return;
}
case Type::UUID: {
UUID value;
std::memcpy(&value.first, item.data.data(), sizeof(value.first));
std::memcpy(&value.second, item.data.data() + sizeof(value.first),
sizeof(value.second));
column_down_cast<ColumnUUID>(dictionary).Append(value);
return;
}
default:
throw ValidationError("Unexpected dictionary column type: " + dictionary.GetType().GetName());
}
Expand Down
23 changes: 23 additions & 0 deletions ut/CreateColumnByType_ut.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <clickhouse/columns/bool.h>
#include <clickhouse/columns/factory.h>
#include <clickhouse/columns/date.h>
#include <clickhouse/columns/lowcardinality.h>
#include <clickhouse/columns/numeric.h>
#include <clickhouse/columns/string.h>
#include <clickhouse/columns/json.h>
Expand Down Expand Up @@ -44,6 +45,28 @@ TEST(CreateColumnByType, LowCardinalityAsWrappedColumn) {
ASSERT_EQ(Type::FixedString, CreateColumnByType("LowCardinality(FixedString(10000))", create_column_settings)->As<ColumnFixedString>()->GetType().GetCode());
}

TEST(CreateColumnByType, LowCardinalityGeneralInnerTypes) {
    // LowCardinality was previously limited to String/FixedString inner types.
    // Every name below must now produce a generic ColumnLowCardinality whose
    // reported type name matches the requested one.
    static const char* const kTypeNames[] = {
        "LowCardinality(Int8)",
        "LowCardinality(Int64)",
        "LowCardinality(UInt64)",
        "LowCardinality(Float64)",
        "LowCardinality(Date)",
        "LowCardinality(DateTime)",
        "LowCardinality(Nullable(Int64))",
        "LowCardinality(Nullable(Float64))",
    };

    for (const char* type_name : kTypeNames) {
        auto column = CreateColumnByType(type_name);

        // Creation must succeed before anything else is inspected.
        ASSERT_NE(nullptr, column) << type_name;

        const auto& column_type = column->GetType();
        ASSERT_EQ(Type::LowCardinality, column_type.GetCode()) << type_name;
        ASSERT_NE(nullptr, column->As<ColumnLowCardinality>()) << type_name;
        EXPECT_EQ(std::string{type_name}, column->GetType().GetName()) << type_name;
    }
}

TEST(CreateColumnByType, DateTime) {
ASSERT_NE(nullptr, CreateColumnByType("DateTime"));
ASSERT_NE(nullptr, CreateColumnByType("DateTime('Europe/Moscow')"));
Expand Down
70 changes: 70 additions & 0 deletions ut/columns_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,3 +1189,73 @@ TEST(ColumnsCase, ColumnMapT_Wrap) {
EXPECT_EQ("123", map_view.At(1));
EXPECT_EQ("abc", map_view.At(2));
}

// Regression tests for general LowCardinality support over non-String inner
// types (previously only String/FixedString were supported).
TEST(ColumnLowCardinality, AppendAndReadNumeric) {
    using LowCardinalityInt64 = ColumnLowCardinalityT<ColumnInt64>;

    // Four rows, but only two distinct values.
    auto column = std::make_shared<LowCardinalityInt64>();
    column->Append(7);
    column->Append(7);
    column->Append(9);
    column->Append(7);

    ASSERT_EQ(4u, column->Size());

    // Rows read back in insertion order.
    EXPECT_EQ(7, column->At(0));
    EXPECT_EQ(7, column->At(1));
    EXPECT_EQ(9, column->At(2));
    EXPECT_EQ(7, column->At(3));

    // One default item plus the distinct values {7, 9}.
    EXPECT_EQ(3u, column->GetDictionarySize());

    // The item view for row 2 carries the inner type code and the raw value.
    const auto third_row = column->GetItem(2);
    EXPECT_EQ(Type::Int64, third_row.type);
    EXPECT_EQ(9, third_row.get<int64_t>());
}

TEST(ColumnLowCardinality, AppendAndReadNullableNumeric) {
    using LowCardinalityNullableInt64 = ColumnLowCardinalityT<ColumnNullableT<ColumnInt64>>;
    using Expected = std::optional<int64_t>;

    // Mix of repeated values and nulls.
    auto column = std::make_shared<LowCardinalityNullableInt64>();
    column->Append(7);
    column->Append(std::nullopt);
    column->Append(7);
    column->Append(9);
    column->Append(std::nullopt);

    ASSERT_EQ(5u, column->Size());

    // Values and nulls come back in insertion order.
    EXPECT_EQ(Expected{7}, column->At(0));
    EXPECT_EQ(std::nullopt, column->At(1));
    EXPECT_EQ(Expected{7}, column->At(2));
    EXPECT_EQ(Expected{9}, column->At(3));
    EXPECT_EQ(std::nullopt, column->At(4));

    // A null row yields a Void item view; a value row keeps the inner type.
    EXPECT_EQ(Type::Void, column->GetItem(1).type);
    EXPECT_EQ(Type::Int64, column->GetItem(0).type);
}

TEST(ColumnLowCardinality, NumericLoadAndSave) {
    using LowCardinalityUInt64 = ColumnLowCardinalityT<ColumnUInt64>;

    // Source column with repeated values.
    auto source = std::make_shared<LowCardinalityUInt64>();
    for (auto value : {1u, 2u, 1u, 3u, 2u, 1u}) {
        source->Append(value);
    }

    // Scratch buffer that holds the serialized form.
    const auto kBufferSize = 64 * 1024;
    std::unique_ptr<char[]> storage = std::make_unique<char[]>(kBufferSize);
    memset(storage.get(), 0, kBufferSize);

    // Serialize the source column into the buffer.
    {
        ArrayOutput sink(storage.get(), kBufferSize);
        ASSERT_NO_THROW(source->Save(&sink));
    }

    // Deserialize into a fresh column of the same type.
    auto restored = std::make_shared<LowCardinalityUInt64>();
    {
        ArrayInput reader(storage.get(), kBufferSize);
        ASSERT_TRUE(restored->Load(&reader, source->Size()));
    }

    // The restored column must match the source row by row.
    ASSERT_EQ(source->Size(), restored->Size());
    for (size_t row = 0; row < source->Size(); ++row) {
        EXPECT_EQ(source->At(row), restored->At(row)) << "row " << row;
    }
}
3 changes: 2 additions & 1 deletion ut/roundtrip_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) {
client.Execute("DROP TEMPORARY TABLE IF EXISTS temporary_roundtrip_table;");
// id column is to have the same order of rows on SELECT
client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (id UInt32, col " + type_name + ") "
"ENGINE = Memory SETTINGS enable_time_time64_type = 1");
"ENGINE = Memory SETTINGS enable_time_time64_type = 1, "
"allow_suspicious_low_cardinality_types = 1");
{
Block block;
block.AppendColumn("col", expected);
Expand Down
28 changes: 28 additions & 0 deletions ut/roundtrip_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,34 @@ TEST_P(RoundtripCase, LowCardinalityTNullableString) {
EXPECT_TRUE(CompareRecursive(*col, *result_typed));
}

// Round-trips a LowCardinality(UInt64) column with repeated values through the
// server and checks that the values read back equal the values written.
TEST_P(RoundtripCase, LowCardinalityTUInt64) {
    using TestColumn = ColumnLowCardinalityT<ColumnUInt64>;
    auto col = std::make_shared<TestColumn>();

    col->Append(7);
    col->Append(42);
    col->Append(7);
    col->Append(7);

    const auto result = RoundtripColumnValues(*client_, col);
    ASSERT_NE(nullptr, result);

    // As<>() may return null if the returned column is not a TestColumn; fail
    // the test cleanly rather than dereferencing a null pointer below.
    const auto result_typed = result->As<TestColumn>();
    ASSERT_NE(nullptr, result_typed);

    EXPECT_TRUE(CompareRecursive(*col, *result_typed));
}

// Round-trips a LowCardinality(Nullable(UInt64)) column that mixes repeated
// values and nulls through the server and checks that what is read back equals
// what was written.
TEST_P(RoundtripCase, LowCardinalityTNullableUInt64) {
    using TestColumn = ColumnLowCardinalityT<ColumnNullableT<ColumnUInt64>>;
    auto col = std::make_shared<TestColumn>();

    col->Append(7);
    col->Append(42);
    col->Append(std::nullopt);
    col->Append(7);
    col->Append(std::nullopt);
    col->Append(7);

    const auto result = RoundtripColumnValues(*client_, col);
    ASSERT_NE(nullptr, result);

    // As<>() may return null if the returned column is not a TestColumn; fail
    // the test cleanly rather than dereferencing a null pointer below.
    const auto result_typed = result->As<TestColumn>();
    ASSERT_NE(nullptr, result_typed);

    EXPECT_TRUE(CompareRecursive(*col, *result_typed));
}

TEST_P(RoundtripCase, ArrayTNullableString) {
using TestColumn = ColumnArrayT<ColumnNullableT<ColumnString>>;
auto col = std::make_shared<TestColumn>();
Expand Down
Loading