Skip to content

Commit d0dd36a

Browse files
committed
Add JSON support
JSON is implemented the same way as String, which seems to work fine except for round-tripping. I assume JSON is not actually sent as a plain string in the protocol, so it will need a different wire format; hopefully the rest of the infrastructure added here is still helpful.
1 parent b8544bb commit d0dd36a

21 files changed

+401
-4
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ C++ client for [ClickHouse](https://clickhouse.com/).
2323
* UUID
2424
* Map
2525
* Point, Ring, Polygon, MultiPolygon
26+
* JSON
2627

2728
## Dependencies
2829
In the most basic case one needs only:

clickhouse/columns/factory.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
8686

8787
case Type::String:
8888
return std::make_shared<ColumnString>();
89+
case Type::JSON:
90+
return std::make_shared<ColumnJSON>();
8991
case Type::FixedString:
9092
return std::make_shared<ColumnFixedString>(GetASTChildElement(ast, 0).value);
9193

@@ -201,6 +203,8 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti
201203
// TODO (nemkov): update this to maximize code reuse.
202204
case Type::String:
203205
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnString>>();
206+
case Type::JSON:
207+
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnJSON>>();
204208
case Type::FixedString:
205209
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnFixedString>>(GetASTChildElement(nested, 0).value);
206210
case Type::Nullable:
@@ -214,6 +218,8 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti
214218
// TODO (nemkov): update this to maximize code reuse.
215219
case Type::String:
216220
return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
221+
case Type::JSON:
222+
return std::make_shared<ColumnLowCardinalityT<ColumnJSON>>();
217223
case Type::FixedString:
218224
return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(GetASTChildElement(nested, 0).value);
219225
case Type::Nullable:

clickhouse/columns/itemview.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ void ItemView::ValidateData(Type::Code type, DataType data) {
7070

7171
case Type::Code::String:
7272
case Type::Code::FixedString:
73+
case Type::Code::JSON:
7374
// value can be of any size
7475
return;
7576

clickhouse/columns/itemview.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ struct ItemView {
6969
if (sizeof(ValueType) == data.size()) {
7070
return *reinterpret_cast<const T*>(data.data());
7171
} else {
72-
throw AssertionError("Incompatitable value type and size. Requested size: "
72+
throw AssertionError("Incompatible value type and size. Requested size: "
7373
+ std::to_string(sizeof(ValueType)) + " stored size: " + std::to_string(data.size()));
7474
}
7575
}

clickhouse/columns/string.cpp

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,4 +330,215 @@ ItemView ColumnString::GetItem(size_t index) const {
330330
return ItemView{Type::String, this->At(index)};
331331
}
332332

333+
struct ColumnJSON::Block
334+
{
335+
using CharT = typename std::string::value_type;
336+
337+
explicit Block(size_t starting_capacity)
338+
: size(0),
339+
capacity(starting_capacity),
340+
data_(new CharT[capacity])
341+
{}
342+
343+
inline auto GetAvailable() const {
344+
return capacity - size;
345+
}
346+
347+
std::string_view AppendUnsafe(std::string_view str) {
348+
const auto pos = &data_[size];
349+
350+
memcpy(pos, str.data(), str.size());
351+
size += str.size();
352+
353+
return std::string_view(pos, str.size());
354+
}
355+
356+
auto GetCurrentWritePos() {
357+
return &data_[size];
358+
}
359+
360+
std::string_view ConsumeTailAsJSONViewUnsafe(size_t len) {
361+
const auto start = &data_[size];
362+
size += len;
363+
return std::string_view(start, len);
364+
}
365+
366+
size_t size;
367+
const size_t capacity;
368+
std::unique_ptr<CharT[]> data_;
369+
};
370+
371+
/// Creates an empty column of type JSON.
ColumnJSON::ColumnJSON() : Column(Type::CreateJSON()) {}
375+
376+
/// Creates an empty column with room reserved for element_count rows.
ColumnJSON::ColumnJSON(size_t element_count)
    : Column(Type::CreateJSON())
{
    // The divisor 16 is an arbitrary number, based on the assumption that
    // values are about ~256 bytes long. At least one slot is always reserved.
    const size_t expected_blocks = std::max<size_t>(1, element_count / 16);

    items_.reserve(element_count);
    blocks_.reserve(expected_blocks);
}
383+
384+
/// Creates a column holding copies of all strings in data.
ColumnJSON::ColumnJSON(const std::vector<std::string>& data)
    : ColumnJSON()
{
    items_.reserve(data.size());

    // A single block sized by ComputeTotalSize(data) receives every value
    // (presumably the combined length of all strings - defined elsewhere),
    // which is why the unchecked AppendUnsafe() is used below.
    const auto block_capacity = ComputeTotalSize(data);
    blocks_.emplace_back(block_capacity);

    for (const std::string& value : data) {
        AppendUnsafe(value);
    }
}
394+
395+
/// Creates a column that takes over the strings in data without copying
/// their contents into a block.
ColumnJSON::ColumnJSON(std::vector<std::string>&& data)
    : ColumnJSON()
{
    items_.reserve(data.size());

    for (std::string& source : data) {
        // The string is moved into append_data_ first; the view is then
        // taken from the stored string, not from the moved-from source.
        const std::string& owned = append_data_.emplace_back(std::move(source));
        items_.emplace_back(std::string_view{ owned.data(), owned.length() });
    }
}
406+
407+
// Defined out of line, after ColumnJSON::Block has been fully defined.
ColumnJSON::~ColumnJSON() = default;
409+
410+
/// Reserves room for new_cap rows and a proportional number of blocks.
void ColumnJSON::Reserve(size_t new_cap) {
    // The divisor 16 is an arbitrary number, based on the assumption that
    // values are about ~256 bytes long. At least one slot is always reserved.
    const size_t expected_blocks = std::max<size_t>(1, new_cap / 16);

    items_.reserve(new_cap);
    blocks_.reserve(expected_blocks);
}
415+
416+
void ColumnJSON::Append(std::string_view str) {
417+
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
418+
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
419+
}
420+
421+
items_.emplace_back(blocks_.back().AppendUnsafe(str));
422+
}
423+
424+
/// Appends a copy of the NUL-terminated string str.
void ColumnJSON::Append(const char* str) {
    const size_t length = strlen(str);
    Append(std::string_view(str, length));
}
427+
428+
/// Appends one element by taking over steal_value instead of copying its
/// contents into a block.
void ColumnJSON::Append(std::string&& steal_value) {
    // The view is taken from the string stored in append_data_, not from
    // the moved-from argument.
    const std::string& owned = append_data_.emplace_back(std::move(steal_value));
    items_.emplace_back(std::string_view{ owned.data(), owned.length() });
}
433+
434+
/// Appends str as a bare view: nothing is copied and the column does not
/// own the referenced bytes.
void ColumnJSON::AppendNoManagedLifetime(std::string_view str) {
    items_.push_back(str);
}
437+
438+
void ColumnJSON::AppendUnsafe(std::string_view str) {
439+
items_.emplace_back(blocks_.back().AppendUnsafe(str));
440+
}
441+
442+
/// Removes all rows and releases every buffer owned by the column.
void ColumnJSON::Clear() {
    // Drop the views first, then both kinds of storage they may point into.
    items_.clear();
    append_data_.clear();
    blocks_.clear();
}
447+
448+
std::string_view ColumnJSON::At(size_t n) const {
449+
return items_.at(n);
450+
}
451+
452+
/// Appends copies of all rows of the given column to the end of this one.
/// Does nothing when column is not a ColumnJSON.
void ColumnJSON::Append(ColumnRef column) {
    if (auto col = column->As<ColumnJSON>()) {
        const auto total_size = ComputeTotalSize(col->items_);

        // Snapshot the row count before appending anything: when a column is
        // appended to itself, its size grows with every appended row, so a
        // loop bounded by the live size would never terminate and would
        // write past the end of the block reserved below.
        const size_t rows_to_append = col->items_.size();

        // TODO: fill up existing block with some items and then add a new one for the rest of items
        if (blocks_.size() == 0 || blocks_.back().GetAvailable() < total_size)
            blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, total_size));

        // Intentionally not doing items_.reserve() since that cripples performance.
        for (size_t i = 0; i < rows_to_append; ++i) {
            // The last block has room for total_size bytes, so the unchecked
            // append is sufficient here.
            this->AppendUnsafe((*col)[i]);
        }
    }
}
466+
467+
/// Replaces the content of the column with rows values read from input.
///
/// Each value is read as a length (WireFormat::ReadUInt64) followed by that
/// many bytes (WireFormat::ReadBytes). The data is staged in local containers
/// and swapped in only after every row has been read, so when a read fails
/// the function returns false and the column keeps its previous content.
bool ColumnJSON::LoadBody(InputStream* input, size_t rows) {
    if (rows == 0) {
        items_.clear();
        blocks_.clear();
        // With no items left nothing refers to the strings taken over by
        // Append(std::string&&); release them as Clear() does.
        append_data_.clear();

        return true;
    }

    decltype(items_) new_items;
    decltype(blocks_) new_blocks;

    new_items.reserve(rows);

    // Suboptimal if the first row string is >DEFAULT_BLOCK_SIZE, but that must be a very rare case.
    Block * block = &new_blocks.emplace_back(DEFAULT_BLOCK_SIZE);

    for (size_t i = 0; i < rows; ++i) {
        uint64_t len;
        if (!WireFormat::ReadUInt64(*input, &len))
            return false;

        // Start a new block when the value does not fit into the current one.
        if (len > block->GetAvailable())
            block = &new_blocks.emplace_back(std::max<size_t>(DEFAULT_BLOCK_SIZE, len));

        // Read straight into the block, then mark the bytes as used.
        if (!WireFormat::ReadBytes(*input, block->GetCurrentWritePos(), len))
            return false;

        new_items.emplace_back(block->ConsumeTailAsJSONViewUnsafe(len));
    }

    items_.swap(new_items);
    blocks_.swap(new_blocks);
    // Every view now points into the freshly loaded blocks, so the strings
    // taken over by Append(std::string&&) are no longer referenced.
    append_data_.clear();

    return true;
}
502+
503+
/// Writes every row to output, in row order, using WireFormat::WriteString.
void ColumnJSON::SaveBody(OutputStream* output) {
    OutputStream& out = *output;

    for (const std::string_view& value : items_) {
        WireFormat::WriteString(out, value);
    }
}
508+
509+
size_t ColumnJSON::Size() const {
510+
return items_.size();
511+
}
512+
513+
/// Returns a new column holding copies of up to len rows starting at begin.
/// The result is empty when begin is past the last row; len is clamped to
/// the number of rows available after begin.
ColumnRef ColumnJSON::Slice(size_t begin, size_t len) const {
    auto slice = std::make_shared<ColumnJSON>();

    if (begin >= items_.size()) {
        return slice;
    }

    const size_t count = std::min(len, items_.size() - begin);
    slice->items_.reserve(count);

    // Pre-create one block sized for the selected range.
    slice->blocks_.emplace_back(ComputeTotalSize(items_, begin, count));

    const size_t end = begin + count;
    for (size_t row = begin; row < end; ++row) {
        slice->Append(items_[row]);
    }

    return slice;
}
528+
529+
/// Returns a new, empty ColumnJSON.
ColumnRef ColumnJSON::CloneEmpty() const {
    auto empty_clone = std::make_shared<ColumnJSON>();
    return empty_clone;
}
532+
533+
/// Exchanges the content of this column with other.
/// The reference dynamic_cast throws std::bad_cast when other is not a
/// ColumnJSON.
void ColumnJSON::Swap(Column& other) {
    ColumnJSON& rhs = dynamic_cast<ColumnJSON &>(other);

    // Views and both kinds of backing storage are exchanged together, so
    // each column's views keep pointing into storage that column owns.
    items_.swap(rhs.items_);
    blocks_.swap(rhs.blocks_);
    append_data_.swap(rhs.append_data_);
}
539+
540+
/// Returns the element at index wrapped in an ItemView tagged Type::JSON.
/// The lookup goes through At() and is therefore bounds-checked.
ItemView ColumnJSON::GetItem(size_t index) const {
    const std::string_view value = At(index);
    return ItemView{Type::JSON, value};
}
543+
333544
}

clickhouse/columns/string.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,76 @@ class ColumnString : public Column {
142142
std::deque<std::string> append_data_;
143143
};
144144

145+
/**
146+
* Represents column of JSON values, currently stored and serialized as variable-length strings (same as ColumnString).
147+
*/
148+
class ColumnJSON : public Column {
149+
public:
150+
// Type this column takes as argument of Append and returns with At() and operator[]
151+
using ValueType = std::string_view;
152+
153+
ColumnJSON();
154+
~ColumnJSON();
155+
156+
explicit ColumnJSON(size_t element_count);
157+
explicit ColumnJSON(const std::vector<std::string> & data);
158+
explicit ColumnJSON(std::vector<std::string>&& data);
159+
ColumnJSON& operator=(const ColumnJSON&) = delete;
160+
ColumnJSON(const ColumnJSON&) = delete;
161+
162+
/// Increase the capacity of the column for large block insertion.
163+
void Reserve(size_t new_cap) override;
164+
165+
/// Appends one element to the column.
166+
void Append(std::string_view str);
167+
168+
/// Appends one element to the column.
169+
void Append(const char* str);
170+
171+
/// Appends one element to the column.
172+
void Append(std::string&& steal_value);
173+
174+
/// Appends one element to the column.
175+
/// If str lifetime is managed elsewhere and guaranteed to outlive the Block sent to the server
176+
void AppendNoManagedLifetime(std::string_view str);
177+
178+
/// Returns element at given row number.
179+
std::string_view At(size_t n) const;
180+
181+
/// Returns element at given row number.
182+
inline std::string_view operator [] (size_t n) const { return At(n); }
183+
184+
public:
185+
/// Appends content of given column to the end of current one.
186+
void Append(ColumnRef column) override;
187+
188+
/// Loads column data from input stream.
189+
bool LoadBody(InputStream* input, size_t rows) override;
190+
191+
/// Saves column data to output stream.
192+
void SaveBody(OutputStream* output) override;
193+
194+
/// Clear column data .
195+
void Clear() override;
196+
197+
/// Returns count of rows in the column.
198+
size_t Size() const override;
199+
200+
/// Makes slice of the current column.
201+
ColumnRef Slice(size_t begin, size_t len) const override;
202+
ColumnRef CloneEmpty() const override;
203+
void Swap(Column& other) override;
204+
ItemView GetItem(size_t) const override;
205+
206+
private:
207+
void AppendUnsafe(std::string_view);
208+
209+
private:
210+
struct Block;
211+
212+
std::vector<std::string_view> items_;
213+
std::vector<Block> blocks_;
214+
std::deque<std::string> append_data_;
215+
};
216+
145217
}

clickhouse/types/type_parser.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ static const std::unordered_map<std::string, Type::Code> kTypeCode = {
4040
{ "Float32", Type::Float32 },
4141
{ "Float64", Type::Float64 },
4242
{ "String", Type::String },
43+
{ "JSON", Type::JSON },
4344
{ "FixedString", Type::FixedString },
4445
{ "DateTime", Type::DateTime },
4546
{ "DateTime64", Type::DateTime64 },
@@ -68,7 +69,7 @@ static const std::unordered_map<std::string, Type::Code> kTypeCode = {
6869
};
6970

7071
template <typename L, typename R>
71-
inline int CompateStringsCaseInsensitive(const L& left, const R& right) {
72+
inline int CompareStringsCaseInsensitive(const L& left, const R& right) {
7273
int64_t size_diff = left.size() - right.size();
7374
if (size_diff != 0)
7475
return size_diff > 0 ? 1 : -1;
@@ -129,7 +130,7 @@ bool ValidateAST(const TypeAst& ast) {
129130
// Void terminal that is not actually "void" produced when unknown type is encountered.
130131
if (ast.meta == TypeAst::Terminal
131132
&& ast.code == Type::Void
132-
&& CompateStringsCaseInsensitive(ast.name, std::string_view("void")) != 0)
133+
&& CompareStringsCaseInsensitive(ast.name, std::string_view("void")) != 0)
133134
//throw UnimplementedError("Unsupported type: " + ast.name);
134135
return false;
135136

0 commit comments

Comments
 (0)