@@ -330,4 +330,215 @@ ItemView ColumnString::GetItem(size_t index) const {
330330 return ItemView{Type::String, this ->At (index)};
331331}
332332
333+ struct ColumnJSON ::Block
334+ {
335+ using CharT = typename std::string::value_type;
336+
337+ explicit Block (size_t starting_capacity)
338+ : size(0 ),
339+ capacity(starting_capacity),
340+ data_(new CharT[capacity])
341+ {}
342+
343+ inline auto GetAvailable () const {
344+ return capacity - size;
345+ }
346+
347+ std::string_view AppendUnsafe (std::string_view str) {
348+ const auto pos = &data_[size];
349+
350+ memcpy (pos, str.data (), str.size ());
351+ size += str.size ();
352+
353+ return std::string_view (pos, str.size ());
354+ }
355+
356+ auto GetCurrentWritePos () {
357+ return &data_[size];
358+ }
359+
360+ std::string_view ConsumeTailAsJSONViewUnsafe (size_t len) {
361+ const auto start = &data_[size];
362+ size += len;
363+ return std::string_view (start, len);
364+ }
365+
366+ size_t size;
367+ const size_t capacity;
368+ std::unique_ptr<CharT[]> data_;
369+ };
370+
371+ ColumnJSON::ColumnJSON ()
372+ : Column(Type::CreateJSON())
373+ {
374+ }
375+
376+ ColumnJSON::ColumnJSON (size_t element_count)
377+ : Column(Type::CreateJSON())
378+ {
379+ items_.reserve (element_count);
380+ // 16 is arbitrary number, assumption that string values are about ~256 bytes long.
381+ blocks_.reserve (std::max<size_t >(1 , element_count / 16 ));
382+ }
383+
384+ ColumnJSON::ColumnJSON (const std::vector<std::string>& data)
385+ : ColumnJSON()
386+ {
387+ items_.reserve (data.size ());
388+ blocks_.emplace_back (ComputeTotalSize (data));
389+
390+ for (const auto & s : data) {
391+ AppendUnsafe (s);
392+ }
393+ }
394+
395+ ColumnJSON::ColumnJSON (std::vector<std::string>&& data)
396+ : ColumnJSON()
397+ {
398+ items_.reserve (data.size ());
399+
400+ for (auto && d : data) {
401+ append_data_.emplace_back (std::move (d));
402+ auto & last_data = append_data_.back ();
403+ items_.emplace_back (std::string_view{ last_data.data (),last_data.length () });
404+ }
405+ }
406+
407+ ColumnJSON::~ColumnJSON ()
408+ {}
409+
410+ void ColumnJSON::Reserve (size_t new_cap) {
411+ items_.reserve (new_cap);
412+ // 16 is arbitrary number, assumption that string values are about ~256 bytes long.
413+ blocks_.reserve (std::max<size_t >(1 , new_cap / 16 ));
414+ }
415+
416+ void ColumnJSON::Append (std::string_view str) {
417+ if (blocks_.size () == 0 || blocks_.back ().GetAvailable () < str.length ()) {
418+ blocks_.emplace_back (std::max (DEFAULT_BLOCK_SIZE, str.size ()));
419+ }
420+
421+ items_.emplace_back (blocks_.back ().AppendUnsafe (str));
422+ }
423+
424+ void ColumnJSON::Append (const char * str) {
425+ Append (std::string_view (str, strlen (str)));
426+ }
427+
428+ void ColumnJSON::Append (std::string&& steal_value) {
429+ append_data_.emplace_back (std::move (steal_value));
430+ auto & last_data = append_data_.back ();
431+ items_.emplace_back (std::string_view{ last_data.data (),last_data.length () });
432+ }
433+
434+ void ColumnJSON::AppendNoManagedLifetime (std::string_view str) {
435+ items_.emplace_back (str);
436+ }
437+
438+ void ColumnJSON::AppendUnsafe (std::string_view str) {
439+ items_.emplace_back (blocks_.back ().AppendUnsafe (str));
440+ }
441+
442+ void ColumnJSON::Clear () {
443+ items_.clear ();
444+ blocks_.clear ();
445+ append_data_.clear ();
446+ }
447+
448+ std::string_view ColumnJSON::At (size_t n) const {
449+ return items_.at (n);
450+ }
451+
452+ void ColumnJSON::Append (ColumnRef column) {
453+ if (auto col = column->As <ColumnJSON>()) {
454+ const auto total_size = ComputeTotalSize (col->items_ );
455+
456+ // TODO: fill up existing block with some items and then add a new one for the rest of items
457+ if (blocks_.size () == 0 || blocks_.back ().GetAvailable () < total_size)
458+ blocks_.emplace_back (std::max (DEFAULT_BLOCK_SIZE, total_size));
459+
460+ // Intentionally not doing items_.reserve() since that cripples performance.
461+ for (size_t i = 0 ; i < column->Size (); ++i) {
462+ this ->AppendUnsafe ((*col)[i]);
463+ }
464+ }
465+ }
466+
467+ bool ColumnJSON::LoadBody (InputStream* input, size_t rows) {
468+ if (rows == 0 ) {
469+ items_.clear ();
470+ blocks_.clear ();
471+
472+ return true ;
473+ }
474+
475+ decltype (items_) new_items;
476+ decltype (blocks_) new_blocks;
477+
478+ new_items.reserve (rows);
479+
480+ // Suboptimzal if the first row string is >DEFAULT_BLOCK_SIZE, but that must be a very rare case.
481+ Block * block = &new_blocks.emplace_back (DEFAULT_BLOCK_SIZE);
482+
483+ for (size_t i = 0 ; i < rows; ++i) {
484+ uint64_t len;
485+ if (!WireFormat::ReadUInt64 (*input, &len))
486+ return false ;
487+
488+ if (len > block->GetAvailable ())
489+ block = &new_blocks.emplace_back (std::max<size_t >(DEFAULT_BLOCK_SIZE, len));
490+
491+ if (!WireFormat::ReadBytes (*input, block->GetCurrentWritePos (), len))
492+ return false ;
493+
494+ new_items.emplace_back (block->ConsumeTailAsJSONViewUnsafe (len));
495+ }
496+
497+ items_.swap (new_items);
498+ blocks_.swap (new_blocks);
499+
500+ return true ;
501+ }
502+
503+ void ColumnJSON::SaveBody (OutputStream* output) {
504+ for (const auto & item : items_) {
505+ WireFormat::WriteString (*output, item);
506+ }
507+ }
508+
509+ size_t ColumnJSON::Size () const {
510+ return items_.size ();
511+ }
512+
513+ ColumnRef ColumnJSON::Slice (size_t begin, size_t len) const {
514+ auto result = std::make_shared<ColumnJSON>();
515+
516+ if (begin < items_.size ()) {
517+ len = std::min (len, items_.size () - begin);
518+ result->items_ .reserve (len);
519+
520+ result->blocks_ .emplace_back (ComputeTotalSize (items_, begin, len));
521+ for (size_t i = begin; i < begin + len; ++i) {
522+ result->Append (items_[i]);
523+ }
524+ }
525+
526+ return result;
527+ }
528+
529+ ColumnRef ColumnJSON::CloneEmpty () const {
530+ return std::make_shared<ColumnJSON>();
531+ }
532+
533+ void ColumnJSON::Swap (Column& other) {
534+ auto & col = dynamic_cast <ColumnJSON &>(other);
535+ items_.swap (col.items_ );
536+ blocks_.swap (col.blocks_ );
537+ append_data_.swap (col.append_data_ );
538+ }
539+
540+ ItemView ColumnJSON::GetItem (size_t index) const {
541+ return ItemView{Type::JSON, this ->At (index)};
542+ }
543+
333544}
0 commit comments