|
19 | 19 |
|
20 | 20 | #include "tablet.h" |
21 | 21 |
|
| 22 | +#include <algorithm> |
22 | 23 | #include <cstdlib> |
| 24 | +#include <numeric> |
23 | 25 |
|
24 | 26 | #include "allocator/alloc_base.h" |
25 | 27 | #include "datatype/date_converter.h" |
@@ -491,6 +493,135 @@ void Tablet::set_column_categories( |
491 | 493 | } |
492 | 494 | } |
493 | 495 |
|
| 496 | +namespace { |
| 497 | + |
// Rearranges the first `n` elements of `arr` so that, afterwards,
// arr[i] holds the value that was at arr[perm[i]] before the call.
//
// arr  - array with at least `n` elements; elements past `n` are untouched.
// perm - source index for every destination slot; must have at least `n`
//        entries, each one a valid index into `arr`.
// n    - number of leading elements to rearrange.
template <typename T>
void permute_array(T* arr, const std::vector<uint32_t>& perm, uint32_t n) {
    // Gather into scratch storage first: writing straight into `arr` would
    // overwrite source slots that later destinations still need to read.
    std::vector<T> gathered(n);
    std::transform(perm.begin(), perm.begin() + n, gathered.begin(),
                   [arr](uint32_t src) { return arr[src]; });
    std::copy(gathered.begin(), gathered.end(), arr);
}
| 504 | + |
| 505 | +void permute_string_column(Tablet::StringColumn* sc, BitMap& bm, |
| 506 | + const std::vector<uint32_t>& perm, uint32_t n, |
| 507 | + uint32_t max_rows) { |
| 508 | + Tablet::StringColumn tmp; |
| 509 | + tmp.init(max_rows, sc->buf_used > 0 ? sc->buf_used : 64); |
| 510 | + for (uint32_t i = 0; i < n; i++) { |
| 511 | + uint32_t r = perm[i]; |
| 512 | + if (bm.test(r)) { |
| 513 | + // Null row — write a zero-length entry to keep offsets valid. |
| 514 | + tmp.append(i, "", 0); |
| 515 | + } else { |
| 516 | + int32_t off = sc->offsets[r]; |
| 517 | + uint32_t len = |
| 518 | + static_cast<uint32_t>(sc->offsets[r + 1] - off); |
| 519 | + tmp.append(i, sc->buffer + off, len); |
| 520 | + } |
| 521 | + } |
| 522 | + // Swap contents. |
| 523 | + std::swap(sc->offsets, tmp.offsets); |
| 524 | + std::swap(sc->buffer, tmp.buffer); |
| 525 | + std::swap(sc->buf_capacity, tmp.buf_capacity); |
| 526 | + std::swap(sc->buf_used, tmp.buf_used); |
| 527 | + tmp.destroy(); |
| 528 | +} |
| 529 | + |
| 530 | +void permute_bitmap(BitMap& bm, const std::vector<uint32_t>& perm, |
| 531 | + uint32_t n) { |
| 532 | + if (!bm.get_bitmap()) return; |
| 533 | + uint32_t size_bytes = bm.get_size(); |
| 534 | + // Save original bits. |
| 535 | + std::vector<char> orig(bm.get_bitmap(), bm.get_bitmap() + size_bytes); |
| 536 | + // Clear all bits (= all non-null). |
| 537 | + bm.clear_all(); |
| 538 | + // Re-set null bits through the permutation. |
| 539 | + for (uint32_t i = 0; i < n; i++) { |
| 540 | + uint32_t src = perm[i]; |
| 541 | + if (orig[src >> 3] & (1 << (src & 7))) { |
| 542 | + bm.set(i); |
| 543 | + } |
| 544 | + } |
| 545 | +} |
| 546 | + |
| 547 | +} // anonymous namespace |
| 548 | + |
| 549 | +void Tablet::sort_by_device() { |
| 550 | + if (id_column_indexes_.empty() || cur_row_size_ <= 1) return; |
| 551 | + |
| 552 | + const uint32_t n = cur_row_size_; |
| 553 | + |
| 554 | + // Build permutation sorted by tag column values (stable sort keeps |
| 555 | + // timestamp order within each device). |
| 556 | + std::vector<uint32_t> perm(n); |
| 557 | + std::iota(perm.begin(), perm.end(), 0); |
| 558 | + |
| 559 | + std::stable_sort(perm.begin(), perm.end(), [this](uint32_t a, uint32_t b) { |
| 560 | + for (int idx : id_column_indexes_) { |
| 561 | + bool a_null = bitmaps_[idx].test(a); |
| 562 | + bool b_null = bitmaps_[idx].test(b); |
| 563 | + if (a_null != b_null) return a_null > b_null; // null sorts last |
| 564 | + if (a_null) continue; // both null — equal on this column |
| 565 | + const StringColumn& sc = *value_matrix_[idx].string_col; |
| 566 | + int32_t a_off = sc.offsets[a]; |
| 567 | + uint32_t a_len = static_cast<uint32_t>(sc.offsets[a + 1] - a_off); |
| 568 | + int32_t b_off = sc.offsets[b]; |
| 569 | + uint32_t b_len = static_cast<uint32_t>(sc.offsets[b + 1] - b_off); |
| 570 | + uint32_t min_len = std::min(a_len, b_len); |
| 571 | + int cmp = (min_len > 0) |
| 572 | + ? memcmp(sc.buffer + a_off, sc.buffer + b_off, min_len) |
| 573 | + : 0; |
| 574 | + if (cmp != 0) return cmp < 0; |
| 575 | + if (a_len != b_len) return a_len < b_len; |
| 576 | + } |
| 577 | + return false; |
| 578 | + }); |
| 579 | + |
| 580 | + // Check if already sorted. |
| 581 | + bool sorted = true; |
| 582 | + for (uint32_t i = 0; i < n && sorted; i++) { |
| 583 | + if (perm[i] != i) sorted = false; |
| 584 | + } |
| 585 | + if (sorted) return; |
| 586 | + |
| 587 | + // Apply permutation to timestamps. |
| 588 | + permute_array(timestamps_, perm, n); |
| 589 | + |
| 590 | + // Apply permutation to each column. |
| 591 | + uint32_t col_count = static_cast<uint32_t>(schema_vec_->size()); |
| 592 | + for (uint32_t c = 0; c < col_count; c++) { |
| 593 | + TSDataType dt = schema_vec_->at(c).data_type_; |
| 594 | + switch (dt) { |
| 595 | + case BOOLEAN: |
| 596 | + permute_array(value_matrix_[c].bool_data, perm, n); |
| 597 | + break; |
| 598 | + case INT32: |
| 599 | + case DATE: |
| 600 | + permute_array(value_matrix_[c].int32_data, perm, n); |
| 601 | + break; |
| 602 | + case INT64: |
| 603 | + case TIMESTAMP: |
| 604 | + permute_array(value_matrix_[c].int64_data, perm, n); |
| 605 | + break; |
| 606 | + case FLOAT: |
| 607 | + permute_array(value_matrix_[c].float_data, perm, n); |
| 608 | + break; |
| 609 | + case DOUBLE: |
| 610 | + permute_array(value_matrix_[c].double_data, perm, n); |
| 611 | + break; |
| 612 | + case STRING: |
| 613 | + case TEXT: |
| 614 | + case BLOB: |
| 615 | + permute_string_column(value_matrix_[c].string_col, bitmaps_[c], |
| 616 | + perm, n, max_row_num_); |
| 617 | + break; |
| 618 | + default: |
| 619 | + break; |
| 620 | + } |
| 621 | + permute_bitmap(bitmaps_[c], perm, n); |
| 622 | + } |
| 623 | +} |
| 624 | + |
494 | 625 | void Tablet::reset_string_columns() { |
495 | 626 | size_t schema_count = schema_vec_->size(); |
496 | 627 | for (size_t c = 0; c < schema_count; c++) { |
|
0 commit comments