forked from SeasX/SeasClick
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtypesToPhp.cpp
More file actions
2797 lines (2672 loc) · 111 KB
/
typesToPhp.cpp
File metadata and controls
2797 lines (2672 loc) · 111 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
+----------------------------------------------------------------------+
| php_clickhouse |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2026 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Ilia Alshanetsky <ilia@ilia.ws> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
extern "C" {
#include "php.h"
#include "php_ini.h"
#include "ext/standard/info.h"
#include "php7_wrapper.h"
};
#include "php_clickhouse.h"
#include "lib/clickhouse-cpp/clickhouse/client.h"
#include "lib/clickhouse-cpp/clickhouse/error_codes.h"
#include "lib/clickhouse-cpp/clickhouse/types/type_parser.h"
#include "lib/clickhouse-cpp/clickhouse/columns/factory.h"
#include "lib/clickhouse-cpp/clickhouse/columns/geo.h"
#include "lib/clickhouse-cpp/clickhouse/columns/ip4.h"
#include "lib/clickhouse-cpp/clickhouse/columns/ip6.h"
#include "lib/clickhouse-cpp/clickhouse/columns/lowcardinality.h"
#include "lib/clickhouse-cpp/clickhouse/columns/map.h"
#include <sstream>
#include <iomanip>
#include <algorithm>
#include <cmath>
#include <cerrno>
#include <cinttypes>
#include "typesToPhp.hpp"
using namespace clickhouse;
using namespace std;
/*
* Format a 128-bit integer into a decimal string using a stack buffer.
* Avoids the heap allocation a stringstream incurs per cell on the read
* paths for Int128 / UInt128 / Decimal columns. Returns the number of
* bytes written into `out` (which must hold at least 41 bytes: sign +
* 39 digits + NUL margin).
*/
/*
 * Write the decimal representation of `v` into `out` and return the
 * number of characters written. No NUL terminator is appended; the
 * caller owns sizing and termination of `out`.
 */
static size_t format_uint128_dec(absl::uint128 v, char *out)
{
    /* 2^128 - 1 has 39 decimal digits, so 40 bytes always suffice.
     * Digits come out least-significant first, so fill the scratch
     * buffer from its end and copy the used tail out in one pass. */
    char scratch[40];
    char *const scratch_end = scratch + sizeof(scratch);
    char *cursor = scratch_end;
    do {
        const int digit = (int)(v % 10);
        *--cursor = (char)('0' + digit);
        v /= 10;
    } while (v != 0);
    std::copy(cursor, scratch_end, out);
    return (size_t)(scratch_end - cursor);
}
/*
 * Signed counterpart of format_uint128_dec: emits an optional leading
 * '-' followed by the magnitude, and returns the total character count
 * (sign included). No NUL terminator is written.
 */
static size_t format_int128_dec(absl::int128 v, char *out)
{
    if (v < 0) {
        out[0] = '-';
        /* Negate in the unsigned domain: the magnitude of INT128_MIN
         * fits in uint128 but cannot be represented as an int128. */
        const absl::uint128 magnitude = absl::uint128(0) - absl::uint128(v);
        return format_uint128_dec(magnitude, out + 1) + 1;
    }
    return format_uint128_dec(absl::uint128(v), out);
}
/*
* Parse a decimal-digit string into a 128-bit unsigned. Both ColumnInt128
* and ColumnUInt128 inserts share the body; the signed wrapper composes
* with negate handled by the caller. Throws on overflow / non-digit.
*
* `out_label` is the type name for the error message ("Int128" / "UInt128").
*/
/*
 * Parse `len` decimal digits starting at `s` into an unsigned 128-bit
 * value. `out_label` is the type name used as the error-message prefix.
 *
 * Throws std::runtime_error when the input is empty, longer than 39
 * characters, contains a non-digit, or exceeds 2^128 - 1.
 */
static absl::uint128 parse_uint128_dec(const char *s, size_t len, const char *out_label)
{
    if (len == 0 || len > 39) {
        throw std::runtime_error(std::string(out_label) + " string is empty or too long");
    }
    const absl::uint128 max_value = ~absl::uint128(0);
    absl::uint128 v = 0;
    for (size_t i = 0; i < len; ++i) {
        if (s[i] < '0' || s[i] > '9') {
            throw std::runtime_error(std::string(out_label) + " string contains non-digit characters");
        }
        const absl::uint128 digit = absl::uint128((unsigned)(s[i] - '0'));
        /* Check the bound BEFORE multiplying. Comparing the wrapped
         * result against the previous value is not sufficient: e.g. a
         * 39-digit input starting at 99e37 wraps modulo 2^128 to a value
         * that is still larger than the 38-digit prefix, so the wrap
         * would go unnoticed. v * 10 + digit fits iff
         * v <= (max - digit) / 10. */
        if (v > (max_value - digit) / 10) {
            throw std::runtime_error(std::string(out_label) + " string overflows the 128-bit range");
        }
        v = v * 10 + digit;
    }
    return v;
}
/*
* dynamic_pointer_cast helper that throws a contextual error when the
* cast returns null, instead of leaving the caller to deref nullptr.
* The clickhouse-cpp Block schema metadata is server-supplied; a
* mismatch between the declared type code and the actual ColumnRef
* concrete type used to crash the worker. Callers in convertToZval
* (especially Map / Tuple decoders) wrap every typed cast through here.
*/
/*
 * Downcast `c` to the concrete column type TCol. Throws
 * std::runtime_error("<what>: column type mismatch") when the cast
 * yields null, so callers never dereference a null pointer.
 */
template <typename TCol>
static inline std::shared_ptr<TCol> as_or_throw(const ColumnRef &c, const char *what)
{
    if (auto typed = c->As<TCol>()) {
        return typed;
    }
    throw std::runtime_error(std::string(what) + ": column type mismatch");
}
/*
* Strict numeric coercion for INSERT cells. PHP's `zval_get_long` and
* `zval_get_double` happily produce 0 / 0.0 for non-numeric strings,
* arrays, objects, etc., which used to land "abc" as 0 in an Int32
* column with no diagnostic. The strict variants below reject every
* non-numeric input and require full string consumption, mirroring
* the strict parsers we already use for Map keys (CR-306) and hex
* literals (CR-508). Range-checking against the destination column's
* width is still the caller's responsibility (appendIntColumn passes
* MinV/MaxV, narrow-int Map dispatch wraps with its own checks).
*
* IS_NULL handling: rejected by default (storing 0 silently corrupts
* non-Nullable columns). The Nullable insert path bumps
* `g_allow_null_in_strict` via AllowNullGuard so its recursive child
* build can accept NULL cells (the null mask makes the placeholder
* value irrelevant).
*/
/* Per-thread nesting counter: while it is positive, the strict_zval_*
 * helpers accept IS_NULL and return a placeholder instead of throwing. */
static thread_local int g_allow_null_in_strict = 0;

/* Scope guard that raises the counter on construction and lowers it on
 * destruction. Non-copyable so every increment has exactly one matching
 * decrement. */
struct AllowNullGuard {
    AllowNullGuard() { g_allow_null_in_strict += 1; }
    AllowNullGuard(const AllowNullGuard&) = delete;
    AllowNullGuard& operator=(const AllowNullGuard&) = delete;
    ~AllowNullGuard() { g_allow_null_in_strict -= 1; }
};
/*
 * Strictly convert an INSERT cell to zend_long.
 *
 * Accepts: IS_LONG, IS_TRUE / IS_FALSE (1 / 0), finite doubles with no
 * fractional part that fit in zend_long, and base-10 integer strings
 * that strtoll consumes in full. IS_NULL yields 0 only while
 * g_allow_null_in_strict is positive.
 *
 * Throws std::runtime_error (message suffixed with `type_label`) for
 * every other input. Range-checking against the destination column's
 * width remains the caller's job.
 */
static zend_long strict_zval_long(zval *z, const char *type_label)
{
    switch (Z_TYPE_P(z)) {
    case IS_LONG:  return Z_LVAL_P(z);
    case IS_TRUE:  return 1;
    case IS_FALSE: return 0;
    case IS_NULL:
        if (g_allow_null_in_strict > 0) return 0;
        throw std::runtime_error(
            std::string("null cannot be assigned to non-Nullable column ") + type_label);
    case IS_DOUBLE: {
        const double d = Z_DVAL_P(z);
        if (std::isnan(d) || std::isinf(d)) {
            throw std::runtime_error(
                std::string("non-finite double cannot be assigned to ") + type_label);
        }
        double intpart;
        if (std::modf(d, &intpart) != 0.0) {
            throw std::runtime_error(
                std::string("fractional double cannot be assigned to integer column ") + type_label);
        }
        /* ZEND_LONG_MIN is a power of two and converts to double exactly;
         * ZEND_LONG_MAX does not (on 64-bit it rounds UP to 2^63). Using
         * `d > (double)ZEND_LONG_MAX` therefore let d == 2^63 through and
         * the cast below was undefined behaviour. The exclusive upper
         * bound -(double)ZEND_LONG_MIN is exact on every width. */
        if (d < (double)ZEND_LONG_MIN || d >= -(double)ZEND_LONG_MIN) {
            throw std::runtime_error(
                std::string("double out of range for integer column ") + type_label);
        }
        return (zend_long)d;
    }
    case IS_STRING: {
        const char *s = Z_STRVAL_P(z);
        const size_t slen = Z_STRLEN_P(z);
        if (slen == 0) {
            throw std::runtime_error(
                std::string("empty string cannot be assigned to ") + type_label);
        }
        char *endp = NULL;
        errno = 0;
        const long long v = strtoll(s, &endp, 10);
        /* Compare consumed length against the zend_string length rather
         * than testing *endp == '\0': PHP strings may embed NUL bytes. */
        if (errno == ERANGE || endp == s ||
            (size_t)(endp - s) != slen) {
            throw std::runtime_error(
                std::string("invalid integer string for ") + type_label);
        }
        /* long long can be wider than zend_long (32-bit builds); refuse
         * values that the narrowing cast would silently truncate. */
        if (v < (long long)ZEND_LONG_MIN || v > (long long)ZEND_LONG_MAX) {
            throw std::runtime_error(
                std::string("invalid integer string for ") + type_label);
        }
        return (zend_long)v;
    }
    default:
        throw std::runtime_error(
            std::string("array / object / resource cannot be assigned to integer column ") + type_label);
    }
}
/* UInt64 needs a strict parser of its own because strict_zval_long
* tops out at ZEND_LONG_MAX (2^63-1): values above that arrive as
* decimal strings (PHP can't fit them in a zend_long) and must be
* parsed via strtoull, not strtoll. Same shape as strict_zval_long
* — full-consumption check, NULL handled under AllowNullGuard,
* fractional / non-finite doubles rejected — but with the unsigned
* range and an additional `0x` hex form. */
/*
 * Strictly convert an INSERT cell to uint64_t.
 *
 * Accepts: non-negative IS_LONG, IS_TRUE / IS_FALSE (1 / 0), finite
 * non-negative doubles with no fractional part below 2^64, and strings
 * made up solely of decimal digits or of "0x"/"0X" followed solely by
 * hex digits. IS_NULL yields 0 only while g_allow_null_in_strict is
 * positive.
 *
 * Throws std::runtime_error (message suffixed with `type_label`) for
 * every other input, including values that overflow 64 bits.
 */
static uint64_t strict_zval_u64(zval *z, const char *type_label)
{
    switch (Z_TYPE_P(z)) {
    case IS_LONG: {
        const zend_long n = Z_LVAL_P(z);
        if (n < 0) {
            throw std::runtime_error(
                std::string("negative value cannot fit in ") + type_label);
        }
        return (uint64_t)n;
    }
    case IS_TRUE:  return 1;
    case IS_FALSE: return 0;
    case IS_NULL:
        if (g_allow_null_in_strict > 0) return 0;
        throw std::runtime_error(
            std::string("null cannot be assigned to non-Nullable column ") + type_label);
    case IS_DOUBLE: {
        const double d = Z_DVAL_P(z);
        if (std::isnan(d) || std::isinf(d)) {
            throw std::runtime_error(
                std::string("non-finite double cannot be assigned to ") + type_label);
        }
        double intpart;
        if (std::modf(d, &intpart) != 0.0) {
            throw std::runtime_error(
                std::string("fractional double cannot be assigned to integer column ") + type_label);
        }
        /* 18446744073709551616.0 is exactly 2^64, the first value that
         * no longer fits in uint64_t, hence the inclusive comparison.
         * Negatives are rejected explicitly. */
        if (d < 0.0 || d >= 18446744073709551616.0) {
            throw std::runtime_error(
                std::string("double out of range for integer column ") + type_label);
        }
        return (uint64_t)d;
    }
    case IS_STRING: {
        const char *s = Z_STRVAL_P(z);
        const size_t slen = Z_STRLEN_P(z);
        if (slen == 0) {
            throw std::runtime_error(
                std::string("empty string cannot be assigned to ") + type_label);
        }
        int base = 10;
        const char *p = s;
        size_t plen = slen;
        if (slen >= 3 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
            base = 16;
            p = s + 2;
            plen = slen - 2;
        }
        /* Validate every character up front. strtoull on its own skips
         * leading whitespace, honours a sign (so " -1" came back as
         * 2^64 - 1) and, in base 16, swallows a second "0x" prefix
         * ("0x0x10" parsed as 16). Requiring pure digits closes all of
         * those holes and also rejects embedded NUL bytes. */
        for (size_t i = 0; i < plen; ++i) {
            const char c = p[i];
            const bool is_dec = (c >= '0' && c <= '9');
            const bool is_hex = is_dec || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
            if (base == 16 ? !is_hex : !is_dec) {
                throw std::runtime_error(
                    std::string("invalid integer string for ") + type_label);
            }
        }
        char *endp = NULL;
        errno = 0;
        const unsigned long long v = strtoull(p, &endp, base);
        /* ERANGE signals overflow past 64 bits; the length comparison is
         * kept as a belt-and-braces full-consumption check. */
        if (errno == ERANGE || endp == p ||
            (size_t)(endp - p) != plen) {
            throw std::runtime_error(
                std::string("invalid integer string for ") + type_label);
        }
        return (uint64_t)v;
    }
    default:
        throw std::runtime_error(
            std::string("array / object / resource cannot be assigned to integer column ") + type_label);
    }
}
/*
 * Strictly convert an INSERT cell to double.
 *
 * Accepts IS_LONG, IS_TRUE / IS_FALSE (1.0 / 0.0), finite doubles, and
 * strings that strtod consumes in full to a finite value without
 * ERANGE. IS_NULL yields 0.0 only while g_allow_null_in_strict is
 * positive. Everything else throws std::runtime_error with a message
 * suffixed by `type_label`.
 */
static double strict_zval_double(zval *z, const char *type_label)
{
    /* Builds the exception lazily so no string work happens on the
     * success path. */
    const auto reject = [type_label](const char *prefix) {
        return std::runtime_error(std::string(prefix) + type_label);
    };

    switch (Z_TYPE_P(z)) {
    case IS_TRUE:
        return 1.0;
    case IS_FALSE:
        return 0.0;
    case IS_LONG:
        return (double)Z_LVAL_P(z);
    case IS_NULL:
        if (g_allow_null_in_strict <= 0) {
            throw reject("null cannot be assigned to non-Nullable column ");
        }
        return 0.0;
    case IS_DOUBLE: {
        const double raw = Z_DVAL_P(z);
        if (!std::isfinite(raw)) {
            throw reject("non-finite double cannot be assigned to ");
        }
        return raw;
    }
    case IS_STRING: {
        const char *const text = Z_STRVAL_P(z);
        const size_t text_len = Z_STRLEN_P(z);
        if (text_len == 0) {
            throw reject("empty string cannot be assigned to ");
        }
        char *stop = NULL;
        errno = 0;
        const double parsed = strtod(text, &stop);
        /* Full consumption is judged by length, not by a NUL test,
         * because PHP strings may contain embedded NUL bytes. */
        const bool consumed_all = (stop != text) && ((size_t)(stop - text) == text_len);
        if (errno == ERANGE || !consumed_all || !std::isfinite(parsed)) {
            throw reject("invalid float string for ");
        }
        return parsed;
    }
    default:
        break;
    }
    throw reject("array / object / resource cannot be assigned to float column ");
}
/*
 * Convert an INSERT cell to std::string. Non-null cells are stringified
 * through ZStrGuard; IS_NULL yields an empty string only while
 * g_allow_null_in_strict is positive and otherwise throws
 * std::runtime_error suffixed with `type_label`.
 */
static std::string strict_zval_string(zval *z, const char *type_label)
{
    if (Z_TYPE_P(z) != IS_NULL) {
        ZStrGuard holder(z);
        return std::string(holder.val(), holder.len());
    }
    if (g_allow_null_in_strict <= 0) {
        throw std::runtime_error(
            std::string("null cannot be assigned to non-Nullable column ") + type_label);
    }
    return std::string();
}
/* Return the numeric value (0-15) of hex digit `c`, or -1 when `c` is
 * not one of 0-9, a-f, A-F. */
static int uuid_hex_value(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    /* Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'; no other character
     * lands inside that range after the fold. */
    const int folded = c | 0x20;
    return (folded >= 'a' && folded <= 'f') ? (folded - 'a' + 10) : -1;
}
/*
 * Parse a textual UUID of `len` bytes at `s` into a (high, low) pair.
 * '-' characters are skipped wherever they appear; exactly 32 hex
 * digits must remain. The first 16 digits form the high word, the last
 * 16 the low word. Throws std::runtime_error(error_msg) on any other
 * character or a wrong digit count.
 */
static UUID parseUUIDString(const char *s, size_t len, const char *error_msg)
{
    uint64_t halves[2] = {0, 0};
    size_t seen = 0;
    for (const char *cur = s, *const stop = s + len; cur != stop; ++cur) {
        if (*cur == '-') {
            continue;
        }
        const int nibble = uuid_hex_value(*cur);
        if (nibble < 0 || seen >= 32) {
            throw std::runtime_error(error_msg);
        }
        /* seen < 32 here, so seen / 16 selects index 0 (high) or 1 (low). */
        uint64_t &half = halves[seen / 16];
        half = (half << 4) | (uint64_t)nibble;
        ++seen;
    }
    if (seen != 32) {
        throw std::runtime_error(error_msg);
    }
    return UUID{halves[0], halves[1]};
}
/*
 * Note on ZStrGuard (used by strict_zval_string and appendEnumColumn;
 * its definition is not in this part of the file): it is an RAII guard
 * that owns a zend_string* obtained from zval_get_string and releases it
 * in the destructor. Used at PHP-to-C boundaries where the surrounding
 * code can throw (validation, recursive insertColumn, etc.) without
 * forcing every site to write try { ... } catch { release; throw; }.
 */
/*
* Extract the width from a "FixedString(N)" type name. The previous
* inline form did `typeName.erase(typeName.find("FixedString("), 12)` —
* if find() returned npos, erase(npos, 12) is undefined. This helper
* validates the prefix and parses the digit run.
*/
/*
 * Return N for a type whose name is "FixedString(N)". Throws
 * std::runtime_error when the name does not have that shape or when N
 * is not a positive integer that fits in int.
 */
static int parseFixedStringWidth(TypeRef type)
{
    static const char prefix[] = "FixedString(";
    static const size_t prefix_len = sizeof(prefix) - 1;

    const std::string &name = type->GetName();
    /* Need the prefix, at least one width character and the closing ')'. */
    const bool well_formed = name.size() >= prefix_len + 2 &&
                             name.compare(0, prefix_len, prefix) == 0 &&
                             name.back() == ')';
    if (!well_formed) {
        throw std::runtime_error("Invalid FixedString type name: " + name);
    }

    const char *const digits = name.c_str() + prefix_len;
    const char *const closing = name.c_str() + name.size() - 1;
    char *stop = nullptr;
    errno = 0;
    const long width = strtol(digits, &stop, 10);
    /* The number must end exactly at the closing parenthesis. */
    const bool parsed = errno != ERANGE && stop != digits && stop == closing;
    const bool in_range = width > 0 && width <= INT_MAX;
    if (!parsed || !in_range) {
        throw std::runtime_error("Invalid FixedString width: " + name);
    }
    return (int)width;
}
/*
* Parse "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS" into a Unix epoch.
*
* Use timegm, not mktime: the read paths in convertToZval all format
* via gmtime, so the round-trip needs to be UTC symmetric. Using
* mktime would reinterpret the parsed components as local time and
* shift the stored value by the runner's TZ offset.
*/
/*
 * Parse "YYYY-MM-DD" (is_date == true) or "YYYY-MM-DD HH:MM:SS"
 * (is_date == false) as a UTC calendar time and return the Unix epoch
 * seconds.
 *
 * Throws std::runtime_error when the text does not match the format,
 * has trailing characters, cannot be converted, or names a calendar
 * value that timegm would normalize to a different one.
 */
static std::time_t to_time_t(const std::string& str, bool is_date = true)
{
    const char *const kind = is_date ? "Date" : "DateTime";

    std::tm t = {};
    std::istringstream ss(str);
    ss >> std::get_time(&t, is_date ? "%Y-%m-%d" : "%Y-%m-%d %H:%M:%S");
    if (ss.fail()) {
        /* Reject malformed date/datetime strings instead of silently
         * coercing to (time_t)-1, which then read back as 1969-12-31. */
        throw std::runtime_error(
            std::string("Invalid ") + kind + " string: " + str);
    }
    /* std::get_time stops at the first character past the format
     * without raising failbit, so "2024-01-01abc" would parse cleanly.
     * peek() returns EOF only when the stream is fully consumed. */
    if (ss.peek() != std::char_traits<char>::eof()) {
        throw std::runtime_error(
            std::string("Invalid ") + kind + " string (trailing characters): " + str);
    }
    /* Snapshot the parsed fields: timegm normalizes its argument in
     * place (e.g. 2024-02-30 becomes 2024-03-01). */
    const std::tm parsed = t;
#ifdef _WIN32
    /* MSVC has no timegm(); _mkgmtime() is the documented equivalent. */
    std::time_t out = _mkgmtime(&t);
#else
    std::time_t out = timegm(&t);
#endif
    /* -1 is both the failure sentinel and the legitimate result for
     * 1969-12-31 23:59:59 UTC. Only treat it as failure when the input
     * was not that exact instant; otherwise the valid pre-epoch second
     * would be rejected. */
    const bool is_minus_one_instant =
        parsed.tm_year == 69 && parsed.tm_mon == 11 && parsed.tm_mday == 31 &&
        parsed.tm_hour == 23 && parsed.tm_min == 59 && parsed.tm_sec == 59;
    if (out == (std::time_t)-1 && !is_minus_one_instant) {
        throw std::runtime_error(
            std::string("Invalid ") + kind + " string (out of range): " + str);
    }
    /* Round-trip-validate: convert back to UTC components and compare
     * against what was parsed. */
    std::tm rt = {};
#ifdef _WIN32
    if (gmtime_s(&rt, &out) != 0) {
#else
    if (!gmtime_r(&out, &rt)) {
#endif
        throw std::runtime_error(
            std::string("Invalid ") + kind + " string (gmtime failed): " + str);
    }
    if (rt.tm_year != parsed.tm_year || rt.tm_mon != parsed.tm_mon ||
        rt.tm_mday != parsed.tm_mday || rt.tm_hour != parsed.tm_hour ||
        rt.tm_min != parsed.tm_min || rt.tm_sec != parsed.tm_sec) {
        throw std::runtime_error(
            std::string("Invalid ") + kind + " string (normalized to a different value): " + str);
    }
    return out;
}
/*
* Parse "YYYY-MM-DD HH:MM:SS[.ffffff...]" into (whole-seconds, fractional)
* pair. The fractional component is multiplied by 10^precision so the
* caller can encode straight into ColumnDateTime64. The previous insert
* path used to_time_t alone, which dropped any sub-second part of the
* string entirely.
*/
static std::pair<std::time_t, int64_t> to_time_t_with_frac(const std::string &str, size_t precision, int64_t scale)
{
auto dot = str.find('.');
std::time_t whole = to_time_t(dot == std::string::npos ? str : str.substr(0, dot), false);
int64_t frac = 0;
if (dot != std::string::npos) {
if (precision == 0) {
/* DateTime64(0) has no fractional component; any text after
* the dot is invalid. The prior pass silently dropped the
* suffix at precision 0, which let "00:00:00.garbage" land
* as a clean DateTime64(0). */
throw std::runtime_error(
"Invalid DateTime64(0) string (fractional suffix on a "
"zero-precision column): " + str);
}
const char *p = str.c_str() + dot + 1;
const char *end = str.c_str() + str.size();
if (p >= end) {
/* Bare "12:34:56." with no digits after the dot. Previously
* accepted (consumed=0 took the no-op path). Reject. */
throw std::runtime_error(
"Invalid DateTime64 string (bare dot without fractional digits): " + str);
}
size_t consumed = 0;
while (p < end && consumed < precision && *p >= '0' && *p <= '9') {
frac = frac * 10 + (*p - '0');
++p;
++consumed;
}
if (consumed == 0) {
/* The first character after the dot wasn't a digit. */
throw std::runtime_error(
"Invalid DateTime64 string (non-digit after dot): " + str);
}
// Pad missing digits up to precision so "12:34:56.5" with precision 3
// contributes 500 (ms), not 5.
for (size_t pad = consumed; pad < precision; ++pad) frac *= 10;
/* Reject trailing non-digit characters after the fractional part.
* Without this "2024-01-01 00:00:00.123abc" silently truncated to
* .123 and dropped the abc. */
if (p < end) {
throw std::runtime_error(
"Invalid DateTime64 string (trailing characters after fraction): " + str);
}
} else {
frac = 0;
}
(void)scale;
return {whole, frac};
}
/* Cap recursion through nested column types (Tuple/Array/Map/Nullable/
* LowCardinality) so a server-supplied schema like Tuple(Tuple(Tuple(...)))
* cannot stack-overflow the worker. Used by both the read path
* (convertToZval) and the write path (createColumn / insertColumn);
* BeginInsert returns a server-built block schema, so an adversarial or
* MITM'd server can craft a deeply-nested type just like on the read side.
*
* thread_local because clickhouse-cpp may dispatch from worker threads. */
/* Current nesting depth of column-type recursion on this thread. */
static thread_local int convert_depth = 0;
/* Deepest nesting accepted before ConvertDepthGuard throws. */
static const int MAX_CONVERT_DEPTH = 32;

/*
 * Scope guard for one level of recursion. The constructor increments
 * convert_depth and throws std::runtime_error when the limit is
 * exceeded, undoing its own increment first because the destructor does
 * not run for a guard whose constructor threw. The destructor
 * decrements.
 *
 * Copying is disabled, matching AllowNullGuard: a copied guard would
 * decrement the counter a second time without a matching increment.
 */
struct ConvertDepthGuard {
    ConvertDepthGuard() {
        if (++convert_depth > MAX_CONVERT_DEPTH) {
            --convert_depth;
            throw std::runtime_error("ClickHouse column nested-type depth exceeds limit");
        }
    }
    ~ConvertDepthGuard() { --convert_depth; }
    ConvertDepthGuard(const ConvertDepthGuard&) = delete;
    ConvertDepthGuard& operator=(const ConvertDepthGuard&) = delete;
};
/*
 * Build an empty column object matching `type`.
 *
 * Nested types (Array, Nullable) recurse; recursion depth is bounded by
 * ConvertDepthGuard, which throws when the limit is exceeded.
 * LowCardinality is supported over String / FixedString only (optionally
 * Nullable). A handful of Map key/value combinations get typed
 * ColumnMapT instances; every other Map, and every type code not listed
 * here, is delegated to CreateColumnByType using the type's name.
 * Tuple and Void throw std::runtime_error.
 */
ColumnRef createColumn(TypeRef type)
{
    ConvertDepthGuard _depth_guard;

    switch (type->GetCode()) {
    /* Fixed-width integers. */
    case Type::Code::UInt8:   return std::make_shared<ColumnUInt8>();
    case Type::Code::UInt16:  return std::make_shared<ColumnUInt16>();
    case Type::Code::UInt32:  return std::make_shared<ColumnUInt32>();
    case Type::Code::UInt64:  return std::make_shared<ColumnUInt64>();
    case Type::Code::UInt128: return std::make_shared<ColumnUInt128>();
    case Type::Code::Int8:    return std::make_shared<ColumnInt8>();
    case Type::Code::Int16:   return std::make_shared<ColumnInt16>();
    case Type::Code::Int32:   return std::make_shared<ColumnInt32>();
    case Type::Code::Int64:   return std::make_shared<ColumnInt64>();
    case Type::Code::Int128:  return std::make_shared<ColumnInt128>();

    /* Floating point, UUID and strings. */
    case Type::Code::Float32: return std::make_shared<ColumnFloat32>();
    case Type::Code::Float64: return std::make_shared<ColumnFloat64>();
    case Type::Code::UUID:    return std::make_shared<ColumnUUID>();
    case Type::Code::String:  return std::make_shared<ColumnString>();
    case Type::Code::FixedString:
        return std::make_shared<ColumnFixedString>(parseFixedStringWidth(type));

    /* Date and time families. */
    case Type::Code::Date:     return std::make_shared<ColumnDate>();
    case Type::Code::Date32:   return std::make_shared<ColumnDate32>();
    case Type::Code::DateTime: return std::make_shared<ColumnDateTime>();
    case Type::Code::DateTime64:
        return std::make_shared<ColumnDateTime64>(type->As<DateTime64Type>()->GetPrecision());
    case Type::Code::Time:     return std::make_shared<ColumnTime>();
    case Type::Code::Time64:
        return std::make_shared<ColumnTime64>(type->As<Time64Type>()->GetPrecision());

    /* All decimal widths share one column class. */
    case Type::Code::Decimal:
    case Type::Code::Decimal32:
    case Type::Code::Decimal64:
    case Type::Code::Decimal128: {
        auto decimal_type = type->As<DecimalType>();
        return std::make_shared<ColumnDecimal>(decimal_type->GetPrecision(), decimal_type->GetScale());
    }

    case Type::Code::Enum8:  return std::make_shared<ColumnEnum8>(type);
    case Type::Code::Enum16: return std::make_shared<ColumnEnum16>(type);

    /* Wrappers recurse into their nested type. */
    case Type::Code::Array:
        return std::make_shared<ColumnArray>(createColumn(type->As<ArrayType>()->GetItemType()));
    case Type::Code::Nullable:
        return std::make_shared<ColumnNullable>(
            createColumn(type->As<NullableType>()->GetNestedType()),
            std::make_shared<ColumnUInt8>());

    case Type::Code::LowCardinality: {
        TypeRef nested = type->As<LowCardinalityType>()->GetNestedType();
        const bool is_nullable = (nested->GetCode() == Type::Code::Nullable);
        /* Look through an optional Nullable wrapper to the dictionary type. */
        TypeRef inner = is_nullable ? nested->As<NullableType>()->GetNestedType() : nested;
        switch (inner->GetCode()) {
        case Type::Code::String:
            if (is_nullable) {
                return std::make_shared<ColumnLowCardinalityT<ColumnNullableT<ColumnString>>>();
            }
            return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
        case Type::Code::FixedString: {
            const int width = parseFixedStringWidth(inner);
            if (is_nullable) {
                return std::make_shared<ColumnLowCardinalityT<ColumnNullableT<ColumnFixedString>>>(width);
            }
            return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(width);
        }
        default:
            break;
        }
        throw std::runtime_error("LowCardinality only supported over String / FixedString (Nullable allowed)");
    }

    case Type::Code::Map: {
        auto map_type = type->As<MapType>();
        const Type::Code kc = map_type->GetKeyType()->GetCode();
        const Type::Code vc = map_type->GetValueType()->GetCode();
        if (kc == Type::Code::String) {
            switch (vc) {
            case Type::Code::String:
                return std::make_shared<ColumnMapT<ColumnString, ColumnString>>(
                    std::make_shared<ColumnString>(), std::make_shared<ColumnString>());
            case Type::Code::Int64:
                return std::make_shared<ColumnMapT<ColumnString, ColumnInt64>>(
                    std::make_shared<ColumnString>(), std::make_shared<ColumnInt64>());
            case Type::Code::UInt64:
                return std::make_shared<ColumnMapT<ColumnString, ColumnUInt64>>(
                    std::make_shared<ColumnString>(), std::make_shared<ColumnUInt64>());
            case Type::Code::Float64:
                return std::make_shared<ColumnMapT<ColumnString, ColumnFloat64>>(
                    std::make_shared<ColumnString>(), std::make_shared<ColumnFloat64>());
            default:
                break;
            }
        } else if (kc == Type::Code::Int64 && vc == Type::Code::String) {
            return std::make_shared<ColumnMapT<ColumnInt64, ColumnString>>(
                std::make_shared<ColumnInt64>(), std::make_shared<ColumnString>());
        }
        /* Any other key/value combination goes through the generic factory. */
        return CreateColumnByType(type->GetName());
    }

    case Type::Code::Tuple:
        throw std::runtime_error("can't support Tuple");
    case Type::Code::Void:
        throw std::runtime_error("can't support Void");

    default:
        return CreateColumnByType(type->GetName());
    }
}
// Build a column of plain integer cells from a PHP rows array. Used by
// every signed and unsigned integer type that doesn't accept hex
// strings (UInt8/16, Int8..Int64).
//
// MinV/MaxV bound the destination column's representable range so that an
// out-of-range PHP value throws instead of silently wrapping in the
// narrowing assignment to ClickHouse's int8/int16/int32. Values are
// pulled non-mutatingly via strict_zval_long, so the caller's row arrays
// don't get their types coerced in place and non-numeric cells throw
// instead of silently becoming 0.
/*
 * Build a TCol from every value in `values_ht`. Each cell is converted
 * with strict_zval_long and must lie within [MinV, MaxV]; a cell
 * outside that range throws std::runtime_error naming `type_label`.
 */
template <typename TCol>
static ColumnRef appendIntColumn(HashTable *values_ht,
                                 zend_long MinV, zend_long MaxV,
                                 const char *type_label)
{
    using Cell = typename TCol::ValueType;

    auto column = std::make_shared<TCol>();
    zval *cell;
    ZEND_HASH_FOREACH_VAL(values_ht, cell) {
        const zend_long parsed = strict_zval_long(cell, type_label);
        const bool fits = (MinV <= parsed) && (parsed <= MaxV);
        if (!fits) {
            throw std::runtime_error(
                std::string("value out of range for ") + type_label);
        }
        column->Append((Cell)parsed);
    } ZEND_HASH_FOREACH_END();
    return column;
}
// Build an unsigned integer column with a hex-string fast path. UInt32
// and UInt64 both accept "0x..." strings as a way to land values in the
// upper half of the range that a PHP signed long can't represent.
// `MaxV` bounds the destination column width: strtoul on 64-bit Linux
// returns 64-bit values regardless of the target column, so without a
// width check "0x100000000" silently truncated to UInt32 0.
/*
 * Build an unsigned TCol from every value in `values_ht`.
 *
 * String cells of length >= 3 starting with "0x"/"0X" are parsed with
 * `strtoul_fn` (base 0) and must be consumed in full. All other cells
 * go through strict_zval_long and must be non-negative. In both cases
 * the value must not exceed `MaxV`. Violations throw
 * std::runtime_error naming `type_label`.
 */
template <typename TCol, typename TStrtoul>
static ColumnRef appendUIntColumnWithHex(HashTable *values_ht,
                                         TStrtoul strtoul_fn,
                                         uint64_t MaxV,
                                         const char *type_label)
{
    using Cell = typename TCol::ValueType;

    auto column = std::make_shared<TCol>();
    zval *cell;
    ZEND_HASH_FOREACH_VAL(values_ht, cell) {
        const bool is_hex_literal =
            Z_TYPE_P(cell) == IS_STRING &&
            Z_STRLEN_P(cell) >= 3 &&
            Z_STRVAL_P(cell)[0] == '0' &&
            (Z_STRVAL_P(cell)[1] == 'x' || Z_STRVAL_P(cell)[1] == 'X');

        if (!is_hex_literal) {
            /* Plain numeric cell. */
            const zend_long plain = strict_zval_long(cell, type_label);
            if (plain < 0) {
                throw std::runtime_error(
                    std::string("negative value cannot fit in ") + type_label);
            }
            if ((uint64_t)plain > MaxV) {
                throw std::runtime_error(
                    std::string("value out of range for ") + type_label);
            }
            column->Append((Cell)plain);
            continue;
        }

        const char *const text = Z_STRVAL_P(cell);
        const size_t text_len = Z_STRLEN_P(cell);
        char *stop = NULL;
        errno = 0;
        auto parsed = strtoul_fn(text, &stop, 0);
        /* zend_string is length-prefixed and may carry embedded NUL
         * bytes, so "fully consumed" is judged by length; a *stop == 0
         * test would accept "0xABCD\0garbage". */
        const bool consumed_all = (stop != text) && ((size_t)(stop - text) == text_len);
        if (errno == ERANGE || !consumed_all) {
            throw std::runtime_error(
                std::string("invalid hex literal for ") + type_label);
        }
        if ((uint64_t)parsed > MaxV) {
            throw std::runtime_error(
                std::string("hex literal out of range for ") + type_label);
        }
        column->Append((Cell)parsed);
    } ZEND_HASH_FOREACH_END();
    return column;
}
// Build an Enum8 / Enum16 column from a PHP rows array. Integer cells
// validate against the type's declared value set; the prior unchecked
// Append silently stored values like 0 / 3 / 127 inside an
// `Enum8('One'=1,'Two'=2)` column, after which normal reads threw
// `map::at` because the read path looks up the name for the stored
// integer. String cells go through ColumnEnum*::Append(name) which
// validates internally.
//
// IS_NULL handling: rejected for non-Nullable enums (would otherwise
// store raw 0, which is usually not a declared enum value and poisons
// reads). The Nullable insert path bumps AllowNullGuard so its
// recursive child build accepts NULL → declared-value placeholder; the
// null mask captures the actual NULL.
/*
 * Build an enum column (TCol, storage type TInt) from `values_ht`.
 *
 *  - IS_NULL: allowed only while g_allow_null_in_strict is positive; a
 *    placeholder value is stored.
 *  - IS_LONG: must fit in int16_t and be a value the enum type reports
 *    via HasEnumValue, otherwise std::runtime_error is thrown.
 *  - anything else: stringified and appended by name.
 */
template <typename TCol, typename TInt>
static ColumnRef appendEnumColumn(TypeRef type, HashTable *values_ht)
{
    auto column = std::make_shared<TCol>(type);
    auto enum_type = type->As<clickhouse::EnumType>();

    /* Placeholder for NULL cells: the first value-to-name entry when the
     * enum type is available and non-empty, otherwise 0. */
    TInt placeholder = 0;
    if (enum_type) {
        auto first = enum_type->BeginValueToName();
        if (first != enum_type->EndValueToName()) {
            placeholder = (TInt)first->first;
        }
    }

    zval *cell;
    ZEND_HASH_FOREACH_VAL(values_ht, cell) {
        switch (Z_TYPE_P(cell)) {
        case IS_NULL:
            if (g_allow_null_in_strict <= 0) {
                throw std::runtime_error(
                    "null cannot be assigned to non-Nullable Enum column");
            }
            column->Append(placeholder);
            break;
        case IS_LONG: {
            const zend_long requested = Z_LVAL_P(cell);
            const int16_t narrow = (int16_t)requested;
            /* The round-trip comparison catches values that do not fit
             * in 16 bits before the enum lookup. */
            const bool declared = ((zend_long)narrow == requested) &&
                                  enum_type && enum_type->HasEnumValue(narrow);
            if (!declared) {
                throw std::runtime_error(
                    "Enum integer value " + std::to_string(requested) +
                    " is not declared in " + type->GetName());
            }
            column->Append((TInt)narrow);
            break;
        }
        default: {
            /* Name path: the cell is stringified and appended by name. */
            ZStrGuard holder(cell);
            column->Append(std::string(holder.val(), holder.len()));
            break;
        }
        }
    } ZEND_HASH_FOREACH_END();
    return column;
}
// Fill a newly created date-like column (TCol is ColumnDate, ColumnDate32
// or ColumnDateTime) from a PHP rows array.
//   values_ht  rows to convert, one cell per row
//   is_date    forwarded to to_time_t and used to pick the "Date" vs
//              "DateTime" label handed to strict_zval_long
// String cells are parsed as formatted dates by to_time_t; every other
// cell is converted by strict_zval_long. Both helpers are expected to
// throw on input they cannot accept.
template <typename TCol>
static ColumnRef appendDateColumn(HashTable *values_ht, bool is_date)
{
    auto column = std::make_shared<TCol>();
    const char *const label = is_date ? "Date" : "DateTime";
    zval *cell;
    ZEND_HASH_FOREACH_VAL(values_ht, cell) {
        /* Non-string cells take the strict integer route so that
         * anything strict_zval_long refuses is not silently coerced. */
        if (Z_TYPE_P(cell) != IS_STRING) {
            const auto seconds =
                static_cast<std::time_t>(strict_zval_long(cell, label));
            column->Append(seconds);
            continue;
        }
        /* Every string, with or without dashes, is handed to to_time_t,
         * which performs the format validation. */
        const auto parsed = static_cast<std::time_t>(
            to_time_t(std::string(Z_STRVAL_P(cell), Z_STRLEN_P(cell)), is_date));
        column->Append(parsed);
    } ZEND_HASH_FOREACH_END();
    return column;
}
// Append PHP rows to an already constructed LowCardinality column.
//   values_ht   rows to convert
//   value       destination column; also the object that is returned
//   type_label  label passed to strict_zval_string for its diagnostics
// The compile-time `nullable` flag selects whether an IS_NULL cell is
// appended as std::nullopt; when it is false every cell, NULL included,
// goes through strict_zval_string.
template <typename TCol, bool nullable>
static ColumnRef appendLowCardinalityColumn(HashTable *values_ht, std::shared_ptr<TCol> value, const char *type_label)
{
    /* Shared tail for all non-NULL cells: strict string conversion, then
     * append as a view over the converted text. */
    auto append_text = [&](zval *src) {
        const std::string text = strict_zval_string(src, type_label);
        value->Append(std::string_view{text});
    };

    zval *cell;
    ZEND_HASH_FOREACH_VAL(values_ht, cell) {
        if constexpr (nullable) {
            /* Discarded entirely for non-nullable instantiations, so the
             * std::nullopt overload is never required there. */
            if (Z_TYPE_P(cell) == IS_NULL) {
                value->Append(std::nullopt);
                continue;
            }
        }
        append_text(cell);
    } ZEND_HASH_FOREACH_END();
    return value;
}
// Create a floating-point column of type TCol and fill it from PHP rows.
// Each cell is converted by strict_zval_double (type_label is passed
// through for its diagnostics) and then cast to TCol::ValueType.
template <typename TCol>
static ColumnRef appendFloatColumn(HashTable *values_ht, const char *type_label)
{
    using Scalar = typename TCol::ValueType;
    auto column = std::make_shared<TCol>();
    zval *cell;
    ZEND_HASH_FOREACH_VAL(values_ht, cell) {
        const auto parsed = strict_zval_double(cell, type_label);
        column->Append(static_cast<Scalar>(parsed));
    } ZEND_HASH_FOREACH_END();
    return column;
}
// Create a ColumnMapT<KCol, VCol> and fill it from PHP rows.
//   values_ht    rows to convert; every row must itself be a PHP array
//   extract_key  callable taking (zend_string*, zend_ulong) and yielding K
//   extract_val  callable taking (zval*) and yielding V
// Throws std::runtime_error when a row is not an array; anything thrown
// by the extractors propagates unchanged.
template <typename K, typename V, typename KCol, typename VCol,
          typename KFn, typename VFn>
static ColumnRef appendMapColumn(HashTable *values_ht, KFn extract_key, VFn extract_val)
{
    using MapColumn = ColumnMapT<KCol, VCol>;
    using Entry = std::pair<K, V>;

    auto map_column = std::make_shared<MapColumn>(
        std::make_shared<KCol>(), std::make_shared<VCol>());

    /* One scratch buffer serves every row: clear() drops the previous
     * row's entries while keeping the storage already acquired. */
    std::vector<Entry> row_entries;

    zval *row;
    zend_string *str_key;
    zend_ulong num_key;
    zval *item;
    ZEND_HASH_FOREACH_VAL(values_ht, row) {
        if (Z_TYPE_P(row) != IS_ARRAY) {
            throw std::runtime_error("Map row must be a PHP array");
        }
        row_entries.clear();
        HashTable *row_ht = Z_ARRVAL_P(row);
        ZEND_HASH_FOREACH_KEY_VAL(row_ht, num_key, str_key, item) {
            /* Both key forms are handed over; the extractor decides
             * which one applies to this entry. */
            row_entries.emplace_back(extract_key(str_key, num_key), extract_val(item));
        } ZEND_HASH_FOREACH_END();
        map_column->Append(row_entries);
    } ZEND_HASH_FOREACH_END();
    return map_column;
}
// Convert a PHP zval into a clickhouse UUID; used for Map(*, UUID) values.
// Non-NULL input is read as text through ZStrGuard and parsed by
// parseUUIDString (tagged "UUID format error"). NULL input yields
// UUID{0, 0} while g_allow_null_in_strict is positive and throws
// std::runtime_error otherwise.
static UUID phpToUUID(zval *zv)
{
    if (Z_TYPE_P(zv) != IS_NULL) {
        ZStrGuard text(zv);
        return parseUUIDString(text.val(), text.len(), "UUID format error");
    }
    /* NULL cell: only acceptable while NULLs are currently allowed. */
    if (g_allow_null_in_strict > 0) {
        return UUID{0, 0};
    }
    throw std::runtime_error("null cannot be assigned to non-Nullable column UUID");
}
// Second-stage Map dispatch: key column type already resolved at the
// call site, dispatch on value type code. Kept as a function template so
// each (KCol, K) tuple instantiates its own value-side switch and the
// compiler can fold identical extractor lambdas across instantiations.
template <typename KCol, typename K, typename KFn>
static ColumnRef appendMapByValueType(HashTable *values_ht, TypeRef vtype, KFn key_fn)
{
auto strVal = [](zval *mv) -> std::string {
return strict_zval_string(mv, "Map value String");
};
/* Narrow-typed int extractors range-check before truncation. The
* non-Map insert path has had these via appendIntColumn since pass 1;
* the Map dispatch was using a single i64Val/u64Val for all widths
* which silently wrapped Map(K, Int8) value 1000 to int8_t -24. */
/* All Map value extractors go through strict_zval_long /
* strict_zval_double so non-numeric strings, fractional doubles, and