From 4e19e0d169b9e100a0e7e141410e47e086c8f3f2 Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Wed, 17 Jun 2026 21:51:02 -0700 Subject: [PATCH 1/2] client v2 binary string initial impl --- .../ClickHouseBinaryFormatWriter.java | 4 + .../RowBinaryFormatSerializer.java | 8 + .../data_formats/RowBinaryFormatWriter.java | 10 + .../client/api/data_formats/StringValue.java | 251 ++++++++++++++++++ .../internal/AbstractBinaryFormatReader.java | 28 +- .../internal/BinaryStreamReader.java | 67 ++++- .../internal/MapBackedRecord.java | 13 + .../internal/SerializerUtils.java | 37 ++- .../api/internal/DataTypeConverter.java | 5 +- .../api/data_formats/StringValueTests.java | 224 ++++++++++++++++ .../internal/BaseReaderTests.java | 75 ++++++ .../datatypes/RowBinaryFormatWriterTest.java | 67 +++++ .../clickhouse/client/insert/InsertTests.java | 14 +- .../clickhouse/client/insert/SamplePOJO.java | 3 + 14 files changed, 793 insertions(+), 13 deletions(-) create mode 100644 client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java create mode 100644 client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java index 8494c16c3..a9a212f6a 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java @@ -84,6 +84,10 @@ public interface ClickHouseBinaryFormatWriter { void setString(int colIndex, String value); + void setString(String column, byte[] value); + + void setString(int colIndex, byte[] value); + void setDate(String column, LocalDate value); void setDate(int colIndex, LocalDate value); diff --git 
a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java index ad8ee680a..303a5e7f4 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java @@ -126,10 +126,18 @@ public void writeString(String value) throws IOException { BinaryStreamUtils.writeString(out, value); } + public void writeString(byte[] value) throws IOException { + BinaryStreamUtils.writeString(out, value); + } + public void writeFixedString(String value, int len) throws IOException { BinaryStreamUtils.writeFixedString(out, value, len); } + public void writeFixedString(byte[] value, int len) throws IOException { + SerializerUtils.writeFixedStringBytes(out, value, len); + } + public void writeDate(ZonedDateTime value) throws IOException { SerializerUtils.writeDate(out, value, value.getZone()); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java index a487da1b9..2a2ecedd3 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java @@ -203,6 +203,16 @@ public void setString(int colIndex, String value) { setValue(colIndex, value); } + @Override + public void setString(String column, byte[] value) { + setValue(column, value); + } + + @Override + public void setString(int colIndex, byte[] value) { + setValue(colIndex, value); + } + @Override public void setDate(String column, LocalDate value) { setValue(column, value); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java 
// Patch target (new file):
//   client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java
//   new file mode 100644, index 000000000..8a78455b2
// Declared in: package com.clickhouse.client.api.data_formats;
// (restore the package statement as the first line when placing this file in the source tree)

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * Holder for a ClickHouse {@code String} (or {@code FixedString}) value that keeps the original bytes
 * as they were received from the server instead of eagerly decoding them into a {@link String}.
 *
 * <p>ClickHouse {@code String} columns are arbitrary byte sequences and are not guaranteed to be valid
 * text in any particular encoding (for example a {@code String} may store a hash, a serialized blob or
 * text in a non UTF-8 charset). Decoding such values as UTF-8 is lossy. This class preserves the raw
 * bytes so that:
 * <ul>
 *   <li>the exact bytes can be retrieved with {@link #toByteArray()}, {@link #asByteBuffer()} or
 *       {@link #asInputStream()};</li>
 *   <li>text can be decoded on demand, either with the default charset ({@link #asString()}) or with
 *       a caller-supplied one ({@link #asString(Charset)}).</li>
 * </ul>
 *
 * <p>The value is backed by a {@link ByteBuffer}. This class never writes to the backing buffer and
 * only hands out read-only views or copies. Arrays and buffers passed to the constructors are
 * referenced, not copied, so the caller must not modify them afterwards.
 *
 * <p>The {@link String} produced by {@link #asString()} is cached so repeated access (for example
 * inside a row loop) does not allocate a new object every time. Equality and hash code are based on
 * the bytes only; the default charset does not take part in them.
 */
public final class StringValue {

    /** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */
    public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

    /** Independent slice over the value bytes; position 0 .. limit is the whole value. */
    private final ByteBuffer buffer;

    /** Charset used by the no-argument {@link #asString()}. */
    private final Charset defaultCharset;

    /**
     * Lazily computed result of decoding with {@link #defaultCharset}. Racing threads may decode more
     * than once, but they all produce an equal string, so no locking is needed.
     */
    private volatile String cached;

    /**
     * Creates a value backed by the given bytes. The array is wrapped, not copied, so it must not be
     * modified after being passed in.
     *
     * @param bytes raw value bytes (not null)
     * @throws NullPointerException if {@code bytes} is null
     */
    public StringValue(byte[] bytes) {
        this(wrap(bytes), DEFAULT_CHARSET);
    }

    /**
     * Creates a value backed by the given bytes using the provided default charset. The array is wrapped,
     * not copied, so it must not be modified after being passed in.
     *
     * @param bytes          raw value bytes (not null)
     * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
     * @throws NullPointerException if any argument is null
     */
    public StringValue(byte[] bytes, Charset defaultCharset) {
        this(wrap(bytes), defaultCharset);
    }

    /**
     * Creates a value backed by a region of the given array. The array is referenced, not copied.
     *
     * @param bytes  raw value bytes (not null)
     * @param offset start offset in the array
     * @param length number of bytes
     * @throws NullPointerException      if {@code bytes} is null
     * @throws IndexOutOfBoundsException if the region does not fit into the array
     */
    public StringValue(byte[] bytes, int offset, int length) {
        this(ByteBuffer.wrap(requireBytes(bytes), offset, length), DEFAULT_CHARSET);
    }

    /**
     * Creates a value backed by the remaining content of the given buffer.
     *
     * @param buffer backing buffer (not null); its remaining bytes define the value
     * @throws NullPointerException if {@code buffer} is null
     */
    public StringValue(ByteBuffer buffer) {
        this(buffer, DEFAULT_CHARSET);
    }

    /**
     * Creates a value backed by the remaining content of the given buffer using the provided default charset.
     * The content is shared with {@code buffer}; only position and limit are independent.
     *
     * @param buffer         backing buffer (not null); its remaining bytes define the value
     * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
     * @throws NullPointerException if any argument is null
     */
    public StringValue(ByteBuffer buffer, Charset defaultCharset) {
        if (buffer == null) {
            throw new NullPointerException("buffer is null");
        }
        if (defaultCharset == null) {
            throw new NullPointerException("defaultCharset is null");
        }
        // Keep an independent view so external position/limit changes do not affect this value.
        this.buffer = buffer.slice();
        this.defaultCharset = defaultCharset;
    }

    /**
     * Creates a value from a Java string encoded with UTF-8.
     *
     * @param value source string (not null)
     * @return new value
     * @throws NullPointerException if {@code value} is null
     */
    public static StringValue of(String value) {
        return of(value, DEFAULT_CHARSET);
    }

    /**
     * Creates a value from a Java string encoded with the given charset. The charset also becomes the
     * default charset of the returned value.
     *
     * @param value   source string (not null)
     * @param charset charset used to encode the string (not null)
     * @return new value
     * @throws NullPointerException if any argument is null
     */
    public static StringValue of(String value, Charset charset) {
        if (value == null) {
            throw new NullPointerException("value is null");
        }
        if (charset == null) {
            throw new NullPointerException("charset is null");
        }
        StringValue sv = new StringValue(value.getBytes(charset), charset);
        // Pre-seed the decode cache only when encoding was lossless. String.getBytes() silently
        // substitutes '?' for input it cannot encode (e.g. an unpaired surrogate), in which case the
        // source string no longer matches the stored bytes and asString() must decode them instead.
        if (charset.equals(DEFAULT_CHARSET) && charset.newEncoder().canEncode(value)) {
            sv.cached = value;
        }
        return sv;
    }

    /**
     * Creates a value from the given bytes. The array is wrapped, not copied.
     *
     * @param bytes raw value bytes (not null)
     * @return new value
     * @throws NullPointerException if {@code bytes} is null
     */
    public static StringValue of(byte[] bytes) {
        return new StringValue(bytes);
    }

    /**
     * Returns a read-only view over the raw bytes of this value. The returned buffer has its own
     * position and limit, so reading from it does not affect this value or other views.
     *
     * @return read-only buffer positioned at the first byte of the value
     */
    public ByteBuffer asByteBuffer() {
        return buffer.asReadOnlyBuffer();
    }

    /**
     * Returns a fresh copy of the raw bytes of this value.
     *
     * @return new byte array with the value bytes
     */
    public byte[] toByteArray() {
        ByteBuffer view = buffer.duplicate();
        if (view.hasArray()) {
            int start = view.arrayOffset() + view.position();
            return Arrays.copyOfRange(view.array(), start, start + view.remaining());
        }
        // No accessible backing array: copy through the duplicate so our own position is untouched.
        byte[] out = new byte[view.remaining()];
        view.get(out);
        return out;
    }

    /**
     * @return number of bytes in this value
     */
    public int size() {
        return buffer.remaining();
    }

    /**
     * @return {@code true} if the value has no bytes
     */
    public boolean isEmpty() {
        return buffer.remaining() == 0;
    }

    /**
     * Decodes the value using the default charset (UTF-8 unless another was provided at construction).
     * The result is cached so repeated calls do not allocate a new string.
     *
     * @return decoded string
     */
    public String asString() {
        String s = cached;
        if (s == null) {
            s = decode(defaultCharset);
            cached = s;
        }
        return s;
    }

    /**
     * Decodes the value using the given charset. The result is cached only when the charset matches the
     * default charset of this value.
     *
     * @param charset charset to decode with (not null)
     * @return decoded string
     * @throws NullPointerException if {@code charset} is null
     */
    public String asString(Charset charset) {
        if (charset == null) {
            throw new NullPointerException("charset is null");
        }
        if (charset.equals(defaultCharset)) {
            return asString();
        }
        return decode(charset);
    }

    /**
     * Returns a stream over the raw bytes of this value. Useful for JDBC binary/ascii stream access.
     * When the backing buffer exposes its array the stream reads from it directly, otherwise it reads
     * from a copy.
     *
     * @return input stream over the value bytes
     */
    public InputStream asInputStream() {
        ByteBuffer view = buffer.duplicate();
        if (view.hasArray()) {
            int start = view.arrayOffset() + view.position();
            return new ByteArrayInputStream(view.array(), start, view.remaining());
        }
        return new ByteArrayInputStream(toByteArray());
    }

    /** Decodes all bytes of this value with the given charset without touching the cache. */
    private String decode(Charset charset) {
        ByteBuffer view = buffer.duplicate();
        if (view.hasArray()) {
            return new String(view.array(), view.arrayOffset() + view.position(), view.remaining(), charset);
        }
        byte[] tmp = new byte[view.remaining()];
        view.get(tmp);
        return new String(tmp, charset);
    }

    /** Returns {@code bytes} or throws a {@link NullPointerException} with a descriptive message. */
    private static byte[] requireBytes(byte[] bytes) {
        if (bytes == null) {
            throw new NullPointerException("bytes is null");
        }
        return bytes;
    }

    /** Wraps a non-null array into a buffer covering the whole array. */
    private static ByteBuffer wrap(byte[] bytes) {
        return ByteBuffer.wrap(requireBytes(bytes));
    }

    /** Same as {@link #asString()}. */
    @Override
    public String toString() {
        return asString();
    }

    /** Two values are equal when they hold the same byte sequence; the default charset is ignored. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof StringValue)) {
            return false;
        }
        return buffer.equals(((StringValue) o).buffer);
    }

    /** Hash code derived from the value bytes, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return buffer.hashCode();
    }
}

// ---- remainder of the original patch line, kept verbatim (header of the next file's diff) ----
// diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java
// index e5892748d..54b907749 100644
// --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java
// +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java
// @@ -4,6 +4,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.internal.MapUtils; import com.clickhouse.client.api.internal.ServerSettings;
// @@ -532,8 +533,9 @@ private T getPrimitiveArray(int index, Class componentType) { } return (T)array; }
else if (componentType == byte.class) { - if (value instanceof String) { - return (T) ((String) value).getBytes(StandardCharsets.UTF_8); + byte[] bytes = stringLikeToBytes(value); + if (bytes != null) { + return (T) bytes; } else if (value instanceof InetAddress) { return (T) ((InetAddress) value).getAddress(); } @@ -676,6 +678,24 @@ public Instant getInstant(int index) { throw new ClientException("Column of type " + column.getDataType() + " cannot be converted to Instant"); } + /** + * Converts a string-like value into its raw bytes. For a {@link StringValue} the original bytes are + * returned without re-encoding (so binary content is preserved). For a {@link String} the bytes are + * produced using UTF-8, matching the historical behaviour. Returns {@code null} when the value is not + * a string-like type so callers can fall back to other handling. + * + * @param value value to convert + * @return raw bytes or {@code null} if the value is not string-like + */ + public static byte[] stringLikeToBytes(Object value) { + if (value instanceof StringValue) { + return ((StringValue) value).toByteArray(); + } else if (value instanceof String) { + return ((String) value).getBytes(StandardCharsets.UTF_8); + } + return null; + } + static Instant objectToInstant(Object value) { if (value instanceof LocalDateTime) { LocalDateTime dateTime = (LocalDateTime) value; @@ -866,6 +886,10 @@ public String[] getStringArray(int index) { BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; if (array.itemType == String.class) { return (String[]) array.getArray(); + } else if (array.itemType == StringValue.class) { + StringValue[] stringValues = (StringValue[]) array.getArray(); + return Arrays.stream(stringValues) + .map(sv -> sv == null ? 
null : sv.asString()).toArray(String[]::new); } else if (array.itemType == BinaryStreamReader.EnumValue.class) { BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray(); return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index 8a6b76a5a..fda7a88e2 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.ClickHouseEnum; @@ -55,6 +56,8 @@ public class BinaryStreamReader { private final Class arrayDefaultTypeHint; + private final boolean stringAsBinaryDefault; + private static final int SB_INIT_SIZE = 100; private ClickHouseColumn lastDataColumn = null; @@ -69,7 +72,7 @@ public class BinaryStreamReader { * @param jsonAsString - use string to serialize/deserialize JSON columns * @param typeHintMapping - what type use as hint if hint is not set or may not be known. */ - BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { + public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { this.log = log == null ? 
NOPLogger.NOP_LOGGER : log; this.timeZone = timeZone; this.input = input; @@ -78,6 +81,26 @@ public class BinaryStreamReader { this.arrayDefaultTypeHint = typeHintMapping == null || typeHintMapping.isEmpty()? NO_TYPE_HINT : typeHintMapping.get(ClickHouseDataType.Array); + this.stringAsBinaryDefault = typeHintMapping != null && + typeHintMapping.get(ClickHouseDataType.String) == StringValue.class; + } + + /** + * Decides whether a {@code String}/{@code FixedString} value should be read as a {@link StringValue} + * (preserving raw bytes) instead of a {@link String}. A per-call type hint takes precedence over the + * default type hint mapping configured for the reader. + * + * @param typeHint per-call type hint or {@code null} + * @return {@code true} when the value should be read as {@link StringValue} + */ + private boolean readStringAsBinary(Class typeHint) { + if (typeHint == StringValue.class) { + return true; + } + if (typeHint == String.class) { + return false; + } + return stringAsBinaryDefault; } /** @@ -121,12 +144,18 @@ public T readValue(ClickHouseColumn column, Class typeHint) throws IOExce switch (dataType) { // Primitives case FixedString: { + if (readStringAsBinary(typeHint)) { + return (T) new StringValue(readStringBytes(input, precision)); + } byte[] bytes = precision > STRING_BUFF.length ? new byte[precision] : STRING_BUFF; readNBytes(input, bytes, 0, precision); return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8); } case String: { + if (readStringAsBinary(typeHint)) { + return (T) readStringValue(); + } return (T) readString(); } case Int8: @@ -1119,17 +1148,41 @@ public String readString() throws IOException { } /** - * Reads a decimal value from input stream. + * Reads a string from the internal input stream preserving the raw bytes as a {@link StringValue}. + * Unlike {@link #readString()} this does not decode bytes into a {@link String} and never reuses the + * shared buffer, so the value is safe to keep after the next read. 
+ * + * @return string value holding the raw bytes + * @throws IOException when IO error occurs + */ + public StringValue readStringValue() throws IOException { + return new StringValue(readStringBytes(input, readVarInt(input))); + } + + /** + * Reads the raw bytes of a string from the input stream given its length. + * * @param input - source of bytes - * @return String + * @param len - number of bytes to read + * @return byte[] containing the raw string bytes * @throws IOException when IO error occurs */ - public static String readString(InputStream input) throws IOException { - int len = readVarInt(input); + public static byte[] readStringBytes(InputStream input, int len) throws IOException { if (len == 0) { - return ""; + return new byte[0]; } - return new String(readNBytes(input, len), StandardCharsets.UTF_8); + return readNBytes(input, len); + } + + /** + * Reads a string value from input stream. + * @param input - source of bytes + * @return String + * @throws IOException when IO error occurs + */ + public static String readString(InputStream input) throws IOException { + byte[] bytes = readStringBytes(input, readVarInt(input)); + return bytes.length == 0 ? 
"" : new String(bytes, StandardCharsets.UTF_8); } public static int readByteOrEOF(InputStream input) throws IOException { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index 75f7ea314..fe963417c 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.metadata.NoSuchColumnException; import com.clickhouse.client.api.metadata.TableSchema; @@ -276,6 +277,14 @@ private T getPrimitiveArray(String colName) { @Override public byte[] getByteArray(String colName) { + Object value = readValue(colName); + if (value == null) { + return null; + } + byte[] bytes = AbstractBinaryFormatReader.stringLikeToBytes(value); + if (bytes != null) { + return bytes; + } return getPrimitiveArray(colName); } @@ -319,6 +328,10 @@ public String[] getStringArray(String colName) { BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; if (array.itemType == String.class) { return (String[]) array.getArray(); + } else if (array.itemType == StringValue.class) { + StringValue[] stringValues = (StringValue[]) array.getArray(); + return Arrays.stream(stringValues) + .map(sv -> sv == null ? 
null : sv.asString()).toArray(String[]::new); } else if (array.itemType == BinaryStreamReader.EnumValue.class) { BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray(); return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java index cc7e91792..a4e8fb598 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.ClientException; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.serde.POJOFieldDeserializer; import com.clickhouse.data.ClickHouseAggregateFunction; import com.clickhouse.data.ClickHouseColumn; @@ -552,10 +553,22 @@ private static void serializePrimitiveData(OutputStream stream, Object value, Cl BinaryStreamUtils.writeBoolean(stream, (Boolean) value); break; case String: - BinaryStreamUtils.writeString(stream, convertToString(value)); + if (value instanceof byte[]) { + BinaryStreamUtils.writeString(stream, (byte[]) value); + } else if (value instanceof StringValue) { + BinaryStreamUtils.writeString(stream, ((StringValue) value).toByteArray()); + } else { + BinaryStreamUtils.writeString(stream, convertToString(value)); + } break; case FixedString: - BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision()); + if (value instanceof byte[]) { + writeFixedStringBytes(stream, (byte[]) value, column.getPrecision()); + } else if (value instanceof StringValue) { + writeFixedStringBytes(stream, ((StringValue) value).toByteArray(), column.getPrecision()); + } else { + 
BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision()); + } break; case Date: writeDate(stream, value, ZoneId.of("UTC")); // TODO: check @@ -912,6 +925,26 @@ public static String convertToString(Object value) { return java.lang.String.valueOf(value); } + /** + * Writes raw bytes as a ClickHouse {@code FixedString(length)} value. The bytes are written as-is and + * right-padded with zero bytes when shorter than {@code length}. + * + * @param stream output stream + * @param value raw bytes + * @param length fixed string length + * @throws IOException when failed to write to the stream + */ + public static void writeFixedStringBytes(OutputStream stream, byte[] value, int length) throws IOException { + if (value.length > length) { + throw new IllegalArgumentException("Value of length " + value.length + + " is longer than FixedString(" + length + ")"); + } + stream.write(value); + for (int i = value.length; i < length; i++) { + stream.write(0); + } + } + public static > Set parseEnumList(String value, Class enumType) { Set values = new HashSet<>(); for (StringTokenizer causes = new StringTokenizer(value, Client.VALUES_LIST_DELIMITER); causes.hasMoreTokens(); ) { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java index b1f6a8520..0faa2a7a8 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClickHouseException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; @@ -85,7 +86,9 @@ public String stringToString(Object 
bytesOrString, ClickHouseColumn column) { if (column.isArray()) { sb.append(QUOTE); } - if (bytesOrString instanceof CharSequence) { + if (bytesOrString instanceof StringValue) { + sb.append(((StringValue) bytesOrString).asString()); + } else if (bytesOrString instanceof CharSequence) { sb.append(((CharSequence) bytesOrString)); } else if (bytesOrString instanceof byte[]) { sb.append(new String((byte[]) bytesOrString)); diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java new file mode 100644 index 000000000..1d5528594 --- /dev/null +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -0,0 +1,224 @@ +package com.clickhouse.client.api.data_formats; + +import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader; +import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; +import com.clickhouse.client.api.data_formats.internal.SerializerUtils; +import com.clickhouse.data.ClickHouseColumn; +import com.clickhouse.data.ClickHouseDataType; +import com.clickhouse.data.format.BinaryStreamUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.Map; +import java.util.TimeZone; + +public class StringValueTests { + + private static final Map> STRING_AS_BINARY = + Collections.singletonMap(ClickHouseDataType.String, (Class) StringValue.class); + + private static BinaryStreamReader reader(byte[] input, Map> hints) { + return new BinaryStreamReader(new ByteArrayInputStream(input), TimeZone.getTimeZone("UTC"), null, + new 
BinaryStreamReader.DefaultByteBufferAllocator(), false, hints); + } + + // ---- StringValue API ---- + + @Test + public void testStringValueApiBasics() { + byte[] bytes = "hello world".getBytes(StandardCharsets.UTF_8); + StringValue sv = new StringValue(bytes); + + Assert.assertEquals(sv.size(), bytes.length); + Assert.assertFalse(sv.isEmpty()); + Assert.assertEquals(sv.asString(), "hello world"); + Assert.assertEquals(sv.toString(), "hello world"); + Assert.assertEquals(sv.toByteArray(), bytes); + } + + @Test + public void testToByteArrayReturnsIndependentCopy() { + byte[] bytes = {1, 2, 3, 4}; + StringValue sv = new StringValue(bytes); + byte[] copy = sv.toByteArray(); + copy[0] = 42; + Assert.assertEquals(sv.toByteArray()[0], 1, "Mutating the returned array must not affect the value"); + } + + @Test + public void testAsByteBufferIsReadOnly() { + StringValue sv = new StringValue(new byte[]{1, 2, 3}); + ByteBuffer buffer = sv.asByteBuffer(); + Assert.assertTrue(buffer.isReadOnly()); + Assert.assertEquals(buffer.remaining(), 3); + } + + @Test + public void testAsStringIsCached() { + StringValue sv = new StringValue("cached".getBytes(StandardCharsets.UTF_8)); + String first = sv.asString(); + String second = sv.asString(); + Assert.assertSame(first, second, "asString() should cache and return the same instance"); + } + + @Test + public void testAsStringWithCharset() { + String original = "Привет, мир"; + StringValue sv = new StringValue(original.getBytes(StandardCharsets.UTF_16)); + Assert.assertEquals(sv.asString(StandardCharsets.UTF_16), original); + } + + @Test + public void testAsInputStream() throws IOException { + byte[] bytes = {(byte) 0x00, (byte) 0xFF, (byte) 0x10, (byte) 0x7F}; + StringValue sv = new StringValue(bytes); + try (InputStream is = sv.asInputStream()) { + byte[] read = new byte[bytes.length]; + int n = is.read(read); + Assert.assertEquals(n, bytes.length); + Assert.assertEquals(read, bytes); + } + } + + @Test + public void 
testEqualsAndHashCode() { + StringValue a = StringValue.of("abc"); + StringValue b = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + StringValue c = StringValue.of("abd"); + Assert.assertEquals(a, b); + Assert.assertEquals(a.hashCode(), b.hashCode()); + Assert.assertNotEquals(a, c); + } + + @Test + public void testOfStringCachesValue() { + StringValue sv = StringValue.of("preset"); + Assert.assertSame(sv.asString(), sv.asString()); + Assert.assertEquals(sv.asString(), "preset"); + } + + @Test + public void testEmptyValue() { + StringValue sv = new StringValue(new byte[0]); + Assert.assertTrue(sv.isEmpty()); + Assert.assertEquals(sv.size(), 0); + Assert.assertEquals(sv.asString(), ""); + Assert.assertEquals(sv.toByteArray().length, 0); + } + + // ---- Reading String columns as StringValue ---- + + @DataProvider(name = "charsetStrings") + private Object[][] charsetStrings() { + return new Object[][]{ + {"plain ascii", StandardCharsets.UTF_8}, + {"unicode: Привет 你好 🚀", StandardCharsets.UTF_8}, + {"latin1 café", StandardCharsets.ISO_8859_1}, + {"utf16 текст", StandardCharsets.UTF_16}, + {" leading and trailing ", StandardCharsets.UTF_8}, + {"", StandardCharsets.UTF_8}, + }; + } + + @Test(dataProvider = "charsetStrings") + public void testReadStringAsStringValuePreservesBytes(String value, Charset charset) throws IOException { + byte[] encoded = value.getBytes(charset); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, encoded); // binary string write (raw bytes) + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof StringValue, "Expected StringValue but got " + read.getClass()); + StringValue sv = (StringValue) read; + Assert.assertEquals(sv.toByteArray(), encoded, "Raw bytes must be preserved"); + Assert.assertEquals(sv.asString(charset), value, "Decoding with the source charset 
must round-trip"); + } + + @Test + public void testReadBinaryNonUtf8IsPreserved() throws IOException { + // Bytes that are not valid UTF-8 (e.g. a binary hash). Decoding as UTF-8 would be lossy. + byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, + (byte) 0xFF, (byte) 0x00, (byte) 0x80, (byte) 0xC0, (byte) 0xFE}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, binary); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + StringValue sv = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertEquals(sv.toByteArray(), binary, "Binary content must be preserved exactly"); + Assert.assertEquals(AbstractBinaryFormatReader.stringLikeToBytes(sv), binary, + "Shared string->bytes conversion must preserve binary content"); + } + + @Test + public void testFixedStringAsStringValue() throws IOException { + byte[] binary = new byte[]{(byte) 0x01, (byte) 0xFF, (byte) 0x00, (byte) 0x10, (byte) 0x80}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(binary); // FixedString(5) is written as exactly 5 raw bytes + + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(5)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof StringValue); + Assert.assertEquals(((StringValue) read).toByteArray(), binary); + } + + @Test + public void testDefaultBehaviorReturnsString() throws IOException { + byte[] encoded = "still a string".getBytes(StandardCharsets.UTF_8); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, encoded); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + Object read = reader(baos.toByteArray(), AbstractBinaryFormatReader.NO_TYPE_HINT_MAPPING).readValue(column); + + Assert.assertTrue(read instanceof String, "Without a type hint Strings must still be returned as String"); + 
Assert.assertEquals(read, "still a string"); + } + + // ---- Writing binary String values ---- + + @Test + public void testWriteByteArrayToStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0xAB, (byte) 0xCD, (byte) 0x7F}; + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, binary, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteStringValueToStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0x10, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, (byte) 0x00}; + StringValue value = new StringValue(binary); + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, value, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteByteArrayToFixedStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC}; + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(3)"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, binary, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } +} diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index b3e9f0676..7cd08d957 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ 
b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -7,6 +7,7 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.command.CommandSettings; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.enums.Protocol; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.QueryResponse; @@ -572,4 +573,78 @@ private Client.Builder newClient() { .setPassword(ClickHouseServerForTest.getPassword()); } + @Test(groups = {"integration"}) + public void testReadingStringValue() throws Exception { + final String table = "test_reading_stringvalue"; + + client.execute("DROP TABLE IF EXISTS " + table).get(); + client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = Memory").get(); + client.execute("INSERT INTO " + table + " VALUES (1, 'hello', 'world', 'a'), (2, 'ClickHouse', 'Rocks', 'b')").get(); + + java.util.Map> typeHints = new java.util.HashMap<>(); + typeHints.put(ClickHouseDataType.String, StringValue.class); + typeHints.put(ClickHouseDataType.FixedString, StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + try { + try (QueryResponse response = customClient.query("SELECT * FROM " + table + " ORDER BY id").get()) { + ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response); + + // Test reading multiple strings in a row and check that their content differs + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getInteger("id"), 1); + StringValue s1 = (StringValue) reader.readValue("s"); + StringValue fs1 = (StringValue) reader.readValue("fs"); + StringValue e1 = (StringValue) reader.readValue("e"); + + Assert.assertEquals(s1.asString(), "hello"); + Assert.assertEquals(fs1.asString(), "world"); + 
Assert.assertEquals(e1.asString(), "a"); + + // Test getting read value multiple times + Assert.assertSame(s1, reader.readValue("s"), "Consecutive reads for the same row should return the same instance or equal value"); + Assert.assertEquals(reader.getString("s"), "hello"); + // Test reading byte[] from String columns + Assert.assertEquals(reader.getByteArray("s"), "hello".getBytes()); + Assert.assertEquals(reader.getByteArray("fs"), "world".getBytes()); + Assert.assertEquals(reader.getByteArray("e"), "a".getBytes()); + + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getInteger("id"), 2); + StringValue s2 = (StringValue) reader.readValue("s"); + StringValue fs2 = (StringValue) reader.readValue("fs"); + StringValue e2 = (StringValue) reader.readValue("e"); + + Assert.assertEquals(s2.asString(), "ClickHouse"); + Assert.assertEquals(fs2.asString(), "Rocks"); + Assert.assertEquals(e2.asString(), "b"); + + Assert.assertNotEquals(s1.asString(), s2.asString()); + Assert.assertNotEquals(fs1.asString(), fs2.asString()); + } + + // test queryAll with string value + List records = customClient.queryAll("SELECT * FROM " + table + " ORDER BY id"); + Assert.assertEquals(records.size(), 2); + + Assert.assertEquals(records.get(0).getInteger("id"), 1); + Assert.assertEquals(records.get(0).getString("s"), "hello"); + Assert.assertEquals(records.get(0).getString("fs"), "world"); + Assert.assertEquals(records.get(0).getByteArray("s"), "hello".getBytes()); + Assert.assertEquals(records.get(0).getByteArray("fs"), "world".getBytes()); + Assert.assertEquals(records.get(0).getByteArray("e"), "a".getBytes()); + + Assert.assertEquals(records.get(1).getInteger("id"), 2); + Assert.assertEquals(records.get(1).getString("s"), "ClickHouse"); + Assert.assertEquals(records.get(1).getString("fs"), "Rocks"); + Assert.assertEquals(records.get(1).getByteArray("s"), "ClickHouse".getBytes()); + Assert.assertEquals(records.get(1).getByteArray("fs"), "Rocks".getBytes()); + 
Assert.assertEquals(records.get(1).getByteArray("e"), "b".getBytes()); + } finally { + customClient.close(); + } + } } \ No newline at end of file diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index 1a0ee2287..e36f8a480 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -158,6 +158,11 @@ private static void assertEqualsKinda(Object actual, Object expected) { expected = ((BigDecimal) expected).stripTrailingZeros(); } + if (actual instanceof byte[] && expected instanceof byte[]) { + org.testng.Assert.assertEquals((byte[]) actual, (byte[]) expected); + return; + } + assertEquals(String.valueOf(actual), String.valueOf(expected)); } @@ -376,6 +381,68 @@ public void writeStringsTest() throws Exception { writeTest(tableName, tableCreate, rows); } + @Test (groups = { "integration" }) + public void writeBinaryStringsTest() throws Exception { + String tableName = "rowBinaryFormatWriterTest_writeBinaryStringsTests_" + UUID.randomUUID().toString().replace('-', '_'); + String tableCreate = "CREATE TABLE \"" + tableName + "\" " + + " (id Int32, " + + " string String, " + + " fixed_string FixedString(5), " + + " fixed_string_one FixedString(1) " + + " ) Engine = MergeTree ORDER BY id"; + + byte[] binaryData = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, (byte) 0x00, (byte) 0xFF, (byte) 0x80}; + byte[] fixedStringData = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC, (byte) 0xDD, (byte) 0xEE}; + byte[] fixedStringOneData = new byte[]{(byte) 0x7F}; + + // Instead of writeTest which reads back using default string decoding, we write manually + // and query back using typeHintMapping to preserve raw bytes + initTable(tableName, tableCreate, new CommandSettings()); + TableSchema 
schema = client.getTableSchema(tableName); + + ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults; + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 1); + w.setValue(schema.nameToColumnIndex("string"), binaryData); + w.setValue(schema.nameToColumnIndex("fixed_string"), fixedStringData); + w.setValue(schema.nameToColumnIndex("fixed_string_one"), fixedStringOneData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Rows written (Field-like): " + response.getWrittenRows()); + } + + // Also test inserting with byte[] directly via RowBinaryFormatWriter + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 2); + w.setString("string", binaryData); + w.setString("fixed_string", fixedStringData); + w.setString("fixed_string_one", fixedStringOneData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Rows written (manual): " + response.getWrittenRows()); + } + + java.util.Map> typeHints = new java.util.HashMap<>(); + typeHints.put(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class); + typeHints.put(com.clickhouse.data.ClickHouseDataType.FixedString, com.clickhouse.client.api.data_formats.StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + List records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id" ); + assertEquals(records.size(), 2); + + for (GenericRecord record : records) { + org.testng.Assert.assertEquals(record.getByteArray("string"), binaryData); + org.testng.Assert.assertEquals(record.getByteArray("fixed_string"), fixedStringData); + org.testng.Assert.assertEquals(record.getByteArray("fixed_string_one"), 
fixedStringOneData); + } + + customClient.close(); + } @Test (groups = { "integration" }) public void writeDatetimeTests() throws Exception { diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java index 6ffdfea5e..5045e290c 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java @@ -199,7 +199,14 @@ public void insertPOJOAndReadBack() throws Exception { try (QueryResponse queryResponse = client.query("SELECT * FROM " + tableName + " LIMIT 1").get(EXECUTE_CMD_TIMEOUT, TimeUnit.SECONDS)) { - ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(queryResponse); + // To read the binaryString properly as raw bytes, we must map String to StringValue + Client readerClient = client; + if (pojo.getBinaryString() != null) { + readerClient = newClient() + .typeHintMapping(java.util.Collections.singletonMap(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class)) + .build(); + } + ClickHouseBinaryFormatReader reader = readerClient.newBinaryFormatReader(queryResponse); Assert.assertNotNull(reader.next()); Assert.assertEquals(reader.getByte("byteValue"), pojo.getByteValue()); @@ -212,12 +219,17 @@ public void insertPOJOAndReadBack() throws Exception { Assert.assertEquals(reader.getDouble("float64"), pojo.getFloat64()); Assert.assertEquals(reader.getString("string"), pojo.getString()); Assert.assertEquals(reader.getString("fixedString"), pojo.getFixedString()); + Assert.assertEquals(reader.getByteArray("binaryString"), pojo.getBinaryString()); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime").isEqual(pojo.getZonedDateTime().withNano(0))); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime64").isEqual(pojo.getZonedDateTime64())); 
Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime").isEqual(pojo.getOffsetDateTime().withNano(0))); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime64").isEqual(pojo.getOffsetDateTime64())); Assert.assertEquals(reader.getInstant("instant"), pojo.getInstant().with(ChronoField.MICRO_OF_SECOND, 0)); Assert.assertEquals(reader.getInstant("instant64"), pojo.getInstant64()); + + if (readerClient != client) { + readerClient.close(); + } } } diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java index 6661b94bc..4d0de854e 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java @@ -63,6 +63,7 @@ public class SamplePOJO { private String string; private String fixedString; + private byte[] binaryString; private LocalDate date; private LocalDate date32; @@ -145,6 +146,7 @@ public SamplePOJO() { string = RandomStringUtils.randomAlphabetic(1, 256); fixedString = RandomStringUtils.randomAlphabetic(3); + binaryString = new byte[] { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF }; date = LocalDate.now(); date32 = LocalDate.now(); @@ -308,6 +310,7 @@ public static String generateTableCreateSQL(String tableName) { // "boxedBool UInt8, " + "string String, " + "fixedString FixedString(3), " + + "binaryString String, " + "date Date, " + "date32 Date, " + "dateTime DateTime, " + From 3012f42a4ff4efd2aceeba9266c2a11be62835ea Mon Sep 17 00:00:00 2001 From: Sergey Chernov Date: Wed, 24 Jun 2026 00:34:07 -0700 Subject: [PATCH 2/2] Cleaned code and added tests --- .../client/api/data_formats/StringValue.java | 150 ++++------------- .../internal/BinaryStreamReader.java | 20 +-- .../internal/MapBackedRecord.java | 2 - .../api/data_formats/StringValueTests.java | 154 +++++++++++++++--- .../internal/BaseReaderTests.java | 43 +++++ .../datatypes/RowBinaryFormatWriterTest.java | 
73 +++++++++ .../clickhouse/client/insert/SamplePOJO.java | 23 ++- .../src/test/resources/clickhouse-logo.png | Bin 0 -> 9874 bytes 8 files changed, 310 insertions(+), 155 deletions(-) create mode 100644 client-v2/src/test/resources/clickhouse-logo.png diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java index 8a78455b2..3ca94c923 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java @@ -1,33 +1,28 @@ package com.clickhouse.client.api.data_formats; -import java.io.ByteArrayInputStream; -import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.Arrays; +import java.util.Objects; /** - * Holder for a ClickHouse {@code String} (or {@code FixedString}) value that keeps the original bytes - * as they were received from the server instead of eagerly decoding them into a {@link String}. + * Holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes + * to avoid lossy decoding and unnecessary allocations. *

- * ClickHouse {@code String} columns are arbitrary byte sequences and are not guaranteed to be valid - * text in any particular encoding (for example a {@code String} may store a hash, a serialized blob or - * text in a non UTF-8 charset). Decoding such values as UTF-8 is lossy. This class preserves the raw - * bytes so that: - *

+ * This is a mutable structure and must be used with care. To avoid copying, it does not + * duplicate the bytes it is given: the constructor wraps the supplied array/buffer instead of + * copying it, and {@link #toByteArray()} returns a direct reference to the backing array rather + * than a defensive copy. Consequently, mutating the source array, the array returned by + * {@link #toByteArray()}, or reading the same value concurrently while it is being modified will + * change the observed value. Callers that need an independent snapshot must copy the bytes + * themselves. *

- * The value is backed by a {@link ByteBuffer} which exposes a richer API to callers and allows the - * implementation to use direct (off-heap) memory in the future without changing this contract. - * Instances are immutable: the backing buffer is never mutated and callers receive read-only views or - * copies. The {@link String} produced by {@link #asString()} is cached so repeated access (for example - * inside a row loop) does not allocate a new object every time. + * Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. Only heap + * buffers (with an accessible backing array) are supported today; constructing a value from a + * direct (off-heap) buffer is rejected. The decoded {@link String} produced by {@link #asString()} + * is cached. */ -public final class StringValue { +public class StringValue { /** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */ public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; @@ -45,7 +40,7 @@ public final class StringValue { * @param bytes raw value bytes (not null) */ public StringValue(byte[] bytes) { - this(ByteBuffer.wrap(bytes), DEFAULT_CHARSET); + this(bytes, DEFAULT_CHARSET); } /** @@ -59,79 +54,27 @@ public StringValue(byte[] bytes, Charset defaultCharset) { this(ByteBuffer.wrap(bytes), defaultCharset); } - /** - * Creates a value backed by a region of the given array. The array is referenced, not copied. - * - * @param bytes raw value bytes (not null) - * @param offset start offset in the array - * @param length number of bytes - */ - public StringValue(byte[] bytes, int offset, int length) { - this(ByteBuffer.wrap(bytes, offset, length), DEFAULT_CHARSET); - } - - /** - * Creates a value backed by the remaining content of the given buffer. 
- * - * @param buffer backing buffer (not null); its remaining bytes define the value - */ - public StringValue(ByteBuffer buffer) { - this(buffer, DEFAULT_CHARSET); - } - /** * Creates a value backed by the remaining content of the given buffer using the provided default charset. + * The buffer is referenced, not copied, so its content must not be modified afterwards. * - * @param buffer backing buffer (not null); its remaining bytes define the value + * @param buffer backing heap buffer (not null); its remaining bytes define the value * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null) + * @throws IllegalArgumentException if the buffer is a direct (off-heap) buffer with no accessible array */ public StringValue(ByteBuffer buffer, Charset defaultCharset) { - if (buffer == null) { - throw new NullPointerException("buffer is null"); - } - if (defaultCharset == null) { - throw new NullPointerException("defaultCharset is null"); + Objects.requireNonNull(buffer, "buffer cannot be null"); + Objects.requireNonNull(defaultCharset, "charset is required to convert buffer to String"); + + if (!buffer.hasArray()) { + throw new IllegalArgumentException("Can work only with heap buffer."); } + // Keep an independent view so external position/limit changes do not affect this value. this.buffer = buffer.slice(); this.defaultCharset = defaultCharset; } - /** - * Creates a value from a Java string encoded with UTF-8. - * - * @param value source string (not null) - * @return new value - */ - public static StringValue of(String value) { - return of(value, DEFAULT_CHARSET); - } - - /** - * Creates a value from a Java string encoded with the given charset. 
- * - * @param value source string (not null) - * @param charset charset used to encode the string (not null) - * @return new value - */ - public static StringValue of(String value, Charset charset) { - StringValue sv = new StringValue(value.getBytes(charset), charset); - if (charset.equals(DEFAULT_CHARSET)) { - sv.cached = value; - } - return sv; - } - - /** - * Creates a value from the given bytes. The array is wrapped, not copied. - * - * @param bytes raw value bytes (not null) - * @return new value - */ - public static StringValue of(byte[] bytes) { - return new StringValue(bytes); - } - /** * Returns a read-only view over the raw bytes of this value. The returned buffer is independent * (its own position/limit) and shares no mutable state with this value. @@ -143,19 +86,16 @@ public ByteBuffer asByteBuffer() { } /** - * Returns a fresh copy of the raw bytes of this value. + * Returns a direct reference to the backing byte array of this value (no copy is made). + *

+ * The returned array is the live backing storage: mutating it mutates this value, and any change + * to the underlying bytes is reflected here. Callers that need an independent, immutable snapshot + * must copy the result themselves. * - * @return new byte array with the value bytes + * @return the backing array holding the value bytes */ public byte[] toByteArray() { - ByteBuffer view = buffer.duplicate(); - if (view.hasArray()) { - int start = view.arrayOffset() + view.position(); - return Arrays.copyOfRange(view.array(), start, start + view.remaining()); - } - byte[] out = new byte[view.remaining()]; - view.get(out); - return out; + return buffer.array(); } /** @@ -195,37 +135,15 @@ public String asString() { * @return decoded string */ public String asString(Charset charset) { - if (charset == null) { - throw new NullPointerException("charset is null"); - } + Objects.requireNonNull(charset, "charset cannot be null"); if (charset.equals(defaultCharset)) { return asString(); } return decode(charset); } - /** - * Returns a stream over the raw bytes of this value. Useful for JDBC binary/ascii stream access. 
- * - * @return input stream over the value bytes - */ - public InputStream asInputStream() { - ByteBuffer view = buffer.duplicate(); - if (view.hasArray()) { - int start = view.arrayOffset() + view.position(); - return new ByteArrayInputStream(view.array(), start, view.remaining()); - } - return new ByteArrayInputStream(toByteArray()); - } - private String decode(Charset charset) { - ByteBuffer view = buffer.duplicate(); - if (view.hasArray()) { - return new String(view.array(), view.arrayOffset() + view.position(), view.remaining(), charset); - } - byte[] tmp = new byte[view.remaining()]; - view.get(tmp); - return new String(tmp, charset); + return new String(buffer.array(), charset); } @Override diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index fda7a88e2..6d0f19971 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -85,20 +85,14 @@ public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, Byte typeHintMapping.get(ClickHouseDataType.String) == StringValue.class; } - /** - * Decides whether a {@code String}/{@code FixedString} value should be read as a {@link StringValue} - * (preserving raw bytes) instead of a {@link String}. A per-call type hint takes precedence over the - * default type hint mapping configured for the reader. 
- * - * @param typeHint per-call type hint or {@code null} - * @return {@code true} when the value should be read as {@link StringValue} - */ private boolean readStringAsBinary(Class typeHint) { - if (typeHint == StringValue.class) { - return true; - } - if (typeHint == String.class) { - return false; + if (typeHint != null) { + if (typeHint == StringValue.class) { + return true; + } + if (typeHint == String.class) { + return false; + } } return stringAsBinaryDefault; } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index fe963417c..a8708cbfb 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -4,7 +4,6 @@ import com.clickhouse.client.api.DataTypeUtils; import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; -import com.clickhouse.client.api.metadata.NoSuchColumnException; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.NullValueException; @@ -14,7 +13,6 @@ import com.clickhouse.data.value.ClickHouseGeoPointValue; import com.clickhouse.data.value.ClickHouseGeoPolygonValue; import com.clickhouse.data.value.ClickHouseGeoRingValue; -import com.google.common.collect.ImmutableList; import java.math.BigDecimal; import java.math.BigInteger; diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java index 1d5528594..feecf1d9f 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java +++ 
b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -13,7 +13,6 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -46,12 +45,14 @@ public void testStringValueApiBasics() { } @Test - public void testToByteArrayReturnsIndependentCopy() { + public void testToByteArrayReturnsBackingArrayReference() { byte[] bytes = {1, 2, 3, 4}; StringValue sv = new StringValue(bytes); - byte[] copy = sv.toByteArray(); - copy[0] = 42; - Assert.assertEquals(sv.toByteArray()[0], 1, "Mutating the returned array must not affect the value"); + byte[] backing = sv.toByteArray(); + // No copy is made: the returned array is the live backing storage and mutating it mutates the value. + Assert.assertSame(backing, bytes, "toByteArray() must return the backing array without copying"); + backing[0] = 42; + Assert.assertEquals(sv.toByteArray()[0], 42, "Mutating the returned array mutates the value (no copy)"); } @Test @@ -77,33 +78,69 @@ public void testAsStringWithCharset() { Assert.assertEquals(sv.asString(StandardCharsets.UTF_16), original); } - @Test - public void testAsInputStream() throws IOException { - byte[] bytes = {(byte) 0x00, (byte) 0xFF, (byte) 0x10, (byte) 0x7F}; - StringValue sv = new StringValue(bytes); - try (InputStream is = sv.asInputStream()) { - byte[] read = new byte[bytes.length]; - int n = is.read(read); - Assert.assertEquals(n, bytes.length); - Assert.assertEquals(read, bytes); - } - } - @Test public void testEqualsAndHashCode() { - StringValue a = StringValue.of("abc"); + StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); StringValue b = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); - StringValue c = StringValue.of("abd"); + StringValue c = new StringValue("abd".getBytes(StandardCharsets.UTF_8)); + + // Reflexive + 
Assert.assertEquals(a, a); + // Equal content -> equal value and equal hash code Assert.assertEquals(a, b); + Assert.assertEquals(b, a, "equals must be symmetric"); Assert.assertEquals(a.hashCode(), b.hashCode()); + // Different content -> not equal Assert.assertNotEquals(a, c); } @Test - public void testOfStringCachesValue() { - StringValue sv = StringValue.of("preset"); - Assert.assertSame(sv.asString(), sv.asString()); - Assert.assertEquals(sv.asString(), "preset"); + public void testEqualsRejectsNullAndOtherTypes() { + StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + Assert.assertFalse(a.equals(null), "A value must never equal null"); + Assert.assertFalse(a.equals("abc"), "A value must not equal a raw String of the same text"); + Assert.assertNotEquals(a, new Object()); + } + + @Test + public void testEqualsIgnoresDefaultCharset() { + // equals/hashCode are defined on the raw bytes, so the default charset must not affect them. + byte[] bytes = "abc".getBytes(StandardCharsets.UTF_8); + StringValue utf8 = new StringValue(bytes, StandardCharsets.UTF_8); + StringValue latin1 = new StringValue("abc".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1); + Assert.assertEquals(utf8, latin1, "Values with identical bytes must be equal regardless of default charset"); + Assert.assertEquals(utf8.hashCode(), latin1.hashCode()); + } + + @Test + public void testEqualsDistinguishesByContentAndLength() { + StringValue ab = new StringValue(new byte[]{1, 2}); + StringValue abc = new StringValue(new byte[]{1, 2, 3}); + StringValue empty = new StringValue(new byte[0]); + + // Same prefix but different length must not be equal. + Assert.assertNotEquals(ab, abc); + Assert.assertNotEquals(abc, ab); + // Empty values are only equal to other empty values. 
+ Assert.assertEquals(empty, new StringValue(new byte[0])); + Assert.assertNotEquals(empty, ab); + } + + @Test + public void testEqualsIsConsistentWithBinaryReads() throws IOException { + // Two independently read StringValues over the same bytes must compare equal. + byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, binary); + byte[] wire = baos.toByteArray(); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + StringValue first = reader(wire, STRING_AS_BINARY).readValue(column); + StringValue second = reader(wire, STRING_AS_BINARY).readValue(column); + + Assert.assertEquals(first, second); + Assert.assertEquals(first.hashCode(), second.hashCode()); + Assert.assertEquals(first, new StringValue(binary)); } @Test @@ -173,6 +210,77 @@ public void testFixedStringAsStringValue() throws IOException { Assert.assertEquals(((StringValue) read).toByteArray(), binary); } + @Test + public void testReadStringArrayAsStringValue() throws IOException { + // Array(String) elements must be preserved as StringValue (including non-UTF-8 content). 
+        byte[][] elements = {
+                "plain".getBytes(StandardCharsets.UTF_8),
+                "Привет".getBytes(StandardCharsets.UTF_8),
+                new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF},
+                new byte[0],
+        };
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BinaryStreamUtils.writeVarInt(baos, elements.length);
+        for (byte[] element : elements) {
+            BinaryStreamUtils.writeString(baos, element);
+        }
+
+        ClickHouseColumn column = ClickHouseColumn.of("a", "Array(String)");
+        Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column);
+
+        Assert.assertTrue(read instanceof BinaryStreamReader.ArrayValue,
+                "Expected ArrayValue but got " + read.getClass());
+        BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) read;
+        Assert.assertEquals(array.length(), elements.length);
+
+        Object raw = array.getArray();
+        Assert.assertTrue(raw instanceof StringValue[], "Array items must be StringValue, got " + raw.getClass());
+        StringValue[] values = (StringValue[]) raw;
+        for (int i = 0; i < elements.length; i++) {
+            Assert.assertEquals(values[i].toByteArray(), elements[i], "Element " + i + " bytes must be preserved");
+        }
+    }
+
+    @Test
+    public void testReadStringMapAsStringValue() throws IOException {
+        // Map(String, String) keys and values must be preserved as StringValue (vals[1] is not valid UTF-8).
+        byte[][] keys = {
+                "k1".getBytes(StandardCharsets.UTF_8),
+                "ключ".getBytes(StandardCharsets.UTF_8),
+        };
+        byte[][] vals = {
+                "v1".getBytes(StandardCharsets.UTF_8),
+                new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80},
+        };
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BinaryStreamUtils.writeVarInt(baos, keys.length); // entry count, then alternating key/value strings
+        for (int i = 0; i < keys.length; i++) {
+            BinaryStreamUtils.writeString(baos, keys[i]);
+            BinaryStreamUtils.writeString(baos, vals[i]);
+        }
+
+        ClickHouseColumn column = ClickHouseColumn.of("m", "Map(String, String)");
+        Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column);
+
+        Assert.assertTrue(read instanceof Map, "Expected Map but got " + read.getClass());
+        Map<?, ?> map = (Map<?, ?>) read; // wildcard types: entries are inspected via instanceof below
+        Assert.assertEquals(map.size(), keys.length);
+
+        int i = 0; // position in keys/vals; this loop expects entries to iterate in the order they were written
+        for (Map.Entry<?, ?> entry : map.entrySet()) {
+            Assert.assertTrue(entry.getKey() instanceof StringValue, "Map key must be a StringValue");
+            Assert.assertTrue(entry.getValue() instanceof StringValue, "Map value must be a StringValue");
+            Assert.assertEquals(((StringValue) entry.getKey()).toByteArray(), keys[i], "Key " + i + " bytes");
+            Assert.assertEquals(((StringValue) entry.getValue()).toByteArray(), vals[i], "Value " + i + " bytes");
+            i++;
+        }
+
+        // Lookup by an equal StringValue key must work (relies on equals/hashCode over raw bytes).
+        Assert.assertEquals(((StringValue) map.get(new StringValue(keys[0]))).toByteArray(), vals[0]);
+    }
+
     @Test
     public void testDefaultBehaviorReturnsString() throws IOException {
         byte[] encoded = "still a string".getBytes(StandardCharsets.UTF_8);
diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java
index 7cd08d957..1ffdb0782 100644
--- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java
+++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java
@@ -647,4 +647,47 @@ public void testReadingStringValue() throws Exception {
             customClient.close();
         }
     }
+
+    /**
+     * Regression test for https://github.com/ClickHouse/clickhouse-java/issues/1397: a String value that holds
+     * arbitrary binary content (here a SHA-512 hash, which is almost never valid UTF-8) must be read back byte
+     * for byte instead of being mangled by lossy UTF-8 decoding.
+     */
+    @Test(groups = {"integration"})
+    public void testReadingBinaryStringFromHash() throws Exception {
+        final String message = "abc";
+        final byte[] expectedHash = java.security.MessageDigest.getInstance("SHA-512")
+                .digest(message.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        Assert.assertEquals(expectedHash.length, 64); // a SHA-512 digest is 64 bytes
+
+        java.util.Map<ClickHouseDataType, Class<?>> typeHints = new java.util.HashMap<>(); // column type -> Java class
+        typeHints.put(ClickHouseDataType.String, StringValue.class);
+        typeHints.put(ClickHouseDataType.FixedString, StringValue.class);
+
+        Client customClient = newClient()
+                .typeHintMapping(typeHints)
+                .build();
+
+        final String query = "SELECT SHA512('" + message + "') AS hash";
+        try {
+            try (QueryResponse response = customClient.query(query).get()) {
+                ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response);
+                Assert.assertNotNull(reader.next());
+
+                StringValue hash = (StringValue) reader.readValue("hash");
+                Assert.assertEquals(hash.size(), expectedHash.length);
+                Assert.assertEquals(hash.toByteArray(), expectedHash,
+                        "Binary hash bytes must be preserved exactly");
+                // getByteArray must agree with the raw StringValue bytes
+                Assert.assertEquals(reader.getByteArray("hash"), expectedHash);
+            }
+
+            List<GenericRecord> records = customClient.queryAll(query); // same query through the materialized-records path
+            Assert.assertEquals(records.size(), 1);
+            Assert.assertEquals(records.get(0).getByteArray("hash"), expectedHash,
+                    "Binary hash read via queryAll must match the locally computed digest");
+        } finally {
+            customClient.close();
+        }
+    }
 }
\ No newline at end of file
diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java
index e36f8a480..2ed1889c9 100644
--- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java
+++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java
@@ -444,6 +444,79 @@ public void
writeBinaryStringsTest() throws Exception {
         customClient.close();
     }
 
+    @Test (groups = { "integration" })
+    public void writeAndReadImageTest() throws Exception {
+        // Demonstrates that large binary blobs (here a ~10KB PNG) survive a full write/read round-trip
+        // through a String column without being corrupted by lossy UTF-8 decoding.
+        byte[] imageData = readResource("clickhouse-logo.png");
+        org.testng.Assert.assertTrue(imageData.length > 1024, "Expected a non-trivial binary payload");
+
+        String tableName = "rowBinaryFormatWriterTest_writeAndReadImageTest_" + UUID.randomUUID().toString().replace('-', '_');
+        String tableCreate = "CREATE TABLE \"" + tableName + "\" " +
+                " (id Int32, image String) Engine = MergeTree ORDER BY id";
+
+        initTable(tableName, tableCreate, new CommandSettings());
+        TableSchema schema = client.getTableSchema(tableName);
+
+        ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults;
+        try (InsertResponse response = client.insert(tableName, out -> {
+            RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format);
+            w.setValue(schema.nameToColumnIndex("id"), 1);
+            w.setValue(schema.nameToColumnIndex("image"), imageData); // raw byte[] written into the String column
+            w.commitRow();
+        }, format, settings).get()) {
+            System.out.println("Image bytes written: " + imageData.length + ", rows: " + response.getWrittenRows());
+        }
+
+        Map<com.clickhouse.data.ClickHouseDataType, Class<?>> typeHints = new HashMap<>(); // column type -> Java class
+        typeHints.put(com.clickhouse.data.ClickHouseDataType.String,
+                com.clickhouse.client.api.data_formats.StringValue.class);
+
+        try (Client customClient = newClient().typeHintMapping(typeHints).build()) {
+            // Idiomatic path: stream rows and read the binary payload via the index-based getByteArray(int).
+            try (com.clickhouse.client.api.query.QueryResponse response =
+                         customClient.query("SELECT * FROM \"" + tableName + "\" ORDER BY id").get()) {
+                com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader reader =
+                        customClient.newBinaryFormatReader(response);
+                org.testng.Assert.assertNotNull(reader.next());
+
+                int imageIndex = reader.getSchema().nameToColumnIndex("image");
+                byte[] streamed = reader.getByteArray(imageIndex);
+                org.testng.Assert.assertEquals(streamed, imageData,
+                        "Image bytes read via getByteArray(int) must match the source exactly");
+                // The name-based overload must agree with the index-based one.
+                org.testng.Assert.assertEquals(reader.getByteArray("image"), streamed);
+            }
+
+            List<GenericRecord> records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id");
+            assertEquals(records.size(), 1);
+
+            GenericRecord record = records.get(0);
+            // Raw bytes must be preserved exactly, regardless of how they are accessed.
+            org.testng.Assert.assertEquals(record.getByteArray("image"), imageData,
+                    "Image bytes read back via getByteArray must match the source exactly");
+
+            com.clickhouse.client.api.data_formats.StringValue value =
+                    (com.clickhouse.client.api.data_formats.StringValue) record.getObject("image");
+            org.testng.Assert.assertEquals(value.size(), imageData.length);
+            org.testng.Assert.assertEquals(value.toByteArray(), imageData,
+                    "StringValue must preserve the full binary payload");
+        }
+    }
+
+    private byte[] readResource(String name) throws IOException { // reads a classpath resource fully into a byte[]
+        try (java.io.InputStream is = getClass().getClassLoader().getResourceAsStream(name)) {
+            org.testng.Assert.assertNotNull(is, "Test resource not found on classpath: " + name);
+            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
+            byte[] chunk = new byte[8192];
+            int read;
+            while ((read = is.read(chunk)) != -1) { // -1 signals end of stream
+                buffer.write(chunk, 0, read);
+            }
+            return buffer.toByteArray();
+        }
+    }
+
     @Test (groups = { "integration" })
     public void
writeDatetimeTests() throws Exception { String tableName = "rowBinaryFormatWriterTest_writeDatetimeTests_" + UUID.randomUUID().toString().replace('-', '_'); diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java index 4d0de854e..920f86317 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java @@ -5,6 +5,9 @@ import lombok.Setter; import org.apache.commons.lang3.RandomStringUtils; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; @@ -146,7 +149,8 @@ public SamplePOJO() { string = RandomStringUtils.randomAlphabetic(1, 256); fixedString = RandomStringUtils.randomAlphabetic(3); - binaryString = new byte[] { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF }; + // Use a real binary blob (a PNG image) to exercise inserting/reading large non-UTF-8 String values. 
+        binaryString = loadClickHouseLogo();
 
         date = LocalDate.now();
         date32 = LocalDate.now();
@@ -209,6 +213,23 @@ public SamplePOJO() {
         keyword = "database";
     }
 
+    // Reads the clickhouse-logo.png classpath resource fully into memory; fails fast if it is missing or unreadable.
+    private static byte[] loadClickHouseLogo() {
+        try (InputStream logo = SamplePOJO.class.getClassLoader().getResourceAsStream("clickhouse-logo.png")) {
+            if (logo == null) {
+                throw new IllegalStateException("Test resource not found on classpath: clickhouse-logo.png");
+            }
+            ByteArrayOutputStream collected = new ByteArrayOutputStream();
+            byte[] block = new byte[8192];
+            for (int n = logo.read(block); n != -1; n = logo.read(block)) {
+                collected.write(block, 0, n);
+            }
+            return collected.toByteArray();
+        } catch (IOException e) {
+            throw new IllegalStateException("Failed to read test resource clickhouse-logo.png", e);
+        }
+    }
+
     @Override
     public String toString() {
         return "SamplePOJO{" +
diff --git a/client-v2/src/test/resources/clickhouse-logo.png b/client-v2/src/test/resources/clickhouse-logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d68e65e1107ac8b7ab5b091164df05b8551b0120
GIT binary patch
literal 9874
zcmd6N`9G9x^#3(880!#OvNeTdZ?h(*EGeNR%OGpEq3m0h`;jHd^0bgWvc{mt3?us>
z$`&$&7!nm(LYAoSP0#21dVQbQ_Ye5oKX`dvbKmE>&N=V1UMKdl;du^LK~?|&#|1rY
zV*pTC`2PrICiri|rBW^cj#ymKK5G(q{o4mLMebTCqvw4+9P+Mt1avXwFxM5XHpm>u
zsKdw|sV#db8Q7aK=RVEl<~aHz>#iP`gZtC|NCA#Mf!Mnc$0=Q%D`$^KQoa)d)DmgN*lb-N-|5x(l=YtAl%hy8cZgR6^MHQ3!v_WJY^+Ftc^0}e{?wE-
zuyi4sfFBAfGm4;U5MK<0Iq7_IYHf?aiow>Jc*6;Pl=wkLSv_5w-wl3Mo@O6wNP}Or
zDrn=*d`>n7<~&VAylmfDd~X;0xXXjU!XIHgJ8`s&jC4)1d2VAL{PuP_pT2K2?IMD{
zPQDziIS{-{*vY3EA^rx@y8B%zA3EtMn(HL|RN{qkxx%>J)h;(L5bF(&?=F|L{cn0%
zb|2K(&*{?|PT-hR0KfBY*|rXlqEk~-QF}{P`vjoCr
znzshvwNwjU4E72JOH%~|A3M
z_fS{9;#h&I%rNL;Vow3^4;A9oyFCQo6yD~}L#ZJOFdD>L0e?ML+71e=6bHQjK
zbnIc`IjE3>D#3Pk!{iws5y$~iTG)HY=@nvn4zJ$VLQXcA0sk~1T4AIKdhg=tg)S&%
zq)XUQdXpRjx}1e^4S8)m-^$4DC(ucZ7%Z$STGF1Xcu^Jn5Mh-=6#D&k_JKW2Q4FS7
z|No(Q7-SusF=fAUrQHjDD;2Na96(Bt>qR*Jg}RHno2obsxIY1V3sA~I-`Og_FbA;N
zSZ15uJAh8|voPZ1TIMD?bou}bI+%hnz@fSe0rP*}vdn0RYWe4Cl$%ttV;SfdKv*H# zf@xjH;yhtjiF_)M3iQN6YY!C3k(m~N|8x5L-0Yb1-3Dx`SAJz}jTv{VL0k|DSmmSf z5iL{W&D{sroITY3!>5kdB0HF!6S8PF$QfAAefwfOxTfTXokNQ?)fzHGY%oA`WZTxC zq?-vabdjDh6`)CGBLifJC`oVYgZ2LKTdw~(4Id-jHlrOYDH(;E9aa-@krZVX!OR z|B+}XvO+6LRiKeYtMmwl=%zoMx^oLLCNQ!bxT9PnjGIaS4^)mImu_?TPh4;1`Ax&u z>f3!P)BL#7*Vy?9oS69UX|9}gw)C?1?F$nMy|$F5041eEe~jXr!3zhRAc;* zCIX9f^S@9V1c8YSt_OaFiMF6sp)TeBNS=snO13*$%gs+S$5)pN<3<>5<$#cUknfg- z_x}rrVQEljGf|LlNqYlxYYUsLK250K4DTNCU;I zO9_2=yaMdIHIaG(n4e5w$PFc9+a4F7Up4g6U&1%d=ZwW1iKA&|CCH8Hh&&wrWH1AkIw2-V$F z&)>lk#>dpKOJvy?D@zvw=6 zlW_^3HA=Sj@B*)S)F$f0wkCor4r~}aQ3wwAaJHmgrX&)y@UL>*PrFz?*o$PttQ=u4 z4i=GT9-}%O?I?o>qO_t$JTt0}>)0TfA#1kv$;q0+K<1Id=%kw{JqjT(#ExDXKXLpt zL1>f?9we~U0Sb<@AGO`{`>Mw+kBqA*y4sn*^BcEcPhJmNDcFk`?#nxM786AbDKI;* z0jdEZ1jU?FT}%DYxY=lGE+-;V%Z|0($0rN-_}XV-+)FmB-8G7WE6Qm;Tt&)k6rYaL z69nP=OH)QWKM2v{G{1{@lqvlSQ-4p$C>q_~<+I3iv%>U5p3#ew z0Z%kiV4NiLD`mOAEJB~A3sYD9LHYIXn<|K=VmscFI1p03G_}w|`d|?CxS^*HC1I~Q zV+GI#miNWnRhtZiHq%bkw5&?vZ~w`^#L)D!TlNw!t7&Ng7_7`lQ>K49y@F@qUc+Nr z){*(ZTAyK#G_w~+RQ1B;&krHQ)g`lz`7E=JH{i?RG}jc>F}iA-8pcO$Te#4I<4dUV z8djc?!ONn|kiki`n^c5j5Ev&i@+|*+jCeHhINp>|Q1y#((7N>`T+R`_02Bz2QR0Y+QfIP1NXS=k?S# z`;-Wlg1UiI)|S7m6Xaog>tDYv_XIMNw)_^U3Xynl)V!5`AA#AiIU1)eEz!oqyB*fET2(ZwV+Vd%$8M!b4Qq3AV@10zD00sS1B{)f2(yiVYgMEk?){2u*v6y>Dz04CaH#NJyRr<+= zC$bkAGd*LgSK@DmY5Ks%hn!qkdT56A-A(om)I=EaXXg36vh6~M%c3n#97(ONs_qL8 zHCd{?ydMYZOxPsU@B}gprZCv$KNS_we*fPFEju8=QhEQdyE$1BntMiH?6q(@vdI@8QDLdil^`N4c+W7S> zPY9my*b}7QZwV%uw^ou~B}GOZ2sD#uCy)*080|jKvajM}qJ(h;J}{%y_zJv5XyM{3 zLD&fyQ!c*IC!1!IVUHwiwW0PZC8V?n3w5QBAf_(_Lk#Le-*U)qD9`c;nFdlV?F}3T zES|`ud`u+2N&+euvoABu?huRp+*o|_8w$|z<~vsO-5mCWJ)lX47*v`bOXC;AVB1|M z8O+{^N{&@2p_pUjWKX7^jMgtbQtov14oYtg7@t`GD1tAPW7G=!EK7XYiEumt762!Isj%~DrRJ4zx5>O5)gLrxUbBl=|=$9d{AKc$*>yW!KCX0~ipfY%8=(32;KioQ zKA~@}{gEiWL16q(zsbi<-Wo6_F&6Z?&qdWAo%EVASH~Ae$Iwr^W;V!-EsEc+Y&Hg% z!8_;c%-B?*OS7;>w)^|}rJFhM&a%3KxQjo233R@5=Z@2b#!0mynS>f(|X@g$!Z2Ughd@g03`%*_p6c&FHhhg#Mh7RGs62d$qkZYFgi 
z^6(&t!e{82M*DJc(s4DEWGD?YL(RxobZus+t|%W^%jL~yJeEzJoelHYb&AvqiGg#QsvukW0^%*ofNU>RbI^{>5?CTUct{ zyHXRtsr;ojB!zcA%8KkL<8ga5?;U2Ht;UYQ4kGmy#7)yIm^sYGecGqPadUZaUM>n5 zwCI-QQ@)OOQr_p$7Va#KuOt{G^sFD087^9Rny)zzPGrIWSavK_FhMq;?@-z-N10I_ zf8mccx7uCU#eh;u_Cz`T^w)M!WsP^H3FDqKa>*Y_a+qncWEO}1+Cn4vpHzf$8`Q3D|TXtx-Z2gzKnN_yctE)+sy+% zJ6jh-v3X_R^Fau|ospj^2J@!h8n<~Cz` zJK3?-QQ4D}&Bmd|OP)edJ&7Zf4vZxr_aYshx(jo@L{Ev*+(VHVtSfNm<|}q-$p@G( zTV>3UGC^ym)iPP*0-PENtT66{E#ZLfa>{yQH@G4^aHW z7Z(InArltt{HYl=m~{L@7xERBd(qZvP~JyRBf$Ultn^EbOZY{{EJmo3gz%Zl)X&cQ zqZ%~b2PPl*H|@ZkeN0JX))|fYd(@Aq3zvqQ)7AZ3Nph9bdk24}!(;M|lng2wm86@; z@se=8k#)>5O>5fwM?pv1zI}`Rj7n~(r3+`4GCaX+Iym7S)hMQuot`5E3nqf^{zd@t zxOggTD&1kl;pU$|jU>YbWHC3+pO(v#^~~=j6aC6a5d+#Kb3X=qLnQ#!RJaN~_PSbM zO(M(sHj84lCc%T8A~9I%nyl+`8_vX&Ot5ZiO3=LFw|<;;0>xj#5!)}SoK0b*G9-N< zyF?VaWtMqef1o6Nwy%E_gYAY>&pTLepW*#ps54 zNfgHyg{2-cXdKWrSrr3DeGVzeC4)0f|09VET#sr+>yY)XkP@O78oW!-l<73?Z;h|@Are5@S^O895zBrK|>tPa+Niw{LU`%=tbo?Y6HjX-1dk{*D z&U{OldQ~zvcl;`1qm=Ky%l7H_s&LJ5mXV8qh6Yv*E;a>W<8QTLUNfVZQyjX;_5n+W zp6|y|nrZh8>VbOck|FaQeaX3HqbXoqSc#(R$_X!P5c?ECA>kSK*tNMlfy-c0H?+n{ z6qk&wIBqtIS`zVaK`jn_%tLyX@x>Jd%YTCd9oz$5-ciR*a@aURCTsx%~v z_cjv`2%GS}XL=L0gpk|sWjRz!R-W1GxP@q{T0NcS`xZl{sl%3{dXDYLu+bd5&K>Zb zXZ+jS;QfXh!Vj+5AITIzC%GY}KjKxU@VgwDxjs8w_D--ER-R#>uK1JJi#HQ0OjadE zmM&QZ$=v2W18rxzb?g6#-n}?sZJApIbzA z90o=Vo@9-`*eB2x$+S$SrX0NZ+1ZJx_TTD4s5Nt&5_TLJ#RG#D~w1LFxu`B%9-dTJGm&c@1^D)SdG$22C zIQBZs4U9oah-+5QW%;gKdk=3zyH5q`y|3oov7QAM{4;|np_Bbs6m!(%m71KnTD)Wd zJ>Vk~2zaMf|7i~xGKgWziBk4mAhqHO(wV|7{+WzXSV6HE5ZHM_0K%_=pp3NS`rv9(&3(e z<`>-gr?Yc~B6Y~iCP{=eu71E31VMZTO+DY8@Fj~cnX&tm>wD^{dKvQ;q}g+DpXE6s z&yoMzL$cv0P}dUro9>5~J>OQwrt8KNsqNZPaJd>K_`*Hb_w65RV2~uAxf_voXapT zL+ibLe^1jcs>*7Dczm(kwtT;u$s7sM6g-?`*6KF5KDytS?8o#34C94DP0e+m&pIb%LI2!rAbv=rL& zrK?Y&b~sQ@_WtF5=b(I=^p!x#B_#2xWlQbBQ>(z+CiL7sWw-eKQTm!I0maK7s%d{v zu5I<*aDsAaERguyhWh|qM-|x?wdMx65gWel_R+OSF1d?{;mdP_FfNp#%agk0{3H=N zqV6nb8iWY^I+I=u)GgyAZPC8`#A|Hp9}r6(XIu($&`DyeEIRuY^ezvF*JlaUxe>4b 
zX~V@*>JFdg+nTvC75E{gDwNw`-%TmyuNCp?kT9|BWJcba8=Dro#&M=_ZPB>%Tj*&jw-aLc*<8c{babtU{cw%;7p3uzp26B^b&viyg+rsL5LH%@mScw6A^jN z+)1ye?e(u(*ZL#EI6ND6G|KP>%S~=dCasJ2N6zce@Ra8#)*Yl4g!0g@n=Z@?xprH@ z_oL=S_cTF-qm+E&Z)%kpg!D8ydu+!bs*jOVPGuq2CdR+I!CSc38elkuR>%1ge!J}i z$o%opCOMTuj*3Ya0?nc_k!6^c8{x`B9}RiRp16w|3WkeB=jK^u1^`zqyHUr7dVlV$ zieVOe^RdkIr(a$ibx(MeplbF-^$7d5Dc(1(=_wp1Ag=PmUlH71I=Q+cP(G_#5e6Ge z&5SY5xV@|dYTZ6OS-Ra?LY^;uJhg5l)RHAyhJMo;B^)UK9!D_ZSy2BjAHINDl5#^Q z>8J;IdnL{evWwZ}M2=1T)VZb9Lv+LOb2wy20aBxs-eE7zT=OZ<2%ydY=_A z@%&x;;OBv_($LZILzBA6b+#RgC$D#WJjXQF8T-4tkDWcwU?Eb@FRiMP1z1WZIO(+w z-DxDGM9=oJsk1L^-?~nkY%>_YGBxh3*?-V*8sxTd!Wk{E6oLA@wxM5U_qqNNMJHMVV zG<~#8b1;p_oPWG>9n@L|$bQjq&w+w(KeNY`O&UqCW@DT)Cbnr;`@;v02~(f5xlrY^_t6sFO~#Y--gF;yh~ zdL}ia2rXd4Kjk>GIXDYlI-OzVt8uW96QInCgl;T9fsPUQ{ zoLTrr)`-+lo*VQrIbpCLn7CMHP2?YT=KDLxKJ=>|{NcyZl-=Vg3M9qLt6DwG^QG1n zy6PCYn9dnG-m06uSaxjwCvVim9cxd}XCEt!D`j2KW257M&0FO<$-qxew>$#ikpG3M zZ7gqN?xR({UtQHZ{>K@Tf$Ek3?&z4E^p#T=-XZdSf`6Es5>Dbj-{C1ubUEQ$URB|{ zz&rAm&+F*e3qYe)$-Xg{r(6J&HLFQXJ1<8lZZwUNo-zbHd0@ezBwHHK&qhJNpN_W` z*`288H;Xz3}r$64lO?*}gMnBJd^zMMayUrcfCNcSz$E*3>|G1d07 z{QmuP&0ZMi#E;`c1ZtlT*suC3D|5k*%n=Ic2_Q=(TATdI*-orwZFNKXnGt+_pUfJ7 zn@FocnsLle+W=b|kAc#toQcBdnxmCXrybAx5Re|D(kC=z;qC@2Y*LcT6v@NJEb=Fd+#P)aCuBS)+VWZwtxUI ztb}MYP9am9Cnnwb=d<^XYxx6t_Ql0P{qX<|mBPDh!b>Y&#-BqrLaX}h3y+m@x7Bvm zJ@vWah$i^It115E3^Fiu&zq|SelgKndDOBQ$;ii38#~RsB?X%oD1lzO0^_H4zODg4 z;fHow7!QoI%q)p#iShRq);aVn_RqEa{=P!hoNar`iBOU7eI8Uz?Cbwf$%mBAMhDc) zRve8kevpH&(|dKObu2F_yF8|{N>m|skIU55_qZtAMlXRhD*DbvUzNT(mYoM2EjscI zm~`(ItDOKP44@%A!{raK)4e7a2LGOHK14NQ+P!0r+SE<=>d2-^{i&mN$AV1ZdKQj4 z@GwoPh1_0tvb*X;{xcY}yA0=fyW