diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java
index 8494c16c3..a9a212f6a 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java
@@ -84,6 +84,10 @@ public interface ClickHouseBinaryFormatWriter {
void setString(int colIndex, String value);
+ /**
+  * Sets a column value from raw bytes rather than from a {@link String}.
+  * NOTE(review): the bytes are presumably written without re-encoding - confirm against the
+  * implementing writer.
+  *
+  * @param column name of the column to set
+  * @param value raw bytes of the value
+  */
+ void setString(String column, byte[] value);
+
+ /**
+  * Sets a column value from raw bytes rather than from a {@link String}. Because
+  * {@code setString(int, String)} also exists, a bare {@code null} literal cannot select an
+  * overload on its own; callers passing {@code null} must cast it to the intended type.
+  *
+  * @param colIndex index of the column to set
+  * @param value raw bytes of the value
+  */
+ void setString(int colIndex, byte[] value);
+
void setDate(String column, LocalDate value);
void setDate(int colIndex, LocalDate value);
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java
index ad8ee680a..303a5e7f4 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java
@@ -126,10 +126,18 @@ public void writeString(String value) throws IOException {
BinaryStreamUtils.writeString(out, value);
}
+ /**
+  * Writes the given raw bytes as a string value to {@code out} by delegating to
+  * {@code BinaryStreamUtils.writeString}. No charset conversion happens in this method.
+  *
+  * @param value raw bytes of the string
+  * @throws IOException when writing to the stream fails
+  */
+ public void writeString(byte[] value) throws IOException {
+ BinaryStreamUtils.writeString(out, value);
+ }
+
public void writeFixedString(String value, int len) throws IOException {
BinaryStreamUtils.writeFixedString(out, value, len);
}
+ /**
+  * Writes the given raw bytes as a fixed-length string to {@code out} by delegating to
+  * {@code SerializerUtils.writeFixedStringBytes}.
+  *
+  * @param value raw bytes of the string
+  * @param len declared length of the fixed string
+  * @throws IOException when writing to the stream fails
+  */
+ public void writeFixedString(byte[] value, int len) throws IOException {
+ SerializerUtils.writeFixedStringBytes(out, value, len);
+ }
+
public void writeDate(ZonedDateTime value) throws IOException {
SerializerUtils.writeDate(out, value, value.getZone());
}
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java
index a487da1b9..2a2ecedd3 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java
@@ -203,6 +203,16 @@ public void setString(int colIndex, String value) {
setValue(colIndex, value);
}
+ /**
+  * Stores raw bytes as the value of the named column by delegating to {@code setValue}.
+  *
+  * @param column name of the target column
+  * @param value raw bytes to store
+  */
+ @Override
+ public void setString(String column, byte[] value) {
+ setValue(column, value);
+ }
+
+ /**
+  * Stores raw bytes as the value of the column at the given index by delegating to
+  * {@code setValue}.
+  *
+  * @param colIndex index of the target column
+  * @param value raw bytes to store
+  */
+ @Override
+ public void setString(int colIndex, byte[] value) {
+ setValue(colIndex, value);
+ }
+
@Override
public void setDate(String column, LocalDate value) {
setValue(column, value);
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java
new file mode 100644
index 000000000..3ca94c923
--- /dev/null
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java
@@ -0,0 +1,190 @@
+package com.clickhouse.client.api.data_formats;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+
+/**
+ * Holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes
+ * to avoid lossy decoding and unnecessary allocations.
+ *
+ * <p>This is a mutable structure and must be used with care. To avoid copying, it does not
+ * duplicate the bytes it is given: the constructors wrap the supplied array/buffer instead of
+ * copying it. Consequently, mutating the source array (or the array returned by
+ * {@link #toByteArray()} when that is the live backing array) changes the observed value, while
+ * a string already cached by {@link #asString()} keeps its old content. Callers that need an
+ * independent snapshot must copy the bytes themselves.
+ *
+ * <p>Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. Only
+ * buffers with an accessible backing array are supported today; buffers for which
+ * {@link ByteBuffer#hasArray()} is {@code false} (direct or read-only) are rejected. The value is
+ * the window of remaining bytes captured at construction, which may be only part of the backing
+ * array.
+ */
+public class StringValue {
+
+    /** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */
+    public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+
+    /** Private slice over the value bytes; its position and limit are never changed. */
+    private final ByteBuffer buffer;
+
+    private final Charset defaultCharset;
+
+    /** Lazily decoded form of the value in the default charset; {@code null} until first use. */
+    private volatile String cached;
+
+    /**
+     * Creates a value backed by the given bytes. The array is wrapped, not copied, so it must not be
+     * modified after being passed in.
+     *
+     * @param bytes raw value bytes (not null)
+     */
+    public StringValue(byte[] bytes) {
+        this(bytes, DEFAULT_CHARSET);
+    }
+
+    /**
+     * Creates a value backed by the given bytes using the provided default charset. The array is
+     * wrapped, not copied, so it must not be modified after being passed in.
+     *
+     * @param bytes raw value bytes (not null)
+     * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
+     * @throws NullPointerException if {@code bytes} or {@code defaultCharset} is null
+     */
+    public StringValue(byte[] bytes, Charset defaultCharset) {
+        this(ByteBuffer.wrap(Objects.requireNonNull(bytes, "bytes cannot be null")), defaultCharset);
+    }
+
+    /**
+     * Creates a value backed by the remaining content of the given buffer using the provided default
+     * charset. The buffer content is referenced, not copied, so it must not be modified afterwards.
+     *
+     * @param buffer backing heap buffer (not null); its remaining bytes define the value
+     * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
+     * @throws NullPointerException if {@code buffer} or {@code defaultCharset} is null
+     * @throws IllegalArgumentException if the buffer has no accessible backing array (for example a
+     *         direct or read-only buffer)
+     */
+    public StringValue(ByteBuffer buffer, Charset defaultCharset) {
+        Objects.requireNonNull(buffer, "buffer cannot be null");
+        Objects.requireNonNull(defaultCharset, "charset is required to convert buffer to String");
+
+        if (!buffer.hasArray()) {
+            throw new IllegalArgumentException("Can work only with heap buffer.");
+        }
+
+        // Keep an independent view so external position/limit changes do not affect this value.
+        this.buffer = buffer.slice();
+        this.defaultCharset = defaultCharset;
+    }
+
+    /**
+     * Returns a read-only view over the raw bytes of this value. The returned buffer has its own
+     * position and limit, but it still reads the same underlying bytes as this value.
+     *
+     * @return read-only buffer positioned at the first byte of the value
+     */
+    public ByteBuffer asByteBuffer() {
+        return buffer.asReadOnlyBuffer();
+    }
+
+    /**
+     * Returns the bytes of this value; the result always has exactly {@link #size()} elements.
+     *
+     * <p>When the value spans the whole backing array (always the case for values created from a
+     * {@code byte[]}), that array itself is returned without copying: it is live storage, so
+     * mutating it mutates this value. When the value is only a window of a larger array (a buffer
+     * created with an offset, position or limit), the window is copied, because returning the
+     * backing array would expose bytes that are not part of the value.
+     *
+     * @return array holding exactly the value bytes
+     */
+    public byte[] toByteArray() {
+        byte[] backing = buffer.array();
+        int length = buffer.remaining();
+        if (valueOffset() == 0 && length == backing.length) {
+            return backing;
+        }
+        byte[] window = new byte[length];
+        System.arraycopy(backing, valueOffset(), window, 0, length);
+        return window;
+    }
+
+    /**
+     * @return number of bytes in this value
+     */
+    public int size() {
+        return buffer.remaining();
+    }
+
+    /**
+     * @return {@code true} if the value has no bytes
+     */
+    public boolean isEmpty() {
+        return buffer.remaining() == 0;
+    }
+
+    /**
+     * Decodes the value using the default charset (UTF-8 unless another was provided at construction).
+     * The result is cached so repeated calls do not allocate a new string.
+     *
+     * @return decoded string
+     */
+    public String asString() {
+        String s = cached;
+        if (s == null) {
+            s = decode(defaultCharset);
+            cached = s;
+        }
+        return s;
+    }
+
+    /**
+     * Decodes the value using the given charset. The result is cached only when the charset matches the
+     * default charset of this value.
+     *
+     * @param charset charset to decode with (not null)
+     * @return decoded string
+     */
+    public String asString(Charset charset) {
+        Objects.requireNonNull(charset, "charset cannot be null");
+        if (charset.equals(defaultCharset)) {
+            return asString();
+        }
+        return decode(charset);
+    }
+
+    /** Decodes only the bytes of this value, honouring the slice offset inside the backing array. */
+    private String decode(Charset charset) {
+        return new String(buffer.array(), valueOffset(), buffer.remaining(), charset);
+    }
+
+    /** Index of the first value byte inside {@code buffer.array()}. */
+    private int valueOffset() {
+        return buffer.arrayOffset() + buffer.position();
+    }
+
+    @Override
+    public String toString() {
+        return asString();
+    }
+
+    /** Compares the raw bytes only; the default charset does not take part in equality. */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof StringValue)) {
+            return false;
+        }
+        return buffer.equals(((StringValue) o).buffer);
+    }
+
+    @Override
+    public int hashCode() {
+        return buffer.hashCode();
+    }
+}
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java
index e5892748d..54b907749 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java
@@ -4,6 +4,7 @@
import com.clickhouse.client.api.ClientException;
import com.clickhouse.client.api.DataTypeUtils;
import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader;
+import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.client.api.internal.DataTypeConverter;
import com.clickhouse.client.api.internal.MapUtils;
import com.clickhouse.client.api.internal.ServerSettings;
@@ -532,8 +533,9 @@ private T getPrimitiveArray(int index, Class> componentType) {
}
return (T)array;
} else if (componentType == byte.class) {
- if (value instanceof String) {
- return (T) ((String) value).getBytes(StandardCharsets.UTF_8);
+ byte[] bytes = stringLikeToBytes(value);
+ if (bytes != null) {
+ return (T) bytes;
} else if (value instanceof InetAddress) {
return (T) ((InetAddress) value).getAddress();
}
@@ -676,6 +678,24 @@ public Instant getInstant(int index) {
throw new ClientException("Column of type " + column.getDataType() + " cannot be converted to Instant");
}
+ /**
+  * Extracts the raw bytes of a string-like value. A {@link StringValue} hands back the bytes it
+  * already holds (no re-encoding, so binary content survives), while a {@link String} is encoded
+  * as UTF-8, which is the historical behaviour. Any other value, including {@code null}, yields
+  * {@code null} so that callers can fall back to other handling.
+  *
+  * @param value value to convert
+  * @return raw bytes or {@code null} if the value is not string-like
+  */
+ public static byte[] stringLikeToBytes(Object value) {
+     if (value instanceof StringValue) {
+         final StringValue binary = (StringValue) value;
+         return binary.toByteArray();
+     }
+     if (value instanceof String) {
+         final String text = (String) value;
+         return text.getBytes(StandardCharsets.UTF_8);
+     }
+     return null;
+ }
+
static Instant objectToInstant(Object value) {
if (value instanceof LocalDateTime) {
LocalDateTime dateTime = (LocalDateTime) value;
@@ -866,6 +886,10 @@ public String[] getStringArray(int index) {
BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value;
if (array.itemType == String.class) {
return (String[]) array.getArray();
+ } else if (array.itemType == StringValue.class) {
+ StringValue[] stringValues = (StringValue[]) array.getArray();
+ return Arrays.stream(stringValues)
+ .map(sv -> sv == null ? null : sv.asString()).toArray(String[]::new);
} else if (array.itemType == BinaryStreamReader.EnumValue.class) {
BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray();
return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new);
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java
index 8a6b76a5a..6d0f19971 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java
@@ -2,6 +2,7 @@
import com.clickhouse.client.api.ClientException;
import com.clickhouse.client.api.DataTypeUtils;
+import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseDataType;
import com.clickhouse.data.ClickHouseEnum;
@@ -55,6 +56,8 @@ public class BinaryStreamReader {
private final Class> arrayDefaultTypeHint;
+ private final boolean stringAsBinaryDefault;
+
private static final int SB_INIT_SIZE = 100;
private ClickHouseColumn lastDataColumn = null;
@@ -69,7 +72,7 @@ public class BinaryStreamReader {
* @param jsonAsString - use string to serialize/deserialize JSON columns
* @param typeHintMapping - what type use as hint if hint is not set or may not be known.
*/
- BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) {
+ public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) {
this.log = log == null ? NOPLogger.NOP_LOGGER : log;
this.timeZone = timeZone;
this.input = input;
@@ -78,6 +81,20 @@ public class BinaryStreamReader {
this.arrayDefaultTypeHint = typeHintMapping == null ||
typeHintMapping.isEmpty()? NO_TYPE_HINT : typeHintMapping.get(ClickHouseDataType.Array);
+ this.stringAsBinaryDefault = typeHintMapping != null &&
+ typeHintMapping.get(ClickHouseDataType.String) == StringValue.class;
+ }
+
+ /**
+  * Decides whether a string column should be returned as raw bytes ({@link StringValue}).
+  * An explicit {@code StringValue} or {@code String} hint decides on its own; any other hint,
+  * or no hint at all, falls back to the reader-wide default.
+  */
+ private boolean readStringAsBinary(Class<?> typeHint) {
+     if (typeHint == StringValue.class) {
+         return true;
+     }
+     // A String hint forces decoding; everything else (including null) uses the default.
+     return typeHint != String.class && stringAsBinaryDefault;
}
/**
@@ -121,12 +138,18 @@ public T readValue(ClickHouseColumn column, Class> typeHint) throws IOExce
switch (dataType) {
// Primitives
case FixedString: {
+ if (readStringAsBinary(typeHint)) {
+ return (T) new StringValue(readStringBytes(input, precision));
+ }
byte[] bytes = precision > STRING_BUFF.length ?
new byte[precision] : STRING_BUFF;
readNBytes(input, bytes, 0, precision);
return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8);
}
case String: {
+ if (readStringAsBinary(typeHint)) {
+ return (T) readStringValue();
+ }
return (T) readString();
}
case Int8:
@@ -1119,17 +1142,41 @@ public String readString() throws IOException {
}
/**
- * Reads a decimal value from input stream.
+ * Reads a string from the internal input stream preserving the raw bytes as a {@link StringValue}.
+ * Unlike {@link #readString()} this does not decode bytes into a {@link String} and never reuses the
+ * shared buffer, so the value is safe to keep after the next read.
+ *
+ * @return string value holding the raw bytes
+ * @throws IOException when IO error occurs
+ */
+ public StringValue readStringValue() throws IOException {
+     // The length prefix comes first on the stream, then exactly that many raw bytes.
+     final int byteCount = readVarInt(input);
+     final byte[] raw = readStringBytes(input, byteCount);
+     return new StringValue(raw);
+ }
+
+ /**
+ * Reads the raw bytes of a string from the input stream given its length.
+ *
* @param input - source of bytes
- * @return String
+ * @param len - number of bytes to read
+ * @return byte[] containing the raw string bytes
* @throws IOException when IO error occurs
*/
- public static String readString(InputStream input) throws IOException {
- int len = readVarInt(input);
+ public static byte[] readStringBytes(InputStream input, int len) throws IOException {
+ // Nothing to read for an empty value, so the stream is left untouched.
if (len == 0) {
- return "";
+ return new byte[0];
}
- return new String(readNBytes(input, len), StandardCharsets.UTF_8);
+ // NOTE(review): len is passed to readNBytes unchecked - confirm callers never supply a negative length.
+ return readNBytes(input, len);
+ }
+
+ /**
+  * Reads a length-prefixed string from the given stream and decodes its bytes as UTF-8.
+  *
+  * @param input - source of bytes
+  * @return decoded string; the empty string when no bytes were read
+  * @throws IOException when IO error occurs
+  */
+ public static String readString(InputStream input) throws IOException {
+     final int length = readVarInt(input);
+     final byte[] raw = readStringBytes(input, length);
+     if (raw.length == 0) {
+         return "";
+     }
+     return new String(raw, StandardCharsets.UTF_8);
+ }
public static int readByteOrEOF(InputStream input) throws IOException {
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java
index 75f7ea314..a8708cbfb 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java
@@ -2,8 +2,8 @@
import com.clickhouse.client.api.ClientException;
import com.clickhouse.client.api.DataTypeUtils;
+import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.client.api.internal.DataTypeConverter;
-import com.clickhouse.client.api.metadata.NoSuchColumnException;
import com.clickhouse.client.api.metadata.TableSchema;
import com.clickhouse.client.api.query.GenericRecord;
import com.clickhouse.client.api.query.NullValueException;
@@ -13,7 +13,6 @@
import com.clickhouse.data.value.ClickHouseGeoPointValue;
import com.clickhouse.data.value.ClickHouseGeoPolygonValue;
import com.clickhouse.data.value.ClickHouseGeoRingValue;
-import com.google.common.collect.ImmutableList;
import java.math.BigDecimal;
import java.math.BigInteger;
@@ -276,6 +275,14 @@ private T getPrimitiveArray(String colName) {
@Override
public byte[] getByteArray(String colName) {
+ // A null column stays null and string-like columns yield their bytes directly; every other
+ // value goes through the generic primitive-array conversion below.
+ final Object raw = readValue(colName);
+ final byte[] direct = raw == null ? null : AbstractBinaryFormatReader.stringLikeToBytes(raw);
+ if (raw == null || direct != null) {
+     return direct;
+ }
return getPrimitiveArray(colName);
}
@@ -319,6 +326,10 @@ public String[] getStringArray(String colName) {
BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value;
if (array.itemType == String.class) {
return (String[]) array.getArray();
+ } else if (array.itemType == StringValue.class) {
+ StringValue[] stringValues = (StringValue[]) array.getArray();
+ return Arrays.stream(stringValues)
+ .map(sv -> sv == null ? null : sv.asString()).toArray(String[]::new);
} else if (array.itemType == BinaryStreamReader.EnumValue.class) {
BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray();
return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new);
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java
index cc7e91792..a4e8fb598 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java
@@ -2,6 +2,7 @@
import com.clickhouse.client.api.Client;
import com.clickhouse.client.api.ClientException;
+import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.client.api.serde.POJOFieldDeserializer;
import com.clickhouse.data.ClickHouseAggregateFunction;
import com.clickhouse.data.ClickHouseColumn;
@@ -552,10 +553,22 @@ private static void serializePrimitiveData(OutputStream stream, Object value, Cl
BinaryStreamUtils.writeBoolean(stream, (Boolean) value);
break;
case String:
- BinaryStreamUtils.writeString(stream, convertToString(value));
+ if (value instanceof byte[]) {
+ BinaryStreamUtils.writeString(stream, (byte[]) value);
+ } else if (value instanceof StringValue) {
+ BinaryStreamUtils.writeString(stream, ((StringValue) value).toByteArray());
+ } else {
+ BinaryStreamUtils.writeString(stream, convertToString(value));
+ }
break;
case FixedString:
- BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision());
+ if (value instanceof byte[]) {
+ writeFixedStringBytes(stream, (byte[]) value, column.getPrecision());
+ } else if (value instanceof StringValue) {
+ writeFixedStringBytes(stream, ((StringValue) value).toByteArray(), column.getPrecision());
+ } else {
+ BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision());
+ }
break;
case Date:
writeDate(stream, value, ZoneId.of("UTC")); // TODO: check
@@ -912,6 +925,26 @@ public static String convertToString(Object value) {
return java.lang.String.valueOf(value);
}
+ /**
+  * Writes raw bytes as a ClickHouse {@code FixedString(length)} value. The bytes are written as-is and
+  * right-padded with zero bytes when shorter than {@code length}. The padding is emitted with a
+  * single write call instead of one call per byte.
+  *
+  * @param stream output stream
+  * @param value raw bytes (not null)
+  * @param length fixed string length
+  * @throws NullPointerException when {@code value} is null
+  * @throws IllegalArgumentException when {@code value} is longer than {@code length}
+  * @throws IOException when failed to write to the stream
+  */
+ public static void writeFixedStringBytes(OutputStream stream, byte[] value, int length) throws IOException {
+     java.util.Objects.requireNonNull(value, "value cannot be null");
+     if (value.length > length) {
+         throw new IllegalArgumentException("Value of length " + value.length +
+                 " is longer than FixedString(" + length + ")");
+     }
+     stream.write(value);
+     final int padding = length - value.length;
+     if (padding > 0) {
+         // A freshly allocated array is already zero-filled, so it can be written as the pad.
+         stream.write(new byte[padding]);
+     }
+ }
+
public static > Set parseEnumList(String value, Class enumType) {
Set values = new HashSet<>();
for (StringTokenizer causes = new StringTokenizer(value, Client.VALUES_LIST_DELIMITER); causes.hasMoreTokens(); ) {
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java
index b1f6a8520..0faa2a7a8 100644
--- a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java
+++ b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java
@@ -2,6 +2,7 @@
import com.clickhouse.client.api.ClickHouseException;
import com.clickhouse.client.api.DataTypeUtils;
+import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseDataType;
@@ -85,7 +86,9 @@ public String stringToString(Object bytesOrString, ClickHouseColumn column) {
if (column.isArray()) {
sb.append(QUOTE);
}
- if (bytesOrString instanceof CharSequence) {
+ if (bytesOrString instanceof StringValue) {
+ sb.append(((StringValue) bytesOrString).asString());
+ } else if (bytesOrString instanceof CharSequence) {
sb.append(((CharSequence) bytesOrString));
} else if (bytesOrString instanceof byte[]) {
sb.append(new String((byte[]) bytesOrString));
diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java
new file mode 100644
index 000000000..feecf1d9f
--- /dev/null
+++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java
@@ -0,0 +1,332 @@
+package com.clickhouse.client.api.data_formats;
+
+import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader;
+import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader;
+import com.clickhouse.client.api.data_formats.internal.SerializerUtils;
+import com.clickhouse.data.ClickHouseColumn;
+import com.clickhouse.data.ClickHouseDataType;
+import com.clickhouse.data.format.BinaryStreamUtils;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.Map;
+import java.util.TimeZone;
+
+public class StringValueTests {
+
+ private static final Map> STRING_AS_BINARY =
+ Collections.singletonMap(ClickHouseDataType.String, (Class>) StringValue.class);
+
+ private static BinaryStreamReader reader(byte[] input, Map> hints) {
+ return new BinaryStreamReader(new ByteArrayInputStream(input), TimeZone.getTimeZone("UTC"), null,
+ new BinaryStreamReader.DefaultByteBufferAllocator(), false, hints);
+ }
+
+ // ---- StringValue API ----
+
+ @Test
+ public void testStringValueApiBasics() {
+ byte[] bytes = "hello world".getBytes(StandardCharsets.UTF_8);
+ StringValue sv = new StringValue(bytes);
+
+ Assert.assertEquals(sv.size(), bytes.length);
+ Assert.assertFalse(sv.isEmpty());
+ Assert.assertEquals(sv.asString(), "hello world");
+ Assert.assertEquals(sv.toString(), "hello world");
+ Assert.assertEquals(sv.toByteArray(), bytes);
+ }
+
+ @Test
+ public void testToByteArrayReturnsBackingArrayReference() {
+ byte[] bytes = {1, 2, 3, 4};
+ StringValue sv = new StringValue(bytes);
+ byte[] backing = sv.toByteArray();
+ // No copy is made: the returned array is the live backing storage and mutating it mutates the value.
+ Assert.assertSame(backing, bytes, "toByteArray() must return the backing array without copying");
+ backing[0] = 42;
+ Assert.assertEquals(sv.toByteArray()[0], 42, "Mutating the returned array mutates the value (no copy)");
+ }
+
+ @Test
+ public void testAsByteBufferIsReadOnly() {
+ StringValue sv = new StringValue(new byte[]{1, 2, 3});
+ ByteBuffer buffer = sv.asByteBuffer();
+ Assert.assertTrue(buffer.isReadOnly());
+ Assert.assertEquals(buffer.remaining(), 3);
+ }
+
+ @Test
+ public void testAsStringIsCached() {
+ StringValue sv = new StringValue("cached".getBytes(StandardCharsets.UTF_8));
+ String first = sv.asString();
+ String second = sv.asString();
+ Assert.assertSame(first, second, "asString() should cache and return the same instance");
+ }
+
+ @Test
+ public void testAsStringWithCharset() {
+ String original = "Привет, мир";
+ StringValue sv = new StringValue(original.getBytes(StandardCharsets.UTF_16));
+ Assert.assertEquals(sv.asString(StandardCharsets.UTF_16), original);
+ }
+
+ @Test
+ public void testEqualsAndHashCode() {
+ StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8));
+ StringValue b = new StringValue("abc".getBytes(StandardCharsets.UTF_8));
+ StringValue c = new StringValue("abd".getBytes(StandardCharsets.UTF_8));
+
+ // Reflexive
+ Assert.assertEquals(a, a);
+ // Equal content -> equal value and equal hash code
+ Assert.assertEquals(a, b);
+ Assert.assertEquals(b, a, "equals must be symmetric");
+ Assert.assertEquals(a.hashCode(), b.hashCode());
+ // Different content -> not equal
+ Assert.assertNotEquals(a, c);
+ }
+
+ @Test
+ public void testEqualsRejectsNullAndOtherTypes() {
+ StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8));
+ Assert.assertFalse(a.equals(null), "A value must never equal null");
+ Assert.assertFalse(a.equals("abc"), "A value must not equal a raw String of the same text");
+ Assert.assertNotEquals(a, new Object());
+ }
+
+ @Test
+ public void testEqualsIgnoresDefaultCharset() {
+ // equals/hashCode are defined on the raw bytes, so the default charset must not affect them.
+ byte[] bytes = "abc".getBytes(StandardCharsets.UTF_8);
+ StringValue utf8 = new StringValue(bytes, StandardCharsets.UTF_8);
+ StringValue latin1 = new StringValue("abc".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
+ Assert.assertEquals(utf8, latin1, "Values with identical bytes must be equal regardless of default charset");
+ Assert.assertEquals(utf8.hashCode(), latin1.hashCode());
+ }
+
+ @Test
+ public void testEqualsDistinguishesByContentAndLength() {
+ StringValue ab = new StringValue(new byte[]{1, 2});
+ StringValue abc = new StringValue(new byte[]{1, 2, 3});
+ StringValue empty = new StringValue(new byte[0]);
+
+ // Same prefix but different length must not be equal.
+ Assert.assertNotEquals(ab, abc);
+ Assert.assertNotEquals(abc, ab);
+ // Empty values are only equal to other empty values.
+ Assert.assertEquals(empty, new StringValue(new byte[0]));
+ Assert.assertNotEquals(empty, ab);
+ }
+
+ @Test
+ public void testEqualsIsConsistentWithBinaryReads() throws IOException {
+ // Two independently read StringValues over the same bytes must compare equal.
+ byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F};
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ BinaryStreamUtils.writeString(baos, binary);
+ byte[] wire = baos.toByteArray();
+
+ ClickHouseColumn column = ClickHouseColumn.of("s", "String");
+ StringValue first = reader(wire, STRING_AS_BINARY).readValue(column);
+ StringValue second = reader(wire, STRING_AS_BINARY).readValue(column);
+
+ Assert.assertEquals(first, second);
+ Assert.assertEquals(first.hashCode(), second.hashCode());
+ Assert.assertEquals(first, new StringValue(binary));
+ }
+
+ @Test
+ public void testEmptyValue() {
+ StringValue sv = new StringValue(new byte[0]);
+ Assert.assertTrue(sv.isEmpty());
+ Assert.assertEquals(sv.size(), 0);
+ Assert.assertEquals(sv.asString(), "");
+ Assert.assertEquals(sv.toByteArray().length, 0);
+ }
+
+ // ---- Reading String columns as StringValue ----
+
+ @DataProvider(name = "charsetStrings")
+ private Object[][] charsetStrings() {
+ return new Object[][]{
+ {"plain ascii", StandardCharsets.UTF_8},
+ {"unicode: Привет 你好 🚀", StandardCharsets.UTF_8},
+ {"latin1 café", StandardCharsets.ISO_8859_1},
+ {"utf16 текст", StandardCharsets.UTF_16},
+ {" leading and trailing ", StandardCharsets.UTF_8},
+ {"", StandardCharsets.UTF_8},
+ };
+ }
+
+ @Test(dataProvider = "charsetStrings")
+ public void testReadStringAsStringValuePreservesBytes(String value, Charset charset) throws IOException {
+ byte[] encoded = value.getBytes(charset);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ BinaryStreamUtils.writeString(baos, encoded); // binary string write (raw bytes)
+
+ ClickHouseColumn column = ClickHouseColumn.of("s", "String");
+ Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column);
+
+ Assert.assertTrue(read instanceof StringValue, "Expected StringValue but got " + read.getClass());
+ StringValue sv = (StringValue) read;
+ Assert.assertEquals(sv.toByteArray(), encoded, "Raw bytes must be preserved");
+ Assert.assertEquals(sv.asString(charset), value, "Decoding with the source charset must round-trip");
+ }
+
+ @Test
+ public void testReadBinaryNonUtf8IsPreserved() throws IOException {
+ // Bytes that are not valid UTF-8 (e.g. a binary hash). Decoding as UTF-8 would be lossy.
+ byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF,
+ (byte) 0xFF, (byte) 0x00, (byte) 0x80, (byte) 0xC0, (byte) 0xFE};
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ BinaryStreamUtils.writeString(baos, binary);
+
+ ClickHouseColumn column = ClickHouseColumn.of("s", "String");
+ StringValue sv = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column);
+
+ Assert.assertEquals(sv.toByteArray(), binary, "Binary content must be preserved exactly");
+ Assert.assertEquals(AbstractBinaryFormatReader.stringLikeToBytes(sv), binary,
+ "Shared string->bytes conversion must preserve binary content");
+ }
+
+ @Test
+ public void testFixedStringAsStringValue() throws IOException {
+ byte[] binary = new byte[]{(byte) 0x01, (byte) 0xFF, (byte) 0x00, (byte) 0x10, (byte) 0x80};
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ baos.write(binary); // FixedString(5) is written as exactly 5 raw bytes
+
+ ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(5)");
+ Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column);
+
+ Assert.assertTrue(read instanceof StringValue);
+ Assert.assertEquals(((StringValue) read).toByteArray(), binary);
+ }
+
+    /**
+     * Decodes an {@code Array(String)} column under the binary-string type hint and verifies that every
+     * element is surfaced as a {@link StringValue} holding exactly the bytes that were written.
+     */
+    @Test
+    public void testReadStringArrayAsStringValue() throws IOException {
+        // Array(String) elements must be preserved as StringValue (including non-UTF-8 content).
+        final byte[][] expected = {
+                "plain".getBytes(StandardCharsets.UTF_8),
+                "Привет".getBytes(StandardCharsets.UTF_8),
+                new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF},
+                new byte[0],
+        };
+
+        // Hand-encode the array: a var-int element count followed by each element written as a string.
+        ByteArrayOutputStream wire = new ByteArrayOutputStream();
+        BinaryStreamUtils.writeVarInt(wire, expected.length);
+        for (int idx = 0; idx < expected.length; idx++) {
+            BinaryStreamUtils.writeString(wire, expected[idx]);
+        }
+
+        ClickHouseColumn arrayColumn = ClickHouseColumn.of("a", "Array(String)");
+        Object decoded = reader(wire.toByteArray(), STRING_AS_BINARY).readValue(arrayColumn);
+
+        Assert.assertTrue(decoded instanceof BinaryStreamReader.ArrayValue,
+                "Expected ArrayValue but got " + decoded.getClass());
+        BinaryStreamReader.ArrayValue arrayValue = (BinaryStreamReader.ArrayValue) decoded;
+        Assert.assertEquals(arrayValue.length(), expected.length);
+
+        Object backing = arrayValue.getArray();
+        Assert.assertTrue(backing instanceof StringValue[], "Array items must be StringValue, got " + backing.getClass());
+        StringValue[] items = (StringValue[]) backing;
+        int position = 0;
+        for (byte[] want : expected) {
+            Assert.assertEquals(items[position].toByteArray(), want, "Element " + position + " bytes must be preserved");
+            position++;
+        }
+    }
+
+    /**
+     * Decodes a {@code Map(String, String)} column under the binary-string type hint and verifies that both
+     * keys and values come back as {@link StringValue} instances carrying the original bytes, and that a
+     * freshly constructed {@link StringValue} can be used to look an entry up.
+     */
+    @Test
+    public void testReadStringMapAsStringValue() throws IOException {
+        // Map(String, String) keys and values must be preserved as StringValue.
+        byte[][] keys = {
+                "k1".getBytes(StandardCharsets.UTF_8),
+                "ключ".getBytes(StandardCharsets.UTF_8),
+        };
+        byte[][] vals = {
+                "v1".getBytes(StandardCharsets.UTF_8),
+                new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80},
+        };
+
+        // Hand-encode the map: a var-int entry count followed by alternating key and value strings.
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BinaryStreamUtils.writeVarInt(baos, keys.length);
+        for (int i = 0; i < keys.length; i++) {
+            BinaryStreamUtils.writeString(baos, keys[i]);
+            BinaryStreamUtils.writeString(baos, vals[i]);
+        }
+
+        ClickHouseColumn column = ClickHouseColumn.of("m", "Map(String, String)");
+        Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column);
+
+        Assert.assertTrue(read instanceof Map, "Expected Map but got " + read.getClass());
+        Map<?, ?> map = (Map<?, ?>) read;
+        Assert.assertEquals(map.size(), keys.length);
+
+        // NOTE(review): the positional comparison below assumes the reader returns a map that iterates
+        // in the order entries were written - confirm against the reader implementation.
+        int i = 0;
+        for (Map.Entry<?, ?> entry : map.entrySet()) {
+            Assert.assertTrue(entry.getKey() instanceof StringValue, "Map key must be a StringValue");
+            Assert.assertTrue(entry.getValue() instanceof StringValue, "Map value must be a StringValue");
+            Assert.assertEquals(((StringValue) entry.getKey()).toByteArray(), keys[i], "Key " + i + " bytes");
+            Assert.assertEquals(((StringValue) entry.getValue()).toByteArray(), vals[i], "Value " + i + " bytes");
+            i++;
+        }
+
+        // Lookup by an equal StringValue key must work (relies on equals/hashCode over raw bytes).
+        Assert.assertEquals(((StringValue) map.get(new StringValue(keys[0]))).toByteArray(), vals[0]);
+    }
+
+    /**
+     * Verifies that a String column read with {@code NO_TYPE_HINT_MAPPING} is still returned as a plain
+     * {@link String}.
+     */
+    @Test
+    public void testDefaultBehaviorReturnsString() throws IOException {
+        ByteArrayOutputStream wire = new ByteArrayOutputStream();
+        BinaryStreamUtils.writeString(wire, "still a string".getBytes(StandardCharsets.UTF_8));
+
+        ClickHouseColumn stringColumn = ClickHouseColumn.of("s", "String");
+        Object decoded = reader(wire.toByteArray(), AbstractBinaryFormatReader.NO_TYPE_HINT_MAPPING)
+                .readValue(stringColumn);
+
+        Assert.assertTrue(decoded instanceof String, "Without a type hint Strings must still be returned as String");
+        Assert.assertEquals(decoded, "still a string");
+    }
+
+ // ---- Writing binary String values ----
+
+    /**
+     * Serializes a {@code byte[]} into a String column and reads it back as a {@link StringValue},
+     * expecting identical bytes.
+     */
+    @Test
+    public void testWriteByteArrayToStringRoundTrip() throws IOException {
+        final byte[] payload = {(byte) 0x00, (byte) 0xFF, (byte) 0xAB, (byte) 0xCD, (byte) 0x7F};
+        final ClickHouseColumn stringColumn = ClickHouseColumn.of("s", "String");
+
+        ByteArrayOutputStream wire = new ByteArrayOutputStream();
+        SerializerUtils.serializeData(wire, payload, stringColumn);
+
+        StringValue decoded = reader(wire.toByteArray(), STRING_AS_BINARY).readValue(stringColumn);
+        Assert.assertEquals(decoded.toByteArray(), payload);
+    }
+
+    /**
+     * Serializes a {@link StringValue} into a String column and reads it back, expecting the bytes the
+     * value was constructed from.
+     */
+    @Test
+    public void testWriteStringValueToStringRoundTrip() throws IOException {
+        final byte[] payload = {(byte) 0x10, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, (byte) 0x00};
+        final ClickHouseColumn stringColumn = ClickHouseColumn.of("s", "String");
+
+        ByteArrayOutputStream wire = new ByteArrayOutputStream();
+        SerializerUtils.serializeData(wire, new StringValue(payload), stringColumn);
+
+        StringValue decoded = reader(wire.toByteArray(), STRING_AS_BINARY).readValue(stringColumn);
+        Assert.assertEquals(decoded.toByteArray(), payload);
+    }
+
+    /**
+     * Serializes a three-byte {@code byte[]} into a {@code FixedString(3)} column and reads it back as a
+     * {@link StringValue}, expecting identical bytes.
+     */
+    @Test
+    public void testWriteByteArrayToFixedStringRoundTrip() throws IOException {
+        final byte[] payload = {(byte) 0xAA, (byte) 0xBB, (byte) 0xCC};
+        final ClickHouseColumn fixedColumn = ClickHouseColumn.of("s", "FixedString(3)");
+
+        ByteArrayOutputStream wire = new ByteArrayOutputStream();
+        SerializerUtils.serializeData(wire, payload, fixedColumn);
+
+        StringValue decoded = reader(wire.toByteArray(), STRING_AS_BINARY).readValue(fixedColumn);
+        Assert.assertEquals(decoded.toByteArray(), payload);
+    }
+}
diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java
index b3e9f0676..1ffdb0782 100644
--- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java
+++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java
@@ -7,6 +7,7 @@
import com.clickhouse.client.api.Client;
import com.clickhouse.client.api.command.CommandSettings;
import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader;
+import com.clickhouse.client.api.data_formats.StringValue;
import com.clickhouse.client.api.enums.Protocol;
import com.clickhouse.client.api.query.GenericRecord;
import com.clickhouse.client.api.query.QueryResponse;
@@ -572,4 +573,121 @@ private Client.Builder newClient() {
.setPassword(ClickHouseServerForTest.getPassword());
}
+    /**
+     * Reads {@code String} and {@code FixedString} columns with both types hinted to {@link StringValue},
+     * first through a streaming binary reader and then through {@code queryAll}, checking the decoded text
+     * and the raw bytes of every row.
+     */
+    @Test(groups = {"integration"})
+    public void testReadingStringValue() throws Exception {
+        final String table = "test_reading_stringvalue";
+        // Explicit charset so the expected bytes do not depend on the platform default.
+        final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
+
+        client.execute("DROP TABLE IF EXISTS " + table).get();
+        client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = Memory").get();
+        client.execute("INSERT INTO " + table + " VALUES (1, 'hello', 'world', 'a'), (2, 'ClickHouse', 'Rocks', 'b')").get();
+
+        java.util.Map<ClickHouseDataType, Class<?>> typeHints = new java.util.HashMap<>();
+        typeHints.put(ClickHouseDataType.String, StringValue.class);
+        typeHints.put(ClickHouseDataType.FixedString, StringValue.class);
+
+        // try-with-resources closes the extra client even when an assertion fails.
+        try (Client customClient = newClient()
+                .typeHintMapping(typeHints)
+                .build()) {
+            try (QueryResponse response = customClient.query("SELECT * FROM " + table + " ORDER BY id").get()) {
+                ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response);
+
+                // Test reading multiple strings in a row and check that their content differs
+                Assert.assertNotNull(reader.next());
+                Assert.assertEquals(reader.getInteger("id"), 1);
+                StringValue s1 = (StringValue) reader.readValue("s");
+                StringValue fs1 = (StringValue) reader.readValue("fs");
+                StringValue e1 = (StringValue) reader.readValue("e");
+
+                Assert.assertEquals(s1.asString(), "hello");
+                Assert.assertEquals(fs1.asString(), "world");
+                Assert.assertEquals(e1.asString(), "a");
+
+                // Test getting read value multiple times; assertSame requires the very same instance.
+                Assert.assertSame(s1, reader.readValue("s"), "Consecutive reads for the same row should return the same instance");
+                Assert.assertEquals(reader.getString("s"), "hello");
+                // Test reading byte[] from String columns
+                Assert.assertEquals(reader.getByteArray("s"), "hello".getBytes(utf8));
+                Assert.assertEquals(reader.getByteArray("fs"), "world".getBytes(utf8));
+                Assert.assertEquals(reader.getByteArray("e"), "a".getBytes(utf8));
+
+                Assert.assertNotNull(reader.next());
+                Assert.assertEquals(reader.getInteger("id"), 2);
+                StringValue s2 = (StringValue) reader.readValue("s");
+                StringValue fs2 = (StringValue) reader.readValue("fs");
+                StringValue e2 = (StringValue) reader.readValue("e");
+
+                Assert.assertEquals(s2.asString(), "ClickHouse");
+                Assert.assertEquals(fs2.asString(), "Rocks");
+                Assert.assertEquals(e2.asString(), "b");
+
+                Assert.assertNotEquals(s1.asString(), s2.asString());
+                Assert.assertNotEquals(fs1.asString(), fs2.asString());
+            }
+
+            // test queryAll with string value
+            List<GenericRecord> records = customClient.queryAll("SELECT * FROM " + table + " ORDER BY id");
+            Assert.assertEquals(records.size(), 2);
+
+            GenericRecord first = records.get(0);
+            Assert.assertEquals(first.getInteger("id"), 1);
+            Assert.assertEquals(first.getString("s"), "hello");
+            Assert.assertEquals(first.getString("fs"), "world");
+            Assert.assertEquals(first.getByteArray("s"), "hello".getBytes(utf8));
+            Assert.assertEquals(first.getByteArray("fs"), "world".getBytes(utf8));
+            Assert.assertEquals(first.getByteArray("e"), "a".getBytes(utf8));
+
+            GenericRecord second = records.get(1);
+            Assert.assertEquals(second.getInteger("id"), 2);
+            Assert.assertEquals(second.getString("s"), "ClickHouse");
+            Assert.assertEquals(second.getString("fs"), "Rocks");
+            Assert.assertEquals(second.getByteArray("s"), "ClickHouse".getBytes(utf8));
+            Assert.assertEquals(second.getByteArray("fs"), "Rocks".getBytes(utf8));
+            Assert.assertEquals(second.getByteArray("e"), "b".getBytes(utf8));
+        }
+    }
+
+ /**
+ * Regression test for https://github.com/ClickHouse/clickhouse-java/issues/1397: a String value that holds
+ * arbitrary binary content (here a SHA-512 hash, which is almost never valid UTF-8) must be read back byte
+ * for byte instead of being mangled by lossy UTF-8 decoding.
+ */
+ @Test(groups = {"integration"})
+ public void testReadingBinaryStringFromHash() throws Exception {
+ final String message = "abc";
+ final byte[] expectedHash = java.security.MessageDigest.getInstance("SHA-512")
+ .digest(message.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+ Assert.assertEquals(expectedHash.length, 64);
+
+ java.util.Map> typeHints = new java.util.HashMap<>();
+ typeHints.put(ClickHouseDataType.String, StringValue.class);
+ typeHints.put(ClickHouseDataType.FixedString, StringValue.class);
+
+ Client customClient = newClient()
+ .typeHintMapping(typeHints)
+ .build();
+
+ final String query = "SELECT SHA512('" + message + "') AS hash";
+ try {
+ try (QueryResponse response = customClient.query(query).get()) {
+ ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response);
+ Assert.assertNotNull(reader.next());
+
+ StringValue hash = (StringValue) reader.readValue("hash");
+ Assert.assertEquals(hash.size(), expectedHash.length);
+ Assert.assertEquals(hash.toByteArray(), expectedHash,
+ "Binary hash bytes must be preserved exactly");
+ // getByteArray must agree with the raw StringValue bytes
+ Assert.assertEquals(reader.getByteArray("hash"), expectedHash);
+ }
+
+ List records = customClient.queryAll(query);
+ Assert.assertEquals(records.size(), 1);
+ Assert.assertEquals(records.get(0).getByteArray("hash"), expectedHash,
+ "Binary hash read via queryAll must match the locally computed digest");
+ } finally {
+ customClient.close();
+ }
+ }
}
\ No newline at end of file
diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java
index 1a0ee2287..2ed1889c9 100644
--- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java
+++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java
@@ -158,6 +158,11 @@ private static void assertEqualsKinda(Object actual, Object expected) {
expected = ((BigDecimal) expected).stripTrailingZeros();
}
+ if (actual instanceof byte[] && expected instanceof byte[]) {
+ org.testng.Assert.assertEquals((byte[]) actual, (byte[]) expected);
+ return;
+ }
+
assertEquals(String.valueOf(actual), String.valueOf(expected));
}
@@ -376,6 +381,141 @@ public void writeStringsTest() throws Exception {
writeTest(tableName, tableCreate, rows);
}
+    /**
+     * Writes non-UTF-8 {@code byte[]} values into {@code String} and {@code FixedString} columns, once via
+     * {@code setValue} and once via {@code setString}, then reads both rows back with String and FixedString
+     * hinted to {@code StringValue} and checks the bytes are unchanged.
+     */
+    @Test (groups = { "integration" })
+    public void writeBinaryStringsTest() throws Exception {
+        String tableName = "rowBinaryFormatWriterTest_writeBinaryStringsTests_" + UUID.randomUUID().toString().replace('-', '_');
+        String tableCreate = "CREATE TABLE \"" + tableName + "\" " +
+                " (id Int32, " +
+                " string String, " +
+                " fixed_string FixedString(5), " +
+                " fixed_string_one FixedString(1) " +
+                " ) Engine = MergeTree ORDER BY id";
+
+        byte[] binaryData = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, (byte) 0x00, (byte) 0xFF, (byte) 0x80};
+        byte[] fixedStringData = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC, (byte) 0xDD, (byte) 0xEE};
+        byte[] fixedStringOneData = new byte[]{(byte) 0x7F};
+
+        // Instead of writeTest which reads back using default string decoding, we write manually
+        // and query back using typeHintMapping to preserve raw bytes
+        initTable(tableName, tableCreate, new CommandSettings());
+        TableSchema schema = client.getTableSchema(tableName);
+
+        ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults;
+        // Row 1: byte[] values passed through the generic setValue entry point.
+        try (InsertResponse response = client.insert(tableName, out -> {
+            RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format);
+            w.setValue(schema.nameToColumnIndex("id"), 1);
+            w.setValue(schema.nameToColumnIndex("string"), binaryData);
+            w.setValue(schema.nameToColumnIndex("fixed_string"), fixedStringData);
+            w.setValue(schema.nameToColumnIndex("fixed_string_one"), fixedStringOneData);
+            w.commitRow();
+        }, format, settings).get()) {
+            System.out.println("Rows written (Field-like): " + response.getWrittenRows());
+        }
+
+        // Also test inserting with byte[] directly via RowBinaryFormatWriter
+        try (InsertResponse response = client.insert(tableName, out -> {
+            RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format);
+            w.setValue(schema.nameToColumnIndex("id"), 2);
+            w.setString("string", binaryData);
+            w.setString("fixed_string", fixedStringData);
+            w.setString("fixed_string_one", fixedStringOneData);
+            w.commitRow();
+        }, format, settings).get()) {
+            System.out.println("Rows written (manual): " + response.getWrittenRows());
+        }
+
+        java.util.Map<com.clickhouse.data.ClickHouseDataType, Class<?>> typeHints = new java.util.HashMap<>();
+        typeHints.put(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class);
+        typeHints.put(com.clickhouse.data.ClickHouseDataType.FixedString, com.clickhouse.client.api.data_formats.StringValue.class);
+
+        // try-with-resources so the extra client is closed even when an assertion below fails.
+        try (Client customClient = newClient()
+                .typeHintMapping(typeHints)
+                .build()) {
+            List<GenericRecord> records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id" );
+            assertEquals(records.size(), 2);
+
+            // Both rows were written with the same payloads, so every record must match them.
+            for (GenericRecord record : records) {
+                org.testng.Assert.assertEquals(record.getByteArray("string"), binaryData);
+                org.testng.Assert.assertEquals(record.getByteArray("fixed_string"), fixedStringData);
+                org.testng.Assert.assertEquals(record.getByteArray("fixed_string_one"), fixedStringOneData);
+            }
+        }
+    }
+
+    /**
+     * Writes the {@code clickhouse-logo.png} test resource into a {@code String} column and reads it back
+     * with String hinted to {@code StringValue}, checking the bytes through the streaming reader (by index
+     * and by name), through {@code queryAll}, and through the {@code StringValue} object itself.
+     */
+    @Test (groups = { "integration" })
+    public void writeAndReadImageTest() throws Exception {
+        // Demonstrates that large binary blobs (here a ~10KB PNG) survive a full write/read round-trip
+        // through a String column without being corrupted by lossy UTF-8 decoding.
+        byte[] imageData = readResource("clickhouse-logo.png");
+        org.testng.Assert.assertTrue(imageData.length > 1024, "Expected a non-trivial binary payload");
+
+        String tableName = "rowBinaryFormatWriterTest_writeAndReadImageTest_" + UUID.randomUUID().toString().replace('-', '_');
+        String tableCreate = "CREATE TABLE \"" + tableName + "\" " +
+                " (id Int32, image String) Engine = MergeTree ORDER BY id";
+
+        initTable(tableName, tableCreate, new CommandSettings());
+        TableSchema schema = client.getTableSchema(tableName);
+
+        ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults;
+        try (InsertResponse response = client.insert(tableName, out -> {
+            RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format);
+            w.setValue(schema.nameToColumnIndex("id"), 1);
+            w.setValue(schema.nameToColumnIndex("image"), imageData);
+            w.commitRow();
+        }, format, settings).get()) {
+            System.out.println("Image bytes written: " + imageData.length + ", rows: " + response.getWrittenRows());
+        }
+
+        Map<com.clickhouse.data.ClickHouseDataType, Class<?>> typeHints = new HashMap<>();
+        typeHints.put(com.clickhouse.data.ClickHouseDataType.String,
+                com.clickhouse.client.api.data_formats.StringValue.class);
+
+        try (Client customClient = newClient().typeHintMapping(typeHints).build()) {
+            // Idiomatic path: stream rows and read the binary payload via the index-based getByteArray(int).
+            try (com.clickhouse.client.api.query.QueryResponse response =
+                         customClient.query("SELECT * FROM \"" + tableName + "\" ORDER BY id").get()) {
+                com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader reader =
+                        customClient.newBinaryFormatReader(response);
+                org.testng.Assert.assertNotNull(reader.next());
+
+                int imageIndex = reader.getSchema().nameToColumnIndex("image");
+                byte[] streamed = reader.getByteArray(imageIndex);
+                org.testng.Assert.assertEquals(streamed, imageData,
+                        "Image bytes read via getByteArray(int) must match the source exactly");
+                // The name-based overload must agree with the index-based one.
+                org.testng.Assert.assertEquals(reader.getByteArray("image"), streamed);
+            }
+
+            List<GenericRecord> records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id");
+            assertEquals(records.size(), 1);
+
+            GenericRecord record = records.get(0);
+            // Raw bytes must be preserved exactly, regardless of how they are accessed.
+            org.testng.Assert.assertEquals(record.getByteArray("image"), imageData,
+                    "Image bytes read back via getByteArray must match the source exactly");
+
+            com.clickhouse.client.api.data_formats.StringValue value =
+                    (com.clickhouse.client.api.data_formats.StringValue) record.getObject("image");
+            org.testng.Assert.assertEquals(value.size(), imageData.length);
+            org.testng.Assert.assertEquals(value.toByteArray(), imageData,
+                    "StringValue must preserve the full binary payload");
+        }
+    }
+
+    /**
+     * Reads a classpath resource fully into memory.
+     *
+     * @param name resource name, resolved through this class's class loader
+     * @return the complete content of the resource
+     * @throws IOException if reading or closing the stream fails
+     */
+    private byte[] readResource(String name) throws IOException {
+        try (java.io.InputStream source = getClass().getClassLoader().getResourceAsStream(name)) {
+            // A missing resource fails the test with a descriptive assertion message.
+            org.testng.Assert.assertNotNull(source, "Test resource not found on classpath: " + name);
+            java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
+            byte[] window = new byte[8192];
+            for (int count = source.read(window); count != -1; count = source.read(window)) {
+                collected.write(window, 0, count);
+            }
+            return collected.toByteArray();
+        }
+    }
@Test (groups = { "integration" })
public void writeDatetimeTests() throws Exception {
diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java
index 6ffdfea5e..5045e290c 100644
--- a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java
+++ b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java
@@ -199,7 +199,14 @@ public void insertPOJOAndReadBack() throws Exception {
try (QueryResponse queryResponse =
client.query("SELECT * FROM " + tableName + " LIMIT 1").get(EXECUTE_CMD_TIMEOUT, TimeUnit.SECONDS)) {
- ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(queryResponse);
+ // To read the binaryString properly as raw bytes, we must map String to StringValue
+ Client readerClient = client;
+ if (pojo.getBinaryString() != null) {
+ readerClient = newClient()
+ .typeHintMapping(java.util.Collections.singletonMap(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class))
+ .build();
+ }
+ ClickHouseBinaryFormatReader reader = readerClient.newBinaryFormatReader(queryResponse);
Assert.assertNotNull(reader.next());
Assert.assertEquals(reader.getByte("byteValue"), pojo.getByteValue());
@@ -212,12 +219,17 @@ public void insertPOJOAndReadBack() throws Exception {
Assert.assertEquals(reader.getDouble("float64"), pojo.getFloat64());
Assert.assertEquals(reader.getString("string"), pojo.getString());
Assert.assertEquals(reader.getString("fixedString"), pojo.getFixedString());
+ Assert.assertEquals(reader.getByteArray("binaryString"), pojo.getBinaryString());
Assert.assertTrue(reader.getZonedDateTime("zonedDateTime").isEqual(pojo.getZonedDateTime().withNano(0)));
Assert.assertTrue(reader.getZonedDateTime("zonedDateTime64").isEqual(pojo.getZonedDateTime64()));
Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime").isEqual(pojo.getOffsetDateTime().withNano(0)));
Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime64").isEqual(pojo.getOffsetDateTime64()));
Assert.assertEquals(reader.getInstant("instant"), pojo.getInstant().with(ChronoField.MICRO_OF_SECOND, 0));
Assert.assertEquals(reader.getInstant("instant64"), pojo.getInstant64());
+
+ if (readerClient != client) {
+ readerClient.close();
+ }
}
}
diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java
index 6661b94bc..920f86317 100644
--- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java
+++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java
@@ -5,6 +5,9 @@
import lombok.Setter;
import org.apache.commons.lang3.RandomStringUtils;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.Inet4Address;
@@ -63,6 +66,7 @@ public class SamplePOJO {
private String string;
private String fixedString;
+ private byte[] binaryString;
private LocalDate date;
private LocalDate date32;
@@ -145,6 +149,8 @@ public SamplePOJO() {
string = RandomStringUtils.randomAlphabetic(1, 256);
fixedString = RandomStringUtils.randomAlphabetic(3);
+ // Use a real binary blob (a PNG image) to exercise inserting/reading large non-UTF-8 String values.
+ binaryString = loadClickHouseLogo();
date = LocalDate.now();
date32 = LocalDate.now();
@@ -207,6 +213,23 @@ public SamplePOJO() {
keyword = "database";
}
+ private static byte[] loadClickHouseLogo() {
+ try (InputStream is = SamplePOJO.class.getClassLoader().getResourceAsStream("clickhouse-logo.png")) {
+ if (is == null) {
+ throw new IllegalStateException("Test resource not found on classpath: clickhouse-logo.png");
+ }
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ byte[] chunk = new byte[8192];
+ int read;
+ while ((read = is.read(chunk)) != -1) {
+ buffer.write(chunk, 0, read);
+ }
+ return buffer.toByteArray();
+ } catch (IOException e) {
+ throw new IllegalStateException("Failed to read test resource clickhouse-logo.png", e);
+ }
+ }
+
@Override
public String toString() {
return "SamplePOJO{" +
@@ -308,6 +331,7 @@ public static String generateTableCreateSQL(String tableName) {
// "boxedBool UInt8, " +
"string String, " +
"fixedString FixedString(3), " +
+ "binaryString String, " +
"date Date, " +
"date32 Date, " +
"dateTime DateTime, " +
diff --git a/client-v2/src/test/resources/clickhouse-logo.png b/client-v2/src/test/resources/clickhouse-logo.png
new file mode 100644
index 000000000..d68e65e11
Binary files /dev/null and b/client-v2/src/test/resources/clickhouse-logo.png differ