diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java index 8494c16c3..a9a212f6a 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/ClickHouseBinaryFormatWriter.java @@ -84,6 +84,10 @@ public interface ClickHouseBinaryFormatWriter { void setString(int colIndex, String value); + void setString(String column, byte[] value); + + void setString(int colIndex, byte[] value); + void setDate(String column, LocalDate value); void setDate(int colIndex, LocalDate value); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java index ad8ee680a..303a5e7f4 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatSerializer.java @@ -126,10 +126,18 @@ public void writeString(String value) throws IOException { BinaryStreamUtils.writeString(out, value); } + public void writeString(byte[] value) throws IOException { + BinaryStreamUtils.writeString(out, value); + } + public void writeFixedString(String value, int len) throws IOException { BinaryStreamUtils.writeFixedString(out, value, len); } + public void writeFixedString(byte[] value, int len) throws IOException { + SerializerUtils.writeFixedStringBytes(out, value, len); + } + public void writeDate(ZonedDateTime value) throws IOException { SerializerUtils.writeDate(out, value, value.getZone()); } diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java 
b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java index a487da1b9..2a2ecedd3 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/RowBinaryFormatWriter.java @@ -203,6 +203,16 @@ public void setString(int colIndex, String value) { setValue(colIndex, value); } + @Override + public void setString(String column, byte[] value) { + setValue(column, value); + } + + @Override + public void setString(int colIndex, byte[] value) { + setValue(colIndex, value); + } + @Override public void setDate(String column, LocalDate value) { setValue(column, value);
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java
new file mode 100644
index 000000000..3ca94c923
--- /dev/null
+++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/StringValue.java
@@ -0,0 +1,186 @@
+package com.clickhouse.client.api.data_formats;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * Holder for ClickHouse {@code String} or {@code FixedString} values that preserves raw bytes
+ * to avoid lossy decoding and unnecessary allocations.
+ * <p>
+ * This is a mutable structure and must be used with care. To avoid copying, it does not
+ * duplicate the bytes it is given: the constructor wraps the supplied array/buffer instead of
+ * copying it, and {@link #toByteArray()} returns a direct reference to the backing array (rather
+ * than a defensive copy) whenever the value spans that whole array. Consequently, mutating the
+ * source array, the array returned by {@link #toByteArray()}, or reading the same value
+ * concurrently while it is being modified will change the observed value. Callers that need an
+ * independent snapshot must copy the bytes themselves.
+ * <p>
+ * Backed by a {@link ByteBuffer} for a richer API and future off-heap memory support. Only heap
+ * buffers (with an accessible backing array) are supported today; constructing a value from a
+ * direct (off-heap) buffer is rejected. The decoded {@link String} produced by {@link #asString()}
+ * is cached and is not refreshed if the underlying bytes change afterwards.
+ */
+public class StringValue {
+
+    /** Charset used by {@link #asString()} and {@link #toString()} when no charset is provided. */
+    public static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+
+    private final ByteBuffer buffer;
+
+    private final Charset defaultCharset;
+
+    private volatile String cached;
+
+    /**
+     * Creates a value backed by the given bytes. The array is wrapped, not copied, so it must not be
+     * modified after being passed in.
+     *
+     * @param bytes raw value bytes (not null)
+     */
+    public StringValue(byte[] bytes) {
+        this(bytes, DEFAULT_CHARSET);
+    }
+
+    /**
+     * Creates a value backed by the given bytes using the provided default charset. The array is wrapped,
+     * not copied, so it must not be modified after being passed in.
+     *
+     * @param bytes raw value bytes (not null)
+     * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
+     */
+    public StringValue(byte[] bytes, Charset defaultCharset) {
+        this(ByteBuffer.wrap(bytes), defaultCharset);
+    }
+
+    /**
+     * Creates a value backed by the remaining content of the given buffer using the provided default charset.
+     * The buffer is referenced, not copied, so its content must not be modified afterwards.
+     *
+     * @param buffer backing heap buffer (not null); its remaining bytes define the value
+     * @param defaultCharset charset used by {@link #asString()} and {@link #toString()} (not null)
+     * @throws IllegalArgumentException if the buffer is a direct (off-heap) buffer with no accessible array
+     */
+    public StringValue(ByteBuffer buffer, Charset defaultCharset) {
+        Objects.requireNonNull(buffer, "buffer cannot be null");
+        Objects.requireNonNull(defaultCharset, "charset is required to convert buffer to String");
+
+        if (!buffer.hasArray()) {
+            throw new IllegalArgumentException("Can work only with heap buffer.");
+        }
+
+        // Keep an independent view so external position/limit changes do not affect this value.
+        this.buffer = buffer.slice();
+        this.defaultCharset = defaultCharset;
+    }
+
+    /**
+     * Returns a read-only view over the raw bytes of this value. The returned buffer has its own
+     * position/limit; it shares the underlying bytes with this value but cannot be used to modify them.
+     *
+     * @return read-only buffer positioned at the first byte of the value
+     */
+    public ByteBuffer asByteBuffer() {
+        return buffer.asReadOnlyBuffer();
+    }
+
+    /**
+     * Returns the raw bytes of this value.
+     * <p>
+     * When the value spans its whole backing array (always the case for values created from a
+     * {@code byte[]}), that array is returned directly without copying: mutating it mutates this
+     * value, and callers that need an independent snapshot must copy the result themselves.
+     * <p>
+     * When the value covers only part of a larger array (a buffer created with a non-zero position,
+     * a reduced limit or an array offset), {@link ByteBuffer#array()} would expose bytes outside the
+     * value, so a copy of exactly the value's bytes is returned instead.
+     *
+     * @return an array holding exactly the {@link #size()} bytes of this value
+     */
+    public byte[] toByteArray() {
+        byte[] backing = buffer.array();
+        int offset = buffer.arrayOffset() + buffer.position();
+        int length = buffer.remaining();
+        if (offset == 0 && length == backing.length) {
+            return backing;
+        }
+        return Arrays.copyOfRange(backing, offset, offset + length);
+    }
+
+    /**
+     * @return number of bytes in this value
+     */
+    public int size() {
+        return buffer.remaining();
+    }
+
+    /**
+     * @return {@code true} if the value has no bytes
+     */
+    public boolean isEmpty() {
+        return buffer.remaining() == 0;
+    }
+
+    /**
+     * Decodes the value using the default charset (UTF-8 unless another was provided at construction).
+     * The result is cached so repeated calls do not allocate a new string.
+     *
+     * @return decoded string
+     */
+    public String asString() {
+        String s = cached;
+        if (s == null) {
+            s = decode(defaultCharset);
+            cached = s;
+        }
+        return s;
+    }
+
+    /**
+     * Decodes the value using the given charset. The result is cached only when the charset matches the
+     * default charset of this value.
+     *
+     * @param charset charset to decode with (not null)
+     * @return decoded string
+     */
+    public String asString(Charset charset) {
+        Objects.requireNonNull(charset, "charset cannot be null");
+        if (charset.equals(defaultCharset)) {
+            return asString();
+        }
+        return decode(charset);
+    }
+
+    /**
+     * Decodes exactly the bytes of this value. The range is taken from the buffer view because the
+     * backing array may be larger than the value.
+     */
+    private String decode(Charset charset) {
+        int offset = buffer.arrayOffset() + buffer.position();
+        return new String(buffer.array(), offset, buffer.remaining(), charset);
+    }
+
+    @Override
+    public String toString() {
+        return asString();
+    }
+
+    /** Compares the raw bytes only; the default charset does not take part in equality. */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof StringValue)) {
+            return false;
+        }
+        return buffer.equals(((StringValue) o).buffer);
+    }
+
+    @Override
+    public int hashCode() {
+        return buffer.hashCode();
+    }
+}
diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java index e5892748d..54b907749 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/AbstractBinaryFormatReader.java @@ -4,6 +4,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; import com.clickhouse.client.api.internal.MapUtils; import com.clickhouse.client.api.internal.ServerSettings; @@ -532,8 +533,9 @@ private T getPrimitiveArray(int index, Class componentType) { } return (T)array; } else if (componentType == byte.class) { - if (value instanceof String) { - return (T) ((String) value).getBytes(StandardCharsets.UTF_8); + byte[] bytes = stringLikeToBytes(value); + if (bytes != null) { + return (T) bytes; } else if (value instanceof InetAddress) { return
(T) ((InetAddress) value).getAddress(); } @@ -676,6 +678,24 @@ public Instant getInstant(int index) { throw new ClientException("Column of type " + column.getDataType() + " cannot be converted to Instant"); } + /** + * Converts a string-like value into its raw bytes. For a {@link StringValue} the original bytes are + * returned without re-encoding (so binary content is preserved). For a {@link String} the bytes are + * produced using UTF-8, matching the historical behaviour. Returns {@code null} when the value is not + * a string-like type so callers can fall back to other handling. + * + * @param value value to convert + * @return raw bytes or {@code null} if the value is not string-like + */ + public static byte[] stringLikeToBytes(Object value) { + if (value instanceof StringValue) { + return ((StringValue) value).toByteArray(); + } else if (value instanceof String) { + return ((String) value).getBytes(StandardCharsets.UTF_8); + } + return null; + } + static Instant objectToInstant(Object value) { if (value instanceof LocalDateTime) { LocalDateTime dateTime = (LocalDateTime) value; @@ -866,6 +886,10 @@ public String[] getStringArray(int index) { BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; if (array.itemType == String.class) { return (String[]) array.getArray(); + } else if (array.itemType == StringValue.class) { + StringValue[] stringValues = (StringValue[]) array.getArray(); + return Arrays.stream(stringValues) + .map(sv -> sv == null ? 
null : sv.asString()).toArray(String[]::new); } else if (array.itemType == BinaryStreamReader.EnumValue.class) { BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray(); return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java index 8a6b76a5a..6d0f19971 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/BinaryStreamReader.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.ClickHouseEnum; @@ -55,6 +56,8 @@ public class BinaryStreamReader { private final Class arrayDefaultTypeHint; + private final boolean stringAsBinaryDefault; + private static final int SB_INIT_SIZE = 100; private ClickHouseColumn lastDataColumn = null; @@ -69,7 +72,7 @@ public class BinaryStreamReader { * @param jsonAsString - use string to serialize/deserialize JSON columns * @param typeHintMapping - what type use as hint if hint is not set or may not be known. */ - BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { + public BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator, boolean jsonAsString, Map> typeHintMapping) { this.log = log == null ? 
NOPLogger.NOP_LOGGER : log; this.timeZone = timeZone; this.input = input; @@ -78,6 +81,20 @@ public class BinaryStreamReader { this.arrayDefaultTypeHint = typeHintMapping == null || typeHintMapping.isEmpty()? NO_TYPE_HINT : typeHintMapping.get(ClickHouseDataType.Array); + this.stringAsBinaryDefault = typeHintMapping != null && + typeHintMapping.get(ClickHouseDataType.String) == StringValue.class; + } + + private boolean readStringAsBinary(Class typeHint) { + if (typeHint != null) { + if (typeHint == StringValue.class) { + return true; + } + if (typeHint == String.class) { + return false; + } + } + return stringAsBinaryDefault; } /** @@ -121,12 +138,18 @@ public T readValue(ClickHouseColumn column, Class typeHint) throws IOExce switch (dataType) { // Primitives case FixedString: { + if (readStringAsBinary(typeHint)) { + return (T) new StringValue(readStringBytes(input, precision)); + } byte[] bytes = precision > STRING_BUFF.length ? new byte[precision] : STRING_BUFF; readNBytes(input, bytes, 0, precision); return (T) new String(bytes, 0, precision, StandardCharsets.UTF_8); } case String: { + if (readStringAsBinary(typeHint)) { + return (T) readStringValue(); + } return (T) readString(); } case Int8: @@ -1119,17 +1142,41 @@ public String readString() throws IOException { } /** - * Reads a decimal value from input stream. + * Reads a string from the internal input stream preserving the raw bytes as a {@link StringValue}. + * Unlike {@link #readString()} this does not decode bytes into a {@link String} and never reuses the + * shared buffer, so the value is safe to keep after the next read. + * + * @return string value holding the raw bytes + * @throws IOException when IO error occurs + */ + public StringValue readStringValue() throws IOException { + return new StringValue(readStringBytes(input, readVarInt(input))); + } + + /** + * Reads the raw bytes of a string from the input stream given its length. 
+ * * @param input - source of bytes - * @return String + * @param len - number of bytes to read + * @return byte[] containing the raw string bytes * @throws IOException when IO error occurs */ - public static String readString(InputStream input) throws IOException { - int len = readVarInt(input); + public static byte[] readStringBytes(InputStream input, int len) throws IOException { if (len == 0) { - return ""; + return new byte[0]; } - return new String(readNBytes(input, len), StandardCharsets.UTF_8); + return readNBytes(input, len); + } + + /** + * Reads a string value from input stream. + * @param input - source of bytes + * @return String + * @throws IOException when IO error occurs + */ + public static String readString(InputStream input) throws IOException { + byte[] bytes = readStringBytes(input, readVarInt(input)); + return bytes.length == 0 ? "" : new String(bytes, StandardCharsets.UTF_8); } public static int readByteOrEOF(InputStream input) throws IOException { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java index 75f7ea314..a8708cbfb 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/MapBackedRecord.java @@ -2,8 +2,8 @@ import com.clickhouse.client.api.ClientException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.internal.DataTypeConverter; -import com.clickhouse.client.api.metadata.NoSuchColumnException; import com.clickhouse.client.api.metadata.TableSchema; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.NullValueException; @@ -13,7 +13,6 @@ import com.clickhouse.data.value.ClickHouseGeoPointValue; import 
com.clickhouse.data.value.ClickHouseGeoPolygonValue; import com.clickhouse.data.value.ClickHouseGeoRingValue; -import com.google.common.collect.ImmutableList; import java.math.BigDecimal; import java.math.BigInteger; @@ -276,6 +275,14 @@ private T getPrimitiveArray(String colName) { @Override public byte[] getByteArray(String colName) { + Object value = readValue(colName); + if (value == null) { + return null; + } + byte[] bytes = AbstractBinaryFormatReader.stringLikeToBytes(value); + if (bytes != null) { + return bytes; + } return getPrimitiveArray(colName); } @@ -319,6 +326,10 @@ public String[] getStringArray(String colName) { BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) value; if (array.itemType == String.class) { return (String[]) array.getArray(); + } else if (array.itemType == StringValue.class) { + StringValue[] stringValues = (StringValue[]) array.getArray(); + return Arrays.stream(stringValues) + .map(sv -> sv == null ? null : sv.asString()).toArray(String[]::new); } else if (array.itemType == BinaryStreamReader.EnumValue.class) { BinaryStreamReader.EnumValue[] enumValues = (BinaryStreamReader.EnumValue[]) array.getArray(); return Arrays.stream(enumValues).map(BinaryStreamReader.EnumValue::getName).toArray(String[]::new); diff --git a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java index cc7e91792..a4e8fb598 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/data_formats/internal/SerializerUtils.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.ClientException; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.serde.POJOFieldDeserializer; import com.clickhouse.data.ClickHouseAggregateFunction; 
import com.clickhouse.data.ClickHouseColumn; @@ -552,10 +553,22 @@ private static void serializePrimitiveData(OutputStream stream, Object value, Cl BinaryStreamUtils.writeBoolean(stream, (Boolean) value); break; case String: - BinaryStreamUtils.writeString(stream, convertToString(value)); + if (value instanceof byte[]) { + BinaryStreamUtils.writeString(stream, (byte[]) value); + } else if (value instanceof StringValue) { + BinaryStreamUtils.writeString(stream, ((StringValue) value).toByteArray()); + } else { + BinaryStreamUtils.writeString(stream, convertToString(value)); + } break; case FixedString: - BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision()); + if (value instanceof byte[]) { + writeFixedStringBytes(stream, (byte[]) value, column.getPrecision()); + } else if (value instanceof StringValue) { + writeFixedStringBytes(stream, ((StringValue) value).toByteArray(), column.getPrecision()); + } else { + BinaryStreamUtils.writeFixedString(stream, convertToString(value), column.getPrecision()); + } break; case Date: writeDate(stream, value, ZoneId.of("UTC")); // TODO: check @@ -912,6 +925,26 @@ public static String convertToString(Object value) { return java.lang.String.valueOf(value); } + /** + * Writes raw bytes as a ClickHouse {@code FixedString(length)} value. The bytes are written as-is and + * right-padded with zero bytes when shorter than {@code length}. 
+ * + * @param stream output stream + * @param value raw bytes + * @param length fixed string length + * @throws IOException when failed to write to the stream + */ + public static void writeFixedStringBytes(OutputStream stream, byte[] value, int length) throws IOException { + if (value.length > length) { + throw new IllegalArgumentException("Value of length " + value.length + + " is longer than FixedString(" + length + ")"); + } + stream.write(value); + for (int i = value.length; i < length; i++) { + stream.write(0); + } + } + public static > Set parseEnumList(String value, Class enumType) { Set values = new HashSet<>(); for (StringTokenizer causes = new StringTokenizer(value, Client.VALUES_LIST_DELIMITER); causes.hasMoreTokens(); ) { diff --git a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java index b1f6a8520..0faa2a7a8 100644 --- a/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java +++ b/client-v2/src/main/java/com/clickhouse/client/api/internal/DataTypeConverter.java @@ -2,6 +2,7 @@ import com.clickhouse.client.api.ClickHouseException; import com.clickhouse.client.api.DataTypeUtils; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; import com.clickhouse.data.ClickHouseColumn; import com.clickhouse.data.ClickHouseDataType; @@ -85,7 +86,9 @@ public String stringToString(Object bytesOrString, ClickHouseColumn column) { if (column.isArray()) { sb.append(QUOTE); } - if (bytesOrString instanceof CharSequence) { + if (bytesOrString instanceof StringValue) { + sb.append(((StringValue) bytesOrString).asString()); + } else if (bytesOrString instanceof CharSequence) { sb.append(((CharSequence) bytesOrString)); } else if (bytesOrString instanceof byte[]) { sb.append(new String((byte[]) bytesOrString)); diff --git 
a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java new file mode 100644 index 000000000..feecf1d9f --- /dev/null +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/StringValueTests.java @@ -0,0 +1,332 @@ +package com.clickhouse.client.api.data_formats; + +import com.clickhouse.client.api.data_formats.internal.AbstractBinaryFormatReader; +import com.clickhouse.client.api.data_formats.internal.BinaryStreamReader; +import com.clickhouse.client.api.data_formats.internal.SerializerUtils; +import com.clickhouse.data.ClickHouseColumn; +import com.clickhouse.data.ClickHouseDataType; +import com.clickhouse.data.format.BinaryStreamUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.Map; +import java.util.TimeZone; + +public class StringValueTests { + + private static final Map> STRING_AS_BINARY = + Collections.singletonMap(ClickHouseDataType.String, (Class) StringValue.class); + + private static BinaryStreamReader reader(byte[] input, Map> hints) { + return new BinaryStreamReader(new ByteArrayInputStream(input), TimeZone.getTimeZone("UTC"), null, + new BinaryStreamReader.DefaultByteBufferAllocator(), false, hints); + } + + // ---- StringValue API ---- + + @Test + public void testStringValueApiBasics() { + byte[] bytes = "hello world".getBytes(StandardCharsets.UTF_8); + StringValue sv = new StringValue(bytes); + + Assert.assertEquals(sv.size(), bytes.length); + Assert.assertFalse(sv.isEmpty()); + Assert.assertEquals(sv.asString(), "hello world"); + Assert.assertEquals(sv.toString(), "hello world"); + 
Assert.assertEquals(sv.toByteArray(), bytes); + } + + @Test + public void testToByteArrayReturnsBackingArrayReference() { + byte[] bytes = {1, 2, 3, 4}; + StringValue sv = new StringValue(bytes); + byte[] backing = sv.toByteArray(); + // No copy is made: the returned array is the live backing storage and mutating it mutates the value. + Assert.assertSame(backing, bytes, "toByteArray() must return the backing array without copying"); + backing[0] = 42; + Assert.assertEquals(sv.toByteArray()[0], 42, "Mutating the returned array mutates the value (no copy)"); + } + + @Test + public void testAsByteBufferIsReadOnly() { + StringValue sv = new StringValue(new byte[]{1, 2, 3}); + ByteBuffer buffer = sv.asByteBuffer(); + Assert.assertTrue(buffer.isReadOnly()); + Assert.assertEquals(buffer.remaining(), 3); + } + + @Test + public void testAsStringIsCached() { + StringValue sv = new StringValue("cached".getBytes(StandardCharsets.UTF_8)); + String first = sv.asString(); + String second = sv.asString(); + Assert.assertSame(first, second, "asString() should cache and return the same instance"); + } + + @Test + public void testAsStringWithCharset() { + String original = "Привет, мир"; + StringValue sv = new StringValue(original.getBytes(StandardCharsets.UTF_16)); + Assert.assertEquals(sv.asString(StandardCharsets.UTF_16), original); + } + + @Test + public void testEqualsAndHashCode() { + StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + StringValue b = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + StringValue c = new StringValue("abd".getBytes(StandardCharsets.UTF_8)); + + // Reflexive + Assert.assertEquals(a, a); + // Equal content -> equal value and equal hash code + Assert.assertEquals(a, b); + Assert.assertEquals(b, a, "equals must be symmetric"); + Assert.assertEquals(a.hashCode(), b.hashCode()); + // Different content -> not equal + Assert.assertNotEquals(a, c); + } + + @Test + public void testEqualsRejectsNullAndOtherTypes() { + 
StringValue a = new StringValue("abc".getBytes(StandardCharsets.UTF_8)); + Assert.assertFalse(a.equals(null), "A value must never equal null"); + Assert.assertFalse(a.equals("abc"), "A value must not equal a raw String of the same text"); + Assert.assertNotEquals(a, new Object()); + } + + @Test + public void testEqualsIgnoresDefaultCharset() { + // equals/hashCode are defined on the raw bytes, so the default charset must not affect them. + byte[] bytes = "abc".getBytes(StandardCharsets.UTF_8); + StringValue utf8 = new StringValue(bytes, StandardCharsets.UTF_8); + StringValue latin1 = new StringValue("abc".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1); + Assert.assertEquals(utf8, latin1, "Values with identical bytes must be equal regardless of default charset"); + Assert.assertEquals(utf8.hashCode(), latin1.hashCode()); + } + + @Test + public void testEqualsDistinguishesByContentAndLength() { + StringValue ab = new StringValue(new byte[]{1, 2}); + StringValue abc = new StringValue(new byte[]{1, 2, 3}); + StringValue empty = new StringValue(new byte[0]); + + // Same prefix but different length must not be equal. + Assert.assertNotEquals(ab, abc); + Assert.assertNotEquals(abc, ab); + // Empty values are only equal to other empty values. + Assert.assertEquals(empty, new StringValue(new byte[0])); + Assert.assertNotEquals(empty, ab); + } + + @Test + public void testEqualsIsConsistentWithBinaryReads() throws IOException { + // Two independently read StringValues over the same bytes must compare equal. 
+ byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80, (byte) 0x7F}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, binary); + byte[] wire = baos.toByteArray(); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + StringValue first = reader(wire, STRING_AS_BINARY).readValue(column); + StringValue second = reader(wire, STRING_AS_BINARY).readValue(column); + + Assert.assertEquals(first, second); + Assert.assertEquals(first.hashCode(), second.hashCode()); + Assert.assertEquals(first, new StringValue(binary)); + } + + @Test + public void testEmptyValue() { + StringValue sv = new StringValue(new byte[0]); + Assert.assertTrue(sv.isEmpty()); + Assert.assertEquals(sv.size(), 0); + Assert.assertEquals(sv.asString(), ""); + Assert.assertEquals(sv.toByteArray().length, 0); + } + + // ---- Reading String columns as StringValue ---- + + @DataProvider(name = "charsetStrings") + private Object[][] charsetStrings() { + return new Object[][]{ + {"plain ascii", StandardCharsets.UTF_8}, + {"unicode: Привет 你好 🚀", StandardCharsets.UTF_8}, + {"latin1 café", StandardCharsets.ISO_8859_1}, + {"utf16 текст", StandardCharsets.UTF_16}, + {" leading and trailing ", StandardCharsets.UTF_8}, + {"", StandardCharsets.UTF_8}, + }; + } + + @Test(dataProvider = "charsetStrings") + public void testReadStringAsStringValuePreservesBytes(String value, Charset charset) throws IOException { + byte[] encoded = value.getBytes(charset); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, encoded); // binary string write (raw bytes) + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof StringValue, "Expected StringValue but got " + read.getClass()); + StringValue sv = (StringValue) read; + Assert.assertEquals(sv.toByteArray(), encoded, "Raw bytes must be 
preserved"); + Assert.assertEquals(sv.asString(charset), value, "Decoding with the source charset must round-trip"); + } + + @Test + public void testReadBinaryNonUtf8IsPreserved() throws IOException { + // Bytes that are not valid UTF-8 (e.g. a binary hash). Decoding as UTF-8 would be lossy. + byte[] binary = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, + (byte) 0xFF, (byte) 0x00, (byte) 0x80, (byte) 0xC0, (byte) 0xFE}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, binary); + + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + StringValue sv = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertEquals(sv.toByteArray(), binary, "Binary content must be preserved exactly"); + Assert.assertEquals(AbstractBinaryFormatReader.stringLikeToBytes(sv), binary, + "Shared string->bytes conversion must preserve binary content"); + } + + @Test + public void testFixedStringAsStringValue() throws IOException { + byte[] binary = new byte[]{(byte) 0x01, (byte) 0xFF, (byte) 0x00, (byte) 0x10, (byte) 0x80}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(binary); // FixedString(5) is written as exactly 5 raw bytes + + ClickHouseColumn column = ClickHouseColumn.of("s", "FixedString(5)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof StringValue); + Assert.assertEquals(((StringValue) read).toByteArray(), binary); + } + + @Test + public void testReadStringArrayAsStringValue() throws IOException { + // Array(String) elements must be preserved as StringValue (including non-UTF-8 content). 
+ byte[][] elements = { + "plain".getBytes(StandardCharsets.UTF_8), + "Привет".getBytes(StandardCharsets.UTF_8), + new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}, + new byte[0], + }; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeVarInt(baos, elements.length); + for (byte[] element : elements) { + BinaryStreamUtils.writeString(baos, element); + } + + ClickHouseColumn column = ClickHouseColumn.of("a", "Array(String)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof BinaryStreamReader.ArrayValue, + "Expected ArrayValue but got " + read.getClass()); + BinaryStreamReader.ArrayValue array = (BinaryStreamReader.ArrayValue) read; + Assert.assertEquals(array.length(), elements.length); + + Object raw = array.getArray(); + Assert.assertTrue(raw instanceof StringValue[], "Array items must be StringValue, got " + raw.getClass()); + StringValue[] values = (StringValue[]) raw; + for (int i = 0; i < elements.length; i++) { + Assert.assertEquals(values[i].toByteArray(), elements[i], "Element " + i + " bytes must be preserved"); + } + } + + @Test + public void testReadStringMapAsStringValue() throws IOException { + // Map(String, String) keys and values must be preserved as StringValue. 
+ byte[][] keys = { + "k1".getBytes(StandardCharsets.UTF_8), + "ключ".getBytes(StandardCharsets.UTF_8), + }; + byte[][] vals = { + "v1".getBytes(StandardCharsets.UTF_8), + new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0x80}, + }; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeVarInt(baos, keys.length); + for (int i = 0; i < keys.length; i++) { + BinaryStreamUtils.writeString(baos, keys[i]); + BinaryStreamUtils.writeString(baos, vals[i]); + } + + ClickHouseColumn column = ClickHouseColumn.of("m", "Map(String, String)"); + Object read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + + Assert.assertTrue(read instanceof Map, "Expected Map but got " + read.getClass()); + Map<?, ?> map = (Map<?, ?>) read; + Assert.assertEquals(map.size(), keys.length); + // NOTE(review): entries are compared by position, which assumes the returned Map iterates in insertion order - confirm. + int i = 0; + for (Map.Entry<?, ?> entry : map.entrySet()) { + Assert.assertTrue(entry.getKey() instanceof StringValue, "Map key must be a StringValue"); + Assert.assertTrue(entry.getValue() instanceof StringValue, "Map value must be a StringValue"); + Assert.assertEquals(((StringValue) entry.getKey()).toByteArray(), keys[i], "Key " + i + " bytes"); + Assert.assertEquals(((StringValue) entry.getValue()).toByteArray(), vals[i], "Value " + i + " bytes"); + i++; + } + + // Lookup by an equal StringValue key must work (relies on equals/hashCode over raw bytes). 
+ Assert.assertEquals(((StringValue) map.get(new StringValue(keys[0]))).toByteArray(), vals[0]); + } + + @Test + public void testDefaultBehaviorReturnsString() throws IOException { + byte[] encoded = "still a string".getBytes(StandardCharsets.UTF_8); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryStreamUtils.writeString(baos, encoded); + // This reader is built with NO_TYPE_HINT_MAPPING instead of the STRING_AS_BINARY hint used by the other tests. + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + Object read = reader(baos.toByteArray(), AbstractBinaryFormatReader.NO_TYPE_HINT_MAPPING).readValue(column); + + Assert.assertTrue(read instanceof String, "Without a type hint Strings must still be returned as String"); + Assert.assertEquals(read, "still a string"); + } + + // ---- Writing binary String values ---- + + @Test + public void testWriteByteArrayToStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0x00, (byte) 0xFF, (byte) 0xAB, (byte) 0xCD, (byte) 0x7F}; + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, binary, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteStringValueToStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0x10, (byte) 0x20, (byte) 0xFE, (byte) 0xFF, (byte) 0x00}; + StringValue value = new StringValue(binary); + ClickHouseColumn column = ClickHouseColumn.of("s", "String"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, value, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } + + @Test + public void testWriteByteArrayToFixedStringRoundTrip() throws IOException { + byte[] binary = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC}; + ClickHouseColumn column = ClickHouseColumn.of("s", 
"FixedString(3)"); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + SerializerUtils.serializeData(baos, binary, column); + StringValue read = reader(baos.toByteArray(), STRING_AS_BINARY).readValue(column); + Assert.assertEquals(read.toByteArray(), binary); + } +} diff --git a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java index b3e9f0676..1ffdb0782 100644 --- a/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/api/data_formats/internal/BaseReaderTests.java @@ -7,6 +7,7 @@ import com.clickhouse.client.api.Client; import com.clickhouse.client.api.command.CommandSettings; import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader; +import com.clickhouse.client.api.data_formats.StringValue; import com.clickhouse.client.api.enums.Protocol; import com.clickhouse.client.api.query.GenericRecord; import com.clickhouse.client.api.query.QueryResponse; @@ -572,4 +573,121 @@ private Client.Builder newClient() { .setPassword(ClickHouseServerForTest.getPassword()); } + @Test(groups = {"integration"}) + public void testReadingStringValue() throws Exception { + final String table = "test_reading_stringvalue"; + + client.execute("DROP TABLE IF EXISTS " + table).get(); + client.execute("CREATE TABLE " + table + " (id Int32, s String, fs FixedString(5), e FixedString(1)) ENGINE = Memory").get(); + client.execute("INSERT INTO " + table + " VALUES (1, 'hello', 'world', 'a'), (2, 'ClickHouse', 'Rocks', 'b')").get(); + + java.util.Map<ClickHouseDataType, Class<?>> typeHints = new java.util.HashMap<>(); + typeHints.put(ClickHouseDataType.String, StringValue.class); + typeHints.put(ClickHouseDataType.FixedString, StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + try { + try (QueryResponse response = 
customClient.query("SELECT * FROM " + table + " ORDER BY id").get()) { + ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response); + + // Test reading multiple strings in a row and check that their content differs + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getInteger("id"), 1); + StringValue s1 = (StringValue) reader.readValue("s"); + StringValue fs1 = (StringValue) reader.readValue("fs"); + StringValue e1 = (StringValue) reader.readValue("e"); + + Assert.assertEquals(s1.asString(), "hello"); + Assert.assertEquals(fs1.asString(), "world"); + Assert.assertEquals(e1.asString(), "a"); + + // Test getting read value multiple times + Assert.assertSame(s1, reader.readValue("s"), "Consecutive reads for the same row must return the same instance"); + Assert.assertEquals(reader.getString("s"), "hello"); + // Test reading byte[] from String columns (expected bytes use an explicit charset, not the platform default) + Assert.assertEquals(reader.getByteArray("s"), "hello".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(reader.getByteArray("fs"), "world".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(reader.getByteArray("e"), "a".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + + Assert.assertNotNull(reader.next()); + Assert.assertEquals(reader.getInteger("id"), 2); + StringValue s2 = (StringValue) reader.readValue("s"); + StringValue fs2 = (StringValue) reader.readValue("fs"); + StringValue e2 = (StringValue) reader.readValue("e"); + + Assert.assertEquals(s2.asString(), "ClickHouse"); + Assert.assertEquals(fs2.asString(), "Rocks"); + Assert.assertEquals(e2.asString(), "b"); + + Assert.assertNotEquals(s1.asString(), s2.asString()); + Assert.assertNotEquals(fs1.asString(), fs2.asString()); + } + + // test queryAll with string value + List<GenericRecord> records = customClient.queryAll("SELECT * FROM " + table + " ORDER BY id"); + Assert.assertEquals(records.size(), 2); + + Assert.assertEquals(records.get(0).getInteger("id"), 1); + Assert.assertEquals(records.get(0).getString("s"), "hello"); + Assert.assertEquals(records.get(0).getString("fs"), 
"world"); + Assert.assertEquals(records.get(0).getByteArray("s"), "hello".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(records.get(0).getByteArray("fs"), "world".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(records.get(0).getByteArray("e"), "a".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + + Assert.assertEquals(records.get(1).getInteger("id"), 2); + Assert.assertEquals(records.get(1).getString("s"), "ClickHouse"); + Assert.assertEquals(records.get(1).getString("fs"), "Rocks"); + Assert.assertEquals(records.get(1).getByteArray("s"), "ClickHouse".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(records.get(1).getByteArray("fs"), "Rocks".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(records.get(1).getByteArray("e"), "b".getBytes(java.nio.charset.StandardCharsets.UTF_8)); + } finally { + customClient.close(); + } + } + + /** + * Regression test for https://github.com/ClickHouse/clickhouse-java/issues/1397: a String value that holds + * arbitrary binary content (here a SHA-512 hash, which is almost never valid UTF-8) must be read back byte + * for byte instead of being mangled by lossy UTF-8 decoding. + */ + @Test(groups = {"integration"}) + public void testReadingBinaryStringFromHash() throws Exception { + final String message = "abc"; + final byte[] expectedHash = java.security.MessageDigest.getInstance("SHA-512") + .digest(message.getBytes(java.nio.charset.StandardCharsets.UTF_8)); + Assert.assertEquals(expectedHash.length, 64); + + java.util.Map<ClickHouseDataType, Class<?>> typeHints = new java.util.HashMap<>(); + typeHints.put(ClickHouseDataType.String, StringValue.class); + typeHints.put(ClickHouseDataType.FixedString, StringValue.class); + + Client customClient = newClient() + .typeHintMapping(typeHints) + .build(); + + final String query = "SELECT SHA512('" + message + "') AS hash"; + try { + try (QueryResponse response = customClient.query(query).get()) { + ClickHouseBinaryFormatReader reader = customClient.newBinaryFormatReader(response); + Assert.assertNotNull(reader.next()); + + StringValue hash = (StringValue) reader.readValue("hash"); + Assert.assertEquals(hash.size(), expectedHash.length); + 
Assert.assertEquals(hash.toByteArray(), expectedHash, + "Binary hash bytes must be preserved exactly"); + // getByteArray must agree with the raw StringValue bytes + Assert.assertEquals(reader.getByteArray("hash"), expectedHash); + } + + List<GenericRecord> records = customClient.queryAll(query); + Assert.assertEquals(records.size(), 1); + Assert.assertEquals(records.get(0).getByteArray("hash"), expectedHash, + "Binary hash read via queryAll must match the locally computed digest"); + } finally { + customClient.close(); + } + } } \ No newline at end of file diff --git a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java index 1a0ee2287..2ed1889c9 100644 --- a/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java +++ b/client-v2/src/test/java/com/clickhouse/client/datatypes/RowBinaryFormatWriterTest.java @@ -158,6 +158,11 @@ private static void assertEqualsKinda(Object actual, Object expected) { expected = ((BigDecimal) expected).stripTrailingZeros(); } + if (actual instanceof byte[] && expected instanceof byte[]) { // String.valueOf(byte[]) is identity-based, so arrays are compared by content here + org.testng.Assert.assertEquals((byte[]) actual, (byte[]) expected); + return; + } + assertEquals(String.valueOf(actual), String.valueOf(expected)); } @@ -376,6 +381,141 @@ public void writeStringsTest() throws Exception { writeTest(tableName, tableCreate, rows); } + @Test (groups = { "integration" }) + public void writeBinaryStringsTest() throws Exception { + String tableName = "rowBinaryFormatWriterTest_writeBinaryStringsTests_" + UUID.randomUUID().toString().replace('-', '_'); + String tableCreate = "CREATE TABLE \"" + tableName + "\" " + + " (id Int32, " + + " string String, " + + " fixed_string FixedString(5), " + + " fixed_string_one FixedString(1) " + + " ) Engine = MergeTree ORDER BY id"; + + byte[] binaryData = new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF, (byte) 0x00, (byte) 0xFF, 
(byte) 0x80}; + byte[] fixedStringData = new byte[]{(byte) 0xAA, (byte) 0xBB, (byte) 0xCC, (byte) 0xDD, (byte) 0xEE}; + byte[] fixedStringOneData = new byte[]{(byte) 0x7F}; + + // Instead of writeTest which reads back using default string decoding, we write manually + // and query back using typeHintMapping to preserve raw bytes + initTable(tableName, tableCreate, new CommandSettings()); + TableSchema schema = client.getTableSchema(tableName); + + ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults; + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 1); + w.setValue(schema.nameToColumnIndex("string"), binaryData); + w.setValue(schema.nameToColumnIndex("fixed_string"), fixedStringData); + w.setValue(schema.nameToColumnIndex("fixed_string_one"), fixedStringOneData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Rows written (Field-like): " + response.getWrittenRows()); + } + + // Also test the dedicated setString(column, byte[]) overload of RowBinaryFormatWriter + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 2); + w.setString("string", binaryData); + w.setString("fixed_string", fixedStringData); + w.setString("fixed_string_one", fixedStringOneData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Rows written (manual): " + response.getWrittenRows()); + } + + java.util.Map<com.clickhouse.data.ClickHouseDataType, Class<?>> typeHints = new java.util.HashMap<>(); + typeHints.put(com.clickhouse.data.ClickHouseDataType.String, com.clickhouse.client.api.data_formats.StringValue.class); + typeHints.put(com.clickhouse.data.ClickHouseDataType.FixedString, com.clickhouse.client.api.data_formats.StringValue.class); + + // Close the hint-mapped client even when an assertion below fails + // (same try-with-resources pattern as writeAndReadImageTest). + try (Client customClient = newClient() + .typeHintMapping(typeHints) + .build()) { + List<GenericRecord> records 
= customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id" ); + assertEquals(records.size(), 2); + + for (GenericRecord record : records) { + org.testng.Assert.assertEquals(record.getByteArray("string"), binaryData); + org.testng.Assert.assertEquals(record.getByteArray("fixed_string"), fixedStringData); + org.testng.Assert.assertEquals(record.getByteArray("fixed_string_one"), fixedStringOneData); + } + } + } + + @Test (groups = { "integration" }) + public void writeAndReadImageTest() throws Exception { + // Demonstrates that large binary blobs (here a ~10KB PNG) survive a full write/read round-trip + // through a String column without being corrupted by lossy UTF-8 decoding. + byte[] imageData = readResource("clickhouse-logo.png"); + org.testng.Assert.assertTrue(imageData.length > 1024, "Expected a non-trivial binary payload"); + + String tableName = "rowBinaryFormatWriterTest_writeAndReadImageTest_" + UUID.randomUUID().toString().replace('-', '_'); + String tableCreate = "CREATE TABLE \"" + tableName + "\" " + + " (id Int32, image String) Engine = MergeTree ORDER BY id"; + + initTable(tableName, tableCreate, new CommandSettings()); + TableSchema schema = client.getTableSchema(tableName); + + ClickHouseFormat format = ClickHouseFormat.RowBinaryWithDefaults; + try (InsertResponse response = client.insert(tableName, out -> { + RowBinaryFormatWriter w = new RowBinaryFormatWriter(out, schema, format); + w.setValue(schema.nameToColumnIndex("id"), 1); + w.setValue(schema.nameToColumnIndex("image"), imageData); + w.commitRow(); + }, format, settings).get()) { + System.out.println("Image bytes written: " + imageData.length + ", rows: " + response.getWrittenRows()); + } + + Map<com.clickhouse.data.ClickHouseDataType, Class<?>> typeHints = new HashMap<>(); + typeHints.put(com.clickhouse.data.ClickHouseDataType.String, + com.clickhouse.client.api.data_formats.StringValue.class); + + try (Client customClient = newClient().typeHintMapping(typeHints).build()) { + // Idiomatic path: stream 
rows and read the binary payload via the index-based getByteArray(int). + try (com.clickhouse.client.api.query.QueryResponse response = + customClient.query("SELECT * FROM \"" + tableName + "\" ORDER BY id").get()) { + com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader reader = + customClient.newBinaryFormatReader(response); + org.testng.Assert.assertNotNull(reader.next()); + + int imageIndex = reader.getSchema().nameToColumnIndex("image"); + byte[] streamed = reader.getByteArray(imageIndex); + org.testng.Assert.assertEquals(streamed, imageData, + "Image bytes read via getByteArray(int) must match the source exactly"); + // The name-based overload must agree with the index-based one. + org.testng.Assert.assertEquals(reader.getByteArray("image"), streamed); + } + + List<GenericRecord> records = customClient.queryAll("SELECT * FROM \"" + tableName + "\" ORDER BY id"); + assertEquals(records.size(), 1); + + GenericRecord record = records.get(0); + // Raw bytes must be preserved exactly, regardless of how they are accessed. 
+ org.testng.Assert.assertEquals(record.getByteArray("image"), imageData, + "Image bytes read back via getByteArray must match the source exactly"); + // Second access path: the generic getObject accessor, cast to StringValue. + com.clickhouse.client.api.data_formats.StringValue value = + (com.clickhouse.client.api.data_formats.StringValue) record.getObject("image"); + org.testng.Assert.assertEquals(value.size(), imageData.length); + org.testng.Assert.assertEquals(value.toByteArray(), imageData, + "StringValue must preserve the full binary payload"); + } + } + + private byte[] readResource(String name) throws IOException { // reads the named classpath resource fully into a byte array + try (java.io.InputStream is = getClass().getClassLoader().getResourceAsStream(name)) { + org.testng.Assert.assertNotNull(is, "Test resource not found on classpath: " + name); // getResourceAsStream returns null when the resource is missing + java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream(); + byte[] chunk = new byte[8192]; + int read; + while ((read = is.read(chunk)) != -1) { + buffer.write(chunk, 0, read); + } + return buffer.toByteArray(); + } + } @Test (groups = { "integration" }) public void writeDatetimeTests() throws Exception { diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java index 6ffdfea5e..5045e290c 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/InsertTests.java @@ -199,7 +199,14 @@ public void insertPOJOAndReadBack() throws Exception { try (QueryResponse queryResponse = client.query("SELECT * FROM " + tableName + " LIMIT 1").get(EXECUTE_CMD_TIMEOUT, TimeUnit.SECONDS)) { - ClickHouseBinaryFormatReader reader = client.newBinaryFormatReader(queryResponse); + // To read the binaryString properly as raw bytes, we must map String to StringValue + Client readerClient = client; + if (pojo.getBinaryString() != null) { + readerClient = newClient() + .typeHintMapping(java.util.Collections.singletonMap(com.clickhouse.data.ClickHouseDataType.String, 
com.clickhouse.client.api.data_formats.StringValue.class)) + .build(); + } + ClickHouseBinaryFormatReader reader = readerClient.newBinaryFormatReader(queryResponse); Assert.assertNotNull(reader.next()); Assert.assertEquals(reader.getByte("byteValue"), pojo.getByteValue()); @@ -212,12 +219,17 @@ public void insertPOJOAndReadBack() throws Exception { Assert.assertEquals(reader.getDouble("float64"), pojo.getFloat64()); Assert.assertEquals(reader.getString("string"), pojo.getString()); Assert.assertEquals(reader.getString("fixedString"), pojo.getFixedString()); + Assert.assertEquals(reader.getByteArray("binaryString"), pojo.getBinaryString()); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime").isEqual(pojo.getZonedDateTime().withNano(0))); Assert.assertTrue(reader.getZonedDateTime("zonedDateTime64").isEqual(pojo.getZonedDateTime64())); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime").isEqual(pojo.getOffsetDateTime().withNano(0))); Assert.assertTrue(reader.getOffsetDateTime("offsetDateTime64").isEqual(pojo.getOffsetDateTime64())); Assert.assertEquals(reader.getInstant("instant"), pojo.getInstant().with(ChronoField.MICRO_OF_SECOND, 0)); Assert.assertEquals(reader.getInstant("instant64"), pojo.getInstant64()); + // NOTE(review): not reached if an assertion above throws, so the extra client may leak on failure - consider try/finally. + if (readerClient != client) { + readerClient.close(); + } } } diff --git a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java index 6661b94bc..920f86317 100644 --- a/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java +++ b/client-v2/src/test/java/com/clickhouse/client/insert/SamplePOJO.java @@ -5,6 +5,9 @@ import lombok.Setter; import org.apache.commons.lang3.RandomStringUtils; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; import java.math.BigDecimal; import java.math.BigInteger; import java.net.Inet4Address; @@ -63,6 +66,7 @@ public class SamplePOJO { private String string; private 
String fixedString; + private byte[] binaryString; private LocalDate date; private LocalDate date32; @@ -145,6 +149,8 @@ public SamplePOJO() { string = RandomStringUtils.randomAlphabetic(1, 256); fixedString = RandomStringUtils.randomAlphabetic(3); + // Use a real binary blob (a PNG image) to exercise inserting/reading large non-UTF-8 String values. + binaryString = loadClickHouseLogo(); date = LocalDate.now(); date32 = LocalDate.now(); @@ -207,6 +213,23 @@ public SamplePOJO() { keyword = "database"; } + private static byte[] loadClickHouseLogo() { // reads clickhouse-logo.png from the classpath fully into a byte array + try (InputStream is = SamplePOJO.class.getClassLoader().getResourceAsStream("clickhouse-logo.png")) { + if (is == null) { // getResourceAsStream returns null when the resource is missing + throw new IllegalStateException("Test resource not found on classpath: clickhouse-logo.png"); + } + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + byte[] chunk = new byte[8192]; + int read; + while ((read = is.read(chunk)) != -1) { + buffer.write(chunk, 0, read); + } + return buffer.toByteArray(); + } catch (IOException e) { // rethrown unchecked with the cause attached; the calling constructor declares no checked exceptions + throw new IllegalStateException("Failed to read test resource clickhouse-logo.png", e); + } + } + @Override public String toString() { return "SamplePOJO{" + @@ -308,6 +331,7 @@ public static String generateTableCreateSQL(String tableName) { // "boxedBool UInt8, " + "string String, " + "fixedString FixedString(3), " + + "binaryString String, " + "date Date, " + "date32 Date, " + "dateTime DateTime, " + diff --git a/client-v2/src/test/resources/clickhouse-logo.png b/client-v2/src/test/resources/clickhouse-logo.png new file mode 100644 index 000000000..d68e65e11 Binary files /dev/null and b/client-v2/src/test/resources/clickhouse-logo.png differ