Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ else if (type == Type.Long)
{
return UTF8String.from(getInt(row));
}

return UTF8String.from(getAny(row));
}

Expand Down Expand Up @@ -459,7 +460,7 @@ default Object valueAsObject(int row)
}
}

/** Return value as Java string for provided row */
/** Return value as Java string for provided row. Null is returned when value is null. */
default String valueAsString(int row)
{
Object value = valueAsObject(row);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
package se.kuseman.payloadbuilder.api.execution;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.charset.StandardCharsets;

import org.junit.jupiter.api.Test;

/**
 * Tests of {@link UTF8String}: hash code agreement between the utf8 and latin factories and the CharSequence style operations (length, charAt, subSequence, toString) for instances created from a
 * Java String, from UTF-8 encoded bytes and from ISO-8859-1 encoded bytes.
 */
class UTF8StringTest
{
/** The same text created through the utf8 and the latin factory must produce equal hash codes */
@Test
void test_hash_code_equals_between_encodings()
{
UTF8String str1 = UTF8String.utf8("three".getBytes(StandardCharsets.UTF_8));
UTF8String str2 = UTF8String.latin("three".getBytes(StandardCharsets.ISO_8859_1));
assertEquals(str1.hashCode(), str2.hashCode());
}

/** Verifies length, bounds checks, charAt, subSequence and toString for an instance created from a Java String */
@Test
void test_charSequence_String()
{
// String
UTF8String str = UTF8String.from("three");
assertFalse(str.isLatin1());
// NOTE(review): length is asserted twice; presumably the second call exercises an already computed value, confirm against UTF8String
assertEquals(5, str.length());
assertEquals(5, str.length());
// Out of range indices and inverted/out of range sub sequence ranges must throw
assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(-1));
assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(5));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(3, 2));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(-1, 2));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(0, 6));
assertEquals('t', str.charAt(0));
assertEquals('h', str.charAt(1));
assertEquals('r', str.charAt(2));
assertEquals('e', str.charAt(3));
assertEquals('e', str.charAt(4));
// NOTE(review): expected value is a java.lang.String, so this relies on what subSequence returns being equal to a String, confirm against UTF8String
assertEquals("ee", str.subSequence(3, 5));
assertEquals("three", str.toString());
}

/** Verifies the same operations for an instance created from UTF-8 bytes of two non Latin-1 characters */
@Test
void test_charSequence_UTF8()
{
// CSOFF
UTF8String str = UTF8String.from("\u2705\u5F3A".getBytes(StandardCharsets.UTF_8));
assertFalse(str.isLatin1());
assertFalse(str.hasString());
// Length is counted in chars (2) and not in encoded bytes
assertEquals(2, str.length());
assertEquals(2, str.length());
assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(-1));
assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(5));
// hasString is still false after the length and the failing charAt calls above
assertFalse(str.hasString());
assertEquals('\u2705', str.charAt(0));
assertEquals('\u5F3A', str.charAt(1));
assertEquals("\u2705\u5F3A", str.toString());
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(3, 2));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(-1, 2));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(0, 6));
// CSON
}

/**
 * Verifies char length, byte length and charAt for multi byte UTF-8 texts in several scripts. Every value is asserted both on the UTF8String itself and on the Java String returned by toString.
 */
@Test
void test_complex_UTF8()
{
UTF8String str = UTF8String.from("Зарегистрируйтесь сейчас на Десятую Международную Конференцию по".getBytes(StandardCharsets.UTF_8));
assertFalse(str.isLatin1());
assertEquals(64, str.length());
assertEquals(122, str.getByteLength());
assertEquals('р', str.charAt(10));

assertEquals(64, str.toString()
.length());
assertEquals('р', str.toString()
.charAt(10));

str = UTF8String.from("สิบสองกษัตริย์ก่อนหน้าแลถัดไป".getBytes(StandardCharsets.UTF_8));
assertFalse(str.isLatin1());
assertEquals(29, str.length());
assertEquals(87, str.getByteLength());
assertEquals('ร', str.charAt(10));

assertEquals(29, str.toString()
.length());
assertEquals('ร', str.toString()
.charAt(10));

str = UTF8String.from("ድር ቢያብር አንበሳ ያስር።".getBytes(StandardCharsets.UTF_8));
assertFalse(str.isLatin1());
assertEquals(17, str.length());
assertEquals(45, str.getByteLength());
assertEquals('በ', str.charAt(10));

assertEquals(17, str.toString()
.length());
assertEquals('በ', str.toString()
.charAt(10));

str = UTF8String.from("⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝".getBytes(StandardCharsets.UTF_8));
assertFalse(str.isLatin1());
assertEquals(15, str.length());
assertEquals(41, str.getByteLength());
assertEquals('⠋', str.charAt(10));

assertEquals(15, str.toString()
.length());
assertEquals('⠋', str.toString()
.charAt(10));

// 4 bytes chars
// Four supplementary code points: 4 UTF-8 bytes each (16 bytes) and a surrogate pair each in UTF-16 (length 8).
// Index 3 is the low surrogate of the second code point (U+1312B), which is 0xDD2B = 56619
str = UTF8String.from("𓄉𓄫𓅒𓅤".getBytes(StandardCharsets.UTF_8));
assertEquals(8, str.length());
assertEquals(16, str.getByteLength());
assertEquals(56619, str.charAt(3));

assertEquals(8, str.toString()
.length());
assertEquals(56619, str.toString()
.charAt(3));

}

/** Verifies the same operations for an instance created from ISO-8859-1 bytes, which is expected to be reported as Latin-1 */
@Test
void test_charSequence_Latin1()
{
// Latin1 bytes
UTF8String str = UTF8String.from("three".getBytes(StandardCharsets.ISO_8859_1));
assertTrue(str.isLatin1());
assertFalse(str.hasString());
// NOTE(review): length is asserted twice; presumably the second call exercises an already computed value, confirm against UTF8String
assertEquals(5, str.length());
assertEquals(5, str.length());
assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(-1));
assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(5));
// hasString is still false after the length and the failing charAt calls above
assertFalse(str.hasString());
assertEquals('t', str.charAt(0));
assertEquals('h', str.charAt(1));
assertEquals('r', str.charAt(2));
assertEquals('e', str.charAt(3));
assertEquals('e', str.charAt(4));
assertEquals("three", str.toString());
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(3, 2));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(-1, 2));
assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(0, 6));
}
}
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
package se.kuseman.payloadbuilder.bytes;

import se.kuseman.payloadbuilder.api.execution.ValueVector;
import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings;

/** A writer that writes vectors whose items are references to other places in the buffer */
abstract class AReferenceVectorWriter implements VectorWriter
{
@Override
public void write(BytesWriter writer, WriteCache cache, ValueVector vector, int from, int to, int nullCount)
{
Encoding encoding = getEncoding(vector, from, to, cache.getSettings());

// Find out if we have a literal vector
if (nullCount == 0)
if (nullCount == 0
&& encoding.isLiteral)
{
boolean literal = isLiteral(vector, from, to);

if (literal)
{
writer.putByte(PayloadReader.LITERAL_ENCODING);
writer.putByte(encoding.encoding);

int valueOffset = writer.position();
int valueOffset = writer.position();

// Set writer to position after literal data
writer.position(valueOffset + AVector.REFERENCE_HEADER_SIZE);
// Set writer to position after literal data
writer.position(valueOffset + AVector.REFERENCE_HEADER_SIZE);

// Get cached position
int position = getAndCachePosition(writer, cache, vector, from);
writer.putInt(valueOffset, position);
return;
}
// Get cached position
int position = getAndCachePosition(writer, cache, vector, from);
writer.putInt(valueOffset, position);
return;
}

writer.putByte(PayloadReader.REGULAR_ENCODING);
writer.putByte(encoding.encoding);

writeMeta(writer, vector);

Expand Down Expand Up @@ -63,9 +62,35 @@ protected void writeMeta(BytesWriter writer, ValueVector vector)
{
}

/** Returns true if vector is literal */
protected abstract boolean isLiteral(ValueVector vector, int from, int to);
/**
 * Return the encoding to use when writing the provided range of the vector. Reserved encodings: 0 - REGULAR_LITERAL_ENCODING 1 - REGULAR_ENCODING
 * <p>
 * This default implementation ignores all of its arguments and always returns {@link Encoding#REGULAR}.
 * </p>
 *
 * @param vector Vector that is about to be written
 * @param from Start row of the range to write
 * @param to End row of the range to write
 * @param settings Settings of the current write
 * @return Encoding describing the encoding byte and whether the range is written as a literal
 */
protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings)
{
return Encoding.REGULAR;
}

/** Get and cache position of provided row */
protected abstract int getAndCachePosition(BytesWriter writer, WriteCache cache, ValueVector vector, int row);

/**
 * An encoding byte paired with a flag telling whether that encoding represents a literal vector.
 *
 * @param encoding Byte identifying the encoding
 * @param isLiteral True if the encoding is a literal encoding
 */
record Encoding(byte encoding, boolean isLiteral)
{
    /** Encoding using the byte reserved for regular (non literal) vectors */
    static final Encoding REGULAR = new Encoding(PayloadReader.REGULAR_ENCODING, false);
    /** Encoding using the byte reserved for regular literal vectors */
    static final Encoding REGULAR_LITERAL = new Encoding(PayloadReader.REGULAR_LITERAL_ENCODING, true);

    /**
     * Rejects combinations where a reserved byte is paired with the opposite literal flag.
     *
     * @throws IllegalArgumentException if a literal encoding uses the regular byte or a non literal encoding uses the regular literal byte
     */
    Encoding
    {
        if (isLiteral)
        {
            // A literal encoding must not claim the byte reserved for regular encoding
            if (encoding == PayloadReader.REGULAR_ENCODING)
            {
                throw new IllegalArgumentException("Illegal encoding byte. " + PayloadReader.REGULAR_ENCODING + " is reserved for regular encoding");
            }
        }
        else if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING)
        {
            // A non literal encoding must not claim the byte reserved for regular literal encoding
            throw new IllegalArgumentException("Illegal literal flag. Encoding byte: " + PayloadReader.REGULAR_LITERAL_ENCODING + " is reserved for regular literal encoding");
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ public byte getVersion()
return ArrayVector.VERSION;
}

@Override
protected boolean isLiteral(ValueVector vector, int from, int to)
{
return false;
}

@Override
protected int getAndCachePosition(BytesWriter writer, WriteCache cache, ValueVector vector, int row)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf
}

int encoding = buffer.get(position++);
if (encoding == PayloadReader.LITERAL_ENCODING)
if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING)
{
byte value = buffer.get(position);
return ValueVector.literalBoolean(value == 1, size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public void write(BytesWriter writer, WriteCache cache, ValueVector vector, int

if (literal)
{
writer.putByte(PayloadReader.LITERAL_ENCODING);
writer.putByte(PayloadReader.REGULAR_LITERAL_ENCODING);
// Literal boolean then we have the literal value in the data position
writer.putByte((byte) (value ? 1
: 0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf
}

int encoding = buffer.get(position++);
if (encoding == PayloadReader.LITERAL_ENCODING)
if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING)
{
int valueOffset = buffer.getInt(position);
return ValueVector.literalDateTime(EpochDateTime.from(buffer.getLong(valueOffset)), size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import se.kuseman.payloadbuilder.api.catalog.Column;
import se.kuseman.payloadbuilder.api.execution.ValueVector;
import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings;

/** Writer of {@link Column.Type#DateTime} */
class DateTimeVectorWriter extends AReferenceVectorWriter
Expand All @@ -15,19 +16,29 @@ public byte getVersion()
}

@Override
protected boolean isLiteral(ValueVector vector, int from, int to)
protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings)
{
long value = vector.getDateTime(from)
.getEpoch();
for (int i = from + 1; i < to; i++)
boolean firstSet = false;
long value = -1;
for (int i = from + 0; i < to; i++)
{
if (value != vector.getDateTime(i)
if (vector.isNull(i))
{
return Encoding.REGULAR;
}
else if (!firstSet)
{
value = vector.getDateTime(i)
.getEpoch();
firstSet = true;
}
else if (value != vector.getDateTime(i)
.getEpoch())
{
return false;
return Encoding.REGULAR;
}
}
return true;
return Encoding.REGULAR_LITERAL;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, ReadContext contex
}

int encoding = buffer.get(position++);
if (encoding == PayloadReader.LITERAL_ENCODING)
if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING)
{
int valueOffset = buffer.getInt(position);
return ValueVector.literalDecimal(getDecimal(buffer, context, valueOffset), size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import se.kuseman.payloadbuilder.api.catalog.Column;
import se.kuseman.payloadbuilder.api.execution.Decimal;
import se.kuseman.payloadbuilder.api.execution.ValueVector;
import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings;

/** Writer of {@link Column.Type#Decimal} */
class DecimalVectorWriter extends AReferenceVectorWriter
Expand All @@ -19,17 +20,26 @@ public byte getVersion()
}

@Override
protected boolean isLiteral(ValueVector vector, int from, int to)
protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings)
{
Decimal value = vector.getDecimal(from);
for (int i = from + 1; i < to; i++)
Decimal value = null;
for (int i = from + 0; i < to; i++)
{
if (!value.equals(vector.getDecimal(i)))
if (vector.isNull(i))
{
return false;
return Encoding.REGULAR;
}
else if (value == null)
{
value = vector.getDecimal(i);
continue;
}
else if (!value.equals(vector.getDecimal(i)))
{
return Encoding.REGULAR;
}
}
return true;
return Encoding.REGULAR_LITERAL;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf
}

int encoding = buffer.get(position++);
if (encoding == PayloadReader.LITERAL_ENCODING)
if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING)
{
int valueOffset = buffer.getInt(position);
return ValueVector.literalDouble(buffer.getDouble(valueOffset), size);
Expand Down
Loading
Loading