diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69cdb02 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Maven ignores +/target/ + +# IDE ignores +/.settings/ +/.project +/.classpath +/.pydevproject +/.idea +/*.iml +/nbproject/ +/nbactions.xml +/nb-configuration.xml + +# OS and editor ignores +.DS_Store +*~ +*.swp + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cbf560d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +language: java +jdk: + - openjdk8 +notifications: + irc: + channels: + - "chat.freenode.net#fluo" + on_success: change + on_failure: always + use_notice: true + skip_join: true +# speed up builds; don't use with 'mvn install', only 'mvn verify' +cache: + directories: + - $HOME/.m2 +# skip pre-fetch of maven dependencies by making install step a NOOP +install: true +script: mvn -C clean verify javadoc:jar diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..f72f2d0 --- /dev/null +++ b/pom.xml @@ -0,0 +1,192 @@ + + + + 4.0.0 + + org.apache.fluo + fluo-parent + 2 + + fluo-bytes + 1.0.0-SNAPSHOT + jar + Apache Fluo Bytes + A library for working with bytes with an extremely stable API + 2017 + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0 + repo + + + + scm:git:https://gitbox.apache.org/repos/asf/fluo-bytes.git + scm:git:https://gitbox.apache.org/repos/asf/fluo-bytes.git + HEAD + https://gitbox.apache.org/repos/asf?p=fluo-bytes.git + + + GitHub + https://github.com/apache/fluo-bytes/issues + + + 20 + ${project.basedir}/src/main/build-resources/eclipse-formatter.xml + true + fluo-bytes-release + + + + + junit + junit + 4.12 + + + + + + junit + junit + test + + + + + + + net.revelc.code + apilyzer-maven-plugin + 1.0.1 + + + org[.]apache[.]bytes[.].* + + + + + + + net.revelc.code.formatter + formatter-maven-plugin + + + true + + + + + + + net.revelc.code + formatter-maven-plugin + 0.5.2 + + LF + true + ${formatter.config} + + + + org.apache.fluo + build-resources + ${build-resources.version} + + + + + format-source + + format + + process-sources + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + api-check + + check + + + + + + + + + + + + + + + true + ${project.build.directory}/checkstyle-results-imports.xml + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + + analyze-only + + + 
${mdep.failOnWarning} + + + + + + net.revelc.code + apilyzer-maven-plugin + + + apilyzer + + analyze + + + + + + + + + fluo-bytes-release + + + true + true + true + true + true + + + + diff --git a/src/main/build-resources/eclipse-formatter.xml b/src/main/build-resources/eclipse-formatter.xml new file mode 100644 index 0000000..20a6796 --- /dev/null +++ b/src/main/build-resources/eclipse-formatter.xml @@ -0,0 +1,353 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/java/org/apache/bytes/AbstractByteSequence.java b/src/main/java/org/apache/bytes/AbstractByteSequence.java new file mode 100644 index 0000000..c0080ca --- /dev/null +++ b/src/main/java/org/apache/bytes/AbstractByteSequence.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bytes; + +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.IntConsumer; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + +/** + * Abstract class for implementations of {@link ByteSequence}. + * + * @since 1.0.0 + */ +abstract class AbstractByteSequence implements ByteSequence { + + private void checkBounds(int i, String positionName) { + int len = length(); + if (i < 0 || i >= len) { + String msg = positionName + " not valid for "; + msg += len == 0 ? 
"empty Bytes" : "range [0," + len + ")"; + msg += ": " + i; + throw new IndexOutOfBoundsException(msg); + } + } + + protected void checkBounds(int i) { + checkBounds(i, "Index"); + } + + protected void checkBounds(int begin, int end) { + if (begin > end) { + throw new IndexOutOfBoundsException("End position (" + end + + ") occurs before begin position (" + begin + ")"); + } + checkBounds(begin, "Begin position"); + checkBounds(begin, "End position"); + } + + @Override + public IntStream intStream() { + class ByteIterator implements PrimitiveIterator.OfInt { + int cur = 0; + + @Override + public boolean hasNext() { + return cur < length(); + } + + @Override + public int nextInt() { + if (hasNext()) { + return byteAt(cur++); // upcast to int + } else { + throw new NoSuchElementException(); + } + } + + @Override + public void forEachRemaining(IntConsumer block) { + for (; cur < length(); cur++) { + block.accept(byteAt(cur)); + } + } + + } + + return StreamSupport.intStream( + () -> Spliterators.spliterator(new ByteIterator(), length(), Spliterator.ORDERED), + Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, false); + } + + @Override + public Iterator iterator() { + return new Iterator() { + int cur = 0; + + @Override + public boolean hasNext() { + return cur < length(); + } + + @Override + public Byte next() { + if (hasNext()) { + return byteAt(cur++); // auto-boxing here + } else { + throw new NoSuchElementException(); + } + } + + }; + } + + @Override + public Spliterator spliterator() { + return Spliterators.spliterator(iterator(), length(), Spliterator.ORDERED); + } + +} diff --git a/src/main/java/org/apache/bytes/ByteSequence.java b/src/main/java/org/apache/bytes/ByteSequence.java new file mode 100644 index 0000000..6a3029b --- /dev/null +++ b/src/main/java/org/apache/bytes/ByteSequence.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
/**
 * Interface representing a sequence of bytes. Iterating over a sequence yields its bytes, boxed,
 * in index order.
 *
 * @since 1.0.0
 */
public interface ByteSequence extends Iterable<Byte> {

  /**
   * The length of the sequence this object represents. It does not necessarily reflect the size of
   * any internal data structures, such as an internal byte array.
   *
   * @return the length of the sequence
   */
  int length();

  /**
   * Retrieve a byte at the specified index. Valid indices are between {@code 0} (for the first
   * byte) and {@code length() - 1} (for the last byte).
   *
   * @param index the position within the sequence to retrieve
   * @return the byte at the specified index
   * @throws IndexOutOfBoundsException if {@code index} is not a valid index
   */
  byte byteAt(int index);

  /**
   * Retrieve a sequence of bytes from the original sequence. The returned sequence includes all
   * bytes between {@code begin} and {@code end - 1}, inclusive.
   *
   * @param begin the index of the first byte to be included in the result
   * @param end the index after the last byte to be included in the result
   * @return a byte sequence containing the bytes between {@code begin} and {@code end - 1},
   *         inclusive
   * @throws IndexOutOfBoundsException if the requested range does not lie within this sequence
   */
  ByteSequence subSequence(int begin, int end);

  /**
   * Return an IntStream representation of this byte sequence. This avoids auto-boxing when not
   * necessary. Each int represents a single byte from the sequence.
   *
   * @return a stream of integers, one for each byte in the sequence
   */
  IntStream intStream();

  /**
   * Compares this sequence with the provided byte array using a lexicographical comparison.
   *
   * @param bytes the byte array with which to compare this sequence
   * @return a value following the same conventions as {@link Comparable#compareTo(Object)}
   */
  int compareTo(byte[] bytes);

  /**
   * Determines if the contents of this byte sequence are equivalent to the contents of the
   * provided byte array.
   *
   * @param bytes the byte array with which to compare this sequence
   * @return true if the bytes they represent are the same
   */
  boolean contentEquals(byte[] bytes);
}
+ */ + +package org.apache.bytes; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.lang.ref.WeakReference; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Objects; + +/** + * Represents bytes in Fluo. Bytes is an immutable wrapper around a byte array. Bytes always copies + * on creation and never lets its internal byte array escape. Its modeled after Java's String which + * is an immutable wrapper around a char array. It was created because there is nothing in Java like + * it at the moment. Its very nice having this immutable type, it avoids having to do defensive + * copies to ensure correctness. Maybe one day Java will have equivalents of String, StringBuilder, + * and Charsequence for bytes. + * + *

+ * The reason Fluo did not use ByteBuffer is because its not immutable, even a read only ByteBuffer + * has a mutable position. This makes ByteBuffer unsuitable for place where an immutable data type + * is desirable, like a key for a map. + * + *

+ * Bytes.EMPTY is used to represent a Bytes object with no data. + * + * @since 1.0.0 + */ +public final class Bytes extends AbstractByteSequence implements Comparable, Serializable { + + private static final long serialVersionUID = 1L; + + private final byte[] data; + + private transient WeakReference utf8String; + + public static final Bytes EMPTY = new Bytes(new byte[0]); + + private int hashCode = 0; + + public Bytes() { + data = EMPTY.data; + } + + private Bytes(byte[] data) { + this.data = data; + } + + private Bytes(byte[] data, String utf8String) { + this.data = data; + this.utf8String = new WeakReference<>(utf8String); + } + + /** + * Gets a byte within this sequence of bytes + * + * @param i index into sequence + * @return byte + * @throws IllegalArgumentException if i is out of range + */ + @Override + public byte byteAt(int i) { + checkBounds(i); + return data[i]; + } + + /** + * Gets the length of bytes + */ + @Override + public int length() { + return data.length; + } + + /** + * Returns a portion of the Bytes object + * + * @param begin index of subsequence begin (inclusive) + * @param end index of subsequence end (exclusive) + */ + @Override + public Bytes subSequence(int begin, int end) { + checkBounds(begin, end); + return Bytes.of(data, begin, end - begin); + } + + /** + * Returns a byte array containing a copy of the bytes + */ + public byte[] toArray() { + byte[] copy = new byte[length()]; + System.arraycopy(data, 0, copy, 0, length()); + return copy; + } + + /** + * Provides a String representation, decoding these bytes with the provided charset + * + * @param charset the character set to decode these bytes + */ + public String toString(Charset charset) { + if (charset == StandardCharsets.UTF_8) { + // cache the utf8string if that's the charset provided + return toString(); + } + return new String(data, charset); + } + + /** + * Provides a String representation, decoding the bytes as UTF-8 + */ + @Override + public String toString() { + if 
(utf8String != null) { + String s = utf8String.get(); + if (s != null) { + return s; + } + } + + String s = new String(data, StandardCharsets.UTF_8); + utf8String = new WeakReference<>(s); + return s; + } + + /** + * @return A read only byte buffer thats backed by the internal byte array. + */ + public ByteBuffer toByteBuffer() { + return ByteBuffer.wrap(data).asReadOnlyBuffer(); + } + + /** + * @return An input stream thats backed by the internal byte array + */ + public InputStream toInputStream() { + return new ByteArrayInputStream(data); + } + + public void writeTo(OutputStream out) throws IOException { + // since Bytes is immutable, its important that we do not let the internal byte array escape + if (length() <= 32) { + int end = length(); + for (int i = 0; i < end; i++) { + out.write(data[i]); + } + } else { + out.write(toArray()); + } + } + + /** + * Compares this to the passed bytes, byte by byte, returning a negative, zero, or positive result + * if the first sequence is less than, equal to, or greater than the second. The comparison is + * performed starting with the first byte of each sequence, and proceeds until a pair of bytes + * differs, or one sequence runs out of byte (is shorter). A shorter sequence is considered less + * than a longer one. 
+ * + * @return comparison result + */ + @Override + public final int compareTo(Bytes other) { + if (this == other) { + return 0; + } else { + return compareTo(other.data); + } + } + + @Override + public int compareTo(byte[] bytes) { + int minLen = Math.min(length(), bytes.length); + for (int i = 0, j = 0; i < minLen; i++, j++) { + int a = (this.data[i] & 0xff); + int b = (bytes[j] & 0xff); + + if (a != b) { + return a - b; + } + } + return length() - bytes.length; + } + + /** + * Returns true if, and only if, this Bytes object contains the same byte sequence as another + * Bytes object + */ + @Override + public final boolean equals(Object other) { + if (this == other) { + return true; + } + if (other instanceof Bytes) { + Bytes otherBytes = (Bytes) other; + if (hashCode != 0 && otherBytes.hashCode != 0 && hashCode != otherBytes.hashCode) { + // if both hashCodes have been pre-computed (by calling hashCode(), and fail to match, then + // they can't be equal + return false; + } + return contentEquals(otherBytes.data); + } + return false; + } + + @Override + public boolean contentEquals(byte[] bytes) { + if (data.length != bytes.length) { + // can't be equal if they differ in length; this is checked again in Arrays.equals, but we + // check here because it's a prerequisite for the last byte + // comparison optimization below + return false; + } + if (data.length == 0 && bytes.length == 0) { + // both are empty + return true; + } + int lastByte = data.length - 1; + if (data[lastByte] != bytes[lastByte]) { + // at this point, both byte arrays are non-zero and the same length; quickly compare last byte + // before checking the full array; this is particularly + // helpful for sorted data which have long prefixes in common + return false; + } + return Arrays.equals(data, bytes); + } + + @Override + public final int hashCode() { + return hashCode == 0 ? 
(hashCode = Arrays.hashCode(data)) : hashCode; + } + + /** + * Creates a Bytes object by copying the data of the given byte array + */ + public static final Bytes of(byte[] array) { + Objects.requireNonNull(array); + if (array.length == 0) { + return EMPTY; + } + byte[] copy = new byte[array.length]; + System.arraycopy(array, 0, copy, 0, array.length); + return new Bytes(copy); + } + + /** + * Creates a Bytes object by copying the data of a subsequence of the given byte array + * + * @param data Byte data + * @param offset Starting offset in byte array (inclusive) + * @param length Number of bytes to include + */ + public static final Bytes of(byte[] data, int offset, int length) { + Objects.requireNonNull(data); + if (length == 0) { + return EMPTY; + } + byte[] copy = new byte[length]; + System.arraycopy(data, offset, copy, 0, length); + return new Bytes(copy); + } + + /** + * Creates a Bytes object by copying the data of the given ByteBuffer. + * + * @param bb Data will be read from this ByteBuffer in such a way that its position is not + * changed. 
+ */ + public static final Bytes of(ByteBuffer bb) { + Objects.requireNonNull(bb); + if (bb.remaining() == 0) { + return EMPTY; + } + byte[] data; + if (bb.hasArray()) { + data = + Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), + bb.limit() + bb.arrayOffset()); + } else { + data = new byte[bb.remaining()]; + // duplicate so that it does not change position + bb.duplicate().get(data); + } + return new Bytes(data); + } + + public static final Bytes of(CharSequence cs, Charset charset) { + if (cs instanceof String) { + return of((String) cs, charset); + } + + Objects.requireNonNull(cs); + Objects.requireNonNull(charset); + + if (cs instanceof String) { + return of((String) cs); + } + + Objects.requireNonNull(cs); + if (cs.length() == 0) { + return EMPTY; + } + + ByteBuffer bb = charset.encode(CharBuffer.wrap(cs)); + + // this byte buffer has never escaped so can use its byte array directly + if (bb.hasArray()) { + return Bytes.of(bb.array(), bb.position() + bb.arrayOffset(), bb.limit()); + } else { + byte[] data = new byte[bb.remaining()]; + bb.get(data); + return new Bytes(data); + } + } + + /** + * Creates a Bytes object by copying the data of the CharSequence and encoding it using UTF-8. 
+ */ + public static final Bytes of(CharSequence cs) { + return of(cs, StandardCharsets.UTF_8); + } + + /** + * Creates a Bytes object by copying the value of the given String + */ + public static final Bytes of(String s) { + Objects.requireNonNull(s); + if (s.length() == 0) { + return EMPTY; + } + byte[] data = s.getBytes(StandardCharsets.UTF_8); + return new Bytes(data, s); + } + + /** + * Creates a Bytes object by copying the value of the given String with a given charset + */ + public static final Bytes of(String s, Charset c) { + if (c == StandardCharsets.UTF_8) { + return of(s); + } + Objects.requireNonNull(s); + Objects.requireNonNull(c); + if (s.length() == 0) { + return EMPTY; + } + byte[] data = s.getBytes(c); + return new Bytes(data); + } + + /** + * Checks if this has the passed prefix + * + * @param prefix is a Bytes object to compare to this + * @return true or false + */ + public boolean beginsWith(Bytes prefix) { + Objects.requireNonNull(prefix, "beginsWith(Bytes prefix) cannot have null parameter"); + + if (prefix.length() > this.length()) { + return false; + } else { + int end = prefix.length(); + for (int i = 0, j = 0; i < end; i++, j++) { + if (this.data[i] != prefix.data[j]) { + return false; + } + } + } + return true; + } + + /** + * Check if this has the provided suffix. + * + * @param suffix is a Bytes object to compare to this + * @return true or false + */ + public boolean endsWith(Bytes suffix) { + Objects.requireNonNull(suffix, "endsWith(Bytes suffix) cannot have null parameter"); + int suffixLen = suffix.length(); + int len = length(); + if (suffixLen > len) { + return false; + } + + // comparing from the back; TODO see if comparing forwards is faster + for (int i = suffixLen - 1, j = len - 1; i >= 0; i--, j--) { + if (suffix.data[i] != data[j]) { + return false; + } + } + return true; + } + + /** + * Copy this entire Bytes object into the destination byte array, dest, at position + * destPos. 
+ * + * @param dest destination array into which bytes are copied + * @param destPos the position in the destination array where the subsequence will be copied + * @exception IndexOutOfBoundsException if copying would cause access of data outside array + * bounds. + * @exception NullPointerException if either src or dest is + * null. + */ + public void copyTo(byte[] dest, int destPos) { + copyTo(0, dest, destPos, length()); + } + + /** + * Copy length bytes from this Bytes object, starting at the begin + * position into the destination byte array, dest, at position destPos. + * All bytes between begin and begin+length-1, inclusive, are copied. + * The destination array must be large enough. + * + * @param begin index of the beginning of the subsequence to copy (inclusive) + * @param dest destination array into which bytes are copied + * @param destPos the position in the destination array where the subsequence will be copied + * @param length the length of the sequence to copy + * @exception IndexOutOfBoundsException if copying would cause access of data outside array + * bounds. + * @exception NullPointerException if either src or dest is + * null. + */ + public void copyTo(int begin, byte[] dest, int destPos, int length) { + // since dest is byte[], we can't get the ArrayStoreException + System.arraycopy(data, begin, dest, destPos, length); + } + +} diff --git a/src/main/java/org/apache/bytes/BytesBuilder.java b/src/main/java/org/apache/bytes/BytesBuilder.java new file mode 100644 index 0000000..d5d8d2d --- /dev/null +++ b/src/main/java/org/apache/bytes/BytesBuilder.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bytes; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * This class provides an easy, efficient, reusable mechanism for building immutable Bytes objects. + * + * @since 1.0.0 + */ +public class BytesBuilder extends AbstractByteSequence { + + private byte[] ba; + private int len; + + /** + * Construct a builder with the specified initial capacity + * + * @param initialCapacity the initial size of the internal buffer + */ + public BytesBuilder(int initialCapacity) { + ba = new byte[initialCapacity]; + len = 0; + } + + /** + * Construct a builder with the default initial capacity (32) + */ + public BytesBuilder() { + this(32); + } + + private void ensureCapacity(int min) { + if (ba.length < min) { + int newLen = ba.length * 2; + if (newLen < min) { + newLen = min; + } + + ba = Arrays.copyOf(ba, newLen); + } + } + + /** + * Converts a character sequence to bytes using UTF-8 encoding and appends the resulting bytes + * + * @return self + */ + public BytesBuilder append(CharSequence cs) { + return append(cs, StandardCharsets.UTF_8); + } + + public BytesBuilder append(CharSequence cs, Charset charset) { + if (cs instanceof String) { + return append((String) 
cs, charset); + } + + ByteBuffer bb = charset.encode(CharBuffer.wrap(cs)); + + int length = bb.remaining(); + ensureCapacity(len + length); + bb.get(ba, len, length); + len += length; + return this; + } + + /** + * Converts string to bytes using UTF-8 encoding and appends bytes. + * + * @return self + */ + public BytesBuilder append(String s) { + return append(s, StandardCharsets.UTF_8); + } + + public BytesBuilder append(String s, Charset charset) { + return append(s.getBytes(charset)); + } + + public BytesBuilder append(Bytes b) { + ensureCapacity(len + b.length()); + b.copyTo(ba, len); + len += b.length(); + return this; + } + + public BytesBuilder append(byte[] bytes) { + ensureCapacity(len + bytes.length); + System.arraycopy(bytes, 0, ba, len, bytes.length); + len += bytes.length; + return this; + } + + /** + * Append a single byte. + * + * @param b take the lower 8 bits and appends it. + * @return self + */ + public BytesBuilder append(int b) { + ensureCapacity(len + 1); + ba[len] = (byte) b; + len += 1; + return this; + } + + /** + * Append a section of bytes from array + * + * @param bytes - bytes to be appended + * @param offset - start of bytes to be appended + * @param length - how many bytes from 'offset' to be appended + * @return self + */ + public BytesBuilder append(byte[] bytes, int offset, int length) { + ensureCapacity(len + length); + System.arraycopy(bytes, offset, ba, len, length); + len += length; + return this; + } + + /** + * Append a sequence of bytes from an InputStream + * + * @param in data source to append from + * @param length number of bytes to read from data source + * @return self + */ + public BytesBuilder append(InputStream in, int length) throws IOException { + ensureCapacity(len + length); + new DataInputStream(in).readFully(ba, len, length); + len += length; + return this; + } + + /** + * Append data from a ByteBuffer + * + * @param bb data is read from the ByteBuffer in such a way that its position is not changed. 
+ * @return self + */ + public BytesBuilder append(ByteBuffer bb) { + int length = bb.remaining(); + ensureCapacity(len + length); + bb.duplicate().get(ba, len, length); + len += length; + return this; + } + + /** + * Sets the point at which appending will start. This method can shrink or grow the ByteBuilder + * from its current state. If it grows it will zero pad. + */ + public void setLength(int newLen) { + if (newLen < 0) { + throw new IllegalArgumentException("Negative length passed : " + newLen); + } + if (newLen > ba.length) { + ba = Arrays.copyOf(ba, newLen); + } + + if (newLen > len) { + Arrays.fill(ba, len, newLen, (byte) 0); + } + + len = newLen; + } + + @Override + public int length() { + return len; + } + + public Bytes toBytes() { + return Bytes.of(ba, 0, len); + } + + @Override + public byte byteAt(int index) { + return ba[index]; + } + + @Override + public BytesBuilder subSequence(int begin, int end) { + checkBounds(begin, end); + int size = end - begin; + return new BytesBuilder(size).append(ba, begin, size); + } + + @Override + public int compareTo(byte[] bytes) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean contentEquals(byte[] bytes) { + // TODO Auto-generated method stub + return false; + } + +} diff --git a/src/test/java/org/apache/bytes/BytesTest.java b/src/test/java/org/apache/bytes/BytesTest.java new file mode 100644 index 0000000..5806728 --- /dev/null +++ b/src/test/java/org/apache/bytes/BytesTest.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.bytes;
+
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static java.nio.charset.StandardCharsets.UTF_16;
+import static java.nio.charset.StandardCharsets.UTF_16BE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Unit tests for {@link Bytes}. The fixtures are built through each {@code Bytes.of(...)} variant
+ * used here (String, CharSequence, ByteBuffer, byte[] and byte[] with offset/length), and the
+ * tests exercise {@code toString}, {@code beginsWith}, {@code endsWith}, {@code toArray},
+ * {@code byteAt} and {@code length}.
+ */
+public class BytesTest {
+
+  private static final Bytes BYTES_EMPTY = Bytes.EMPTY;
+  private static final Bytes BYTES_STRING = Bytes.of("test String");
+  private static final Bytes BYTES_STRING_CHARSET = Bytes.of("test String with Charset", US_ASCII);
+  private static final Bytes BYTES_CHARSEQ = Bytes.of(new StringBuilder("test CharSequence"));
+  private static final Bytes BYTES_CHARSEQ_CHARSET =
+      Bytes.of(new StringBuilder("test CharSequence with Charset"), US_ASCII);
+  private static final Bytes BYTES_BB =
+      Bytes.of(ByteBuffer.wrap("test ByteBuffer".getBytes(US_ASCII)));
+  private static final Bytes BYTES_ARRAY = Bytes.of("test byte[]".getBytes(US_ASCII));
+  // offset 3 / length 34 select the text between the leading and trailing "---"
+  private static final Bytes BYTES_ARRAY_OFFSET =
+      Bytes.of("---test byte[] with offset and length---".getBytes(US_ASCII), 3, 34);
+
+  /** Every fixture above, paired index-for-index with {@link #FIXTURE_TEXT}. */
+  private static final Bytes[] FIXTURES = {BYTES_EMPTY, BYTES_STRING, BYTES_STRING_CHARSET,
+      BYTES_CHARSEQ, BYTES_CHARSEQ_CHARSET, BYTES_BB, BYTES_ARRAY, BYTES_ARRAY_OFFSET};
+
+  /** The text each entry of {@link #FIXTURES} is expected to hold. */
+  private static final String[] FIXTURE_TEXT = {"", "test String", "test String with Charset",
+      "test CharSequence", "test CharSequence with Charset", "test ByteBuffer", "test byte[]",
+      "test byte[] with offset and length"};
+
+  /** Each fixture must render back to the text it was created from. */
+  @Test
+  public void testToString() {
+    for (int i = 0; i < FIXTURES.length; i++) {
+      assertEquals(FIXTURE_TEXT[i], FIXTURES[i].toString());
+    }
+  }
+
+  /** Checks prefix matching, including empty operands and single-letter case mismatches. */
+  @Test
+  public void testBeginsWith() {
+    assertTrue(BYTES_EMPTY.beginsWith(BYTES_EMPTY));
+    assertFalse(BYTES_EMPTY.beginsWith(BYTES_STRING));
+    assertTrue(BYTES_STRING.beginsWith(BYTES_EMPTY));
+    assertTrue(BYTES_STRING_CHARSET.beginsWith(BYTES_STRING));
+    assertFalse(BYTES_STRING.beginsWith(BYTES_STRING_CHARSET));
+    assertFalse(BYTES_CHARSEQ.beginsWith(BYTES_STRING));
+
+    // a candidate prefix that differs only in the case of one letter must not match
+    Bytes lowerCase = Bytes.of("abcdef");
+    for (String prefix : new String[] {"Abcd", "abcD", "abCd"}) {
+      assertFalse(lowerCase.beginsWith(Bytes.of(prefix)));
+    }
+  }
+
+  /** Checks suffix matching, including empty operands and single-letter case mismatches. */
+  @Test
+  public void testEndsWith() {
+    Bytes ing = Bytes.of("ing");
+
+    assertTrue(BYTES_EMPTY.endsWith(BYTES_EMPTY));
+    assertFalse(BYTES_EMPTY.endsWith(BYTES_STRING));
+    assertTrue(BYTES_STRING.endsWith(BYTES_EMPTY));
+    assertTrue(BYTES_STRING.endsWith(ing));
+    assertFalse(ing.endsWith(BYTES_STRING));
+    assertFalse(BYTES_CHARSEQ.endsWith(BYTES_STRING));
+
+    // a candidate suffix that differs only in the case of one letter must not match
+    Bytes lowerCase = Bytes.of("abcdef");
+    for (String suffix : new String[] {"Cdef", "cdeF", "cdEf"}) {
+      assertFalse(lowerCase.endsWith(Bytes.of(suffix)));
+    }
+  }
+
+  /** Compares {@code toArray()} against the encoded bytes of the text each value was built from. */
+  @Test
+  public void testToArray() {
+    // the empty fixture is compared against an empty US_ASCII encoding
+    assertArrayEquals("".getBytes(US_ASCII), BYTES_EMPTY.toArray());
+    // all remaining fixtures are compared against the UTF_8 encoding of their text
+    for (int i = 1; i < FIXTURES.length; i++) {
+      assertArrayEquals(FIXTURE_TEXT[i].getBytes(UTF_8), FIXTURES[i].toArray());
+    }
+
+    // test array with custom charset for String
+    byte[] utf16Expected = "test utf16".getBytes(UTF_16);
+    assertArrayEquals(utf16Expected, Bytes.of("test utf16", UTF_16).toArray());
+
+    // test array with custom charset for CharSequence
+    byte[] latin1Expected = "test ISO_8859_1".getBytes(ISO_8859_1);
+    assertArrayEquals(latin1Expected,
+        Bytes.of(new StringBuilder("test ISO_8859_1"), ISO_8859_1).toArray());
+  }
+
+  /**
+   * Reads a UTF_16BE-encoded value byte by byte and checks that indexes outside
+   * {@code [0, length)} are rejected.
+   */
+  @Test
+  public void testByteAt() {
+    String digits = "1234";
+    Bytes encoded = Bytes.of(digits, UTF_16BE);
+    assertEquals(digits.length() * 2, encoded.length()); // no BOM with UTF_16BE
+
+    // each char of the string occupies two consecutive bytes: high-order first, then low-order
+    for (int charIndex = 0; charIndex < digits.length(); charIndex++) {
+      int codePoint = digits.codePointAt(charIndex);
+      int highByteIndex = 2 * charIndex;
+      assertEquals(codePoint >> Byte.SIZE, encoded.byteAt(highByteIndex));
+      assertEquals(codePoint & 0xFF, encoded.byteAt(highByteIndex + 1));
+    }
+
+    assertByteAtRejects(encoded, encoded.length());
+    assertByteAtRejects(encoded, -1);
+  }
+
+  /**
+   * Fails the running test unless {@code bytes.byteAt(index)} throws
+   * {@link IndexOutOfBoundsException}.
+   *
+   * @param bytes the value to read from
+   * @param index an index the caller expects to be out of range
+   */
+  private static void assertByteAtRejects(Bytes bytes, int index) {
+    try {
+      int unexpected = bytes.byteAt(index);
+      fail("Previous line should have failed; byte: " + unexpected);
+    } catch (IndexOutOfBoundsException expected) {
+      // this is expected
+    }
+  }
+
+  /** Checks {@code length()} for every fixture and for a two-bytes-per-char encoding. */
+  @Test
+  public void testLength() {
+    // string length should be equal to array length, because all these use 7-bit ASCII chars with
+    // US_ASCII or UTF_8 encoding (the empty fixture contributes the expected length 0)
+    for (int i = 0; i < FIXTURES.length; i++) {
+      assertEquals(FIXTURE_TEXT[i].length(), FIXTURES[i].length());
+    }
+
+    // UTF_16 uses 2 bytes per char
+    String wideText = "test UTF_16BE";
+    assertEquals(wideText.length() * 2, Bytes.of(wideText, UTF_16BE).length());
+  }
+
+}