From 243a0d69a63fe6ddbcc85fdf254e1a3ea158257d Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Thu, 10 Aug 2017 19:19:00 -0400 Subject: [PATCH 1/8] Add build tooling for Maven, Travis CI, and Git --- .gitignore | 34 ++++++++++++ .travis.yml | 32 +++++++++++ pom.xml | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 pom.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69cdb02 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Maven ignores +/target/ + +# IDE ignores +/.settings/ +/.project +/.classpath +/.pydevproject +/.idea +/*.iml +/nbproject/ +/nbactions.xml +/nb-configuration.xml + +# OS and editor ignores +.DS_Store +*~ +*.swp + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..454e7cb --- /dev/null +++ b/.travis.yml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +language: java +jdk: + - oraclejdk8 +notifications: + irc: + channels: + - "chat.freenode.net#fluo" + on_success: change + on_failure: always + use_notice: true + skip_join: true +# speed up builds; don't use with 'mvn install', only 'mvn verify' +cache: + directories: + - $HOME/.m2 +# skip pre-fetch of maven dependencies by making install step a NOOP +install: true +script: mvn clean verify javadoc:jar diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..f2ff5f8 --- /dev/null +++ b/pom.xml @@ -0,0 +1,157 @@ + + + + 4.0.0 + + org.apache.fluo + fluo-parent + 1-incubating + + fluo-bytes + 1.0.0-SNAPSHOT + jar + Apache Fluo Bytes + A library for working with bytes with an extremely stable API + 2017 + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0 + repo + + + + scm:git:https://gitbox.apache.org/repos/asf/fluo-bytes.git + scm:git:https://gitbox.apache.org/repos/asf/fluo-bytes.git + HEAD + https://gitbox.apache.org/repos/asf?p=fluo-bytes.git + + + GitHub + https://github.com/apache/fluo-bytes/issues + + + 20 + true + fluo-bytes-release + + + + + junit + junit + 4.12 + + + + + + junit + junit + test + + + + + + + net.revelc.code + apilyzer-maven-plugin + 1.0.1 + + + org[.]apache[.]bytes[.].* + + + + + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + api-check + + check + + + + + + + + + + + + + + + true + 
${project.build.directory}/checkstyle-results-imports.xml + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + + analyze-only + + + ${mdep.failOnWarning} + + + + + + net.revelc.code + apilyzer-maven-plugin + + + apilyzer + + analyze + + + + + + + + + fluo-bytes-release + + + true + true + true + true + true + + + + From 7ac47ea8fd1e716454f451b22f8dbe15d4c45588 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Tue, 15 Aug 2017 21:55:46 -0400 Subject: [PATCH 2/8] WIP --- .../apache/bytes/AbstractByteSequence.java | 116 +++++ .../java/org/apache/bytes/ByteSequence.java | 35 ++ src/main/java/org/apache/bytes/Bytes.java | 408 ++++++++++++++++++ .../java/org/apache/bytes/BytesBuilder.java | 219 ++++++++++ .../org/apache/bytes/ByteSequenceTest.java | 29 ++ 5 files changed, 807 insertions(+) create mode 100644 src/main/java/org/apache/bytes/AbstractByteSequence.java create mode 100644 src/main/java/org/apache/bytes/ByteSequence.java create mode 100644 src/main/java/org/apache/bytes/Bytes.java create mode 100644 src/main/java/org/apache/bytes/BytesBuilder.java create mode 100644 src/test/java/org/apache/bytes/ByteSequenceTest.java diff --git a/src/main/java/org/apache/bytes/AbstractByteSequence.java b/src/main/java/org/apache/bytes/AbstractByteSequence.java new file mode 100644 index 0000000..7307c04 --- /dev/null +++ b/src/main/java/org/apache/bytes/AbstractByteSequence.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bytes; + +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.PrimitiveIterator; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.function.IntConsumer; +import java.util.stream.IntStream; +import java.util.stream.StreamSupport; + +/** + * @since 1.0.0 + */ +abstract class AbstractByteSequence implements ByteSequence { + + private void checkBounds(int i, String positionName) { + int len = length(); + if (i < 0 || i >= len) { + String msg = positionName + " not valid for "; + msg += len == 0 ? 
"empty Bytes" : "range [0," + len + ")"; + msg += ": " + i; + throw new IndexOutOfBoundsException(msg); + } + } + + protected void checkBounds(int i) { + checkBounds(i, "Index"); + } + + protected void checkBounds(int begin, int end) { + if (begin > end) { + throw new IndexOutOfBoundsException("End position (" + end + ") occurs before begin position (" + begin + ")"); + } + checkBounds(begin, "Begin position"); + checkBounds(begin, "End position"); + } + + @Override + public IntStream bytes() { + // return int stream to avoid auto-boxing + class ByteIterator implements PrimitiveIterator.OfInt { + int cur = 0; + + @Override + public boolean hasNext() { + return cur < length(); + } + + @Override + public int nextInt() { + if (hasNext()) { + return byteAt(cur++); // upcast to int + } else { + throw new NoSuchElementException(); + } + } + + @Override + public void forEachRemaining(IntConsumer block) { + for (; cur < length(); cur++) { + block.accept(byteAt(cur)); + } + } + + } + + return StreamSupport.intStream(() -> Spliterators.spliterator(new ByteIterator(), length(), Spliterator.ORDERED), + Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, false); + } + + @Override + public Iterator iterator() { + return new Iterator() { + int cur = 0; + + @Override + public boolean hasNext() { + return cur < length(); + } + + @Override + public Byte next() { + if (hasNext()) { + return byteAt(cur++); // auto-boxing here + } else { + throw new NoSuchElementException(); + } + } + + }; + } + + @Override + public Spliterator spliterator() { + return Spliterators.spliterator(iterator(), length(), Spliterator.ORDERED); + } + +} diff --git a/src/main/java/org/apache/bytes/ByteSequence.java b/src/main/java/org/apache/bytes/ByteSequence.java new file mode 100644 index 0000000..ae7d798 --- /dev/null +++ b/src/main/java/org/apache/bytes/ByteSequence.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bytes; + +import java.util.stream.IntStream; + +/** + * @since 1.0.0 + */ +public interface ByteSequence extends Iterable { + + int length(); + + byte byteAt(int index); + + ByteSequence subSequence(int start, int end); + + IntStream bytes(); + +} diff --git a/src/main/java/org/apache/bytes/Bytes.java b/src/main/java/org/apache/bytes/Bytes.java new file mode 100644 index 0000000..398b787 --- /dev/null +++ b/src/main/java/org/apache/bytes/Bytes.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bytes; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.lang.ref.WeakReference; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Objects; + +/** + * Represents bytes in Fluo. Bytes is an immutable wrapper around a byte array. Bytes always copies on creation and never lets its internal byte array escape. + * Its modeled after Java's String which is an immutable wrapper around a char array. It was created because there is nothing in Java like it at the moment. Its + * very nice having this immutable type, it avoids having to do defensive copies to ensure correctness. Maybe one day Java will have equivalents of String, + * StringBuilder, and Charsequence for bytes. + * + *

+ * The reason Fluo did not use ByteBuffer is because its not immutable, even a read only ByteBuffer has a mutable position. This makes ByteBuffer unsuitable for + * place where an immutable data type is desirable, like a key for a map. + * + *

+ * Bytes.EMPTY is used to represent a Bytes object with no data. + * + * @since 1.0.0 + */ +public final class Bytes extends AbstractByteSequence implements Comparable, Serializable { + + private static final long serialVersionUID = 1L; + + final byte[] data; + + private transient WeakReference utf8String; + + public static final Bytes EMPTY = new Bytes(new byte[0]); + + private int hashCode = 0; + + public Bytes() { + data = EMPTY.data; + } + + private Bytes(byte[] data) { + this.data = data; + } + + private Bytes(byte[] data, String utf8String) { + this.data = data; + this.utf8String = new WeakReference<>(utf8String); + } + + /** + * Gets a byte within this sequence of bytes + * + * @param i + * index into sequence + * @return byte + * @throws IllegalArgumentException + * if i is out of range + */ + @Override + public byte byteAt(int i) { + checkBounds(i); + return data[i]; + } + + /** + * Gets the length of bytes + */ + @Override + public int length() { + return data.length; + } + + /** + * Returns a portion of the Bytes object + * + * @param start + * index of subsequence start (inclusive) + * @param end + * index of subsequence end (exclusive) + */ + @Override + public Bytes subSequence(int begin, int end) { + checkBounds(begin, end); + return Bytes.of(data, begin, end - begin); + } + + /** + * Returns a byte array containing a copy of the bytes + */ + public byte[] toArray() { + byte[] copy = new byte[length()]; + System.arraycopy(data, 0, copy, 0, length()); + return copy; + } + + /** + * Provides a String representation, decoding these bytes with the provided charset + * + * @param charset + * the character set to decode these bytes + */ + public String toString(Charset charset) { + if (charset == StandardCharsets.UTF_8) { + // cache the utf8string if that's the charset provided + return toString(); + } + return new String(data, charset); + } + + /** + * Creates UTF-8 String using Bytes data + */ + @Override + public String toString() { + if (utf8String != 
null) { + String s = utf8String.get(); + if (s != null) { + return s; + } + } + + String s = new String(data, StandardCharsets.UTF_8); + utf8String = new WeakReference<>(s); + return s; + } + + /** + * @return A read only byte buffer thats backed by the internal byte array. + */ + public ByteBuffer toByteBuffer() { + return ByteBuffer.wrap(data).asReadOnlyBuffer(); + } + + /** + * @return An input stream thats backed by the internal byte array + */ + public InputStream toInputStream() { + return new ByteArrayInputStream(data); + } + + public void writeTo(OutputStream out) throws IOException { + // since Bytes is immutable, its important that we do not let the internal byte array escape + if (length() <= 32) { + int end = length(); + for (int i = 0; i < end; i++) { + out.write(data[i]); + } + } else { + out.write(toArray()); + } + } + + /** + * Compares this to the passed bytes, byte by byte, returning a negative, zero, or positive result if the first sequence is less than, equal to, or greater + * than the second. The comparison is performed starting with the first byte of each sequence, and proceeds until a pair of bytes differs, or one sequence + * runs out of byte (is shorter). A shorter sequence is considered less than a longer one. 
+ * + * @return comparison result + */ + @Override + public final int compareTo(Bytes other) { + if (this == other) { + return 0; + } else { + int minLen = Math.min(length(), other.length()); + for (int i = 0, j = 0; i < minLen; i++, j++) { + int a = (this.data[i] & 0xff); + int b = (other.data[j] & 0xff); + + if (a != b) { + return a - b; + } + } + return length() - other.length(); + } + } + + /** + * Returns true if, and only if, this Bytes object contains the same byte sequence as another Bytes object + */ + @Override + public final boolean equals(Object other) { + return this == other || ((other instanceof Bytes) && Arrays.equals(data, ((Bytes) other).data)); + } + + @Override + public final int hashCode() { + return hashCode == 0 ? (hashCode = Arrays.hashCode(data)) : hashCode; + } + + /** + * Creates a Bytes object by copying the data of the given byte array + */ + public static final Bytes of(byte[] array) { + Objects.requireNonNull(array); + if (array.length == 0) { + return EMPTY; + } + byte[] copy = new byte[array.length]; + System.arraycopy(array, 0, copy, 0, array.length); + return new Bytes(copy); + } + + /** + * Creates a Bytes object by copying the data of a subsequence of the given byte array + * + * @param data + * Byte data + * @param offset + * Starting offset in byte array (inclusive) + * @param length + * Number of bytes to include + */ + public static final Bytes of(byte[] data, int offset, int length) { + Objects.requireNonNull(data); + if (length == 0) { + return EMPTY; + } + byte[] copy = new byte[length]; + System.arraycopy(data, offset, copy, 0, length); + return new Bytes(copy); + } + + /** + * Creates a Bytes object by copying the data of the given ByteBuffer. + * + * @param bb + * Data will be read from this ByteBuffer in such a way that its position is not changed. 
+ */ + public static final Bytes of(ByteBuffer bb) { + Objects.requireNonNull(bb); + if (bb.remaining() == 0) { + return EMPTY; + } + byte[] data; + if (bb.hasArray()) { + data = Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), bb.limit() + bb.arrayOffset()); + } else { + data = new byte[bb.remaining()]; + // duplicate so that it does not change position + bb.duplicate().get(data); + } + return new Bytes(data); + } + + /** + * Creates a Bytes object by copying the data of the CharSequence and encoding it using UTF-8. + */ + public static final Bytes of(CharSequence cs) { + if (cs instanceof String) { + return of((String) cs); + } + + Objects.requireNonNull(cs); + if (cs.length() == 0) { + return EMPTY; + } + + ByteBuffer bb = StandardCharsets.UTF_8.encode(CharBuffer.wrap(cs)); + + if (bb.hasArray()) { + // this byte buffer has never escaped so can use its byte array directly + return Bytes.of(bb.array(), bb.position() + bb.arrayOffset(), bb.limit()); + } else { + byte[] data = new byte[bb.remaining()]; + bb.get(data); + return new Bytes(data); + } + } + + /** + * Creates a Bytes object by copying the value of the given String + */ + public static final Bytes of(String s) { + Objects.requireNonNull(s); + if (s.length() == 0) { + return EMPTY; + } + byte[] data = s.getBytes(StandardCharsets.UTF_8); + return new Bytes(data, s); + } + + /** + * Creates a Bytes object by copying the value of the given String with a given charset + */ + public static final Bytes of(String s, Charset c) { + Objects.requireNonNull(s); + Objects.requireNonNull(c); + if (s.length() == 0) { + return EMPTY; + } + byte[] data = s.getBytes(c); + return new Bytes(data); + } + + /** + * Checks if this has the passed prefix + * + * @param prefix + * is a Bytes object to compare to this + * @return true or false + */ + public boolean startsWith(Bytes prefix) { + Objects.requireNonNull(prefix, "startWith(Bytes prefix) cannot have null parameter"); + + if (prefix.length() > 
this.length()) { + return false; + } else { + int end = prefix.length(); + for (int i = 0, j = 0; i < end; i++, j++) { + if (this.data[i] != prefix.data[j]) { + return false; + } + } + } + return true; + } + + /** + * Checks if this has the passed suffix + * + * @param suffix + * is a Bytes object to compare to this + * @return true or false + */ + public boolean endsWith(Bytes suffix) { + Objects.requireNonNull(suffix, "endsWith(Bytes suffix) cannot have null parameter"); + int startOffset = this.length() - suffix.length(); + + if (startOffset < 0) { + return false; + } else { + int end = startOffset + suffix.length(); + for (int i = startOffset, j = 0; i < end; i++, j++) { + if (this.data[i] != suffix.data[j]) { + return false; + } + } + } + return true; + } + + /** + * Copy entire Bytes object to specific byte array. Uses the specified offset in the dest byte array to start the copy. + * + * @param dest + * destination array + * @param destPos + * starting position in the destination data. + * @exception IndexOutOfBoundsException + * if copying would cause access of data outside array bounds. + * @exception NullPointerException + * if either src or dest is null. + */ + public void copyTo(byte[] dest, int destPos) { + arraycopy(0, dest, destPos, this.length()); + } + + /** + * Copy a subsequence of Bytes to specific byte array. Uses the specified offset in the dest byte array to start the copy. + * + * @param start + * index of subsequence start (inclusive) + * @param end + * index of subsequence end (exclusive) + * @param dest + * destination array + * @param destPos + * starting position in the destination data. + * @exception IndexOutOfBoundsException + * if copying would cause access of data outside array bounds. + * @exception NullPointerException + * if either src or dest is null. 
+ */ + public void copyTo(int start, int end, byte[] dest, int destPos) { + // this.subSequence(start, end).copyTo(dest, destPos) would allocate another Bytes object + arraycopy(start, dest, destPos, end - start); + } + + private void arraycopy(int start, byte[] dest, int destPos, int length) { + // since dest is byte[], we can't get the ArrayStoreException + System.arraycopy(this.data, start, dest, destPos, length); + } + +} diff --git a/src/main/java/org/apache/bytes/BytesBuilder.java b/src/main/java/org/apache/bytes/BytesBuilder.java new file mode 100644 index 0000000..17076c7 --- /dev/null +++ b/src/main/java/org/apache/bytes/BytesBuilder.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.bytes; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +/** + * This class provides an easy, efficient, reusable mechanism for building immutable Bytes objects. 
+ * + * @since 1.0.0 + */ +public class BytesBuilder extends AbstractByteSequence { + + private byte[] ba; + private int len; + + /** + * Construct a builder with the specified initial capacity + * + * @param initialCapacity + * the initial size of the internal buffer + */ + public BytesBuilder(int initialCapacity) { + ba = new byte[initialCapacity]; + len = 0; + } + + /** + * Construct a builder with the default initial capacity (32) + */ + public BytesBuilder() { + this(32); + } + + private void ensureCapacity(int min) { + if (ba.length < min) { + int newLen = ba.length * 2; + if (newLen < min) { + newLen = min; + } + + ba = Arrays.copyOf(ba, newLen); + } + } + + /** + * Converts a character sequence to bytes using UTF-8 encoding and appends the resulting bytes + * + * @return self + */ + public BytesBuilder append(CharSequence cs) { + return append(cs, StandardCharsets.UTF_8); + } + + public BytesBuilder append(CharSequence cs, Charset charset) { + if (cs instanceof String) { + return append(cs, charset); + } + + ByteBuffer bb = charset.encode(CharBuffer.wrap(cs)); + + int length = bb.remaining(); + ensureCapacity(len + length); + bb.get(ba, len, length); + len += length; + return this; + } + + /** + * Converts string to bytes using UTF-8 encoding and appends bytes. + * + * @return self + */ + public BytesBuilder append(String s) { + return append(s, StandardCharsets.UTF_8); + } + + public BytesBuilder append(String s, Charset charset) { + return append(s.getBytes(charset)); + } + + public BytesBuilder append(Bytes b) { + ensureCapacity(len + b.length()); + // note: Bytes always uses all of its internal array, so source offset is 0 here + System.arraycopy(b.data, 0, ba, len, b.length()); + len += b.length(); + return this; + } + + public BytesBuilder append(byte[] bytes) { + ensureCapacity(len + bytes.length); + System.arraycopy(bytes, 0, ba, len, bytes.length); + len += bytes.length; + return this; + } + + /** + * Append a single byte. 
+ * + * @param b + * take the lower 8 bits and appends it. + * @return self + */ + public BytesBuilder append(int b) { + ensureCapacity(len + 1); + ba[len] = (byte) b; + len += 1; + return this; + } + + /** + * Append a section of bytes from array + * + * @param bytes + * - bytes to be appended + * @param offset + * - start of bytes to be appended + * @param length + * - how many bytes from 'offset' to be appended + * @return self + */ + public BytesBuilder append(byte[] bytes, int offset, int length) { + ensureCapacity(len + length); + System.arraycopy(bytes, offset, ba, len, length); + len += length; + return this; + } + + /** + * Append a sequence of bytes from an InputStream + * + * @param in + * data source to append from + * @param length + * number of bytes to read from data source + * @return self + */ + public BytesBuilder append(InputStream in, int length) throws IOException { + ensureCapacity(len + length); + new DataInputStream(in).readFully(ba, len, length); + len += length; + return this; + } + + /** + * Append data from a ByteBuffer + * + * @param bb + * data is read from the ByteBuffer in such a way that its position is not changed. + * @return self + */ + public BytesBuilder append(ByteBuffer bb) { + int length = bb.remaining(); + ensureCapacity(len + length); + bb.duplicate().get(ba, len, length); + len += length; + return this; + } + + /** + * Sets the point at which appending will start. This method can shrink or grow the ByteBuilder from its current state. If it grows it will zero pad. 
+ */ + public void setLength(int newLen) { + if (newLen < 0) { + throw new IllegalArgumentException("Negative length passed : " + newLen); + } + if (newLen > ba.length) { + ba = Arrays.copyOf(ba, newLen); + } + + if (newLen > len) { + Arrays.fill(ba, len, newLen, (byte) 0); + } + + len = newLen; + } + + @Override + public int length() { + return len; + } + + public Bytes toBytes() { + return Bytes.of(ba, 0, len); + } + + @Override + public byte byteAt(int index) { + return ba[index]; + } + + @Override + public ByteSequence subSequence(int start, int end) { + return Bytes.of(ba, start, end - start); + } + +} diff --git a/src/test/java/org/apache/bytes/ByteSequenceTest.java b/src/test/java/org/apache/bytes/ByteSequenceTest.java new file mode 100644 index 0000000..ff7bb46 --- /dev/null +++ b/src/test/java/org/apache/bytes/ByteSequenceTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bytes; + +import org.junit.Test; + +public class ByteSequenceTest { + + @Test + public void initialTest() { + // noop + } + +} From bd1ddb93b810c0a8fa6f294f9221a8cacacf3652 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Wed, 16 Aug 2017 00:24:55 -0400 Subject: [PATCH 3/8] WIP --- pom.xml | 1 + .../build-resources/eclipse-formatter.xml | 353 ++++++++++++++++++ .../apache/bytes/AbstractByteSequence.java | 6 +- src/main/java/org/apache/bytes/Bytes.java | 104 +++--- .../java/org/apache/bytes/BytesBuilder.java | 29 +- 5 files changed, 418 insertions(+), 75 deletions(-) create mode 100644 src/main/build-resources/eclipse-formatter.xml diff --git a/pom.xml b/pom.xml index f2ff5f8..c338725 100644 --- a/pom.xml +++ b/pom.xml @@ -47,6 +47,7 @@ 20 + ${project.basedir}/src/main/build-resources/eclipse-formatter.xml true fluo-bytes-release diff --git a/src/main/build-resources/eclipse-formatter.xml b/src/main/build-resources/eclipse-formatter.xml new file mode 100644 index 0000000..20a6796 --- /dev/null +++ b/src/main/build-resources/eclipse-formatter.xml @@ -0,0 +1,353 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/java/org/apache/bytes/AbstractByteSequence.java b/src/main/java/org/apache/bytes/AbstractByteSequence.java index 7307c04..878ed5a 100644 --- 
a/src/main/java/org/apache/bytes/AbstractByteSequence.java +++ b/src/main/java/org/apache/bytes/AbstractByteSequence.java @@ -47,7 +47,8 @@ protected void checkBounds(int i) { protected void checkBounds(int begin, int end) { if (begin > end) { - throw new IndexOutOfBoundsException("End position (" + end + ") occurs before begin position (" + begin + ")"); + throw new IndexOutOfBoundsException("End position (" + end + + ") occurs before begin position (" + begin + ")"); } checkBounds(begin, "Begin position"); checkBounds(begin, "End position"); @@ -82,7 +83,8 @@ public void forEachRemaining(IntConsumer block) { } - return StreamSupport.intStream(() -> Spliterators.spliterator(new ByteIterator(), length(), Spliterator.ORDERED), + return StreamSupport.intStream( + () -> Spliterators.spliterator(new ByteIterator(), length(), Spliterator.ORDERED), Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, false); } diff --git a/src/main/java/org/apache/bytes/Bytes.java b/src/main/java/org/apache/bytes/Bytes.java index 398b787..0de1beb 100644 --- a/src/main/java/org/apache/bytes/Bytes.java +++ b/src/main/java/org/apache/bytes/Bytes.java @@ -31,14 +31,17 @@ import java.util.Objects; /** - * Represents bytes in Fluo. Bytes is an immutable wrapper around a byte array. Bytes always copies on creation and never lets its internal byte array escape. - * Its modeled after Java's String which is an immutable wrapper around a char array. It was created because there is nothing in Java like it at the moment. Its - * very nice having this immutable type, it avoids having to do defensive copies to ensure correctness. Maybe one day Java will have equivalents of String, - * StringBuilder, and Charsequence for bytes. + * Represents bytes in Fluo. Bytes is an immutable wrapper around a byte array. Bytes always copies + * on creation and never lets its internal byte array escape. Its modeled after Java's String which + * is an immutable wrapper around a char array. 
It was created because there is nothing in Java like + * it at the moment. Its very nice having this immutable type, it avoids having to do defensive + * copies to ensure correctness. Maybe one day Java will have equivalents of String, StringBuilder, + * and Charsequence for bytes. * *

- * The reason Fluo did not use ByteBuffer is because its not immutable, even a read only ByteBuffer has a mutable position. This makes ByteBuffer unsuitable for - * place where an immutable data type is desirable, like a key for a map. + * The reason Fluo did not use ByteBuffer is because its not immutable, even a read only ByteBuffer + * has a mutable position. This makes ByteBuffer unsuitable for place where an immutable data type + * is desirable, like a key for a map. * *

* Bytes.EMPTY is used to represent a Bytes object with no data. @@ -73,11 +76,9 @@ private Bytes(byte[] data, String utf8String) { /** * Gets a byte within this sequence of bytes * - * @param i - * index into sequence + * @param i index into sequence * @return byte - * @throws IllegalArgumentException - * if i is out of range + * @throws IllegalArgumentException if i is out of range */ @Override public byte byteAt(int i) { @@ -96,10 +97,8 @@ public int length() { /** * Returns a portion of the Bytes object * - * @param start - * index of subsequence start (inclusive) - * @param end - * index of subsequence end (exclusive) + * @param start index of subsequence start (inclusive) + * @param end index of subsequence end (exclusive) */ @Override public Bytes subSequence(int begin, int end) { @@ -119,8 +118,7 @@ public byte[] toArray() { /** * Provides a String representation, decoding these bytes with the provided charset * - * @param charset - * the character set to decode these bytes + * @param charset the character set to decode these bytes */ public String toString(Charset charset) { if (charset == StandardCharsets.UTF_8) { @@ -174,9 +172,11 @@ public void writeTo(OutputStream out) throws IOException { } /** - * Compares this to the passed bytes, byte by byte, returning a negative, zero, or positive result if the first sequence is less than, equal to, or greater - * than the second. The comparison is performed starting with the first byte of each sequence, and proceeds until a pair of bytes differs, or one sequence - * runs out of byte (is shorter). A shorter sequence is considered less than a longer one. + * Compares this to the passed bytes, byte by byte, returning a negative, zero, or positive result + * if the first sequence is less than, equal to, or greater than the second. The comparison is + * performed starting with the first byte of each sequence, and proceeds until a pair of bytes + * differs, or one sequence runs out of byte (is shorter). 
A shorter sequence is considered less + * than a longer one. * * @return comparison result */ @@ -199,7 +199,8 @@ public final int compareTo(Bytes other) { } /** - * Returns true if, and only if, this Bytes object contains the same byte sequence as another Bytes object + * Returns true if, and only if, this Bytes object contains the same byte sequence as another + * Bytes object */ @Override public final boolean equals(Object other) { @@ -227,12 +228,9 @@ public static final Bytes of(byte[] array) { /** * Creates a Bytes object by copying the data of a subsequence of the given byte array * - * @param data - * Byte data - * @param offset - * Starting offset in byte array (inclusive) - * @param length - * Number of bytes to include + * @param data Byte data + * @param offset Starting offset in byte array (inclusive) + * @param length Number of bytes to include */ public static final Bytes of(byte[] data, int offset, int length) { Objects.requireNonNull(data); @@ -247,8 +245,8 @@ public static final Bytes of(byte[] data, int offset, int length) { /** * Creates a Bytes object by copying the data of the given ByteBuffer. * - * @param bb - * Data will be read from this ByteBuffer in such a way that its position is not changed. + * @param bb Data will be read from this ByteBuffer in such a way that its position is not + * changed. 
*/ public static final Bytes of(ByteBuffer bb) { Objects.requireNonNull(bb); @@ -257,7 +255,9 @@ public static final Bytes of(ByteBuffer bb) { } byte[] data; if (bb.hasArray()) { - data = Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), bb.limit() + bb.arrayOffset()); + data = + Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), + bb.limit() + bb.arrayOffset()); } else { data = new byte[bb.remaining()]; // duplicate so that it does not change position @@ -319,8 +319,7 @@ public static final Bytes of(String s, Charset c) { /** * Checks if this has the passed prefix * - * @param prefix - * is a Bytes object to compare to this + * @param prefix is a Bytes object to compare to this * @return true or false */ public boolean startsWith(Bytes prefix) { @@ -342,8 +341,7 @@ public boolean startsWith(Bytes prefix) { /** * Checks if this has the passed suffix * - * @param suffix - * is a Bytes object to compare to this + * @param suffix is a Bytes object to compare to this * @return true or false */ public boolean endsWith(Bytes suffix) { @@ -364,36 +362,32 @@ public boolean endsWith(Bytes suffix) { } /** - * Copy entire Bytes object to specific byte array. Uses the specified offset in the dest byte array to start the copy. + * Copy entire Bytes object to specific byte array. Uses the specified offset in the dest byte + * array to start the copy. * - * @param dest - * destination array - * @param destPos - * starting position in the destination data. - * @exception IndexOutOfBoundsException - * if copying would cause access of data outside array bounds. - * @exception NullPointerException - * if either src or dest is null. + * @param dest destination array + * @param destPos starting position in the destination data. + * @exception IndexOutOfBoundsException if copying would cause access of data outside array + * bounds. + * @exception NullPointerException if either src or dest is + * null. 
*/ public void copyTo(byte[] dest, int destPos) { arraycopy(0, dest, destPos, this.length()); } /** - * Copy a subsequence of Bytes to specific byte array. Uses the specified offset in the dest byte array to start the copy. + * Copy a subsequence of Bytes to specific byte array. Uses the specified offset in the dest byte + * array to start the copy. * - * @param start - * index of subsequence start (inclusive) - * @param end - * index of subsequence end (exclusive) - * @param dest - * destination array - * @param destPos - * starting position in the destination data. - * @exception IndexOutOfBoundsException - * if copying would cause access of data outside array bounds. - * @exception NullPointerException - * if either src or dest is null. + * @param start index of subsequence start (inclusive) + * @param end index of subsequence end (exclusive) + * @param dest destination array + * @param destPos starting position in the destination data. + * @exception IndexOutOfBoundsException if copying would cause access of data outside array + * bounds. + * @exception NullPointerException if either src or dest is + * null. 
*/ public void copyTo(int start, int end, byte[] dest, int destPos) { // this.subSequence(start, end).copyTo(dest, destPos) would allocate another Bytes object diff --git a/src/main/java/org/apache/bytes/BytesBuilder.java b/src/main/java/org/apache/bytes/BytesBuilder.java index 17076c7..5c945d0 100644 --- a/src/main/java/org/apache/bytes/BytesBuilder.java +++ b/src/main/java/org/apache/bytes/BytesBuilder.java @@ -39,8 +39,7 @@ public class BytesBuilder extends AbstractByteSequence { /** * Construct a builder with the specified initial capacity * - * @param initialCapacity - * the initial size of the internal buffer + * @param initialCapacity the initial size of the internal buffer */ public BytesBuilder(int initialCapacity) { ba = new byte[initialCapacity]; @@ -76,7 +75,7 @@ public BytesBuilder append(CharSequence cs) { public BytesBuilder append(CharSequence cs, Charset charset) { if (cs instanceof String) { - return append(cs, charset); + return append((String) cs, charset); } ByteBuffer bb = charset.encode(CharBuffer.wrap(cs)); @@ -119,8 +118,7 @@ public BytesBuilder append(byte[] bytes) { /** * Append a single byte. * - * @param b - * take the lower 8 bits and appends it. + * @param b take the lower 8 bits and appends it. 
* @return self */ public BytesBuilder append(int b) { @@ -133,12 +131,9 @@ public BytesBuilder append(int b) { /** * Append a section of bytes from array * - * @param bytes - * - bytes to be appended - * @param offset - * - start of bytes to be appended - * @param length - * - how many bytes from 'offset' to be appended + * @param bytes - bytes to be appended + * @param offset - start of bytes to be appended + * @param length - how many bytes from 'offset' to be appended * @return self */ public BytesBuilder append(byte[] bytes, int offset, int length) { @@ -151,10 +146,8 @@ public BytesBuilder append(byte[] bytes, int offset, int length) { /** * Append a sequence of bytes from an InputStream * - * @param in - * data source to append from - * @param length - * number of bytes to read from data source + * @param in data source to append from + * @param length number of bytes to read from data source * @return self */ public BytesBuilder append(InputStream in, int length) throws IOException { @@ -167,8 +160,7 @@ public BytesBuilder append(InputStream in, int length) throws IOException { /** * Append data from a ByteBuffer * - * @param bb - * data is read from the ByteBuffer in such a way that its position is not changed. + * @param bb data is read from the ByteBuffer in such a way that its position is not changed. * @return self */ public BytesBuilder append(ByteBuffer bb) { @@ -180,7 +172,8 @@ public BytesBuilder append(ByteBuffer bb) { } /** - * Sets the point at which appending will start. This method can shrink or grow the ByteBuilder from its current state. If it grows it will zero pad. + * Sets the point at which appending will start. This method can shrink or grow the ByteBuilder + * from its current state. If it grows it will zero pad. 
*/ public void setLength(int newLen) { if (newLen < 0) { From 4d984e1f2525c3e9ea738890be8c7e8b2d34aebd Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Wed, 13 Sep 2017 19:07:58 -0400 Subject: [PATCH 4/8] WIP --- .../apache/bytes/AbstractByteSequence.java | 5 +- .../java/org/apache/bytes/ByteSequence.java | 32 +++++++- src/main/java/org/apache/bytes/Bytes.java | 42 +++++++---- .../org/apache/bytes/ByteSequenceTest.java | 29 ------- src/test/java/org/apache/bytes/BytesTest.java | 75 +++++++++++++++++++ 5 files changed, 137 insertions(+), 46 deletions(-) delete mode 100644 src/test/java/org/apache/bytes/ByteSequenceTest.java create mode 100644 src/test/java/org/apache/bytes/BytesTest.java diff --git a/src/main/java/org/apache/bytes/AbstractByteSequence.java b/src/main/java/org/apache/bytes/AbstractByteSequence.java index 878ed5a..c0080ca 100644 --- a/src/main/java/org/apache/bytes/AbstractByteSequence.java +++ b/src/main/java/org/apache/bytes/AbstractByteSequence.java @@ -27,6 +27,8 @@ import java.util.stream.StreamSupport; /** + * Abstract class for implementations of {@link ByteSequence}. + * * @since 1.0.0 */ abstract class AbstractByteSequence implements ByteSequence { @@ -55,8 +57,7 @@ protected void checkBounds(int begin, int end) { } @Override - public IntStream bytes() { - // return int stream to avoid auto-boxing + public IntStream intStream() { class ByteIterator implements PrimitiveIterator.OfInt { int cur = 0; diff --git a/src/main/java/org/apache/bytes/ByteSequence.java b/src/main/java/org/apache/bytes/ByteSequence.java index ae7d798..c07a506 100644 --- a/src/main/java/org/apache/bytes/ByteSequence.java +++ b/src/main/java/org/apache/bytes/ByteSequence.java @@ -20,16 +20,46 @@ import java.util.stream.IntStream; /** + * Interface representing a sequence of bytes. + * * @since 1.0.0 */ public interface ByteSequence extends Iterable { + /** + * Supply the length of the sequence. 
Note: this is not the capacity of any underlying byte array, + * but the actual length of the sequence this object represents. + * + * @return the length of the sequence + */ int length(); + /** + * Retrieve a byte at the specified index. Values are between 0 (for the first byte) + * and length() - 1 for the last byte. + * + * @param index the position within the sequence to retrieve + * @return the byte at the specified index + */ byte byteAt(int index); + /** + * Retrieve a sequence of bytes from the original sequence. The returned sequence includes all + * bytes between start and end - 1, inclusive. + * + * @param start the index of the first byte to be included in the result + * @param end the index after the last byte to be included in the result + * @return a byte sequence containing the bytes between start and + * end - 1, inclusive + */ ByteSequence subSequence(int start, int end); - IntStream bytes(); + /** + * Return an IntStream representation of this byte sequence. This avoids auto-boxing when not + * necessary. Each int represents a single byte from the sequence. + * + * @return a stream of integers, one for each byte in the sequence + */ + IntStream intStream(); } diff --git a/src/main/java/org/apache/bytes/Bytes.java b/src/main/java/org/apache/bytes/Bytes.java index 0de1beb..a671b63 100644 --- a/src/main/java/org/apache/bytes/Bytes.java +++ b/src/main/java/org/apache/bytes/Bytes.java @@ -97,7 +97,7 @@ public int length() { /** * Returns a portion of the Bytes object * - * @param start index of subsequence start (inclusive) + * @param begin index of subsequence begin (inclusive) * @param end index of subsequence end (exclusive) */ @Override @@ -266,10 +266,14 @@ public static final Bytes of(ByteBuffer bb) { return new Bytes(data); } - /** - * Creates a Bytes object by copying the data of the CharSequence and encoding it using UTF-8. 
- */ - public static final Bytes of(CharSequence cs) { + public static final Bytes of(CharSequence cs, Charset charset) { + if (cs instanceof String) { + return of((String) cs, charset); + } + + Objects.requireNonNull(cs); + Objects.requireNonNull(charset); + if (cs instanceof String) { return of((String) cs); } @@ -279,10 +283,10 @@ public static final Bytes of(CharSequence cs) { return EMPTY; } - ByteBuffer bb = StandardCharsets.UTF_8.encode(CharBuffer.wrap(cs)); + ByteBuffer bb = charset.encode(CharBuffer.wrap(cs)); + // this byte buffer has never escaped so can use its byte array directly if (bb.hasArray()) { - // this byte buffer has never escaped so can use its byte array directly return Bytes.of(bb.array(), bb.position() + bb.arrayOffset(), bb.limit()); } else { byte[] data = new byte[bb.remaining()]; @@ -291,6 +295,13 @@ public static final Bytes of(CharSequence cs) { } } + /** + * Creates a Bytes object by copying the data of the CharSequence and encoding it using UTF-8. + */ + public static final Bytes of(CharSequence cs) { + return of(cs, StandardCharsets.UTF_8); + } + /** * Creates a Bytes object by copying the value of the given String */ @@ -307,6 +318,9 @@ public static final Bytes of(String s) { * Creates a Bytes object by copying the value of the given String with a given charset */ public static final Bytes of(String s, Charset c) { + if (c == StandardCharsets.UTF_8) { + return of(s); + } Objects.requireNonNull(s); Objects.requireNonNull(c); if (s.length() == 0) { @@ -322,8 +336,8 @@ public static final Bytes of(String s, Charset c) { * @param prefix is a Bytes object to compare to this * @return true or false */ - public boolean startsWith(Bytes prefix) { - Objects.requireNonNull(prefix, "startWith(Bytes prefix) cannot have null parameter"); + public boolean beginsWith(Bytes prefix) { + Objects.requireNonNull(prefix, "beginsWith(Bytes prefix) cannot have null parameter"); if (prefix.length() > this.length()) { return false; @@ -380,7 +394,7 @@ 
public void copyTo(byte[] dest, int destPos) { * Copy a subsequence of Bytes to specific byte array. Uses the specified offset in the dest byte * array to start the copy. * - * @param start index of subsequence start (inclusive) + * @param begin index of subsequence start (inclusive) * @param end index of subsequence end (exclusive) * @param dest destination array * @param destPos starting position in the destination data. @@ -389,14 +403,14 @@ public void copyTo(byte[] dest, int destPos) { * @exception NullPointerException if either src or dest is * null. */ - public void copyTo(int start, int end, byte[] dest, int destPos) { + public void copyTo(int begin, int end, byte[] dest, int destPos) { // this.subSequence(start, end).copyTo(dest, destPos) would allocate another Bytes object - arraycopy(start, dest, destPos, end - start); + arraycopy(begin, dest, destPos, end - begin); } - private void arraycopy(int start, byte[] dest, int destPos, int length) { + private void arraycopy(int begin, byte[] dest, int destPos, int length) { // since dest is byte[], we can't get the ArrayStoreException - System.arraycopy(this.data, start, dest, destPos, length); + System.arraycopy(this.data, begin, dest, destPos, length); } } diff --git a/src/test/java/org/apache/bytes/ByteSequenceTest.java b/src/test/java/org/apache/bytes/ByteSequenceTest.java deleted file mode 100644 index ff7bb46..0000000 --- a/src/test/java/org/apache/bytes/ByteSequenceTest.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.bytes; - -import org.junit.Test; - -public class ByteSequenceTest { - - @Test - public void initialTest() { - // noop - } - -} diff --git a/src/test/java/org/apache/bytes/BytesTest.java b/src/test/java/org/apache/bytes/BytesTest.java new file mode 100644 index 0000000..1e88d9e --- /dev/null +++ b/src/test/java/org/apache/bytes/BytesTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.bytes; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; + +import org.junit.Test; + +public class BytesTest { + + private static final Bytes BYTES_EMPTY = Bytes.EMPTY; + private static final Bytes BYTES_STRING = Bytes.of("test String"); + private static final Bytes BYTES_STRING_CHARSET = Bytes.of("test String with Charset", + StandardCharsets.US_ASCII); + private static final Bytes BYTES_CHARSEQ = Bytes.of(new StringBuilder("test CharSequence")); + private static final Bytes BYTES_CHARSEQ_CHARSET = Bytes.of(new StringBuilder( + "test CharSequence with Charset"), StandardCharsets.US_ASCII); + private static final Bytes BYTES_BB = Bytes.of(ByteBuffer.wrap("test ByteBuffer" + .getBytes(StandardCharsets.US_ASCII))); + private static final Bytes BYTES_ARRAY = Bytes.of("test byte[]" + .getBytes(StandardCharsets.US_ASCII)); + private static final Bytes BYTES_ARRAY_OFFSET = Bytes.of( + "---test byte[] with offset and length---".getBytes(StandardCharsets.US_ASCII), 3, 34); + + @Test + public void testToString() { + assertEquals("", BYTES_EMPTY.toString()); + assertEquals("test String", BYTES_STRING.toString()); + assertEquals("test String with Charset", BYTES_STRING_CHARSET.toString()); + assertEquals("test CharSequence", BYTES_CHARSEQ.toString()); + assertEquals("test CharSequence with Charset", BYTES_CHARSEQ_CHARSET.toString()); + assertEquals("test ByteBuffer", BYTES_BB.toString()); + assertEquals("test byte[]", BYTES_ARRAY.toString()); + assertEquals("test byte[] with offset and length", BYTES_ARRAY_OFFSET.toString()); + } + + @Test + public void testBeginsWith() { + assertTrue(BYTES_EMPTY.beginsWith(BYTES_EMPTY)); + assertFalse(BYTES_EMPTY.beginsWith(BYTES_STRING)); + assertTrue(BYTES_STRING.beginsWith(BYTES_EMPTY)); + assertTrue(BYTES_STRING_CHARSET.beginsWith(BYTES_STRING)); + 
assertFalse(BYTES_STRING.beginsWith(BYTES_STRING_CHARSET)); + } + + @Test + public void testEndsWith() { + assertTrue(BYTES_EMPTY.endsWith(BYTES_EMPTY)); + assertFalse(BYTES_EMPTY.endsWith(BYTES_STRING)); + assertTrue(BYTES_STRING.endsWith(BYTES_EMPTY)); + assertTrue(BYTES_STRING.endsWith(Bytes.of("ing"))); + assertFalse(Bytes.of("ing").endsWith(BYTES_STRING)); + } + +} From f7a73c1dab0d11238dfb76f01b129da98b12ef19 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Fri, 15 Sep 2017 18:42:58 -0400 Subject: [PATCH 5/8] WIP --- src/test/java/org/apache/bytes/BytesTest.java | 83 +++++++++++++++++-- 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/src/test/java/org/apache/bytes/BytesTest.java b/src/test/java/org/apache/bytes/BytesTest.java index 1e88d9e..4adab7e 100644 --- a/src/test/java/org/apache/bytes/BytesTest.java +++ b/src/test/java/org/apache/bytes/BytesTest.java @@ -17,12 +17,18 @@ package org.apache.bytes; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.UTF_16; +import static java.nio.charset.StandardCharsets.UTF_16BE; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import org.junit.Test; @@ -30,17 +36,15 @@ public class BytesTest { private static final Bytes BYTES_EMPTY = Bytes.EMPTY; private static final Bytes BYTES_STRING = Bytes.of("test String"); - private static final Bytes BYTES_STRING_CHARSET = Bytes.of("test String with Charset", - StandardCharsets.US_ASCII); + private static final Bytes BYTES_STRING_CHARSET = Bytes.of("test String with Charset", US_ASCII); private static final Bytes BYTES_CHARSEQ = Bytes.of(new 
StringBuilder("test CharSequence")); private static final Bytes BYTES_CHARSEQ_CHARSET = Bytes.of(new StringBuilder( - "test CharSequence with Charset"), StandardCharsets.US_ASCII); + "test CharSequence with Charset"), US_ASCII); private static final Bytes BYTES_BB = Bytes.of(ByteBuffer.wrap("test ByteBuffer" - .getBytes(StandardCharsets.US_ASCII))); - private static final Bytes BYTES_ARRAY = Bytes.of("test byte[]" - .getBytes(StandardCharsets.US_ASCII)); + .getBytes(US_ASCII))); + private static final Bytes BYTES_ARRAY = Bytes.of("test byte[]".getBytes(US_ASCII)); private static final Bytes BYTES_ARRAY_OFFSET = Bytes.of( - "---test byte[] with offset and length---".getBytes(StandardCharsets.US_ASCII), 3, 34); + "---test byte[] with offset and length---".getBytes(US_ASCII), 3, 34); @Test public void testToString() { @@ -72,4 +76,67 @@ public void testEndsWith() { assertFalse(Bytes.of("ing").endsWith(BYTES_STRING)); } + @Test + public void testToArray() { + assertArrayEquals("".getBytes(US_ASCII), BYTES_EMPTY.toArray()); + assertArrayEquals("test String".getBytes(UTF_8), BYTES_STRING.toArray()); + assertArrayEquals("test String with Charset".getBytes(UTF_8), BYTES_STRING_CHARSET.toArray()); + assertArrayEquals("test CharSequence".getBytes(UTF_8), BYTES_CHARSEQ.toArray()); + assertArrayEquals("test CharSequence with Charset".getBytes(UTF_8), + BYTES_CHARSEQ_CHARSET.toArray()); + assertArrayEquals("test ByteBuffer".getBytes(UTF_8), BYTES_BB.toArray()); + assertArrayEquals("test byte[]".getBytes(UTF_8), BYTES_ARRAY.toArray()); + assertArrayEquals("test byte[] with offset and length".getBytes(UTF_8), + BYTES_ARRAY_OFFSET.toArray()); + // test array with custom charset for String + assertArrayEquals("test utf16".getBytes(UTF_16), Bytes.of("test utf16", UTF_16).toArray()); + // test array with custom charset for CharSequence + assertArrayEquals("test ISO_8859_1".getBytes(ISO_8859_1), + Bytes.of(new StringBuilder("test ISO_8859_1"), ISO_8859_1).toArray()); + } + + @Test + 
public void testByteAt() { + String s = "1234"; + Bytes b = Bytes.of(s, UTF_16BE); + assertEquals(s.length() * 2, b.length()); // no BOM with UTF_16BE + // for each char in string, check that its corresponding bytes exist in the correct position + for (int i = 0; i < s.length(); ++i) { + int codePoint = s.codePointAt(i); + assertEquals(codePoint >> Byte.SIZE, b.byteAt(2 * i)); // check most significant bits + assertEquals(codePoint & 0xFF, b.byteAt(2 * i + 1)); // check least significant bits + } + + try { + b.byteAt(b.length()); + fail("Previous line should have failed"); + } catch (IndexOutOfBoundsException e) { + // this is expected + } + + try { + b.byteAt(-1); + fail("Previous line should have failed"); + } catch (IndexOutOfBoundsException e) { + // this is expected + } + } + + @Test + public void testLength() { + assertEquals(0, BYTES_EMPTY.length()); + // string length should be equal to array length, because all these use 7-bit ASCII chars with + // US_ASCII or UTF_8 encoding + assertEquals("test String".length(), BYTES_STRING.length()); + assertEquals("test String with Charset".length(), BYTES_STRING_CHARSET.length()); + assertEquals("test CharSequence".length(), BYTES_CHARSEQ.length()); + assertEquals("test CharSequence with Charset".length(), BYTES_CHARSEQ_CHARSET.length()); + assertEquals("test ByteBuffer".length(), BYTES_BB.length()); + assertEquals("test byte[]".length(), BYTES_ARRAY.length()); + assertEquals("test byte[] with offset and length".length(), BYTES_ARRAY_OFFSET.length()); + + // UTF_16 uses 2 bytes per char + assertEquals("test UTF_16BE".length() * 2, Bytes.of("test UTF_16BE", UTF_16BE).length()); + } + } From 79a399f3226ad5b51acb1b784e38c560b1a8f62b Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Thu, 28 Sep 2017 20:49:13 -0400 Subject: [PATCH 6/8] WIP --- .travis.yml | 4 +- pom.xml | 2 +- .../apache/bytes/AbstractByteSequence.java | 4 +- .../java/org/apache/bytes/ByteSequence.java | 16 ++--- 
src/main/java/org/apache/bytes/Bytes.java | 65 ++++++++++--------- .../java/org/apache/bytes/BytesBuilder.java | 9 +-- src/test/java/org/apache/bytes/BytesTest.java | 36 ++++++---- 7 files changed, 73 insertions(+), 63 deletions(-) diff --git a/.travis.yml b/.travis.yml index 454e7cb..cbf560d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ # limitations under the License. language: java jdk: - - oraclejdk8 + - openjdk8 notifications: irc: channels: @@ -29,4 +29,4 @@ cache: - $HOME/.m2 # skip pre-fetch of maven dependencies by making install step a NOOP install: true -script: mvn clean verify javadoc:jar +script: mvn -C clean verify javadoc:jar diff --git a/pom.xml b/pom.xml index c338725..87595d3 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ org.apache.fluo fluo-parent - 1-incubating + 2 fluo-bytes 1.0.0-SNAPSHOT diff --git a/src/main/java/org/apache/bytes/AbstractByteSequence.java b/src/main/java/org/apache/bytes/AbstractByteSequence.java index c0080ca..8d46432 100644 --- a/src/main/java/org/apache/bytes/AbstractByteSequence.java +++ b/src/main/java/org/apache/bytes/AbstractByteSequence.java @@ -49,8 +49,8 @@ protected void checkBounds(int i) { protected void checkBounds(int begin, int end) { if (begin > end) { - throw new IndexOutOfBoundsException("End position (" + end - + ") occurs before begin position (" + begin + ")"); + throw new IndexOutOfBoundsException( + "End position (" + end + ") occurs before begin position (" + begin + ")"); } checkBounds(begin, "Begin position"); checkBounds(begin, "End position"); diff --git a/src/main/java/org/apache/bytes/ByteSequence.java b/src/main/java/org/apache/bytes/ByteSequence.java index c07a506..7bcd2f6 100644 --- a/src/main/java/org/apache/bytes/ByteSequence.java +++ b/src/main/java/org/apache/bytes/ByteSequence.java @@ -27,16 +27,16 @@ public interface ByteSequence extends Iterable { /** - * Supply the length of the sequence. 
Note: this is not the capacity of any underlying byte array, - * but the actual length of the sequence this object represents. + * The length of the sequence this object represents. It does not necessarily reflect the size of + * any internal data structures, such as an internal byte array. * * @return the length of the sequence */ int length(); /** - * Retrieve a byte at the specified index. Values are between 0 (for the first byte) - * and length() - 1 for the last byte. + * Retrieve a byte at the specified index. Valid indices are between 0 (for the first + * byte) and length() - 1 for the last byte. * * @param index the position within the sequence to retrieve * @return the byte at the specified index @@ -45,14 +45,14 @@ public interface ByteSequence extends Iterable { /** * Retrieve a sequence of bytes from the original sequence. The returned sequence includes all - * bytes between start and end - 1, inclusive. + * bytes between begin and end - 1, inclusive. * - * @param start the index of the first byte to be included in the result + * @param begin the index of the first byte to be included in the result * @param end the index after the last byte to be included in the result - * @return a byte sequence containing the bytes between start and + * @return a byte sequence containing the bytes between begin and * end - 1, inclusive */ - ByteSequence subSequence(int start, int end); + ByteSequence subSequence(int begin, int end); /** * Return an IntStream representation of this byte sequence. 
This avoids auto-boxing when not diff --git a/src/main/java/org/apache/bytes/Bytes.java b/src/main/java/org/apache/bytes/Bytes.java index a671b63..d058338 100644 --- a/src/main/java/org/apache/bytes/Bytes.java +++ b/src/main/java/org/apache/bytes/Bytes.java @@ -52,7 +52,7 @@ public final class Bytes extends AbstractByteSequence implements Comparable utf8String; @@ -204,6 +204,11 @@ public final int compareTo(Bytes other) { */ @Override public final boolean equals(Object other) { + // TODO consider potential optimizations: + // 1. use hashcode if computed for both objects already, since we only compute hashcode once and + // store it + // 2. compare last byte or last 2 bytes first, to quickly see if they are different at the end; + // very helpful for sorted data return this == other || ((other instanceof Bytes) && Arrays.equals(data, ((Bytes) other).data)); } @@ -255,9 +260,8 @@ public static final Bytes of(ByteBuffer bb) { } byte[] data; if (bb.hasArray()) { - data = - Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), - bb.limit() + bb.arrayOffset()); + data = Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), + bb.limit() + bb.arrayOffset()); } else { data = new byte[bb.remaining()]; // duplicate so that it does not change position @@ -353,64 +357,61 @@ public boolean beginsWith(Bytes prefix) { } /** - * Checks if this has the passed suffix + * Check if this has the provided suffix. 
* * @param suffix is a Bytes object to compare to this * @return true or false */ public boolean endsWith(Bytes suffix) { Objects.requireNonNull(suffix, "endsWith(Bytes suffix) cannot have null parameter"); - int startOffset = this.length() - suffix.length(); - - if (startOffset < 0) { + int suffixLen = suffix.length(); + int len = length(); + if (suffixLen > len) { return false; - } else { - int end = startOffset + suffix.length(); - for (int i = startOffset, j = 0; i < end; i++, j++) { - if (this.data[i] != suffix.data[j]) { - return false; - } + } + + // comparing from the back; TODO see if comparing forwards is faster + for (int i = suffixLen - 1, j = len - 1; i >= 0; i--, j--) { + if (suffix.data[i] != data[j]) { + return false; } } return true; } /** - * Copy entire Bytes object to specific byte array. Uses the specified offset in the dest byte - * array to start the copy. + * Copy this entire Bytes object into the destination byte array, dest, at position + * destPos. * - * @param dest destination array - * @param destPos starting position in the destination data. + * @param dest destination array into which bytes are copied + * @param destPos the position in the destination array where the subsequence will be copied * @exception IndexOutOfBoundsException if copying would cause access of data outside array * bounds. * @exception NullPointerException if either src or dest is * null. */ public void copyTo(byte[] dest, int destPos) { - arraycopy(0, dest, destPos, this.length()); + copyTo(0, dest, destPos, length()); } /** - * Copy a subsequence of Bytes to specific byte array. Uses the specified offset in the dest byte - * array to start the copy. + * Copy length bytes from this Bytes object, starting at the begin + * position into the destination byte array, dest, at position destPos. + * All bytes between begin and begin+length-1, inclusive, are copied. + * The destination array must be large enough. 
* - * @param begin index of subsequence start (inclusive) - * @param end index of subsequence end (exclusive) - * @param dest destination array - * @param destPos starting position in the destination data. + * @param begin index of the beginning of the subsequence to copy (inclusive) + * @param dest destination array into which bytes are copied + * @param destPos the position in the destination array where the subsequence will be copied + * @param length the length of the sequence to copy * @exception IndexOutOfBoundsException if copying would cause access of data outside array * bounds. * @exception NullPointerException if either src or dest is * null. */ - public void copyTo(int begin, int end, byte[] dest, int destPos) { - // this.subSequence(start, end).copyTo(dest, destPos) would allocate another Bytes object - arraycopy(begin, dest, destPos, end - begin); - } - - private void arraycopy(int begin, byte[] dest, int destPos, int length) { + public void copyTo(int begin, byte[] dest, int destPos, int length) { // since dest is byte[], we can't get the ArrayStoreException - System.arraycopy(this.data, begin, dest, destPos, length); + System.arraycopy(data, begin, dest, destPos, length); } } diff --git a/src/main/java/org/apache/bytes/BytesBuilder.java b/src/main/java/org/apache/bytes/BytesBuilder.java index 5c945d0..33517d2 100644 --- a/src/main/java/org/apache/bytes/BytesBuilder.java +++ b/src/main/java/org/apache/bytes/BytesBuilder.java @@ -102,8 +102,7 @@ public BytesBuilder append(String s, Charset charset) { public BytesBuilder append(Bytes b) { ensureCapacity(len + b.length()); - // note: Bytes always uses all of its internal array, so source offset is 0 here - System.arraycopy(b.data, 0, ba, len, b.length()); + b.copyTo(ba, len); len += b.length(); return this; } @@ -205,8 +204,10 @@ public byte byteAt(int index) { } @Override - public ByteSequence subSequence(int start, int end) { - return Bytes.of(ba, start, end - start); + public BytesBuilder 
subSequence(int begin, int end) { + checkBounds(begin, end); + int size = end - begin; + return new BytesBuilder(size).append(ba, begin, size); } } diff --git a/src/test/java/org/apache/bytes/BytesTest.java b/src/test/java/org/apache/bytes/BytesTest.java index 4adab7e..43d179d 100644 --- a/src/test/java/org/apache/bytes/BytesTest.java +++ b/src/test/java/org/apache/bytes/BytesTest.java @@ -17,6 +17,10 @@ package org.apache.bytes; +import java.nio.ByteBuffer; + +import org.junit.Test; + import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.US_ASCII; import static java.nio.charset.StandardCharsets.UTF_16; @@ -28,23 +32,19 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import java.nio.ByteBuffer; - -import org.junit.Test; - public class BytesTest { private static final Bytes BYTES_EMPTY = Bytes.EMPTY; private static final Bytes BYTES_STRING = Bytes.of("test String"); private static final Bytes BYTES_STRING_CHARSET = Bytes.of("test String with Charset", US_ASCII); private static final Bytes BYTES_CHARSEQ = Bytes.of(new StringBuilder("test CharSequence")); - private static final Bytes BYTES_CHARSEQ_CHARSET = Bytes.of(new StringBuilder( - "test CharSequence with Charset"), US_ASCII); - private static final Bytes BYTES_BB = Bytes.of(ByteBuffer.wrap("test ByteBuffer" - .getBytes(US_ASCII))); + private static final Bytes BYTES_CHARSEQ_CHARSET = + Bytes.of(new StringBuilder("test CharSequence with Charset"), US_ASCII); + private static final Bytes BYTES_BB = + Bytes.of(ByteBuffer.wrap("test ByteBuffer".getBytes(US_ASCII))); private static final Bytes BYTES_ARRAY = Bytes.of("test byte[]".getBytes(US_ASCII)); - private static final Bytes BYTES_ARRAY_OFFSET = Bytes.of( - "---test byte[] with offset and length---".getBytes(US_ASCII), 3, 34); + private static final Bytes BYTES_ARRAY_OFFSET = + Bytes.of("---test byte[] with offset and length---".getBytes(US_ASCII), 3, 34); @Test public void 
testToString() { @@ -65,6 +65,10 @@ public void testBeginsWith() { assertTrue(BYTES_STRING.beginsWith(BYTES_EMPTY)); assertTrue(BYTES_STRING_CHARSET.beginsWith(BYTES_STRING)); assertFalse(BYTES_STRING.beginsWith(BYTES_STRING_CHARSET)); + assertFalse(BYTES_CHARSEQ.beginsWith(BYTES_STRING)); + assertFalse(Bytes.of("abcdef").beginsWith(Bytes.of("Abcd"))); + assertFalse(Bytes.of("abcdef").beginsWith(Bytes.of("abcD"))); + assertFalse(Bytes.of("abcdef").beginsWith(Bytes.of("abCd"))); } @Test @@ -74,6 +78,10 @@ public void testEndsWith() { assertTrue(BYTES_STRING.endsWith(BYTES_EMPTY)); assertTrue(BYTES_STRING.endsWith(Bytes.of("ing"))); assertFalse(Bytes.of("ing").endsWith(BYTES_STRING)); + assertFalse(BYTES_CHARSEQ.endsWith(BYTES_STRING)); + assertFalse(Bytes.of("abcdef").endsWith(Bytes.of("Cdef"))); + assertFalse(Bytes.of("abcdef").endsWith(Bytes.of("cdeF"))); + assertFalse(Bytes.of("abcdef").endsWith(Bytes.of("cdEf"))); } @Test @@ -108,15 +116,15 @@ public void testByteAt() { } try { - b.byteAt(b.length()); - fail("Previous line should have failed"); + int a = b.byteAt(b.length()); + fail("Previous line should have failed; byte: " + a); } catch (IndexOutOfBoundsException e) { // this is expected } try { - b.byteAt(-1); - fail("Previous line should have failed"); + int a = b.byteAt(-1); + fail("Previous line should have failed; byte: " + a); } catch (IndexOutOfBoundsException e) { // this is expected } From 42ec654a28d072c9a189016ab8bc77ca5b958621 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Wed, 18 Oct 2017 17:12:43 -0400 Subject: [PATCH 7/8] WIP --- pom.xml | 34 ++++++++++ .../apache/bytes/AbstractByteSequence.java | 4 +- .../java/org/apache/bytes/ByteSequence.java | 16 +++++ src/main/java/org/apache/bytes/Bytes.java | 67 ++++++++++++++----- .../java/org/apache/bytes/BytesBuilder.java | 12 ++++ src/test/java/org/apache/bytes/BytesTest.java | 12 ++-- 6 files changed, 121 insertions(+), 24 deletions(-) diff --git a/pom.xml b/pom.xml index 87595d3..f72f2d0 
100644 --- a/pom.xml +++ b/pom.xml @@ -82,9 +82,43 @@ + + net.revelc.code.formatter + formatter-maven-plugin + + + true + + + + net.revelc.code + formatter-maven-plugin + 0.5.2 + + LF + true + ${formatter.config} + + + + org.apache.fluo + build-resources + ${build-resources.version} + + + + + format-source + + format + + process-sources + + + org.apache.maven.plugins maven-checkstyle-plugin diff --git a/src/main/java/org/apache/bytes/AbstractByteSequence.java b/src/main/java/org/apache/bytes/AbstractByteSequence.java index 8d46432..c0080ca 100644 --- a/src/main/java/org/apache/bytes/AbstractByteSequence.java +++ b/src/main/java/org/apache/bytes/AbstractByteSequence.java @@ -49,8 +49,8 @@ protected void checkBounds(int i) { protected void checkBounds(int begin, int end) { if (begin > end) { - throw new IndexOutOfBoundsException( - "End position (" + end + ") occurs before begin position (" + begin + ")"); + throw new IndexOutOfBoundsException("End position (" + end + + ") occurs before begin position (" + begin + ")"); } checkBounds(begin, "Begin position"); checkBounds(begin, "End position"); diff --git a/src/main/java/org/apache/bytes/ByteSequence.java b/src/main/java/org/apache/bytes/ByteSequence.java index 7bcd2f6..6a3029b 100644 --- a/src/main/java/org/apache/bytes/ByteSequence.java +++ b/src/main/java/org/apache/bytes/ByteSequence.java @@ -62,4 +62,20 @@ public interface ByteSequence extends Iterable { */ IntStream intStream(); + /** + * Compares this sequence with the provided byte array using a lexicographical comparison. + * + * @param bytes the byte array with which to compare this sequence + * @return a value following the same conventions as {@link Comparable#compareTo(Object)} + */ + int compareTo(byte[] bytes); + + /** + * Determines if the contents of this byte sequence is equivalent to the content of the provided + * byte array. 
+ * + * @param bytes the byte array with which to compare this sequence + * @return true if the bytes they represent are the same + */ + boolean contentEquals(byte[] bytes); } diff --git a/src/main/java/org/apache/bytes/Bytes.java b/src/main/java/org/apache/bytes/Bytes.java index d058338..855bf8b 100644 --- a/src/main/java/org/apache/bytes/Bytes.java +++ b/src/main/java/org/apache/bytes/Bytes.java @@ -185,17 +185,22 @@ public final int compareTo(Bytes other) { if (this == other) { return 0; } else { - int minLen = Math.min(length(), other.length()); - for (int i = 0, j = 0; i < minLen; i++, j++) { - int a = (this.data[i] & 0xff); - int b = (other.data[j] & 0xff); + return compareTo(other.data); + } + } - if (a != b) { - return a - b; - } + @Override + public int compareTo(byte[] bytes) { + int minLen = Math.min(length(), bytes.length); + for (int i = 0, j = 0; i < minLen; i++, j++) { + int a = (this.data[i] & 0xff); + int b = (bytes[j] & 0xff); + + if (a != b) { + return a - b; } - return length() - other.length(); } + return length() - bytes.length; } /** @@ -204,12 +209,41 @@ public final int compareTo(Bytes other) { */ @Override public final boolean equals(Object other) { - // TODO consider potential optimizations: - // 1. use hashcode if computed for both objects already, since we only compute hashcode once and - // store it - // 2. 
compare last byte or last 2 bytes first, to quickly see if they are different at the end; - // very helpful for sorted data - return this == other || ((other instanceof Bytes) && Arrays.equals(data, ((Bytes) other).data)); + if (this == other) { + return true; + } + if (other instanceof Bytes) { + Bytes otherBytes = (Bytes) other; + if (hashCode != 0 && otherBytes.hashCode != 0 && hashCode != otherBytes.hashCode) { + // if both hashCodes have been pre-computed (by calling hashCode()) and fail to match, then + // they can't be equal + return false; + } + return contentEquals(otherBytes.data); + } + return false; + } + + @Override + public boolean contentEquals(byte[] bytes) { + if (data.length != bytes.length) { + // can't be equal if they differ in length; this is checked again in Arrays.equals, but we + // check here because it's a prerequisite for the last byte + // comparison optimization below + return false; + } + if (data.length == 0 && bytes.length == 0) { + // both are empty + return true; + } + int lastByte = data.length - 1; + if (data[lastByte] != bytes[lastByte]) { + // at this point, both byte arrays are non-zero and the same length; quickly compare last byte + // before checking the full array; this is particularly + // helpful for sorted data which have long prefixes in common + return false; + } + return Arrays.equals(data, bytes); } @Override @@ -260,8 +294,9 @@ public static final Bytes of(ByteBuffer bb) { } byte[] data; if (bb.hasArray()) { - data = Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), - bb.limit() + bb.arrayOffset()); + data = + Arrays.copyOfRange(bb.array(), bb.position() + bb.arrayOffset(), + bb.limit() + bb.arrayOffset()); } else { data = new byte[bb.remaining()]; // duplicate so that it does not change position diff --git a/src/main/java/org/apache/bytes/BytesBuilder.java b/src/main/java/org/apache/bytes/BytesBuilder.java index 33517d2..d5d8d2d 100644 --- a/src/main/java/org/apache/bytes/BytesBuilder.java +++
b/src/main/java/org/apache/bytes/BytesBuilder.java @@ -210,4 +210,16 @@ public BytesBuilder subSequence(int begin, int end) { return new BytesBuilder(size).append(ba, begin, size); } + @Override + public int compareTo(byte[] bytes) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean contentEquals(byte[] bytes) { + // TODO Auto-generated method stub + return false; + } + } diff --git a/src/test/java/org/apache/bytes/BytesTest.java b/src/test/java/org/apache/bytes/BytesTest.java index 43d179d..5806728 100644 --- a/src/test/java/org/apache/bytes/BytesTest.java +++ b/src/test/java/org/apache/bytes/BytesTest.java @@ -38,13 +38,13 @@ public class BytesTest { private static final Bytes BYTES_STRING = Bytes.of("test String"); private static final Bytes BYTES_STRING_CHARSET = Bytes.of("test String with Charset", US_ASCII); private static final Bytes BYTES_CHARSEQ = Bytes.of(new StringBuilder("test CharSequence")); - private static final Bytes BYTES_CHARSEQ_CHARSET = - Bytes.of(new StringBuilder("test CharSequence with Charset"), US_ASCII); - private static final Bytes BYTES_BB = - Bytes.of(ByteBuffer.wrap("test ByteBuffer".getBytes(US_ASCII))); + private static final Bytes BYTES_CHARSEQ_CHARSET = Bytes.of(new StringBuilder( + "test CharSequence with Charset"), US_ASCII); + private static final Bytes BYTES_BB = Bytes.of(ByteBuffer.wrap("test ByteBuffer" + .getBytes(US_ASCII))); private static final Bytes BYTES_ARRAY = Bytes.of("test byte[]".getBytes(US_ASCII)); - private static final Bytes BYTES_ARRAY_OFFSET = - Bytes.of("---test byte[] with offset and length---".getBytes(US_ASCII), 3, 34); + private static final Bytes BYTES_ARRAY_OFFSET = Bytes.of( + "---test byte[] with offset and length---".getBytes(US_ASCII), 3, 34); @Test public void testToString() { From db0c89f19298b101109f7249566e058c6dc75add Mon Sep 17 00:00:00 2001 From: Lapo Luchini Date: Tue, 4 Dec 2018 00:52:26 +0100 Subject: [PATCH 8/8] Clarify Javadoc for Bytes.toString() 
Update the Javadoc for the Bytes.toString() method to clarify that the Java String is constructed by decoding the Bytes as UTF-8. --- src/main/java/org/apache/bytes/Bytes.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/bytes/Bytes.java b/src/main/java/org/apache/bytes/Bytes.java index 855bf8b..c76d125 100644 --- a/src/main/java/org/apache/bytes/Bytes.java +++ b/src/main/java/org/apache/bytes/Bytes.java @@ -129,7 +129,7 @@ public String toString(Charset charset) { } /** - * Creates UTF-8 String using Bytes data + * Provides a String representation, decoding the bytes as UTF-8 */ @Override public String toString() {