diff --git a/README.md b/README.md index c46b61c49e..49c2061226 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,73 @@ under the License. --> -# Arrow Java +# Siren fork of Arrow Java + +In this fork, the properties `drill.enable_unsafe_memory_access` and +`arrow.enable_unsafe_memory_access` are prefixed with `siren.` and their +default value is set to `true`. The `siren.drill.enable_unsafe_memory_access` property is deprecated. + +## Check that Arrow uses the Unsafe class for off-heap memory allocation +To check that Arrow uses the `Unsafe` class for memory allocation, run the unit test `CheckArrowTest` in +`https://github.com/sirensolutions/siren-platform/blob/master/core/src/test/java/io/siren/federate/core/common/CheckArrowTest.java`. + +## Build + +To build the `memory`, `format`, `vector` and `algorithm` modules: + +```sh +$ cd java +$ mvn clean package +``` + +Because the default value of the `enable_unsafe_memory_access` properties is now `true`, some +tests in `vector` fail. To build and install with both properties explicitly set to `false`: + +```sh +mvn -pl memory,memory/memory-core,memory/memory-unsafe,format,vector,algorithm install -Dsiren.arrow.enable_unsafe_memory_access=false -Dsiren.drill.enable_unsafe_memory_access=false +``` + +## Make a new release of Siren's Apache Arrow + +- Tests should pass. 
+ +- Make a new version: + +```sh +mvn versions:set -DnewVersion=siren-0.14.1-2 +``` + +- Tag the commit for the release: + +```sh +git tag --sign siren-0.14.1-2 +``` + +- Deploy to Siren's Google Artifact Registry: +```sh +# Deploy all modules (format, memory, vector, algorithm) +$ mvn -pl memory,memory/memory-core,memory/memory-unsafe,format,vector,algorithm deploy \ + -Dsiren.arrow.enable_unsafe_memory_access=false \ + -Dsiren.drill.enable_unsafe_memory_access=false \ + -DskipTests \ + -DaltDeploymentRepository=gar-maven-local-siren-snapshot::default::artifactregistry://europe-west1-maven.pkg.dev/siren-cicd/maven-local-siren-snapshot + +# Deploy the parent POM +$ mvn deploy:deploy-file \ + -Durl=artifactregistry://europe-west1-maven.pkg.dev/siren-cicd/maven-local-siren-snapshot \ + -DpomFile=pom.xml -Dfile=pom.xml \ + -DgroupId=org.apache.arrow \ + -DartifactId=arrow-java-root \ + -Dversion=siren-19.0.0-1-SNAPSHOT \ + -Dpackaging=pom +``` +## Update to a new version of Siren's Apache Arrow +Developer tips on updating to a new version of Arrow can be found here: https://sirensolutions.atlassian.net/wiki/spaces/EN/pages/3108864001/Upgrading+Federate+Apache+Arrow+Version . + +- Add `git@github.com:apache/arrow-java.git` as the `upstream` remote. +- Execute `git fetch --all --tags`. +- Create a temporary branch from `siren-changes`. +- Rebase against the new tag. ## Getting Started diff --git a/algorithm/pom.xml b/algorithm/pom.xml index 898c2605b6..90b8f2a2c0 100644 --- a/algorithm/pom.xml +++ b/algorithm/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-algorithm Arrow Algorithms @@ -32,7 +32,8 @@ under the License. org.apache.arrow arrow-vector - ${arrow.vector.classifier} + ${project.version} + shade org.apache.arrow @@ -41,18 +42,10 @@ under the License. 
test-jar test - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-netty - test - org.immutables value-annotations + diff --git a/algorithm/src/main/java/module-info.java b/algorithm/src/main/java/module-info.java index b347f55aa4..bb4f0f6afc 100644 --- a/algorithm/src/main/java/module-info.java +++ b/algorithm/src/main/java/module-info.java @@ -15,7 +15,7 @@ * limitations under the License. */ -module org.apache.arrow.algorithm { +module arrow.algorithm.siren { exports org.apache.arrow.algorithm.search; exports org.apache.arrow.algorithm.deduplicate; exports org.apache.arrow.algorithm.dictionary; @@ -24,6 +24,5 @@ exports org.apache.arrow.algorithm.sort; requires jdk.unsupported; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; + requires arrow.vector.siren; } diff --git a/format/pom.xml b/format/pom.xml index d3578b63d2..f0ade14f58 100644 --- a/format/pom.xml +++ b/format/pom.xml @@ -23,7 +23,7 @@ under the License. org.apache.arrow arrow-java-root - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-format diff --git a/gandiva/pom.xml b/gandiva/pom.xml index 5367bfdedf..bc26fa110d 100644 --- a/gandiva/pom.xml +++ b/gandiva/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT org.apache.arrow.gandiva @@ -40,6 +40,7 @@ under the License. org.apache.arrow arrow-memory-core + ${project.version} org.immutables @@ -66,6 +67,7 @@ under the License. org.slf4j slf4j-api + provided diff --git a/memory/memory-core/pom.xml b/memory/memory-core/pom.xml index 72ee69d60a..580df913dc 100644 --- a/memory/memory-core/pom.xml +++ b/memory/memory-core/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-memory-core @@ -39,6 +39,7 @@ under the License. 
org.slf4j slf4j-api + provided org.immutables diff --git a/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java b/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java index 50be9ad1fb..fcd9fbebf1 100644 --- a/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java +++ b/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java @@ -31,16 +31,18 @@ public class BoundsChecking { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BoundsChecking.class); static { - String envProperty = System.getenv("ARROW_ENABLE_UNSAFE_MEMORY_ACCESS"); - String oldProperty = System.getProperty("drill.enable_unsafe_memory_access"); + String envProperty = + System.getenv().getOrDefault("SIREN_ARROW_ENABLE_UNSAFE_MEMORY_ACCESS", "true"); + String oldProperty = System.getProperty("siren.drill.enable_unsafe_memory_access", "true"); if (oldProperty != null) { logger.warn( - "\"drill.enable_unsafe_memory_access\" has been renamed to \"arrow.enable_unsafe_memory_access\""); + "\"siren.drill.enable_unsafe_memory_access\" has been renamed to " + + "\"siren.arrow.enable_unsafe_memory_access\""); logger.warn( - "\"arrow.enable_unsafe_memory_access\" can be set to: " - + " true (to not check) or false (to check, default)"); + "\"siren.arrow.enable_unsafe_memory_access\" can be set to: " + + " true (to not check, default) or false (to check)"); } - String newProperty = System.getProperty("arrow.enable_unsafe_memory_access"); + String newProperty = System.getProperty("siren.arrow.enable_unsafe_memory_access", "true"); // The priority of determining the unsafe flag: // 1. The system properties take precedence over the environmental variable. 
diff --git a/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java b/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java index 91bd7cd905..e46c5912f3 100644 --- a/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java +++ b/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java @@ -34,10 +34,10 @@ public class MemoryUtil { private static final @Nullable Constructor DIRECT_BUFFER_CONSTRUCTOR; /** The unsafe object from which to access the off-heap memory. */ - private static final Unsafe UNSAFE; + public static final Unsafe UNSAFE; /** The start offset of array data relative to the start address of the array object. */ - private static final long BYTE_ARRAY_BASE_OFFSET; + public static final long BYTE_ARRAY_BASE_OFFSET; /** The offset of the address field with the {@link java.nio.ByteBuffer} object. */ private static final long BYTE_BUFFER_ADDRESS_OFFSET; @@ -82,7 +82,6 @@ public Object run() { // get the offset of the address field in a java.nio.Buffer object Field addressField = java.nio.Buffer.class.getDeclaredField("address"); - addressField.setAccessible(true); BYTE_BUFFER_ADDRESS_OFFSET = UNSAFE.objectFieldOffset(addressField); Constructor directBufferConstructor; @@ -103,10 +102,7 @@ public Object run() { constructor.setAccessible(true); logger.debug("Constructor for direct buffer found and made accessible"); return constructor; - } catch (NoSuchMethodException e) { - logger.debug("Cannot get constructor for direct buffer allocation", e); - return e; - } catch (SecurityException e) { + } catch (Exception e) { logger.debug("Cannot get constructor for direct buffer allocation", e); return e; } diff --git a/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java b/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java index 7d4d1e7b67..a3bb6c5b55 100644 --- 
a/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java +++ b/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java @@ -17,7 +17,6 @@ package org.apache.arrow.memory; import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; import java.lang.reflect.Field; import java.net.URLClassLoader; @@ -55,15 +54,15 @@ private boolean getFlagValue(ClassLoader classLoader) throws Exception { } /** - * Ensure the flag for bounds checking is enabled by default. This will protect users from JVM - * crashes. + * Siren: Ensure the flag for bounds checking is disabled by default. Enabling it will protect + * users from JVM crashes. */ @Test public void testDefaultValue() throws Exception { ClassLoader classLoader = copyClassLoader(); if (classLoader != null) { boolean boundsCheckingEnabled = getFlagValue(classLoader); - assertTrue(boundsCheckingEnabled); + assertFalse(boundsCheckingEnabled); } } diff --git a/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java b/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java index b5e0a71e7e..cb0b5471b1 100644 --- a/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java +++ b/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java @@ -26,7 +26,7 @@ public class TestOpens { /** Instantiating the RootAllocator should poke MemoryUtil and fail. */ @Test - @EnabledForJreRange(min = JAVA_16) + @EnabledForJreRange(max = JAVA_16) public void testMemoryUtilFailsLoudly() { // This test is configured by Maven to run WITHOUT add-opens. So this should fail on JDK16+ // (where JEP396 means that add-opens is required to access JDK internals). 
diff --git a/memory/memory-netty-buffer-patch/pom.xml b/memory/memory-netty-buffer-patch/pom.xml index 07dc7d2403..8e875b804f 100644 --- a/memory/memory-netty-buffer-patch/pom.xml +++ b/memory/memory-netty-buffer-patch/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-memory-netty-buffer-patch @@ -33,6 +33,7 @@ under the License. org.apache.arrow arrow-memory-core + ${project.version} io.netty diff --git a/memory/memory-netty/pom.xml b/memory/memory-netty/pom.xml index 6d660da117..f9c896408a 100644 --- a/memory/memory-netty/pom.xml +++ b/memory/memory-netty/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-memory-netty @@ -33,6 +33,7 @@ under the License. org.apache.arrow arrow-memory-core + ${project.version} org.apache.arrow diff --git a/memory/memory-unsafe/pom.xml b/memory/memory-unsafe/pom.xml index 92dc0c9fe5..655afe2b0d 100644 --- a/memory/memory-unsafe/pom.xml +++ b/memory/memory-unsafe/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-memory - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-memory-unsafe @@ -33,6 +33,7 @@ under the License. org.apache.arrow arrow-memory-core + ${project.version} org.immutables diff --git a/memory/pom.xml b/memory/pom.xml index bc34c26050..46807ca393 100644 --- a/memory/pom.xml +++ b/memory/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-memory pom diff --git a/pom.xml b/pom.xml index 777f7d5cef..6eb7bfd3c9 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ under the License. org.apache.arrow arrow-java-root - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT pom Apache Arrow Java Root POM @@ -65,17 +65,17 @@ under the License. 
- bom + format memory vector - tools - adapter/jdbc - flight - performance + + + + algorithm - adapter/avro - compression + + @@ -128,18 +128,22 @@ under the License. We have to wait new maven-jar-plugin release, and a new Apache POM release providing it --> 3.2.2 + + 19.0.0 + org.checkerframework checker-qual @@ -308,6 +312,8 @@ under the License. true false + + ${jpms.module.version} org.immutables @@ -693,7 +699,7 @@ under the License. verify true - true + false javax.annotation:javax.annotation-api:* @@ -767,6 +773,11 @@ under the License. os-maven-plugin 1.7.1 + + com.google.cloud.artifactregistry + artifactregistry-maven-wagon + 2.1.0 + @@ -1066,7 +1077,6 @@ under the License. cmake --build cpp-jni --target install --config Release ../ - ${cpp.dependencies.builded} @@ -1272,5 +1282,22 @@ under the License. + + + artifactory + + + + artifact-registry + gar-releases + artifactregistry://europe-west1-maven.pkg.dev/siren-cicd/maven-local-siren-release + + + artifact-registry + gar-snapshots + artifactregistry://europe-west1-maven.pkg.dev/siren-cicd/maven-local-siren-snapshot + + + diff --git a/vector/pom.xml b/vector/pom.xml index 89e9779008..453b8bea11 100644 --- a/vector/pom.xml +++ b/vector/pom.xml @@ -22,7 +22,7 @@ under the License. org.apache.arrow arrow-java-root - 19.0.0-SNAPSHOT + siren-19.0.0-1-SNAPSHOT arrow-vector Arrow Vectors @@ -32,10 +32,12 @@ under the License. org.apache.arrow arrow-format + ${project.version} org.apache.arrow arrow-memory-core + ${project.version} org.immutables @@ -62,15 +64,18 @@ under the License. commons-codec 1.20.0 + org.apache.arrow arrow-memory-unsafe - test + ${project.version} com.google.flatbuffers @@ -149,6 +154,7 @@ under the License. org.apache.maven.plugins maven-shade-plugin + 3.3.0 @@ -158,13 +164,17 @@ under the License. 
+ org.apache.arrow:arrow-memory-core + org.apache.arrow:arrow-memory-unsafe org.apache.arrow:arrow-format com.google.flatbuffers:* false true - shade-format-flatbuffers + shade + true + true com.google.flatbuffers