diff --git a/.gitignore b/.gitignore index bb9686ae629..e8d186e0849 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,7 @@ src/main/webapp/resources/images/dataverseproject.png.thumb140 /conf/keycloak/docker-dev-volumes /docker-dev-volumes /.vs + +# Personal Docker Compose overrides for local development +# (use with: docker compose -f docker-compose-dev.yml -f docker-compose.override.yml ...) +/docker-compose.override.yml diff --git a/doc/release-notes/10156-fast-redeploy-scripts.md b/doc/release-notes/10156-fast-redeploy-scripts.md deleted file mode 100644 index 2d53ba13ab8..00000000000 --- a/doc/release-notes/10156-fast-redeploy-scripts.md +++ /dev/null @@ -1,42 +0,0 @@ -## Fast Redeploy Scripts for Container-Based Development - -Three new shell scripts in `scripts/dev/` enable fast iterative development for Dataverse contributors working with the container-based development environment: - -- **`dev-start-frd.sh`**: One-time setup (full build → exploded WAR → start containers) -- **`dev-frd.sh`**: Incremental recompile + redeploy (~12s vs. ~54s for traditional full rebuild workflow, 4.5x faster) -- **`dev-down-frd.sh`**: Clean shutdown of dev environment - -This command-line workflow provides a fast feedback loop for developers who prefer CLI-based development or use lightweight editors like VS Code or Vim, complementing the existing IDE-based hot reload options (IntelliJ Ultimate, NetBeans). - -### New Files - -- **`docker-compose.override.yml`**: Increases memory limits to 8GB (from the 2GB limit set for GitHub Actions CI) for local development. Automatically used by the fast-redeploy scripts. - -### Key Features - -- No infrastructure changes (works with existing docker-compose-dev.yml) -- Optional workflow (doesn't affect other development approaches) -- Completes in ~12 seconds instead of ~54 seconds after code changes (4.5x faster) - -**Note:** Performance timings may vary depending on your hardware configuration. 
- -### Typical Workflow - -```bash -# One-time setup -./scripts/dev/dev-start-frd.sh - -# Make code changes... - -# Fast redeploy -./scripts/dev/dev-frd.sh - -# Repeat as needed - -# When finished -./scripts/dev/dev-down-frd.sh -``` - -### Documentation - -See the [Fast Redeploy (Command-Line)](https://guides.dataverse.org/en/latest/container/dev-usage.html#dev-fast-redeploy) section in the Container Guide for complete usage instructions and limitations. diff --git a/doc/release-notes/10156-fast-redeploy.md b/doc/release-notes/10156-fast-redeploy.md new file mode 100644 index 00000000000..7b78d161f21 --- /dev/null +++ b/doc/release-notes/10156-fast-redeploy.md @@ -0,0 +1,56 @@ +## Faster Deployments and a Fast Redeploy Workflow for Container-Based Development + +### Tables Are Now Only Created When Missing (Faster Deployments for Everyone) + +The application no longer runs EclipseLink DDL generation (`eclipselink.ddl-generation=create-tables`) on every +deployment. Instead, a quick check at startup detects whether any entity tables are missing from the database +(first boot on an empty database, or newly added entities) and only then creates them - using the same EclipseLink +schema framework as before, so the semantics are unchanged. On all other (re)deployments, table creation is skipped +entirely, which noticeably speeds up deployment - in containers, classic installations and development environments +alike. Incremental schema changes continue to be handled by Flyway migrations on every startup, as before. 
+ +### Fast Redeploy for Container-Based Development + +Container-based development gains a portable, Maven-based fast-redeploy workflow that works on any +platform and with any editor or IDE: + +```bash +# Start the dev environment the usual way +mvn -Pct clean package docker:run + +# Make code changes, then hot-redeploy them into the running container in ~10-15 seconds +mvn -Pfrd package + +# Repeat as needed; when finished, stop the environment the usual way +mvn -Pct docker:stop +``` + +`mvn -Pfrd package` incrementally compiles your changes, refreshes the exploded WAR at `target/dataverse` (bind +mounted into the application container) and makes Payara hot-redeploy it - no container restarts, no image rebuilds. +Flyway migrations run on every redeploy, and tables for newly added entities are created automatically (see above). + +### Metadata Blocks and Solr Schema Updates in the Dev Environment + +A new one-shot service `dev_metadata_update` in `docker-compose-dev.yml` keeps a running dev instance in sync with +the metadata block TSV files in your working tree: on every start of the stack (and on demand via +`docker start -a dev_metadata_update`) it reloads the standard metadata blocks and +updates the Solr schema of an already bootstrapped instance. Previously, TSV changes were only picked up when +bootstrapping a fresh database. The `dev_bootstrap` service now also uses the TSV files from your working tree +instead of the ones baked into the config baker image. This is backed by a new `update-metadata.sh` script in the +config baker image. + +### Memory Configuration of the Dev Environment + +The application container in `docker-compose-dev.yml` now runs with a 6 GiB memory limit (previously 2.5 GiB). 
+The old limit could not support the hot-redeploy workflow: each redeploy retains some memory in the running server +(roughly 150-200 MiB, mostly classloader leftovers), and measurements showed the container being OOM-killed by the +kernel after only about 3 redeploys at 2.5 GiB. At 6 GiB, well over 20 consecutive redeploys have been verified. +Since a limit is not a reservation (an idle instance uses about 1.5 GiB), this does not increase the baseline +footprint. The `docker-compose.override.yml` file name is gitignored and documented as the place for +personal local overrides. + +### Documentation + +See the [Fast Redeploy (Command-Line)](https://guides.dataverse.org/en/latest/container/dev-usage.html#dev-fast-redeploy) section in the Container Guide for complete usage instructions and limitations. + +See also #10156 and #11961. diff --git a/doc/sphinx-guides/source/container/configbaker-image.rst b/doc/sphinx-guides/source/container/configbaker-image.rst index 587c20f1014..d3eb3f23a37 100644 --- a/doc/sphinx-guides/source/container/configbaker-image.rst +++ b/doc/sphinx-guides/source/container/configbaker-image.rst @@ -109,6 +109,10 @@ Scripts * - ``solr-driver.sh`` - Automate updates to a ``schema.xml`` in a Solr Core. Either run in watch mode or as a oneshot script. See ``solr-driver.sh -h`` for usage details. Best used as a sidecar or a one-off job. + * - ``update-metadata.sh`` + - Update the standard metadata blocks and the Solr schema of a running, already bootstrapped instance by + reloading the metadata block TSV files. Idempotent, best used as a one-off job. + See ``update-metadata.sh -h`` for usage details and :ref:`dev-fast-redeploy` for an example use case. 
Solr Template ^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst index 7b95b6c5823..4c8f5838d35 100644 --- a/doc/sphinx-guides/source/container/dev-usage.rst +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -121,6 +121,18 @@ this console unopened. Note that data is persisted in ``./docker-dev-volumes`` in the root of the Git repo. For a clean start, you should remove this directory before running the ``mvn`` commands above. +The application container runs with a memory limit of 6 GiB by default (see ``docker-compose-dev.yml``). This is +sized for iterative development: every hot redeploy of the application retains some memory in the running server +(roughly 150-200 MiB each, mostly classloader leftovers - a long-known Jakarta EE issue), and with the previous +2.5 GiB limit the container got OOM-killed by the kernel after only about 3 redeploys. With 6 GiB, well over 20 +redeploys in a row have been verified. Note that a limit is not a reservation: an idle instance uses about 1.5 GiB, +and usage only grows toward the limit during long redeploy sessions or heavy load. If you need to tweak this (or +anything else) for your local setup, put your personal overrides into a (gitignored) +``docker-compose.override.yml`` file and add it to your Compose commands: +``docker compose -f docker-compose-dev.yml -f docker-compose.override.yml up``. Note that because we are not using +the default Compose file name, the override file is *not* picked up automatically - neither by +``docker compose -f docker-compose-dev.yml ...`` nor by the Maven commands above. + .. _dev-logs: @@ -198,10 +210,11 @@ The safest and most reliable way to redeploy code is to stop the running contain Safe, but also slowing down the development cycle a lot. Triggering redeployment of changes using an IDE can greatly improve your feedback loop when changing code. 
-You have at least two options: +You have at least three options: #. Use builtin features of IDEs or `IDE plugins from Payara `_. #. Use a paid product like `JRebel `_. +#. Use the IDE-independent, command-line-based :ref:`dev-fast-redeploy` workflow. The main differences between the first and the second options are support for hot deploys of non-class files and limitations in what the JVM HotswapAgent can do for you. Find more details in a `blog article by JRebel `_. @@ -408,23 +421,20 @@ The steps below describe options to enable the later in different IDEs. Fast Redeploy (Command-Line) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For developers who prefer command-line workflows over IDE integration, Dataverse provides scripts for fast iterative development without full container rebuilds. +For developers who prefer command-line workflows over IDE integration, the ``frd`` ("fast redeploy") Maven profile +enables fast iterative development without full container rebuilds. It is IDE and editor independent and platform +independent - the only requirements are Maven and the Docker CLI. **Initial Setup** -Run once per development session: +Start the dev environment the usual way (see :ref:`dev-run`): .. code-block:: bash - ./scripts/dev/dev-start-frd.sh - -This command: + mvn -Pct clean package docker:run -- Builds the full Dataverse WAR with ``mvn package`` -- Extracts it into ``target/dataverse/`` as an exploded WAR -- Configures JPA settings for development (``ddl-generation=none``) -- Starts the dev stack with ``SKIP_DEPLOY=1`` -- Manually deploys the application via ``asadmin`` +The application container deploys the application from the exploded WAR at ``target/dataverse``, which +``docker-compose-dev.yml`` bind mounts into the container - this is what makes the fast redeploy below possible. **Iterative Development** @@ -432,48 +442,54 @@ After making code changes, run: .. 
code-block:: bash - ./scripts/dev/dev-frd.sh + mvn -Pfrd package -This script: +This command: -- Compiles Java sources incrementally (``mvn compile``, ~5-10s) -- Syncs updated classes and webapp resources into the mounted exploded WAR -- Forces Payara to redeploy the application without restarting containers -- Key features: - - Skips full Maven rebuilds (only compiles changed Java files) - - Avoids container restarts (uses hot-redeployment) - - Completes in ~12 seconds vs. ~54s for traditional full rebuild workflow (4.5x faster) - - Preserves database state between deployments +- Compiles Java sources incrementally (only changed files) +- Refreshes the exploded WAR at ``target/dataverse`` with compiled classes and webapp resources (XHTML etc.) +- Makes Payara hot-redeploy the application inside the running container (via ``docker exec dev_dataverse redeploy.sh``), without restarting any containers -**Typical Workflow** +A redeploy completes in roughly 10-15 seconds, compared to about a minute for stopping the containers, rebuilding the +images and starting them again. (Performance varies a lot between machines, treat these numbers as a relative +comparison only.) -.. code-block:: bash +Database state is preserved between redeploys. And because a redeployment restarts the application, everything that +usually happens on application startup happens on every redeploy, too: new Flyway migration scripts under +``src/main/resources/db/migration`` are applied and tables for newly added JPA entities are created automatically. - # Start dev environment once - ./scripts/dev/dev-start-frd.sh +**Updating Metadata Blocks** - # Edit Java or XHTML files... +Changes to the standard metadata block TSV files under ``scripts/api/data/metadatablocks`` are not part of the +deployed application. Instead, the one-shot service ``dev_metadata_update`` loads them into the running instance and +updates the Solr schema accordingly, straight from your working tree. 
It runs automatically on every start of the +stack (both the Maven and the Compose variants) and can also be run on demand, without restarting anything: - # Fast redeploy - ./scripts/dev/dev-frd.sh +.. code-block:: bash - # Repeat as needed + docker start -a dev_metadata_update - # When finished, stop containers - ./scripts/dev/dev-down-frd.sh +If the changed fields affect data you already created, trigger a reindex with +``curl http://localhost:8080/api/admin/index`` afterwards. -**Memory Configuration** +**Stopping** -The fast-redeploy workflow includes ``docker-compose.override.yml`` that increases the memory limit to 8GB -(from the default 2GB limit set for GitHub Actions CI) which is insufficient for local Dataverse development. -The override file is automatically used by the scripts. +Stop the environment as usual with ``mvn -Pct docker:stop`` or ``docker compose -f docker-compose-dev.yml down``. +Your data is kept in ``docker-dev-volumes/`` either way. **Limitations** -- Does not update dependencies (run full ``mvn package`` + restart if ``pom.xml`` changes) -- Static resources (CSS, JS) may require browser cache clear -- For database schema changes, use ``dev-rebuild.sh`` instead -- Performance timings may vary depending on your hardware configuration +- Dependency changes in ``pom.xml`` require a full image rebuild and restart: ``mvn -Pct clean package docker:run``. +- Deleted source and webapp files linger in the exploded WAR until a full rebuild and restart (files are only added + and updated, never removed). +- Hot-redeployment reuses the running JVM, and each redeploy retains some memory (roughly 150-200 MiB, mostly + classloader leftovers). With the default 6 GiB memory limit there is room for roughly 25-30 redeploys - if the + application becomes slow or unresponsive after a long session, simply restart the stack. +- The OpenAPI schema is not regenerated on fast redeploys (the one from the last full build is kept). 
+ +**Tip**: most of a no-change cycle is Payara's own redeployment (~8-10s); the Maven part is only 1-2 seconds. If you +want to shave off the JVM startup overhead of Maven itself, the ``frd`` profile works fine with the +`Maven daemon <https://github.com/apache/maven-mvnd>`_: ``mvnd -Pfrd package``. **Note**: This workflow complements IDE-based redeployment. Use whichever fits your development style. diff --git a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst index 409242101b8..e6de646c3c1 100644 --- a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst +++ b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst @@ -17,7 +17,7 @@ In the past (before adopting Flyway) we used to keep SQL upgrade scripts in ``sc How to Determine if You Need to Create a SQL Upgrade Script ----------------------------------------------------------- -If you are creating a new database table (which maps to an ``@Entity`` in JPA), you do not need to create or update a SQL upgrade script. The reason for this is that we use ``create-tables`` in ``src/main/resources/META-INF/persistence.xml`` so that new tables are automatically created by the app server when you deploy your war file. +If you are creating a new database table (which maps to an ``@Entity`` in JPA), you do not need to create or update a SQL upgrade script. The reason for this is that missing tables are automatically created at startup when you deploy your war file (see the ``ConditionalSchemaCreator`` session event listener registered in ``src/main/resources/META-INF/persistence.xml``). If you are doing anything other than creating a new database table such as adding a column to an existing table, you must create or update a SQL upgrade script. 
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index b24bf0ed6f6..d245d680df1 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -84,8 +84,12 @@ services: tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 - mem_limit: 2684354560 # 2.5 GiB - mem_reservation: 1024m + # A reasonable default for local development and CI. + # For personal tweaks, use a local (gitignored) docker-compose.override.yml: + # https://docs.docker.com/compose/how-tos/multiple-compose-files/merge + # Note: keep these as raw byte values - the Docker Maven Plugin cannot parse "6g" etc. + mem_limit: 6442450944 # 6 GiB + mem_reservation: 2147483648 # 2 GiB privileged: false dev_bootstrap: @@ -99,6 +103,27 @@ services: - dataverse volumes: - ./docker-dev-volumes/solr/data:/var/solr + # Use the metadata block TSV files from your working tree (instead of the ones baked into + # the image), so a fresh instance is bootstrapped with your current state. + - ./scripts/api/data/metadatablocks:/scripts/bootstrap/base/data/metadatablocks:ro + + # On every "up", this one-shot service refreshes the standard metadata blocks and the Solr + # schema of an already bootstrapped instance from the TSV files in your working tree. + # (On a fresh instance it is a no-op - dev_bootstrap takes care of the initial load.) 
+ # It can also be run on demand against a running stack, without a restart - this works no + # matter whether the stack was started via Maven or via Compose: + # docker start -a dev_metadata_update + dev_metadata_update: + container_name: "dev_metadata_update" + image: gdcc/configbaker:unstable + restart: "no" + command: + - update-metadata.sh + networks: + - dataverse + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./scripts/api/data/metadatablocks:/scripts/bootstrap/base/data/metadatablocks:ro dev_dv_initializer: container_name: "dev_dv_initializer" diff --git a/docker-compose.override.yml b/docker-compose.override.yml deleted file mode 100644 index 68ac8e778aa..00000000000 --- a/docker-compose.override.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: "2.4" - -# Local development overrides for docker-compose-dev.yml -# This file is used by the fast-redeploy scripts (dev-start-frd.sh, dev-down-frd.sh) -# and can be used manually with: docker compose -f docker-compose-dev.yml -f docker-compose.override.yml up - -services: - dev_dataverse: - # Increase memory limits for local development - # (upstream has 2GB limit for GitHub Actions CI, which is too restrictive for local dev) - mem_limit: 8g - mem_reservation: 4g diff --git a/modules/container-configbaker/scripts/update-metadata.sh b/modules/container-configbaker/scripts/update-metadata.sh new file mode 100755 index 00000000000..bb70359280a --- /dev/null +++ b/modules/container-configbaker/scripts/update-metadata.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# [INFO]: Update the metadata blocks and Solr schema of a running, bootstrapped instance + +set -euo pipefail + +function usage() { + echo "Usage: $(basename "$0") [-h] [-u instanceUrl] [-s solrUrl] [-c solrCore] [-t timeout]" + echo "" + echo "Update the standard metadata blocks of a running, already bootstrapped Dataverse instance" + echo "by reloading the metadata block TSV files, then update the Solr schema to match." 
+ echo "" + echo "Parameters:" + echo " instanceUrl - Location on container network where to reach your instance. Default: 'http://dataverse:8080'" + echo " solrUrl - Location on container network where to reach Solr. Default: 'http://solr:8983'" + echo " solrCore - Name of the Solr core to update and reload. Default: 'collection1'" + echo " timeout - How long to wait for the instance to become available. Default: '3m'" + echo "" + echo "Note: This script is a no-op on an instance that has not been bootstrapped yet (bootstrap.sh" + echo " loads the same TSV files anyway). It is idempotent and safe to run on every startup." + echo "" + exit 1 +} + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +SOLR_URL=${SOLR_URL:-"http://solr:8983"} +SOLR_CORE=${SOLR_CORE:-"collection1"} +TIMEOUT=${TIMEOUT:-"3m"} + +while getopts "u:s:c:t:h" OPTION +do + case "$OPTION" in + u) DATAVERSE_URL="$OPTARG" ;; + s) SOLR_URL="$OPTARG" ;; + c) SOLR_CORE="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + h) usage;; + \?) usage;; + esac +done + +# Export the URL to be reused in the setup scripts +export DATAVERSE_URL + +# Wait for the instance to become available +echo "Waiting for ${DATAVERSE_URL} to become ready in max ${TIMEOUT}." +wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.version + +# Only update an instance that has been bootstrapped before - on a fresh instance, bootstrap.sh +# loads the metadata blocks anyway, so there is nothing to update yet. +echo "Waiting for ${DATAVERSE_URL} to be bootstrapped in max ${TIMEOUT}." +if ! wait4x http "${DATAVERSE_URL}/api/metadatablocks" -i 8s -t "$TIMEOUT" --expect-status-code 200 --expect-body-json data.0; then + echo "Your instance has not been bootstrapped (yet?), skipping metadata update." + exit 0 +fi + +echo "Reloading standard metadata block TSV files..." 
+"${BOOTSTRAP_DIR}/base/setup-datasetfields.sh" +echo "" + +echo "Updating Solr schema for core ${SOLR_CORE} at ${SOLR_URL}..." +solr-driver.sh --mode oneshot --startup-check wait \ + --dataverse-url "${DATAVERSE_URL}" --solr-url "${SOLR_URL}" --core "${SOLR_CORE}" + +echo "" +echo "Done. If metadata fields were added or changed, you may want to reindex:" +echo " curl \"${DATAVERSE_URL}/api/admin/index\" (from within the container network)" diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 15763697c41..79421f77895 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -198,6 +198,7 @@ 3.2.2 1.6.13 1.7.0 + 3.5.0 0.48.0 @@ -266,6 +267,11 @@ maven-enforcer-plugin ${maven-enforcer-plugin.version} + + org.codehaus.mojo + exec-maven-plugin + ${exec-maven-plugin.version} + org.apache.maven.plugins maven-checkstyle-plugin diff --git a/pom.xml b/pom.xml index dd4d92c4f0d..0c10368a704 100644 --- a/pom.xml +++ b/pom.xml @@ -1358,6 +1358,81 @@ + + + frd + + true + + + + + org.apache.maven.plugins + maven-war-plugin + + + + default-war + none + + + + frd-exploded-war + prepare-package + + exploded + + + + + + io.smallrye + smallrye-open-api-maven-plugin + + + + default + none + + + + + org.codehaus.mojo + exec-maven-plugin + + + + frd-redeploy + package + + exec + + + docker + + exec + dev_dataverse + redeploy.sh + + + + + + + + external-search-get diff --git a/scripts/dev/dev-down-frd.sh b/scripts/dev/dev-down-frd.sh deleted file mode 100755 index 13eba9420f5..00000000000 --- a/scripts/dev/dev-down-frd.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Ensure we're in project root -cd "$(dirname "${BASH_SOURCE[0]}")/../.." - -echo "Stopping and removing dev containers..." 
- -# Use override file if it exists (for local customizations like memory limits) -if [ -f docker-compose.override.yml ]; then - docker compose -f docker-compose-dev.yml -f docker-compose.override.yml down -else - docker compose -f docker-compose-dev.yml down -fi - -echo "" -echo "✓ Dev environment stopped" -echo " To restart: ./scripts/dev/dev-start-frd.sh" -echo " To clean volumes: sudo rm -rf docker-dev-volumes/" diff --git a/scripts/dev/dev-frd.sh b/scripts/dev/dev-frd.sh deleted file mode 100755 index e7eacab661b..00000000000 --- a/scripts/dev/dev-frd.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -cd "$(dirname "${BASH_SOURCE[0]}")/../.." - -# Verify dev environment is running -if ! docker ps --filter "name=dev_dataverse" --filter "status=running" -q | grep -q .; then - echo "Error: dev_dataverse container not running." >&2 - echo "Run './scripts/dev/dev-start-frd.sh' first to set up the environment." >&2 - exit 1 -fi - -echo "Compiling Dataverse sources..." -mvn -T 1C -DskipTests -DskipUnitTests -DskipIntegrationTests compile >/dev/null - -if [ ! -d "target/classes" ]; then - echo "ERROR: target/classes missing after compile." >&2 - exit 1 -fi - -echo "Syncing compiled classes..." -# --delete removes files in dest not present in source -rsync -a --delete --exclude 'META-INF/persistence.xml' \ - target/classes/ target/dataverse/WEB-INF/classes/ - -if [ -d "src/main/webapp" ]; then - echo "Syncing webapp resources..." - rsync -a --delete \ - --exclude 'WEB-INF/classes' \ - --exclude 'WEB-INF/lib' \ - src/main/webapp/ target/dataverse/ -fi - -echo "Redeploying to Payara..." 
-docker exec dev_dataverse /bin/bash -lc ' - printf "AS_ADMIN_PASSWORD=%s\n" admin > /tmp/pwdfile; - asadmin --user admin --passwordfile /tmp/pwdfile \ - deploy --force --upload=false /opt/payara/deployments/dataverse 2>&1 \ - | grep -v "PER01001\|PER01003\|Command deploy completed with warnings"; - rm /tmp/pwdfile' - -echo "" -echo "✓ Fast redeploy complete (~12s)" -echo " Test your changes at http://localhost:8080" diff --git a/scripts/dev/dev-start-frd.sh b/scripts/dev/dev-start-frd.sh deleted file mode 100755 index d113f677bad..00000000000 --- a/scripts/dev/dev-start-frd.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Ensure we're in project root -cd "$(dirname "${BASH_SOURCE[0]}")/../.." - -echo "Building Dataverse WAR for fast redeploy..." -mvn -T 1C -DskipTests -DskipUnitTests -DskipIntegrationTests clean package - -echo "Extracting WAR into target/dataverse/..." -mkdir -p target/dataverse -unzip -oq target/dataverse-*.war -d target/dataverse/ - -# Check if database is already initialized (before creating directories) -# If postgres has initialized, the data dir will have restrictive permissions (0700) -# On first run, the directory either doesn't exist or has default permissions -DB_INITIALIZED=false -if [ -d "docker-dev-volumes/postgresql/data" ]; then - # Try to list the directory - if permission denied, it means postgres owns it (initialized) - if ! ls docker-dev-volumes/postgresql/data >/dev/null 2>&1; then - DB_INITIALIZED=true - fi -fi - -echo "Ensuring docker volume mount points exist..." 
-mkdir -p docker-dev-volumes/app/data -mkdir -p docker-dev-volumes/app/secrets -mkdir -p docker-dev-volumes/postgresql/data -mkdir -p docker-dev-volumes/solr/data -mkdir -p docker-dev-volumes/solr/conf -mkdir -p docker-dev-volumes/minio_storage - -# Only disable DDL generation if database is already initialized -# (on first run, we need create-tables to bootstrap the schema) -if [ "$DB_INITIALIZED" = true ]; then - echo "Detected existing database - disabling DDL generation to preserve schema..." - sed -i.bak 's/\(eclipselink.ddl-generation" value="\)create-tables/\1none/' \ - target/dataverse/WEB-INF/classes/META-INF/persistence.xml - rm -f target/dataverse/WEB-INF/classes/META-INF/persistence.xml.bak -else - echo "First-time setup detected - keeping DDL generation enabled for schema creation..." -fi - -echo "Starting dev stack (SKIP_DEPLOY=1)..." -export SKIP_DEPLOY=1 -# Use override file if it exists (for local customizations like memory limits) -if [ -f docker-compose.override.yml ]; then - docker compose -f docker-compose-dev.yml -f docker-compose.override.yml up -d -else - docker compose -f docker-compose-dev.yml up -d -fi - -echo "Waiting for Payara to be ready..." -until curl -sf http://localhost:8080/ >/dev/null 2>&1; do - sleep 2 -done - -echo "Deploying exploded WAR..." -docker exec dev_dataverse /bin/bash -lc ' - printf "AS_ADMIN_PASSWORD=%s\n" admin > /tmp/pwdfile; - asadmin --user admin --passwordfile /tmp/pwdfile \ - deploy --upload=false /opt/payara/deployments/dataverse 2>&1 \ - | grep -v "PER01001\|PER01003\|Command deploy completed with warnings"; - rm /tmp/pwdfile' - -echo "" -echo "✓ Fast redeploy environment ready!" 
-echo " Next: Make code changes, then run './scripts/dev/dev-frd.sh' to redeploy (~12s)" diff --git a/src/main/docker/scripts/redeploy.sh b/src/main/docker/scripts/redeploy.sh new file mode 100755 index 00000000000..9d3f9b5e6e3 --- /dev/null +++ b/src/main/docker/scripts/redeploy.sh @@ -0,0 +1,21 @@ +#!/bin/bash +################################################################################ +# Hot-redeploy the application from the exploded WAR at ${DEPLOY_DIR}/dataverse. +# +# Meant to be run inside a running application container, e.g. via +# "docker exec dev_dataverse redeploy.sh" - which is what "mvn -Pfrd package" +# does after refreshing the (bind mounted) exploded WAR. See the "Fast Redeploy" +# section of the container guide for details. +################################################################################ + +# Fail on any error +set -euo pipefail + +# These env vars are provided by the (base) image with sane defaults. +PASSWORD_FILE=$(mktemp) +trap 'rm -f "${PASSWORD_FILE}"' EXIT +echo "AS_ADMIN_PASSWORD=${PAYARA_ADMIN_PASSWORD}" > "${PASSWORD_FILE}" + +echo "Redeploying application from ${DEPLOY_DIR}/dataverse..." 
+"${PAYARA_DIR}/bin/asadmin" --user="${PAYARA_ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" \ + deploy --force --upload=false "${DEPLOY_DIR}/dataverse" diff --git a/src/main/java/edu/harvard/iq/dataverse/persistence/ConditionalSchemaCreator.java b/src/main/java/edu/harvard/iq/dataverse/persistence/ConditionalSchemaCreator.java new file mode 100644 index 00000000000..d15d9947f77 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/persistence/ConditionalSchemaCreator.java @@ -0,0 +1,86 @@ +package edu.harvard.iq.dataverse.persistence; + +import org.eclipse.persistence.descriptors.ClassDescriptor; +import org.eclipse.persistence.internal.helper.DatabaseTable; +import org.eclipse.persistence.queries.DataReadQuery; +import org.eclipse.persistence.sessions.DatabaseSession; +import org.eclipse.persistence.sessions.Session; +import org.eclipse.persistence.sessions.SessionEvent; +import org.eclipse.persistence.sessions.SessionEventAdapter; +import org.eclipse.persistence.tools.schemaframework.SchemaManager; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +/** + * Creates missing entity tables right after EclipseLink logs in to the database, but only when + * needed. Registered in {@code META-INF/persistence.xml} via the + * {@code eclipselink.session-event-listener} property. + * + *

This replaces the former {@code eclipselink.ddl-generation=create-tables} setting, which made + * EclipseLink issue CREATE statements for every entity on every single deployment - a significant + * part of (re)deployment time. The semantics are preserved: on first boot against an empty + * database (or when a new entity has been added) the missing tables, indexes and sequences are + * created by the very same EclipseLink schema framework that {@code create-tables} uses, at the + * same point in the application lifecycle (after session login, before the application is used). + * When all entity tables already exist - the common case - table creation is skipped entirely at + * the cost of a single catalog query. + * + *

Note that just like {@code create-tables}, this never alters existing tables. Incremental + * schema changes are managed by Flyway, see + * {@link edu.harvard.iq.dataverse.flyway.StartupFlywayMigrator} and + * {@code src/main/resources/db/migration}. + */ +public class ConditionalSchemaCreator extends SessionEventAdapter { + + private static final Logger logger = Logger.getLogger(ConditionalSchemaCreator.class.getCanonicalName()); + + @Override + public void postLogin(SessionEvent event) { + Session session = event.getSession(); + + Set missingTables = getExpectedTables(session); + missingTables.removeAll(getExistingTables(session)); + + if (missingTables.isEmpty()) { + logger.fine("All entity tables present in the database, skipping DDL generation."); + return; + } + + logger.info("Found " + missingTables.size() + " entity table(s) missing from the database " + + "(empty database or newly added entities). Creating missing tables, indexes and sequences..."); + SchemaManager schemaManager = new SchemaManager((DatabaseSession) session); + schemaManager.createDefaultTables(true); + logger.info("Schema creation done."); + } + + /** + * All table names (lowercased) the entity mappings of this persistence unit expect to exist. + */ + private Set getExpectedTables(Session session) { + Set expectedTables = new HashSet<>(); + for (ClassDescriptor descriptor : session.getDescriptors().values()) { + for (DatabaseTable table : descriptor.getTables()) { + expectedTables.add(table.getName().toLowerCase()); + } + } + return expectedTables; + } + + /** + * All table names (lowercased) present in the current schema of the database we logged in to. 
+ */ + private Set getExistingTables(Session session) { + DataReadQuery query = new DataReadQuery( + "SELECT lower(table_name) AS table_name FROM information_schema.tables WHERE table_schema = current_schema()"); + List rows = (List) session.executeQuery(query); + Set existingTables = new HashSet<>(); + for (Object row : rows) { + existingTables.add(String.valueOf(((Map) row).get("table_name"))); + } + return existingTables; + } +} diff --git a/src/main/resources/META-INF/persistence.xml b/src/main/resources/META-INF/persistence.xml index 151410c04c2..a691ea33f2f 100644 --- a/src/main/resources/META-INF/persistence.xml +++ b/src/main/resources/META-INF/persistence.xml @@ -8,10 +8,14 @@ - - + + +