diff --git a/.env.dist b/.env.dist index 3fd6ada..a058b21 100644 --- a/.env.dist +++ b/.env.dist @@ -5,6 +5,9 @@ DEBUG_LOGGING=false DEBUG_TOOLBAR=false +# Allow self-registration (users can sign up without an invitation) +ALLOW_SELF_REGISTRATION=false + # Encryption settings SECRET_KEY='${SECRET_KEY}' ENCRYPTION_KEY='${ENCRYPTION_KEY}' @@ -28,7 +31,7 @@ DATABASE_PASSWORD=hexa-app # Initial Django admin user DJANGO_SUPERUSER_USERNAME=root@openhexa.org DJANGO_SUPERUSER_EMAIL=root@openhexa.org -DJANGO_SUPERUSER_PASSWORD=root +DJANGO_SUPERUSER_PASSWORD=${DJANGO_SUPERUSER_PASSWORD} # Networking ############ @@ -125,9 +128,10 @@ WORKSPACE_BUCKET_PREFIX=hexa-test- # Storage backend to use for workspace files. # Options: -# - "fs" (local filesystem) -# - "gcp" (Google Cloud Storage) +# - "fs" (local filesystem) +# - "gcp" (Google Cloud Storage) # - "azure" (Azure Blob Storage) +# - "s3" (Amazon S3 or S3-compatible, e.g. MinIO) # Default: fs STORAGE_BACKEND=fs @@ -140,16 +144,19 @@ WORKSPACE_STORAGE_LOCATION=$WORKSPACE_STORAGE_LOCATION # Generate with: base64 -w 0 service-account-key.json WORKSPACE_STORAGE_BACKEND_GCS_SERVICE_ACCOUNT_KEY= -# # openssl rand -hex 16 -# WORKSPACE_STORAGE_ENGINE_AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -# # openssl rand -base64 42 -# WORKSPACE_STORAGE_ENGINE_AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY -## AWS: To run it in AWS mode or in LocalHosting mode set the variable to s3 -WORKSPACE_STORAGE_BACKEND_AWS_ENDPOINT_URL= -WORKSPACE_STORAGE_BACKEND_AWS_PUBLIC_ENDPOINT_URL= -WORKSPACE_STORAGE_BACKEND_AWS_SECRET_ACCESS_KEY= +## S3 (STORAGE_BACKEND=s3): Amazon S3 or S3-compatible storage (e.g. MinIO) WORKSPACE_STORAGE_BACKEND_AWS_ACCESS_KEY_ID= +WORKSPACE_STORAGE_BACKEND_AWS_SECRET_ACCESS_KEY= +# Region where buckets will be created (default: eu-central-1) WORKSPACE_STORAGE_BACKEND_AWS_BUCKET_REGION= +# Internal endpoint for server-side API calls, set for MinIO or custom S3 (e.g. 
http://minio:9000) +WORKSPACE_STORAGE_BACKEND_AWS_ENDPOINT_URL= +# Public endpoint used in presigned URLs returned to clients: set when internal and public hosts differ +# Falls back to WORKSPACE_STORAGE_BACKEND_AWS_ENDPOINT_URL when not set +WORKSPACE_STORAGE_BACKEND_AWS_PUBLIC_ENDPOINT_URL= +# Optional IAM role ARN to assume when generating short-lived notebook credentials via STS. +# Falls back to static access key/secret when not set (required for MinIO or simple setups). +WORKSPACE_STORAGE_BACKEND_AWS_ROLE_ARN= WORKSPACE_BUCKET_REGION= @@ -163,3 +170,61 @@ WORKSPACE_BUCKET_REGION= # Bucket to store datasets for all workspaces WORKSPACE_DATASETS_BUCKET=hexa-datasets +# Maximum number of files snapshotted per dataset version (used for previews) +WORKSPACE_DATASETS_FILE_SNAPSHOT_SIZE=50 + +# Static Webapps +################ + +# Optional parent domain to serve static webapps from a subdomain (e.g. +# `app1.webapps.example.com`). Requires wildcard DNS pointing at this host. +# Leave empty to completely disable webapps. +# Example: +# WEBAPPS_DOMAIN=webapps.example.com +WEBAPPS_DOMAIN= + +# Comma-separated list of custom domains attached to public webapps. Each +# domain must be set on the Webapp via Django admin AND listed here so Django +# accepts the Host header. Example: +# ADDITIONAL_ALLOWED_HOSTS=carte-sanitaire.gouv.ne,dashboard.example.org +ADDITIONAL_ALLOWED_HOSTS= + +# Git server (Forgejo) +###################### +# Backs OpenHEXA static webapps. Runs as a sibling container `forgejo`. +# The admin password is auto-generated by setup.sh on first install. + +GIT_SERVER_ADMIN_USERNAME=openhexa-admin +GIT_SERVER_ADMIN_PASSWORD=${GIT_SERVER_ADMIN_PASSWORD} + +# Absolute path to the directory where Forgejo persists its data +# (SQLite metadata DB, git repositories, attachments, app.ini, ...). +FORGEJO_STORAGE_LOCATION=$FORGEJO_STORAGE_LOCATION + +# AI Assistant +############## + +# Monthly cap on AI assistant requests per workspace. 
+ASSISTANT_MONTHLY_LIMIT=200 + +# Optional Pydantic Logfire integration for AI agent observability. +# When `true`, requires LOGFIRE_TOKEN to be set. +LOGFIRE_SEND_TO_LOGFIRE=false +# LOGFIRE_TOKEN= + +# OAuth2 +######## + +OAUTH2_ACCESS_TOKEN_EXPIRE_SECONDS=3600 +# Comma-separated list of hosts allowed as OAuth2 redirect URIs. +OAUTH2_ALLOWED_REDIRECT_URI_HOSTS= + +# JWT Workspace Tokens (optional) +################################# +# Required only for the `issueWorkspaceToken` GraphQL mutation. Generate a key +# with: openssl genpkey -algorithm RSA -out private_key.pem -pkeyopt rsa_keygen_bits:2048 +# OPENHEXA_JWT_PRIVATE_KEY= +# OPENHEXA_JWT_KID= +# OPENHEXA_JWT_ISSUER=https://app.openhexa.org +# OPENHEXA_JWT_AUDIENCE=openhexa-clients +# OPENHEXA_JWT_TTL=3600 diff --git a/.github/workflows/build_debian_package.yml b/.github/workflows/build_debian_package.yml index 6d473bd..472c712 100644 --- a/.github/workflows/build_debian_package.yml +++ b/.github/workflows/build_debian_package.yml @@ -140,7 +140,11 @@ jobs: run: docker build -t openhexa/smoke-tests . 
- name: Run smoke tests - run: docker run -t --net=host -v "$(pwd)/test-results:/code/test-results" openhexa/smoke-tests http://localhost:3000/ root@openhexa.org root + run: | + SUPERUSER_PASSWORD=$(sudo grep -E '^DJANGO_SUPERUSER_PASSWORD=' /etc/openhexa/env.conf | cut -d= -f2-) + docker run -t --net=host \ + -v "$(pwd)/test-results:/code/test-results" \ + openhexa/smoke-tests http://localhost:3000/ root@openhexa.org "$SUPERUSER_PASSWORD" - name: Keep test results uses: actions/upload-artifact@v7 diff --git a/.gitignore b/.gitignore index 414f2c4..229853e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,9 @@ test-results/ .secrets openhexa.nginx workspaces/ +forgejo_data/ backup.conf* backup/ workspaces-* -.artifacts/ \ No newline at end of file +forgejo_data-* +.artifacts/ diff --git a/README.md b/README.md index b64579a..7dc1d0e 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,7 @@ [![build_debian_package](https://github.com/BLSQ/openhexa/actions/workflows/build_debian_package.yml/badge.svg)](https://github.com/BLSQ/openhexa/actions/workflows/build_debian_package.yml) -OpenHEXA -======== +# OpenHEXA OpenHEXA is an open-source data integration and data analysis platform developed by [Bluesquare](https://bluesquarehub.com). @@ -27,8 +26,7 @@ OpenHEXA allows you to: Please note that this repository **does not contain any code**: it is a starting point for OpenHEXA users and implementers. Please refer to the [technical architecture](https://github.com/BLSQ/openhexa/wiki/Technical-architecture) page of our wiki for more information about the different OpenHEXA components, including the links to the relevant GitHub repositories. -Documentation -------------- +## Documentation The OpenHEXA documentation lives in our [wiki](https://github.com/BLSQ/openhexa/wiki). 
@@ -37,20 +35,20 @@ To get started, you might be interested in the following pages: - [User manual](https://github.com/BLSQ/openhexa/wiki/User-manual) - [Installation instructions](https://github.com/BLSQ/openhexa/wiki/Installation-instructions) -Roadmap, issues and discussions -------------------------------- +## Roadmap, issues and discussions Feel free to reach out in the [discussions section](https://github.com/BLSQ/openhexa/discussions) if you have questions or suggestions! -Quick Start ------------ +## Quick Start Requirements: + - a least [Docker 26.1](https://docs.docker.com/engine/install/debian/#install-using-the-repository) - Debian bookworm -- Debian packages `gettext-base`, `postgresql` (14+), `postgresql--postgis-3`, `duplicity` (optional to manage backup and restore) +- Debian packages `gettext-base`, `postgresql` (16+), `postgresql--postgis-3`, `duplicity` (optional to manage backup and restore) - [yq](https://github.com/mikefarah/yq/#install) +- Host port `3100` available for the bundled Forgejo Git server (override with `FORGEJO_PORT`) After having cloned this repo and change your current dir to it, you can check your installation by running first @@ -79,7 +77,7 @@ Then you can prepare the database and environment with ``` > [!IMPORTANT] -> The `prepare` command will create an initial superuser for your installation. If you are setting up a real server, make sure you **choose a secure password**. +> The `prepare` command will create an initial superuser using the credentials in `.env` (`DJANGO_SUPERUSER_USERNAME` / `DJANGO_SUPERUSER_PASSWORD`). On a fresh install, `setup.sh` auto-generates a random `DJANGO_SUPERUSER_PASSWORD`. Check `.env` to retrieve it for the first login, or edit it before running `prepare` if you want to set your own. 
Finally, you can run openhexa with @@ -97,7 +95,7 @@ If you need to purge the configuration and the database after having stopped it, you can do it by executing the following command ```bash -./script/openhexa.sh purge +./script/setup.sh purge ``` Once installed, it could be interesting to make sure you have the last version. @@ -130,10 +128,9 @@ docker run -it -v $(pwd):/work openhexa-build You can then follow the instructions below to build the package as usual. - #### Release, changelog, and versions -The versions are described into the [changelog file](debian/changelog). The last +The versions are described into the [changelog file](debian/changelog). The last one is unreleased and is the one that is published. To manage versions and changelog, we use the debhelper tool `dch`. @@ -177,10 +174,14 @@ The resulting package is available in the parent directory: #### Install Requirements: + - a least [Docker 26.1](https://docs.docker.com/engine/install/debian/#install-using-the-repository) - Debian bookworm - Systemd - [yq](https://github.com/mikefarah/yq/#install) +- PostgreSQL 16+ (required by OpenHEXA 4.1.0+) +- Host port `3100` free for the bundled Forgejo Git server (override with + `FORGEJO_PORT` in `/etc/openhexa/env.conf`) First of all, you need to add our APT repository and GPG public key: @@ -212,7 +213,6 @@ sudo apt install openhexa If you want to manage backup and retore through our script, you can install it with recommended packages `sudo apt install --install-recommends openhexa`. - If you have Systemd, OpenHexa is run as a Systemd service `openhexa` (that you can then manage with `systemctl`). If you don't use Systemd, you can still run the service by running `/usr/share/openhexa/openhexa -g start`. @@ -269,34 +269,97 @@ During the setup, the following is done on the PostgreSQL side: ##### Backup -You can manage your backup and restore directly with OpenHexa. It will backup -all the workspaces data, and all databases. 
This relies on the tool `duplicity`. -Make sure that it is installed if you haven't installed it yet (if you install -OpenHexa with `apt`, do it with the recommended packages). +You can manage your backup and restore directly with OpenHexa. It backs up: -First, you need to set it up: +- a `pg_dumpall` of the PostgreSQL cluster (covers the `hexa-app` and + `hexa-hub` databases), +- the workspace files at `WORKSPACE_STORAGE_LOCATION`, +- the Forgejo data directory at `FORGEJO_STORAGE_LOCATION` (git repositories + for static webapps plus Forgejo's SQLite metadata database), +- a snapshot of `.env` (so the encryption keys needed to read the restored + database are kept alongside the data). -```bash -/usr/share/openhexa/setup.sh backup /mylocaldirecotry/where/to/do/thebackup/ encryption_passkey -``` +This relies on the tool `duplicity`. Make sure that it is installed if you +haven't installed it yet (if you install OpenHexa with `apt`, do it with the +recommended packages). -Then you can back up the data with: +First, you need to set it up: ```bash -/usr/share/openhexa/openhexa.sh backup +/usr/share/openhexa/setup.sh backup file:///mylocaldirectory/where/to/do/thebackup/ encryption_passkey ``` +The target directory will contain two duplicity backends side by side: +`LOCATION/workspaces` and `LOCATION/forgejo`. + Depending on the user activities, it might be a good idea to stop the service or simply redirect the website to a maintenance HTML page. -To restore the data, you execute the following: +Once configured, the following commands are available: + +| Command | Description | +| --- | --- | +| `/usr/share/openhexa/openhexa.sh backup` | Back up the PostgreSQL cluster, workspace files, Forgejo data and `.env` snapshot. | +| `/usr/share/openhexa/openhexa.sh backup-status` | Show the duplicity `collection-status` for both the `workspaces` and `forgejo` backends. | +| `/usr/share/openhexa/openhexa.sh restore` | Restore the latest backup. 
This requires stopping the services before a full restore. | + +After a restore, an `openhexa-env.bak` file is left next to the workspace data: +compare it with the live `.env` to make sure `ENCRYPTION_KEY`, `SECRET_KEY` and +the JupyterHub/Forgejo secrets match the restored database. + +###### Restoring onto a populated PostgreSQL cluster + +`restore` replays a `pg_dumpall` produced without `--clean`, so it expects an empty target cluster (e.g. a fresh install). If the application databases or roles already exist, the `CREATE DATABASE` / `CREATE ROLE` statements will fail, leaving the live databases untouched (the workspace and Forgejo directories will already have been replaced by then, with timestamped copies of the previous ones kept next to them). + +To restore on top of an existing setup, drop the application objects manually before running `restore`. Stop the services first so nothing holds open +connections: ```bash -/usr/share/openhexa/openhexa.sh backup -``` +# 1. Stop everything that talks to PostgreSQL. +/usr/share/openhexa/openhexa.sh stop + +# 2. Drop the OpenHexa databases and roles as the postgres superuser. Replace +# the database/role names below with whatever your `.env` defines (typically +# DATABASE_NAME, JUPYTERHUB_DATABASE_NAME, plus any per-workspace databases +# matching `[a-z0-9]{16}` that you can list with `\l` in psql). +sudo -u postgres psql -p "$DATABASE_PORT" <<'SQL' +DROP DATABASE IF EXISTS "hexa-app"; +DROP DATABASE IF EXISTS "hexa-hub"; +-- repeat DROP DATABASE for every workspace database +DROP ROLE IF EXISTS "hexa-app"; +DROP ROLE IF EXISTS "hexa-hub"; +-- repeat DROP ROLE for every workspace role +SQL + +# 3. Now run the restore. +/usr/share/openhexa/openhexa.sh restore +``` + +###### Restoring a pre-Forgejo backup (legacy layout) + +Backups taken before the Forgejo upgrade used a single duplicity backend at +`LOCATION` (no `workspaces` / `forgejo` sub-prefix) and did not include a +Forgejo data directory or an `.env` snapshot. `openhexa.sh restore` won't +recover them as-is — it expects both new sub-prefixes to exist. 
Restore them +by hand with `duplicity`: -In this case, we advise you to stop the service before performing a full -restore. +```bash +# Stop the services first +sudo systemctl stop openhexa + +# Restore the workspace tree (includes the legacy openhexa-dumpall.sql) +sudo -u openhexa PASSPHRASE='your-passphrase' duplicity restore \ + file:///path/to/old/backup/ \ + /var/lib/openhexa/workspaces + +# Load the PostgreSQL dump +sudo -u postgres psql -f /var/lib/openhexa/workspaces/openhexa-dumpall.sql template1 + +# Forgejo had no data in the legacy layout: leave FORGEJO_STORAGE_LOCATION +# empty and let `openhexa.sh prepare` bootstrap a fresh Forgejo on next start. +sudo systemctl start openhexa +/usr/share/openhexa/openhexa.sh prepare +``` #### Configuration properties @@ -313,6 +376,7 @@ Finally, we need the port number where the local PostgreSQL cluster listens: ##### Email server In order to be able to send mails to users, you have to provide the configuration options: + - `EMAIL_HOST` - `EMAIL_PORT` - `EMAIL_HOST_USER` @@ -330,6 +394,65 @@ You can override it by setting this ENV variable to the local IP of the server: OVERRIDE_WORKSPACES_DATABASE_HOST="" ``` +##### Forgejo Git server + +Since OpenHEXA 5.0.0, the Static Webapps feature is backed by a Forgejo Git +server that runs as a sibling container. The package ships a `forgejo` +service (image `codeberg.org/forgejo/forgejo:14`) and a custom entrypoint +at `/usr/share/openhexa/forgejo/entrypoint.sh` that creates the admin user +on first boot. + +The relevant configuration properties: + +- `GIT_SERVER_ADMIN_USERNAME` (default `openhexa-admin`) +- `GIT_SERVER_ADMIN_PASSWORD`: auto-generated by `setup.sh` on first install +- `FORGEJO_PORT` (default `3100`): host port mapped to the Forgejo UI + +The Django backend talks to Forgejo over the internal Docker network at +`http://forgejo:3000`. This is set in `compose.yml` and does not require +configuration. 
Forgejo's data lives in the host directory set by +`FORGEJO_STORAGE_LOCATION` (bind-mounted into the container at `/data`) and is preserved across `update`/`restart`. + +##### Static Webapps subdomain (optional) + +Set `WEBAPPS_DOMAIN=webapps.example.com` to serve each public webapp from +its own subdomain (e.g. `app1.webapps.example.com`). This requires a +wildcard DNS record pointing at this host. Leave the variable empty to keep +webapps on the main backend host. + +For custom-domain webapps, list each domain in `ADDITIONAL_ALLOWED_HOSTS` +_and_ attach it to the corresponding Webapp via the Django admin. + +#### Upgrading from 4.6.0 + +The 5.x series introduces Forgejo as a hard dependency. To upgrade an +existing 4.6.0 installation: + +```bash +sudo systemctl stop openhexa +sudo apt update && sudo apt install --only-upgrade openhexa +# Pull the new app/frontend images and the Forgejo image: +sudo /usr/share/openhexa/openhexa.sh -g update +# Run migrations and bootstrap the Git server admin user: +sudo /usr/share/openhexa/openhexa.sh -g prepare +sudo systemctl start openhexa +``` + +The package post-install hook runs `update` and `prepare` automatically when +installing for the first time, but on upgrades you should re-run them +explicitly to apply Django migrations introduced between 4.6.0 and 5.6.2 +(custom webapp domains, AI agent tables, scheduled-run version selection, +read-only table protection). + +The new `GIT_SERVER_ADMIN_PASSWORD` is generated only when `.env` does not +yet exist. On an in-place upgrade, your existing `.env` will not contain it: +add the following variables manually before running the commands above, and make sure `FORGEJO_STORAGE_LOCATION` (the host directory `compose.yml` mounts as Forgejo's data directory, e.g. `/var/lib/openhexa/forgejo_data`) is set as well: + +```bash +GIT_SERVER_ADMIN_USERNAME=openhexa-admin +GIT_SERVER_ADMIN_PASSWORD=something-secure +``` + #### Test To test if OpenHexa has been correctly installed, you can run smoke tests that diff --git a/compose.yml b/compose.yml index c3d70af..05251e3 100644 --- a/compose.yml +++ b/compose.yml @@ -3,7 +3,7 @@ # Do not change it without good reaons. 
x-app: &common - image: "blsq/openhexa-app:4.6.0" + image: "blsq/openhexa-app:5.6.2" platform: linux/amd64 networks: - openhexa @@ -35,15 +35,22 @@ x-app: &common # Storage backend for workspace files STORAGE_BACKEND: ${STORAGE_BACKEND:-fs} # To control what hostname can access the backend - ADDITIONAL_ALLOWED_HOSTS: "app,frontend" + ADDITIONAL_ALLOWED_HOSTS: "app,frontend,${ADDITIONAL_ALLOWED_HOSTS:-}" + # Forgejo Git server URL (reachable on the local Docker network) + GIT_SERVER_URL: http://forgejo:3000 + # Optional: parent domain for serving static webapps from a subdomain. + # Requires wildcard DNS pointing at this host. Leave empty to disable. + WEBAPPS_DOMAIN: ${WEBAPPS_DOMAIN:-} services: app: <<: *common - command: "manage runserver 0:8000" + command: "start" restart: unless-stopped container_name: app ports: - "${APP_PORT:-8000}:8000" + depends_on: + - forgejo healthcheck: test: curl "http://app:8000/ready" || exit 1 interval: "60s" @@ -52,7 +59,7 @@ services: retries: 3 frontend: - image: "blsq/openhexa-frontend:4.6.0" + image: "blsq/openhexa-frontend:5.6.2" platform: linux/amd64 networks: - openhexa @@ -171,6 +178,43 @@ services: start_period: "5s" retries: 3 + # Forgejo provides the Git server backing OpenHEXA static webapps. + # The `app` service requires it to be reachable at http://forgejo:3000. + forgejo: + image: codeberg.org/forgejo/forgejo:14.0.5 + platform: linux/amd64 + container_name: forgejo + networks: + - openhexa + entrypoint: /custom-entrypoint.sh + environment: + # Align Forgejo's internal `git` user with the host owner of + # FORGEJO_STORAGE_LOCATION, mirroring how the `app` service runs. + # Note: we can't set the container user directly since Forgejo + # (and custom-entrypoint.sh) must start as root. 
+ USER_UID: ${OH_UID:-1000} + USER_GID: ${OH_GID:-1000} + GIT_SERVER_ADMIN_USERNAME: ${GIT_SERVER_ADMIN_USERNAME} + GIT_SERVER_ADMIN_PASSWORD: ${GIT_SERVER_ADMIN_PASSWORD} + FORGEJO__database__DB_TYPE: sqlite3 + FORGEJO__security__INSTALL_LOCK: "true" + FORGEJO__server__ROOT_URL: "http://localhost:${FORGEJO_PORT:-3100}/" + FORGEJO__server__LANDING_PAGE: explore + FORGEJO__service__DISABLE_REGISTRATION: "true" + FORGEJO__service__REQUIRE_SIGNIN_VIEW: "true" + volumes: + - "${FORGEJO_STORAGE_LOCATION}:/data" + - ${FORGEJO_ENTRYPOINT_PATH:-./forgejo/entrypoint.sh}:/custom-entrypoint.sh:ro + ports: + - "${FORGEJO_PORT:-3100}:3000" + restart: unless-stopped + healthcheck: + test: curl -fs "http://localhost:3000/api/healthz" || exit 1 + interval: "60s" + timeout: "3s" + start_period: "10s" + retries: 3 + networks: openhexa: name: openhexa diff --git a/debian/changelog b/debian/changelog index edb34c0..e75d26e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,33 @@ +openhexa (4.0-1) stable; urgency=medium + + [ Bram Jans ] + * Update OpenHEXA to version 5.6.2 + * Breaking: add Forgejo Git server (required for Static Webapps since + OpenHEXA 5.0.0). New `forgejo` service backed by a `forgejo_data` + volume, listening on host port 3100. + * Switch the app service command from `manage runserver` to `start` + (gunicorn + UvicornWorkerNoLifespan) — required for SSE in pipeline + run details and the AI agent loop. + * Generate `GIT_SERVER_ADMIN_PASSWORD` on first install via + `script/setup.sh`. + * Add ENV vars to `.env.dist`: `WEBAPPS_DOMAIN`, + `ADDITIONAL_ALLOWED_HOSTS`, `GIT_SERVER_ADMIN_USERNAME`, + `GIT_SERVER_ADMIN_PASSWORD`, `WORKSPACE_STORAGE_BACKEND_AWS_ROLE_ARN`, + `WORKSPACE_DATASETS_FILE_SNAPSHOT_SIZE`, `ASSISTANT_MONTHLY_LIMIT`, + `LOGFIRE_SEND_TO_LOGFIRE`, `OAUTH2_ACCESS_TOKEN_EXPIRE_SECONDS`, + `OAUTH2_ALLOWED_REDIRECT_URI_HOSTS`, `ALLOW_SELF_REGISTRATION`, and + optional `OPENHEXA_JWT_*` for issuing workspace tokens. 
+ * Refresh the S3 storage backend section in `.env.dist` to match + upstream (S3 backend support, new in OpenHEXA 5.3.0). + * Backup/restore now also captures `FORGEJO_STORAGE_LOCATION` and a + snapshot of `.env`. The on-disk layout under the backup `LOCATION` + changes: data is split into `/workspaces` and + `/forgejo`. Existing pre-upgrade backups remain readable + by pointing `duplicity restore` at the old path, but new backups + will start a fresh full at the new sub-prefix. + + -- Bram Jans Fri, 08 May 2026 16:03:52 +0200 + openhexa (3.0-1) stable; urgency=medium [ Bram Jans ] diff --git a/debian/install b/debian/install index f184abb..1731ec5 100644 --- a/debian/install +++ b/debian/install @@ -3,3 +3,4 @@ compose.yml usr/share/openhexa script/openhexa.sh usr/share/openhexa script/setup.sh usr/share/openhexa script/common_functions.sh /usr/share/openhexa +forgejo/entrypoint.sh usr/share/openhexa/forgejo diff --git a/forgejo/entrypoint.sh b/forgejo/entrypoint.sh new file mode 100755 index 0000000..45827f2 --- /dev/null +++ b/forgejo/entrypoint.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -e + +/usr/bin/entrypoint & +FORGEJO_PID=$! + +echo "Waiting for Forgejo to be ready..." +until curl -so /dev/null -w '%{http_code}' http://localhost:3000/api/v1/version 2>/dev/null | grep -qE '200|403'; do + sleep 1 +done +echo "Forgejo is ready." + +if ! su-exec git forgejo admin user list --admin 2>/dev/null | grep -q "$GIT_SERVER_ADMIN_USERNAME"; then + echo "Creating admin user '$GIT_SERVER_ADMIN_USERNAME'..." + su-exec git forgejo admin user create \ + --admin \ + --username "$GIT_SERVER_ADMIN_USERNAME" \ + --password "$GIT_SERVER_ADMIN_PASSWORD" \ + --email "admin@openhexa.org" \ + --must-change-password=false + echo "Admin user created." +else + echo "Admin user '$GIT_SERVER_ADMIN_USERNAME' already exists." 
+fi + +wait $FORGEJO_PID diff --git a/script/common_functions.sh b/script/common_functions.sh index dc42e20..a059f74 100644 --- a/script/common_functions.sh +++ b/script/common_functions.sh @@ -2,6 +2,7 @@ COMPOSE_FILE_PATH="compose.yml" CONFIG_FILE_PATH=".env" BACKUP_CONFIG_FILE_PATH="backup.conf" WORKSPACE_DATA_DIRECTORY="workspaces" +FORGEJO_DATA_DIRECTORY="forgejo_data" SUDO_COMMAND="sudo" @@ -24,6 +25,8 @@ function setup() { CONFIG_FILE_PATH="/etc/openhexa/env.conf" BACKUP_CONFIG_FILE_PATH="/etc/openhexa/backup.conf" WORKSPACE_DATA_DIRECTORY="/var/lib/openhexa/workspaces" + FORGEJO_DATA_DIRECTORY="/var/lib/openhexa/forgejo_data" + FORGEJO_ENTRYPOINT_PATH="/usr/share/openhexa/forgejo/entrypoint.sh" fi if ((UID == 0)); then SUDO_COMMAND="" @@ -96,6 +99,7 @@ function run_compose() { OH_UID="${oh_uid}" \ OH_GID="${oh_gid}" \ DOCKER_GID="${docker_gid}" \ + FORGEJO_ENTRYPOINT_PATH="${FORGEJO_ENTRYPOINT_PATH}" \ docker compose \ --env-file "${CONFIG_FILE_PATH}" \ --file "${COMPOSE_FILE_PATH}" \ diff --git a/script/openhexa.sh b/script/openhexa.sh index ebec7f0..b4e60c8 100755 --- a/script/openhexa.sh +++ b/script/openhexa.sh @@ -33,8 +33,9 @@ function usage() { update pulls last container images prepare runs databases migrations and installs fixtures logs gets all the logs - backup backs up OpenHexa - restore restores OpenHexa, more details with \`help restore\` + backup backs up OpenHexa + backup-status show duplicity collection-status for the workspaces and forgejo backends + restore restores OpenHexa, more details with \`help restore\` help [cmd] prints current usage documentation or of the given command \`cmd\` version prints current version """ @@ -104,6 +105,15 @@ function is_backend_reachable() { (($(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ready) == 200)) } +function is_forgejo_reachable() { + local port + port=$( + load_env 2>/dev/null + echo "${FORGEJO_PORT:-3100}" + ) + (($(curl -s -o /dev/null -w "%{http_code}" 
"http://localhost:${port}/api/healthz") == 200)) +} + function is_db_accepting_connexion() { # TODO replace with get_config_or_default ( @@ -147,20 +157,50 @@ function duplicity_parameters_for_some_type() { *) ;; esac } +function require_backup_config() { + local location passphrase + if [[ ! -r "$(backup_conf_file)" ]]; then + echo "Backup not configured: $(backup_conf_file) is missing." + echo "Run \`setup.sh backup \` first." + return 1 + fi + location=$(get_backup_config LOCATION) + passphrase=$(get_backup_config PASSPHRASE) + if [[ -z $location || -z $passphrase ]]; then + echo "Backup config $(backup_conf_file) is missing LOCATION or PASSPHRASE." + return 1 + fi + return 0 +} + +function full_if_older_than_arg() { + local age=$1 + [[ -n $age ]] && echo "--full-if-older-than ${age}" +} + +function run_duplicity_backup() { + local source=$1 target=$2 type=$3 passphrase=$4 oldest_full_age=$5 + # No action verb on purpose: duplicity picks `full` on first run and + # `incremental` thereafter, so the same command works on empty targets. + PASSPHRASE=$passphrase \ + duplicity \ + $(duplicity_parameters_for_some_type "${type}") \ + $(full_if_older_than_arg "${oldest_full_age}") \ + "${source}" \ + "${target}" +} + function perform_backup() { + require_backup_config || return 1 ( - echo -n "Prepare dump of the whole PostgreSQL cluster dedicated to OpenHexa ... " - local dumpfile_path load_env - dumpfile_path="${WORKSPACE_STORAGE_LOCATION}/openhexa-dumpall.sql" - pgpassfile=$(begin_pgsql_session localhost "${DATABASE_PORT}" "${DATABASE_USER}" "${DATABASE_PASSWORD}") - PGPASSFILE=$pgpassfile pg_dumpall --file "${dumpfile_path}" --host localhost --port "${DATABASE_PORT}" --username "${DATABASE_USER}" - end_pgsql_session "${pgpassfile}" - echo "OK" + local dumpfile_path envcopy_path type location passphrase oldest_full_age pgpassfile rc=0 echo -n "Load backup configuration ... 
" - local type type=$(get_backup_config TYPE) + location=$(get_backup_config LOCATION) + passphrase=$(get_backup_config PASSPHRASE) + oldest_full_age=$(get_backup_config OLDEST_FULL_BCK_AGE) # case $type in # s3) # export AWS_ACCESS_KEY_ID=$(get_backup_config ACCESS_KEY_ID) @@ -174,51 +214,132 @@ function perform_backup() { # esac echo "OK" - echo -n "Back up workspace files and PostgreSQL dump ... " - PASSPHRASE=$(get_backup_config PASSPHRASE) \ - duplicity incremental \ - $(duplicity_parameters_for_some_type "${type}") \ - --full-if-older-than "$(get_backup_config OLDEST_FULL_BCK_AGE)" \ - "${WORKSPACE_STORAGE_LOCATION}" \ - "$(get_backup_config LOCATION)" + echo -n "Prepare dump of the whole PostgreSQL cluster dedicated to OpenHexa ... " + dumpfile_path="${WORKSPACE_STORAGE_LOCATION}/openhexa-dumpall.sql" + pgpassfile=$(begin_pgsql_session localhost "${DATABASE_PORT}" "${DATABASE_USER}" "${DATABASE_PASSWORD}") + PGPASSFILE=$pgpassfile pg_dumpall --file "${dumpfile_path}" --host localhost --port "${DATABASE_PORT}" --username "${DATABASE_USER}" + end_pgsql_session "${pgpassfile}" echo "OK" - echo -n "Remove DB cluster dump ... " - rm "${dumpfile_path}" + + # Snapshot .env alongside the data. ENCRYPTION_KEY, SECRET_KEY, + # JUPYTERHUB_CRYPT_KEY, HUB_API_TOKEN and the admin passwords live + # only here; without them a DB restore is unrecoverable. + echo -n "Stage configuration snapshot (.env) ... " + envcopy_path="${WORKSPACE_STORAGE_LOCATION}/openhexa-env.bak" + cp -a "$(dot_env_file)" "${envcopy_path}" echo "OK" + + echo "Back up workspace files, PostgreSQL dump, and .env snapshot ..." + run_duplicity_backup "${WORKSPACE_STORAGE_LOCATION}" "${location}/workspaces" \ + "${type}" "${passphrase}" "${oldest_full_age}" || rc=$? + + if ((rc == 0)); then + echo "Back up Forgejo data (repos + SQLite metadata) ..." + run_duplicity_backup "${FORGEJO_STORAGE_LOCATION}" "${location}/forgejo" \ + "${type}" "${passphrase}" "${oldest_full_age}" || rc=$? 
+ fi + + echo -n "Remove staged DB dump and env snapshot ... " + rm "${dumpfile_path}" "${envcopy_path}" + echo "OK" + + if ((rc != 0)); then + echo "Backup FAILED (duplicity exit ${rc}). The data on the remote target may be incomplete." + fi + return $rc ) } +function perform_backup_status() { + require_backup_config || return 1 + local location passphrase rc=0 + location=$(get_backup_config LOCATION) + passphrase=$(get_backup_config PASSPHRASE) + for backend in workspaces forgejo; do + echo "=== ${backend} (${location}/${backend}) ===" + PASSPHRASE=$passphrase duplicity collection-status "${location}/${backend}" || rc=$? + echo + done + return $rc +} + function perform_restore() { + require_backup_config || return 1 ( load_env - echo -n "Keep a copy of the target ..." + local location passphrase dumpfile_path envcopy_path pgpassfile psql_exit_code psql_result running_services + location=$(get_backup_config LOCATION) + passphrase=$(get_backup_config PASSPHRASE) + + echo -n "Check that no OpenHEXA services are running ... " + running_services=$(number_of_running_services) + if ((running_services > 0)); then + echo "KO" + echo "Refusing to restore while ${running_services} service(s) are running." + echo "Active connections would block DROP DATABASE during the dump replay." + echo "Stop everything first with: ./script/openhexa.sh stop" + return 1 + fi + echo "OK" + + echo -n "Keep a copy of the workspace target ... " mv "${WORKSPACE_STORAGE_LOCATION}" "${WORKSPACE_STORAGE_LOCATION}-$(date -Iseconds)" mkdir "${WORKSPACE_STORAGE_LOCATION}" echo "OK" + echo -n "Restore workspace files ... " - PASSPHRASE=$(get_backup_config PASSPHRASE) \ + PASSPHRASE=$passphrase \ duplicity restore \ - "$(get_backup_config LOCATION)" \ + "${location}/workspaces" \ "${WORKSPACE_STORAGE_LOCATION}" echo "OK" - echo -n "Restore PostgreSQL dump ..." - local dumpfile_path pgpassfile psql_exit_code psql_result + echo -n "Keep a copy of the Forgejo target ... 
" + mv "${FORGEJO_STORAGE_LOCATION}" "${FORGEJO_STORAGE_LOCATION}-$(date -Iseconds)" + mkdir "${FORGEJO_STORAGE_LOCATION}" + echo "OK" + + echo -n "Restore Forgejo data ... " + PASSPHRASE=$passphrase \ + duplicity restore \ + "${location}/forgejo" \ + "${FORGEJO_STORAGE_LOCATION}" + echo "OK" + + echo -n "Restore PostgreSQL dump ... " dumpfile_path="${WORKSPACE_STORAGE_LOCATION}/openhexa-dumpall.sql" if [[ ! -r $dumpfile_path ]]; then echo "KO: the dump file ${dumpfile_path} is not readable." return 1 fi pgpassfile=$(begin_pgsql_session localhost "${DATABASE_PORT}" "${DATABASE_USER}" "${DATABASE_PASSWORD}") - psql_result=$(PGPASSFILE=$pgpassfile psql -f "${dumpfile_path}" --host localhost --port "${DATABASE_PORT}" --username "${DATABASE_USER}" template1 2>&1) + psql_result=$(PGPASSFILE=$pgpassfile psql -v ON_ERROR_STOP=1 -f "${dumpfile_path}" --host localhost --port "${DATABASE_PORT}" --username "${DATABASE_USER}" template1 2>&1) psql_exit_code=$? end_pgsql_session "${pgpassfile}" if [[ $psql_exit_code -eq 0 ]]; then echo "OK" rm "${dumpfile_path}" else - echo "KO: ${psql_result}" + echo "KO" + echo + echo "===================== PostgreSQL restore FAILED ======================" + echo "psql exited with code ${psql_exit_code}. Output:" + echo + echo "${psql_result}" + echo + if echo "${psql_result}" | grep -qE 'already exists'; then + echo "Hint: the target cluster already contains OpenHEXA databases or roles. Drop them manually first." + fi + echo "The dump file has been kept at: ${dumpfile_path}" + echo "======================================================================" + return $psql_exit_code fi - return $psql_exit_code + + envcopy_path="${WORKSPACE_STORAGE_LOCATION}/openhexa-env.bak" + if [[ -r $envcopy_path ]]; then + echo "Configuration snapshot restored to ${envcopy_path}." + echo "Compare it with $(dot_env_file) before discarding; the encryption keys must match the restored database." 
+ fi + return 0 ) } @@ -257,6 +378,13 @@ function execute() { echo "No" exit_code=1 fi + echo -n "Forgejo HTTP Reachable: " + if is_forgejo_reachable; then + echo "Yes" + else + echo "No" + exit_code=1 + fi echo "PostgreSQL: " echo -n "- accepting connections: " if is_db_accepting_connexion; then @@ -288,6 +416,7 @@ function execute() { ;; prepare) run_compose_with_profiles run app fixtures --localhosting + run_compose_with_profiles run app python manage.py sync_git_orgs run_compose_with_profiles run jupyterhub jupyterhub upgrade-db -f /etc/jupyterhub/jupyterhub_dev_config.py exit_properly 0 ;; @@ -299,6 +428,10 @@ function execute() { perform_backup exit_properly $? ;; + backup-status) + perform_backup_status + exit_properly $? + ;; restore) perform_restore exit_properly $? diff --git a/script/setup.sh b/script/setup.sh index eb7817e..87bb727 100755 --- a/script/setup.sh +++ b/script/setup.sh @@ -19,7 +19,7 @@ PGSQL_CLUSTER="openhexa" function usage() { if [[ -z $1 ]]; then echo """ - + Usage: $0 [OPTIONS] COMMAND OPTIONS: @@ -28,7 +28,7 @@ function usage() { installed on the system. 
By default, it runs in its current working directory - -d enables debug output + -d enables debug output COMMANDS: @@ -49,7 +49,7 @@ function usage() { case $cmd in backup) echo """ - + Usage: backup LOCATION PASSPHRASE [OPTIONS] LOCATION @@ -153,7 +153,7 @@ function is_docker_engine_running() { try the following: - Start the services \`sudo systemctl start docker.service containerd.service\` - Check their status \`sudo systemctl status docker.service containerd.service\` - + For more details see https://docs.docker.com/engine/install/linux-postinstall/ If you run in a container, you very likely need to share the Docker socket @@ -191,7 +191,7 @@ function is_postgresql_service_running() { If you use init.d, please try the following with a user having the superuser rights (that works also in a container): - Start the service \`/etc/init.d/postgresql start\` - - Check its the status \`/etc/init.d/postgresql status\` + - Check its the status \`/etc/init.d/postgresql status\` EOF return 1 fi @@ -351,11 +351,11 @@ function setup_user() { function setup_local_storage() { if [[ $OPTION_GLOBAL == "on" ]]; then - $SUDO_COMMAND mkdir -p "${WORKSPACE_DATA_DIRECTORY}" - $SUDO_COMMAND chown $(id -u openhexa):$(id -g openhexa) "${WORKSPACE_DATA_DIRECTORY}" + $SUDO_COMMAND mkdir -p "${WORKSPACE_DATA_DIRECTORY}" "${FORGEJO_DATA_DIRECTORY}" + $SUDO_COMMAND chown "$(id -u openhexa):$(id -g openhexa)" "${WORKSPACE_DATA_DIRECTORY}" "${FORGEJO_DATA_DIRECTORY}" else - mkdir -p "${WORKSPACE_DATA_DIRECTORY}" - chmod 770 "${WORKSPACE_DATA_DIRECTORY}" + mkdir -p "${WORKSPACE_DATA_DIRECTORY}" "${FORGEJO_DATA_DIRECTORY}" + chmod 770 "${WORKSPACE_DATA_DIRECTORY}" "${FORGEJO_DATA_DIRECTORY}" fi } @@ -401,7 +401,10 @@ function setup_env() { HUB_API_TOKEN=$(openssl rand -hex 32) \ SECRET_KEY=$(generate_django_secret_key) \ ENCRYPTION_KEY=$(generate_fernet_encryption_key) \ + GIT_SERVER_ADMIN_PASSWORD=$(openssl rand -hex 16) \ + DJANGO_SUPERUSER_PASSWORD=$(openssl rand -hex 16) \
WORKSPACE_STORAGE_LOCATION="${current_working_directory}${WORKSPACE_DATA_DIRECTORY}" \ + FORGEJO_STORAGE_LOCATION="${current_working_directory}${FORGEJO_DATA_DIRECTORY}" \ DB_PORT=$db_port \ envsubst <"$(dist_dot_env_file)" >"$(dot_env_file)" ) @@ -428,7 +431,7 @@ function setup_db() { if ! does_postgresql_cluster_openhexa_exist; then create_pgsql_cluster echo "created" - + echo -n "- make the cluster listening on the Docker network ... " listen_on_docker_network echo "done" @@ -478,6 +481,12 @@ function remove_local_storage() { if (($exit_code != 0)) && [[ ! -d $WORKSPACE_DATA_DIRECTORY ]]; then exit_code=0 fi + local forgejo_exit_code=0 + $SUDO_COMMAND find "${FORGEJO_DATA_DIRECTORY}" -delete 2>/dev/null || forgejo_exit_code=$? + if (($forgejo_exit_code != 0)) && [[ ! -d $FORGEJO_DATA_DIRECTORY ]]; then + forgejo_exit_code=0 + fi + (($exit_code == 0)) && exit_code=$forgejo_exit_code return $exit_code } @@ -517,7 +526,7 @@ function purge_env() { else echo "failed" fi - echo -n "- workspace data ... " + echo -n "- workspace and forgejo (git server) data ... " if remove_local_storage; then echo "removed" else @@ -560,8 +569,11 @@ function detect_file_server_type() { echo "${scheme}" ;; *) - echo "The scheme \`${scheme}\` is not supported. Supported file server types are:" - echo "local file systeme (file://), and SFTP (sftp://)." + # stderr, not stdout: the caller invokes us inside `$(...)`, so anything + # on stdout becomes the captured "type" instead of reaching the user. + echo "The scheme \`${scheme}\` is not supported. Supported file server types are:" >&2 + echo "local file systeme (file://), and SFTP (sftp://)." >&2 + echo "Did you mean \`file://${location}\`?" >&2 # AWS S3 (s3://), Google Cloud Storage (gs://), exit 1 ;; @@ -634,7 +646,9 @@ function generate_or_update_backup_config() { done shift $((OPTIND - 1)) local type - type=$(detect_file_server_type "${location}") + if ! 
type=$(detect_file_server_type "${location}"); then + exit_properly 1 + fi echo "- file server location: ${location}" echo "- file server type detected: ${type}" # case $type in diff --git a/smoke-tests/tests/simple.spec.ts b/smoke-tests/tests/simple.spec.ts index 5103fb8..afea074 100644 --- a/smoke-tests/tests/simple.spec.ts +++ b/smoke-tests/tests/simple.spec.ts @@ -18,9 +18,8 @@ test.describe("it can view a workspace's common pages", () => { test("view workspace's tables", async ({ workspacePage }) => { await workspacePage .getByRole("navigation") - .getByRole("link") - .nth(2) - .click(); // Tables link + .getByRole("link", { name: "Database" }) + .click(); await expect( workspacePage.getByRole("heading", { name: "Tables" }), ).toBeVisible(); @@ -32,9 +31,8 @@ test.describe("it can view a workspace's common pages", () => { test("view workspace's files", async ({ workspacePage }) => { await workspacePage .getByRole("navigation") - .getByRole("link") - .nth(1) - .click(); // Files link + .getByRole("link", { name: "Files" }) + .click(); await expect( workspacePage.getByRole("columnheader", { name: "Size" }).first(), ).toBeVisible(); @@ -46,9 +44,8 @@ test.describe("it can view a workspace's common pages", () => { test("view workspace's jupyterlab environment", async ({ workspacePage }) => { await workspacePage .getByRole("navigation") - .getByRole("link") - .nth(6) - .click(); // Jupyterlab link + .getByRole("link", { name: "JupyterHub" }) + .click(); await expect( await workspacePage .frameLocator("iframe")