Changes from all commits (32 commits)
837d708
Update psycopg to v3 to work with the defaults in sqlalchemy 2
cmccully Apr 7, 2026
f40e609
Remove psycopg-specific import to instead use sqlalchemy's abstraction.
cmccully Apr 7, 2026
079a818
Explicitly use psycopg3 as the db driver to be safe.
cmccully Apr 7, 2026
fc8f7a7
Merge pull request #454 from LCOGT/psycopg3
cmccully Apr 7, 2026
c9588fe
Suppress opentsdb metrics connection warning by default
timbeccue Mar 26, 2026
1bb316a
Update default opentsdb test mode configuration
timbeccue Apr 6, 2026
a076947
Updated docs and version number bump.
cmccully Apr 7, 2026
f89d1c7
Updated uv lock
cmccully Apr 7, 2026
cd3f01b
Merge pull request #450 from LCOGT/fix/remove-meaningless-opensdb-war…
cmccully Apr 7, 2026
115289e
Added alembic script to migrate instrument table from previous banzai…
cmccully Apr 8, 2026
083d768
Fix to whitespace codestyle.
cmccully Apr 8, 2026
059bca2
Fixes based on copilot's suggestions.
cmccully Apr 8, 2026
e24bef2
Merge pull request #455 from LCOGT/alembic
cmccully Apr 9, 2026
939390e
Add smart stacking data model, worker, and supervisor
timbeccue Mar 8, 2026
6f85230
minor cleanup
timbeccue Mar 19, 2026
49a65e2
Add subframe listener, Celery task, and integration scaffolding
timbeccue Mar 8, 2026
55cd6f7
Test that pipeline uses cached calibration files
timbeccue Mar 10, 2026
7e14081
Simplify the path configuration for site deployments
timbeccue Mar 10, 2026
a2f7958
Add smart stacking E2E tests and remove redundant integration tests
timbeccue Mar 10, 2026
e61b36d
Use native types for stack queue message fields
timbeccue Mar 10, 2026
c9d83d0
minor cleanup
timbeccue Mar 20, 2026
cd255ce
Handle existing replication slot when recreating subscription
timbeccue Mar 23, 2026
47ad261
Use argparse for site worker CLI entry points
timbeccue Mar 24, 2026
55ee900
Separate Redis and RabbitMQ into standalone dependencies compose
timbeccue Mar 17, 2026
fe19bf4
Fix site E2E tests for Linux compatibility
timbeccue Mar 23, 2026
a5e9e84
Remove hardcoded Redis URLs
timbeccue Mar 27, 2026
216cc8c
Migrate from psycopg2 to psycopg3
timbeccue Apr 6, 2026
b720aa8
Handle raw JSON string messages from site software
timbeccue Apr 9, 2026
4517621
Use absolute host paths for site container volume mounts
timbeccue Apr 15, 2026
4d7d3b8
Improve download worker logging and failure handling
timbeccue Apr 18, 2026
8218122
Use fixed subscription name and allow slot name override
timbeccue Apr 20, 2026
844f487
Require absolute HOST_*_DIR paths in site E2E tests
timbeccue Apr 20, 2026
1 change: 1 addition & 0 deletions .dockerignore
@@ -62,6 +62,7 @@ distribute-*.tar.gz
# Local banzai runtime data
local_banzai
site_banzai
data

# Other
.cache
4 changes: 4 additions & 0 deletions .gitignore
@@ -73,6 +73,7 @@ justfile

# Local Banzai Setup
site-banzai-env
local-banzai-env
example_data
local_banzai
site_banzai
@@ -94,3 +95,6 @@ test.db

# Site E2E test data
banzai/tests/site_e2e/site_e2e.env

# Claude Code
.claude
16 changes: 16 additions & 0 deletions CHANGES.md
@@ -1,6 +1,22 @@
Versions
========

1.34.0 (2026-04-08)
-------------------

- Added an Alembic migration to go from old versions of the banzai instrument
  table to the current version, which has simplified uniqueness constraints

1.33.2 (2026-04-07)
-------------------

- Suppress ingester warning for opentsdb if OPENTSDB_HOSTNAME env variable is not set.

1.33.1 (2026-04-07)
-------------------

- Update psycopg to v3 to work with the defaults in sqlalchemy 2

1.33.0 (2026-03-30)
-------------------

29 changes: 24 additions & 5 deletions README.rst
@@ -99,6 +99,13 @@ agnostic as it uses SQLAlchemy. To create a new database to run BANZAI, run

This will create an sqlite3 database file in your current directory called `banzai-test.db`.

To run database migrations (e.g. after upgrading BANZAI), use the Alembic migration files bundled
with the package:

.. code-block:: bash

DB_ADDRESS=postgresql://user:pass@host/dbname alembic -c $(python -c "import importlib.resources; print(importlib.resources.files('banzai').joinpath('alembic.ini'))") upgrade head

If you are not running this at LCO, you will have to add the instrument of interest to your database
by running `banzai_add_instrument` before you can process any data.

@@ -124,24 +131,35 @@ See the `docker-compose-local.yml` file for details on this setup.
Running Locally
---------------

To run BANZAI as a local pipeline, use `docker-compose-local.yml`. This is the recommended setup
To run BANZAI as a local pipeline, use ``docker-compose-local.yml``. This is the recommended setup
for development and for processing data independently of LCO's site infrastructure.

1. Copy `local-banzai-env.default` to `local-banzai-env` and set your `AUTH_TOKEN` and `DB_ADDRESS`.
Redis and RabbitMQ are managed separately via ``docker-compose-dependencies.yml`` so they can be
left running across pipeline restarts. Start them first:

.. code-block:: bash

docker compose -f docker-compose-dependencies.yml up -d

Pipeline containers connect via the ``REDIS_URL`` and ``RABBITMQ_URL`` environment variables
(defaulting to ``host.docker.internal``). Site deployments can point these at existing
infrastructure instead.
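
The override described above can be sketched as a shell fragment. The variable names ``REDIS_URL`` and ``RABBITMQ_URL`` come from the README text; the hosts, ports, and credentials below are placeholders, not values from this repository:

```shell
# Point the pipeline containers at existing site infrastructure instead of
# the bundled dependencies compose. Hosts and credentials are placeholders.
export REDIS_URL="redis://redis.site.internal:6379/0"
export RABBITMQ_URL="amqp://guest:guest@rabbitmq.site.internal:5672/"
echo "REDIS_URL=$REDIS_URL"
echo "RABBITMQ_URL=$RABBITMQ_URL"
```

In practice these exports would precede the ``docker compose ... up`` command (or live in the env file) so the pipeline containers inherit them instead of falling back to ``host.docker.internal``.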

1. Copy ``local-banzai-env.default`` to ``local-banzai-env`` and set your ``AUTH_TOKEN`` and ``DB_ADDRESS``.

2. Start the containers:

.. code-block:: bash

docker compose -f docker-compose-local.yml --env-file local-banzai-env up -d --build

3. Queue images for processing. Raw files must be in `$HOST_RAW_DIR`:
3. Queue images for processing. Raw files must be in ``$HOST_RAW_DIR``:

.. code-block:: bash

uv run python scripts/queue_images.py $HOST_RAW_DIR

Processed output will be saved in `$HOST_REDUCED_DIR`.
Processed output will be saved in ``$HOST_REDUCED_DIR``.

Tests
-----
@@ -180,7 +198,8 @@ Site Deployment E2E Tests
~~~~~~~~~~~~~~~~~~~~~~~~~
The site E2E tests validate the full site deployment caching system, including PostgreSQL
logical replication, calibration file caching, and frame reduction. These tests require
Docker and an LCO archive API token.
Docker, an LCO archive API token, and Redis/RabbitMQ running via
``docker compose -f docker-compose-dependencies.yml up -d``.

To run the site E2E tests:

10 changes: 10 additions & 0 deletions banzai/__init__.py
@@ -3,9 +3,19 @@
# Packages may add whatever they like to this file, but
# should keep this content at the top.
# ----------------------------------------------------------------------------
import os
import banzai.logs # noqa: F401
# ----------------------------------------------------------------------------

# OPENTSDB_PYTHON_METRICS_TEST_MODE activates test mode if it has been set,
# regardless of its value. Test mode is good in dev environments because it
# suppresses noisy "max retries exceeded" connection warnings, but we need it
# disabled in production to enable metrics reporting.
#
# If OPENTSDB_HOSTNAME is set, we can assume we're running in prod.
if "OPENTSDB_HOSTNAME" not in os.environ:
os.environ["OPENTSDB_PYTHON_METRICS_TEST_MODE"] = "True"

try:
import importlib.metadata as metadata
except ImportError:
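
The presence-based switch added to ``banzai/__init__.py`` above is easy to trip over: per the comment in the diff, the metrics library treats the test-mode variable as set regardless of its value. A minimal Python sketch of that logic, using plain dicts in place of ``os.environ`` (the helper name is illustrative, not part of BANZAI):

```python
def effective_test_mode(env: dict) -> bool:
    """Presence of the variable activates test mode, regardless of its value."""
    return "OPENTSDB_PYTHON_METRICS_TEST_MODE" in env

# Dev-like environment: no OPENTSDB_HOSTNAME, so the diff's logic sets the flag.
env = {}
if "OPENTSDB_HOSTNAME" not in env:
    env["OPENTSDB_PYTHON_METRICS_TEST_MODE"] = "True"
print(effective_test_mode(env))  # True: connection warnings suppressed

# Prod-like environment: hostname is set, so the flag is never written and
# metrics reporting stays enabled.
prod_env = {"OPENTSDB_HOSTNAME": "opentsdb.example"}
if "OPENTSDB_HOSTNAME" not in prod_env:
    prod_env["OPENTSDB_PYTHON_METRICS_TEST_MODE"] = "True"
print(effective_test_mode(prod_env))  # False: metrics reporting enabled
```

Note that even setting the variable to the string ``"False"`` would still activate test mode, which is why the diff keys the behavior on ``OPENTSDB_HOSTNAME`` instead.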
149 changes: 149 additions & 0 deletions banzai/alembic.ini
@@ -0,0 +1,149 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# Or organize into date-based subdirectories (requires recursive_version_locations = true)
# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = %(here)s


# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the tzdata library which can be installed by adding
# `alembic[tz]` to the pip requirements.
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions

# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# database URL. This is consumed by the user-maintained env.py script only.
# other means of configuring database URLs may be customized within the env.py
# file.
sqlalchemy.url = %(DB_ADDRESS)s


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME

# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME

# Logging configuration. This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARNING
handlers = console
qualname =

[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
4 changes: 4 additions & 0 deletions banzai/alembic/README
@@ -0,0 +1,4 @@
# Alembic

Alembic (https://alembic.sqlalchemy.org/) scripts to migrate the BANZAI database
from previous versions.
84 changes: 84 additions & 0 deletions banzai/alembic/env.py
@@ -0,0 +1,84 @@
from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context
import os
from banzai.dbs import Base

config = context.config
db_address = os.getenv("DB_ADDRESS")
if db_address is None:
raise RuntimeError(
"Environment variable DB_ADDRESS is not set; cannot configure Alembic database connection."
)
config.set_main_option("DB_ADDRESS", db_address)

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.

This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.

Calls to context.execute() here emit the given string to the
script output.

"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)

with context.begin_transaction():
context.run_migrations()


def run_migrations_online() -> None:
"""Run migrations in 'online' mode.

In this scenario we need to create an Engine
and associate a connection with the context.

"""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)

with connectable.connect() as connection:
context.configure(
connection=connection, target_metadata=target_metadata
)

with context.begin_transaction():
context.run_migrations()


if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
28 changes: 28 additions & 0 deletions banzai/alembic/script.py.mako
@@ -0,0 +1,28 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}


def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}