Skip to content

Commit 3670c99

Browse files
authored
Merge pull request #10 from dlcs/feature/updates
Tweak dockerfile + add entrypoints
2 parents 2805704 + f01c8ff commit 3670c99

10 files changed

Lines changed: 154 additions & 21 deletions

File tree

.env.dist

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ WEB_SERVER_HOSTNAME=localhost:8000
1111
ORIGIN_CHUNK_SIZE=8192
1212

1313
# Database
14-
DATABASE_URL=postgresql://dlcs:password@127.0.0.1:5432/compositedb
14+
DATABASE_URL=postgresql://dlcs:password@postgres:5432/compositedb
1515

1616
# CACHE
1717
CACHE_URL=dbcache://app_cache
@@ -33,3 +33,12 @@ ENGINE_WORKER_COUNT=2
3333
ENGINE_WORKER_TIMEOUT=3600
3434
ENGINE_WORKER_RETRY=4500
3535
ENGINE_WORKER_MAX_ATTEMPTS=0
36+
37+
# Run migrations
38+
MIGRATE=True
39+
40+
# DJANGO ADMIN
41+
INIT_SUPERUSER=True
42+
DJANGO_SUPERUSER_EMAIL=x.y@z
43+
DJANGO_SUPERUSER_PASSWORD=composite-handler-password
44+
DJANGO_SUPERUSER_USERNAME=Admin

.gitignore

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,35 @@ dmypy.json
152152
# Cython debug symbols
153153
cython_debug/
154154

155-
# PyCharm
155+
# User-specific stuff
156+
.idea/**/workspace.xml
157+
src/.idea/workspace.xml
158+
.idea/**/tasks.xml
159+
.idea/**/usage.statistics.xml
160+
.idea/**/dictionaries
161+
.idea/**/shelf
162+
163+
# AWS User-specific
164+
.idea/**/aws.xml
165+
166+
# Generated files
167+
.idea/**/contentModel.xml
168+
169+
# Sensitive or high-churn files
170+
.idea/**/dataSources/
171+
.idea/**/dataSources.ids
172+
.idea/**/dataSources.local.xml
173+
.idea/**/sqlDataSources.xml
174+
.idea/**/dynamic.xml
175+
.idea/**/uiDesigner.xml
176+
.idea/**/dbnavigator.xml
177+
178+
# File-based project format
179+
*.iws
180+
181+
# Editor-based Rest Client
156182
.idea/
157183

158184
# Temp working files
159-
src/scratch/
185+
src/scratch/
186+
scratch/

Dockerfile

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.9
1+
FROM python:3.11
22
LABEL maintainer="Donald Gray <donald.gray@digirati.com>"
33
LABEL org.opencontainers.image.source=https://github.com/dlcs/composite-handler
44

@@ -13,9 +13,14 @@ RUN apt-get update && apt-get --yes install apt-utils && apt-get --yes upgrade \
1313
&& useradd --create-home --home-dir /srv/dlcs --shell /bin/bash --uid 1000 dlcs \
1414
&& python -m pip install --upgrade pip
1515

16+
COPY --chown=dlcs:dlcs ./src/requirements.txt /srv/dlcs
17+
RUN pip install --no-warn-script-location --requirement /srv/dlcs/requirements.txt
18+
1619
COPY --chown=dlcs:dlcs ./src /srv/dlcs
20+
COPY --chown=dlcs:dlcs ./entrypoints /srv/dlcs
21+
RUN chmod +x /srv/dlcs/entrypoint.sh
22+
RUN chmod +x /srv/dlcs/entrypoint-api.sh
23+
RUN chmod +x /srv/dlcs/entrypoint-worker.sh
1724

1825
USER dlcs
1926
WORKDIR /srv/dlcs
20-
21-
RUN pip install --no-warn-script-location --requirement requirements.txt

README.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ The project ships with a [`docker-compose.yml`](docker-compose.yml) that can be
2727
docker-compose up
2828
```
2929

30-
Note that for the Composite Handler to be able to interact with the target S3 bucket, the Docker Compose assumes that the `AWS_PROFILE` environment variable has been set and a valid AWS session is available.
30+
> Note that for the Composite Handler to be able to interact with the target S3 bucket, the Docker Compose assumes that the `AWS_PROFILE` environment variable has been set and a valid AWS session is available.
3131
3232
This will create a PostgreSQL instance, bootstrap it with the required tables, deploy a single instance of the API, and three instances of the engine. Requests can then be targeted at `localhost:8000`.
3333

@@ -47,18 +47,26 @@ python manage.py createcachetable
4747

4848
Once the API is running, an administrator interface can be accessed via the browser at `http://localhost:8000/admin`. To create an administrator login, run the following command:
4949

50-
```
50+
```bash
5151
python manage.py createsuperuser
5252
```
5353

5454
The administrator user can be used to browse the database and manage the queue (including deleting tasks and resubmitting failed tasks into the queue).
5555

56+
### Entrypoints
57+
58+
There are 3 possible entrypoints to make the above easier:
59+
60+
* `entrypoint.sh` - this will wait for Postgres to be available and run `manage.py migrate` and `manage.py createcachetable` if `MIGRATE=True`. It will run `manage.py createsuperuser` if `INIT_SUPERUSER=True` (also needs `DJANGO_SUPERUSER_*` envvars)
61+
* `entrypoint-api.sh` - this runs above then `python manage.py runserver 0.0.0.0:8000`
62+
* `entrypoint-worker.sh` - this runs above then `python manage.py qcluster`
63+
5664
## Configuration
5765

5866
The following list of environment variables are supported:
5967

6068
| Environment Variable | Default Value | Component(s) | Description |
61-
|-------------------------------|--------------------------------|--------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
69+
| ----------------------------- | ------------------------------ | ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
6270
| `DJANGO_DEBUG` | `True` | API, Engine | Whether Django should run in debug. Useful for development purposes but should be set to `False` in production. |
6371
| `DJANGO_SECRET_KEY` | None | API, Engine | The secret key used by Django when generating sensitive tokens. This should be a randomly generated 50 character string. |
6472
| `SCRATCH_DIRECTORY` | `/tmp/scratch` | Engine | A locally accessible filesystem path where work-in-progress files are written during rasterization. |
@@ -78,6 +86,8 @@ The following list of environment variables are supported:
7886
| `ENGINE_WORKER_TIMEOUT` | `3600` | Engine | The number of seconds that a task (i.e. the processing of a single PDF) can run for before being terminated and treated as a failure. This value is useful for purging "stuck" tasks which haven't technically failed but are occupying a worker. |
7987
| `ENGINE_WORKER_RETRY` | `4500` | Engine | The number of seconds since a task was presented for processing before a worker will re-run, regardless of whether it is still running or failed. As such, this value must be higher than `ENGINE_WORKER_TIMEOUT`. |
8088
| `ENGINE_WORKER_MAX_ATTEMPTS` | `0` | Engine | The number of processing attempts a single task will undergo before it is abandoned. Setting this value to `0` will cause a task to be retried forever. |
89+
| `MIGRATE` | None | API, Engine | If "True" will run migrations + createcachetable on startup if entrypoint used. |
90+
| `INIT_SUPERUSER` | None | API, Engine | If "True" will attempt to create superuser. Needs standard Django envvars to be set (e.g. `DJANGO_SUPERUSER_USERNAME`, `DJANGO_SUPERUSER_EMAIL`, `DJANGO_SUPERUSER_PASSWORD`) if entrypoint used. |
8191

8292
Note that in order to access the S3 bucket, the Composite Handler assumes that valid AWS credentials are available in the environment - this can be in the form of [environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html), or in the form of ambient credentials.
8393

docker-compose.local.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
version: "3.9"
2+
3+
services:
4+
postgres:
5+
image: postgres:13.3
6+
environment:
7+
- POSTGRES_USER=dlcs
8+
- POSTGRES_PASSWORD=password
9+
- POSTGRES_DB=compositedb
10+
volumes:
11+
- postgres_data:/var/lib/postgresql/data
12+
ports:
13+
- "5432:5432"
14+
15+
volumes:
16+
postgres_data:

docker-compose.yml

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,18 @@ services:
66
command: chown -R 1000:1000 /data/scratch
77
volumes:
88
- ./scratch:/data/scratch
9-
migrate:
10-
build: .
11-
command: bash -c "python manage.py migrate && python manage.py createcachetable"
12-
env_file: .env
13-
depends_on:
14-
- postgres
159
api:
1610
build: .
17-
command: python manage.py runserver 0.0.0.0:8000
11+
command: /srv/dlcs/entrypoint-api.sh
1812
env_file: .env
13+
environment:
14+
- MIGRATE=True
15+
- INIT_SUPERUSER=True
1916
ports:
2017
- "8000:8000"
21-
depends_on:
22-
- migrate
2318
engine:
2419
build: .
25-
command: python manage.py qcluster
20+
command: /srv/dlcs/entrypoint-worker.sh
2621
deploy:
2722
replicas: 3
2823
env_file: .env
@@ -32,7 +27,6 @@ services:
3227
- ./scratch:/data/scratch
3328
- $HOME/.aws:/srv/dlcs/.aws:ro
3429
depends_on:
35-
- migrate
3630
- init
3731
postgres:
3832
image: postgres:13.3

entrypoints/entrypoint-api.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
#
# API entrypoint: runs the shared bootstrap script and then starts the Django
# development server listening on 0.0.0.0:8000.
#
# Both entrypoint.sh and manage.py are resolved relative to the current
# working directory.

set -o errexit
set -o pipefail

# Shared bootstrap; with errexit set, a failure here aborts before the server
# is started.
bash entrypoint.sh

# exec replaces this shell with the server process, so signals delivered to
# the container's main process (e.g. SIGTERM on `docker stop`) reach Django
# directly instead of stopping at a bash parent that does not forward them.
exec python manage.py runserver 0.0.0.0:8000

entrypoints/entrypoint-worker.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
#
# Worker entrypoint: runs the shared bootstrap script and then starts the
# `qcluster` management command.
#
# Both entrypoint.sh and manage.py are resolved relative to the current
# working directory.

set -o errexit
set -o pipefail

# Shared bootstrap; with errexit set, a failure here aborts before the worker
# is started.
bash entrypoint.sh

# exec replaces this shell with the worker process, so signals delivered to
# the container's main process (e.g. SIGTERM on `docker stop`) reach the
# cluster directly instead of stopping at a bash parent that does not forward
# them.
exec python manage.py qcluster

entrypoints/entrypoint.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/bash
#
# Shared container bootstrap: waits until PostgreSQL accepts connections, then
# optionally applies migrations and creates the Django superuser.
#
# Environment:
#   DATABASE_URL    required; connection URI of the PostgreSQL database,
#                   e.g. postgresql://user:password@host:5432/dbname
#   MIGRATE         when exactly "True", run `migrate` and `createcachetable`
#   INIT_SUPERUSER  when exactly "True", run `createsuperuser --noinput`
#                   (Django reads the DJANGO_SUPERUSER_* variables for this)

set -o errexit
set -o pipefail

# Fail with a clear message when the URL is missing, rather than exiting
# silently.
: "${DATABASE_URL:?entrypoint: DATABASE_URL must be set}"

#######################################
# Probe the database referenced by DATABASE_URL.
# The URL is handed to psycopg2 straight from the environment instead of being
# split with grep and interpolated into the Python source, so credentials that
# contain quotes, ':' or '@' cannot corrupt the check, and a URL without an
# explicit port no longer aborts the script under errexit/pipefail.
# Globals:   DATABASE_URL (read by the Python child via os.environ)
# Returns:   0 when a connection could be opened, non-zero otherwise
#######################################
postgres_ready() {
  # The heredoc delimiter is quoted so the shell performs no expansion inside
  # the Python program.
  python3 << 'END'
import os
import sys

import psycopg2

try:
    psycopg2.connect(os.environ["DATABASE_URL"]).close()
except psycopg2.OperationalError:
    sys.exit(1)
sys.exit(0)
END
}

# Loop until postgres is ready
until postgres_ready; do
  >&2 echo 'entrypoint: Waiting for PostgreSQL to become available...'
  sleep 1
done
>&2 echo 'entrypoint: PostgreSQL is available'

if [[ "${MIGRATE:-}" == "True" ]]; then
  python manage.py migrate --no-input
  echo "entrypoint: Migrations finished"
  python manage.py createcachetable
  echo "entrypoint: Create cache table finished"
fi

if [[ "${INIT_SUPERUSER:-}" == "True" ]]; then
  echo "entrypoint: Creating superuser"
  # Tested inside `if` so a failure here does not trip errexit: startup
  # continues even when the account cannot be created.
  if python3 manage.py createsuperuser --noinput; then
    echo "entrypoint: Created superuser"
  else
    >&2 echo "entrypoint: Superuser not created (it may already exist, or DJANGO_SUPERUSER_* is not set); unset INIT_SUPERUSER once the account exists"
  fi
fi

src/app/engine/tasks.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import shutil
23

34
from app.common.dlcs import DLCS
@@ -9,14 +10,18 @@
910
from app.engine.serializers import DLCSBatchSerializer
1011
from django_q.tasks import async_task
1112

13+
# Obtain the logger through logging.getLogger() so it is registered in the
# logging hierarchy and inherits the handlers and levels configured for the
# application. A directly constructed logging.Logger has no parent and no
# handlers, so its INFO records would never be emitted.
logger = logging.getLogger(__name__)
1214
http_origin = HttpOrigin()
1315
pdf_rasterizer = PdfRasterizer()
1416
s3_client = S3Client()
1517
dlcs = DLCS()
1618

1719

1820
def cleanup_scratch(folder_path):
19-
shutil.rmtree(folder_path)
21+
try:
22+
shutil.rmtree(folder_path)
23+
except FileNotFoundError:
24+
logger.info(f"cleanup failed for {folder_path}, not found")
2025

2126

2227
def process_member(args):

0 commit comments

Comments
 (0)