Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions .github/workflows/publish_translation_worker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Publishes the translation worker: creates a GitHub release for the tag and
# pushes the io-worker and inference-worker images to Docker Hub.
name: Publish translation-worker

on:
  workflow_dispatch:
  push:
    tags:
      - 'translation-worker-*'

jobs:
  create-release:
    # The workflow can also be dispatched manually; on a branch ref that would
    # create a release (and a tag) named after the branch, so only run on
    # translation-worker tags.
    if: startsWith(github.ref, 'refs/tags/translation-worker-')
    runs-on: ubuntu-latest
    permissions:
      # `gh release create` needs write access to the repository contents.
      contents: write
    steps:
      - uses: actions/checkout@v6
      - name: Create GH release
        run: gh release create "$tag" --generate-notes
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          tag: ${{ github.ref_name }}

  publish-io-worker:
    # Image tags are derived from the git tag below; skip any other ref.
    if: startsWith(github.ref, 'refs/tags/translation-worker-')
    runs-on: ubuntu-latest
    steps:
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: icij/datashare-translation-io-worker
          # Use the part of the git tag after "translation-worker-" as the
          # image tag.
          tags: |
            type=match,pattern=translation-worker-(.*),group=1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          # Both secrets must be defined in the repository settings.
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v7
        with:
          target: io-worker
          # No checkout step: the build uses the Git context, scoped to the
          # worker's subdirectory.
          context: "{{defaultContext}}:workers/translation-worker"
          platforms: linux/amd64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  publish-inference-worker:
    # Image tags are derived from the git tag below; skip any other ref.
    if: startsWith(github.ref, 'refs/tags/translation-worker-')
    runs-on: ubuntu-latest
    steps:
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: icij/datashare-translation-inference-worker
          # Use the part of the git tag after "translation-worker-" as the
          # image tag.
          tags: |
            type=match,pattern=translation-worker-(.*),group=1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          # Both secrets must be defined in the repository settings.
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and push image
        uses: docker/build-push-action@v7
        with:
          target: inference-worker
          # No checkout step: the build uses the Git context, scoped to the
          # worker's subdirectory.
          context: "{{defaultContext}}:workers/translation-worker"
          platforms: linux/amd64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

# One run per workflow and ref; a newer run queues instead of cancelling a
# publish that is already in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: false
2 changes: 1 addition & 1 deletion .github/workflows/test_translation_worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- name: Run tests
run: |
cd workers/translation-worker
uv sync --frozen --all-extras
uv sync --frozen --all-extras --dev
uv run --frozen python -m pytest --timeout=180 -vvv --cache-clear --show-capture=all -r A
Expand Down
6 changes: 3 additions & 3 deletions workers/asr-worker/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
[tool.uv-dynamic-versioning]
fallback-version = "0.0.0"

[project]
name = "asr-worker"
dynamic = ["version"]
Expand Down Expand Up @@ -73,6 +70,9 @@ dev = [
"typing-extensions>=4.15.0",
]

[tool.uv-dynamic-versioning]
fallback-version = "0.0.0"

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
Expand Down
54 changes: 54 additions & 0 deletions workers/translation-worker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Shared base for both worker images: Python runtime plus the uv binaries.
# Python 3.12 is used because the project declares
# `requires-python = ">=3.11.0, <3.13"`.
FROM python:3.12-slim-trixie AS translation-worker-builder

ENV PYTHONUNBUFFERED=1
ENV UV_HTTP_TIMEOUT=300
# The uv cache is a build cache mount (a separate filesystem), so files must be
# copied rather than hard-linked into the environment.
ENV UV_LINK_MODE=copy
ENV UV_COMPILE_BYTECODE=1
ENV UV_NO_DEV=1

COPY --from=ghcr.io/astral-sh/uv:0.11.6 /uv /uvx /bin/

WORKDIR /app


# Image for the IO worker (base dependencies only).
FROM translation-worker-builder AS io-worker
# Install deps first to optimize layer cache.
# The cache mount target must be an absolute path: `~` is not expanded in
# Dockerfile mount options.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.dist.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync -v --frozen --no-editable --no-install-project

# Then copy code
COPY uv.dist.lock ./uv.lock
COPY pyproject.toml README.md ./
COPY translation_worker ./translation_worker/
COPY entrypoints/io_worker.sh ./entrypoints/io_worker.sh

# Then install service
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync -v --frozen --no-editable

ENTRYPOINT ["entrypoints/io_worker.sh"]


# Image for the inference worker (base dependencies plus the `inference` extra).
FROM translation-worker-builder AS inference-worker
# Install deps first to optimize layer cache.
# The cache mount target must be an absolute path: `~` is not expanded in
# Dockerfile mount options.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.dist.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync -v --frozen --no-editable --no-install-project --extra inference

# Then copy code
COPY uv.dist.lock ./uv.lock
COPY pyproject.toml README.md ./
COPY translation_worker ./translation_worker/
COPY entrypoints/inference_worker.sh ./entrypoints/inference_worker.sh

# Then install service
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync -v --frozen --no-editable --extra inference

ENTRYPOINT ["entrypoints/inference_worker.sh"]
6 changes: 6 additions & 0 deletions workers/translation-worker/entrypoints/inference_worker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Container entrypoint: starts the translation inference worker on the
# translation.inference.cpu queue.
set -e

# exec replaces this shell with uv, so signals sent to the container (e.g.
# SIGTERM on stop) are delivered to uv instead of an intermediate bash process.
exec uv run --no-sync datashare-python worker start \
  --dependencies inference \
  --queue translation.inference.cpu \
  --activities translation.translate_docs
9 changes: 9 additions & 0 deletions workers/translation-worker/entrypoints/io_worker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Container entrypoint: starts the translation IO worker on the translation.io
# queue, registering the translation workflow and its IO activities.
set -e

# exec replaces this shell with uv, so signals sent to the container (e.g.
# SIGTERM on stop) are delivered to uv instead of an intermediate bash process.
exec uv run --no-sync datashare-python worker start \
  --dependencies io \
  --queue translation.io \
  --activities translation.worker_config \
  --activities translation.create_translation_batches \
  --workflows translation
100 changes: 54 additions & 46 deletions workers/translation-worker/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,37 +1,18 @@
[build-system]
requires = ["hatchling", "uv-dynamic-versioning"]
build-backend = "hatchling.build"

[tool.hatch.version]
source = "uv-dynamic-versioning"

[tool.hatch.build.targets.wheel]
packages = ["translation_worker"]

[tool.hatch.build.targets.sdist]
exclude = [
"tests"
]

[tool.uv-dynamic-versioning]
fallback-version = "0.0.0"

[project]
name = "translation-worker"
dynamic = ["version"]
description = "Translation worker implementation in Temporal"
authors = [
{ name = "Clément Doumouro", email = "cdoumouro@icij.org" },
{ name = "Clément Doumouro", email = "clement.doumouro@gmail.com" },
{ name = "Lion Summerbell", email = "lsummerbell@icij.org" }
{ name = "Clément Doumouro", email = "cdoumouro@icij.org" },
{ name = "Clément Doumouro", email = "clement.doumouro@gmail.com" },
{ name = "Lion Summerbell", email = "lsummerbell@icij.org" }
]
readme = "README.md"
requires-python = ">=3.11.0, <3.14"
requires-python = ">=3.11.0, <3.13"

dependencies = [
"datashare-python~=0.8.4",
"argostranslate>=1.11.0",
"temporalio>=1.22.0",
"pycountry~=26.2.16",
"datashare-python~=0.8.4",
"pydantic-extra-types[pycountry]==2.11.1",
]

[project.entry-points."datashare.workflows"]
Expand All @@ -40,39 +21,66 @@ workflows = "translation_worker.workflows:WORKFLOWS"
[project.entry-points."datashare.activities"]
activities = "translation_worker.activities:ACTIVITIES"

[tool.uv.sources]
torch = [
{ index = "pytorch-cpu" },
[project.entry-points."datashare.dependencies"]
dependencies = "translation_worker.dependencies:REGISTRY"

[project.entry-points."datashare.worker_config_cls"]
worker_config_cls = "translation_worker.objects:WORKER_CONFIG_CLS"

[project.optional-dependencies]
inference = [
"argostranslate==1.11.0",
]

[tool.uv.sources]
datashare-python = { path = "../../datashare-python", editable = true }

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.uv]
package = true
exclude-dependencies = [
"torch",
]

[tool.uv-dynamic-versioning]
fallback-version = "0.0.0"

[dependency-groups]
dev = [
"datashare-python~=0.2",
"nest-asyncio>=1.6.0",
"pre-commit>=4.5.1",
"psutil>=6.1.0",
"pytest~=8.1",
"pytest-asyncio~=0.24",
"pytest-timeout==2.4.0",
"redis[hiredis]>=5.2.1",
"ruff==0.15.2",
"typing-extensions>=4.15.0",
"datashare-python~=0.2",
"nest-asyncio>=1.6.0",
"pre-commit>=4.5.1",
"psutil>=6.1.0",
"pytest~=8.1",
"pytest-asyncio~=0.24",
"pytest-timeout==2.4.0",
"redis[hiredis]>=5.2.1",
"ruff==0.15.2",
"typing-extensions>=4.15.0",
]

[build-system]
requires = ["hatchling", "uv-dynamic-versioning"]
build-backend = "hatchling.build"

[tool.hatch.version]
source = "uv-dynamic-versioning"

[tool.hatch.build.targets.wheel]
packages = ["translation_worker"]

[tool.hatch.build.targets.sdist]
exclude = [
"tests"
]

[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
markers = [
"integration",
"pull",
"integration",
"pull",
]
log_cli = 1
log_cli_level = "DEBUG"
log_file_format = "[%(levelname)s][%(asctime)s.%(msecs)03d][%(name)s]: %(message)s"
log_file_date_format = "%Y-%m-%d %H:%M:%S"
log_file_date_format = "%Y-%m-%d %H:%M:%S"
Loading
Loading