Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Files excluded from the Docker build context.
#
# NOTE: unlike .gitignore, .dockerignore patterns are matched relative to the
# root of the build context, so a bare pattern such as `__pycache__/` only
# matches at the top level. Patterns that must apply at any depth (caches
# inside src/ and apps/, stray env files, ...) are prefixed with `**/`.

# Secrets and local environment files - never send these to the Docker daemon
**/.env
**/*.env
k8s/secret.yaml
k8s/secrets.yaml

# Python
**/__pycache__/
**/*.py[cod]
**/*$py.class
*.so
.Python
*.egg
**/*.egg-info/
dist/
build/
.eggs/

# Virtual environments
.venv/
venv/
ENV/
env/

# IDE
.vscode/
.idea/
**/*.swp
**/*.swo
**/*~
**/.DS_Store

# Testing
**/.pytest_cache/
.coverage
.coverage.*
htmlcov/
.tox/
*.cover

# Notebooks
notebooks/
*.ipynb
**/.ipynb_checkpoints

# Documentation
# Top-level markdown is dropped except for the files re-included below
# (README.md is copied by the Dockerfile next to pyproject.toml).
docs/
*.md
!README.md
!DOCKER_DEPLOY.md

# Git
.git/
.gitignore
.gitattributes

# CI/CD
.github/
.gitlab-ci.yml

# Local test data and logs
tests/
**/*.log
/tmp/
.test.env

# UV/pip cache
.uv/
uv.lock

# Docker
Dockerfile*
docker-compose*.yml
.dockerignore
89 changes: 89 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Builds the project's Docker images and publishes them to GitHub Container
# Registry. One matrix job runs per Dockerfile variant. Pull requests build the
# images but do not push them (see `push:` on the build step).
name: Build and Push Docker Images

on:
  push:
    branches:
      - main
    paths:
      - 'src/**'
      - 'apps/**'
      - 'Dockerfile*'
      - 'pyproject.toml'
      - '.github/workflows/docker-publish.yml'
  pull_request:
    branches:
      - main
  workflow_dispatch:  # Allow manual trigger
    inputs:
      # NOTE(review): this input is declared but no step in this workflow
      # reads it; wire it into the metadata step or remove it.
      tag:
        description: 'Docker image tag suffix (default: latest)'
        required: false
        default: 'latest'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    strategy:
      matrix:
        include:
          - dockerfile: Dockerfile
            suffix: ""
            description: "Full image with all loader dependencies"
          - dockerfile: Dockerfile.snowflake
            suffix: "-snowflake"
            description: "Snowflake-only image (minimal dependencies)"

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Registers QEMU emulators so the linux/arm64 half of the multi-platform
      # build can execute RUN instructions on the amd64 runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Computes tags and labels; the matrix suffix is appended to every tag,
      # including `latest` (onlatest=true).
      - name: Extract metadata for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          flavor: |
            suffix=${{ matrix.suffix }},onlatest=true
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix=sha-
            type=raw,value=latest,enable={{is_default_branch}}

      # `id: build` exposes this step's outputs (notably `digest`) to the
      # summary step below.
      - name: Build and push Docker image (${{ matrix.description }})
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./${{ matrix.dockerfile }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha,scope=${{ matrix.dockerfile }}
          cache-to: type=gha,mode=max,scope=${{ matrix.dockerfile }}
          platforms: linux/amd64,linux/arm64

      # The digest is an output of the build step, not of the metadata step.
      - name: Image digest
        run: |
          echo "### ${{ matrix.description }}" >> "$GITHUB_STEP_SUMMARY"
          echo "Digest: ${{ steps.build.outputs.digest }}" >> "$GITHUB_STEP_SUMMARY"
          echo "Tags: ${{ steps.meta.outputs.tags }}" >> "$GITHUB_STEP_SUMMARY"
62 changes: 62 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# ---------------------------------------------------------------------------
# Secrets and environment configuration
# ---------------------------------------------------------------------------
*.env
.env
.test.env

# Kubernetes secret manifests (NEVER commit these!)
k8s/secrets.yaml
k8s/secret.yaml

# ---------------------------------------------------------------------------
# Python bytecode, native extensions and packaging output
# ---------------------------------------------------------------------------
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
dist/
.eggs/
*.egg
*.egg-info/

# ---------------------------------------------------------------------------
# Virtual environments
# ---------------------------------------------------------------------------
venv/
.venv/
env/
ENV/

# ---------------------------------------------------------------------------
# Editor / OS droppings
# ---------------------------------------------------------------------------
.idea/
.vscode/
*.swo
*.swp
*~
.DS_Store

# ---------------------------------------------------------------------------
# Test and coverage output
# ---------------------------------------------------------------------------
.pytest_cache/
.hypothesis/
.tox/
htmlcov/
.coverage
.coverage.*
*.cover

# ---------------------------------------------------------------------------
# Jupyter
# ---------------------------------------------------------------------------
.ipynb_checkpoints/

# ---------------------------------------------------------------------------
# Logs and scratch space
# ---------------------------------------------------------------------------
*.log
/tmp/

# ---------------------------------------------------------------------------
# UV/pip cache and lockfile
# ---------------------------------------------------------------------------
.uv/
uv.lock

# ---------------------------------------------------------------------------
# Local development data
# Large datasets should be downloaded on-demand or mounted via ConfigMaps
# ---------------------------------------------------------------------------
data/

# ---------------------------------------------------------------------------
# Archives / build artifacts
# ---------------------------------------------------------------------------
*.zip
*.tar.gz
93 changes: 93 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Multi-stage build for optimized image size
# Stage 1: Build dependencies
FROM python:3.12-slim AS builder

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install UV for fast dependency management
# NOTE(review): `latest` is unpinned; consider pinning a uv version for
# reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Set working directory
WORKDIR /app

# Copy dependency files
COPY pyproject.toml README.md ./

# Install dependencies using UV (much faster than pip)
# Install ALL dependencies including all loader dependencies
# This ensures optional dependencies don't cause import errors
#
# Every requirement specifier MUST be quoted: this RUN uses the shell form, and
# an unquoted `pkg>=1.0` is parsed by /bin/sh as `pkg` with stdout redirected
# to a file named `=1.0`, silently dropping the version constraint.
RUN uv pip install --system --no-cache \
        'pandas>=2.3.1' \
        'pyarrow>=20.0.0' \
        'typer>=0.15.2' \
        'adbc-driver-manager>=1.5.0' \
        'adbc-driver-postgresql>=1.5.0' \
        'protobuf>=4.21.0' \
        'base58>=2.1.1' \
        'eth-hash[pysha3]>=0.7.1' \
        'eth-utils>=5.2.0' \
        'google-cloud-bigquery>=3.30.0' \
        'google-cloud-storage>=3.1.0' \
        'arro3-core>=0.5.1' \
        'arro3-compute>=0.5.1' \
        'psycopg2-binary>=2.9.0' \
        'redis>=4.5.0' \
        'deltalake>=1.0.2' \
        'pyiceberg[sql-sqlite]>=0.10.0' \
        'pydantic>=2.0,<2.12' \
        'snowflake-connector-python>=4.0.0' \
        'snowpipe-streaming>=1.0.0' \
        'lmdb>=1.4.0'

# Stage 2: Runtime image
FROM python:3.12-slim

# Install runtime dependencies only
RUN apt-get update && apt-get install -y --no-install-recommends \
        libpq5 \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user for security
RUN useradd -m -u 1000 amp && \
    mkdir -p /app /data && \
    chown -R amp:amp /app /data

# Set working directory
WORKDIR /app

# Copy Python packages from builder
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages

# Copy UV from builder for package installation
COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv

# Copy application code
COPY --chown=amp:amp src/ ./src/
COPY --chown=amp:amp apps/ ./apps/
COPY --chown=amp:amp pyproject.toml README.md ./

# Note: /data directory is created but empty by default
# Mount data files at runtime using Kubernetes ConfigMaps or volumes

# Install the amp package in the system Python (NOT editable for Docker)
RUN uv pip install --system --no-cache .

# Switch to non-root user
USER amp

# Set Python path
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1

# Health check
# NOTE(review): this only verifies that the Python interpreter starts; it does
# not probe the loader process itself.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import sys; sys.exit(0)"

# Default command - run ERC20 loader
# Can be overridden with docker run arguments
ENTRYPOINT ["python", "apps/test_erc20_labeled_parallel.py"]
CMD ["--blocks", "100000", "--workers", "8", "--flush-interval", "0.5"]
Loading
Loading