Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Build-context exclusions: everything listed here is withheld from the
# Docker daemon, so it can never be picked up by `COPY . .`.

# Git
.git
.gitignore

# Python
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.pytest_cache
nosetests.xml
coverage.xml
*.cover
*.log
.cache
.mypy_cache

# Virtual environments
venv/
.venv/
env/
ENV/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Project specific
*.csv
*.xlsx
*.xls
data/
logs/
output/
temp/
tmp/

# Documentation
docs/_build/
*.md

# Docker
Dockerfile*
docker-compose*.yml
.dockerignore

# CI/CD
.github/
.gitlab-ci.yml

# Environment files
# `.env.*` covers every suffixed variant (.env.local, .env.production,
# .env.staging.local, ...) so no environment file with credentials can be
# copied into an image layer.
.env
.env.*
.env.local
.env.*.local

# Test files
tests/
test_*.py
*_test.py

# Backup files
*.bak
*.backup
140 changes: 140 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Multi-stage Dockerfile for Name Matching Application
#
# Stage "base": Python 3.11 image with the compiler toolchain and MySQL client
# headers, the packages from requirements.txt, and the application code under
# /app. It runs uvicorn on port 8000 as the unprivileged user "appuser".
FROM python:3.11-slim AS base

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install system dependencies.
# The Debian-based python image does not ship a package named
# `libmysqlclient-dev`; the MySQL/MariaDB client headers are provided by
# `default-libmysqlclient-dev`. curl is kept because HEALTHCHECK uses it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        default-libmysqlclient-dev \
        g++ \
        gcc \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Create app user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Set work directory
WORKDIR /app

# WORKDIR creates /app owned by root; hand the directory itself to appuser so
# the application can still create files in it at runtime.
RUN chown appuser:appuser /app

# Copy requirements first for better caching
COPY --chown=appuser:appuser requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code already owned by appuser, instead of duplicating the
# whole tree in a separate `chown -R` layer.
COPY --chown=appuser:appuser . .

# Switch to app user
USER appuser

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Default command
CMD ["python", "-m", "uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000"]


# Stage "development": the base image plus test, formatting and lint tooling
# and a couple of interactive utilities, served with auto-reload.
FROM base AS development

# Package installation below needs root; appuser is restored afterwards.
USER root

# Python tooling for tests, formatting, linting and type checks
RUN pip install --no-cache-dir \
    pytest \
    pytest-cov \
    black \
    isort \
    flake8 \
    mypy

# Command-line tools for working inside the container
RUN apt-get update && apt-get install -y \
    git \
    vim \
    && rm -rf /var/lib/apt/lists/*

# Back to the unprivileged user for runtime
USER appuser

# Same server as the base stage, with --reload added
CMD ["python", "-m", "uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]


# Stage "production": built directly on the base stage, so /app, the installed
# dependencies and the appuser account are all inherited. No COPY --from=base
# is needed: copying /app from the parent stage onto itself would only add a
# redundant layer.
FROM base AS production

# Set production environment
ENV ENVIRONMENT=production

# Use production command (4 uvicorn workers, no reload)
CMD ["python", "-m", "uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"]


# Stage "gpu": standalone image on the CUDA 11.8 runtime (Ubuntu 22.04) with
# Python 3.11, the packages from requirements.txt and requirements-gpu.txt,
# and the application under /app, run as the unprivileged user "appuser".
#
# CUDA image tags include the patch version (11.8.0, not 11.8).
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04 AS gpu

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install Python and system dependencies.
# DEBIAN_FRONTEND is set inline only, so package configuration cannot block
# the build on a prompt and the variable does not leak into the runtime
# environment.
# NOTE(review): recommended packages are left enabled here because the distro
# pip may rely on them (setuptools/wheel) — confirm before trimming.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        curl \
        g++ \
        gcc \
        libmysqlclient-dev \
        pkg-config \
        python3-pip \
        python3.11 \
        python3.11-dev \
    && rm -rf /var/lib/apt/lists/*

# Make `python` resolve to Python 3.11 (-f keeps the step repeatable if the
# link already exists).
RUN ln -sf /usr/bin/python3.11 /usr/bin/python

# Create app user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Set work directory
WORKDIR /app

# WORKDIR creates /app owned by root; hand the directory itself to appuser so
# the application can still create files in it at runtime.
RUN chown appuser:appuser /app

# Copy requirements
COPY --chown=appuser:appuser requirements.txt requirements-gpu.txt ./

# Install Python dependencies.
# Invoke pip through `python` so packages are installed for the same
# interpreter (3.11) that CMD launches; the bare `pip` executable belongs to
# the distribution's default python3.
RUN python -m pip install --no-cache-dir -r requirements.txt
RUN python -m pip install --no-cache-dir -r requirements-gpu.txt

# Copy application code already owned by appuser, instead of duplicating the
# whole tree in a separate `chown -R` layer.
COPY --chown=appuser:appuser . .

# Switch to app user
USER appuser

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# GPU-enabled command (2 uvicorn workers)
CMD ["python", "-m", "uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
70 changes: 70 additions & 0 deletions config.json.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"environment": "development",
"debug": true,
"database": {
"host": "localhost",
"port": 3306,
"user": "nameuser",
"password": "namepass",
"database": "namematching",
"use_ssl": false,
"pool_size": 10,
"max_overflow": 20,
"pool_timeout": 30,
"pool_recycle": 3600
},
"redis": {
"host": "localhost",
"port": 6379,
"db": 0,
"password": null,
"default_ttl": 3600,
"key_prefix": "name_match:",
"max_connections": 10
},
"api": {
"host": "0.0.0.0",
"port": 8000,
"workers": 4,
"reload": false,
"log_level": "info",
"cors_origins": ["*"],
"jwt_secret_key": "your-secret-key-change-in-production",
"jwt_algorithm": "HS256",
"jwt_expire_minutes": 30
},
"matching": {
"match_threshold": 0.75,
"non_match_threshold": 0.55,
"enable_gpu": false,
"enable_caching": true,
"cache_ttl": 3600,
"max_batch_size": 10000,
"default_similarity_function": "jaro_winkler",
"component_weights": {
"first_name": 0.3,
"middle_name": 0.2,
"last_name": 0.3,
"birthdate": 0.1,
"geography": 0.1
}
},
"monitoring": {
"enable_prometheus": true,
"prometheus_port": 8001,
"enable_health_checks": true,
"health_check_interval": 30,
"log_level": "INFO",
"log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
"log_file": null,
"max_log_size": 10485760,
"log_backup_count": 5
},
"gpu": {
"enabled": false,
"device_id": 0,
"memory_limit": null,
"batch_size": 1000,
"enable_mixed_precision": false
}
}
Loading