-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
40 lines (32 loc) · 1.27 KB
/
Dockerfile
File metadata and controls
40 lines (32 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Base image: the slim variant of the Python 3.9 image, chosen to keep the
# resulting image small.
FROM python:3.9-slim
# All following relative paths (COPY destinations, the entrypoint script)
# resolve against /app; WORKDIR creates the directory if it does not exist.
WORKDIR /app
# Install the C/C++ build toolchain (build-essential) and the Python
# development headers (python3-dev).
# NOTE(review): the previous comment said these are required for PDF
# processing; presumably a package in requirements.txt compiles a native
# extension at install time -- confirm which one actually needs them.
#
# --no-install-recommends keeps apt from pulling in optional "recommended"
# packages, and the package lists are deleted in the same layer so they never
# persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*
# The dependency manifest is copied on its own, before the application script,
# so the expensive layer below is rebuilt only when requirements.txt changes.
COPY requirements.txt ./

# One layer that, in order:
#   1. installs the packages listed in requirements.txt (pip cache not kept),
#   2. downloads the spaCy en_core_web_sm model,
#   3. instantiates SentenceTransformer('paraphrase-MiniLM-L6-v2') so that the
#      model is fetched while the image is being built,
#   4. makes sure the torch and huggingface cache directories exist under
#      /root/.cache.
RUN pip install --no-cache-dir -r requirements.txt \
    && python -m spacy download en_core_web_sm \
    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('paraphrase-MiniLM-L6-v2')" \
    && mkdir -p /root/.cache/torch/sentence_transformers /root/.cache/huggingface
# Fetch a second sentence-transformers model (all-mpnet-base-v2) during the
# build, in its own layer, by instantiating it once. The offline flags set
# further down in this file mean it has to be cached in the image already.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-mpnet-base-v2')"
# Add the application script to the working directory.
COPY script.py ./

# Create the input and output directories. The output directory is made
# world-writable (mode 777).
# NOTE(review): presumably so a container started under an arbitrary UID can
# still write its results -- confirm, and tighten the mode if not needed.
RUN mkdir -p /app/input /app/output \
    && chmod 777 /app/output
# Put the Hugging Face libraries (hub, transformers, datasets) into offline
# mode so that only locally available models are used at run time.
ENV HF_HUB_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1 \
    HF_DATASETS_OFFLINE=1

# Exec-form entrypoint: runs script.py (relative to the working directory)
# with the image's Python interpreter; arguments given to `docker run` are
# appended to this command.
ENTRYPOINT ["python", "script.py"]