-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
40 lines (32 loc) · 1.11 KB
/
Dockerfile
File metadata and controls
40 lines (32 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Base image: Python 3.11 on the slim Debian bullseye variant.
FROM python:3.11-slim-bullseye
# Install system dependencies: image/XML development headers and a compiler
# toolchain (presumably for building Python packages from source — confirm
# against requirements.txt), curl (used by the Node.js setup step below) and
# git (not used directly in this file; presumably needed by a pip/npm
# dependency — confirm).
# --no-install-recommends keeps optional "recommended" packages out of the
# image. ca-certificates is listed explicitly because curl and git only pull
# it in as a recommendation, and the HTTPS download below depends on it.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        libjpeg-dev \
        libopenjp2-7-dev \
        libtiff5-dev \
        libwebp-dev \
        libxml2-dev \
        libxslt1-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Install Node.js 18.x from NodeSource (needed for mercury-parser).
# The setup script is downloaded to a file and then executed instead of being
# piped into bash: RUN uses /bin/sh without pipefail, so with
# `curl ... | bash -` a failed download would be masked (bash exits 0 on empty
# input) and the build would continue without the NodeSource repository.
# The script and the apt lists are removed in the same layer.
RUN curl -fsSL https://deb.nodesource.com/setup_18.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && rm -f /tmp/nodesource_setup.sh \
    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/*
# Install mercury-parser globally so its CLI is available on PATH.
# The npm download cache is cleared in the same layer; clearing it in a later
# RUN would not shrink the image.
# NOTE(review): the package version is not pinned, so rebuilds may pick up a
# different release — consider pinning to a known-good version.
RUN npm install -g @postlight/mercury-parser \
    && npm cache clean --force
# Python dependencies are installed before the application source is copied,
# so only the two requirements files feed into this layer.
WORKDIR /app
COPY requirements.txt requirements-ml.txt ./

# Build with --build-arg WITH_ML=1 to additionally install requirements-ml.txt.
ARG WITH_ML=0
RUN pip install --no-cache-dir -r requirements.txt \
    && if [ "${WITH_ML}" = "1" ]; then \
           pip install --no-cache-dir -r requirements-ml.txt; \
       fi
# Fetch the NLTK 'punkt' and 'punkt_tab' resources at build time so they are
# stored in the image rather than downloaded again on each run.
RUN python -c "import nltk; [nltk.download(name) for name in ('punkt', 'punkt_tab')]"
# Copy the rest of the build context into the working directory (/app).
COPY . .

# Playwright browsers are opt-in: pass --build-arg WITH_PLAYWRIGHT_BROWSERS=1
# to run install_playwright.py during the build.
ARG WITH_PLAYWRIGHT_BROWSERS=0
RUN if [ "${WITH_PLAYWRIGHT_BROWSERS}" = "1" ]; then \
        python install_playwright.py; \
    fi
# Default command: `tail -f /dev/null` blocks indefinitely, keeping the
# container alive without starting an application process.
# NOTE(review): presumably the actual workloads are launched via `docker exec`
# or by overriding the command at run time — confirm.
CMD ["tail", "-f", "/dev/null"]