-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
66 lines (56 loc) · 1.73 KB
/
Dockerfile
File metadata and controls
66 lines (56 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# A2C Knowledge Base Docker Image
#
# Supports: PDF, DOCX, PPTX, HTML, Markdown, Images, Audio (Whisper ASR), Video (ffmpeg)
#
# Build:
# docker build -t a2c-knowledge .
#
# Run:
# docker run -it --rm -v "$(pwd)/data:/app/data" a2c-knowledge
#
# With GPU (NVIDIA):
# docker run -it --rm --gpus all -v "$(pwd)/data:/app/data" a2c-knowledge
FROM python:3.11-slim

# Interpreter and pip settings shared by the build steps and the running container:
#   PIP_DISABLE_PIP_VERSION_CHECK - skip pip's "new version available" check
#   PIP_NO_CACHE_DIR              - turn off pip's download cache
#   PYTHONDONTWRITEBYTECODE       - do not write .pyc files
#   PYTHONUNBUFFERED              - unbuffered stdout/stderr so output shows up immediately
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# Install system dependencies (kept alphabetical so diffs stay small):
#   build-essential                 - build tools
#   ffmpeg                          - video/audio processing
#   libgl1, libglib2.0-0            - shared libraries for image processing
#   poppler-utils, tesseract-ocr    - PDF processing dependencies
#   tesseract-ocr-chi-sim/-chi-tra  - Simplified / Traditional Chinese OCR data
#
# libgl1 is used instead of libgl1-mesa-glx: the latter is only a transitional
# package that newer Debian releases no longer ship, so installing it fails
# with "no installation candidate" once the base image tag moves to such a
# release. libgl1 is available on both older and newer releases.
#
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        libgl1 \
        libglib2.0-0 \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-chi-sim \
        tesseract-ocr-chi-tra \
    && rm -rf /var/lib/apt/lists/*
# Relative paths in the instructions that follow resolve against /app
WORKDIR /app

# Bootstrap uv; it is used further down to install the project dependencies faster
RUN python -m pip install uv
# Bring the package metadata and each source package into the working directory.
# The directories are listed alphabetically; each lands under the same name.
COPY pyproject.toml ./
COPY config/ ./config/
COPY exporter/ ./exporter/
COPY generator/ ./generator/
COPY knowledge/ ./knowledge/
COPY models/ ./models/
COPY renderer/ ./renderer/
COPY utils/ ./utils/
# Install the project in editable mode, with its "asr" extra, into the system
# interpreter (no virtualenv inside the container).
# --no-cache keeps uv's download/build cache out of this image layer; the
# PIP_NO_CACHE_DIR environment variable only affects pip, not uv.
RUN uv pip install --system --no-cache -e ".[asr]"
# Pre-download models (optional; makes the first run faster).
# Uncomment the RUN line below to bake the model into the image (increases image size significantly).
# RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('Qwen/Qwen3-Embedding-0.6B')"
# Make sure the ChromaDB storage directory exists inside the image
RUN ["mkdir", "-p", "/app/data/chroma"]
# Default command: a smoke check that imports KnowledgePipeline from the
# knowledge package, prints a readiness message, and then exits.
# Pass a different command to `docker run` to do actual work in the container.
CMD ["python", "-c", "from knowledge import KnowledgePipeline; print('A2C Knowledge Base ready!')"]