Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/dummy_onbase/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Use an official Python image
FROM python:3.11-slim

# Skip writing .pyc files and keep stdout/stderr unbuffered so logs show up
# immediately in `docker logs`
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Create app directory
WORKDIR /app

# Install dependencies BEFORE copying the source so this layer stays cached
# when only application code changes
RUN pip install --no-cache-dir flask flask-cors

# Copy app files
COPY . /app

# Ensure data folder exists
RUN mkdir -p /app/data

# Expose Flask port
EXPOSE 5000

# Run the app
CMD ["python", "app.py"]
30 changes: 30 additions & 0 deletions src/dummy_onbase/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Flask PDF Uploader

A simple Flask app for uploading individual PDF files or ZIP archives containing multiple PDFs. Each stored file keeps its original name with a short unique ID and the upload date (`YYYYMMDD`) appended, e.g. `report_1a2b3c4d_20240131.pdf`.

## Features

- Upload a single `.pdf` file
- Upload a `.zip` file containing multiple PDFs
- View and download uploaded PDFs
- Filter documents by date range

## Requirements

- Python 3.11+ (if running without Docker)
- Docker (recommended for deployment)

---

## 🐳 Run with Docker

### 1. Build the Docker image

```bash
docker build -t pdf-uploader .
```

### 2. Run the container
```bash
docker run -d -p 49123:5000 -v "$(pwd)/data:/app/data" pdf-uploader
```
112 changes: 112 additions & 0 deletions src/dummy_onbase/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from flask import Flask, request, send_from_directory, render_template, jsonify
from flask_cors import CORS
from datetime import datetime
import os
import uuid
import zipfile

# Folder (relative to the process working directory) that holds uploaded PDFs;
# created at import time if it does not exist yet.
UPLOAD_FOLDER = 'data'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Flask application object, with flask-cors applied using its default options.
app = Flask(__name__)
CORS(app)

@app.route('/')
def index():
    """Render the upload page together with a sorted list of stored PDFs."""
    pdf_names = [
        entry for entry in os.listdir(UPLOAD_FOLDER) if entry.endswith('.pdf')
    ]
    pdf_names.sort()
    return render_template('upload.html', files=pdf_names)

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a multipart upload of one PDF or one ZIP archive of PDFs.

    Reads the ``file`` form field and dispatches on the file extension.

    Returns:
        The ``(body, status)`` pair produced by the matching handler, or a
        400 response when no file was sent or the extension is neither
        ``.pdf`` nor ``.zip``.
    """
    uploaded_file = request.files.get('file')
    if not uploaded_file:
        return "No file uploaded", 400

    # Compare case-insensitively so names such as "REPORT.PDF" are accepted,
    # mirroring how PDF members inside ZIP archives are matched.
    lowered_name = (uploaded_file.filename or '').lower()

    if lowered_name.endswith('.pdf'):
        return save_pdf_file(uploaded_file)

    if lowered_name.endswith('.zip'):
        return extract_and_save_pdfs_from_zip(uploaded_file)

    return "Invalid file type", 400

def save_pdf_file(file):
    """Store an uploaded PDF in UPLOAD_FOLDER under a unique, dated name.

    The stored name is ``<original stem>_<8-char id>_<YYYYMMDD>.pdf``.

    Args:
        file: Uploaded file object exposing ``filename`` and ``save(path)``.

    Returns:
        ``(html_body, 200)`` where the body names the stored file.
    """
    from html import escape

    timestamp = datetime.now().strftime('%Y%m%d')
    doc_id = str(uuid.uuid4())[:8]

    # The filename is client-controlled: keep only its last path component
    # (treating both "/" and "\\" as separators) so a name like
    # "../../x.pdf" cannot place the file outside UPLOAD_FOLDER.
    client_name = os.path.basename(file.filename.replace('\\', '/'))
    original = os.path.splitext(client_name)[0]

    filename = f"{original}_{doc_id}_{timestamp}.pdf"
    filepath = os.path.join(UPLOAD_FOLDER, filename)
    file.save(filepath)

    # Escape before embedding in HTML so markup in the name is not rendered.
    return f"PDF uploaded as {escape(filename)}<br><a href='/'>Back</a>", 200

def extract_and_save_pdfs_from_zip(file):
    """Extract the PDF members of an uploaded ZIP archive into UPLOAD_FOLDER.

    Each PDF is stored as ``<original stem>_<8-char id>_<YYYYMMDD>.pdf``.
    Only the base name of each member is used, so directory components
    inside the archive never influence where a file is written.

    Args:
        file: File-like object (or path) readable by ``zipfile.ZipFile``.

    Returns:
        ``(html_body, 200)`` reporting how many PDFs were stored, or
        ``("Invalid ZIP file", 400)`` if the archive cannot be read.
    """
    import shutil

    try:
        with zipfile.ZipFile(file) as zip_ref:
            seen = set()
            count = 0
            for member in zip_ref.infolist():
                filename = os.path.basename(member.filename)

                # Skip directories, hidden files (e.g. macOS "._name.pdf"
                # resource forks), non-PDFs, __MACOSX, or already-seen names
                if (not filename or
                        filename.startswith('.') or
                        not filename.lower().endswith('.pdf') or
                        '__macosx' in member.filename.lower() or
                        filename in seen):
                    continue

                seen.add(filename)

                timestamp = datetime.now().strftime('%Y%m%d')
                doc_id = str(uuid.uuid4())[:8]
                original = os.path.splitext(filename)[0]
                saved_filename = f"{original}_{doc_id}_{timestamp}.pdf"
                filepath = os.path.join(UPLOAD_FOLDER, saved_filename)

                # Stream in chunks rather than loading the whole member
                # into memory at once.
                with zip_ref.open(member) as pdf_file, open(filepath, 'wb') as f:
                    shutil.copyfileobj(pdf_file, f)
                count += 1
        return f"{count} PDF(s) extracted and uploaded<br><a href='/'>Back</a>", 200
    except zipfile.BadZipFile:
        return "Invalid ZIP file", 400


@app.route('/document/<filename>', methods=['GET'])
def get_document_by_filename(filename):
    """Send the named file from UPLOAD_FOLDER as a download attachment."""
    download = send_from_directory(UPLOAD_FOLDER, filename, as_attachment=True)
    return download

@app.route('/documents', methods=['GET'])
def get_documents_in_range():
    """List stored PDFs whose date stamp falls within [start, end].

    Query parameters:
        start: First date to include, formatted ``YYYYMMDD``.
        end: Last date to include, formatted ``YYYYMMDD``.

    Returns:
        JSON ``{"documents": [{"doc_id", "date", "file_name"}, ...]}``
        ordered by file name, or a 400 response when a parameter is missing
        or is not a valid ``YYYYMMDD`` date. Files whose names do not follow
        the ``<stem>_<id>_<YYYYMMDD>.pdf`` pattern are ignored.
    """
    start = request.args.get('start')
    end = request.args.get('end')
    if not start or not end:
        return "Missing start or end parameter", 400

    try:
        start_date = datetime.strptime(start, '%Y%m%d')
        end_date = datetime.strptime(end, '%Y%m%d')
    except ValueError:
        return "Invalid date format. Use YYYYMMDD.", 400

    results = []
    # os.listdir returns entries in arbitrary order; sort so the response is
    # stable between calls and consistent with the index page listing.
    for filename in sorted(os.listdir(UPLOAD_FOLDER)):
        if not filename.endswith('.pdf'):
            continue

        # Split from the right so underscores in the original stem are kept.
        parts = filename.rsplit('_', 2)
        if len(parts) != 3:
            continue

        _, doc_id, date_str = parts
        date_str = date_str.replace('.pdf', '')
        try:
            file_date = datetime.strptime(date_str, '%Y%m%d')
        except ValueError:
            # Last segment is not a date, so this is not one of our files.
            continue

        if start_date <= file_date <= end_date:
            results.append({
                "doc_id": doc_id,
                "date": date_str,
                "file_name": filename,
            })

    return jsonify({"documents": results})

# Start Flask's built-in server only when this file is executed directly;
# it listens on every network interface, port 5000.
if __name__ == "__main__":
    app.run(port=5000, host="0.0.0.0")

Empty file added src/dummy_onbase/data/.keep
Empty file.
21 changes: 21 additions & 0 deletions src/dummy_onbase/templates/upload.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Upload a PDF File</title>
</head>
<body>
    <h1>Upload a .pdf or .zip File</h1>
    <form method="POST" action="/upload" enctype="multipart/form-data">
        <input type="file" name="file" accept=".pdf,.zip" required />
        <br><br>
        <input type="submit" value="Upload">
    </form>

    <h2>Uploaded Files:</h2>
    <ul>
        {# url_for URL-encodes the file name, so names containing spaces, "#", "?" or "%" still resolve #}
        {% for file in files %}
        <li><a href="{{ url_for('get_document_by_filename', filename=file) }}">{{ file }}</a></li>
        {% endfor %}
    </ul>
</body>
</html>
Loading