-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpopulate_database.py
More file actions
78 lines (64 loc) · 2.27 KB
/
populate_database.py
File metadata and controls
78 lines (64 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
"""Populate the database with all files found in the upload folder."""
import os
import sys
from pathlib import Path
from config import config
from db.db_manager import DatabaseManager
def populate_database():
    """Scan the upload folder and add every PDF/HTML file to the database.

    Reads the folder and database locations from ``config.UPLOAD_FOLDER`` and
    ``config.DATABASE_PATH``, walks the folder recursively, and calls
    ``DatabaseManager.add_document`` once per matching file. Progress and a
    final summary (added / skipped, plus failed when any occurred) are
    printed to stdout.

    Returns:
        None. Returns early, after printing a message, when the upload
        folder is not an existing directory or holds no matching files.
    """
    upload_folder = config.UPLOAD_FOLDER
    db_path = config.DATABASE_PATH

    print(f"Upload folder: {upload_folder}")
    print(f"Database: {db_path}")
    print("=" * 70)
    print()

    # isdir rather than exists: a regular file at this path cannot be walked
    # and would otherwise be misreported below as "No files found to add!".
    if not os.path.isdir(upload_folder):
        print(f"❌ Upload folder does not exist or is not a directory: {upload_folder}")
        return

    # Initialize database manager (kept before the scan, as in the original
    # ordering, so any setup it performs still happens when no files match).
    db = DatabaseManager(db_path)

    print("Scanning filesystem...")
    pdf_files, html_files = _find_documents(upload_folder)

    print(f"Found {len(pdf_files)} PDF files")
    print(f"Found {len(html_files)} HTML files")
    print()

    if not pdf_files and not html_files:
        print("No files found to add!")
        return

    # Add to database
    added = 0
    skipped = 0
    failed = 0

    print("Adding files to database...")
    for filepath in pdf_files + html_files:
        try:
            doc_id = db.add_document(filepath)
        except Exception as e:
            # Best effort: one bad file must not abort the whole run, but the
            # failure is counted so the summary does not hide it.
            failed += 1
            print(f"  ✗ Error adding {filepath.name}: {e}")
            continue

        # NOTE(review): a falsy return is treated as "already exists" —
        # confirm against DatabaseManager.add_document.
        if doc_id:
            added += 1
            print(f"  ✓ Added: {filepath.name}")
        else:
            skipped += 1
            print(f"  - Skipped: {filepath.name} (already exists)")

    print()
    print("=" * 70)
    print(f"✓ Added {added} new documents")
    print(f"- Skipped {skipped} existing documents")
    if failed:
        print(f"✗ Failed to add {failed} documents")
    print()
    print("Next steps:")
    print("  1. Restart the service to start processing: sudo systemctl restart rectangular-file")
    print("  2. Or manually trigger processing by visiting: http://rf.broken.works/")


def _find_documents(upload_folder):
    """Return ``(pdf_files, html_files)`` found recursively under *upload_folder*.

    Both lists hold ``Path`` objects. Files whose name starts with ``.`` are
    ignored; extensions (``.pdf``, ``.html``, ``.htm``) match case-insensitively.
    """
    pdf_files = []
    html_files = []
    for root, _dirs, files in os.walk(upload_folder):
        for filename in files:
            if filename.startswith('.'):
                continue
            lowered = filename.lower()
            file_path = Path(os.path.join(root, filename))
            if lowered.endswith('.pdf'):
                pdf_files.append(file_path)
            elif lowered.endswith(('.html', '.htm')):
                html_files.append(file_path)
    return pdf_files, html_files
# Script entry point: run the population only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    populate_database()