diff --git a/Dockerfile b/Dockerfile index 87cf115..5c182ed 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,9 +3,10 @@ FROM python:3.14-slim # Set working directory WORKDIR /app -# Install system dependencies +# Install system dependencies (ffmpeg for video thumbnail extraction) RUN apt-get update && apt-get install -y \ gcc \ + ffmpeg \ && rm -rf /var/lib/apt/lists/* # Install uv for fast, reproducible dependency installation diff --git a/alembic/versions/20260524_013_fix_media_file_paths.py b/alembic/versions/20260524_013_fix_media_file_paths.py index 44d05e5..960cffa 100644 --- a/alembic/versions/20260524_013_fix_media_file_paths.py +++ b/alembic/versions/20260524_013_fix_media_file_paths.py @@ -23,7 +23,9 @@ branch_labels = None depends_on = None -_BATCH_SIZE = 5000 +# Can't import from src.web.media_utils in migrations (different runtime context) +# Define locally to keep migration self-contained +_CHANNEL_ID_OFFSET = 1_000_000_000_000 def _derive_stale_folder(chat_id: int) -> str | None: @@ -36,71 +38,46 @@ def _derive_stale_folder(chat_id: int) -> str | None: if chat_id >= 0: return None raw = -chat_id - if raw > 1000000000000: - return str(raw - 1000000000000) + if raw > _CHANNEL_ID_OFFSET: + return str(raw - _CHANNEL_ID_OFFSET) return str(raw) def upgrade(): conn = op.get_bind() - dialect = conn.dialect.name - - if dialect == "postgresql": - # Get all distinct negative chat_ids that have media - result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL")) - chat_ids = [row[0] for row in result] - - for chat_id in chat_ids: - stale_folder = _derive_stale_folder(chat_id) - if stale_folder is None: - continue - correct_folder = str(chat_id) - - # Only update rows where file_path contains the stale folder - # Use pattern: ...//... → ...//... 
- stale_pattern = f"%/{stale_folder}/%" - conn.execute( - sa.text( - "UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) " - "WHERE chat_id = :cid AND file_path LIKE :pattern" - ), - { - "old_seg": f"/{stale_folder}/", - "new_seg": f"/{correct_folder}/", - "cid": chat_id, - "pattern": stale_pattern, - }, - ) - - elif dialect == "sqlite": - result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL")) - chat_ids = [row[0] for row in result] - - for chat_id in chat_ids: - stale_folder = _derive_stale_folder(chat_id) - if stale_folder is None: - continue - correct_folder = str(chat_id) - - stale_pattern = f"%/{stale_folder}/%" - conn.execute( - sa.text( - "UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) " - "WHERE chat_id = :cid AND file_path LIKE :pattern" - ), - { - "old_seg": f"/{stale_folder}/", - "new_seg": f"/{correct_folder}/", - "cid": chat_id, - "pattern": stale_pattern, - }, - ) + + # Get all distinct negative chat_ids that have media + result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL")) + chat_ids = [row[0] for row in result] + + for chat_id in chat_ids: + stale_folder = _derive_stale_folder(chat_id) + if stale_folder is None: + continue + correct_folder = str(chat_id) + + # Only update rows where file_path contains the stale folder + # Use pattern: .../{stale_folder}/... → .../{correct_folder}/... + stale_pattern = f"%/{stale_folder}/%" + conn.execute( + sa.text( + "UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) " + "WHERE chat_id = :cid AND file_path LIKE :pattern" + ), + { + "old_seg": f"/{stale_folder}/", + "new_seg": f"/{correct_folder}/", + "cid": chat_id, + "pattern": stale_pattern, + }, + ) def downgrade(): - # Reversible: swap the folder components back + # WARNING: This reverses ALL negative-folder paths to positive, including rows + # created after the upgrade. 
This is intentional — old code expects positive + # folders in file_path. The runtime fallback handles disk resolution. conn = op.get_bind() - dialect = conn.dialect.name result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL")) chat_ids = [row[0] for row in result] diff --git a/pyproject.toml b/pyproject.toml index 7e94774..e2b052e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "telegram-archive" -version = "7.10.14" +version = "7.11.0" description = "Automated Telegram backup with Docker. Performs incremental backups of messages and media on a configurable schedule." readme = "README.md" requires-python = ">=3.14" diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 5c19933..0600566 100644 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -93,15 +93,24 @@ if has_tables and not has_alembic: ); \"\"\") # Check artifact from migration 013: file_path values use negative chat_id folders - # If any media row for a negative chat_id has a correctly-negative folder, 013 has run + # Guard: media table may not exist on very old databases cur.execute(\"\"\" SELECT EXISTS ( - SELECT 1 FROM media - WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' - LIMIT 1 + SELECT FROM information_schema.tables + WHERE table_name = 'media' ); \"\"\") - has_013_paths = cur.fetchone()[0] + has_media_table = cur.fetchone()[0] + has_013_paths = False + if has_media_table: + cur.execute(\"\"\" + SELECT EXISTS ( + SELECT 1 FROM media + WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' + LIMIT 1 + ); + \"\"\") + has_013_paths = cur.fetchone()[0] # Check artifact from migration 012: idx_media_chat_type index cur.execute(\"\"\" @@ -300,8 +309,13 @@ if has_tables and not has_alembic: ''') # Check artifact from migration 013: file_path values use negative chat_id folders - cur.execute(\"SELECT 
EXISTS(SELECT 1 FROM media WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' LIMIT 1)\") - has_013_paths = cur.fetchone()[0] + # Guard: media table may not exist on very old databases + cur.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name='media'\") + has_media_table = cur.fetchone() is not None + has_013_paths = False + if has_media_table: + cur.execute(\"SELECT EXISTS(SELECT 1 FROM media WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' LIMIT 1)\") + has_013_paths = cur.fetchone()[0] # Check artifact from migration 012: idx_media_chat_type index cur.execute(\"SELECT name FROM sqlite_master WHERE type='index' AND name='idx_media_chat_type'\") diff --git a/src/__init__.py b/src/__init__.py index a5cbc1a..d54fb36 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -2,4 +2,4 @@ Telegram Backup Automation - Main Package """ -__version__ = "7.10.14" +__version__ = "7.11.0" diff --git a/src/web/main.py b/src/web/main.py index 7af3fd1..825c856 100644 --- a/src/web/main.py +++ b/src/web/main.py @@ -31,6 +31,7 @@ from ..config import Config from ..db import DatabaseAdapter, close_database, get_db_manager, init_database from ..realtime import RealtimeListener +from .media_utils import THUMBNAIL_EXTENSIONS, legacy_folder_alternates, legacy_marked_chat_ids if TYPE_CHECKING: from .push import PushNotificationManager @@ -821,8 +822,8 @@ def _enforce_media_acl(path: str, user: UserContext, *, thumbnail: bool = False) raise HTTPException(status_code=403, detail="Access denied") if media_chat_id not in user_chat_ids: # Legacy fallback: positive folder may correspond to negative marked ID - if media_chat_id > 0 and (-media_chat_id in user_chat_ids or -(1000000000000 + media_chat_id) in user_chat_ids): - pass + if media_chat_id > 0 and any(mid in user_chat_ids for mid in legacy_marked_chat_ids(media_chat_id)): + logger.debug("ACL legacy grant: positive folder mapped to allowed chat via marked-ID convention") 
else: raise HTTPException(status_code=403, detail="Access denied") @@ -885,7 +886,7 @@ async def serve_thumbnail(size: int, folder: str, filename: str, user: UserConte if user.no_download and not folder.startswith("avatars/"): raise HTTPException(status_code=403, detail="Downloads disabled for this account") - # Chat-level access check + # Early ACL check on requested path (prevents existence leakage) _enforce_media_acl(f"{folder}/{filename}", user, thumbnail=True) from .thumbnails import ensure_thumbnail, resolve_cache_dir @@ -894,10 +895,15 @@ async def serve_thumbnail(size: int, folder: str, filename: str, user: UserConte if _thumb_cache_dir is None: _thumb_cache_dir = resolve_cache_dir(_media_root) - thumb_path = await ensure_thumbnail(_media_root, size, folder, filename, cache_dir=_thumb_cache_dir) - if not thumb_path: + result = await ensure_thumbnail(_media_root, size, folder, filename, cache_dir=_thumb_cache_dir) + if not result: raise HTTPException(status_code=404, detail="Thumbnail not available") + thumb_path, resolved_folder = result + # Secondary ACL on resolved path if it differs (prevents bypass via legacy fallback) + if resolved_folder != folder: + _enforce_media_acl(f"{resolved_folder}/{filename}", user, thumbnail=True) + return FileResponse(thumb_path, media_type="image/webp", headers={"Cache-Control": "public, max-age=86400"}) @@ -928,23 +934,20 @@ async def serve_media(path: str, download: int = Query(0), user: UserContext = D resolved = None if len(parts) == 2: folder, rest = parts - alt_folders = [] - if not folder.startswith("-"): - alt_folders = [f"-{folder}", f"-100{folder}"] - else: - alt_folders = [folder[1:]] + alt_folders = legacy_folder_alternates(folder) for alt in alt_folders: try: resolved = (_media_root / alt / rest).resolve(strict=True) + logger.debug("Legacy fallback: served media via alternate folder resolution") break - except OSError, ValueError: + except OSError, ValueError, RuntimeError: continue if resolved is None: raise 
HTTPException(status_code=404, detail="File not found") if not resolved.is_relative_to(_media_root): raise HTTPException(status_code=403, detail="Access denied") - _enforce_media_acl(path, user) + _enforce_media_acl(str(resolved.relative_to(_media_root)), user) if not resolved.is_file(): raise HTTPException(status_code=404, detail="File not found") @@ -1502,7 +1505,7 @@ async def get_chat_media( if len(parts) == 2: folder, filename = parts ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else "" - if ext in ("jpg", "jpeg", "png", "gif", "webp", "bmp", "tiff"): + if ext in THUMBNAIL_EXTENSIONS: item["thumb_url"] = f"/media/thumb/200/{folder}/{filename}" else: item["thumb_url"] = None diff --git a/src/web/media_utils.py b/src/web/media_utils.py new file mode 100644 index 0000000..bb49ed5 --- /dev/null +++ b/src/web/media_utils.py @@ -0,0 +1,62 @@ +"""Shared utilities for legacy media path resolution. + +Centralizes the Telegram marked-ID convention so it's defined once +and used consistently across serve_media, thumbnails, and ACL checks. +""" + +CHANNEL_ID_OFFSET: int = 1_000_000_000_000 + +IMAGE_EXTENSIONS: set[str] = {"jpg", "jpeg", "png", "gif", "webp", "bmp", "tiff"} +VIDEO_EXTENSIONS: set[str] = {"mp4", "mkv", "avi", "mov", "webm", "m4v", "3gp"} +THUMBNAIL_EXTENSIONS: set[str] = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS + + +def legacy_folder_alternates(folder: str) -> list[str]: + """Return alternate folder names for legacy positive/negative ID paths. 
+ + Forward (positive folder → possible negative marked IDs on disk): + "1234567890" → ["-1234567890", "-1001234567890"] + + Reverse (negative folder → possible old positive folder on disk): + "-1234567890" → ["1234567890"] (basic group) + "-1001234567890" → ["1234567890"] (channel) + """ + try: + if not folder.startswith("-"): + folder_int = int(folder) + if folder_int <= 0: + return [] + return [f"-{folder}", str(-(CHANNEL_ID_OFFSET + folder_int))] + folder_int = int(folder) + except ValueError: + return [] + raw = -folder_int + if raw > CHANNEL_ID_OFFSET: + return [str(raw - CHANNEL_ID_OFFSET)] + return [str(raw)] + + +def legacy_marked_chat_ids(positive_id: int) -> list[int]: + """Return possible marked chat_ids for a legacy positive folder ID. + + Used by ACL checks to determine if a user has access to a chat + referenced by its old positive folder name. + """ + return [-positive_id, -(CHANNEL_ID_OFFSET + positive_id)] + + +def derive_stale_folder(chat_id: int) -> str | None: + """Derive the old positive folder name from a marked chat_id. + + Basic groups: chat_id = -X → old folder = "X" + Channels: chat_id = -(10^12 + X) → old folder = "X" + Users: chat_id > 0 → no mismatch possible, return None + + Mirrors the local copy in migration 013; used by tests (not imported at web runtime). + """ + if chat_id >= 0: + return None + raw = -chat_id + if raw > CHANNEL_ID_OFFSET: + return str(raw - CHANNEL_ID_OFFSET) + return str(raw) diff --git a/src/web/templates/index.html b/src/web/templates/index.html index 12ecbc1..a405f45 100644 --- a/src/web/templates/index.html +++ b/src/web/templates/index.html @@ -1291,7 +1291,7 @@

Jump to Date

-