Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ FROM python:3.14-slim
# Set working directory
WORKDIR /app

# Install system dependencies
# Install system dependencies (ffmpeg for video thumbnail extraction)
RUN apt-get update && apt-get install -y \
gcc \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*

# Install uv for fast, reproducible dependency installation
Expand Down
91 changes: 34 additions & 57 deletions alembic/versions/20260524_013_fix_media_file_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
branch_labels = None
depends_on = None

_BATCH_SIZE = 5000
# Can't import from src.web.media_utils in migrations (different runtime context)
# Define locally to keep migration self-contained
_CHANNEL_ID_OFFSET = 1_000_000_000_000


def _derive_stale_folder(chat_id: int) -> str | None:
Expand All @@ -36,71 +38,46 @@ def _derive_stale_folder(chat_id: int) -> str | None:
if chat_id >= 0:
return None
raw = -chat_id
if raw > 1000000000000:
return str(raw - 1000000000000)
if raw > _CHANNEL_ID_OFFSET:
return str(raw - _CHANNEL_ID_OFFSET)
return str(raw)


def upgrade():
    """Rewrite stale positive folder segments in ``media.file_path``.

    For every negative ``chat_id`` that has media, the legacy folder name is
    derived with ``_derive_stale_folder`` and each ``/<stale_folder>/`` path
    segment is replaced by ``/<chat_id>/``.

    The SQL uses only ``REPLACE`` and ``LIKE``, so a single code path is
    executed regardless of the database dialect. Rows whose path no longer
    contains the stale segment are not matched by the ``LIKE`` filter, so
    re-running the migration does not touch already-corrected rows.
    """
    conn = op.get_bind()

    # Collect every distinct negative chat_id that has media before issuing
    # any UPDATE, so the SELECT result is fully consumed first.
    rows = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL"))
    chat_ids = [row[0] for row in rows]

    # The statement is identical for every chat; only the bound parameters vary.
    fix_paths = sa.text(
        "UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) "
        "WHERE chat_id = :cid AND file_path LIKE :pattern"
    )

    for chat_id in chat_ids:
        stale_folder = _derive_stale_folder(chat_id)
        if stale_folder is None:
            continue

        # Only update rows where file_path contains the stale folder:
        # .../<stale_folder>/... -> .../<chat_id>/...
        old_seg = f"/{stale_folder}/"
        conn.execute(
            fix_paths,
            {
                "old_seg": old_seg,
                "new_seg": f"/{chat_id}/",
                "cid": chat_id,
                "pattern": f"%{old_seg}%",
            },
        )


def downgrade():
# Reversible: swap the folder components back
# WARNING: This reverses ALL negative-folder paths to positive, including rows
# created after the upgrade. This is intentional — old code expects positive
# folders in file_path. The runtime fallback handles disk resolution.
conn = op.get_bind()
dialect = conn.dialect.name

result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL"))
chat_ids = [row[0] for row in result]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "telegram-archive"
version = "7.10.14"
version = "7.11.0"
description = "Automated Telegram backup with Docker. Performs incremental backups of messages and media on a configurable schedule."
readme = "README.md"
requires-python = ">=3.14"
Expand Down
28 changes: 21 additions & 7 deletions scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,24 @@ if has_tables and not has_alembic:
);
\"\"\")
# Check artifact from migration 013: file_path values use negative chat_id folders
# If any media row for a negative chat_id has a correctly-negative folder, 013 has run
# Guard: media table may not exist on very old databases
cur.execute(\"\"\"
SELECT EXISTS (
SELECT 1 FROM media
WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%'
LIMIT 1
SELECT FROM information_schema.tables
WHERE table_name = 'media'
);
\"\"\")
has_013_paths = cur.fetchone()[0]
has_media_table = cur.fetchone()[0]
has_013_paths = False
if has_media_table:
cur.execute(\"\"\"
SELECT EXISTS (
SELECT 1 FROM media
WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%'
LIMIT 1
);
\"\"\")
has_013_paths = cur.fetchone()[0]

# Check artifact from migration 012: idx_media_chat_type index
cur.execute(\"\"\"
Expand Down Expand Up @@ -300,8 +309,13 @@ if has_tables and not has_alembic:
''')

# Check artifact from migration 013: file_path values use negative chat_id folders
cur.execute(\"SELECT EXISTS(SELECT 1 FROM media WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' LIMIT 1)\")
has_013_paths = cur.fetchone()[0]
# Guard: media table may not exist on very old databases
cur.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name='media'\")
has_media_table = cur.fetchone() is not None
has_013_paths = False
if has_media_table:
cur.execute(\"SELECT EXISTS(SELECT 1 FROM media WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' LIMIT 1)\")
has_013_paths = cur.fetchone()[0]

# Check artifact from migration 012: idx_media_chat_type index
cur.execute(\"SELECT name FROM sqlite_master WHERE type='index' AND name='idx_media_chat_type'\")
Expand Down
2 changes: 1 addition & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Telegram Backup Automation - Main Package
"""

__version__ = "7.10.14"
__version__ = "7.11.0"
29 changes: 16 additions & 13 deletions src/web/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from ..config import Config
from ..db import DatabaseAdapter, close_database, get_db_manager, init_database
from ..realtime import RealtimeListener
from .media_utils import THUMBNAIL_EXTENSIONS, legacy_folder_alternates, legacy_marked_chat_ids

if TYPE_CHECKING:
from .push import PushNotificationManager
Expand Down Expand Up @@ -821,8 +822,8 @@ def _enforce_media_acl(path: str, user: UserContext, *, thumbnail: bool = False)
raise HTTPException(status_code=403, detail="Access denied")
if media_chat_id not in user_chat_ids:
# Legacy fallback: positive folder may correspond to negative marked ID
if media_chat_id > 0 and (-media_chat_id in user_chat_ids or -(1000000000000 + media_chat_id) in user_chat_ids):
pass
if media_chat_id > 0 and any(mid in user_chat_ids for mid in legacy_marked_chat_ids(media_chat_id)):
logger.debug("ACL legacy grant: positive folder mapped to allowed chat via marked-ID convention")
else:
raise HTTPException(status_code=403, detail="Access denied")

Expand Down Expand Up @@ -885,7 +886,7 @@ async def serve_thumbnail(size: int, folder: str, filename: str, user: UserConte
if user.no_download and not folder.startswith("avatars/"):
raise HTTPException(status_code=403, detail="Downloads disabled for this account")

# Chat-level access check
# Early ACL check on requested path (prevents existence leakage)
_enforce_media_acl(f"{folder}/{filename}", user, thumbnail=True)

from .thumbnails import ensure_thumbnail, resolve_cache_dir
Expand All @@ -894,10 +895,15 @@ async def serve_thumbnail(size: int, folder: str, filename: str, user: UserConte
if _thumb_cache_dir is None:
_thumb_cache_dir = resolve_cache_dir(_media_root)

thumb_path = await ensure_thumbnail(_media_root, size, folder, filename, cache_dir=_thumb_cache_dir)
if not thumb_path:
result = await ensure_thumbnail(_media_root, size, folder, filename, cache_dir=_thumb_cache_dir)
if not result:
raise HTTPException(status_code=404, detail="Thumbnail not available")

thumb_path, resolved_folder = result
# Secondary ACL on resolved path if it differs (prevents bypass via legacy fallback)
if resolved_folder != folder:
_enforce_media_acl(f"{resolved_folder}/{filename}", user, thumbnail=True)

return FileResponse(thumb_path, media_type="image/webp", headers={"Cache-Control": "public, max-age=86400"})


Expand Down Expand Up @@ -928,23 +934,20 @@ async def serve_media(path: str, download: int = Query(0), user: UserContext = D
resolved = None
if len(parts) == 2:
folder, rest = parts
alt_folders = []
if not folder.startswith("-"):
alt_folders = [f"-{folder}", f"-100{folder}"]
else:
alt_folders = [folder[1:]]
alt_folders = legacy_folder_alternates(folder)
for alt in alt_folders:
try:
resolved = (_media_root / alt / rest).resolve(strict=True)
logger.debug("Legacy fallback: served media via alternate folder resolution")
break
except OSError, ValueError:
except OSError, ValueError, RuntimeError:
continue
if resolved is None:
raise HTTPException(status_code=404, detail="File not found")
if not resolved.is_relative_to(_media_root):
raise HTTPException(status_code=403, detail="Access denied")

_enforce_media_acl(path, user)
_enforce_media_acl(str(resolved.relative_to(_media_root)), user)

if not resolved.is_file():
raise HTTPException(status_code=404, detail="File not found")
Expand Down Expand Up @@ -1502,7 +1505,7 @@ async def get_chat_media(
if len(parts) == 2:
folder, filename = parts
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
if ext in ("jpg", "jpeg", "png", "gif", "webp", "bmp", "tiff"):
if ext in THUMBNAIL_EXTENSIONS:
item["thumb_url"] = f"/media/thumb/200/{folder}/{filename}"
else:
item["thumb_url"] = None
Expand Down
62 changes: 62 additions & 0 deletions src/web/media_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Shared utilities for legacy media path resolution.

Centralizes the Telegram marked-ID convention so it's defined once
and used consistently across serve_media, thumbnails, and ACL checks.
"""

# Offset used by the marked-ID convention for channels: a channel with bare
# ID X is stored as chat_id = -(CHANNEL_ID_OFFSET + X). Basic groups are
# simply -X (see the helper functions in this module).
CHANNEL_ID_OFFSET: int = 1_000_000_000_000

# Lower-case file extensions, without the leading dot.
IMAGE_EXTENSIONS: set[str] = {"jpg", "jpeg", "png", "gif", "webp", "bmp", "tiff"}
VIDEO_EXTENSIONS: set[str] = {"mp4", "mkv", "avi", "mov", "webm", "m4v", "3gp"}
# Union of image and video extensions; callers compare a lower-cased file
# extension against this set to decide whether to offer a thumbnail URL.
THUMBNAIL_EXTENSIONS: set[str] = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS


def legacy_folder_alternates(folder: str) -> list[str]:
    """Return alternate folder names for legacy positive/negative ID paths.

    Forward (positive folder -> possible negative marked IDs on disk):
        "1234567890" -> ["-1234567890", "-1001234567890"]

    Reverse (negative folder -> possible old positive folder on disk):
        "-1234567890"    -> ["1234567890"]  (basic group)
        "-1001234567890" -> ["1234567890"]  (channel)

    Args:
        folder: First path component of a media path, expected to be a
            decimal chat ID (optionally negative).

    Returns:
        Candidate folder names to try, in priority order. The list is empty
        when ``folder`` is not an integer or is zero (in any spelling,
        e.g. "0" or "-0").

    Every alternate is rendered from the parsed integer, never from the raw
    input text, so spellings that ``int()`` tolerates ("+5", "0123",
    surrounding whitespace) cannot leak into the generated folder names.
    """
    try:
        folder_id = int(folder)
    except ValueError:
        return []

    # Zero is not a valid chat ID in either direction.
    if folder_id == 0:
        return []

    if folder_id > 0:
        return [str(-folder_id), str(-(CHANNEL_ID_OFFSET + folder_id))]

    raw = -folder_id
    if raw > CHANNEL_ID_OFFSET:
        return [str(raw - CHANNEL_ID_OFFSET)]
    return [str(raw)]


def legacy_marked_chat_ids(positive_id: int) -> list[int]:
    """Map a legacy positive folder ID to the marked chat_ids it may denote.

    The first entry is the basic-group form (``-X``) and the second is the
    channel form (``-(CHANNEL_ID_OFFSET + X)``). ACL checks use this to test
    whether a user may access a chat referenced by its old positive folder.
    """
    basic_group_id = -positive_id
    channel_id = basic_group_id - CHANNEL_ID_OFFSET
    return [basic_group_id, channel_id]


def derive_stale_folder(chat_id: int) -> str | None:
"""Derive the old positive folder name from a marked chat_id.

Basic groups: chat_id = -X → old folder = "X"
Channels: chat_id = -(10^12 + X) → old folder = "X"
Users: chat_id > 0 → no mismatch possible, return None

Used by migration 013 and tests (not imported at web runtime).
"""
if chat_id >= 0:
return None
raw = -chat_id
if raw > CHANNEL_ID_OFFSET:
return str(raw - CHANNEL_ID_OFFSET)
return str(raw)
Loading
Loading