Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 34 additions & 57 deletions alembic/versions/20260524_013_fix_media_file_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
branch_labels = None
depends_on = None

_BATCH_SIZE = 5000
# Can't import from src.web.media_utils in migrations (different runtime context)
# Define locally to keep migration self-contained
_CHANNEL_ID_OFFSET = 1_000_000_000_000


def _derive_stale_folder(chat_id: int) -> str | None:
Expand All @@ -36,71 +38,46 @@ def _derive_stale_folder(chat_id: int) -> str | None:
if chat_id >= 0:
return None
raw = -chat_id
if raw > 1000000000000:
return str(raw - 1000000000000)
if raw > _CHANNEL_ID_OFFSET:
return str(raw - _CHANNEL_ID_OFFSET)
return str(raw)


def upgrade():
conn = op.get_bind()
dialect = conn.dialect.name

if dialect == "postgresql":
# Get all distinct negative chat_ids that have media
result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL"))
chat_ids = [row[0] for row in result]

for chat_id in chat_ids:
stale_folder = _derive_stale_folder(chat_id)
if stale_folder is None:
continue
correct_folder = str(chat_id)

# Only update rows where file_path contains the stale folder
# Use pattern: .../<stale_folder>/... → .../<correct_folder>/...
stale_pattern = f"%/{stale_folder}/%"
conn.execute(
sa.text(
"UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) "
"WHERE chat_id = :cid AND file_path LIKE :pattern"
),
{
"old_seg": f"/{stale_folder}/",
"new_seg": f"/{correct_folder}/",
"cid": chat_id,
"pattern": stale_pattern,
},
)

elif dialect == "sqlite":
result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL"))
chat_ids = [row[0] for row in result]

for chat_id in chat_ids:
stale_folder = _derive_stale_folder(chat_id)
if stale_folder is None:
continue
correct_folder = str(chat_id)

stale_pattern = f"%/{stale_folder}/%"
conn.execute(
sa.text(
"UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) "
"WHERE chat_id = :cid AND file_path LIKE :pattern"
),
{
"old_seg": f"/{stale_folder}/",
"new_seg": f"/{correct_folder}/",
"cid": chat_id,
"pattern": stale_pattern,
},
)

# Get all distinct negative chat_ids that have media
result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL"))
chat_ids = [row[0] for row in result]

for chat_id in chat_ids:
stale_folder = _derive_stale_folder(chat_id)
if stale_folder is None:
continue
correct_folder = str(chat_id)

# Only update rows where file_path contains the stale folder
# Use pattern: .../<stale_folder>/... → .../<correct_folder>/...
stale_pattern = f"%/{stale_folder}/%"
conn.execute(
sa.text(
"UPDATE media SET file_path = REPLACE(file_path, :old_seg, :new_seg) "
"WHERE chat_id = :cid AND file_path LIKE :pattern"
),
{
"old_seg": f"/{stale_folder}/",
"new_seg": f"/{correct_folder}/",
"cid": chat_id,
"pattern": stale_pattern,
},
)


def downgrade():
# Reversible: swap the folder components back
# WARNING: This reverses ALL negative-folder paths to positive, including rows
# created after the upgrade. This is intentional — old code expects positive
# folders in file_path. The runtime fallback handles disk resolution.
conn = op.get_bind()
dialect = conn.dialect.name

result = conn.execute(sa.text("SELECT DISTINCT chat_id FROM media WHERE chat_id < 0 AND file_path IS NOT NULL"))
chat_ids = [row[0] for row in result]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "telegram-archive"
version = "7.10.14"
version = "7.10.15"
description = "Automated Telegram backup with Docker. Performs incremental backups of messages and media on a configurable schedule."
readme = "README.md"
requires-python = ">=3.14"
Expand Down
28 changes: 21 additions & 7 deletions scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,24 @@ if has_tables and not has_alembic:
);
\"\"\")
# Check artifact from migration 013: file_path values use negative chat_id folders
# If any media row for a negative chat_id has a correctly-negative folder, 013 has run
# Guard: media table may not exist on very old databases
cur.execute(\"\"\"
SELECT EXISTS (
SELECT 1 FROM media
WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%'
LIMIT 1
SELECT FROM information_schema.tables
WHERE table_name = 'media'
);
\"\"\")
has_013_paths = cur.fetchone()[0]
has_media_table = cur.fetchone()[0]
has_013_paths = False
if has_media_table:
cur.execute(\"\"\"
SELECT EXISTS (
SELECT 1 FROM media
WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%'
LIMIT 1
);
\"\"\")
has_013_paths = cur.fetchone()[0]

# Check artifact from migration 012: idx_media_chat_type index
cur.execute(\"\"\"
Expand Down Expand Up @@ -300,8 +309,13 @@ if has_tables and not has_alembic:
''')

# Check artifact from migration 013: file_path values use negative chat_id folders
cur.execute(\"SELECT EXISTS(SELECT 1 FROM media WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' LIMIT 1)\")
has_013_paths = cur.fetchone()[0]
# Guard: media table may not exist on very old databases
cur.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name='media'\")
has_media_table = cur.fetchone() is not None
has_013_paths = False
if has_media_table:
cur.execute(\"SELECT EXISTS(SELECT 1 FROM media WHERE chat_id < 0 AND file_path LIKE '%/' || CAST(chat_id AS TEXT) || '/%' LIMIT 1)\")
has_013_paths = cur.fetchone()[0]

# Check artifact from migration 012: idx_media_chat_type index
cur.execute(\"SELECT name FROM sqlite_master WHERE type='index' AND name='idx_media_chat_type'\")
Expand Down
2 changes: 1 addition & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Telegram Backup Automation - Main Package
"""

__version__ = "7.10.14"
__version__ = "7.10.15"
27 changes: 15 additions & 12 deletions src/web/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from ..config import Config
from ..db import DatabaseAdapter, close_database, get_db_manager, init_database
from ..realtime import RealtimeListener
from .media_utils import legacy_folder_alternates, legacy_marked_chat_ids

if TYPE_CHECKING:
from .push import PushNotificationManager
Expand Down Expand Up @@ -821,8 +822,8 @@ def _enforce_media_acl(path: str, user: UserContext, *, thumbnail: bool = False)
raise HTTPException(status_code=403, detail="Access denied")
if media_chat_id not in user_chat_ids:
# Legacy fallback: positive folder may correspond to negative marked ID
if media_chat_id > 0 and (-media_chat_id in user_chat_ids or -(1000000000000 + media_chat_id) in user_chat_ids):
pass
if media_chat_id > 0 and any(mid in user_chat_ids for mid in legacy_marked_chat_ids(media_chat_id)):
logger.debug("ACL legacy grant: positive folder mapped to allowed chat via marked-ID convention")
else:
raise HTTPException(status_code=403, detail="Access denied")

Expand Down Expand Up @@ -885,7 +886,7 @@ async def serve_thumbnail(size: int, folder: str, filename: str, user: UserConte
if user.no_download and not folder.startswith("avatars/"):
raise HTTPException(status_code=403, detail="Downloads disabled for this account")

# Chat-level access check
# Early ACL check on requested path (prevents existence leakage)
_enforce_media_acl(f"{folder}/{filename}", user, thumbnail=True)

from .thumbnails import ensure_thumbnail, resolve_cache_dir
Expand All @@ -894,10 +895,15 @@ async def serve_thumbnail(size: int, folder: str, filename: str, user: UserConte
if _thumb_cache_dir is None:
_thumb_cache_dir = resolve_cache_dir(_media_root)

thumb_path = await ensure_thumbnail(_media_root, size, folder, filename, cache_dir=_thumb_cache_dir)
if not thumb_path:
result = await ensure_thumbnail(_media_root, size, folder, filename, cache_dir=_thumb_cache_dir)
if not result:
raise HTTPException(status_code=404, detail="Thumbnail not available")

thumb_path, resolved_folder = result
# Secondary ACL on resolved path if it differs (prevents bypass via legacy fallback)
if resolved_folder != folder:
_enforce_media_acl(f"{resolved_folder}/{filename}", user, thumbnail=True)

return FileResponse(thumb_path, media_type="image/webp", headers={"Cache-Control": "public, max-age=86400"})


Expand Down Expand Up @@ -928,23 +934,20 @@ async def serve_media(path: str, download: int = Query(0), user: UserContext = D
resolved = None
if len(parts) == 2:
folder, rest = parts
alt_folders = []
if not folder.startswith("-"):
alt_folders = [f"-{folder}", f"-100{folder}"]
else:
alt_folders = [folder[1:]]
alt_folders = legacy_folder_alternates(folder)
for alt in alt_folders:
try:
resolved = (_media_root / alt / rest).resolve(strict=True)
logger.debug("Legacy fallback: served media via alternate folder resolution")
break
except OSError, ValueError:
except OSError, ValueError, RuntimeError:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail
rg -nP 'except\s+[^(\n:]+,\s*[^:\n]+:' src/web/main.py src/web/thumbnails.py

Repository: GeiserX/Telegram-Archive

Length of output: 876


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Show the exact exception block around line 938 in src/web/main.py
nl -ba src/web/main.py | sed -n '920,950p'

Repository: GeiserX/Telegram-Archive

Length of output: 111


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Print src/web/main.py lines 920-950 with line numbers (no nl dependency)
awk 'NR>=920 && NR<=950 {printf "%d:%s\n", NR, $0}' src/web/main.py

Repository: GeiserX/Telegram-Archive

Length of output: 1636


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Check Python version / runtime hints
ls -1 | rg -i 'pyproject\.toml|setup\.cfg|setup\.py|requirements.*|Pipfile|tox\.ini'
[ -f pyproject.toml ] && { echo '--- pyproject.toml ---'; sed -n '1,120p' pyproject.toml; }
[ -f setup.cfg ] && { echo '--- setup.cfg ---'; sed -n '1,160p' setup.cfg; }
[ -f setup.py ] && { echo '--- setup.py ---'; sed -n '1,120p' setup.py; }
[ -f requirements.txt ] && { echo '--- requirements.txt ---'; sed -n '1,80p' requirements.txt; }

# Inspect beginning of main.py for shebang / future imports
echo '--- src/web/main.py (top) ---'
sed -n '1,80p' src/web/main.py

Repository: GeiserX/Telegram-Archive

Length of output: 7691


Fix Python 3–invalid except syntax (line 938).

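src/web/main.py uses the unparenthesized comma form: except OSError, ValueError, RuntimeError: — this is a SyntaxError on Python 3.13 and earlier, but PEP 758 makes it valid on Python 3.14+ when the clause has no `as` binding, so it does parse under the project's requires-python >= 3.14. Switch to the tuple form only if the file must also parse under older interpreters or tooling; the same comma-except pattern also appears elsewhere (e.g., src/web/main.py:925, src/web/thumbnails.py:141).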

Diff
-                except OSError, ValueError, RuntimeError:
+                except (OSError, ValueError, RuntimeError):
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
except OSError, ValueError, RuntimeError:
except (OSError, ValueError, RuntimeError):
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/web/main.py` at line 938, Replace Python-2 style comma-separated
exception handlers with tuple form: locate the flawed except clause using the
pattern "except OSError, ValueError, RuntimeError:" in src/web/main.py (and
similar patterns at the other noted locations, e.g., the earlier handler around
line 925 and in src/web/thumbnails.py around line 141) and change it to use the
tuple form "except (OSError, ValueError, RuntimeError):" so the code is valid
under Python 3; ensure any accompanying except blocks or variable bindings are
adapted consistently.

continue
if resolved is None:
raise HTTPException(status_code=404, detail="File not found")
Comment on lines +937 to 946
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Guard alternate-folder generation for non-numeric folder names.

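Line 932 calls legacy_folder_alternates(folder) outside the per-alt try, so any ValueError raised there would surface as a 500 instead of the clean 404 path. Note that the implementation in src/web/media_utils.py already catches the ValueError from int() and returns [], so for non-numeric folder names this guard is defensive rather than required.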

Proposed fix
-            alt_folders = legacy_folder_alternates(folder)
+            try:
+                alt_folders = legacy_folder_alternates(folder)
+            except ValueError:
+                alt_folders = []
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/web/main.py` around lines 932 - 941, legacy_folder_alternates(folder) can
raise ValueError for non-numeric folder names before the per-alt try/except,
causing a 500; to fix, guard generation by validating `folder` (e.g., only call
legacy_folder_alternates if `folder.isnumeric()` or otherwise matches the
expected pattern) or wrap the call in a try/except that sets `alt_folders = []`
on ValueError; update the code around the `alt_folders =
legacy_folder_alternates(folder)` line and ensure the rest of the loop uses
`alt_folders`, leaving `resolved` as None so missing files still raise the 404
HTTPException.

if not resolved.is_relative_to(_media_root):
raise HTTPException(status_code=403, detail="Access denied")

_enforce_media_acl(path, user)
_enforce_media_acl(str(resolved.relative_to(_media_root)), user)

if not resolved.is_file():
raise HTTPException(status_code=404, detail="File not found")
Expand Down
56 changes: 56 additions & 0 deletions src/web/media_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Shared utilities for legacy media path resolution.

Centralizes the Telegram marked-ID convention so it's defined once
and used consistently across serve_media, thumbnails, and ACL checks.
"""

# Telegram "marked" channel/supergroup IDs are stored as -(10^12 + raw_id).
CHANNEL_ID_OFFSET: int = 1_000_000_000_000


def legacy_folder_alternates(folder: str) -> list[str]:
    """Return alternate folder names for legacy positive/negative ID paths.

    Forward (positive folder → possible negative marked IDs on disk):
        "1234567890" → ["-1234567890", "-1001234567890"]

    Reverse (negative folder → possible old positive folder on disk):
        "-1234567890"    → ["1234567890"]  (basic group)
        "-1001234567890" → ["1234567890"]  (channel)

    Args:
        folder: First path segment of a media request. This is untrusted
            input and may be any string (e.g. "avatars").

    Returns:
        Candidate folder names to try, in priority order. The list is empty
        when ``folder`` is not the canonical decimal spelling of a non-zero
        integer, because such a name can never correspond to a chat folder.
    """
    try:
        folder_int = int(folder)
    except ValueError:
        # Non-numeric folders (and over-long digit strings) have no alternates.
        return []

    # int() also accepts "+123", " 123", "0123", "1_000", "-0", ... Chat
    # folders are always written as str(chat_id), so only the canonical
    # spelling can name one. Rejecting the rest keeps malformed names such
    # as "-+123" out of the path lookups done by callers.
    if folder_int == 0 or str(folder_int) != folder:
        return []

    if folder_int > 0:
        # Old positive folder: try the basic-group ID first, then the channel ID.
        return [str(-folder_int), str(-(CHANNEL_ID_OFFSET + folder_int))]

    raw = -folder_int
    if raw > CHANNEL_ID_OFFSET:
        # Marked channel ID: strip the 10^12 offset to get the old folder.
        return [str(raw - CHANNEL_ID_OFFSET)]
    return [str(raw)]


def legacy_marked_chat_ids(positive_id: int) -> list[int]:
    """Map a legacy positive folder ID to the marked chat_ids it may stand for.

    ACL checks use this to decide whether a user who is allowed to see a
    chat may also read media stored under that chat's old positive folder.

    Returns:
        Two candidates, in order: the basic-group form (``-positive_id``)
        and the channel form (``-(CHANNEL_ID_OFFSET + positive_id)``).
    """
    basic_group_id = -positive_id
    channel_id = -(CHANNEL_ID_OFFSET + positive_id)
    return [basic_group_id, channel_id]


def derive_stale_folder(chat_id: int) -> str | None:
    """Compute the old positive folder name for a marked (negative) chat_id.

    Mapping:
        Basic groups: chat_id = -X             → "X"
        Channels:     chat_id = -(10^12 + X)   → "X"
        Users:        chat_id >= 0             → None (no stale folder exists)
    """
    if chat_id >= 0:
        return None

    magnitude = -chat_id
    # Only values strictly above the offset are treated as channel IDs.
    legacy_id = magnitude - CHANNEL_ID_OFFSET if magnitude > CHANNEL_ID_OFFSET else magnitude
    return str(legacy_id)
40 changes: 22 additions & 18 deletions src/web/thumbnails.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

from PIL import Image

from .media_utils import legacy_folder_alternates

logger = logging.getLogger(__name__)

# Limit decompression to prevent pixel-bomb OOM attacks (~50 megapixels)
Expand Down Expand Up @@ -87,11 +89,12 @@

async def ensure_thumbnail(
media_root: Path, size: int, folder: str, filename: str, *, cache_dir: Path | None = None
) -> Path | None:
"""Return the path to a cached thumbnail, generating it if needed.
) -> tuple[Path, str] | None:
"""Return (thumb_path, resolved_folder) or None.

Returns None when the request is invalid or generation fails.
Includes path traversal protection.
resolved_folder is the actual folder the source was found in (may differ
from the requested folder due to legacy ID fallback). Callers use this
for ACL enforcement on the resolved path.

When cache_dir is provided, thumbnails are written there instead of
under {media_root}/.thumbs/ — this supports read-only media volumes.
Expand Down Expand Up @@ -120,28 +123,29 @@
if not dest.is_relative_to(thumbs_root):
return None

resolved_folder = folder

if dest.exists():
return dest
return dest, resolved_folder

if not source.exists():
# Legacy fallback: pre-v4.0.5 paths used positive IDs, disk uses negative marked IDs.
# Try alternate folder names: X→-X (basic group), X→-100X (channel/supergroup)
alt_folders = []
if not folder.startswith("-"):
alt_folders = [f"-{folder}", f"-100{folder}"]
else:
alt_folders = [folder[1:]]
alt_folders = legacy_folder_alternates(folder)
found = False
Comment on lines +132 to 133
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Protect fallback from invalid folder formats.

If folder is non-numeric, legacy_folder_alternates(folder) can raise and break thumbnail requests. Treat invalid folders as “no alternates”.

Proposed fix
-        alt_folders = legacy_folder_alternates(folder)
+        try:
+            alt_folders = legacy_folder_alternates(folder)
+        except ValueError:
+            alt_folders = []
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/web/thumbnails.py` around lines 131 - 132,
legacy_folder_alternates(folder) can throw for non-numeric folder values and
break thumbnail requests; guard the call by validating or normalizing folder
first and treat invalid formats as "no alternates" (empty list). Update the code
around the call to legacy_folder_alternates (where alt_folders and found are
set) to either check the folder is numeric (e.g., str(folder).isdigit() or
isinstance(folder, int)) before calling, or wrap the call in a try/except that
catches the specific exception (e.g., ValueError) and sets alt_folders = [] on
failure so thumbnail paths continue to be resolved safely.

for alt in alt_folders:
alt_source = (media_root / alt / filename).resolve()
if alt_source.is_relative_to(media_root_resolved) and alt_source.exists():
source = alt_source
found = True
break
try:
alt_source = (media_root / alt / filename).resolve()

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.
if alt_source.is_relative_to(media_root_resolved) and alt_source.exists():

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.
logger.debug("Thumbnail legacy fallback resolved via alternate folder")
source = alt_source
resolved_folder = alt
found = True
break
except OSError, RuntimeError:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail
rg -nP 'except\s+[^(\n:]+,\s*[^:\n]+:' src/web/main.py src/web/thumbnails.py

Repository: GeiserX/Telegram-Archive

Length of output: 876


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Show the exact code around the reported line
nl -ba src/web/thumbnails.py | sed -n '120,170p'

# Find all occurrences of the specific invalid syntax pattern in the repo
rg -n "except OSError, RuntimeError" -S src/web

# Check declared Python version / requirements (if present)
ls -la
rg -n "python_requires|Programming Language :: Python|requires-python" -S pyproject.toml setup.cfg setup.py requirements*.txt .github/workflows 2>/dev/null || true

Repository: GeiserX/Telegram-Archive

Length of output: 111


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Code context around the reported line
cat -n src/web/thumbnails.py | sed -n '120,165p'

# Locate the exact offending syntax in the repo
rg -n "except OSError, RuntimeError" -S src/web || true

# Show declared Python version (if any)
rg -n "python_requires|Programming Language :: Python|requires-python" -S pyproject.toml setup.cfg setup.py requirements*.txt .github/workflows 2>/dev/null || true

# Parse the file as Python 3 to check syntax validity
python3 - <<'PY'
import ast, pathlib, sys
p = pathlib.Path("src/web/thumbnails.py")
src = p.read_text(encoding="utf-8")
try:
    ast.parse(src)
    print("PY3_AST_PARSE_OK")
except SyntaxError as e:
    print("PY3_AST_PARSE_SYNTAX_ERROR")
    print(e)
PY

Repository: GeiserX/Telegram-Archive

Length of output: 1871


Fix invalid multi-exception syntax in src/web/thumbnails.py (line 141).
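except OSError, RuntimeError: is the unparenthesized multi-exception form. It is a SyntaxError on Python 3.13 and earlier, but PEP 758 allows it on Python 3.14+ when there is no `as` clause; since the repo targets Python >=3.14, the module imports there and fails only under older interpreters or tooling.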

Suggested fix
-            except OSError, RuntimeError:
+            except (OSError, RuntimeError):
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
except OSError, RuntimeError:
except (OSError, RuntimeError):
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/web/thumbnails.py` at line 141, The except line uses Python2-style syntax
"except OSError, RuntimeError:" which is invalid in Python 3; update the
exception handling in the thumbnails module (around the try/except block that
currently references OSError and RuntimeError) to use modern exception tuple
syntax by catching both exceptions with "except (OSError, RuntimeError):" or
split into separate "except OSError:" and "except RuntimeError:" clauses as
appropriate, preserving the existing error handling logic in the surrounding
function/method.

continue
if not found:
return None

async with _generation_semaphore:
loop = asyncio.get_running_loop()
ok = await loop.run_in_executor(None, _generate_sync, source, dest, size)
return dest if ok else None
return (dest, resolved_folder) if ok else None
Loading
Loading