diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2240909..d2c953b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: Validate JavaScript Build
+name: CI
on:
pull_request:
@@ -6,6 +6,7 @@ on:
jobs:
check:
+ name: Validate JavaScript Build
runs-on: ubuntu-latest
container:
image: debian:bookworm
@@ -61,3 +62,22 @@ jobs:
git add video2commons/frontend/static/*.min.js video2commons/frontend/templates/*.min.html
git commit -m "Update built files from CI"
git push origin $GITHUB_HEAD_REF
+
+ ruff:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Setup uv
+ uses: astral-sh/setup-uv@v7.2.1
+ with:
+ python-version: "3.14"
+ enable-cache: true
+
+ - name: Run Ruff
+ run: uv run ruff check --output-format=github .
+
+ - name: Run the Ruff formatter
+ run: uv run ruff format --check .
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..6324d40
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.14
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..0bb5381
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,13 @@
+[project]
+name = "video2commons"
+version = "0.1.0"
+requires-python = ">=3.14"
+
+[dependency-groups]
+dev = ["ruff>=0.14.14"]
+
+[tool.ruff.lint]
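+# Bare excepts (E722) are widespread in the existing code, so don't flag them.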
+ignore = ["E722"]
+
+[tool.ruff.format]
+docstring-code-format = true
diff --git a/user-config.py b/user-config.py
index 6e90921..884b1ac 100644
--- a/user-config.py
+++ b/user-config.py
@@ -4,7 +4,7 @@
"""Pywikibot configs."""
-family = 'commons'
-mylang = 'commons'
+family = "commons"
+mylang = "commons"
socket_timeout = 30, 300 # chunked uploading unreliable
diff --git a/utils/cleanuptasks.py b/utils/cleanuptasks.py
index 01a3362..1d2038f 100644
--- a/utils/cleanuptasks.py
+++ b/utils/cleanuptasks.py
@@ -23,21 +23,20 @@
import sys
from redis import Redis
-sys.path.append(os.path.dirname(os.path.realpath(__file__)) +
- "/../video2commons")
+sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../video2commons")
from config import redis_pw, redis_host # NOQA
redisconnection = Redis(host=redis_host, db=3, password=redis_pw)
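+# Prune task ids whose "titles:<taskid>" key has expired.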
-for userkey in redisconnection.keys('tasks:*') + ['alltasks']:
+for userkey in redisconnection.keys("tasks:*") + ["alltasks"]:
for taskid in redisconnection.lrange(userkey, 0, -1):
- if not redisconnection.exists('titles:' + taskid):
+ if not redisconnection.exists("titles:" + taskid):
redisconnection.lrem(userkey, 0, taskid)
print("delete %s from %s" % (taskid, userkey))
-for pattern in ['params:*', 'restarted:*']: # 'tasklock:*'
+for pattern in ["params:*", "restarted:*"]: # 'tasklock:*'
for key in redisconnection.keys(pattern):
- taskid = key.split(':')[1]
- if not redisconnection.exists('titles:' + taskid):
+ taskid = key.split(":")[1]
+ if not redisconnection.exists("titles:" + taskid):
redisconnection.delete(key)
print("delete %s" % (key))
diff --git a/utils/extracti18n.py b/utils/extracti18n.py
index b98af85..db9178f 100644
--- a/utils/extracti18n.py
+++ b/utils/extracti18n.py
@@ -27,50 +27,55 @@
import re
import json
-if not len(sys.argv) > 1 or '/messages' not in sys.argv[1]:
- print(("usage: python " + sys.argv[0] + "
\n\n"
- " The path to mediawiki/languages/messages\n"))
+if not len(sys.argv) > 1 or "/messages" not in sys.argv[1]:
+ print(
+ (
+ "usage: python " + sys.argv[0] + " \n\n"
+ " The path to mediawiki/languages/messages\n"
+ )
+ )
sys.exit(1)
msgDir = sys.argv[1]
-dest = os.path.dirname(os.path.realpath(__file__)) + \
- '/../video2commons/frontend/i18n-metadata'
+dest = (
+ os.path.dirname(os.path.realpath(__file__))
+ + "/../video2commons/frontend/i18n-metadata"
+)
data = {
- 'fallbacks': {},
- 'rtl': [],
- 'alllangs': [],
+ "fallbacks": {},
+ "rtl": [],
+ "alllangs": [],
}
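+# Match the fallback declaration in each MessagesXx.php file.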
rFallback = re.compile(r"fallback = '(.*?)'", re.I)
-rIsRtl = re.compile(r'rtl = true', re.I)
+rIsRtl = re.compile(r"rtl = true", re.I)
for file in os.listdir(msgDir):
filePath = msgDir + "/" + file
- if file in ['.', '..'] or not os.path.isfile(filePath):
+ if file in [".", ".."] or not os.path.isfile(filePath):
continue
- with open(filePath, 'r') as openfile:
+ with open(filePath, "r") as openfile:
content = openfile.read()
- fileMatch = re.match(r'Messages(.*?)\.php', file)
- source = fileMatch.group(1).lower().replace('_', '-')
+ fileMatch = re.match(r"Messages(.*?)\.php", file)
+ source = fileMatch.group(1).lower().replace("_", "-")
contentMatch = rFallback.search(content)
if contentMatch:
- fallbacks = [s.strip() for s in contentMatch.group(1).split(',')]
- data['fallbacks'][source] = \
- fallbacks if len(fallbacks) > 1 else fallbacks[0]
+ fallbacks = [s.strip() for s in contentMatch.group(1).split(",")]
+ data["fallbacks"][source] = fallbacks if len(fallbacks) > 1 else fallbacks[0]
if rIsRtl.search(content):
- data['rtl'].append(source)
+ data["rtl"].append(source)
- data['alllangs'].append(source)
+ data["alllangs"].append(source)
def _write(key):
dest_file = dest + "/" + key + ".json"
- with open(dest_file, 'w') as openfile:
- json.dump(data[key], openfile, sort_keys=True,
- indent=4, separators=(',', ': '))
+ with open(dest_file, "w") as openfile:
+ json.dump(data[key], openfile, sort_keys=True, indent=4, separators=(",", ": "))
+
for key in data:
_write(key)
diff --git a/utils/stats.py b/utils/stats.py
index d72b837..ed69bc2 100644
--- a/utils/stats.py
+++ b/utils/stats.py
@@ -6,7 +6,7 @@
import sys
import time
-sys.path.insert(0, '/srv/v2c')
+sys.path.insert(0, "/srv/v2c")
from redis import Redis
@@ -17,7 +17,7 @@
collect_worker_stats,
get_worker_stats,
release_write_lock,
- update_worker_stats
+ update_worker_stats,
)
# Stats are considered stale if they haven't been updated in 30 minutes.
@@ -41,8 +41,8 @@ def main():
# Don't update stats if they've been updated recently by another job.
existing_stats = get_worker_stats(app_conn)
- if existing_stats and 'last_updated_by_job' in existing_stats:
- if int(time.time()) - existing_stats['last_updated_by_job'] < STALE_SECS:
+ if existing_stats and "last_updated_by_job" in existing_stats:
+ if int(time.time()) - existing_stats["last_updated_by_job"] < STALE_SECS:
print("Stats have been updated recently, skipping update.")
return
@@ -58,5 +58,6 @@ def main():
finally:
release_write_lock(app_conn)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..3443e5f
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,44 @@
+version = 1
+revision = 3
+requires-python = ">=3.14"
+
+[[package]]
+name = "ruff"
+version = "0.14.14"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" },
+ { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" },
+ { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" },
+ { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" },
+ { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" },
+ { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" },
+ { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" },
+ { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" },
+ { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" },
+ { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" },
+ { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" },
+]
+
+[[package]]
+name = "video2commons"
+version = "0.1.0"
+source = { virtual = "." }
+
+[package.dev-dependencies]
+dev = [
+ { name = "ruff" },
+]
+
+[package.metadata]
+
+[package.metadata.requires-dev]
+dev = [{ name = "ruff", specifier = ">=0.14.14" }]
diff --git a/video2commons/backend/__init__.py b/video2commons/backend/__init__.py
index 5cf83f2..71a83ab 100644
--- a/video2commons/backend/__init__.py
+++ b/video2commons/backend/__init__.py
@@ -19,8 +19,6 @@
"""videocommons backend."""
-
-
from video2commons.backend import worker
-__all__ = ['worker']
+__all__ = ["worker"]
diff --git a/video2commons/backend/categories/__init__.py b/video2commons/backend/categories/__init__.py
index b137b4b..5037ca3 100644
--- a/video2commons/backend/categories/__init__.py
+++ b/video2commons/backend/categories/__init__.py
@@ -24,18 +24,26 @@
def has_video_track(source: str) -> bool:
"""Check if a video has an audio track."""
- result = subprocess.run([
- ffprobe_location,
- '-loglevel', 'error',
- '-select_streams', 'v',
- '-show_entries', 'stream=index,codec_type',
- '-of', 'json',
- source
- ], capture_output=True, text=True)
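+    # Ask ffprobe for the stream list as JSON; a "video" codec_type means a video track exists.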
+ result = subprocess.run(
+ [
+ ffprobe_location,
+ "-loglevel",
+ "error",
+ "-select_streams",
+ "v",
+ "-show_entries",
+ "stream=index,codec_type",
+ "-of",
+ "json",
+ source,
+ ],
+ capture_output=True,
+ text=True,
+ )
if result.returncode == 0:
- for stream in json.loads(result.stdout).get('streams', []):
- if stream.get('codec_type') == 'video':
+ for stream in json.loads(result.stdout).get("streams", []):
+ if stream.get("codec_type") == "video":
return True
return False
@@ -44,18 +52,26 @@ def has_video_track(source: str) -> bool:
def has_audio_track(source: str) -> bool:
"""Check if a video has an audio track."""
- result = subprocess.run([
- ffprobe_location,
- '-loglevel', 'error',
- '-select_streams', 'a',
- '-show_entries', 'stream=index,codec_type',
- '-of', 'json',
- source
- ], capture_output=True, text=True)
+ result = subprocess.run(
+ [
+ ffprobe_location,
+ "-loglevel",
+ "error",
+ "-select_streams",
+ "a",
+ "-show_entries",
+ "stream=index,codec_type",
+ "-of",
+ "json",
+ source,
+ ],
+ capture_output=True,
+ text=True,
+ )
if result.returncode == 0:
- for stream in json.loads(result.stdout).get('streams', []):
- if stream.get('codec_type') == 'audio':
+ for stream in json.loads(result.stdout).get("streams", []):
+ if stream.get("codec_type") == "audio":
return True
return False
@@ -82,7 +98,7 @@ def get_inferable_categories(source: str) -> Set[str]:
categories = set()
if not has_audio_track(source):
- categories.add('[[Category:Videos without audio]]')
+ categories.add("[[Category:Videos without audio]]")
return categories
diff --git a/video2commons/backend/download/__init__.py b/video2commons/backend/download/__init__.py
index d04f6a4..fe9d262 100644
--- a/video2commons/backend/download/__init__.py
+++ b/video2commons/backend/download/__init__.py
@@ -17,8 +17,6 @@
"""Wrapper around youtube-dl."""
-
-
import os
from urllib.parse import urlparse
@@ -31,15 +29,15 @@
def download(
- url, ie_key, formats, subtitles, outputdir,
- statuscallback=None, errorcallback=None
+ url, ie_key, formats, subtitles, outputdir, statuscallback=None, errorcallback=None
):
"""Download a video from url to outputdir."""
- if url.startswith('uploads:'):
+ if url.startswith("uploads:"):
# FIXME: this should be a configuration variable
- url = url.replace('uploads:', 'https://video2commons.toolforge.org/'
- 'static/uploads/', 1)
+ url = url.replace(
+ "uploads:", "https://video2commons.toolforge.org/static/uploads/", 1
+ )
ie_key = None
url_blacklisted(url)
@@ -47,103 +45,107 @@ def download(
outputdir = os.path.abspath(outputdir)
statuscallback = statuscallback or (lambda text, percent: None)
errorcallback = errorcallback or (lambda text: None)
- outtmpl = outputdir + '/dl.%(ext)s'
+ outtmpl = outputdir + "/dl.%(ext)s"
params = {
- 'format': formats,
- 'outtmpl': outtmpl,
- 'writedescription': True,
- 'writeinfojson': True,
- 'writesubtitles': subtitles,
- 'writeautomaticsub': False,
- 'subtitleslangs': ['all', '-live_chat'],
- 'subtitlesformat': 'srt/ass/vtt/best',
- 'cachedir': '/tmp/',
- 'noplaylist': True, # not implemented in video2commons
- 'postprocessors': [{
- 'key': 'FFmpegSubtitlesConvertor',
- 'format': 'srt',
- }],
- 'max_filesize': 5 * (1 << 30),
- 'retries': 10,
- 'fragment_retries': 10,
- 'prefer_ffmpeg': True, # avconv do not have srt encoder
- 'prefer_free_formats': True,
- 'logger': get_logger('celery.task.v2c.main.yt_dlp')
+ "format": formats,
+ "outtmpl": outtmpl,
+ "writedescription": True,
+ "writeinfojson": True,
+ "writesubtitles": subtitles,
+ "writeautomaticsub": False,
+ "subtitleslangs": ["all", "-live_chat"],
+ "subtitlesformat": "srt/ass/vtt/best",
+ "cachedir": "/tmp/",
+ "noplaylist": True, # not implemented in video2commons
+ "postprocessors": [
+ {
+ "key": "FFmpegSubtitlesConvertor",
+ "format": "srt",
+ }
+ ],
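+    # 5 * (1 << 30) bytes = 5 GiB download size cap.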
+ "max_filesize": 5 * (1 << 30),
+ "retries": 10,
+ "fragment_retries": 10,
+ "prefer_ffmpeg": True, # avconv do not have srt encoder
+ "prefer_free_formats": True,
+ "logger": get_logger("celery.task.v2c.main.yt_dlp"),
}
- old_ua = std_headers['User-Agent']
- if ie_key == 'Youtube':
+ old_ua = std_headers["User-Agent"]
+ if ie_key == "Youtube":
# HACK: Get equirectangular for 360° videos (ytdl-org/youtube-dl#15267)
- std_headers['User-Agent'] = ''
+ std_headers["User-Agent"] = ""
# https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies
# https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
- params.update({
- 'cookiefile': tooldir + '/../cookies.txt',
- 'username': youtube_user,
- 'password': youtube_pass
- })
+ params.update(
+ {
+ "cookiefile": tooldir + "/../cookies.txt",
+ "username": youtube_user,
+ "password": youtube_pass,
+ }
+ )
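+    # One-element list so the nested progresshook can mutate it; Ellipsis means no progress yet.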
last_percentage = [Ellipsis]
def progresshook(d):
- if d['status'] == 'downloading':
- total = d.get('total_bytes') or d.get('total_bytes_estimate')
- percentage = int(100.0 * d['downloaded_bytes'] / total)\
- if total else None
+ if d["status"] == "downloading":
+ total = d.get("total_bytes") or d.get("total_bytes_estimate")
+ percentage = int(100.0 * d["downloaded_bytes"] / total) if total else None
if percentage != last_percentage[0]:
last_percentage[0] = percentage
statuscallback(
- 'Downloading to ' + (d['tmpfilename'] or d['filename']),
- percentage
+ "Downloading to " + (d["tmpfilename"] or d["filename"]), percentage
)
- elif d['status'] == 'finished':
- statuscallback('Postprocessing...', -1)
- elif d['status'] == 'error':
- errorcallback('Error raised by YoutubeDL')
+ elif d["status"] == "finished":
+ statuscallback("Postprocessing...", -1)
+ elif d["status"] == "error":
+ errorcallback("Error raised by YoutubeDL")
- statuscallback('Creating YoutubeDL instance', -1)
+ statuscallback("Creating YoutubeDL instance", -1)
try:
# Not using provided ie_key because of the existence of extractors that
# target another extractor, such as TwitterIE.
with yt_dlp.YoutubeDL(params) as dl:
dl.add_progress_hook(progresshook)
- statuscallback('Preprocessing...', -1)
+ statuscallback("Preprocessing...", -1)
info = dl.extract_info(url, download=True, ie_key=None)
except DownloadError:
- params['cachedir'] = False
- statuscallback('Download failed.'
- ' creating YoutubeDL instance without local cache', -1)
+ params["cachedir"] = False
+ statuscallback(
+ "Download failed. creating YoutubeDL instance without local cache", -1
+ )
with yt_dlp.YoutubeDL(params) as dl:
dl.add_progress_hook(progresshook)
info = dl.extract_info(url, download=True, ie_key=None)
finally:
- std_headers['User-Agent'] = old_ua
+ std_headers["User-Agent"] = old_ua
- if info.get('webpage_url'):
- url_blacklisted(info['webpage_url'])
+ if info.get("webpage_url"):
+ url_blacklisted(info["webpage_url"])
- filename = outtmpl % {'ext': info['ext']}
+ filename = outtmpl % {"ext": info["ext"]}
if not os.path.isfile(filename):
# https://github.com/rg3/youtube-dl/issues/8349
- filename = outtmpl % {'ext': 'mkv'}
- assert os.path.isfile(filename), \
- 'Failed to determine the path of the downloaded video. ' + \
- 'Is the video too large?'
+ filename = outtmpl % {"ext": "mkv"}
+ assert os.path.isfile(filename), (
+ "Failed to determine the path of the downloaded video. "
+ + "Is the video too large?"
+ )
ret = {
- 'extractor': ie_key,
- 'subtitles': {},
- 'target': filename,
+ "extractor": ie_key,
+ "subtitles": {},
+ "target": filename,
}
- for key in info.get('subtitles', {}):
+ for key in info.get("subtitles", {}):
# Postprocesed: converted to srt
- filename = outtmpl % {'ext': key + '.srt'}
+ filename = outtmpl % {"ext": key + ".srt"}
if os.path.isfile(filename):
- ret['subtitles'][key] = filename
+ ret["subtitles"][key] = filename
return ret
@@ -151,6 +153,6 @@ def progresshook(d):
def url_blacklisted(url):
"""Define download url blacklist."""
parseresult = urlparse(url)
- if parseresult.scheme in ['http', 'https']:
- if parseresult.netloc.endswith('.googlevideo.com'):
- raise TaskError('Your downloading URL has been blacklisted.')
+ if parseresult.scheme in ["http", "https"]:
+ if parseresult.netloc.endswith(".googlevideo.com"):
+ raise TaskError("Your downloading URL has been blacklisted.")
diff --git a/video2commons/backend/encode/__init__.py b/video2commons/backend/encode/__init__.py
index 3a50099..256565c 100644
--- a/video2commons/backend/encode/__init__.py
+++ b/video2commons/backend/encode/__init__.py
@@ -21,6 +21,7 @@
from .transcodejob import WebVideoTranscodeJob
from .transcode import WebVideoTranscode
from .globals import ffmpeg_location, ffprobe_location
+
# https://github.com/senko/python-video-converter
from converter import Converter
@@ -28,7 +29,7 @@
def encode(source, origkey, statuscallback=None, errorcallback=None, concurrency=None):
"""Main encode function."""
source = os.path.abspath(source)
- preserve = {'video': False, 'audio': False}
+ preserve = {"video": False, "audio": False}
c = Converter(ffmpeg_path=ffmpeg_location, ffprobe_path=ffprobe_location)
info = c.probe(source)
@@ -38,14 +39,20 @@ def encode(source, origkey, statuscallback=None, errorcallback=None, concurrency
targettype = WebVideoTranscode.settings.get(key)
if info and targettype:
- if info.video and info.video.codec == targettype.get('videoCodec'):
- preserve['video'] = True
- if info.audio and info.audio.codec == targettype.get('audioCodec'):
- preserve['audio'] = True
+ if info.video and info.video.codec == targettype.get("videoCodec"):
+ preserve["video"] = True
+ if info.audio and info.audio.codec == targettype.get("audioCodec"):
+ preserve["audio"] = True
- target = source + '.' + key
+ target = source + "." + key
job = WebVideoTranscodeJob(
- source, target, key, preserve, statuscallback, errorcallback, info,
+ source,
+ target,
+ key,
+ preserve,
+ statuscallback,
+ errorcallback,
+ info,
concurrency,
)
@@ -55,33 +62,40 @@ def encode(source, origkey, statuscallback=None, errorcallback=None, concurrency
def getbestkey(info, targettype):
"""Find the bext convert key to use."""
# Asserts
- assert info, 'The file format could not be recognized'
- assert targettype, 'The target format is invalid.'
- assert info.video or info.audio, 'The file has no video or audio tracks.'
- assert info.video or not targettype.get('videoCodec'), \
- 'Video is asked to be kept but the file has no video tracks.'
- assert info.audio or not targettype.get('audioCodec'), \
- 'Audio is asked to be kept but the file has no audio tracks.'
-
- if targettype.get('videoCodec') and targettype.get('audioCodec'):
+ assert info, "The file format could not be recognized"
+ assert targettype, "The target format is invalid."
+ assert info.video or info.audio, "The file has no video or audio tracks."
+ assert info.video or not targettype.get("videoCodec"), (
+ "Video is asked to be kept but the file has no video tracks."
+ )
+ assert info.audio or not targettype.get("audioCodec"), (
+ "Audio is asked to be kept but the file has no audio tracks."
+ )
+
+ if targettype.get("videoCodec") and targettype.get("audioCodec"):
# need both video & audio -- no codec change in video & audio
for newkey, newtargettype in list(WebVideoTranscode.settings.items()):
- if info.video.codec == newtargettype.get('videoCodec') and \
- info.audio.codec == newtargettype.get('audioCodec'):
+ if info.video.codec == newtargettype.get(
+ "videoCodec"
+ ) and info.audio.codec == newtargettype.get("audioCodec"):
return newkey
- elif targettype.get('videoCodec') and 'noaudio' in targettype:
+ elif targettype.get("videoCodec") and "noaudio" in targettype:
# need video only -- no codec change in video & remove audio
for newkey, newtargettype in list(WebVideoTranscode.settings.items()):
- if info.video.codec == newtargettype.get('videoCodec') and \
- 'noaudio' in newtargettype:
+ if (
+ info.video.codec == newtargettype.get("videoCodec")
+ and "noaudio" in newtargettype
+ ):
return newkey
- elif 'novideo' in targettype and targettype.get('audioCodec'):
+ elif "novideo" in targettype and targettype.get("audioCodec"):
# need audio only -- no codec change in audio & remove video
for newkey, newtargettype in list(WebVideoTranscode.settings.items()):
- if info.audio.codec == newtargettype.get('audioCodec') and \
- 'novideo' in newtargettype:
+ if (
+ info.audio.codec == newtargettype.get("audioCodec")
+ and "novideo" in newtargettype
+ ):
return newkey
return None
diff --git a/video2commons/backend/encode/globals.py b/video2commons/backend/encode/globals.py
index 7235cc1..b502b36 100644
--- a/video2commons/backend/encode/globals.py
+++ b/video2commons/backend/encode/globals.py
@@ -39,26 +39,27 @@
# Maximum file size transcoding processes can create, in KB
background_size_limit = 10 * 1024 * 1024 # 10GB
# Number of threads to use in avconv for transcoding
-ffmpeg_threads = __import__('multiprocessing').cpu_count()
+ffmpeg_threads = __import__("multiprocessing").cpu_count()
# Location of the avconv/ffmpeg binary (used to encode WebM and for thumbnails)
-ffmpeg_location = '/mnt/nfs/labstore-secondary-project/gentoo-prefix/usr/bin/ffmpeg'
-ffprobe_location = '/usr/bin/ffprobe'
+ffmpeg_location = "/mnt/nfs/labstore-secondary-project/gentoo-prefix/usr/bin/ffmpeg"
+ffprobe_location = "/usr/bin/ffprobe"
def escape_shellarg(*args):
"""Escape shell arguments."""
import shlex  # the pipes module was removed in Python 3.13; shlex.quote is equivalent
+
return " ".join([pipes.quote(str(arg)) for arg in args])
-def format_size(num, suffix='B'):
+def format_size(num, suffix="B"):
"""Format the size with prefixes."""
# Source: StackOverflow
- for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
+ for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
num /= 1024.0
- return "%.1f%s%s" % (num, 'Y', suffix)
+ return "%.1f%s%s" % (num, "Y", suffix)
def format_time(s):
@@ -70,5 +71,4 @@ def format_time(s):
def time_to_seconds(time):
"""Get the number of seconds from time expression."""
- return \
- sum([a * b for a, b in zip([3600, 60, 1], list(map(int, time.split(':'))))])
+ return sum([a * b for a, b in zip([3600, 60, 1], list(map(int, time.split(":"))))])
diff --git a/video2commons/backend/encode/transcode.py b/video2commons/backend/encode/transcode.py
index dbaded0..79af2ac 100644
--- a/video2commons/backend/encode/transcode.py
+++ b/video2commons/backend/encode/transcode.py
@@ -36,131 +36,117 @@ class WebVideoTranscode:
"""
settings = {
- 'ogv':
- {
- 'videoQuality': 7,
- 'audioQuality': 6,
- 'noUpscaling': 'True',
- 'twopass': 'False',
- 'optimize': 'True',
- 'keyframeInterval': '128',
- 'videoCodec': 'theora',
- 'audioCodec': 'vorbis',
- 'type': 'video/ogg codecs="theora, vorbis"',
- },
- 'an.ogv':
- {
- 'videoQuality': 7,
- 'noUpscaling': 'True',
- 'twopass': 'False',
- 'optimize': 'True',
- 'keyframeInterval': '128',
- 'videoCodec': 'theora',
- 'noaudio': 'True',
- 'type': 'video/ogg codecs="theora, vorbis"',
- },
-
+ "ogv": {
+ "videoQuality": 7,
+ "audioQuality": 6,
+ "noUpscaling": "True",
+ "twopass": "False",
+ "optimize": "True",
+ "keyframeInterval": "128",
+ "videoCodec": "theora",
+ "audioCodec": "vorbis",
+ "type": 'video/ogg codecs="theora, vorbis"',
+ },
+ "an.ogv": {
+ "videoQuality": 7,
+ "noUpscaling": "True",
+ "twopass": "False",
+ "optimize": "True",
+ "keyframeInterval": "128",
+ "videoCodec": "theora",
+ "noaudio": "True",
+ "type": 'video/ogg codecs="theora, vorbis"',
+ },
# WebM transcode:
- 'webm':
- {
- 'crf': 10,
- 'videoBitrate': '0',
- 'audioQuality': 6,
- 'noUpscaling': 'True',
- 'twopass': 'True',
- 'videoCodec': 'vp8',
- 'audioCodec': 'vorbis',
- 'type': 'video/webm codecs="vp8, vorbis"',
- },
- 'an.webm':
- {
- 'crf': 10,
- 'videoBitrate': '0',
- 'noUpscaling': 'True',
- 'twopass': 'True',
- 'videoCodec': 'vp8',
- 'noaudio': 'True',
- 'type': 'video/webm codecs="vp8, vorbis"',
- },
-
+ "webm": {
+ "crf": 10,
+ "videoBitrate": "0",
+ "audioQuality": 6,
+ "noUpscaling": "True",
+ "twopass": "True",
+ "videoCodec": "vp8",
+ "audioCodec": "vorbis",
+ "type": 'video/webm codecs="vp8, vorbis"',
+ },
+ "an.webm": {
+ "crf": 10,
+ "videoBitrate": "0",
+ "noUpscaling": "True",
+ "twopass": "True",
+ "videoCodec": "vp8",
+ "noaudio": "True",
+ "type": 'video/webm codecs="vp8, vorbis"',
+ },
# WebM VP9 transcode:
- 'vp9.webm':
- {
- 'crf': 35,
- 'videoBitrate': '0',
- 'audioBitrate': '128',
- 'samplerate': '48000',
- 'noUpscaling': 'True',
- 'twopass': 'True',
- 'altref': 'True',
- 'videoCodec': 'vp9',
- 'audioCodec': 'opus',
- 'tileColumns': '4',
- 'speed': '2',
- 'quality': 'good',
- 'type': 'video/webm codecs="vp9, opus"',
- },
- 'an.vp9.webm':
- {
- 'crf': 35,
- 'videoBitrate': '0',
- 'noUpscaling': 'True',
- 'twopass': 'True',
- 'altref': 'True',
- 'videoCodec': 'vp9',
- 'noaudio': 'True',
- 'tileColumns': '4',
- 'speed': '2',
- 'quality': 'good',
- 'type': 'video/webm codecs="vp9, opus"',
- },
-
+ "vp9.webm": {
+ "crf": 35,
+ "videoBitrate": "0",
+ "audioBitrate": "128",
+ "samplerate": "48000",
+ "noUpscaling": "True",
+ "twopass": "True",
+ "altref": "True",
+ "videoCodec": "vp9",
+ "audioCodec": "opus",
+ "tileColumns": "4",
+ "speed": "2",
+ "quality": "good",
+ "type": 'video/webm codecs="vp9, opus"',
+ },
+ "an.vp9.webm": {
+ "crf": 35,
+ "videoBitrate": "0",
+ "noUpscaling": "True",
+ "twopass": "True",
+ "altref": "True",
+ "videoCodec": "vp9",
+ "noaudio": "True",
+ "tileColumns": "4",
+ "speed": "2",
+ "quality": "good",
+ "type": 'video/webm codecs="vp9, opus"',
+ },
# WebM AV1 transcode:
#
# Presets: https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/CommonQuestions.md#what-presets-do
# Multipass: https://github.com/HandBrake/HandBrake/issues/4831#issuecomment-1546617210
- 'av1.webm':
- {
- 'audioBitrate': '128',
- 'audioCodec': 'opus',
- 'crf': 30,
- 'preset': '6',
- 'samplerate': '48000',
- 'twopass': 'False', # twopass is not supported for AV1 with CRF
- 'type': 'video/webm codecs="av01, opus"',
- 'videoBitrate': '0',
- 'videoCodec': 'av1',
- },
- 'an.av1.webm':
- {
- 'crf': 30,
- 'noaudio': 'True',
- 'preset': '6',
- 'twopass': 'False', # twopass is not supported for AV1 with CRF
- 'type': 'video/webm codecs="av01, opus"',
- 'videoBitrate': '0',
- 'videoCodec': 'av1',
- },
-
+ "av1.webm": {
+ "audioBitrate": "128",
+ "audioCodec": "opus",
+ "crf": 30,
+ "preset": "6",
+ "samplerate": "48000",
+ "twopass": "False", # twopass is not supported for AV1 with CRF
+ "type": 'video/webm codecs="av01, opus"',
+ "videoBitrate": "0",
+ "videoCodec": "av1",
+ },
+ "an.av1.webm": {
+ "crf": 30,
+ "noaudio": "True",
+ "preset": "6",
+ "twopass": "False", # twopass is not supported for AV1 with CRF
+ "type": 'video/webm codecs="av01, opus"',
+ "videoBitrate": "0",
+ "videoCodec": "av1",
+ },
# Audio profiles
- 'ogg':
- {
- 'audioCodec': 'vorbis',
- 'audioQuality': '6',
- 'samplerate': '44100',
- 'channels': '2',
- 'noUpscaling': 'True',
- 'novideo': 'True',
- 'type': 'audio/ogg codecs="vorbis"',
- },
- 'opus':
- {
- 'audioCodec': 'opus',
- 'audioBitrate': '128',
- 'samplerate': '48000',
- 'channels': '2',
- 'noUpscaling': 'True',
- 'novideo': 'True',
- 'type': 'audio/ogg codecs="opus"',
- },
+ "ogg": {
+ "audioCodec": "vorbis",
+ "audioQuality": "6",
+ "samplerate": "44100",
+ "channels": "2",
+ "noUpscaling": "True",
+ "novideo": "True",
+ "type": 'audio/ogg codecs="vorbis"',
+ },
+ "opus": {
+ "audioCodec": "opus",
+ "audioBitrate": "128",
+ "samplerate": "48000",
+ "channels": "2",
+ "noUpscaling": "True",
+ "novideo": "True",
+ "type": 'audio/ogg codecs="opus"',
+ },
}
diff --git a/video2commons/backend/encode/transcodejob.py b/video2commons/backend/encode/transcodejob.py
index 85fda75..0d656d2 100644
--- a/video2commons/backend/encode/transcodejob.py
+++ b/video2commons/backend/encode/transcodejob.py
@@ -35,9 +35,14 @@
import signal
from .transcode import WebVideoTranscode
from .globals import (
- background_priority, background_time_limit, background_memory_limit,
- background_size_limit, ffmpeg_threads, ffmpeg_location, escape_shellarg,
- time_to_seconds
+ background_priority,
+ background_time_limit,
+ background_memory_limit,
+ background_size_limit,
+ ffmpeg_threads,
+ ffmpeg_location,
+ escape_shellarg,
+ time_to_seconds,
)
from video2commons.exceptions import TaskAbort
@@ -47,15 +52,21 @@ class WebVideoTranscodeJob(object):
"""Job class."""
def __init__(
- self, source, target, key, preserve={},
- statuscallback=None, errorcallback=None, source_info=None,
- concurrency=None
+ self,
+ source,
+ target,
+ key,
+ preserve={},
+ statuscallback=None,
+ errorcallback=None,
+ source_info=None,
+ concurrency=None,
):
"""Initialize the instance."""
self.source = os.path.abspath(source)
self.target = os.path.abspath(target)
self.key = key
- self.preserve = {'video': False, 'audio': False}
+ self.preserve = {"video": False, "audio": False}
self.preserve.update(preserve)
self.statuscallback = statuscallback or (lambda text, percent: None)
self.errorcallback = errorcallback or (lambda text: None)
@@ -83,8 +94,8 @@ def get_file(self):
@return File
"""
- if not hasattr(self, 'file'):
- self.file = open(self.source, 'r')
+ if not hasattr(self, "file"):
+ self.file = open(self.source, "r")
self.file.close()
return self.file
@@ -95,8 +106,8 @@ def get_target_path(self):
@return string
"""
- if not hasattr(self, 'targetEncodeFile'):
- self.targetEncodeFile = open(self.target, 'w')
+ if not hasattr(self, "targetEncodeFile"):
+ self.targetEncodeFile = open(self.target, "w")
self.targetEncodeFile.close()
return self.targetEncodeFile.name
@@ -107,7 +118,7 @@ def get_source_path(self):
@return string|bool
"""
- if not hasattr(self, 'sourceFilePath'):
+ if not hasattr(self, "sourceFilePath"):
self.sourceFilePath = self.get_file().name
return self.sourceFilePath
@@ -132,7 +143,7 @@ def run(self):
# Validate the file exists:
if not file:
- self.set_error(self.source + ': File not found ')
+ self.set_error(self.source + ": File not found ")
return False
# Validate the transcode key param:
@@ -144,26 +155,26 @@ def run(self):
return False
# Validate the source exists:
- if not self.get_source_path() or not \
- os.path.isfile(self.get_source_path()):
- status = self.source + ': Source not found'
+ if not self.get_source_path() or not os.path.isfile(self.get_source_path()):
+ status = self.source + ": Source not found"
self.set_error(status, transcode_key)
return False
options = WebVideoTranscode.settings[transcode_key]
- if 'novideo' in options:
- self.output("Encoding to audio codec: " + options['audioCodec'])
+ if "novideo" in options:
+ self.output("Encoding to audio codec: " + options["audioCodec"])
else:
- self.output("Encoding to codec: " + options['videoCodec'])
+ self.output("Encoding to codec: " + options["videoCodec"])
# Check the codec see which encode method to call
- if 'novideo' in options or self.preserve['video']:
+ if "novideo" in options or self.preserve["video"]:
status = self.ffmpeg_encode(options)
- elif options['videoCodec'] in ['vp8', 'vp9', 'h264', "av1"] or \
- (options['videoCodec'] == 'theora'):
+ elif options["videoCodec"] in ["vp8", "vp9", "h264", "av1"] or (
+ options["videoCodec"] == "theora"
+ ):
# Check for twopass:
- if 'twopass' in options and options['twopass'] == 'True':
+ if "twopass" in options and options["twopass"] == "True":
# ffmpeg requires manual two pass
status = self.ffmpeg_encode(options, 1)
if status and not isinstance(status, str):
@@ -171,14 +182,14 @@ def run(self):
else:
status = self.ffmpeg_encode(options)
else:
- self.output('Error unknown codec:' + options['videoCodec'])
- status = 'Error unknown target codec:' + options['videoCodec']
+ self.output("Error unknown codec:" + options["videoCodec"])
+ status = "Error unknown target codec:" + options["videoCodec"]
self.remove_ffmpeg_log_files()
# If status is okay and target does not exist, reset status
if status is True and not os.path.isfile(self.get_target_path()):
- status = 'Target does not exist: ' + self.get_target_path()
+ status = "Target does not exist: " + self.get_target_path()
# If status is ok and target is larger than 0 bytes
if status is True and os.path.getsize(self.get_target_path()) > 0:
@@ -196,8 +207,8 @@ def remove_ffmpeg_log_files(self):
if os.path.isdir(dir):
for file in os.listdir(dir):
log_path = os.path.abspath(dir + "/" + file)
- ext = file.split('.')[-1]
- if ext == 'log' and log_path.startswith(path):
+ ext = file.split(".")[-1]
+ if ext == "log" and log_path.startswith(path):
os.unlink(log_path)
def ffmpeg_encode(self, options, p=0):
@@ -209,63 +220,68 @@ def ffmpeg_encode(self, options, p=0):
@return bool|string
"""
if not os.path.isfile(self.get_source_path()):
- return "source file is missing, " + self.get_source_path() + \
- ". Encoding failed."
+ return (
+ "source file is missing, "
+ + self.get_source_path()
+ + ". Encoding failed."
+ )
# Set up the base command
- cmd = escape_shellarg(ffmpeg_location) + ' -y -i ' + \
- escape_shellarg(self.get_source_path())
+ cmd = (
+ escape_shellarg(ffmpeg_location)
+ + " -y -i "
+ + escape_shellarg(self.get_source_path())
+ )
cmd += " -max_muxing_queue_size 4096"
- if 'vpre' in options:
- cmd += ' -vpre ' + escape_shellarg(options['vpre'])
+ if "vpre" in options:
+ cmd += " -vpre " + escape_shellarg(options["vpre"])
# Copy non-standard custom metadata specific to mp4 and mov files
container = self.source_info.format.format
- if container == 'mov,mp4,m4a,3gp,3g2,mj2':
- cmd += ' -movflags use_metadata_tags'
+ if container == "mov,mp4,m4a,3gp,3g2,mj2":
+ cmd += " -movflags use_metadata_tags"
- cmd += ' -map_metadata 0'
+ cmd += " -map_metadata 0"
- if 'novideo' in options:
+ if "novideo" in options:
cmd += " -vn "
- elif self.preserve['video']:
+ elif self.preserve["video"]:
cmd += " -vcodec copy"
- elif options['videoCodec'] == 'av1':
+ elif options["videoCodec"] == "av1":
cmd += self.ffmpeg_add_av1_video_options(options, p)
- elif options['videoCodec'] == 'vp8' or options['videoCodec'] == 'vp9':
+ elif options["videoCodec"] == "vp8" or options["videoCodec"] == "vp9":
cmd += self.ffmpeg_add_webm_video_options(options, p)
- elif options['videoCodec'] == 'h264':
+ elif options["videoCodec"] == "h264":
cmd += self.ffmpeg_add_h264_video_options(options, p)
- elif options['videoCodec'] == 'theora':
+ elif options["videoCodec"] == "theora":
cmd += self.ffmpeg_add_theora_video_options(options, p)
# Check for start time
- if 'starttime' in options:
- cmd += ' -ss ' + escape_shellarg(options['starttime'])
+ if "starttime" in options:
+ cmd += " -ss " + escape_shellarg(options["starttime"])
else:
- options['starttime'] = 0
+ options["starttime"] = 0
# Check for end time:
- if 'endtime' in options:
- cmd += ' -t ' + str(options['endtime']) - str(options['starttime'])
+ if "endtime" in options:
+ cmd += " -t " + str(options["endtime"]) - str(options["starttime"])
- if p == 1 or 'noaudio' in options:
- cmd += ' -an'
- elif self.preserve['audio']:
+ if p == 1 or "noaudio" in options:
+ cmd += " -an"
+ elif self.preserve["audio"]:
cmd += " -acodec copy"
else:
cmd += self.ffmpeg_add_audio_options(options, p)
if p != 0:
cmd += " -pass " + escape_shellarg(p)
- cmd += " -passlogfile " + \
- escape_shellarg(self.get_target_path() + '.log')
+ cmd += " -passlogfile " + escape_shellarg(self.get_target_path() + ".log")
# And the output target:
if p == 1:
- cmd += ' /dev/null'
+ cmd += " /dev/null"
else:
cmd += " " + escape_shellarg(self.get_target_path())
@@ -275,8 +291,7 @@ def ffmpeg_encode(self, options, p=0):
retval, shellOutput = self.run_shell_exec(cmd, track=p != 1)
if int(retval) != 0:
- return cmd + \
- "\nExitcode: " + str(retval)
+ return cmd + "\nExitcode: " + str(retval)
return True
@@ -291,8 +306,8 @@ def ffmpeg_add_h264_video_options(self, options, p):
# Set the codec:
cmd = " -threads " + str(self.ffmpeg_get_thread_count()) + " -vcodec libx264"
- if 'videoBitrate' in options:
- cmd += " -b " + escape_shellarg(options['videoBitrate'])
+ if "videoBitrate" in options:
+ cmd += " -b " + escape_shellarg(options["videoBitrate"])
# Output mp4
cmd += " -f mp4"
@@ -306,26 +321,26 @@ def ffmpeg_add_av1_video_options(self, options, p):
@param p
@return string
"""
- cmd = ' -threads ' + str(self.ffmpeg_get_thread_count())
+ cmd = " -threads " + str(self.ffmpeg_get_thread_count())
# libsvtav1-specific constant quality
- if 'crf' in options:
- cmd += " -crf " + escape_shellarg(options['crf'])
+ if "crf" in options:
+ cmd += " -crf " + escape_shellarg(options["crf"])
- if 'videoBitrate' in options:
- if int(options['videoBitrate']) > 0:
+ if "videoBitrate" in options:
+ if int(options["videoBitrate"]) > 0:
cmd += " -qmin 1 -qmax 63"
- cmd += " -b:v " + escape_shellarg(int(options['videoBitrate']) * 1000)
+ cmd += " -b:v " + escape_shellarg(int(options["videoBitrate"]) * 1000)
cmd += " -vcodec libsvtav1"
# libsvtav1 ignores the -threads option, so we have to set it manually.
- cmd += ' -svtav1-params lp=' + str(self.ffmpeg_get_thread_count())
+ cmd += " -svtav1-params lp=" + str(self.ffmpeg_get_thread_count())
if p == 1:
- cmd += ' -preset 12' # Make first pass faster
- elif 'preset' in options:
- cmd += ' -preset ' + escape_shellarg(options['preset'])
+ cmd += " -preset 12" # Make first pass faster
+ elif "preset" in options:
+ cmd += " -preset " + escape_shellarg(options["preset"])
cmd += " -f webm"
@@ -339,67 +354,65 @@ def ffmpeg_add_webm_video_options(self, options, p):
@param p
@return string
"""
- cmd = ' -threads ' + str(self.ffmpeg_get_thread_count())
- if options['videoCodec'] == 'vp9':
- cmd += ' -row-mt 1'
+ cmd = " -threads " + str(self.ffmpeg_get_thread_count())
+ if options["videoCodec"] == "vp9":
+ cmd += " -row-mt 1"
# check for presets:
- if 'preset' in options:
- if options['preset'] == "360p":
+ if "preset" in options:
+ if options["preset"] == "360p":
cmd += " -vpre libvpx-360p"
- elif options['preset'] == "720p":
+ elif options["preset"] == "720p":
cmd += " -vpre libvpx-720p"
- elif options['preset'] == "1080p":
+ elif options["preset"] == "1080p":
cmd += " -vpre libvpx-1080p"
# Check for video quality:
- if 'videoQuality' in options and int(options['videoQuality']) >= 0:
+ if "videoQuality" in options and int(options["videoQuality"]) >= 0:
# Map 0-10 to 63-0, higher values worse quality
- quality = 63 - int(int(options['videoQuality']) / 10.0 * 63)
+ quality = 63 - int(int(options["videoQuality"]) / 10.0 * 63)
cmd += " -qmin " + escape_shellarg(quality)
cmd += " -qmax " + escape_shellarg(quality)
# libvpx-specific constant quality or constrained quality
# note the range is different between VP8 and VP9
- if 'crf' in options:
- cmd += " -crf " + escape_shellarg(options['crf'])
+ if "crf" in options:
+ cmd += " -crf " + escape_shellarg(options["crf"])
# Check for video bitrate:
- if 'videoBitrate' in options:
+ if "videoBitrate" in options:
cmd += " -qmin 1 -qmax 51"
- cmd += " -b:v " + escape_shellarg(int(options['videoBitrate']) * 1000)
+ cmd += " -b:v " + escape_shellarg(int(options["videoBitrate"]) * 1000)
# Set the codec:
- if options['videoCodec'] == 'vp9':
+ if options["videoCodec"] == "vp9":
cmd += " -vcodec libvpx-vp9"
- if 'tileColumns' in options:
- cmd += ' -tile-columns ' + \
- escape_shellarg(options['tileColumns'])
+ if "tileColumns" in options:
+ cmd += " -tile-columns " + escape_shellarg(options["tileColumns"])
else:
cmd += " -vcodec libvpx"
- if 'altref' in options:
- cmd += ' -auto-alt-ref 1'
- cmd += ' -lag-in-frames 25'
+ if "altref" in options:
+ cmd += " -auto-alt-ref 1"
+ cmd += " -lag-in-frames 25"
# Check for keyframeInterval
- if 'keyframeInterval' in options:
- cmd += ' -g ' + escape_shellarg(options['keyframeInterval'])
- cmd += ' -keyint_min ' + \
- escape_shellarg(options['keyframeInterval'])
+ if "keyframeInterval" in options:
+ cmd += " -g " + escape_shellarg(options["keyframeInterval"])
+ cmd += " -keyint_min " + escape_shellarg(options["keyframeInterval"])
- if 'deinterlace' in options:
- cmd += ' -deinterlace'
+ if "deinterlace" in options:
+ cmd += " -deinterlace"
if p == 1:
# Make first pass faster...
- cmd += ' -speed 4'
- elif 'speed' in options:
- cmd += ' -speed ' + escape_shellarg(options['speed'])
+ cmd += " -speed 4"
+ elif "speed" in options:
+ cmd += " -speed " + escape_shellarg(options["speed"])
# In libvpx, "quality" sets a deadline on how long frames can be processed.
- if 'quality' in options:
- cmd += ' -quality ' + escape_shellarg(options['quality'])
+ if "quality" in options:
+ cmd += " -quality " + escape_shellarg(options["quality"])
# Output WebM
cmd += " -f webm"
@@ -416,31 +429,30 @@ def ffmpeg_add_theora_video_options(self, options, p):
@param p
@return string
"""
- cmd = ' -threads ' + str(self.ffmpeg_get_thread_count())
+ cmd = " -threads " + str(self.ffmpeg_get_thread_count())
# Check for video quality:
- if 'videoQuality' in options and int(options['videoQuality']) >= 0:
- cmd += " -q:v " + escape_shellarg(options['videoQuality'])
+ if "videoQuality" in options and int(options["videoQuality"]) >= 0:
+ cmd += " -q:v " + escape_shellarg(options["videoQuality"])
# Check for video bitrate:
- if 'videoBitrate' in options:
+ if "videoBitrate" in options:
cmd += " -qmin 1 -qmax 51"
- cmd += " -b:v " + escape_shellarg(int(options['videoBitrate']) * 1000)
+ cmd += " -b:v " + escape_shellarg(int(options["videoBitrate"]) * 1000)
# Set the codec:
cmd += " -vcodec theora"
# Check for keyframeInterval
- if 'keyframeInterval' in options:
- cmd += ' -g ' + escape_shellarg(options['keyframeInterval'])
- cmd += ' -keyint_min ' + \
- escape_shellarg(options['keyframeInterval'])
+ if "keyframeInterval" in options:
+ cmd += " -g " + escape_shellarg(options["keyframeInterval"])
+ cmd += " -keyint_min " + escape_shellarg(options["keyframeInterval"])
- if 'deinterlace' in options:
- cmd += ' -deinterlace'
+ if "deinterlace" in options:
+ cmd += " -deinterlace"
- if 'framerate' in options:
- cmd += ' -r ' + escape_shellarg(options['framerate'])
+ if "framerate" in options:
+ cmd += " -r " + escape_shellarg(options["framerate"])
# Output Ogg
cmd += " -f ogg"
@@ -455,34 +467,34 @@ def ffmpeg_add_audio_options(self, options, p):
@param p
@return string
"""
- cmd = ''
- if 'audioQuality' in options:
- cmd += " -aq " + escape_shellarg(options['audioQuality'])
+ cmd = ""
+ if "audioQuality" in options:
+ cmd += " -aq " + escape_shellarg(options["audioQuality"])
- if 'audioBitrate' in options:
- cmd += ' -b:a ' + str(int(options['audioBitrate']) * 1000)
+ if "audioBitrate" in options:
+ cmd += " -b:a " + str(int(options["audioBitrate"]) * 1000)
- if 'samplerate' in options:
- cmd += " -ar " + escape_shellarg(options['samplerate'])
+ if "samplerate" in options:
+ cmd += " -ar " + escape_shellarg(options["samplerate"])
- if 'channels' in options:
- cmd += " -ac " + escape_shellarg(options['channels'])
+ if "channels" in options:
+ cmd += " -ac " + escape_shellarg(options["channels"])
- if 'audioCodec' in options:
+ if "audioCodec" in options:
encoders = {
- 'vorbis': 'libvorbis',
- 'opus': 'libopus',
- 'mp3': 'libmp3lame',
+ "vorbis": "libvorbis",
+ "opus": "libopus",
+ "mp3": "libmp3lame",
}
- if options['audioCodec'] in encoders:
- codec = encoders[options['audioCodec']]
+ if options["audioCodec"] in encoders:
+ codec = encoders[options["audioCodec"]]
else:
- codec = options['audioCodec']
+ codec = options["audioCodec"]
cmd += " -acodec " + escape_shellarg(codec)
- if codec == 'aac':
+ if codec == "aac":
# the aac encoder is currently "experimental" in libav 9? :P
- cmd += ' -strict experimental'
+ cmd += " -strict experimental"
else:
# if no audio codec set use vorbis :
cmd += " -acodec libvorbis "
@@ -504,21 +516,36 @@ def run_shell_exec(self, cmd, track=True):
@param cmd String Command to be run
@return int, string
"""
- cmd = 'ulimit -f ' + escape_shellarg(background_size_limit) + ';' + \
- 'ulimit -v ' + escape_shellarg(background_memory_limit) + ';' + \
- 'nice -n ' + escape_shellarg(background_priority) + ' ' + \
- 'timeout ' + escape_shellarg(background_time_limit) + ' ' + \
- cmd + \
- ' 2>&1'
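+        # Wrap the encode command with resource limits: output size, memory, nice priority, and a timeout.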
+ cmd = (
+ "ulimit -f "
+ + escape_shellarg(background_size_limit)
+ + ";"
+ + "ulimit -v "
+ + escape_shellarg(background_memory_limit)
+ + ";"
+ + "nice -n "
+ + escape_shellarg(background_priority)
+ + " "
+ + "timeout "
+ + escape_shellarg(background_time_limit)
+ + " "
+ + cmd
+ + " 2>&1"
+ )
# Adapted from https://gist.github.com/marazmiki/3015621
process = subprocess.Popen(
- cmd, stdin=None, stdout=subprocess.PIPE, stderr=None,
- universal_newlines=True, shell=True, preexec_fn=os.setsid
+ cmd,
+ stdin=None,
+ stdout=subprocess.PIPE,
+ stderr=None,
+ universal_newlines=True,
+ shell=True,
+ preexec_fn=os.setsid,
)
- re_duration = re.compile(r'Duration: (\d{2}:\d{2}:\d{2})')
- re_position = re.compile(r'time=(\d{2}:\d{2}:\d{2})', re.I)
+ re_duration = re.compile(r"Duration: (\d{2}:\d{2}:\d{2})")
+ re_position = re.compile(r"time=(\d{2}:\d{2}:\d{2})", re.I)
duration = None
position = None
@@ -542,9 +569,9 @@ def run_shell_exec(self, cmd, track=True):
if position_match:
position = time_to_seconds(position_match.group(1))
if duration and position:
- newpercentage = min(int(
- math.floor(100 * position / duration)
- ), 100)
+ newpercentage = min(
+ int(math.floor(100 * position / duration)), 100
+ )
if newpercentage != percentage:
percentage = newpercentage
@@ -557,4 +584,4 @@ def run_shell_exec(self, cmd, track=True):
time.sleep(2)
process.stdout.close()
- return process.returncode, ''
+ return process.returncode, ""
diff --git a/video2commons/backend/subtitles/__init__.py b/video2commons/backend/subtitles/__init__.py
index 950c3ce..f9c10e4 100644
--- a/video2commons/backend/subtitles/__init__.py
+++ b/video2commons/backend/subtitles/__init__.py
@@ -34,12 +34,11 @@
def upload(site, filename, text, langcode, langname):
"""Upload subtitles to Wikimedia Commons."""
- page = pywikibot.Page(site, f'TimedText:{filename}.{langcode.lower()}.srt')
+ page = pywikibot.Page(site, f"TimedText:{filename}.{langcode.lower()}.srt")
page.text = text
if not page.exists():
page.save(
- summary=f'Import {langname} subtitles for [[:File:{filename}]]',
- minor=False
+ summary=f"Import {langname} subtitles for [[:File:{filename}]]", minor=False
)
@@ -47,28 +46,36 @@ def get_container_subtitle_languages(filepath):
"""Returns subtitle languages contained in a video container."""
languages = set()
- result = subprocess.run([
- ffprobe_location,
- '-loglevel', 'error',
- '-select_streams', 's',
- '-show_entries', 'stream=index:stream_tags=language',
- '-of', 'json',
- filepath
- ], capture_output=True, text=True)
+ result = subprocess.run(
+ [
+ ffprobe_location,
+ "-loglevel",
+ "error",
+ "-select_streams",
+ "s",
+ "-show_entries",
+ "stream=index:stream_tags=language",
+ "-of",
+ "json",
+ filepath,
+ ],
+ capture_output=True,
+ text=True,
+ )
if result.returncode != 0:
return set()
- for stream in json.loads(result.stdout).get('streams', []):
- has_language = 'tags' in stream and 'language' in stream['tags']
- has_index = 'index' in stream
+ for stream in json.loads(result.stdout).get("streams", []):
+ has_language = "tags" in stream and "language" in stream["tags"]
+ has_index = "index" in stream
# Skip unlabelled subtitles that have no language tag.
if not has_language or not has_index:
continue
try:
- langcode = langcodes.standardize_tag(stream['tags']['language'])
+ langcode = langcodes.standardize_tag(stream["tags"]["language"])
except LanguageTagError:
continue # Skip subtitles with invalid language tags.
@@ -91,58 +98,67 @@ def get_subtitle_languages(subtitles):
return languages
-def upload_container_subtitles(filepath, filename, outputdir, username, statuscallback=None):
+
+def upload_container_subtitles(
+ filepath, filename, outputdir, username, statuscallback=None
+):
"""Extract subtitles from a video container that supports it (e.g. mkv)."""
statuscallback = statuscallback or (lambda text, percent: None)
- statuscallback('Uploading subtitles...', -1)
+ statuscallback("Uploading subtitles...", -1)
percent = 0
- result = subprocess.run([
- ffprobe_location,
- '-loglevel', 'error',
- '-select_streams', 's',
- '-show_entries', 'stream=index:stream_tags=language',
- '-of', 'json',
- filepath
- ], capture_output=True, text=True)
+ result = subprocess.run(
+ [
+ ffprobe_location,
+ "-loglevel",
+ "error",
+ "-select_streams",
+ "s",
+ "-show_entries",
+ "stream=index:stream_tags=language",
+ "-of",
+ "json",
+ filepath,
+ ],
+ capture_output=True,
+ text=True,
+ )
if result.returncode != 0:
statuscallback(
- f'Failed to extract subtitles: {result.stderr or result.returncode}',
- None
+ f"Failed to extract subtitles: {result.stderr or result.returncode}", None
)
return
subtitles = []
languages = set()
- streams = json.loads(result.stdout).get('streams', [])
+ streams = json.loads(result.stdout).get("streams", [])
if not streams:
- statuscallback('No subtitles found in container', 100)
+ statuscallback("No subtitles found in container", 100)
return
- statuscallback(f'Extracting subtitles for {len(streams)} language(s)...', -1)
+ statuscallback(f"Extracting subtitles for {len(streams)} language(s)...", -1)
# Extract all subtitles from the video container (0-50%).
for stream in streams:
- has_language = 'tags' in stream and 'language' in stream['tags']
- has_index = 'index' in stream
+ has_language = "tags" in stream and "language" in stream["tags"]
+ has_index = "index" in stream
# Skip unlabelled subtitles that have no language tag.
if not has_language or not has_index:
percent += 50.0 / len(streams)
- statuscallback('Skipping subtitles missing required tags', None)
+ statuscallback("Skipping subtitles missing required tags", None)
continue
try:
- langcode = langcodes.standardize_tag(stream['tags']['language'])
+ langcode = langcodes.standardize_tag(stream["tags"]["language"])
except LanguageTagError:
percent += 50.0 / len(streams)
statuscallback(
- f'Skipping subtitles with invalid language tag: {langcode}',
- None
+ f"Skipping subtitles with invalid language tag: {langcode}", None
)
continue # Skip subtitles with invalid language tags.
@@ -151,50 +167,56 @@ def upload_container_subtitles(filepath, filename, outputdir, username, statusca
if langcode in languages:
percent += 50.0 / len(streams)
statuscallback(
- f'Skipping duplicate subtitles with language: {langcode}',
- None
+ f"Skipping duplicate subtitles with language: {langcode}", None
)
continue
else:
languages.add(langcode)
langname = Language.make(language=langcode).display_name()
- statuscallback(f'Extracting {langname} subtitles...', int(percent))
+ statuscallback(f"Extracting {langname} subtitles...", int(percent))
- srt_filepath = os.path.join(outputdir, f'{filename}.{langcode.lower()}.srt')
+ srt_filepath = os.path.join(outputdir, f"{filename}.{langcode.lower()}.srt")
# Write the subtitles to the output directory of the job.
- result = subprocess.run([
- ffmpeg_location,
- '-nostdin',
- '-hide_banner',
- '-loglevel', 'quiet',
- '-i', filepath,
- '-map', f'0:{stream["index"]}',
- srt_filepath
- ], capture_output=True, text=True)
+ result = subprocess.run(
+ [
+ ffmpeg_location,
+ "-nostdin",
+ "-hide_banner",
+ "-loglevel",
+ "quiet",
+ "-i",
+ filepath,
+ "-map",
+ f"0:{stream['index']}",
+ srt_filepath,
+ ],
+ capture_output=True,
+ text=True,
+ )
percent += 50.0 / len(streams)
if result.returncode != 0:
statuscallback(
f"Failed to extract '{langcode.lower()}' subtitles: {result.stderr or result.returncode}",
- int(percent)
+ int(percent),
)
continue
subtitles.append((langcode, langname, srt_filepath))
if not subtitles:
- statuscallback('No subtitles extracted successfully', 100)
+ statuscallback("No subtitles extracted successfully", 100)
return
# Attempt uploads only after successful extraction of all subtitles (50-100%).
for langcode, langname, srt_filepath in subtitles:
try:
- statuscallback(f'Uploading {langname} subtitles...', int(percent))
+ statuscallback(f"Uploading {langname} subtitles...", int(percent))
- with open(srt_filepath, 'rb') as f:
+ with open(srt_filepath, "rb") as f:
text = f.read()
# Try to first decode the subtitles as UTF-8 if possible rather
@@ -205,11 +227,10 @@ def upload_container_subtitles(filepath, filename, outputdir, username, statusca
text = decoded_text
else:
# It's not UTF-8, so try to detect the encoding.
- encoding = chardet.detect(text)['encoding']
+ encoding = chardet.detect(text)["encoding"]
if not encoding:
statuscallback(
- f'Skipping subtitles with invalid encoding: {langcode}',
- None
+ f"Skipping subtitles with invalid encoding: {langcode}", None
)
continue
@@ -217,43 +238,40 @@ def upload_container_subtitles(filepath, filename, outputdir, username, statusca
text = text.decode(encoding)
except Exception:
statuscallback(
- f'Skipping subtitles with invalid encoding: {langcode}',
- None
+ f"Skipping subtitles with invalid encoding: {langcode}", None
)
continue
upload(
- site=pywikibot.Site('commons', 'commons', user=username),
+ site=pywikibot.Site("commons", "commons", user=username),
filename=filename,
text=text,
langcode=langcode,
- langname=langname
+ langname=langname,
)
percent += 50.0 / len(subtitles)
- statuscallback(f'Finished uploading {langname} subtitles', int(percent))
+ statuscallback(f"Finished uploading {langname} subtitles", int(percent))
except TaskAbort:
raise
except Exception as e:
percent += 50.0 / len(subtitles)
- statuscallback(f'{type(e).__name__}: {e}\n\n{traceback.format_exc()}', int(percent))
+ statuscallback(
+ f"{type(e).__name__}: {e}\n\n{traceback.format_exc()}", int(percent)
+ )
def upload_subtitles(
- subtitles, wikifilename, username,
- statuscallback=None, errorcallback=None
+ subtitles, wikifilename, username, statuscallback=None, errorcallback=None
):
"""Convert and upload subtitles to corresponding TimedText pages."""
statuscallback = statuscallback or (lambda text, percent: None)
errorcallback = errorcallback or (lambda text: None)
- statuscallback('Uploading subtitles...', -1)
+ statuscallback("Uploading subtitles...", -1)
percent = 0
- c = Converter(
- ffmpeg_path=ffmpeg_location,
- ffprobe_path=ffprobe_location
- )
+ c = Converter(ffmpeg_path=ffmpeg_location, ffprobe_path=ffprobe_location)
for langcode, filename in list(subtitles.items()):
try:
@@ -261,74 +279,63 @@ def upload_subtitles(
langcode = str(lang).lower()
langdesc = lang.describe()
- langname = langdesc['language']
- del langdesc['language']
+ langname = langdesc["language"]
+ del langdesc["language"]
if langdesc:
- langname += ' (%s)' % ', '.join(list(langdesc.values()))
+ langname += " (%s)" % ", ".join(list(langdesc.values()))
- statuscallback('Loading subtitles in ' + langname, int(percent))
- subtitletext = ''
+ statuscallback("Loading subtitles in " + langname, int(percent))
+ subtitletext = ""
info = c.probe(filename)
if not info:
continue
if len(info.streams) != 1:
continue
- if info.streams[0].type != 'subtitle':
+ if info.streams[0].type != "subtitle":
continue
format = info.streams[0].codec
- if format.lower() != 'subrip':
- target = filename + '.srt'
- cmd = [
- ffmpeg_location,
- '-i', filename,
- '-f', 'srt',
- target
- ]
+ if format.lower() != "subrip":
+ target = filename + ".srt"
+ cmd = [ffmpeg_location, "-i", filename, "-f", "srt", target]
statuscallback("Running cmd: %s" % cmd, None)
subprocess.check_call(cmd, stderr=None)
filename = target
- with open(filename, 'rb') as f:
+ with open(filename, "rb") as f:
subtitletext = f.read()
- subtitletext = subtitletext.decode(
- chardet.detect(subtitletext)['encoding']
- )
+ subtitletext = subtitletext.decode(chardet.detect(subtitletext)["encoding"])
percent += 50.0 / len(subtitles)
- statuscallback(
- 'Uploading subtitles in ' + langname,
- int(percent)
- )
+ statuscallback("Uploading subtitles in " + langname, int(percent))
# ENSURE PYWIKIBOT OAUTH PROPERLY CONFIGURED!
- site = pywikibot.Site('commons', 'commons', user=username)
+ site = pywikibot.Site("commons", "commons", user=username)
upload(
site=site,
filename=wikifilename,
text=subtitletext,
langcode=langcode,
- langname=langname
+ langname=langname,
)
percent += 50.0 / len(subtitles)
- statuscallback(
- 'Finished processing subtitles in ' + langname,
- int(percent)
- )
+ statuscallback("Finished processing subtitles in " + langname, int(percent))
except TaskAbort:
raise
except Exception as e:
- statuscallback(f'{type(e).__name__}: {e} \n\n{traceback.format_exc()}', None)
+ statuscallback(
+ f"{type(e).__name__}: {e} \n\n{traceback.format_exc()}", None
+ )
pass
def parse_utf8(bytestring):
"""Try to decode a bytestring as UTF-8, returning None on failure."""
try:
- return bytestring.decode('utf-8')
+ return bytestring.decode("utf-8")
except UnicodeDecodeError:
return None
diff --git a/video2commons/backend/upload/__init__.py b/video2commons/backend/upload/__init__.py
index d1c0587..8d7b639 100644
--- a/video2commons/backend/upload/__init__.py
+++ b/video2commons/backend/upload/__init__.py
@@ -34,8 +34,14 @@
def upload(
- filename, wikifilename, sourceurl, http_host, filedesc, username,
- statuscallback=None, errorcallback=None
+ filename,
+ wikifilename,
+ sourceurl,
+ http_host,
+ filedesc,
+ username,
+ statuscallback=None,
+ errorcallback=None,
):
"""Upload a file from filename to wikifilename."""
statuscallback = statuscallback or (lambda text, percent: None)
@@ -45,54 +51,78 @@ def upload(
if size < 1000000000:
return upload_pwb(
- filename, wikifilename, sourceurl, filedesc, username,
- size, statuscallback, errorcallback
+ filename,
+ wikifilename,
+ sourceurl,
+ filedesc,
+ username,
+ size,
+ statuscallback,
+ errorcallback,
)
elif size < (5 << 30):
try:
return upload_pwb(
- filename, wikifilename, sourceurl, filedesc, username,
- size, statuscallback, errorcallback
+ filename,
+ wikifilename,
+ sourceurl,
+ filedesc,
+ username,
+ size,
+ statuscallback,
+ errorcallback,
)
except pywikibot.exceptions.APIError as e:
- if 'stash' in e.code or e.code == 'backend-fail-internal':
+ if "stash" in e.code or e.code == "backend-fail-internal":
upload_ss(
- filename, wikifilename, http_host, filedesc,
- statuscallback, errorcallback
+ filename,
+ wikifilename,
+ http_host,
+ filedesc,
+ statuscallback,
+ errorcallback,
)
else:
raise
else:
errorcallback(
- 'Sorry, but files larger than 5GB can not be uploaded even ' +
- 'with server-side uploading. This task may need manual ' +
- ' intervention.'
+ "Sorry, but files larger than 5GB can not be uploaded even "
+ + "with server-side uploading. This task may need manual "
+ + " intervention."
)
def upload_pwb(
- filename, wikifilename, sourceurl, filedesc, username,
- size, statuscallback, errorcallback
+ filename,
+ wikifilename,
+ sourceurl,
+ filedesc,
+ username,
+ size,
+ statuscallback,
+ errorcallback,
):
"""Upload with pywikibot."""
# ENSURE PYWIKIBOT OAUTH PROPERLY CONFIGURED!
- site = pywikibot.Site('commons', 'commons', user=username)
+ site = pywikibot.Site("commons", "commons", user=username)
page = pywikibot.FilePage(site, wikifilename)
if page.exists():
- errorcallback('File already exists. Please choose another name.')
+ errorcallback("File already exists. Please choose another name.")
- comment = 'Imported media from ' + sourceurl
+ comment = "Imported media from " + sourceurl
chunked = (16 * (1 << 20)) if size >= 100000000 else 0
remaining_tries = MAX_RETRIES
while True:
if remaining_tries == MAX_RETRIES:
- statuscallback('Uploading...', -1)
+ statuscallback("Uploading...", -1)
elif remaining_tries > 1:
- statuscallback(f'Retrying upload... ({remaining_tries} tries remaining)', -1)
+ statuscallback(
+ f"Retrying upload... ({remaining_tries} tries remaining)", -1
+ )
elif remaining_tries == 1:
- statuscallback(f'Retrying upload... ({remaining_tries} try remaining)', -1)
+ statuscallback(f"Retrying upload... ({remaining_tries} try remaining)", -1)
if remaining_tries != MAX_RETRIES:
exponential_backoff(remaining_tries)
@@ -105,9 +135,9 @@ def upload_pwb(
text=filedesc,
chunk_size=chunked,
asynchronous=bool(chunked),
- ignore_warnings=['exists-normalized'],
+ ignore_warnings=["exists-normalized"],
):
- errorcallback('Upload failed!')
+ errorcallback("Upload failed!")
break # The upload completed successfully.
except TaskError:
@@ -126,18 +156,17 @@ def upload_pwb(
if remaining_tries == 0:
raise # No more retries, raise the error.
- statuscallback('Upload success!', 100)
+ statuscallback("Upload success!", 100)
return page.title(with_ns=False), page.full_url()
def upload_ss(
- filename, wikifilename, http_host, filedesc,
- statuscallback, errorcallback
+ filename, wikifilename, http_host, filedesc, statuscallback, errorcallback
):
"""Prepare for server-side upload."""
# Get hash
md5 = hashlib.md5()
- with open(filename, 'rb') as f:
+ with open(filename, "rb") as f:
while True:
data = f.read(65536)
if not data:
@@ -145,21 +174,27 @@ def upload_ss(
md5.update(data)
# file name check
- wikifilename = wikifilename.replace('/', '-').replace(' ', '_')
- wikifilename = wikifilename.replace('\r\n', '_')
- wikifilename = wikifilename.replace('\r', '_').replace('\n', '_')
+ wikifilename = wikifilename.replace("/", "-").replace(" ", "_")
+ wikifilename = wikifilename.replace("\r\n", "_")
+ wikifilename = wikifilename.replace("\r", "_").replace("\n", "_")
- newfilename = '/srv/v2c/ssu/' + wikifilename
+ newfilename = "/srv/v2c/ssu/" + wikifilename
remaining_tries = MAX_RETRIES
while True:
try:
if remaining_tries == MAX_RETRIES:
- statuscallback('Preparing for server-side upload...', -1)
+ statuscallback("Preparing for server-side upload...", -1)
elif remaining_tries > 1:
- statuscallback(f'Retrying server-side upload preparation... ({remaining_tries} tries remaining)', -1)
+ statuscallback(
+ f"Retrying server-side upload preparation... ({remaining_tries} tries remaining)",
+ -1,
+ )
elif remaining_tries == 1:
- statuscallback(f'Retrying server-side upload preparation... ({remaining_tries} try remaining)', -1)
+ statuscallback(
+ f"Retrying server-side upload preparation... ({remaining_tries} try remaining)",
+ -1,
+ )
if remaining_tries != MAX_RETRIES:
exponential_backoff(remaining_tries)
@@ -173,16 +208,16 @@ def upload_ss(
remaining_tries -= 1
if remaining_tries == 0:
# No more retries, raise the error.
- errorcallback('Upload failed: NFS share is likely overloaded')
+ errorcallback("Upload failed: NFS share is likely overloaded")
- with open(newfilename + '.txt', 'w') as filedescfile:
+ with open(newfilename + ".txt", "w") as filedescfile:
filedesc = filedesc.replace(
- '[[Category:Uploaded with video2commons]]',
- '[[Category:Uploaded with video2commons/Server-side uploads]]'
+ "[[Category:Uploaded with video2commons]]",
+ "[[Category:Uploaded with video2commons/Server-side uploads]]",
)
filedescfile.write(filedesc)
- fileurl = 'https://' + http_host + '/' + wikifilename
+ fileurl = "https://" + http_host + "/" + wikifilename
raise NeedServerSideUpload(fileurl, md5.hexdigest())
diff --git a/video2commons/backend/user-config.py b/video2commons/backend/user-config.py
index 6e90921..884b1ac 100644
--- a/video2commons/backend/user-config.py
+++ b/video2commons/backend/user-config.py
@@ -4,7 +4,7 @@
"""Pywikibot configs."""
-family = 'commons'
-mylang = 'commons'
+family = "commons"
+mylang = "commons"
socket_timeout = 30, 300 # chunked uploading unreliable
diff --git a/video2commons/backend/worker.py b/video2commons/backend/worker.py
index bf830d8..369c7b2 100644
--- a/video2commons/backend/worker.py
+++ b/video2commons/backend/worker.py
@@ -17,8 +17,6 @@
"""video2commons backend worker."""
-
-
import os
import sys
import shutil
@@ -37,19 +35,19 @@
from video2commons.backend import upload
from video2commons.backend import subtitles as subtitleuploader
from video2commons.config import (
- redis_pw, redis_host, consumer_key, consumer_secret, http_host
+ redis_pw,
+ redis_host,
+ consumer_key,
+ consumer_secret,
+ http_host,
)
from video2commons.shared.stats import update_task_stats
-redisurl = 'redis://:' + redis_pw + '@' + redis_host + ':6379/'
-app = celery.Celery(
- 'v2cbackend',
- backend=redisurl + '1',
- broker=redisurl + '2'
-)
+redisurl = "redis://:" + redis_pw + "@" + redis_host + ":6379/"
+app = celery.Celery("v2cbackend", backend=redisurl + "1", broker=redisurl + "2")
app.conf.result_expires = 30 * 24 * 3600 # 1 month
-app.conf.accept_content = ['json']
+app.conf.accept_content = ["json"]
app.conf.worker_prefetch_multiplier = 1
redisconnection = Redis(host=redis_host, db=3, password=redis_pw)
@@ -58,27 +56,35 @@
class Stats:
"""Storage for task status."""
- text = ''
+ text = ""
percent = 0
def get_worker_concurrency():
"""Parse concurrency value from CELERYD_OPTS environment variable."""
- celeryd_opts = os.environ.get('CELERYD_OPTS', '')
+ celeryd_opts = os.environ.get("CELERYD_OPTS", "")
- match = re.search(r'--concurrency[=\s]+(\d+)', celeryd_opts)
+ match = re.search(r"--concurrency[=\s]+(\d+)", celeryd_opts)
if match:
return int(match.group(1))
@app.task(bind=True, track_started=False, base=AbortableTask)
def main(
- self, url, ie_key, subtitles, filename, filedesc,
- downloadkey, convertkey, username, oauth
+ self,
+ url,
+ ie_key,
+ subtitles,
+ filename,
+ filedesc,
+ downloadkey,
+ convertkey,
+ username,
+ oauth,
):
"""Main worker code."""
# Get a lock to prevent double-running with same task ID
- lockkey = 'tasklock:' + self.request.id
+ lockkey = "tasklock:" + self.request.id
if redisconnection.exists(lockkey):
raise Ignore
@@ -90,9 +96,9 @@ def main(
pass # We don't want to fail the task if we can't update stats.
# Check for 10G of disk space, refuse to run if it is unavailable
- st = os.statvfs('/srv')
+ st = os.statvfs("/srv")
if st.f_frsize * st.f_bavail < 10 << 30:
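+ # Not enough space: re-queue the task every 5 minutes, up to 20 times.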
- self.retry(max_retries=20, countdown=5*60)
+ self.retry(max_retries=20, countdown=5 * 60)
assert False # should never reach here
redisconnection.setex(lockkey, 7 * 24 * 3600, self.request.hostname)
@@ -100,7 +106,7 @@ def main(
# Generate temporary directory for task
for i in range(10): # 10 tries
id = os.urandom(8).hex()
- outputdir = '/srv/v2c/output/' + id
+ outputdir = "/srv/v2c/output/" + id
if not os.path.isdir(outputdir):
os.makedirs(outputdir)
break
@@ -116,52 +122,56 @@ def statuscallback(text, percent):
s.text = text
if percent is not None:
s.percent = percent
- print('%d: %s' % (s.percent, s.text))
+ print("%d: %s" % (s.percent, s.text))
- self.update_state(
- state='PROGRESS',
- meta={'text': s.text, 'percent': s.percent}
- )
+ self.update_state(state="PROGRESS", meta={"text": s.text, "percent": s.percent})
def errorcallback(text):
raise TaskError(text)
try:
- statuscallback('Downloading...', -1)
+ statuscallback("Downloading...", -1)
d = download.download(
- url, ie_key, downloadkey, subtitles,
- outputdir, statuscallback, errorcallback
+ url,
+ ie_key,
+ downloadkey,
+ subtitles,
+ outputdir,
+ statuscallback,
+ errorcallback,
)
if not d:
- errorcallback('Download failed!')
- file = d['target']
+ errorcallback("Download failed!")
+ file = d["target"]
if not file:
- errorcallback('Download failed!')
+ errorcallback("Download failed!")
source = file
# Remember intent with subtitles so categories can be added
# appropriately later. These can be strings, so convert to bool.
subtitles_requested = subtitles
- if type(subtitles_requested) == str:
- subtitles_requested = subtitles_requested.lower() == 'true'
+ if type(subtitles_requested) is str:
+ subtitles_requested = subtitles_requested.lower() == "true"
- subtitles = subtitles and d['subtitles']
+ subtitles = subtitles and d["subtitles"]
- statuscallback('Converting...', -1)
+ statuscallback("Converting...", -1)
concurrency = get_worker_concurrency()
file = encode.encode(
file, convertkey, statuscallback, errorcallback, concurrency
)
if not file:
- errorcallback('Convert failed!')
- ext = file.split('.')[-1]
+ errorcallback("Convert failed!")
+ ext = file.split(".")[-1]
- statuscallback('Configuring Pywikibot...', -1)
- pywikibot.config.authenticate['commons.wikimedia.org'] = \
- (consumer_key, consumer_secret) + tuple(oauth)
- pywikibot.config.usernames['commons']['commons'] = username
- pywikibot.Site('commons', 'commons', user=username).login()
+ statuscallback("Configuring Pywikibot...", -1)
+ pywikibot.config.authenticate["commons.wikimedia.org"] = (
+ consumer_key,
+ consumer_secret,
+ ) + tuple(oauth)
+ pywikibot.config.usernames["commons"]["commons"] = username
+ pywikibot.Site("commons", "commons", user=username).login()
# Identify the language codes of all present subtitles. Fallback to
# checking the container ONLY IF yt-dlp was unable to find subtitles.
@@ -169,28 +179,37 @@ def errorcallback(text):
if subtitles:
found_langcodes.update(subtitleuploader.get_subtitle_languages(subtitles))
elif subtitles_requested:
- found_langcodes.update(subtitleuploader.get_container_subtitle_languages(source))
+ found_langcodes.update(
+ subtitleuploader.get_container_subtitle_languages(source)
+ )
# Add additional inferable meta-categories to the file description.
found_categories = set()
found_categories.update(categories.get_inferable_categories(file))
- found_categories.update(categories.get_subtitle_categories(file, found_langcodes))
+ found_categories.update(
+ categories.get_subtitle_categories(file, found_langcodes)
+ )
filedesc = categories.append_categories(filedesc, found_categories)
- statuscallback('Uploading...', -1)
- filename += '.' + ext
+ statuscallback("Uploading...", -1)
+ filename += "." + ext
filename, wikifileurl = upload.upload(
- file, filename, url, http_host,
- filedesc, username, statuscallback, errorcallback
+ file,
+ filename,
+ url,
+ http_host,
+ filedesc,
+ username,
+ statuscallback,
+ errorcallback,
)
if not wikifileurl:
- errorcallback('Upload failed!')
+ errorcallback("Upload failed!")
if subtitles:
try:
subtitleuploader.upload_subtitles(
- subtitles, filename, username,
- statuscallback, errorcallback
+ subtitles, filename, username, statuscallback, errorcallback
)
except TaskAbort:
raise
@@ -207,7 +226,7 @@ def errorcallback(text):
filename=filename,
outputdir=outputdir,
username=username,
- statuscallback=statuscallback
+ statuscallback=statuscallback,
)
except TaskAbort:
raise
@@ -218,23 +237,22 @@ def errorcallback(text):
except NeedServerSideUpload as e:
# json serializer cannot properly serialize an exception
# without losing data, so we change the exception into a dict.
- return {'type': 'ssu', 'hashsum': e.hashsum, 'url': e.url}
+ return {"type": "ssu", "hashsum": e.hashsum, "url": e.url}
except pywikibot.exceptions.Error:
exc_info = sys.exc_info()
raise TaskError(
- (
- 'pywikibot.Error: %s: %s' % (
- exc_info[0].__name__, exc_info[1]
- )
- ).encode('utf-8')).with_traceback(exc_info[2])
+ ("pywikibot.Error: %s: %s" % (exc_info[0].__name__, exc_info[1])).encode(
+ "utf-8"
+ )
+ ).with_traceback(exc_info[2])
else:
- statuscallback('Done!', 100)
- return {'type': 'done', 'filename': filename, 'url': wikifileurl}
+ statuscallback("Done!", 100)
+ return {"type": "done", "filename": filename, "url": wikifileurl}
finally:
- statuscallback('Cleaning up...', -1)
+ statuscallback("Cleaning up...", -1)
pywikibot.stopme()
pywikibot.config.authenticate.clear()
- pywikibot.config.usernames['commons'].clear()
+ pywikibot.config.usernames["commons"].clear()
pywikibot._sites.clear()
shutil.rmtree(outputdir)
diff --git a/video2commons/config.py b/video2commons/config.py
index 35af95b..0ddae49 100644
--- a/video2commons/config.py
+++ b/video2commons/config.py
@@ -11,20 +11,20 @@
tooldir = _os.path.dirname(_os.path.realpath(__file__))
if tooldir.startswith("/workspace"): # we are in buildpack
tooldir = _os.path.expandvars("$TOOL_DATA_DIR/video2commons")
- with open(tooldir + '/../config.json', 'r') as _f:
+ with open(tooldir + "/../config.json", "r") as _f:
_data = _json.load(_f)
except IOError as _e:
- __import__('logging').exception(_e)
+ __import__("logging").exception(_e)
_data = {}
-consumer_key = _data.get('consumer_key')
-consumer_secret = _data.get('consumer_secret')
-api_url = _data.get('api_url')
-redis_pw = _data.get('redis_pw')
-redis_host = _data.get('redis_host')
-session_key = _data.get('session_key')
-http_host = _data.get('http_host')
-webfrontend_uri = _data.get('webfrontend_uri')
-socketio_uri = _data.get('socketio_uri')
-youtube_user = _data.get('youtube_user')
-youtube_pass = _data.get('youtube_pass')
\ No newline at end of file
+consumer_key = _data.get("consumer_key")
+consumer_secret = _data.get("consumer_secret")
+api_url = _data.get("api_url")
+redis_pw = _data.get("redis_pw")
+redis_host = _data.get("redis_host")
+session_key = _data.get("session_key")
+http_host = _data.get("http_host")
+webfrontend_uri = _data.get("webfrontend_uri")
+socketio_uri = _data.get("socketio_uri")
+youtube_user = _data.get("youtube_user")
+youtube_pass = _data.get("youtube_pass")
diff --git a/video2commons/exceptions.py b/video2commons/exceptions.py
index 2838c6c..8491b5e 100644
--- a/video2commons/exceptions.py
+++ b/video2commons/exceptions.py
@@ -49,5 +49,4 @@ class TaskAbort(TaskError):
def __init__(self):
"""Initialize."""
- super().__init__('The task has been aborted.')
-
+ super().__init__("The task has been aborted.")
diff --git a/video2commons/frontend/__init__.py b/video2commons/frontend/__init__.py
index a0c8a0b..6eff5d7 100644
--- a/video2commons/frontend/__init__.py
+++ b/video2commons/frontend/__init__.py
@@ -19,8 +19,6 @@
"""videocommons backend."""
-
-
from video2commons.frontend.app import app
-__all__ = ['app']
+__all__ = ["app"]
diff --git a/video2commons/frontend/api.py b/video2commons/frontend/api.py
index 45943ed..2e90f7e 100644
--- a/video2commons/frontend/api.py
+++ b/video2commons/frontend/api.py
@@ -19,42 +19,44 @@
"""video2commons web API."""
-
-
import json
import traceback
import re
from uuid import uuid4
-from flask import (
- Blueprint, request, session, jsonify, current_app
-)
+from flask import Blueprint, request, session, jsonify, current_app
from video2commons.config import session_key
from video2commons.backend import worker
from video2commons.frontend.shared import (
- redisconnection, check_banned, generate_csrf_token, redis_publish
+ redisconnection,
+ check_banned,
+ generate_csrf_token,
+ redis_publish,
)
from video2commons.frontend.urlextract import (
- do_extract_url, do_validate_filename_unique, do_validate_youtube_id, make_dummy_desc,
- do_validate_filename, do_validate_filedesc, sanitize
-)
-from video2commons.frontend.upload import (
- upload as _upload, status as _uploadstatus
+ do_extract_url,
+ do_validate_filename_unique,
+ do_validate_youtube_id,
+ make_dummy_desc,
+ do_validate_filename,
+ do_validate_filedesc,
+ sanitize,
)
+from video2commons.frontend.upload import upload as _upload, status as _uploadstatus
from video2commons.shared import stats
# Adapted from: https://stackoverflow.com/a/19161373
YOUTUBE_REGEX = (
- r'(https?://)?(www\.)?'
- r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
- r'(watch\?.*?(?=v=)v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
+ r"(https?://)?(www\.)?"
+ r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
+ r"(watch\?.*?(?=v=)v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
)
-api = Blueprint('api', __name__)
+api = Blueprint("api", __name__)
@api.errorhandler(Exception)
@@ -66,18 +68,20 @@ def all_exception_handler(e):
@api.before_request
def check_logged_in():
"""Error if a user is not logged in."""
- if 'username' not in session and \
- request.headers.get('X-V2C-Session-Bypass') != session_key:
- return error_json('Are you logged in?')
+ if (
+ "username" not in session
+ and request.headers.get("X-V2C-Session-Bypass") != session_key
+ ):
+ return error_json("Are you logged in?")
@api.before_request
def csrf_protect():
"""For POSTs, require CSRF token."""
if request.method == "POST":
- token = session.get('_csrf_token')
- if not token or token != request.form.get('_csrf_token'):
- return error_json('Invalid CSRF token. Try reloading this page.')
+ token = session.get("_csrf_token")
+ if not token or token != request.form.get("_csrf_token"):
+ return error_json("Invalid CSRF token. Try reloading this page.")
def format_exception(e):
@@ -87,7 +91,7 @@ def format_exception(e):
if isinstance(e, AssertionError):
return desc
else:
- return f'An exception occurred: {type(e).__name__}: {desc}'
+ return f"An exception occurred: {type(e).__name__}: {desc}"
def error_json(e):
@@ -95,36 +99,28 @@ def error_json(e):
session.rollback()
if isinstance(e, BaseException):
return jsonify(
- step='error',
- error=format_exception(e),
- traceback=traceback.format_exc()
+ step="error", error=format_exception(e), traceback=traceback.format_exc()
)
else:
- return jsonify(
- step='error',
- error=e,
- traceback=None
- )
+ return jsonify(step="error", error=e, traceback=None)
-@api.route('/csrf')
+@api.route("/csrf")
def get_csrf():
"""Get the CSRF token for API-only access."""
- return jsonify(
- csrf=generate_csrf_token()
- )
+ return jsonify(csrf=generate_csrf_token())
-@api.route('/iosession')
+@api.route("/iosession")
def get_iosession():
"""Get a pointer to session for read-only socket.io notifications."""
iosession = str(uuid4())
- redisconnection.set('iosession:' + iosession, session.sid)
- redisconnection.expire('iosession:' + iosession, 60)
+ redisconnection.set("iosession:" + iosession, session.sid)
+ redisconnection.expire("iosession:" + iosession, 60)
return jsonify(iosession=iosession)
-@api.route('/status')
+@api.route("/status")
def status():
"""Get all visible task status for user."""
key, ids = get_tasks()
@@ -133,21 +129,16 @@ def status():
values.append(_status(id))
values = [_f for _f in values if _f]
- rooms = [t['id'] for t in values] + [key]
+ rooms = [t["id"] for t in values] + [key]
return jsonify(
- values=values,
- rooms=rooms,
- username=session['username'],
- stats=get_stats()
+ values=values, rooms=rooms, username=session["username"], stats=get_stats()
)
-@api.route('/status-single')
+@api.route("/status-single")
def status_single():
"""Get the status of one task."""
- return jsonify(
- value=_status(request.args['task'])
- )
+ return jsonify(value=_status(request.args["task"]))
def _status(id):
@@ -157,216 +148,211 @@ def _status(id):
return None
res = worker.main.AsyncResult(id)
- task = {
- 'id': id,
- 'title': title,
- 'hostname': get_hostname_from_task(id)
- }
+ task = {"id": id, "title": title, "hostname": get_hostname_from_task(id)}
try:
state = res.state
except:
- task.update({
- 'status': 'fail',
- 'text': 'The status of the task could not be retrieved.',
- 'traceback': traceback.format_exc()
- })
+ task.update(
+ {
+ "status": "fail",
+ "text": "The status of the task could not be retrieved.",
+ "traceback": traceback.format_exc(),
+ }
+ )
else:
- if state == 'PENDING':
- task.update({
- 'status': 'progress',
- 'text': 'Your task is pending...',
- 'progress': -1
- })
- elif state == 'PROGRESS':
- task.update({
- 'status': 'progress',
- 'text': res.result['text'],
- 'progress': res.result['percent']
- })
- elif state == 'SUCCESS':
+ if state == "PENDING":
+ task.update(
+ {
+ "status": "progress",
+ "text": "Your task is pending...",
+ "progress": -1,
+ }
+ )
+ elif state == "PROGRESS":
+ task.update(
+ {
+ "status": "progress",
+ "text": res.result["text"],
+ "progress": res.result["percent"],
+ }
+ )
+ elif state == "SUCCESS":
if isinstance(res.result, (list, tuple)):
filename, wikifileurl = res.result
- task.update({
- 'status': 'done',
- 'url': wikifileurl,
- 'text': filename
- })
+ task.update({"status": "done", "url": wikifileurl, "text": filename})
elif isinstance(res.result, dict):
- if res.result['type'] == 'done':
- task.update({
- 'status': 'done',
- 'url': res.result['url'],
- 'text': res.result['filename']
- })
- elif res.result['type'] == 'ssu':
- task.update({
- 'status': 'needssu',
- 'filename': res.result['url'].rsplit('/', 1)[-1],
- 'url': res.result['url'],
- 'hashsum': res.result['hashsum']
- })
- elif state == 'FAILURE':
+ if res.result["type"] == "done":
+ task.update(
+ {
+ "status": "done",
+ "url": res.result["url"],
+ "text": res.result["filename"],
+ }
+ )
+ elif res.result["type"] == "ssu":
+ task.update(
+ {
+ "status": "needssu",
+ "filename": res.result["url"].rsplit("/", 1)[-1],
+ "url": res.result["url"],
+ "hashsum": res.result["hashsum"],
+ }
+ )
+ elif state == "FAILURE":
e = res.result
if e is False:
- task.update({
- 'status': 'fail',
- 'text': res.traceback,
- 'restartable': True
- })
+ task.update(
+ {"status": "fail", "text": res.traceback, "restartable": True}
+ )
else:
- task.update({
- 'status': 'fail',
- 'text': format_exception(e),
- 'restartable': (
- (not redisconnection.exists('restarted:' + id)) and
- redisconnection.exists('params:' + id)
- )
- })
- elif state == 'RETRY':
- task.update({
- 'status': 'progress',
- 'text': 'Your task is being rescheduled...',
- 'progress': -1
- })
- elif state == 'ABORTED':
- task.update({
- 'status': 'abort',
- 'text': 'Your task is being aborted...'
- })
+ task.update(
+ {
+ "status": "fail",
+ "text": format_exception(e),
+ "restartable": (
+ (not redisconnection.exists("restarted:" + id))
+ and redisconnection.exists("params:" + id)
+ ),
+ }
+ )
+ elif state == "RETRY":
+ task.update(
+ {
+ "status": "progress",
+ "text": "Your task is being rescheduled...",
+ "progress": -1,
+ }
+ )
+ elif state == "ABORTED":
+ task.update({"status": "abort", "text": "Your task is being aborted..."})
else:
- task.update({
- 'status': 'fail',
- 'text': (
- 'This task is in an unknown state. Please file an issue '
- 'in GitHub: '
- ),
- 'url': 'https://github.com/toolforge/video2commons/issues'
- })
+ task.update(
+ {
+ "status": "fail",
+ "text": (
+ "This task is in an unknown state. Please file an issue "
+ "in GitHub: "
+ ),
+ "url": "https://github.com/toolforge/video2commons/issues",
+ }
+ )
return task
def is_sudoer(username):
"""Check if a user is a sudoer."""
- return username in redisconnection.lrange('sudoers', 0, -1)
+ return username in redisconnection.lrange("sudoers", 0, -1)
def get_tasks():
"""Get a list of visible tasks for user."""
# sudoer = able to monitor all tasks
- username = session['username']
- if session.get('is_maintainer'):
- key = 'alltasks'
+ username = session["username"]
+ if session.get("is_maintainer"):
+ key = "alltasks"
else:
- key = 'tasks:' + username
+ key = "tasks:" + username
return key, redisconnection.lrange(key, 0, -1)[::-1]
def get_stats():
"""Get worker stats from Redis."""
- stats = redisconnection.get('stats')
+ stats = redisconnection.get("stats")
return json.loads(stats) if stats else None
def get_title_from_task(id):
"""Get task title from task ID."""
- return redisconnection.get('titles:' + id)
+ return redisconnection.get("titles:" + id)
def get_hostname_from_task(id):
"""Get the hostname of the worker processing a task from task ID."""
- hostname = redisconnection.get('tasklock:' + id)
+ hostname = redisconnection.get("tasklock:" + id)
# Old tasks don't have a hostname as the value in tasklock and store the
# literal 'T' instead. Reinterpret these values as null.
- if hostname == 'T':
+ if hostname == "T":
hostname = None
return hostname
-@api.route('/extracturl', methods=['POST'])
+@api.route("/extracturl", methods=["POST"])
def extract_url():
"""Extract a video url."""
- url = request.form['url']
+ url = request.form["url"]
return jsonify(**do_extract_url(url))
-@api.route('/makedesc', methods=['POST'])
+@api.route("/makedesc", methods=["POST"])
def make_desc():
"""Create a (mostly-empty) description."""
- filename = request.form['filename']
+ filename = request.form["filename"]
return jsonify(**make_dummy_desc(filename))
-@api.route('/listformats', methods=['POST'])
+@api.route("/listformats", methods=["POST"])
def list_formats():
"""List the possible convert formats from a given audio/video pair."""
formats = []
- prefer = ''
- video = _boolize(request.form['video'])
- audio = _boolize(request.form['audio'])
+ prefer = ""
+ video = _boolize(request.form["video"])
+ audio = _boolize(request.form["audio"])
if video:
if audio:
- formats = ['ogv (Theora/Vorbis)', 'webm (VP8/Vorbis)',
- 'webm (VP9/Opus)', 'webm (AV1/Opus)']
- prefer = 'webm (AV1/Opus)'
+ formats = [
+ "ogv (Theora/Vorbis)",
+ "webm (VP8/Vorbis)",
+ "webm (VP9/Opus)",
+ "webm (AV1/Opus)",
+ ]
+ prefer = "webm (AV1/Opus)"
else:
- formats = ['ogv (Theora)', 'webm (VP8)',
- 'webm (VP9)', 'webm (AV1)']
- prefer = 'webm (AV1)'
+ formats = ["ogv (Theora)", "webm (VP8)", "webm (VP9)", "webm (AV1)"]
+ prefer = "webm (AV1)"
else:
if audio:
- formats = ['ogg (Vorbis)', 'opus (Opus)']
- prefer = 'ogg (Vorbis)'
+ formats = ["ogg (Vorbis)", "opus (Opus)"]
+ prefer = "ogg (Vorbis)"
else:
- raise RuntimeError('Either video or audio must be kept')
+ raise RuntimeError("Either video or audio must be kept")
- return jsonify(
- audio=audio,
- video=video,
- format=prefer,
- formats=formats
- )
+ return jsonify(audio=audio, video=video, format=prefer, formats=formats)
def _boolize(data):
- return data in [True, 'true', 'TRUE', 'True', 1, '1']
+ return data in [True, "true", "TRUE", "True", 1, "1"]
-@api.route('/validatefilename', methods=['POST'])
+@api.route("/validatefilename", methods=["POST"])
def validate_filename():
"""Validate filename for invalid characters/parts."""
- return jsonify(
- filename=do_validate_filename(request.form['filename'])
- )
+ return jsonify(filename=do_validate_filename(request.form["filename"]))
-@api.route('/validatefiledesc', methods=['POST'])
+@api.route("/validatefiledesc", methods=["POST"])
def validate_filedesc():
"""Validate filename for invalid characters/parts."""
- return jsonify(
- filedesc=do_validate_filedesc(request.form['filedesc'])
- )
+ return jsonify(filedesc=do_validate_filedesc(request.form["filedesc"]))
-@api.route('/validatefilenameunique', methods=['POST'])
+@api.route("/validatefilenameunique", methods=["POST"])
def validate_filename_unique():
"""Validate filename isn't already in use on the wiki."""
- return jsonify(
- filename=do_validate_filename_unique(request.form['filename'])
- )
+ return jsonify(filename=do_validate_filename_unique(request.form["filename"]))
-@api.route('/validateurl', methods=['POST'])
+@api.route("/validateurl", methods=["POST"])
def validate_url():
"""Validate that a video belonging to a URL is not already on the wiki."""
- url = request.form['url']
+ url = request.form["url"]
# Check if the URL is a YouTube URL, and if so, extract the ID and validate
# that it doesn't already exist on Commons.
@@ -377,8 +363,7 @@ def validate_url():
return jsonify(entity_url=do_validate_youtube_id(youtube_id))
except Exception as e:
current_app.logger.error(
- f'Error validating YouTube URL "{url}": {e}\n\n'
- f'{traceback.format_exc()}'
+ f'Error validating YouTube URL "{url}": {e}\n\n{traceback.format_exc()}'
)
# Skip validation if errors are encountered, e.g. SPARQL is down.
@@ -392,170 +377,170 @@ def get_backend_keys(format):
MAXSIZE = 5 << 30
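+ # Two-stage templating: the .format() calls here fill {max} and unescape {{acodec}}/{{aext}}, which the lookup table below fills in per format.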
COMBINED_FMT = (
- 'bestvideo[filesize<{max}]+'
- 'bestaudio[acodec={{acodec}}]/'
- 'bestvideo[filesize<{max}]+'
- 'bestaudio[ext={{aext}}]/'
- 'bestvideo+bestaudio/best'
- ).format(max=MAXSIZE)
- VIDEO_FMT = (
- 'bestvideo[filesize<{max}]/'
- 'bestvideo/best'
+ "bestvideo[filesize<{max}]+"
+ "bestaudio[acodec={{acodec}}]/"
+ "bestvideo[filesize<{max}]+"
+ "bestaudio[ext={{aext}}]/"
+ "bestvideo+bestaudio/best"
).format(max=MAXSIZE)
+ VIDEO_FMT = ("bestvideo[filesize<{max}]/bestvideo/best").format(max=MAXSIZE)
AUDIO_FMT = (
- 'bestaudio[acodec={{acodec}}]/'
- 'bestaudio[ext={{aext}}]/'
- 'bestaudio/best'
- ).format(max=MAXSIZE)
+ "bestaudio[acodec={{acodec}}]/bestaudio[ext={{aext}}]/bestaudio/best"
+ ).format()
return {
- 'ogv (Theora)':
- (VIDEO_FMT.format(vcodec='theora', vext='ogv'), 'an.ogv'),
- 'webm (VP8)':
- (VIDEO_FMT.format(vcodec='vp8', vext='webm'), 'an.webm'),
- 'webm (VP9)':
- (VIDEO_FMT.format(vcodec='vp9', vext='webm'), 'an.vp9.webm'),
- 'webm (AV1)':
- (VIDEO_FMT.format(vcodec='av1', vext='webm'), 'an.av1.webm'),
- 'ogg (Vorbis)':
- (AUDIO_FMT.format(acodec='vorbis', aext='ogg'), 'ogg'),
- 'opus (Opus)':
- (AUDIO_FMT.format(acodec='opus', aext='opus'), 'opus'),
- 'ogv (Theora/Vorbis)':
- (COMBINED_FMT.format(
- vcodec='theora', vext='ogv', acodec='vorbis', aext='ogg'),
- 'ogv'),
- 'webm (VP8/Vorbis)':
- (COMBINED_FMT.format(
- vcodec='vp8', vext='webm', acodec='vorbis', aext='ogg'),
- 'webm'),
- 'webm (VP9/Opus)':
- (COMBINED_FMT.format(
- vcodec='vp9', vext='webm', acodec='opus', aext='webm'),
- 'vp9.webm'),
- 'webm (AV1/Opus)':
- (COMBINED_FMT.format(
- vcodec='av1', vext='webm', acodec='opus', aext='webm'),
- 'av1.webm'),
+ "ogv (Theora)": (VIDEO_FMT.format(vcodec="theora", vext="ogv"), "an.ogv"),
+ "webm (VP8)": (VIDEO_FMT.format(vcodec="vp8", vext="webm"), "an.webm"),
+ "webm (VP9)": (VIDEO_FMT.format(vcodec="vp9", vext="webm"), "an.vp9.webm"),
+ "webm (AV1)": (VIDEO_FMT.format(vcodec="av1", vext="webm"), "an.av1.webm"),
+ "ogg (Vorbis)": (AUDIO_FMT.format(acodec="vorbis", aext="ogg"), "ogg"),
+ "opus (Opus)": (AUDIO_FMT.format(acodec="opus", aext="opus"), "opus"),
+ "ogv (Theora/Vorbis)": (
+ COMBINED_FMT.format(
+ vcodec="theora", vext="ogv", acodec="vorbis", aext="ogg"
+ ),
+ "ogv",
+ ),
+ "webm (VP8/Vorbis)": (
+ COMBINED_FMT.format(vcodec="vp8", vext="webm", acodec="vorbis", aext="ogg"),
+ "webm",
+ ),
+ "webm (VP9/Opus)": (
+ COMBINED_FMT.format(vcodec="vp9", vext="webm", acodec="opus", aext="webm"),
+ "vp9.webm",
+ ),
+ "webm (AV1/Opus)": (
+ COMBINED_FMT.format(vcodec="av1", vext="webm", acodec="opus", aext="webm"),
+ "av1.webm",
+ ),
}[format]
-@api.route('/task/run', methods=['POST'])
+@api.route("/task/run", methods=["POST"])
def run_task():
"""Run a task with parameters from session."""
- url = request.form['url']
- ie_key = request.form['extractor']
- subtitles = request.form['subtitles']
- filename = sanitize(request.form['filename'])
- filedesc = request.form['filedesc']
- downloadkey, convertkey = get_backend_keys(request.form['format'])
- username = session['username']
- oauth = (session['access_token_key'], session['access_token_secret'])
-
- taskid = run_task_internal(filename, (
- url, ie_key, subtitles, filename, filedesc,
- downloadkey, convertkey, username, oauth
- ))
+ url = request.form["url"]
+ ie_key = request.form["extractor"]
+ subtitles = request.form["subtitles"]
+ filename = sanitize(request.form["filename"])
+ filedesc = request.form["filedesc"]
+ downloadkey, convertkey = get_backend_keys(request.form["format"])
+ username = session["username"]
+ oauth = (session["access_token_key"], session["access_token_secret"])
+
+ taskid = run_task_internal(
+ filename,
+ (
+ url,
+ ie_key,
+ subtitles,
+ filename,
+ filedesc,
+ downloadkey,
+ convertkey,
+ username,
+ oauth,
+ ),
+ )
- return jsonify(id=taskid, step='success')
+ return jsonify(id=taskid, step="success")
def run_task_internal(filename, params):
"""Internal run task function to accept whatever params given."""
banned = check_banned()
- assert not banned, 'You are banned from using this tool! Reason: ' + banned
+ assert not banned, "You are banned from using this tool! Reason: " + banned
res = worker.main.delay(*params)
taskid = res.id
expire = 14 * 24 * 3600 # 2 weeks
- redisconnection.lpush('alltasks', taskid)
- redisconnection.expire('alltasks', expire)
- redisconnection.lpush('tasks:' + session['username'], taskid)
- redisconnection.expire('tasks:' + session['username'], expire)
- redisconnection.set('titles:' + taskid, filename)
- redisconnection.expire('titles:' + taskid, expire)
- redisconnection.set('params:' + taskid, json.dumps(params))
- redisconnection.expire('params:' + taskid, expire)
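+ # Register the task globally and per-user; the stored title feeds the status UI and the stored params make restarts possible.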
+ redisconnection.lpush("alltasks", taskid)
+ redisconnection.expire("alltasks", expire)
+ redisconnection.lpush("tasks:" + session["username"], taskid)
+ redisconnection.expire("tasks:" + session["username"], expire)
+ redisconnection.set("titles:" + taskid, filename)
+ redisconnection.expire("titles:" + taskid, expire)
+ redisconnection.set("params:" + taskid, json.dumps(params))
+ redisconnection.expire("params:" + taskid, expire)
try:
stats.increment_queue_counter(redisconnection)
except Exception:
pass # We don't want to fail the API call if we can't update stats.
- redis_publish('add', {'taskid': taskid, 'user': session['username']})
- redis_publish('update', {'taskid': taskid, 'data': _status(taskid)})
+ redis_publish("add", {"taskid": taskid, "user": session["username"]})
+ redis_publish("update", {"taskid": taskid, "data": _status(taskid)})
return taskid
-@api.route('/task/restart', methods=['POST'])
+@api.route("/task/restart", methods=["POST"])
def restart_task():
"""Reastart a task: run a task with params of another task."""
- id = request.form['id']
+ id = request.form["id"]
- filename = redisconnection.get('titles:' + id)
- assert filename, 'Task does not exist'
- if not session.get('is_maintainer'):
- assert id in \
- redisconnection.lrange('tasks:' + session['username'], 0, -1), \
- 'Task must belong to you.'
+ filename = redisconnection.get("titles:" + id)
+ assert filename, "Task does not exist"
+ if not session.get("is_maintainer"):
+ assert id in redisconnection.lrange("tasks:" + session["username"], 0, -1), (
+ "Task must belong to you."
+ )
- restarted = redisconnection.get('restarted:' + id)
- assert not restarted, \
- 'Task has already been restarted with id ' + restarted
- params = redisconnection.get('params:' + id)
- assert params, 'Could not extract the task parameters.'
+ restarted = redisconnection.get("restarted:" + id)
+ assert not restarted, "Task has already been restarted with id " + restarted
+ params = redisconnection.get("params:" + id)
+ assert params, "Could not extract the task parameters."
newid = run_task_internal(filename, json.loads(params))
- redisconnection.set('restarted:' + id, newid)
+ redisconnection.set("restarted:" + id, newid)
- redis_publish('update', {'taskid': id, 'data': _status(id)})
+ redis_publish("update", {"taskid": id, "data": _status(id)})
- return jsonify(restart='success', id=id, taskid=newid)
+ return jsonify(restart="success", id=id, taskid=newid)
-@api.route('/task/remove', methods=['POST'])
+@api.route("/task/remove", methods=["POST"])
def remove_task():
"""Revove a task from list of tasks."""
- id = request.form['id']
- username = session['username']
- if not session.get('is_maintainer'):
- assert id in \
- redisconnection.lrange('tasks:' + username, 0, -1), \
- 'Task must belong to you.'
- redisconnection.lrem('alltasks', 0, id)
- redisconnection.lrem('tasks:' + username, 0, id)
- redisconnection.delete('titles:' + id)
- redisconnection.delete('params:' + id)
- redisconnection.delete('restarted:' + id)
+ id = request.form["id"]
+ username = session["username"]
+ if not session.get("is_maintainer"):
+ assert id in redisconnection.lrange("tasks:" + username, 0, -1), (
+ "Task must belong to you."
+ )
+ redisconnection.lrem("alltasks", 0, id)
+ redisconnection.lrem("tasks:" + username, 0, id)
+ redisconnection.delete("titles:" + id)
+ redisconnection.delete("params:" + id)
+ redisconnection.delete("restarted:" + id)
- redis_publish('remove', {'taskid': id})
+ redis_publish("remove", {"taskid": id})
- return jsonify(remove='success', id=id)
+ return jsonify(remove="success", id=id)
-@api.route('/task/abort', methods=['POST'])
+@api.route("/task/abort", methods=["POST"])
def abort_task():
"""Abort a task."""
- id = request.form['id']
- username = session['username']
- if not session.get('is_maintainer'):
- assert id in \
- redisconnection.lrange('tasks:' + username, 0, -1), \
- 'Task must belong to you.'
+ id = request.form["id"]
+ username = session["username"]
+ if not session.get("is_maintainer"):
+ assert id in redisconnection.lrange("tasks:" + username, 0, -1), (
+ "Task must belong to you."
+ )
worker.main.AsyncResult(id).abort()
- redis_publish('update', {'taskid': id, 'data': _status(id)})
+ redis_publish("update", {"taskid": id, "data": _status(id)})
- return jsonify(remove='success', id=id)
+ return jsonify(remove="success", id=id)
# No nested blueprints in flask; we have to do this :(
-@api.route('/upload/upload', methods=['POST'])
+@api.route("/upload/upload", methods=["POST"])
def upload():
return _upload()
-@api.route('/upload/status', methods=['POST'])
+@api.route("/upload/status", methods=["POST"])
def uploadstatus():
return _uploadstatus()
diff --git a/video2commons/frontend/app.py b/video2commons/frontend/app.py
index e32ef38..2986685 100644
--- a/video2commons/frontend/app.py
+++ b/video2commons/frontend/app.py
@@ -19,32 +19,35 @@
"""video2commons web frontend."""
-
-
import json
import logging
import traceback
from urllib.parse import urlparse, urljoin
-from flask import (
- Flask, request, Response, session, render_template, redirect, url_for
-)
+from flask import Flask, request, Response, session, render_template, redirect, url_for
from mwoauth import AccessToken, ConsumerToken, RequestToken, Handshaker
from requests_oauthlib import OAuth1
import requests
from video2commons.config import (
- consumer_key, consumer_secret, api_url, webfrontend_uri, socketio_uri
+ consumer_key,
+ consumer_secret,
+ api_url,
+ webfrontend_uri,
+ socketio_uri,
)
from video2commons.frontend.redisession import RedisSessionInterface
from video2commons.frontend.shared import redisconnection, check_banned
from video2commons.frontend.api import api, is_sudoer
from video2commons.frontend.i18n import (
- i18nblueprint, translate as _, getlanguage, is_rtl
+ i18nblueprint,
+ translate as _,
+ getlanguage,
+ is_rtl,
)
-ISSUE_URL = 'https://github.com/toolforge/video2commons/issues'
+ISSUE_URL = "https://github.com/toolforge/video2commons/issues"
consumer_token = ConsumerToken(consumer_key, consumer_secret)
handshaker = Handshaker(api_url, consumer_token)
@@ -53,23 +56,23 @@
app.logger.setLevel(logging.INFO)
-app.session_cookie_name = 'v2c-session'
+app.session_cookie_name = "v2c-session"
app.session_interface = RedisSessionInterface(redisconnection)
-app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 3600
+app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 3600
config_p = {
- 'webfrontend_uri': webfrontend_uri,
- 'socketio_uri': socketio_uri,
+ "webfrontend_uri": webfrontend_uri,
+ "socketio_uri": socketio_uri,
}
-app.jinja_env.globals['config'] = config_p
-app.jinja_env.globals['_'] = _
-app.jinja_env.globals['lang'] = getlanguage
-app.jinja_env.tests['rtl'] = is_rtl
+app.jinja_env.globals["config"] = config_p
+app.jinja_env.globals["_"] = _
+app.jinja_env.globals["lang"] = getlanguage
+app.jinja_env.tests["rtl"] = is_rtl
-app.register_blueprint(api, url_prefix='/api')
-app.register_blueprint(i18nblueprint, url_prefix='/i18n')
+app.register_blueprint(api, url_prefix="/api")
+app.register_blueprint(i18nblueprint, url_prefix="/i18n")
@app.errorhandler(Exception)
@@ -80,25 +83,24 @@ def all_exception_handler(e):
try:
message = (
- f'Please file an issue with this error in GitHub: '
- f'{issue_link}'
+ f"Please file an issue with this error in GitHub: {issue_link}"
)
- loggedin = 'username' in session
+ loggedin = "username" in session
stacktrace = traceback.format_exc()
except:
message = (
- f'Something went terribly wrong, '
- f'and we failed to find the cause automatically. '
- f'Please file an issue in GitHub: {issue_link}'
+ f"Something went terribly wrong, "
+ f"and we failed to find the cause automatically. "
+ f"Please file an issue in GitHub: {issue_link}"
)
loggedin = False
try:
return render_template(
- 'error.min.html',
+ "error.min.html",
html_message=message,
stacktrace=stacktrace,
- loggedin=loggedin
+ loggedin=loggedin,
), 500
except:
return message, 500
@@ -107,65 +109,56 @@ def all_exception_handler(e):
@app.before_request
def force_https():
"""Force user to redirect to https, checking X-Forwarded-Proto."""
- if request.headers.get('X-Forwarded-Proto') == 'http':
- return redirect('https://' + request.headers['Host'] +
- request.headers['X-Original-URI'],
- code=301)
+ if request.headers.get("X-Forwarded-Proto") == "http":
+ return redirect(
+ "https://" + request.headers["Host"] + request.headers["X-Original-URI"],
+ code=301,
+ )
-@app.route('/config')
+@app.route("/config")
def get_config():
"""Get the current config as a dict and output Javascript."""
- data = 'window.config=' + json.dumps(config_p) + ';'
- return Response(data, mimetype='application/javascript; charset=utf-8')
+ data = "window.config=" + json.dumps(config_p) + ";"
+ return Response(data, mimetype="application/javascript; charset=utf-8")
-@app.route('/')
+@app.route("/")
def main():
"""Main page."""
banned = check_banned()
if banned:
return render_template(
- 'error.min.html',
- message='You are banned from using this tool! Reason: ' + banned,
- loggedin=False
+ "error.min.html",
+ message="You are banned from using this tool! Reason: " + banned,
+ loggedin=False,
)
try:
auth = dologin()
- session['language'] = querylanguage(auth)
+ session["language"] = querylanguage(auth)
except:
# SECURITY: If we cannot login, the session is invalid.
app.session_interface.abandon_session(app, session)
- return render_template(
- 'main.min.html',
- loggedin=False
- )
+ return render_template("main.min.html", loggedin=False)
- return render_template(
- 'main.min.html',
- loggedin=True
- )
+ return render_template("main.min.html", loggedin=True)
def dologin():
"""Attempt to login."""
- if not (
- 'access_token_key' in session and
- 'access_token_secret' in session
- ):
+ if not ("access_token_key" in session and "access_token_secret" in session):
raise NameError("No access keys")
access_token = AccessToken(
- session['access_token_key'],
- session['access_token_secret']
+ session["access_token_key"], session["access_token_secret"]
)
- session['username'] = handshaker.identify(access_token)['username']
+ session["username"] = handshaker.identify(access_token)["username"]
auth = OAuth1(
client_key=consumer_token.key,
client_secret=consumer_token.secret,
resource_owner_key=access_token.key,
- resource_owner_secret=access_token.secret
+ resource_owner_secret=access_token.secret,
)
return auth
@@ -173,21 +166,21 @@ def dologin():
def querylanguage(auth):
"""Query user's language that's available on v2c."""
- default = 'en'
+ default = "en"
r = requests.post(
- url=api_url.replace('index.php', 'api.php'),
+ url=api_url.replace("index.php", "api.php"),
data={
- 'action': 'query',
- 'format': 'json',
- 'meta': 'userinfo',
- 'uiprop': 'options'
+ "action": "query",
+ "format": "json",
+ "meta": "userinfo",
+ "uiprop": "options",
},
- auth=auth
+ auth=auth,
)
try:
- language = r.json()['query']['userinfo']['options']['language']
+ language = r.json()["query"]["userinfo"]["options"]["language"]
except (NameError, KeyError):
return default
@@ -197,71 +190,74 @@ def querylanguage(auth):
return language
-@app.route('/oauthinit')
+@app.route("/oauthinit")
def loginredirect():
"""Initialize OAuth login."""
app.session_interface.abandon_session(app, session)
redirecturl, request_token = handshaker.initiate()
- session['request_token_key'], session['request_token_secret'] = \
- request_token.key, request_token.secret
- session['return_to_url'] = url_for('main')
+ session["request_token_key"], session["request_token_secret"] = (
+ request_token.key,
+ request_token.secret,
+ )
+ session["return_to_url"] = url_for("main")
- returnto = request.args.get('returnto')
+ returnto = request.args.get("returnto")
if returnto:
ref_url = urlparse(request.url_root)
test_url = urlparse(urljoin(request.host_url, returnto))
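+ # Follow only same-site return URLs (matching scheme, host, and path prefix) to avoid an open redirect.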
if (
- test_url.scheme == ref_url.scheme and
- test_url.netloc == ref_url.netloc and
- test_url.path.startswith(ref_url.path)
+ test_url.scheme == ref_url.scheme
+ and test_url.netloc == ref_url.netloc
+ and test_url.path.startswith(ref_url.path)
):
- session['return_to_url'] = returnto
+ session["return_to_url"] = returnto
return redirect(redirecturl)
-@app.route('/oauthcallback')
+@app.route("/oauthcallback")
def logincallback():
"""Finialize OAuth login."""
request_token = RequestToken(
- session['request_token_key'],
- session['request_token_secret']
+ session["request_token_key"], session["request_token_secret"]
)
access_token = handshaker.complete(request_token, request.query_string)
- session.pop('access_token_key', None)
- session.pop('access_token_secret', None)
- session.pop('username', None)
+ session.pop("access_token_key", None)
+ session.pop("access_token_secret", None)
+ session.pop("username", None)
identify = handshaker.identify(access_token)
- is_contributor = identify['editcount'] >= 50
- is_maintainer = is_sudoer(identify['username'])
- is_autoconfirmed = 'autoconfirmed' in identify['rights']
+ is_contributor = identify["editcount"] >= 50
+ is_maintainer = is_sudoer(identify["username"])
+ is_autoconfirmed = "autoconfirmed" in identify["rights"]
# Only allow autoconfirmed users either with at least 50 edits or
# maintainer status to use this tool.
if not (is_autoconfirmed and (is_contributor or is_maintainer)):
return render_template(
- 'error.min.html',
- message='You must be an autoconfirmed Commons user '
- 'with at least 50 edits to use this tool.',
- loggedin=True
+ "error.min.html",
+ message="You must be an autoconfirmed Commons user "
+ "with at least 50 edits to use this tool.",
+ loggedin=True,
)
- session['access_token_key'], session['access_token_secret'] = \
- access_token.key, access_token.secret
+ session["access_token_key"], session["access_token_secret"] = (
+ access_token.key,
+ access_token.secret,
+ )
- session['username'] = identify['username']
- session['is_maintainer'] = is_maintainer
+ session["username"] = identify["username"]
+ session["is_maintainer"] = is_maintainer
- return redirect(session.get('return_to_url', url_for('main')))
+ return redirect(session.get("return_to_url", url_for("main")))
-@app.route('/logout')
+@app.route("/logout")
def logout():
"""Logout: clear all session data."""
session.clear()
- return redirect(url_for('main'))
+ return redirect(url_for("main"))
diff --git a/video2commons/frontend/i18n.py b/video2commons/frontend/i18n.py
index 52b96cb..8d5d733 100644
--- a/video2commons/frontend/i18n.py
+++ b/video2commons/frontend/i18n.py
@@ -19,15 +19,13 @@
"""video2commons web i18n module."""
-
-
import os
import json
from flask import Blueprint, Response, request, session, g
from video2commons.frontend.shared import redisconnection
-i18nblueprint = Blueprint('i18n', __name__)
+i18nblueprint = Blueprint("i18n", __name__)
_d = os.path.dirname(os.path.realpath(__file__))
@@ -39,16 +37,16 @@ def max_age(response):
return response
-@i18nblueprint.route('/')
+@i18nblueprint.route("/")
def urlget(lang):
"""Get the i18n of language lang and output Javascript."""
- data = 'window.i18n=' + json.dumps(get(lang)) + ';'
- return Response(data, mimetype='application/javascript; charset=utf-8')
+ data = "window.i18n=" + json.dumps(get(lang)) + ";"
+ return Response(data, mimetype="application/javascript; charset=utf-8")
def get(lang):
"""Get the i18n of language lang and output dict."""
- i18nkey = 'i18n:' + lang
+ i18nkey = "i18n:" + lang
gval = g.get(i18nkey, None)
if gval:
return gval
@@ -58,8 +56,8 @@ def get(lang):
data = {}
fallbacklist = _create_fallback(lang)
datafiles = _loadi18nfiles(fallbacklist)
- for key in datafiles['en']:
- if key == '@metadata':
+ for key in datafiles["en"]:
+ if key == "@metadata":
# @metadata is a dict not a string
continue
@@ -70,11 +68,11 @@ def get(lang):
# if the translation breaks due to double escaping,
# oh well, why are you hacking this tool?
# --XSS prevention
- data[key] = data[key].replace('<', '<')
- data[key] = data[key].replace('>', '>')
+ data[key] = data[key].replace("<", "<")
+ data[key] = data[key].replace(">", ">")
break
- data['@lang'] = lang
- data['@dir'] = _dir(lang)
+ data["@lang"] = lang
+ data["@dir"] = _dir(lang)
setattr(g, i18nkey, data)
redisconnection.setex(i18nkey, 60, json.dumps(data))
@@ -85,40 +83,40 @@ def _loadi18nfiles(fallbacklist):
datafiles = {}
for code in fallbacklist:
if code not in datafiles:
- path = _d + '/i18n/' + code + '.json'
+ path = _d + "/i18n/" + code + ".json"
if os.path.isfile(path):
- with open(path, 'r') as f:
+ with open(path, "r") as f:
datafiles[code] = json.load(f)
return datafiles
def _create_fallback(lang):
- fallbacks = _loadmetadatafile('fallbacks').get(lang, [])
+ fallbacks = _loadmetadatafile("fallbacks").get(lang, [])
fallbacks = fallbacks if isinstance(fallbacks, list) else [fallbacks]
- return [lang] + fallbacks + ['en']
+ return [lang] + fallbacks + ["en"]
def translate(key):
"""Translate a key in user language."""
- return get(getlanguage()).get(key, '&lt;' + key + '&gt;')
+ return get(getlanguage()).get(key, "&lt;" + key + "&gt;")
def getlanguage():
"""Get the user language."""
- gval = g.get('language', None)
+ gval = g.get("language", None)
if gval:
return gval
for lang in [
- request.form.get('uselang'),
- request.args.get('uselang'),
- session.get('language'),
+ request.form.get("uselang"),
+ request.args.get("uselang"),
+ session.get("language"),
request.accept_languages.best,
]:
if lang and _islang(lang):
break
else:
- lang = 'en'
+ lang = "en"
g.language = lang
@@ -126,13 +124,13 @@ def getlanguage():
def _loadmetadatafile(metadata):
- key = 'i18nmeta-' + metadata
+ key = "i18nmeta-" + metadata
gval = g.get(key, None)
if gval:
return gval
- path = _d + '/i18n-metadata/' + metadata + '.json'
- with open(path, 'r') as f:
+ path = _d + "/i18n-metadata/" + metadata + ".json"
+ with open(path, "r") as f:
data = json.load(f)
setattr(g, key, data)
@@ -140,13 +138,13 @@ def _loadmetadatafile(metadata):
def _islang(lang):
- return lang in _loadmetadatafile('alllangs')
+ return lang in _loadmetadatafile("alllangs")
def _dir(lang):
- return 'rtl' if lang in _loadmetadatafile('rtl') else 'ltr'
+ return "rtl" if lang in _loadmetadatafile("rtl") else "ltr"
def is_rtl(lang):
"""Jinja2 test for rtl-ness."""
- return get(lang).get('@dir') == 'rtl'
+ return get(lang).get("@dir") == "rtl"
diff --git a/video2commons/frontend/redisession.py b/video2commons/frontend/redisession.py
index 4ab899b..7fdbda8 100644
--- a/video2commons/frontend/redisession.py
+++ b/video2commons/frontend/redisession.py
@@ -44,7 +44,7 @@ class RedisSessionInterface(SessionInterface):
serializer = json
session_class = RedisSession
- def __init__(self, redis=None, prefix='session:'):
+ def __init__(self, redis=None, prefix="session:"):
"""Initialize the instance."""
if redis is None:
redis = Redis()
@@ -83,28 +83,37 @@ def open_session(self, app, request):
def save_session(self, app, session, response):
"""Save session to Redis."""
domain = self.get_cookie_domain(app)
- path = url_for('main', _external=False)
+ path = url_for("main", _external=False)
if session is None:
return
elif not session:
self.redis.delete(self.prefix + session.sid)
if session.modified:
- response.delete_cookie(app.session_cookie_name,
- domain=domain, path=path)
+ response.delete_cookie(
+ app.session_cookie_name, domain=domain, path=path
+ )
else:
redis_exp = self.get_redis_expiration_time(app, session)
cookie_exp = self.get_expiration_time(app, session)
if session.modified:
val = self.serializer.dumps(dict(session))
- self.redis.setex(self.prefix + session.sid,
- int(redis_exp.total_seconds()), val)
+ self.redis.setex(
+ self.prefix + session.sid, int(redis_exp.total_seconds()), val
+ )
else:
- self.redis.expire(self.prefix + session.sid,
- int(redis_exp.total_seconds()))
- response.set_cookie(app.session_cookie_name, session.sid,
- expires=cookie_exp, httponly=True,
- domain=domain, path=path, secure=True)
+ self.redis.expire(
+ self.prefix + session.sid, int(redis_exp.total_seconds())
+ )
+ response.set_cookie(
+ app.session_cookie_name,
+ session.sid,
+ expires=cookie_exp,
+ httponly=True,
+ domain=domain,
+ path=path,
+ secure=True,
+ )
def abandon_session(self, app, session):
"""Delete the session from redis, empty it, and reinit."""
diff --git a/video2commons/frontend/shared.py b/video2commons/frontend/shared.py
index 4400db5..3cfe5fd 100644
--- a/video2commons/frontend/shared.py
+++ b/video2commons/frontend/shared.py
@@ -19,8 +19,6 @@
"""video2commons web shared."""
-
-
import json
from uuid import uuid4
@@ -29,8 +27,7 @@
from video2commons.config import redis_pw, redis_host
-redisconnection = Redis(host=redis_host, db=3, password=redis_pw,
- decode_responses=True)
+redisconnection = Redis(host=redis_host, db=3, password=redis_pw, decode_responses=True)
def check_banned():
@@ -40,10 +37,10 @@ def check_banned():
def generate_csrf_token():
"""Generate a CSRF token."""
- if '_csrf_token' not in session:
- session['_csrf_token'] = str(uuid4())
- return session['_csrf_token']
+ if "_csrf_token" not in session:
+ session["_csrf_token"] = str(uuid4())
+ return session["_csrf_token"]
def redis_publish(typ, data):
- redisconnection.publish('v2cnotif:'+typ, json.dumps(data))
+ redisconnection.publish("v2cnotif:" + typ, json.dumps(data))
diff --git a/video2commons/frontend/upload.py b/video2commons/frontend/upload.py
index a51ae8c..9bb92f2 100644
--- a/video2commons/frontend/upload.py
+++ b/video2commons/frontend/upload.py
@@ -18,7 +18,6 @@
#
-
import os
import re
import uuid
@@ -26,8 +25,8 @@
from flask import request, jsonify
-RE_CONTENT_RANGE = re.compile(r'^bytes (\d+)-(\d+)/(\d+)$')
-RE_ALLOWED_FILEKEYS = re.compile(r'^[a-zA-Z0-9-]+$')
+RE_CONTENT_RANGE = re.compile(r"^bytes (\d+)-(\d+)/(\d+)$")
+RE_ALLOWED_FILEKEYS = re.compile(r"^[a-zA-Z0-9-]+$")
class WrongOffset(Exception):
@@ -37,8 +36,9 @@ def __init__(self, offset):
def getpath(digest):
- return os.path.join(os.path.dirname(os.path.realpath(__file__)),
- 'static/uploads', digest)
+ return os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "static/uploads", digest
+ )
def stat(permpath):
@@ -47,42 +47,43 @@ def stat(permpath):
# Flask endpoint
def upload():
- f = request.files['file']
+ f = request.files["file"]
assert f, "Where's my file?"
- filekey = request.form.get('filekey') or str(uuid.uuid1())
- assert RE_ALLOWED_FILEKEYS.match('filekey'), 'Unacceptable file key'
+ filekey = request.form.get("filekey") or str(uuid.uuid1())
+ assert RE_ALLOWED_FILEKEYS.match(filekey), "Unacceptable file key"
permpath = getpath(filekey)
- content_range = (f.headers.get('Content-Range') or
- request.headers.get('Content-Range'))
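+ # A Content-Range header (on the file part or the request) signals a chunked upload.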
+ content_range = f.headers.get("Content-Range") or request.headers.get(
+ "Content-Range"
+ )
if content_range:
result, kwargs = handle_chunked(f, permpath, content_range)
else:
result, kwargs = handle_full(f, permpath)
- kwargs['filekey'] = filekey
+ kwargs["filekey"] = filekey
return jsonify(result=result, **kwargs)
# Flask endpoint
def status():
- permpath = getpath(request.form['filekey'])
+ permpath = getpath(request.form["filekey"])
return jsonify(offset=stat(permpath))
def handle_full(f, permpath):
f.save(permpath)
- return 'Success', {}
+ return "Success", {}
def handle_chunked(f, permpath, content_range):
try:
content_range = RE_CONTENT_RANGE.match(content_range)
- assert content_range, 'Invalid content range!'
+ assert content_range, "Invalid content range!"
cr1, cr2, cr3 = [int(content_range.group(i)) for i in range(1, 4)]
@@ -94,7 +95,7 @@ def handle_chunked(f, permpath, content_range):
if size != cr1:
raise WrongOffset(size)
- with open(permpath, 'ab') as dest:
+ with open(permpath, "ab") as dest:
shutil.copyfileobj(f, dest)
except WrongOffset as e:
@@ -102,8 +103,9 @@ def handle_chunked(f, permpath, content_range):
else:
size = stat(permpath)
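+ # Report how many bytes are already stored so the client can resume from there.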
if size < cr3:
- return 'Continue', {'offset': size}
+ return "Continue", {"offset": size}
elif size > cr3:
- raise RuntimeError('What?! Uploaded file is larger than '
- 'what it is supposed to be?')
- return 'Success', {}
+ raise RuntimeError(
+ "What?! Uploaded file is larger than what it is supposed to be?"
+ )
+ return "Success", {}
diff --git a/video2commons/frontend/urlextract.py b/video2commons/frontend/urlextract.py
index 1251eea..e32a32a 100644
--- a/video2commons/frontend/urlextract.py
+++ b/video2commons/frontend/urlextract.py
@@ -21,10 +21,7 @@
from collections import OrderedDict
from video2commons.backend.encode.transcode import WebVideoTranscode
-from video2commons.config import (
- tooldir, youtube_user, youtube_pass, consumer_key, consumer_secret
-)
-from pywikibot.data import sparql
+from video2commons.config import tooldir, youtube_user, youtube_pass
import re
import emoji
@@ -37,9 +34,9 @@
SITE = pywikibot.Site()
# File extensions are probably alphanumeric with 0 to 4 chars
-RE_EXTENSION = re.compile(r'^[a-z0-9]{0,4}$', re.IGNORECASE)
+RE_EXTENSION = re.compile(r"^[a-z0-9]{0,4}$", re.IGNORECASE)
-DEFAULT_LICENSE = '{{subst:nld|}}'
+DEFAULT_LICENSE = "{{subst:nld|}}"
FILEDESC_TEMPLATE = """
=={{int:filedesc}}==
{{Information
@@ -71,212 +68,214 @@
def make_dummy_desc(filename):
filedesc = FILEDESC_TEMPLATE % {
- 'desc': '',
- 'date': '',
- 'source': '',
- 'uploader': '',
- 'license': DEFAULT_LICENSE
+ "desc": "",
+ "date": "",
+ "source": "",
+ "uploader": "",
+ "license": DEFAULT_LICENSE,
}
# Remove the extension
- filename = filename.rsplit('.', 1)
+ filename = filename.rsplit(".", 1)
if len(filename) == 1 or RE_EXTENSION.match(filename[1]):
filename = filename[0]
else:
- filename = '.'.join(filename)
+ filename = ".".join(filename)
return {
- 'extractor': '(uploads)',
- 'filedesc': filedesc.strip(),
- 'filename': sanitize(filename)
+ "extractor": "(uploads)",
+ "filedesc": filedesc.strip(),
+ "filename": sanitize(filename),
}
def do_extract_url(url):
"""Extract a video url."""
params = {
- 'format': 'bestvideo+bestaudio/best',
- 'outtmpl': '/dev/null',
- 'writedescription': True,
- 'writeinfojson': True,
- 'writesubtitles': False,
- 'subtitlesformat': 'srt/ass/vtt/best',
- 'cachedir': '/tmp/',
- 'noplaylist': False,
+ "format": "bestvideo+bestaudio/best",
+ "outtmpl": "/dev/null",
+ "writedescription": True,
+ "writeinfojson": True,
+ "writesubtitles": False,
+ "subtitlesformat": "srt/ass/vtt/best",
+ "cachedir": "/tmp/",
+ "noplaylist": False,
}
- if '.youtube.com/' in url:
+ if ".youtube.com/" in url:
# https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies
# https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp
- params.update({
- 'cookiefile': tooldir + '/../cookies.txt',
- 'username': youtube_user,
- 'password': youtube_pass
- })
+ params.update(
+ {
+ "cookiefile": tooldir + "/../cookies.txt",
+ "username": youtube_user,
+ "password": youtube_pass,
+ }
+ )
with yt_dlp.YoutubeDL(params) as dl:
info = dl.extract_info(url, download=False)
# Extract playlist entries if this is a playlist.
- if info and 'entries' in info:
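+ # yt-dlp exposes playlist items via an "entries" list; single videos have none.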
+ if info and "entries" in info:
videos = []
- for entry in info['entries']:
+ for entry in info["entries"]:
video_info = _extract_info(entry)
videos.append(video_info)
return {
- 'type': 'playlist',
- 'id': info.get('id', ''),
- 'title': info.get('title', ''),
- 'url': url,
- 'videos': videos
+ "type": "playlist",
+ "id": info.get("id", ""),
+ "title": info.get("title", ""),
+ "url": url,
+ "videos": videos,
}
video_info = _extract_info(info)
- return { 'type': 'single', **video_info }
+ return {"type": "single", **video_info}
def _extract_info(info):
"""Process metadata for a single video."""
- assert 'formats' in info or info.get('direct'), \
- 'Your url cannot be processed correctly'
+ assert "formats" in info or info.get("direct"), (
+ "Your url cannot be processed correctly"
+ )
- ie_key = info['extractor_key']
- title = (info.get('title') or '').strip()
- url = info.get('webpage_url')
+ ie_key = info["extractor_key"]
+ title = (info.get("title") or "").strip()
+ url = info.get("webpage_url")
filedesc = FILEDESC_TEMPLATE % {
- 'desc': _desc(url, ie_key, title, info),
- 'date': _date(url, ie_key, title, info),
- 'source': _source(url, ie_key, title, info),
- 'uploader': _uploader(url, ie_key, title, info),
- 'license': _license(url, ie_key, title, info)
+ "desc": _desc(url, ie_key, title, info),
+ "date": _date(url, ie_key, title, info),
+ "source": _source(url, ie_key, title, info),
+ "uploader": _uploader(url, ie_key, title, info),
+ "license": _license(url, ie_key, title, info),
}
return {
- 'url': url,
- 'extractor': ie_key,
- 'filedesc': filedesc.strip(),
- 'filename': sanitize(title),
- 'date': _date(url, ie_key, title, info)
+ "url": url,
+ "extractor": ie_key,
+ "filedesc": filedesc.strip(),
+ "filename": sanitize(title),
+ "date": _date(url, ie_key, title, info),
}
def _date(url, ie_key, title, info):
- date = (info.get('upload_date') or '').strip()
- if re.match(r'^[0-9]{8}$', date):
- date = '%s-%s-%s' % (date[0:4], date[4:6], date[6:8])
+ date = (info.get("upload_date") or "").strip()
+ if re.match(r"^[0-9]{8}$", date):
+ date = "%s-%s-%s" % (date[0:4], date[4:6], date[6:8])
return date
def _source(url, ie_key, title, info):
- if info['id']:
- if ie_key == 'Youtube':
- return '{{From YouTube|1=%(id)s|2=%(title)s}}' % \
- {'id': info['id'], 'title': escape_wikitext(title)}
- elif ie_key == 'Vimeo':
- return '{{From Vimeo|1=%(id)s|2=%(title)s}}' % \
- {'id': info['id'], 'title': escape_wikitext(title)}
-
- if ie_key == 'Generic':
+ if info["id"]:
+ if ie_key == "Youtube":
+ return "{{From YouTube|1=%(id)s|2=%(title)s}}" % {
+ "id": info["id"],
+ "title": escape_wikitext(title),
+ }
+ elif ie_key == "Vimeo":
+ return "{{From Vimeo|1=%(id)s|2=%(title)s}}" % {
+ "id": info["id"],
+ "title": escape_wikitext(title),
+ }
+
+ if ie_key == "Generic":
return url
else:
- if ':' in info['extractor']:
+ if ":" in info["extractor"]:
# Try to find the name of the 'owner' of this sub-ie
- ie_tmp = info['extractor'][:info['extractor'].index(':')]
+ ie_tmp = info["extractor"][: info["extractor"].index(":")]
for ie in yt_dlp.gen_extractors():
if ie.IE_NAME == ie_tmp:
ie_key = ie.ie_key()
break
- return '[%(url)s %(title)s - %(extractor)s]' % \
- {'url': url, 'title': escape_wikitext(title), 'extractor': ie_key}
+ return "[%(url)s %(title)s - %(extractor)s]" % {
+ "url": url,
+ "title": escape_wikitext(title),
+ "extractor": ie_key,
+ }
def _desc(url, ie_key, title, info):
- desc_orig = desc = (info.get('description') or '').strip() or title
+ desc_orig = desc = (info.get("description") or "").strip() or title
desc = escape_wikitext(desc)
if len(desc_orig) > 100:
lang = guess_language.guess_language(desc_orig)
- if lang != 'UNKNOWN':
- desc = '{{' + lang + '|1=' + desc + '}}'
+ if lang != "UNKNOWN":
+ desc = "{{" + lang + "|1=" + desc + "}}"
return desc
def _uploader(url, ie_key, title, info):
- uploader = escape_wikitext((info.get('uploader') or '').strip())
- uploader_url = info.get('uploader_url') or ''
+ uploader = escape_wikitext((info.get("uploader") or "").strip())
+ uploader_url = info.get("uploader_url") or ""
if uploader_url:
# HACK: YouTube outputs http:// atm (issue #80)
- if ie_key == 'Youtube':
- uploader_url = uploader_url.replace('http://', 'https://')
- uploader = '[%s %s]' % (uploader_url, uploader)
+ if ie_key == "Youtube":
+ uploader_url = uploader_url.replace("http://", "https://")
+ uploader = "[%s %s]" % (uploader_url, uploader)
return uploader
def _license(url, ie_key, title, info):
- uploader = info.get('uploader')
- uploader_param = ''
+ uploader = info.get("uploader")
+ uploader_param = ""
if uploader:
- uploader_param = '|' + escape_wikitext(uploader.strip())
+ uploader_param = "|" + escape_wikitext(uploader.strip())
default = DEFAULT_LICENSE
- if ie_key == 'Youtube' and info.get('license') == \
- 'Creative Commons Attribution license (reuse allowed)':
- if _date(url, ie_key, title, info) <= '2025-08-01':
- return '{{YouTube CC-BY%s}}' % uploader_param
- return '{{YouTube CC-BY 4.0%s}}' % uploader_param
- elif ie_key == 'Flickr':
+ if (
+ ie_key == "Youtube"
+ and info.get("license")
+ == "Creative Commons Attribution license (reuse allowed)"
+ ):
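+ # Plain string comparison is safe here: _date() returns ISO YYYY-MM-DD.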
+ if _date(url, ie_key, title, info) <= "2025-08-01":
+ return "{{YouTube CC-BY%s}}" % uploader_param
+ return "{{YouTube CC-BY 4.0%s}}" % uploader_param
+ elif ie_key == "Flickr":
return {
- 'Attribution':
- '{{cc-by-2.0%s}}' % uploader_param,
- 'Attribution-ShareAlike':
- '{{cc-by-sa-2.0%s}}' % uploader_param,
- 'No known copyright restrictions':
- '{{Flickr-no known copyright restrictions}}',
- 'United States government work':
- '{{PD-USGov}}',
- 'Public Domain Dedication (CC0)':
- '{{cc-zero}}',
- 'Public Domain Work':
- '{{safesubst:Flickr-public domain mark/subst}}',
- 'Public Domain Mark':
- '{{safesubst:Flickr-public domain mark/subst}}',
- }.get(info.get('license'), default)
- elif ie_key == 'Vimeo':
+ "Attribution": "{{cc-by-2.0%s}}" % uploader_param,
+ "Attribution-ShareAlike": "{{cc-by-sa-2.0%s}}" % uploader_param,
+ "No known copyright restrictions": "{{Flickr-no known copyright restrictions}}",
+ "United States government work": "{{PD-USGov}}",
+ "Public Domain Dedication (CC0)": "{{cc-zero}}",
+ "Public Domain Work": "{{safesubst:Flickr-public domain mark/subst}}",
+ "Public Domain Mark": "{{safesubst:Flickr-public domain mark/subst}}",
+ }.get(info.get("license"), default)
+ elif ie_key == "Vimeo":
return {
- 'by':
- '{{cc-by-3.0%s}}' % uploader_param,
- 'by-sa':
- '{{cc-by-sa-3.0%s}}' % uploader_param,
- 'cc0':
- '{{cc-zero}}',
- }.get(info.get('license'), default)
- elif ie_key == 'PeerTube':
+ "by": "{{cc-by-3.0%s}}" % uploader_param,
+ "by-sa": "{{cc-by-sa-3.0%s}}" % uploader_param,
+ "cc0": "{{cc-zero}}",
+ }.get(info.get("license"), default)
+ elif ie_key == "PeerTube":
return {
- 'Attribution':
- '{{cc-by-4.0%s}}' % uploader_param,
- 'Attribution - Share Alike':
- '{{cc-by-sa-4.0%s}}' % uploader_param,
- 'Public Domain Dedication':
- '{{cc-zero}}',
- }.get(info.get('license'), default)
+ "Attribution": "{{cc-by-4.0%s}}" % uploader_param,
+ "Attribution - Share Alike": "{{cc-by-sa-4.0%s}}" % uploader_param,
+ "Public Domain Dedication": "{{cc-zero}}",
+ }.get(info.get("license"), default)
return default
def escape_wikitext(wikitext):
"""Escape wikitext for use in file description."""
- rep = OrderedDict([
- ('{|', '{{(}}&#124;'),
- ('|}', '&#124;{{)}}'),
- ('||', '&#124;&#124;'),
- ('|', '&#124;'),
- ('[[', '{{!((}}'),
- (']]', '{{))!}}'),
- ('{{', '{{((}}'),
- ('}}', '{{))}}'),
- ('{', '{{(}}'),
- ('}', '{{)}}'),
- ])
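+ # Order matters: the alternation regex built below tries earlier (longer) tokens first.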
+ rep = OrderedDict(
+ [
+ ("{|", "{{(}}|"),
+ ("|}", "|{{)}}"),
+ ("||", "||"),
+ ("|", "|"),
+ ("[[", "{{!((}}"),
+ ("]]", "{{))!}}"),
+ ("{{", "{{((}}"),
+ ("}}", "{{))}}"),
+ ("{", "{{(}}"),
+ ("}", "{{)}}"),
+ ]
+ )
rep = dict((re.escape(k), v) for k, v in rep.items())
pattern = re.compile("|".join(list(rep.keys())))
return pattern.sub(lambda m: rep[re.escape(m.group(0))], wikitext)
@@ -286,93 +285,66 @@ def get_emoji_regexp():
# Sort emoji by length to make sure multi-character emojis are
# matched first
emojis = sorted(emoji.EMOJI_DATA, key=len, reverse=True)
- pattern = u'(' + u'|'.join(re.escape(u) for u in emojis) + u')'
+ pattern = "(" + "|".join(re.escape(u) for u in emojis) + ")"
return re.compile(pattern)
# Source: mediawiki.Title.js@9df363d
sanitationRules = [
# issue #101
- {
- 'pattern': get_emoji_regexp(),
- 'replace': ''
- },
+ {"pattern": get_emoji_regexp(), "replace": ""},
# "signature"
- {
- 'pattern': re.compile(r'~{3}'),
- 'replace': ''
- },
+ {"pattern": re.compile(r"~{3}"), "replace": ""},
# Space, underscore, tab, NBSP and other unusual spaces
{
- 'pattern': re.compile(r'[ _\u0009\u00A0\u1680\u180E\u2000-\u200A'
- r'\u2028\u2029\u202F\u205F\u3000\s]+'),
- 'replace': ' '
+ "pattern": re.compile(
+ r"[ _\u0009\u00A0\u1680\u180E\u2000-\u200A"
+ r"\u2028\u2029\u202F\u205F\u3000\s]+"
+ ),
+ "replace": " ",
},
# issue #96
- {
- 'pattern': re.compile(r'\u200B'),
- 'replace': ''
- },
+ {"pattern": re.compile(r"\u200B"), "replace": ""},
# unicode bidi override characters: Implicit, Embeds, Overrides
- {
- 'pattern': re.compile(r'[\u200E\u200F\u202A-\u202E]'),
- 'replace': ''
- },
+ {"pattern": re.compile(r"[\u200E\u200F\u202A-\u202E]"), "replace": ""},
# control characters
- {
- 'pattern': re.compile(r'[\x00-\x1f\x7f]'),
- 'replace': ''
- },
+ {"pattern": re.compile(r"[\x00-\x1f\x7f]"), "replace": ""},
# URL encoding (possibly)
- {
- 'pattern': re.compile(r'%([0-9A-Fa-f]{2})'),
- 'replace': r'% \1'
- },
+ {"pattern": re.compile(r"%([0-9A-Fa-f]{2})"), "replace": r"% \1"},
# HTML-character-entities
{
- 'pattern': re.compile(r'&(([A-Za-z0-9\x80-\xff]+|'
- r'#[0-9]+|#x[0-9A-Fa-f]+);)'),
- 'replace': r'&amp; \1'
+ "pattern": re.compile(
+ r"&(([A-Za-z0-9\x80-\xff]+|"
+ r"#[0-9]+|#x[0-9A-Fa-f]+);)"
+ ),
+ "replace": r"& \1",
},
# slash, colon (not supported by file systems like NTFS/Windows,
# Mac OS 9 [:], ext4 [/])
- {
- 'pattern': re.compile(r'[:/#]'),
- 'replace': '-'
- },
+ {"pattern": re.compile(r"[:/#]"), "replace": "-"},
# brackets, greater than
- {
- 'pattern': re.compile(r'[\]\}>]'),
- 'replace': ')'
- },
+ {"pattern": re.compile(r"[\]\}>]"), "replace": ")"},
# brackets, lower than
- {
- 'pattern': re.compile(r'[\[\{<]'),
- 'replace': '('
- },
+ {"pattern": re.compile(r"[\[\{<]"), "replace": "("},
# directory structures
{
- 'pattern': re.compile(r'^(\.|\.\.|\./.*|\.\./.*|.*/\./.*|'
- r'.*/\.\./.*|.*/\.|.*/\.\.)$'),
- 'replace': ''
+ "pattern": re.compile(
+ r"^(\.|\.\.|\./.*|\.\./.*|.*/\./.*|"
+ r".*/\.\./.*|.*/\.|.*/\.\.)$"
+ ),
+ "replace": "",
},
# everything that wasn't covered yet
- {
- 'pattern': re.compile(r'[|#+?:/\\\u0000-\u001f\u007f]'),
- 'replace': '-'
- },
+ {"pattern": re.compile(r"[|#+?:/\\\u0000-\u001f\u007f]"), "replace": "-"},
# titleblacklist-custom-double-apostrophe
- {
- 'pattern': re.compile(r"'{2,}"),
- 'replace': '"'
- },
+ {"pattern": re.compile(r"'{2,}"), "replace": '"'},
]
def sanitize(filename):
"""Sanitize a filename for uploading."""
for rule in sanitationRules:
- filename = rule['pattern'].sub(rule['replace'], filename)
+ filename = rule["pattern"].sub(rule["replace"], filename)
return filename
@@ -387,40 +359,40 @@ def capitalize_first_letter(input_string):
def do_validate_filename(filename):
"""Validate filename for invalid characters/parts."""
- assert len(filename.encode('utf-8')) <= MAX_FILENAME_SIZE, \
- 'Your filename is too long'
- assert len(filename) == len(filename.lstrip()), \
- 'Your filename contains leading spaces'
- assert len(filename) == len(filename.rstrip()), \
- 'Your filename contains trailing spaces'
+ assert len(filename.encode("utf-8")) <= MAX_FILENAME_SIZE, (
+ "Your filename is too long"
+ )
+ assert len(filename) == len(filename.lstrip()), (
+ "Your filename contains leading spaces"
+ )
+ assert len(filename) == len(filename.rstrip()), (
+ "Your filename contains trailing spaces"
+ )
for rule in sanitationRules:
- reobj = rule['pattern'].search(filename)
- assert not reobj or reobj.group(0) == ' ', \
- 'Your filename contains an illegal part: %r' % reobj.group(0)
+ reobj = rule["pattern"].search(filename)
+ assert not reobj or reobj.group(0) == " ", (
+ "Your filename contains an illegal part: %r" % reobj.group(0)
+ )
- return filename.replace('_', ' ')
+ return filename.replace("_", " ")
def do_validate_filedesc(filedesc):
"""Validate filename for invalid characters/parts."""
parse = SITE.simple_request(
- action='parse',
- text=filedesc,
- prop='externallinks'
+ action="parse", text=filedesc, prop="externallinks"
).submit()
- externallinks = parse.get('parse', {}).get('externallinks', [])
+ externallinks = parse.get("parse", {}).get("externallinks", [])
if externallinks:
- spam = SITE.simple_request(
- action='spamblacklist',
- url=externallinks
- ).submit()
+ spam = SITE.simple_request(action="spamblacklist", url=externallinks).submit()
- assert spam.get('spamblacklist', {}).get('result') != 'blacklisted', \
- ('Your file description matches spam blacklist! Matches: %s' %
- ', '.join(spam.get('spamblacklist', {}).get('matches', [])))
+ assert spam.get("spamblacklist", {}).get("result") != "blacklisted", (
+ "Your file description matches spam blacklist! Matches: %s"
+ % ", ".join(spam.get("spamblacklist", {}).get("matches", []))
+ )
return filedesc
@@ -437,14 +409,14 @@ def do_validate_filename_unique(filename):
# The built-in 'capitalize()' method isn't used since it lowercases the
# rest of the string, which would also break the comparison.
conflicting_names = {
- capitalize_first_letter(f"{filename}.{format}")
- for format in formats
+ capitalize_first_letter(f"{filename}.{format}") for format in formats
}
pages = SITE.allpages(prefix=filename, namespace=NAMESPACE_FILE)
for page in pages:
- assert page.title(with_ns=False) not in conflicting_names, \
- f'A filename with the same name already exists: {page.full_url()}'
+ assert page.title(with_ns=False) not in conflicting_names, (
+ f"A filename with the same name already exists: {page.full_url()}"
+ )
return filename
@@ -460,7 +432,7 @@ def do_validate_youtube_id(youtube_id):
"""
results = WcqsSession().query(query)
- if len(results['results']['bindings']) == 0:
+ if len(results["results"]["bindings"]) == 0:
return None
- return results['results']['bindings'][0]['file']['value']
+ return results["results"]["bindings"][0]["file"]["value"]
diff --git a/video2commons/frontend/wcqs.py b/video2commons/frontend/wcqs.py
index 362b96b..ce46b7d 100644
--- a/video2commons/frontend/wcqs.py
+++ b/video2commons/frontend/wcqs.py
@@ -6,7 +6,8 @@
from typing import Any
from video2commons.frontend.shared import redisconnection
-class WcqsSession():
+
+class WcqsSession:
"""This class manages WCQS sessions and executes SPARQL queries.
Relevant Documentation:
@@ -21,18 +22,18 @@ def query(self, query: str):
"""Queries the Wikimedia Commons Query Service."""
retry_after_ts = self._check_retry()
if retry_after_ts:
- retry_after = int((retry_after_ts - datetime.now(timezone.utc)).total_seconds())
- raise RuntimeError(
- f'Too many requests, try again in {retry_after} seconds'
+ retry_after = int(
+ (retry_after_ts - datetime.now(timezone.utc)).total_seconds()
)
+ raise RuntimeError(f"Too many requests, try again in {retry_after} seconds")
# Make the SPARQL request using the provided query.
response = self.session.get(
- 'https://commons-query.wikimedia.org/sparql',
- params={'query': query},
+ "https://commons-query.wikimedia.org/sparql",
+ params={"query": query},
headers={
- 'Accept': 'application/sparql-results+json',
- 'User-Agent': 'video2commons-bot/1.0 (https://video2commons.toolforge.org/)'
+ "Accept": "application/sparql-results+json",
+ "User-Agent": "video2commons-bot/1.0 (https://video2commons.toolforge.org/)",
},
# Set-Cookie session refresh headers get sent with a 307 redirect.
allow_redirects=True,
@@ -44,29 +45,27 @@ def query(self, query: str):
#
# https://wikitech.wikimedia.org/wiki/Robot_policy#Generally_applicable_rules
if response.status_code == 429:
- retry_after = response.headers.get('Retry-After') or 60
+ retry_after = response.headers.get("Retry-After") or 60
self._set_retry(int(retry_after))
- raise RuntimeError(
- f'Too many requests, try again in {retry_after} seconds'
- )
+ raise RuntimeError(f"Too many requests, try again in {retry_after} seconds")
# Handle other unexpected response codes.
- content_type = response.headers.get('Content-Type')
+ content_type = response.headers.get("Content-Type")
if (
response.status_code < 200
or response.status_code >= 300
- or content_type != 'application/sparql-results+json;charset=utf-8'
+ or content_type != "application/sparql-results+json;charset=utf-8"
):
raise RuntimeError(
- f'Got unexpected response from SPARQL ({response.status_code}): {response.text}'
+ f"Got unexpected response from SPARQL ({response.status_code}): {response.text}"
)
return response.json()
def _check_retry(self):
"""Checks if we're rate limited before making SPARQL requests."""
- retry_after = redisconnection.get('wcqs:retry-after')
+ retry_after = redisconnection.get("wcqs:retry-after")
if retry_after:
retry_after_ts = datetime.fromisoformat(retry_after)
@@ -80,69 +79,72 @@ def _set_retry(self, retry_after: int):
retry_after_ts = datetime.now(timezone.utc) + timedelta(seconds=retry_after)
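+ # The key expires exactly when the rate limit lifts, so stale limits clear themselves.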
redisconnection.setex(
- 'wcqs:retry-after',
+ "wcqs:retry-after",
retry_after,
- retry_after_ts.replace(tzinfo=timezone.utc).isoformat()
+ retry_after_ts.replace(tzinfo=timezone.utc).isoformat(),
)
def _get_cookies(self) -> list[dict[str, Any]]:
"""Retrieve cookies from Redis or the filesystem."""
- cookies = redisconnection.get('wcqs:session')
+ cookies = redisconnection.get("wcqs:session")
if cookies:
return json.loads(cookies)
- current_app.logger.warning('Pulling in WCQS session from file as fallback')
+ current_app.logger.warning("Pulling in WCQS session from file as fallback")
try:
# Fallback: Pull in cookies from file. Needed for initial setup.
- with open('/data/project/video2commons/wcqs-session.json', 'r') as f:
+ with open("/data/project/video2commons/wcqs-session.json", "r") as f:
return json.load(f)
except FileNotFoundError:
- raise RuntimeError('No WCQS session found in Redis or filesystem')
+ raise RuntimeError("No WCQS session found in Redis or filesystem")
def _set_cookies(self, cookies: list[dict[str, Any]]):
"""Load authentication cookies into the session."""
- cookie_dict = {(cookie['domain'], cookie['name']): cookie for cookie in cookies}
+ cookie_dict = {(cookie["domain"], cookie["name"]): cookie for cookie in cookies}
# wcqsOauth is a long lived cookie that wcqs uses to authenticate the
# user against commons.wikimedia.org. This cookie is used to refresh
# the wcqsSession cookie.
- wcqsOauth = cookie_dict.get(('commons-query.wikimedia.org', 'wcqsOauth'))
+ wcqsOauth = cookie_dict.get(("commons-query.wikimedia.org", "wcqsOauth"))
if wcqsOauth:
self.session.cookies.set(
- name='wcqsOauth',
- value=wcqsOauth['value'],
- domain=wcqsOauth['domain'],
- path=wcqsOauth['path'],
- secure=wcqsOauth['secure'],
+ name="wcqsOauth",
+ value=wcqsOauth["value"],
+ domain=wcqsOauth["domain"],
+ path=wcqsOauth["path"],
+ secure=wcqsOauth["secure"],
expires=None, # Intentional as wcqsOauth is long-lived
)
else:
- raise RuntimeError('wcqsOauth cookie not found')
+ raise RuntimeError("wcqsOauth cookie not found")
# wcqsSession is a short lived cookie (2 hour lifetime) holding a JWT
# that grants query access to wcqs. This cookie is provided in a 307
# redirect to any request that has a valid wcqsOauth cookie but no
# valid wcqsSession cookie.
- wcqsSession = cookie_dict.get(('commons-query.wikimedia.org', 'wcqsSession'))
+ wcqsSession = cookie_dict.get(("commons-query.wikimedia.org", "wcqsSession"))
if wcqsSession:
self.session.cookies.set(
- name='wcqsSession',
- value=wcqsSession['value'],
- domain=wcqsSession['domain'],
- path=wcqsSession['path'],
- secure=wcqsSession['secure'],
- expires=int(wcqsSession['expirationDate']),
+ name="wcqsSession",
+ value=wcqsSession["value"],
+ domain=wcqsSession["domain"],
+ path=wcqsSession["path"],
+ secure=wcqsSession["secure"],
+ expires=int(wcqsSession["expirationDate"]),
)
def _save_cookies(self):
"""Save cookies from the session to Redis."""
- cookies = [{
- 'name': cookie.name,
- 'value': cookie.value,
- 'domain': cookie.domain,
- 'path': cookie.path,
- 'expirationDate': cookie.expires,
- 'secure': cookie.secure,
- } for cookie in self.session.cookies]
-
- redisconnection.set('wcqs:session', json.dumps(cookies))
+ cookies = [
+ {
+ "name": cookie.name,
+ "value": cookie.value,
+ "domain": cookie.domain,
+ "path": cookie.path,
+ "expirationDate": cookie.expires,
+ "secure": cookie.secure,
+ }
+ for cookie in self.session.cookies
+ ]
+
+ redisconnection.set("wcqs:session", json.dumps(cookies))
diff --git a/video2commons/shared/stats.py b/video2commons/shared/stats.py
index 20f4a09..070bb02 100644
--- a/video2commons/shared/stats.py
+++ b/video2commons/shared/stats.py
@@ -3,7 +3,7 @@
import json
import time
-LOCK_KEY = 'stats_lock'
+LOCK_KEY = "stats_lock"
def collect_worker_stats(conn, inspector):
@@ -13,8 +13,8 @@ def collect_worker_stats(conn, inspector):
if stats:
for _, worker_stats in stats.items():
- pool = worker_stats.get('pool', {})
- max_concurrency = pool.get('max-concurrency', 0)
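+ # Total capacity is the sum of every worker's pool size (max-concurrency).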
+ pool = worker_stats.get("pool", {})
+ max_concurrency = pool.get("max-concurrency", 0)
total_capacity += max_concurrency
active_tasks = inspector.active()
@@ -26,26 +26,26 @@ def collect_worker_stats(conn, inspector):
total_active += len(tasks)
for task in tasks:
- task_id = task.get('id')
+ task_id = task.get("id")
if task_id:
task_ids.append(task_id)
queue_length = get_queue_length(conn)
return {
- 'task_ids': task_ids,
- 'pending': queue_length,
- 'capacity': total_capacity,
- 'processing': total_active,
- 'available': total_capacity - total_active,
- 'utilization': (total_active / total_capacity) if total_capacity > 0 else 0,
- 'last_updated_by_job': int(time.time()),
+ "task_ids": task_ids,
+ "pending": queue_length,
+ "capacity": total_capacity,
+ "processing": total_active,
+ "available": total_capacity - total_active,
+ "utilization": (total_active / total_capacity) if total_capacity > 0 else 0,
+ "last_updated_by_job": int(time.time()),
}
def get_queue_length(conn):
"""Get the number of messages waiting in the broker queue."""
- return conn.llen('celery') + conn.hlen('unacked')
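+ # "celery" is the default broker queue; the Redis transport keeps reserved-but-unacked messages in the "unacked" hash.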
+ return conn.llen("celery") + conn.hlen("unacked")
def update_task_stats(conn, task_id, remove=False):
@@ -56,34 +56,36 @@ def update_task_stats(conn, task_id, remove=False):
raise RuntimeError("Could not acquire write lock on stats key.")
try:
- serialized_stats = conn.get('stats')
+ serialized_stats = conn.get("stats")
if not serialized_stats:
raise RuntimeError("No stats are available, aborting.")
stats = json.loads(serialized_stats)
if not remove:
- stats['task_ids'].append(task_id)
+ stats["task_ids"].append(task_id)
else:
# This can fail with a ValueError, but that's fine since we don't
# want to write to the key if this happens anyway.
- stats['task_ids'].remove(task_id)
+ stats["task_ids"].remove(task_id)
- stats['processing'] = len(stats['task_ids'])
- stats['available'] = stats['capacity'] - stats['processing']
- stats['utilization'] = (stats['processing'] / stats['capacity']) if stats['capacity'] > 0 else 0
+ stats["processing"] = len(stats["task_ids"])
+ stats["available"] = stats["capacity"] - stats["processing"]
+ stats["utilization"] = (
+ (stats["processing"] / stats["capacity"]) if stats["capacity"] > 0 else 0
+ )
# Update the queued tasks counter, which only tracks tasks that haven't
# been picked up by any workers yet.
if not remove:
- stats['pending'] = max(stats['pending'] - 1, 0)
+ stats["pending"] = max(stats["pending"] - 1, 0)
# FAILSAFE: We shouldn't get weird numbers in stats, but be safe.
if (
- stats['available'] > stats['capacity']
- or stats['available'] < 0
- or stats['processing'] > stats['capacity']
- or stats['processing'] < 0
+ stats["available"] > stats["capacity"]
+ or stats["available"] < 0
+ or stats["processing"] > stats["capacity"]
+ or stats["processing"] < 0
):
raise RuntimeError("Received invalid stats, aborting.")
@@ -100,12 +102,12 @@ def increment_queue_counter(conn):
raise RuntimeError("Could not acquire write lock on stats key.")
try:
- serialized_stats = conn.get('stats')
+ serialized_stats = conn.get("stats")
if not serialized_stats:
raise RuntimeError("No stats are available, aborting.")
stats = json.loads(serialized_stats)
- stats['pending'] = stats.get('pending', 0) + 1
+ stats["pending"] = stats.get("pending", 0) + 1
update_worker_stats(conn, stats)
finally:
@@ -116,7 +118,7 @@ def acquire_write_lock(conn):
"""Acquire a write lock on the stats key (~1 second timeout)."""
for _ in range(10):
try:
- lock_acquired = conn.set(LOCK_KEY, '1', nx=True, ex=2)
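+ # SET with nx=True, ex=2 acquires the lock atomically with a 2-second expiry.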
+ lock_acquired = conn.set(LOCK_KEY, "1", nx=True, ex=2)
if lock_acquired:
return True
except Exception:
@@ -137,7 +139,7 @@ def release_write_lock(conn):
def get_worker_stats(conn):
"""Get worker stats from Redis."""
- serialized_stats = conn.get('stats')
+ serialized_stats = conn.get("stats")
if not serialized_stats:
return None
@@ -146,4 +148,4 @@ def get_worker_stats(conn):
def update_worker_stats(conn, stats):
"""Update worker stats in Redis."""
- conn.set('stats', json.dumps(stats))
+ conn.set("stats", json.dumps(stats))
diff --git a/video2commons/user-config.py b/video2commons/user-config.py
index 6e90921..884b1ac 100644
--- a/video2commons/user-config.py
+++ b/video2commons/user-config.py
@@ -4,7 +4,7 @@
"""Pywikibot configs."""
-family = 'commons'
-mylang = 'commons'
+family = "commons"
+mylang = "commons"
socket_timeout = 30, 300 # chunked uploading unreliable
diff --git a/www/python/src/app.py b/www/python/src/app.py
index 6a0c169..22ad835 100644
--- a/www/python/src/app.py
+++ b/www/python/src/app.py
@@ -19,13 +19,12 @@
"""video2commons web frontend wrapper."""
-
-
import os
import sys
+
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../../../")
from video2commons.frontend import app # NOQA
-if __name__ == '__main__':
+if __name__ == "__main__":
app.run()
diff --git a/www/python/src/user-config.py b/www/python/src/user-config.py
index 6e90921..884b1ac 100644
--- a/www/python/src/user-config.py
+++ b/www/python/src/user-config.py
@@ -4,7 +4,7 @@
"""Pywikibot configs."""
-family = 'commons'
-mylang = 'commons'
+family = "commons"
+mylang = "commons"
socket_timeout = 30, 300 # chunked uploading unreliable