Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
name: Validate JavaScript Build
name: CI

on:
pull_request:
types: [opened, synchronize, reopened]

jobs:
check:
name: Validate JavaScript Build
runs-on: ubuntu-latest
container:
image: debian:bookworm
Expand Down Expand Up @@ -61,3 +62,22 @@ jobs:
git add video2commons/frontend/static/*.min.js video2commons/frontend/templates/*.min.html
git commit -m "Update built files from CI"
git push origin $GITHUB_HEAD_REF

ruff:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Setup uv
uses: astral-sh/setup-uv@v7.2.1
with:
python-version: "3.14"
enable-cache: true

- name: Run Ruff
run: uv run ruff check --output-format=github .

- name: Run the Ruff formatter
run: uv run ruff format --check .
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.14
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[project]
name = "video2commons"
version = "0.1.0"
requires-python = ">=3.14"

[dependency-groups]
dev = ["ruff>=0.14.14"]

[tool.ruff.lint]
ignore = ["E722"]

[tool.ruff.format]
docstring-code-format = true
4 changes: 2 additions & 2 deletions user-config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

"""Pywikibot configs."""

family = 'commons'
mylang = 'commons'
family = "commons"
mylang = "commons"

socket_timeout = 30, 300 # chunked uploading unreliable
13 changes: 6 additions & 7 deletions utils/cleanuptasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,20 @@
import sys
from redis import Redis

sys.path.append(os.path.dirname(os.path.realpath(__file__)) +
"/../video2commons")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../video2commons")
from config import redis_pw, redis_host # NOQA

redisconnection = Redis(host=redis_host, db=3, password=redis_pw)

for userkey in redisconnection.keys('tasks:*') + ['alltasks']:
for userkey in redisconnection.keys("tasks:*") + ["alltasks"]:
for taskid in redisconnection.lrange(userkey, 0, -1):
if not redisconnection.exists('titles:' + taskid):
if not redisconnection.exists("titles:" + taskid):
redisconnection.lrem(userkey, 0, taskid)
print("delete %s from %s" % (taskid, userkey))

for pattern in ['params:*', 'restarted:*']: # 'tasklock:*'
for pattern in ["params:*", "restarted:*"]: # 'tasklock:*'
for key in redisconnection.keys(pattern):
taskid = key.split(':')[1]
if not redisconnection.exists('titles:' + taskid):
taskid = key.split(":")[1]
if not redisconnection.exists("titles:" + taskid):
redisconnection.delete(key)
print("delete %s" % (key))
47 changes: 26 additions & 21 deletions utils/extracti18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,50 +27,55 @@
import re
import json

if not len(sys.argv) > 1 or '/messages' not in sys.argv[1]:
print(("usage: python " + sys.argv[0] + " <dir>\n\n"
" <dir> The path to mediawiki/languages/messages\n"))
if not len(sys.argv) > 1 or "/messages" not in sys.argv[1]:
print(
(
"usage: python " + sys.argv[0] + " <dir>\n\n"
" <dir> The path to mediawiki/languages/messages\n"
)
)
sys.exit(1)

msgDir = sys.argv[1]

dest = os.path.dirname(os.path.realpath(__file__)) + \
'/../video2commons/frontend/i18n-metadata'
dest = (
os.path.dirname(os.path.realpath(__file__))
+ "/../video2commons/frontend/i18n-metadata"
)

data = {
'fallbacks': {},
'rtl': [],
'alllangs': [],
"fallbacks": {},
"rtl": [],
"alllangs": [],
}
rFallback = re.compile(r"fallback = '(.*?)'", re.I)
rIsRtl = re.compile(r'rtl = true', re.I)
rIsRtl = re.compile(r"rtl = true", re.I)
for file in os.listdir(msgDir):
filePath = msgDir + "/" + file
if file in ['.', '..'] or not os.path.isfile(filePath):
if file in [".", ".."] or not os.path.isfile(filePath):
continue

with open(filePath, 'r') as openfile:
with open(filePath, "r") as openfile:
content = openfile.read()

fileMatch = re.match(r'Messages(.*?)\.php', file)
source = fileMatch.group(1).lower().replace('_', '-')
fileMatch = re.match(r"Messages(.*?)\.php", file)
source = fileMatch.group(1).lower().replace("_", "-")
contentMatch = rFallback.search(content)
if contentMatch:
fallbacks = [s.strip() for s in contentMatch.group(1).split(',')]
data['fallbacks'][source] = \
fallbacks if len(fallbacks) > 1 else fallbacks[0]
fallbacks = [s.strip() for s in contentMatch.group(1).split(",")]
data["fallbacks"][source] = fallbacks if len(fallbacks) > 1 else fallbacks[0]

if rIsRtl.search(content):
data['rtl'].append(source)
data["rtl"].append(source)

data['alllangs'].append(source)
data["alllangs"].append(source)


def _write(key):
dest_file = dest + "/" + key + ".json"
with open(dest_file, 'w') as openfile:
json.dump(data[key], openfile, sort_keys=True,
indent=4, separators=(',', ': '))
with open(dest_file, "w") as openfile:
json.dump(data[key], openfile, sort_keys=True, indent=4, separators=(",", ": "))


for key in data:
_write(key)
11 changes: 6 additions & 5 deletions utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
import time

sys.path.insert(0, '/srv/v2c')
sys.path.insert(0, "/srv/v2c")

from redis import Redis

Expand All @@ -17,7 +17,7 @@
collect_worker_stats,
get_worker_stats,
release_write_lock,
update_worker_stats
update_worker_stats,
)

# Stats are considered stale if they haven't been updated in 30 minutes.
Expand All @@ -41,8 +41,8 @@ def main():

# Don't update stats if they've been updated recently by another job.
existing_stats = get_worker_stats(app_conn)
if existing_stats and 'last_updated_by_job' in existing_stats:
if int(time.time()) - existing_stats['last_updated_by_job'] < STALE_SECS:
if existing_stats and "last_updated_by_job" in existing_stats:
if int(time.time()) - existing_stats["last_updated_by_job"] < STALE_SECS:
print("Stats have been updated recently, skipping update.")
return

Expand All @@ -58,5 +58,6 @@ def main():
finally:
release_write_lock(app_conn)

if __name__ == '__main__':

if __name__ == "__main__":
main()
44 changes: 44 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions video2commons/backend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

"""video2commons backend."""



from video2commons.backend import worker

__all__ = ['worker']
__all__ = ["worker"]
58 changes: 37 additions & 21 deletions video2commons/backend/categories/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,26 @@
def has_video_track(source: str) -> bool:
"""Check if a video has a video track."""

result = subprocess.run([
ffprobe_location,
'-loglevel', 'error',
'-select_streams', 'v',
'-show_entries', 'stream=index,codec_type',
'-of', 'json',
source
], capture_output=True, text=True)
result = subprocess.run(
[
ffprobe_location,
"-loglevel",
"error",
"-select_streams",
"v",
"-show_entries",
"stream=index,codec_type",
"-of",
"json",
source,
],
capture_output=True,
text=True,
)

if result.returncode == 0:
for stream in json.loads(result.stdout).get('streams', []):
if stream.get('codec_type') == 'video':
for stream in json.loads(result.stdout).get("streams", []):
if stream.get("codec_type") == "video":
return True

return False
Expand All @@ -44,18 +52,26 @@ def has_video_track(source: str) -> bool:
def has_audio_track(source: str) -> bool:
"""Check if a video has an audio track."""

result = subprocess.run([
ffprobe_location,
'-loglevel', 'error',
'-select_streams', 'a',
'-show_entries', 'stream=index,codec_type',
'-of', 'json',
source
], capture_output=True, text=True)
result = subprocess.run(
[
ffprobe_location,
"-loglevel",
"error",
"-select_streams",
"a",
"-show_entries",
"stream=index,codec_type",
"-of",
"json",
source,
],
capture_output=True,
text=True,
)

if result.returncode == 0:
for stream in json.loads(result.stdout).get('streams', []):
if stream.get('codec_type') == 'audio':
for stream in json.loads(result.stdout).get("streams", []):
if stream.get("codec_type") == "audio":
return True

return False
Expand All @@ -82,7 +98,7 @@ def get_inferable_categories(source: str) -> Set[str]:
categories = set()

if not has_audio_track(source):
categories.add('[[Category:Videos without audio]]')
categories.add("[[Category:Videos without audio]]")

return categories

Expand Down
Loading