diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 7b57ba37..be397a4a 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,9 +1,9 @@
-# Automatically update versions for pip
+# Automatically update versions for uv
version: 2
updates:
# Maintain dependencies for Python
- - package-ecosystem: "pip"
+ - package-ecosystem: "uv"
directory: "/"
schedule:
interval: "monthly"
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 631cdff0..cee343d5 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -28,5 +28,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- - uses: actions/setup-python@v6
- - uses: pre-commit/action@v3.0.1
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+ with:
+ enable-cache: true
+ cache-python: true
+ - name: Run prek
+ run: uvx prek run --all-files
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 7063b08b..17613206 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -12,15 +12,14 @@ jobs:
id-token: write
steps:
- uses: actions/checkout@v6
- - name: Set up Python
- uses: actions/setup-python@v6
with:
- python-version: 3.9
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel pre-commit
+ fetch-depth: 0
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+ with:
+ enable-cache: true
+ cache-python: true
- name: Build distribution
- run: make dist
+ run: uv build
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/pythontest.yml b/.github/workflows/pythontest.yml
index 5087e49a..cb13c53a 100644
--- a/.github/workflows/pythontest.yml
+++ b/.github/workflows/pythontest.yml
@@ -20,7 +20,7 @@ jobs:
uses: fkirc/skip-duplicate-actions@master
with:
github_token: ${{ github.token }}
- paths: '["**.py", "requirements_test.txt", ".github/workflows/pythontest.yml"]'
+ paths: '["**.py", "pyproject.toml", "uv.lock", ".github/workflows/pythontest.yml"]'
unit_test:
name: Python unit tests
needs: pre_job
@@ -30,16 +30,19 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest, ubuntu-latest, macos-latest]
- python-version: [3.9, '3.10', '3.11', '3.12', '3.13']
+ python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
steps:
- uses: actions/checkout@v6
if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
- - name: Set up Python ${{ matrix.python-version }}
+ with:
+ fetch-depth: 0
+ - name: Set up uv
if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
- uses: actions/setup-python@v6
+ uses: astral-sh/setup-uv@v7
with:
+ enable-cache: true
+ cache-python: true
python-version: ${{ matrix.python-version }}
- cache: 'pip'
- name: Install Ubuntu dependencies
run: |
sudo apt-get -y -qq update
@@ -104,17 +107,6 @@ jobs:
run: |
Add-Content -Path $env:GITHUB_PATH -Value "$env:GITHUB_WORKSPACE\tools\ffmpeg-master-latest-win64-gpl\bin" -Encoding utf8
Add-Content -Path $env:GITHUB_PATH -Value "$env:GITHUB_WORKSPACE\tools\poppler-21.11.0\Library\bin" -Encoding utf8
- - name: Install tox
- if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
- run: |
- python -m pip install --upgrade pip
- pip install tox
- - name: tox env cache
- if: ${{ needs.pre_job.outputs.should_skip != 'true' && !startsWith(runner.os, 'windows') }}
- uses: actions/cache@v5
- with:
- path: ${{ github.workspace }}/.tox/py${{ matrix.python-version }}
- key: ${{ runner.os }}-tox-py${{ matrix.python-version }}-${{ hashFiles('setup.py') }}
- - name: Test with tox
+ - name: Run tests
if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
- run: tox -e py${{ matrix.python-version }}
+ run: uv run --group test --extra google_drive pytest
diff --git a/.gitignore b/.gitignore
index b36df2ee..df7fe7ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,7 +49,6 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
-.tox/
.coverage
.coverage.*
.cache
@@ -137,3 +136,6 @@ cache.sqlite
chefdata/
audio_cache.sqlite
+
+# uv
+.venv/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b3355f9e..b4642ed1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,15 +1,10 @@
repos:
- - repo: https://github.com/python/black
- rev: 21.12b0
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.11.6
hooks:
- - id: black
- types_or: [python, pyi]
- additional_dependencies: ['click==8.0.4']
- - repo: https://github.com/pycqa/flake8
- rev: 7.1.1
- hooks:
- - id: flake8
- exclude: (?x)(.*examples.*)
+ - id: ruff
+ args: [--fix]
+ - id: ruff-format
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
@@ -20,10 +15,6 @@ repos:
- id: debug-statements
- id: end-of-file-fixer
exclude: '^.+?\.json$'
- - repo: https://github.com/asottile/reorder_python_imports
- rev: v2.6.0
- hooks:
- - id: reorder-python-imports
- repo: https://github.com/google/yamlfmt
rev: v0.14.0
hooks:
@@ -35,3 +26,7 @@ repos:
- id: actionlint
# Expects shellcheck to be installed on the system
# https://github.com/koalaman/shellcheck#installing
+ - repo: https://github.com/astral-sh/uv-pre-commit
+ rev: 0.11.2
+ hooks:
+ - id: uv-lock
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 0c924b2c..dd698903 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -6,7 +6,8 @@ build:
python: "3.11"
jobs:
pre_install:
- - pip install -e .
+ - pip install uv
+ - uv pip install -e . --system
sphinx:
configuration: docs/conf.py
python:
diff --git a/AGENTS.md b/AGENTS.md
index 636f0d0f..9860fd65 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -5,19 +5,19 @@ Guidance for AI coding agents working in this repository.
## Quick Start
```bash
-pip install -e '.[test,dev]' # install (use uv pip if venv was created with uv)
-pytest # run all tests
-pytest tests/test_files.py # run a single test file
-pytest -k 'test_something' # filter by test name
-pre-commit run --all-files # lint (the ONLY way to run linting)
+uv sync --group dev # install (creates .venv and installs all dev dependencies)
+uv run --group test pytest # run all tests
+uv run --group test pytest tests/test_files.py # run a single test file
+uv run --group test pytest -k 'test_something' # filter by test name
+uvx prek run --all-files # lint (the ONLY way to run linting)
```
**System dependencies:** `ffmpeg` and `poppler-utils`.
## Critical Gotchas
-- **Linting:** Always use `pre-commit run --all-files`. Never run black, flake8, or other tools directly.
-- **Line length:** 160 characters, enforced by pre-commit.
+- **Linting:** Always use `uvx prek run --all-files`. Never run ruff, black, flake8, or other tools directly.
+- **Line length:** 160 characters, enforced by prek/ruff.
- **New file types require exactly two changes:** (1) a conversion handler in `convert.py`, (2) a metadata extractor in `extract_metadata.py`. That's it. Do NOT touch `classes/files.py` or `classes/nodes.py` — the existing File/Node subclasses there are legacy backwards-compatibility APIs that are NOT needed for new file types. The pipeline infers kinds and presets automatically.
- **Test placement:** Pipeline tests go in `tests/pipeline/` — add to existing files like `test_convert.py` and `test_extract_metadata.py`. Do not create new test files.
- **Validation logic:** Each handler implements only the validation its spec requires. Do not copy validation from other handlers (e.g., do not add HTML body parsing or empty-body checks unless the spec explicitly requires them).
diff --git a/CLAUDE.md b/CLAUDE.md
index 2f3f1a28..dd71d0c2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -24,7 +24,7 @@ Three stages with ordered handlers: **transfer** (download) → **convert** (com
- The pipeline code is the reference architecture — follow its OOP patterns when writing new code.
- Older code (especially `ricecooker/classes/`) has less test coverage. Take extra care when modifying it.
-- PRs target `main` on `learningequality/ricecooker`. CI tests Python 3.9–3.13 on Linux, macOS, and Windows.
+- PRs target `main` on `learningequality/ricecooker`. CI tests Python 3.9–3.13 on Linux, macOS, and Windows using uv.
### Adding support for new file types
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d63ee204..1b6c586a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -49,9 +49,7 @@ Becoming a ricecooker developer
-------------------------------
Ready to contribute? In order to work on the `ricecooker` code you'll first need
-to make you have [Python 3](https://www.python.org/downloads/) on your computer.
-You'll also need to install the Python package [pip](https://pypi.org/project/pip/)
-if you don't have it already.
+to have [Python 3.9+](https://www.python.org/downloads/) on your computer.
Here are the steps for setting up `ricecooker` for local development:
@@ -65,46 +63,23 @@ Here are the steps for setting up `ricecooker` for local development:
cd ricecooker/
```
-3. Create a Python virtual environment for this project (optional, but recommended):
+3. Install [uv](https://docs.astral.sh/uv/getting-started/installation/) if you don't have it already:
- * Install the `virtualenv` package using the command
- ```
- pip install virtualenv
- ```
-
- * The next steps depends if you're using a UNIX system (Mac/Linux) or Windows:
- * For UNIX operating systems:
- * Create a virtual env called `venv` in the current directory using the
- command:
- ```
- virtualenv -p python3 venv
- ```
- * Activate the virtualenv called `venv` by running:
- ```
- source venv/bin/activate
- ```
- Your command prompt will change to indicate you're working inside `venv`.
+ ```
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ ```
- * For Windows systems:
- * Create a virtual env called `venv` in the current directory using the
- following command:
- ```
- virtualenv -p C:/Python36/python.exe venv
- ```
- You may need to adjust the `-p` argument depending on where your version
- of Python is located. Note you'll need Python version 3.5 or higher.
- * Activate the virtualenv called `venv` by running:
- ```
- .\venv\Scripts\activate
- ```
+ On Windows:
+ ```
+ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
+ ```
-4. Install the `ricecooker` code in the virtual environment using these commands:
+4. Install the `ricecooker` code and its dependencies:
```
- pip install -e .
+ uv sync --group dev
```
-
5. Create a branch for local development:
```
@@ -114,24 +89,23 @@ Here are the steps for setting up `ricecooker` for local development:
Now you can make your changes locally.
-6. When you're done making changes, check that your changes pass flake8 linter rules
- and the `ricecooker` test suite, including testing other Python versions with tox:
+6. When you're done making changes, check that your changes pass linting
+ and the `ricecooker` test suite:
- To run the tests you will need to install the extra depedencies tagged "test"
-
- ```
- pip install -e .[test]
- ```
-
- Running the tests
+ Run linting:
+ ```
+ uvx prek run --all-files
+ ```
+ Run the tests:
```
- flake8 ricecooker tests
- pytest
- tox
+ uv run --group test pytest
```
- To get `flake8` and `tox`, just `pip install` them into your virtualenv.
+ Run tests across all supported Python versions:
+ ```
+ make test-all
+ ```
7. Commit your changes and push your branch to GitHub:
@@ -157,8 +131,7 @@ Before you submit a pull request, check that it meets these guidelines:
2. If the pull request adds functionality, the docs should be updated. Put
your new functionality into a function with a docstring, and add the
feature to the list in `README.md`.
-3. The pull request should work for Python 3.5+. Check
- [https://travis-ci.org/github/learningequality/ricecooker/pull_requests](https://travis-ci.org/github/learningequality/ricecooker/pull_requests)
+3. The pull request should work for Python 3.9+. Check the GitHub Actions CI
and make sure that the tests pass for all supported Python versions.
diff --git a/ISSUE.md b/ISSUE.md
new file mode 100644
index 00000000..52e4cfdf
--- /dev/null
+++ b/ISSUE.md
@@ -0,0 +1,60 @@
+---
+issue: 662
+target_branch: main
+repo: learningequality/ricecooker
+updated_at: "2026-03-31T21:48:28Z"
+---
+
+❌ **This issue is not open for contribution.** Visit the contributing guidelines to learn about the contributing process and how to find suitable issues.
+
+## Overview
+
+Migrate from pip/tox/setup.py to uv, and replace flake8/black/reorder-python-imports with ruff, following the same approach as learningequality/kolibri#14457.
+
+**Complexity:** Low
+**Target branch:** main
+
+### Context
+
+Kolibri has migrated to uv for Python version management, virtual environments, dependency resolution, and CI. The same migration should be applied across the ecosystem for consistency. Ricecooker already has yamlfmt and actionlint in its pre-commit config.
+
+### The Change
+
+- Convert `setup.py`/`setup.cfg` to `pyproject.toml` with PEP 735 dependency groups
+- Replace custom versioning with setuptools-scm
+- Replace tox with direct uv invocation in CI
+- Replace flake8/black/reorder-python-imports with ruff
+- Replace pre-commit with prek
+- Update CI workflows to use `astral-sh/setup-uv` with `enable-cache: true` and `cache-python: true` (this may need to be turned off for Windows workflows - can check on CI)
+- Configure `exclude-newer = "7 days"` with `exclude-newer-package` exemption for `le-utils` in `[tool.uv]` for supply chain safety
+- Add `uv-lock` pre-commit hook (from `astral-sh/uv-pre-commit`) to keep `uv.lock` in sync
+- Update developer documentation
+
+### Acceptance Criteria
+
+- [ ] `pyproject.toml` replaces `setup.py`/`setup.cfg` as the single source of project metadata and configuration
+- [ ] Versioning handled by setuptools-scm
+- [ ] `tox.ini` removed; CI uses uv directly
+- [ ] Linting and formatting handled by ruff (replacing black, flake8, reorder-python-imports)
+- [ ] Pre-commit uses prek
+- [ ] CI workflows use `astral-sh/setup-uv@v7`
+- [ ] `exclude-newer` cooldown configured in pyproject.toml
+- [ ] `uv-lock` pre-commit hook added to `.pre-commit-config.yaml`
+- [ ] All existing tests pass
+- [ ] Developer documentation updated
+
+### References
+
+- https://github.com/learningequality/kolibri/pull/14457 — Kolibri uv migration (reference implementation)
+
+## AI usage
+
+This issue was drafted by Claude Code based on the Kolibri uv migration work.
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 0b81597b..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,15 +0,0 @@
-
-include AUTHORS.rst
-include CONTRIBUTING.rst
-include HISTORY.rst
-include LICENSE
-include README.md
-
-recursive-include tests *
-recursive-exclude tests/testcontent *
-recursive-exclude * __pycache__
-recursive-exclude * *.py[co]
-
-recursive-include docs *.md *.rst conf.py Makefile make.bat *.jpg *.png *.gif
-
-recursive-exclude docs/tutorial/storage *
diff --git a/Makefile b/Makefile
index 95f75441..e95b008f 100644
--- a/Makefile
+++ b/Makefile
@@ -43,35 +43,35 @@ clean-pyc: ## remove Python file artifacts
find . -name '__pycache__' -exec rm -fr {} +
clean-test: ## remove test and coverage artifacts
- rm -fr .tox/
rm -f .coverage
rm -fr htmlcov/
rm -rf tests/testcontent/downloaded/*
rm -rf tests/testcontent/generated/*
-lint:
- pre-commit run --all-files
+lint: ## run linting with prek
+ uvx prek run --all-files
test: clean-test ## run tests quickly with the default Python
- pytest
+ uv run --group test pytest
-
-test-all: clean-test ## run tests on every Python version with tox
- tox
+test-all: clean-test ## run tests on every Python version
+ for py in 3.9 3.10 3.11 3.12 3.13; do \
+ echo "Testing Python $$py"; \
+ uv run --python $$py --group test pytest || exit 1; \
+ done
integration-test:
echo "Testing against hotfixes"
- CONTENTWORKSHOP_URL=https://hotfixes.studio.learningequality.org python tests/test_chef_integration.py
+ CONTENTWORKSHOP_URL=https://hotfixes.studio.learningequality.org uv run python tests/test_chef_integration.py
echo "Testing against unstable"
- CONTENTWORKSHOP_URL=https://unstable.studio.learningequality.org python tests/test_chef_integration.py
+ CONTENTWORKSHOP_URL=https://unstable.studio.learningequality.org uv run python tests/test_chef_integration.py
echo "Testing against production"
- CONTENTWORKSHOP_URL=https://studio.learningequality.org python tests/test_chef_integration.py
+ CONTENTWORKSHOP_URL=https://studio.learningequality.org uv run python tests/test_chef_integration.py
coverage: ## check code coverage quickly with the default Python
- pip install coverage pytest
- coverage run --source ricecooker -m pytest
- coverage report -m
- coverage html
+ uv run --group test --with coverage coverage run --source ricecooker -m pytest
+ uv run --with coverage coverage report -m
+ uv run --with coverage coverage html
$(BROWSER) htmlcov/index.html
docsclean:
@@ -79,26 +79,21 @@ docsclean:
rm -f docs/_build/*
docs: ## generate Sphinx HTML documentation
- pip install -r docs/requirements.txt
- $(MAKE) -C docs clean
- $(MAKE) -C docs html
- # $(BROWSER) docs/build/html/index.html
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs clean
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs html
latexdocs:
- pip install -r docs/requirements.txt
- $(MAKE) -C docs clean
- $(MAKE) -C docs latex
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs clean
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs latex
servedocs: docs ## compile the docs watching for changes
watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
-dist: clean
- pip install setuptools wheel
- python setup.py sdist bdist_wheel
+dist: clean ## build source and wheel distributions
+ uv build
release: dist ## package and upload a release
- pip install twine
- twine upload dist/*
+ uv run --with twine twine upload dist/*
install: clean ## install the package to the active Python's site-packages
- python setup.py install
+ uv sync
diff --git a/PLAN/conventions.md b/PLAN/conventions.md
new file mode 100644
index 00000000..d31eeedd
--- /dev/null
+++ b/PLAN/conventions.md
@@ -0,0 +1,56 @@
+# Ricecooker Development Conventions
+
+## Pre-Migration State (current)
+
+- **Tooling**: setuptools with `setup.py` + `setup.cfg`
+- **Package manager**: pip (no uv)
+- **Python**: 3.9, 3.10, 3.11, 3.12, 3.13
+- **Version management**: bumpversion (setup.cfg `current_version = 0.1.0`), `__version__ = "0.8.0"` in `ricecooker/__init__.py`
+- **Entry points**: `corrections` console script
+- **Extras**: `[test]`, `[dev]` (just pre-commit), `[google_drive]`, `[sentry]`
+- **Lint**: black 21.12b0, flake8 (max-line-length=160, max-complexity=10), reorder-python-imports v2.6.0
+- **Pre-commit hooks**: black, flake8, pre-commit-hooks, reorder-python-imports, yamlfmt v0.14.0, actionlint v1.7.7
+- **Test runner**: pytest 8.4.2, tox for multi-version; VCR cassettes in `tests/cassettes/`
+- **CI**: tox in pythontest.yml, pre-commit/action in pre-commit.yml, `make dist` in python-publish.yml
+- **Docs**: Sphinx with RTD, `pip install -e .` in `.readthedocs.yml`
+- **Install for dev**: `pip install -e .[dev]`
+
+## Post-Migration Target
+
+- **Tooling**: setuptools with `pyproject.toml` (PEP 621 + PEP 735 dependency groups)
+- **Package manager**: uv
+- **Version management**: setuptools-scm (dynamic version from git tags)
+- **Dependency groups**: `test` (pytest, vcrpy, etc.), `dev` (includes test + ruff)
+- **Optional deps**: `google_drive`, `sentry` (user-facing, stay as `[project.optional-dependencies]`)
+- **Lint/format**: ruff (replaces black, flake8, reorder-python-imports)
+- **Pre-commit**: prek (run via `uvx prek`), hooks: ruff, ruff-format, pre-commit-hooks, yamlfmt, actionlint, uv-lock
+- **CI**: `astral-sh/setup-uv@v7` with `enable-cache: true` + `cache-python: true`, `uv run --group test pytest`
+- **Supply chain**: `exclude-newer = "7 days"` with `exclude-newer-package = ["le-utils"]`
+- **Docs**: RTD uses `pip install uv && uv pip install -e . --system`
+- **Install for dev**: `uv sync --group dev`
+
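+A minimal sketch of the day-to-day commands this target implies (assumes uv is installed; prek is fetched on demand by `uvx`):
+
+```bash
+uv sync --group dev        # create .venv and install the dev + test dependency groups
+uv run --group test pytest # run the test suite
+uvx prek run --all-files   # lint (prek resolved into an ephemeral environment)
+uv build                   # build sdist + wheel into dist/
+```
+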
+## Code Style (unchanged)
+
+- **Line length**: 160 chars
+- **Indentation**: 4 spaces (`.editorconfig`)
+- **EOL**: LF, UTF-8, final newline yes, trim trailing whitespace
+- **Makefile**: uses tabs (per `.editorconfig`)
+- **Imports**: managed by ruff isort (force-single-line, known-first-party = ricecooker)
+- **Naming**: PascalCase classes, snake_case functions/methods, UPPER_SNAKE_CASE constants
+
+## Project Structure (unchanged)
+
+```
+ricecooker/ # Main package
+ __init__.py # Version via importlib.metadata + Python version check
+ chefs.py # SushiChef base class
+ commands.py # CLI implementation
+ config.py # Configuration & logging
+ exceptions.py # Custom exceptions
+ classes/ # Node, file, license, question models
+ managers/ # Progress and tree management
+ utils/ # Utility modules
+tests/ # pytest tests + cassettes/
+docs/ # Sphinx docs (RTD, Python 3.11, Ubuntu 22.04)
+examples/ # Sample implementations
+```
diff --git a/PLAN/index.md b/PLAN/index.md
new file mode 100644
index 00000000..68cdb5a4
--- /dev/null
+++ b/PLAN/index.md
@@ -0,0 +1,1044 @@
+# Migrate to uv + ruff + pyproject.toml — Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Migrate ricecooker from pip/tox/setup.py/black/flake8 to uv/ruff/pyproject.toml, matching the Kolibri ecosystem approach (learningequality/kolibri#14457).
+
+**Architecture:** Replace `setup.py` + `setup.cfg` with a single `pyproject.toml` using setuptools backend with setuptools-scm for versioning and PEP 735 dependency groups. Replace tox with direct `uv run pytest` in CI. Replace black/flake8/reorder-python-imports with ruff. Replace pre-commit with prek. Update all three CI workflows accordingly.
+
+**Tech Stack:** uv, ruff, setuptools-scm, prek, astral-sh/setup-uv@v7, PEP 735 dependency groups
+
+---
+
+## File Map
+
+| Action | File | Responsibility |
+|--------|------|---------------|
+| Create | `pyproject.toml` | Single source of project metadata, dependencies, build config, ruff config, pytest config |
+| Delete | `setup.py` | Replaced by pyproject.toml |
+| Delete | `setup.cfg` | Replaced by pyproject.toml (flake8 config → ruff, bumpversion → setuptools-scm) |
+| Delete | `tox.ini` | Replaced by direct uv invocation in CI |
+| Delete | `pytest.ini` | Merged into pyproject.toml `[tool.pytest.ini_options]` |
+| Delete | `MANIFEST.in` | Not needed with modern setuptools + pyproject.toml |
+| Modify | `ricecooker/__init__.py` | Remove hardcoded `__version__`, use importlib.metadata |
+| Modify | `docs/conf.py` | Update version import to use importlib.metadata |
+| Modify | `.pre-commit-config.yaml` | Replace black/flake8/reorder-python-imports with ruff + add uv-lock hook, use prek |
+| Modify | `.github/workflows/pythontest.yml` | Replace setup-python/tox with setup-uv/uv run |
+| Modify | `.github/workflows/pre-commit.yml` | Replace pre-commit/action with prek |
+| Modify | `.github/workflows/python-publish.yml` | Use uv for build |
+| Modify | `.github/dependabot.yml` | Change pip ecosystem to uv |
+| Modify | `.readthedocs.yml` | Use uv for install |
+| Modify | `Makefile` | Update targets to use uv and ruff |
+| Modify | `CONTRIBUTING.md` | Update developer setup instructions for uv |
+| Modify | `.gitignore` | Add uv-specific entries, remove tox entries |
+
+## Task Dependencies
+
+Tasks **1 → 2 → 3 → 4** are strictly sequential — each depends on the previous. Task 5 depends on Task 4 (needs `uv sync --group dev` to have run). Tasks 6–13 are independent of each other but all depend on Tasks 1–4 being complete. Task 14 must run last.
+
+```
+1 → 2 → 3 → 4 → 5 ──→ 14
+ ├→ 6 ─┤
+ ├→ 7 ─┤
+ ├→ 8 ─┤
+ ├→ 9 ─┤
+ ├→ 10 ┤
+ ├→ 11 ┤
+ ├→ 12 ┤
+ └→ 13 ┘
+```
+
+---
+
+### Task 1: Create pyproject.toml with project metadata and dependencies
+
+This is the foundational task. All subsequent tasks depend on the package being defined here.
+
+**Files:**
+- Create: `pyproject.toml`
+- Read (reference only): `setup.py`, `setup.cfg`, `pytest.ini`
+
+- [x] **Step 1: Create pyproject.toml with build system, metadata, and dependencies**
+
+```toml
+[build-system]
+requires = ["setuptools>=75.0", "setuptools-scm>=8"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ricecooker"
+description = "API for adding content to the Kolibri content curation server"
+authors = [
+ {name = "Learning Equality", email = "dev@learningequality.org"},
+]
+license = "MIT"
+readme = "README.md"
+keywords = ["ricecooker"]
+requires-python = ">=3.9, <3.14"
+dynamic = ["version"]
+classifiers = [
+ "Intended Audience :: Developers",
+ "Development Status :: 5 - Production/Stable",
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Natural Language :: English",
+ "Topic :: Education",
+]
+dependencies = [
+ "requests>=2.11.1",
+ "le_utils>=0.2.10",
+ "requests_file",
+ "beautifulsoup4>=4.6.3,<4.9.0",
+ "selenium==4.36.0",
+ "yt-dlp>=2024.12.23",
+ "html5lib",
+ "cachecontrol==0.14.3",
+ "filelock==3.19.1",
+ "css-html-js-minify==2.5.5",
+ "pypdf2==1.26.0",
+ "dictdiffer>=0.8.0",
+ "Pillow==11.3.0",
+ "colorlog>=4.1.0,<6.11",
+ "chardet==5.2.0",
+ "ffmpy>=0.2.2",
+ "pdf2image==1.17.0",
+ "le-pycaption>=2.2.0a1",
+ "EbookLib>=0.17.1",
+ "filetype>=1.1.0",
+ "urllib3==2.6.3",
+ "langcodes[data]==3.5.1",
+]
+
+[project.optional-dependencies]
+google_drive = ["google-api-python-client", "google-auth"]
+sentry = ["sentry-sdk>=2.32.0"]
+
+[project.scripts]
+corrections = "ricecooker.utils.corrections:correctionsmain"
+
+[project.urls]
+Homepage = "https://github.com/learningequality/ricecooker"
+
+[dependency-groups]
+test = [
+ "requests-cache==1.2.1",
+ "pytest==8.4.2",
+ "pytest-env==1.1.5",
+ "vcrpy==7.0.0; python_version >='3.10'",
+ "mock==5.2.0",
+]
+dev = [
+ {include-group = "test"},
+ "ruff>=0.11",
+]
+
+[tool.setuptools-scm]
+
+[tool.setuptools.packages.find]
+include = ["ricecooker*"]
+
+[tool.uv]
+exclude-newer = "7 days"
+exclude-newer-package = ["le-utils"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+norecursedirs = ["docs", "examples", "resources"]
+env = [
+ "RICECOOKER_STORAGE=./.pytest_storage",
+ "RICECOOKER_FILECACHE=./.pytest_filecache",
+]
+
+[tool.ruff]
+line-length = 160
+exclude = ["docs", "examples"]
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "C90"]
+ignore = ["E226", "E203", "E741"]
+# flake8's W503 (line break before binary operator) has no ruff equivalent, so it needs no ignore
+# flake8's E41 ignore has no direct ruff selector; ignore individual E4xx codes if needed
+
+[tool.ruff.lint.mccabe]
+max-complexity = 10
+
+[tool.ruff.lint.isort]
+known-first-party = ["ricecooker"]
+force-single-line = true
+```
+
+Note: The current `setup.py` concatenates `README.md` + `docs/history.rst` for PyPI long_description. The `pyproject.toml` `readme` field only supports a single file. This is an intentional simplification — the changelog in `docs/history.rst` is stale (last entry 2020) and not useful on PyPI.
+
+**⚠ Verify at implementation time:** Run `uv help` or check uv docs to confirm that `exclude-newer = "7 days"` (relative duration) and `exclude-newer-package` are supported in `[tool.uv]`. If `exclude-newer` only accepts RFC 3339 dates, use today's date (e.g., `"2026-04-01"`) and add a comment explaining to update it periodically. If `exclude-newer-package` is not supported, remove it and add a comment noting that le-utils is not exempt.
+
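+A quick probe for this, sketched on the assumption that the installed uv supports `uv lock --dry-run`:
+
+```bash
+# An unsupported [tool.uv] key fails fast with a config parse error,
+# before any actual resolution happens.
+uv lock --dry-run || echo "re-check exclude-newer syntax against this uv version's docs"
+```
+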
+- [x] **Step 2: Verify pyproject.toml parses correctly**
+
+Run: `python -c "import tomllib; tomllib.load(open('pyproject.toml', 'rb')); print('OK')"` (requires Python 3.11+, where `tomllib` entered the stdlib)
+Expected: `OK`
+
+- [x] **Step 3: Commit**
+
+```bash
+git add pyproject.toml
+git commit -m "feat: add pyproject.toml with project metadata, dependencies, and tool config"
+```
+
+---
+
+### Task 2: Update version management to setuptools-scm
+
+Replace the hardcoded `__version__` in `ricecooker/__init__.py` with dynamic version from `importlib.metadata`, and update `docs/conf.py` to match.
+
+**Files:**
+- Modify: `ricecooker/__init__.py`
+- Modify: `docs/conf.py:24`
+
+- [x] **Step 1: Update ricecooker/__init__.py**
+
+Replace the entire contents of `ricecooker/__init__.py` with:
+
+```python
+# -*- coding: utf-8 -*-
+
+__author__ = "Learning Equality"
+__email__ = "info@learningequality.org"
+
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version
+
+try:
+ __version__ = version("ricecooker")
+except PackageNotFoundError:
+ __version__ = "0.0.0.dev0"
+
+
+import sys
+
+if sys.version_info < (3, 9, 0):
+ raise RuntimeError("Ricecooker only supports Python 3.9+")
+```
+
+- [x] **Step 2: Update docs/conf.py version import**
+
+In `docs/conf.py`, replace line 24:
+```python
+from ricecooker import __version__ as current_ricecooker_version
+```
+with:
+```python
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as get_version
+
+try:
+ current_ricecooker_version = get_version("ricecooker")
+except PackageNotFoundError:
+ current_ricecooker_version = "0.0.0.dev0"
+```
+
+Also remove the `from ricecooker import __version__ as current_ricecooker_version` import line.
+
+Note: `docs/conf.py` uses `current_ricecooker_version` in later lines for `version` and `release` assignments — those remain unchanged.
+
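+Optional sanity check once Task 4's `uv sync` has run (a sketch; the exact version printed depends on local git tags):
+
+```bash
+# Both entry points should agree on the same version string.
+uv run python -c "import ricecooker; print(ricecooker.__version__)"
+uv run python -c "from importlib.metadata import version; print(version('ricecooker'))"
+```
+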
+- [x] **Step 3: Commit**
+
+```bash
+git add ricecooker/__init__.py docs/conf.py
+git commit -m "feat: switch to setuptools-scm for version management"
+```
+
+---
+
+### Task 3: Delete legacy build/config files
+
+Remove `setup.py`, `setup.cfg`, `tox.ini`, `pytest.ini`, and `MANIFEST.in` — all replaced by `pyproject.toml`.
+
+**Files:**
+- Delete: `setup.py`
+- Delete: `setup.cfg`
+- Delete: `tox.ini`
+- Delete: `pytest.ini`
+- Delete: `MANIFEST.in`
+
+- [x] **Step 1: Delete legacy files**
+
+```bash
+git rm setup.py setup.cfg tox.ini pytest.ini MANIFEST.in
+```
+
+- [x] **Step 2: Verify the package metadata resolves from pyproject.toml alone**
+
+Run: `python -c "import tomllib; data = tomllib.load(open('pyproject.toml', 'rb')); assert data['project']['name'] == 'ricecooker'; assert 'version' in data['project']['dynamic']; print('OK')"`
+Expected: `OK`
+
+- [x] **Step 3: Verify uv can resolve the project**
+
+Run: `uv lock --check 2>&1 || uv lock 2>&1 | tail -5`
+Expected: uv can resolve the project with only `pyproject.toml` (no `setup.py`). If this fails, it will be caught and fixed here rather than in Task 4.
+
+- [x] **Step 4: Commit**
+
+```bash
+git commit -m "chore: remove setup.py, setup.cfg, tox.ini, pytest.ini, MANIFEST.in"
+```
+
+---
+
+### Task 4: Generate uv.lock
+
+Initialize the uv lockfile so all dependency resolution is pinned.
+
+**Files:**
+- Create: `uv.lock` (generated)
+
+- [x] **Step 1: Run uv lock**
+
+Run: `uv lock`
+Expected: Creates `uv.lock` without errors.
+
+**Troubleshooting `exclude-newer`:**
+- If `uv lock` fails with an error about the `exclude-newer` or `exclude-newer-package` config keys, check the installed uv version's docs. The `exclude-newer` field may only accept RFC 3339 dates (e.g., `"2026-04-01"`), not relative durations like `"7 days"`. Similarly, `exclude-newer-package` may not exist — if so, remove it and add a comment.
+- If `exclude-newer` causes resolution failures because packages were published outside the window, temporarily remove the `exclude-newer` line, run `uv lock`, then add it back. The constraint only affects future resolutions — existing locked versions are preserved.
+
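+If the relative duration is rejected, a fallback sketch (assumes a POSIX shell; `sed -i.bak` behaves the same on GNU and BSD sed):
+
+```bash
+# Pin today's date (RFC 3339) in place of the relative duration, then re-resolve.
+sed -i.bak 's/exclude-newer = "7 days"/exclude-newer = "'"$(date +%F)"'"/' pyproject.toml
+uv lock
+```
+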
+- [x] **Step 2: Verify uv sync works**
+
+Run: `uv sync --group dev`
+Expected: Installs all dependencies including dev group without errors.
+
+- [x] **Step 3: Verify tests can run**
+
+Run: `uv run --with pytest-timeout pytest tests/ -x --timeout=60 -q 2>&1 | head -30`
+(The `--timeout` flag is provided by the pytest-timeout plugin, which is not in the test group, hence `--with pytest-timeout`.)
+Expected: Tests start running (some may fail due to missing system deps like ffmpeg, but pytest itself should start).
+
+- [x] **Step 4: Commit**
+
+```bash
+git add uv.lock
+git commit -m "chore: add uv.lock"
+```
+
+---
+
+### Task 5: Replace pre-commit config with ruff + prek
+
+Replace black/flake8/reorder-python-imports hooks with ruff, add uv-lock hook, and configure for use with prek instead of pre-commit.
+
+**Files:**
+- Modify: `.pre-commit-config.yaml`
+
+- [x] **Step 1: Replace .pre-commit-config.yaml contents**
+
+Replace the entire `.pre-commit-config.yaml` with:
+
+```yaml
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.11.6
+ hooks:
+ - id: ruff
+ args: [--fix]
+ - id: ruff-format
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.1.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-yaml
+ - id: check-added-large-files
+ exclude: '^tests/cassettes'
+ - id: debug-statements
+ - id: end-of-file-fixer
+ exclude: '^.+?\.json$'
+ - repo: https://github.com/google/yamlfmt
+ rev: v0.14.0
+ hooks:
+ - id: yamlfmt
+ exclude: '^tests/cassettes'
+ - repo: https://github.com/rhysd/actionlint
+ rev: v1.7.7
+ hooks:
+ - id: actionlint
+ # Expects shellcheck to be installed on the system
+ # https://github.com/koalaman/shellcheck#installing
+ - repo: https://github.com/astral-sh/uv-pre-commit
+ rev: 0.7.2
+ hooks:
+ - id: uv-lock
+```
+
+Note: The exact `rev` for ruff-pre-commit and uv-pre-commit should be checked at implementation time. Use the latest stable versions. The versions above are approximate — check https://github.com/astral-sh/ruff-pre-commit/releases and https://github.com/astral-sh/uv-pre-commit/releases for the current latest.
+
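+One way to check the newest tags without cloning (plain git; `--sort` needs git 2.18+):
+
+```bash
+git ls-remote --tags --sort=-v:refname https://github.com/astral-sh/ruff-pre-commit | head -3
+git ls-remote --tags --sort=-v:refname https://github.com/astral-sh/uv-pre-commit | head -3
+```
+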
+- [x] **Step 2: Verify prek can run the hooks**
+
+Run: `uvx prek run --all-files 2>&1 | tail -20`
+Expected: All hooks run. Ruff may report formatting/lint fixes on first run — that's expected and will be addressed next.
+
+- [x] **Step 3: Apply ruff formatting fixes across the codebase**
+
+Run: `uv run --group dev ruff check --fix . && uv run --group dev ruff format .`
+Expected: Ruff applies auto-fixes for import sorting and formatting.
+
+- [x] **Step 4: Run prek again to verify all hooks pass**
+
+Run: `uvx prek run --all-files 2>&1 | tail -20`
+Expected: All hooks pass.
+
+- [x] **Step 5: Run tests to verify ruff changes didn't break anything**
+
+Run: `uv run pytest tests/ -x -q 2>&1 | tail -20`
+Expected: Tests pass (same results as before ruff changes).
+
+- [x] **Step 6: Commit**
+
+```bash
+git add .pre-commit-config.yaml
+git commit -m "feat: replace black/flake8/reorder-python-imports with ruff, add uv-lock hook"
+```
+
+- [x] **Step 7: Commit ruff formatting changes separately**
+
+```bash
+git add -u
+git commit -m "style: apply ruff formatting and import sorting across codebase"
+```
+
+---
+
+### Task 6: Update CI workflow — pythontest.yml
+
+Replace setup-python + tox with setup-uv + uv run pytest.
+
+**Files:**
+- Modify: `.github/workflows/pythontest.yml`
+
+- [x] **Step 1: Replace pythontest.yml contents**
+
+Replace the entire `.github/workflows/pythontest.yml` with:
+
+```yaml
+name: Python tests
+on:
+ push:
+ branches:
+ - develop
+ - main
+ pull_request:
+ branches:
+ - develop
+ - main
+jobs:
+ pre_job:
+ name: Path match check
+ runs-on: ubuntu-latest
+ # Map a step output to a job output
+ outputs:
+ should_skip: ${{ steps.skip_check.outputs.should_skip }}
+ steps:
+ - id: skip_check
+ uses: fkirc/skip-duplicate-actions@master
+ with:
+ github_token: ${{ github.token }}
+ paths: '["**.py", "pyproject.toml", "uv.lock", ".github/workflows/pythontest.yml"]'
+ unit_test:
+ name: Python unit tests
+ needs: pre_job
+ runs-on: ${{ matrix.os }}
+ strategy:
+ max-parallel: 5
+ fail-fast: false
+ matrix:
+ os: [windows-latest, ubuntu-latest, macos-latest]
+ python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
+ steps:
+      - uses: actions/checkout@v6
+        if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
+        with:
+          fetch-depth: 0
+ - name: Set up uv
+ if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
+ uses: astral-sh/setup-uv@v7
+ with:
+ enable-cache: true
+ cache-python: true
+ python-version: ${{ matrix.python-version }}
+ - name: Install Ubuntu dependencies
+ run: |
+ sudo apt-get -y -qq update
+ sudo apt-get install -y ffmpeg
+ sudo apt-get install -y poppler-utils
+ if: ${{ needs.pre_job.outputs.should_skip != 'true' && startsWith(matrix.os, 'ubuntu') }}
+ - name: Cache Mac dependencies
+ uses: actions/cache@v5
+ if: needs.pre_job.outputs.should_skip != 'true' && matrix.os == 'macos-latest'
+ with:
+ path: ~/Library/Caches/Homebrew
+ key: ${{ runner.os }}-brew-${{ hashFiles('.github/workflows/pythontest.yml') }}
+ - name: Unlink Homebrew Python 3.13 if not testing 3.13
+ if: needs.pre_job.outputs.should_skip != 'true' && matrix.os == 'macos-latest' && matrix.python-version != '3.13'
+ run: brew unlink python@3.13 || true
+ - name: Install Mac dependencies
+ run: |
+ # Conditionally link python@3.13 to avoid conflicts when testing Python 3.13
+ # See: https://github.com/actions/runner-images/issues/9966
+ if [[ "${{ matrix.python-version }}" != "3.13" ]]; then
+ echo "Linking Homebrew python@3.13"
+ brew link --overwrite python@3.13
+ else
+ echo "Skipping Homebrew python@3.13 linking for Python 3.13 test."
+ fi
+ brew install ffmpeg poppler
+ if: needs.pre_job.outputs.should_skip != 'true' && matrix.os == 'macos-latest'
+ - name: Windows dependencies cache
+ id: windowscache
+ if: needs.pre_job.outputs.should_skip != 'true' && matrix.os == 'windows-latest'
+ uses: actions/cache@v5
+ with:
+ path: ${{ github.workspace }}\tools
+ key: ${{ runner.os }}-${{ matrix.python-version }}-tools-${{ hashFiles('.github/workflows/pythontest.yml') }}
+ - name: Download Windows dependencies if needed
+ if: needs.pre_job.outputs.should_skip != 'true' && matrix.os == 'windows-latest'
+ shell: pwsh
+ run: |
+ # Create tools directory if it doesn't exist
+ New-Item -Path "tools" -ItemType Directory -Force -ErrorAction SilentlyContinue
+
+ # Check and download FFmpeg if needed
+ if (-not (Test-Path "$env:GITHUB_WORKSPACE\tools\ffmpeg-master-latest-win64-gpl\bin\ffmpeg.exe")) {
+ Write-Output "FFmpeg not found, downloading..."
+ curl.exe --output ffmpeg.zip -L https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip
+ 7z x ffmpeg.zip -otools -y
+ } else {
+ Write-Output "FFmpeg already exists, skipping download"
+ }
+
+ # Check and download Poppler if needed
+ if (-not (Test-Path "$env:GITHUB_WORKSPACE\tools\poppler-21.11.0\Library\bin\pdfinfo.exe")) {
+ Write-Output "Poppler not found, downloading..."
+ curl.exe --output poppler.zip -L https://github.com/oschwartz10612/poppler-windows/releases/download/v21.11.0-0/Release-21.11.0-0.zip
+ 7z x poppler.zip -otools -y
+ } else {
+ Write-Output "Poppler already exists, skipping download"
+ }
+ - name: Set paths to Windows dependencies
+ if: needs.pre_job.outputs.should_skip != 'true' && matrix.os == 'windows-latest'
+ shell: pwsh
+ run: |
+ Add-Content -Path $env:GITHUB_PATH -Value "$env:GITHUB_WORKSPACE\tools\ffmpeg-master-latest-win64-gpl\bin" -Encoding utf8
+ Add-Content -Path $env:GITHUB_PATH -Value "$env:GITHUB_WORKSPACE\tools\poppler-21.11.0\Library\bin" -Encoding utf8
+ - name: Run tests
+ if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
+ run: uv run --group test --extra google_drive pytest
+```
+
+Key changes from the original:
+- `actions/setup-python` → `astral-sh/setup-uv@v7` with `enable-cache: true` and `cache-python: true`
+- Removed pip install tox, tox env cache steps
+- `tox -e py${{ matrix.python-version }}` → `uv run --group test --extra google_drive pytest`
+- Updated `paths` in skip check: `setup.py` → `pyproject.toml`, added `uv.lock`
+- Added `fetch-depth: 0` to checkout so setuptools-scm can derive the version from git tags
+- **New:** Added Homebrew Python 3.13 unlinking workaround for macOS (not in original workflow — addresses [actions/runner-images#9966](https://github.com/actions/runner-images/issues/9966))
+- Note: If Windows has issues with `cache-python: true`, it can be turned off for the Windows matrix entries — the CI run will surface this.
+- Note: The other workflow files (`call-pull-request-target.yml`, `call-*.yml`, `community-contribution-labeling.yml`) do not use Python/pip directly and need no changes.
+
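+For local debugging, a single matrix cell can be reproduced with one command (assumes uv can download the requested CPython):
+
+```bash
+# Mirrors what CI runs for the ubuntu-latest / 3.11 cell.
+uv run --python 3.11 --group test --extra google_drive pytest
+```
+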
+- [x] **Step 2: Commit**
+
+```bash
+git add .github/workflows/pythontest.yml
+git commit -m "ci: replace tox with uv run pytest in test workflow"
+```
+
+---
+
+### Task 7: Update CI workflow — pre-commit.yml
+
+Replace pre-commit/action with prek via uv.
+
+**Files:**
+- Modify: `.github/workflows/pre-commit.yml`
+
+- [x] **Step 1: Replace pre-commit.yml contents**
+
+Replace the entire `.github/workflows/pre-commit.yml` with:
+
+```yaml
+name: Linting
+on:
+ push:
+ branches:
+ - develop
+ - main
+ pull_request:
+ branches:
+ - develop
+ - main
+jobs:
+ pre_job:
+ name: Path match check
+ runs-on: ubuntu-latest
+ # Map a step output to a job output
+ outputs:
+ should_skip: ${{ steps.skip_check.outputs.should_skip }}
+ steps:
+ - id: skip_check
+ uses: fkirc/skip-duplicate-actions@master
+ with:
+ github_token: ${{ github.token }}
+ paths_ignore: '["**.po", "**.json"]'
+ linting:
+ name: All file linting
+ needs: pre_job
+ if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v6
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+ with:
+ enable-cache: true
+ cache-python: true
+ - name: Run prek
+ run: uvx prek run --all-files
+```
+
+Key changes:
+- Removed `actions/setup-python` (uv manages Python)
+- Replaced `pre-commit/action@v3.0.1` with `uvx prek run --all-files`
+
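+`uvx` resolves prek into an ephemeral environment on each run; if lint drift between CI runs ever becomes a problem, a version can be pinned (the version below is hypothetical):
+
+```bash
+uvx prek@0.2.1 run --all-files
+```
+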
+- [x] **Step 2: Commit**
+
+```bash
+git add .github/workflows/pre-commit.yml
+git commit -m "ci: replace pre-commit/action with prek in linting workflow"
+```
+
+---
+
+### Task 8: Update CI workflow — python-publish.yml
+
+Replace pip/setuptools build with uv build.
+
+**Files:**
+- Modify: `.github/workflows/python-publish.yml`
+
+- [x] **Step 1: Replace python-publish.yml contents**
+
+Replace the entire `.github/workflows/python-publish.yml` with:
+
+```yaml
+# This workflow will upload a Python Package using pypa/gh-action-pypi-publish when a release is created
+
+name: Upload Python Package
+on:
+ release:
+ types: [published]
+jobs:
+ deploy:
+ runs-on: ubuntu-latest
+ permissions:
+ # IMPORTANT: this permission is mandatory for trusted publishing
+ id-token: write
+ steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+ with:
+ enable-cache: true
+ cache-python: true
+ - name: Build distribution
+ run: uv build
+ - name: Publish package distributions to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+```
+
+Key changes:
+- Removed `actions/setup-python`, pip install setuptools/wheel/pre-commit
+- Replaced `make dist` with `uv build`
+- Added `fetch-depth: 0` so setuptools-scm can see the release tag when computing the version
+
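+The build step can be smoke-tested locally before tagging a release:
+
+```bash
+uv build
+ls dist/  # expect ricecooker-<version>.tar.gz and a matching .whl
+```
+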
+- [x] **Step 2: Commit**
+
+```bash
+git add .github/workflows/python-publish.yml
+git commit -m "ci: use uv build in publish workflow"
+```
+
+---
+
+### Task 9: Update dependabot.yml
+
+Change the pip ecosystem to uv so dependabot tracks `uv.lock`.
+
+**Files:**
+- Modify: `.github/dependabot.yml`
+
+- [x] **Step 1: Update dependabot.yml**
+
+Replace the pip ecosystem section. Change:
+```yaml
+ - package-ecosystem: "pip"
+```
+to:
+```yaml
+ - package-ecosystem: "uv"
+```
+
+The rest of the pip section (directory, schedule, cooldown) stays the same.
+
+- [x] **Step 2: Commit**
+
+```bash
+git add .github/dependabot.yml
+git commit -m "ci: switch dependabot from pip to uv ecosystem"
+```
+
+---
+
+### Task 10: Update Makefile
+
+Update targets to use uv and ruff instead of pip, tox, and pre-commit.
+
+**Files:**
+- Modify: `Makefile`
+
+- [x] **Step 1: Update Makefile**
+
+Replace the entire `Makefile` with:
+
+```makefile
+.PHONY: clean clean-test clean-pyc clean-build docs help
+.DEFAULT_GOAL := help
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+try:
+ from urllib import pathname2url
+except:
+ from urllib.request import pathname2url
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+ match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+ if match:
+ target, help = match.groups()
+ print("%-20s %s" % (target, help))
+endef
+export PRINT_HELP_PYSCRIPT
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+help:
+ @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
+
+clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
+
+
+clean-build: ## remove build artifacts
+ rm -fr build/
+ rm -fr dist/
+ rm -fr .eggs/
+ find . -name '*.egg-info' -exec rm -fr {} +
+ find . -name '*.egg' -exec rm -f {} +
+
+clean-pyc: ## remove Python file artifacts
+ find . -name '*.pyc' -exec rm -f {} +
+ find . -name '*.pyo' -exec rm -f {} +
+ find . -name '*~' -exec rm -f {} +
+ find . -name '__pycache__' -exec rm -fr {} +
+
+clean-test: ## remove test and coverage artifacts
+ rm -f .coverage
+ rm -fr htmlcov/
+ rm -rf tests/testcontent/downloaded/*
+ rm -rf tests/testcontent/generated/*
+
+lint: ## run linting with prek
+ uvx prek run --all-files
+
+test: clean-test ## run tests quickly with the default Python
+ uv run --group test pytest
+
+test-all: clean-test ## run tests on every Python version
+ for py in 3.9 3.10 3.11 3.12 3.13; do \
+ echo "Testing Python $$py"; \
+ uv run --python $$py --group test pytest || exit 1; \
+ done
+
+integration-test:
+ echo "Testing against hotfixes"
+ CONTENTWORKSHOP_URL=https://hotfixes.studio.learningequality.org uv run python tests/test_chef_integration.py
+ echo "Testing against unstable"
+ CONTENTWORKSHOP_URL=https://unstable.studio.learningequality.org uv run python tests/test_chef_integration.py
+ echo "Testing against production"
+ CONTENTWORKSHOP_URL=https://studio.learningequality.org uv run python tests/test_chef_integration.py
+
+coverage: ## check code coverage quickly with the default Python
+ uv run --group test --with coverage coverage run --source ricecooker -m pytest
+ uv run --with coverage coverage report -m
+ uv run --with coverage coverage html
+ $(BROWSER) htmlcov/index.html
+
+docsclean:
+ $(MAKE) -C docs clean
+ rm -f docs/_build/*
+
+docs: ## generate Sphinx HTML documentation
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs clean
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs html
+
+latexdocs:
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs clean
+ uv run --with-requirements docs/requirements.txt $(MAKE) -C docs latex
+
+servedocs: docs ## compile the docs watching for changes
+ watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
+
+dist: clean ## build source and wheel distributions
+ uv build
+
+release: dist ## package and upload a release
+ uv run --with twine twine upload dist/*
+
+install: clean ## install the package to the active Python's site-packages
+ uv sync
+```
+
+Key changes:
+- `pre-commit run --all-files` → `uvx prek run --all-files`
+- `pytest` → `uv run --group test pytest`
+- `tox` → loop over Python versions with `uv run --python`
+- `pip install ...` → `uv run --with ...`
+- `python setup.py sdist bdist_wheel` → `uv build`
+- Removed `.tox/` from `clean-test`
+- `make install` → `uv sync`
+
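+Typical local usage after this change (assumes uv is on PATH):
+
+```bash
+make lint     # uvx prek run --all-files
+make test     # uv run --group test pytest
+make test-all # uv run --python 3.9 ... 3.13, one pass per version
+```
+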
+- [x] **Step 2: Commit**
+
+```bash
+git add Makefile
+git commit -m "chore: update Makefile to use uv, ruff, and prek"
+```
+
+---
+
+### Task 11: Update .readthedocs.yml
+
+Update ReadTheDocs config to use uv for installation.
+
+**Files:**
+- Modify: `.readthedocs.yml`
+
+- [x] **Step 1: Update .readthedocs.yml**
+
+Replace the entire `.readthedocs.yml` with:
+
+```yaml
+version: 2
+formats: all
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.11"
+ jobs:
+ pre_install:
+ - pip install uv
+ - uv pip install -e . --system
+sphinx:
+ configuration: docs/conf.py
+python:
+ install:
+ - requirements: docs/requirements.txt
+```
+
+Note: ReadTheDocs doesn't natively support uv yet, so we use `pip install uv` in the pre_install step and then `uv pip install -e . --system` to install the package into RTD's managed environment (NOT `uv sync`, which would create an isolated `.venv` that RTD ignores). The `--system` flag tells uv to install into the active environment. The docs/requirements.txt is still installed separately by RTD's Python config.
+
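+A local approximation of the pre_install steps, plus one verification line that is not part of the RTD config (try it in a disposable container or venv, not a workstation's system Python):
+
+```bash
+pip install uv
+uv pip install -e . --system
+uv pip list --system | grep -i ricecooker  # confirm it landed in the active environment
+```
+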
+- [x] **Step 2: Commit**
+
+```bash
+git add .readthedocs.yml
+git commit -m "ci: update ReadTheDocs config to use uv"
+```
+
+---
+
+### Task 12: Update .gitignore
+
+Add uv-specific entries and remove obsolete tox entries.
+
+**Files:**
+- Modify: `.gitignore`
+
+- [x] **Step 1: Update .gitignore**
+
+Add to the end of `.gitignore` (before any blank trailing lines):
+
+```
+# uv
+.venv/
+```
+
+Remove the `.tox/` line from the test artifacts section (it's no longer used).
+
+Note: `uv.lock` should NOT be in `.gitignore` — it must be committed.
+
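+Both rules can be verified from the repo root:
+
+```bash
+git check-ignore -v .venv && echo ".venv/ is ignored"
+git check-ignore -q uv.lock || echo "uv.lock is tracked (as required)"
+```
+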
+- [x] **Step 2: Commit**
+
+```bash
+git add .gitignore
+git commit -m "chore: update .gitignore for uv, remove tox entry"
+```
+
+---
+
+### Task 13: Update CONTRIBUTING.md
+
+Update developer setup instructions to use uv.
+
+**Files:**
+- Modify: `CONTRIBUTING.md`
+
+- [x] **Step 1: Update CONTRIBUTING.md**
+
+Replace the "Becoming a ricecooker developer" section — from the heading `Becoming a ricecooker developer` through the end of step 6 (ends at `To get `flake8` and `tox`, just `pip install` them into your virtualenv.`). Keep everything before this section and step 7 onward unchanged.
+
+Replace with:
+```markdown
+Becoming a ricecooker developer
+-------------------------------
+
+Ready to contribute? In order to work on the `ricecooker` code you'll first need
+to have [Python 3.9+](https://www.python.org/downloads/) on your computer.
+
+Here are the steps for setting up `ricecooker` for local development:
+
+1. Fork the `ricecooker` repo on GitHub.
+   The result will be your very own copy of the ricecooker codebase at
+   `https://github.com/<username>/ricecooker`.
+2. Clone your fork of the repository locally, and go into the `ricecooker` directory:
+
+ ```
+   git clone git@github.com:<username>/ricecooker.git
+ cd ricecooker/
+ ```
+
+3. Install [uv](https://docs.astral.sh/uv/getting-started/installation/) if you don't have it already:
+
+ ```
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ ```
+
+ On Windows:
+ ```
+ powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
+ ```
+
+4. Install the `ricecooker` code and its dependencies:
+
+ ```
+ uv sync --group dev
+ ```
+
+5. Create a branch for local development:
+
+ ```
+ git checkout -b name-of-your-bugfix-or-feature
+ ```
+
+ Now you can make your changes locally.
+
+
+6. When you're done making changes, check that your changes pass linting
+ and the `ricecooker` test suite:
+
+ Run linting:
+ ```
+ uvx prek run --all-files
+ ```
+
+ Run the tests:
+ ```
+ uv run --group test pytest
+ ```
+
+ Run tests across all supported Python versions:
+ ```
+ make test-all
+ ```
+```
+
+Also update the "Pull Request Guidelines" section. Find the text:
+```
+3. The pull request should work for Python 3.5+. Check
+ https://travis-ci.org/github/learningequality/ricecooker/pull_requests
+ and make sure that the tests pass for all supported Python versions.
+```
+Replace with:
+```
+3. The pull request should work for Python 3.9+. Check the GitHub Actions CI
+ and make sure that the tests pass for all supported Python versions.
+```
+
+- [x] **Step 2: Commit**
+
+```bash
+git add CONTRIBUTING.md
+git commit -m "docs: update CONTRIBUTING.md for uv-based development workflow"
+```
+
+---
+
+### Task 14: Simplify pass — review all changes
+
+Run the /simplify skill to review all changed files for reuse, quality, and efficiency.
+
+**Files:**
+- All files modified in Tasks 1–13
+
+- [x] **Step 1: Review pyproject.toml for redundancy**
+
+Check that:
+- No duplicate dependencies between `[project.dependencies]` and `[dependency-groups]`
+- Ruff config doesn't duplicate defaults unnecessarily
+- `[tool.setuptools.packages.find]` include pattern is correct
+
+- [x] **Step 2: Verify all tests still pass**
+
+Run: `uv run --group test pytest tests/ -x -q 2>&1 | tail -20`
+Expected: Tests pass.
+
+- [x] **Step 3: Verify linting passes**
+
+Run: `uvx prek run --all-files 2>&1 | tail -20`
+Expected: All hooks pass.
+
+- [x] **Step 4: Verify package builds correctly**
+
+Run: `uv build 2>&1 | tail -10`
+Expected: sdist and wheel built successfully.
+
+- [x] **Step 5: Verify setuptools-scm version works**
+
+Run: `uv run python -c "import ricecooker; print(ricecooker.__version__)"`
+Expected: Prints a version string (may be `0.0.0.dev0` if no git tags match, or a scm-derived version if tags exist).
+
+- [x] **Step 6: Fix any issues found and commit**
+
+If any issues are found, fix them and create a commit describing the fix.
+
+---
+
+## Acceptance Criteria Traceability
+
+| Acceptance Criterion | Task |
+|---------------------|------|
+| `pyproject.toml` replaces `setup.py`/`setup.cfg` | Tasks 1, 3 |
+| Versioning handled by setuptools-scm | Task 2 |
+| `tox.ini` removed; CI uses uv directly | Tasks 3, 6 |
+| Linting/formatting handled by ruff | Tasks 1 (config), 5 (hooks + formatting) |
+| Pre-commit uses prek | Tasks 5, 7 |
+| CI workflows use `astral-sh/setup-uv@v7` | Tasks 6, 7, 8 |
+| `exclude-newer` cooldown configured | Task 1 (pyproject.toml `[tool.uv]`) |
+| `uv-lock` pre-commit hook added | Task 5 |
+| All existing tests pass | Task 14 |
+| Developer documentation updated | Task 13 |
diff --git a/REVIEWER_FEEDBACK.md b/REVIEWER_FEEDBACK.md
new file mode 100644
index 00000000..6bfaedf9
--- /dev/null
+++ b/REVIEWER_FEEDBACK.md
@@ -0,0 +1,14 @@
+---
+pr: 663
+repo: learningequality/ricecooker
+latest_review_at: "2026-04-06T22:50:33Z"
+addressed_reviews: 4064930117
+---
+
+# Reviewer Feedback
+
+### rtibbles — COMMENTED (2026-04-06T22:50:33Z) [ADDRESSED]
+
+I think this could do with being rebased onto latest main, and including uv specific updates in AGENTS.md and CLAUDE.md
+
+---
diff --git a/docs/conf.py b/docs/conf.py
index e126a7f6..ecff329a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,9 +21,9 @@
import sys
from datetime import datetime
-from ricecooker import __version__ as current_ricecooker_version
-
sys.path.insert(0, os.path.abspath(".."))
+
+from ricecooker import __version__ as current_ricecooker_version
# from recommonmark.parser import CommonMarkParser
diff --git a/docs/examples/document_conversion.ipynb b/docs/examples/document_conversion.ipynb
index 13aea321..a163a843 100644
--- a/docs/examples/document_conversion.ipynb
+++ b/docs/examples/document_conversion.ipynb
@@ -54,19 +54,20 @@
"source": [
"# helper\n",
"def save_response_content(response, filename):\n",
- " with open(filename, 'wb') as localfile:\n",
+ " with open(filename, \"wb\") as localfile:\n",
" localfile.write(response.content)\n",
"\n",
+ "\n",
"# Download a sample .docx file\n",
- "docx_url = 'https://calibre-ebook.com/downloads/demos/demo.docx'\n",
+ "docx_url = \"https://calibre-ebook.com/downloads/demos/demo.docx\"\n",
"response1 = requests.get(docx_url)\n",
- "save_response_content(response1, 'document.docx')\n",
+ "save_response_content(response1, \"document.docx\")\n",
"\n",
"# Convert it\n",
- "microwave_url = 'http://35.185.105.222:8989/unoconv/pdf'\n",
- "files = {'file': open('sample.docx', 'rb')}\n",
+ "microwave_url = \"http://35.185.105.222:8989/unoconv/pdf\"\n",
+ "files = {\"file\": open(\"sample.docx\", \"rb\")}\n",
"response = requests.post(microwave_url, files=files)\n",
- "save_response_content(response, 'document.pdf')"
+ "save_response_content(response, \"document.pdf\")"
]
},
{
@@ -98,18 +99,19 @@
"source": [
"# helper\n",
"def save_response_content(response, filename):\n",
- " with open(filename, 'wb') as localfile:\n",
+ " with open(filename, \"wb\") as localfile:\n",
" localfile.write(response.content)\n",
"\n",
+ "\n",
"# Let's GET the poster\n",
- "png_url = 'https://www.who.int/images/default-source/health-topics/coronavirus/risk-communications/general-public/protect-yourself/infographics/masks-infographic---final.tmb-1920v.png'\n",
+ "png_url = \"https://www.who.int/images/default-source/health-topics/coronavirus/risk-communications/general-public/protect-yourself/infographics/masks-infographic---final.tmb-1920v.png\"\n",
"response1 = requests.get(png_url)\n",
- "save_response_content(response1, 'infographic.png')\n",
+ "save_response_content(response1, \"infographic.png\")\n",
"\n",
"# Convert it\n",
- "files = {'file': open('infographic.png', 'rb')}\n",
+ "files = {\"file\": open(\"infographic.png\", \"rb\")}\n",
"response = requests.post(microwave_url, files=files)\n",
- "save_response_content(response, 'infographic.pdf')"
+ "save_response_content(response, \"infographic.pdf\")"
]
},
{
diff --git a/docs/examples/exercises.ipynb b/docs/examples/exercises.ipynb
index 1c064878..000f96f4 100644
--- a/docs/examples/exercises.ipynb
+++ b/docs/examples/exercises.ipynb
@@ -39,12 +39,12 @@
"\n",
"class ExercisesChef(SushiChef):\n",
" channel_info = {\n",
- " 'CHANNEL_TITLE': 'Sample Exercises',\n",
- " 'CHANNEL_SOURCE_DOMAIN': '', # where you got the content\n",
- " 'CHANNEL_SOURCE_ID': '', # channel's unique id CHANGE ME\n",
- " 'CHANNEL_LANGUAGE': 'en', # le_utils language code\n",
- " 'CHANNEL_DESCRIPTION': 'A test channel with different types of exercise questions', # (optional)\n",
- " 'CHANNEL_THUMBNAIL': None, # (optional)\n",
+ " \"CHANNEL_TITLE\": \"Sample Exercises\",\n",
+ " \"CHANNEL_SOURCE_DOMAIN\": \"\", # where you got the content\n",
+ " \"CHANNEL_SOURCE_ID\": \"\", # channel's unique id CHANGE ME\n",
+ " \"CHANNEL_LANGUAGE\": \"en\", # le_utils language code\n",
+ " \"CHANNEL_DESCRIPTION\": \"A test channel with different types of exercise questions\", # (optional)\n",
+ " \"CHANNEL_THUMBNAIL\": None, # (optional)\n",
" }\n",
"\n",
" def construct_channel(self, **kwargs):\n",
@@ -53,73 +53,76 @@
" channel.add_child(topic)\n",
"\n",
" exercise_node = ExerciseNode(\n",
- " source_id='',\n",
- " title='Basic questions',\n",
- " author='LE content team',\n",
- " description='Showcase of the simple question type supported by Ricecooker and Studio',\n",
- " language=getlang('en').code,\n",
+ " source_id=\"\",\n",
+ " title=\"Basic questions\",\n",
+ " author=\"LE content team\",\n",
+ " description=\"Showcase of the simple question type supported by Ricecooker and Studio\",\n",
+ " language=getlang(\"en\").code,\n",
" license=get_license(licenses.PUBLIC_DOMAIN),\n",
" thumbnail=None,\n",
" exercise_data={\n",
- " 'mastery_model': exercises.M_OF_N, # \\\n",
- " 'm': 2, # learners must get 2/3 questions correct to complete exercise\n",
- " 'n': 3, # /\n",
- " 'randomize': True, # show questions in random order\n",
+ " \"mastery_model\": exercises.M_OF_N, # \\\n",
+ " \"m\": 2, # learners must get 2/3 questions correct to complete exercise\n",
+ " \"n\": 3, # /\n",
+ " \"randomize\": True, # show questions in random order\n",
" },\n",
" questions=[\n",
" MultipleSelectQuestion(\n",
- " id='sampleEX_Q1',\n",
- " question = \"Which numbers the following numbers are even?\",\n",
- " correct_answers = [\"2\", \"4\",],\n",
- " all_answers = [\"1\", \"2\", \"3\", \"4\", \"5\"],\n",
- " hints=['Even numbers are divisible by 2.'],\n",
+ " id=\"sampleEX_Q1\",\n",
+        "                    question=\"Which of the following numbers are even?\",\n",
+ " correct_answers=[\n",
+ " \"2\",\n",
+ " \"4\",\n",
+ " ],\n",
+ " all_answers=[\"1\", \"2\", \"3\", \"4\", \"5\"],\n",
+ " hints=[\"Even numbers are divisible by 2.\"],\n",
" ),\n",
" SingleSelectQuestion(\n",
- " id='sampleEX_Q2',\n",
- " question = \"What is 2 times 3?\",\n",
- " correct_answer = \"6\",\n",
- " all_answers = [\"2\", \"3\", \"5\", \"6\"],\n",
- " hints=['Multiplication of $a$ by $b$ is like computing the area of a rectangle with length $a$ and width $b$.'],\n",
+ " id=\"sampleEX_Q2\",\n",
+ " question=\"What is 2 times 3?\",\n",
+ " correct_answer=\"6\",\n",
+ " all_answers=[\"2\", \"3\", \"5\", \"6\"],\n",
+ " hints=[\"Multiplication of $a$ by $b$ is like computing the area of a rectangle with length $a$ and width $b$.\"],\n",
" ),\n",
" InputQuestion(\n",
- " id='sampleEX_Q3',\n",
- " question = \"Name one of the *factors* of 10.\",\n",
- " answers = [\"1\", \"2\", \"5\", \"10\"],\n",
- " hints=['The factors of a number are the divisors of the number that leave a whole remainder.'],\n",
- " )\n",
- " ]\n",
+ " id=\"sampleEX_Q3\",\n",
+ " question=\"Name one of the *factors* of 10.\",\n",
+ " answers=[\"1\", \"2\", \"5\", \"10\"],\n",
+ " hints=[\"The factors of a number are the divisors of the number that leave a whole remainder.\"],\n",
+ " ),\n",
+ " ],\n",
" )\n",
" topic.add_child(exercise_node)\n",
"\n",
- " # LOAD JSON DATA (as string) FOR PERSEUS QUESTIONS \n",
- " RAW_PERSEUS_JSON_STR = open('../../examples/exercises/chefdata/perseus_graph_question.json', 'r').read()\n",
+ " # LOAD JSON DATA (as string) FOR PERSEUS QUESTIONS\n",
+ " RAW_PERSEUS_JSON_STR = open(\"../../examples/exercises/chefdata/perseus_graph_question.json\", \"r\").read()\n",
" # or\n",
" # import requests\n",
" # RAW_PERSEUS_JSON_STR = requests.get('https://raw.githubusercontent.com/learningequality/sample-channels/master/contentnodes/exercise/perseus_graph_question.json').text\n",
" exercise_node2 = ExerciseNode(\n",
- " source_id='',\n",
- " title='An exercise containing a perseus question',\n",
- " author='LE content team',\n",
- " description='An example exercise with a Persus question',\n",
- " language=getlang('en').code,\n",
- " license=get_license(licenses.CC_BY, copyright_holder='Copyright holder name'),\n",
- " thumbnail=None,\n",
- " exercise_data={\n",
- " 'mastery_model': exercises.M_OF_N,\n",
- " 'm': 1,\n",
- " 'n': 1,\n",
- " },\n",
- " questions=[\n",
- " PerseusQuestion(\n",
- " id='ex2bQ4',\n",
- " raw_data=RAW_PERSEUS_JSON_STR,\n",
- " source_url='https://github.com/learningequality/sample-channels/blob/master/contentnodes/exercise/perseus_graph_question.json'\n",
- " ),\n",
- " ]\n",
+ " source_id=\"\",\n",
+ " title=\"An exercise containing a perseus question\",\n",
+ " author=\"LE content team\",\n",
+        "            description=\"An example exercise with a Perseus question\",\n",
+ " language=getlang(\"en\").code,\n",
+ " license=get_license(licenses.CC_BY, copyright_holder=\"Copyright holder name\"),\n",
+ " thumbnail=None,\n",
+ " exercise_data={\n",
+ " \"mastery_model\": exercises.M_OF_N,\n",
+ " \"m\": 1,\n",
+ " \"n\": 1,\n",
+ " },\n",
+ " questions=[\n",
+ " PerseusQuestion(\n",
+ " id=\"ex2bQ4\",\n",
+ " raw_data=RAW_PERSEUS_JSON_STR,\n",
+ " source_url=\"https://github.com/learningequality/sample-channels/blob/master/contentnodes/exercise/perseus_graph_question.json\",\n",
+ " ),\n",
+ " ],\n",
" )\n",
" topic.add_child(exercise_node2)\n",
"\n",
- " return channel\n"
+ " return channel"
]
},
{
@@ -176,9 +179,9 @@
"source": [
"chef = ExercisesChef()\n",
"args = {\n",
- " 'command': 'dryrun', # use 'uploadchannel' for real run\n",
- " 'verbose': True,\n",
- " 'token': 'YOURTOKENHERE9139139f3a23232'\n",
+ " \"command\": \"dryrun\", # use 'uploadchannel' for real run\n",
+ " \"verbose\": True,\n",
+ " \"token\": \"YOURTOKENHERE9139139f3a23232\",\n",
"}\n",
"options = {}\n",
"\n",
diff --git a/docs/examples/languages.ipynb b/docs/examples/languages.ipynb
index 87421145..2e7fcd07 100644
--- a/docs/examples/languages.ipynb
+++ b/docs/examples/languages.ipynb
@@ -39,7 +39,7 @@
"\n",
"\n",
"# can lookup language using language code\n",
- "language_obj = languages.getlang('en')\n",
+ "language_obj = languages.getlang(\"en\")\n",
"language_obj"
]
},
@@ -61,7 +61,7 @@
],
"source": [
"# can lookup language using language name (the new le_utils version has not shipped yet)\n",
- "language_obj = languages.getlang_by_name('English')\n",
+ "language_obj = languages.getlang_by_name(\"English\")\n",
"language_obj"
]
},
@@ -103,9 +103,9 @@
"source": [
"from le_utils.constants.languages import getlang_by_native_name\n",
"\n",
- "lang_obj = getlang_by_native_name('français')\n",
+ "lang_obj = getlang_by_native_name(\"français\")\n",
"print(lang_obj)\n",
- "print(lang_obj.code)\n"
+ "print(lang_obj.code)"
]
},
{
@@ -147,18 +147,18 @@
"from le_utils.constants.languages import getlang\n",
"\n",
"\n",
- "\n",
"class MultipleLanguagesChef(SushiChef):\n",
" \"\"\"\n",
" A sushi chef that creates a channel with content in EN, FR, and SP.\n",
" \"\"\"\n",
+ "\n",
" channel_info = {\n",
- " 'CHANNEL_TITLE': 'Languages test channel',\n",
- " 'CHANNEL_SOURCE_DOMAIN': '', # where you got the content\n",
- " 'CHANNEL_SOURCE_ID': '', # channel's unique id CHANGE ME!!\n",
- " 'CHANNEL_LANGUAGE': getlang('mul').code, # set global language for channel\n",
- " 'CHANNEL_DESCRIPTION': 'This channel contains nodes in multiple languages',\n",
- " 'CHANNEL_THUMBNAIL': None, # (optional)\n",
+ " \"CHANNEL_TITLE\": \"Languages test channel\",\n",
+ " \"CHANNEL_SOURCE_DOMAIN\": \"\", # where you got the content\n",
+ " \"CHANNEL_SOURCE_ID\": \"\", # channel's unique id CHANGE ME!!\n",
+ " \"CHANNEL_LANGUAGE\": getlang(\"mul\").code, # set global language for channel\n",
+ " \"CHANNEL_DESCRIPTION\": \"This channel contains nodes in multiple languages\",\n",
+ " \"CHANNEL_THUMBNAIL\": None, # (optional)\n",
" }\n",
"\n",
" def construct_channel(self, **kwargs):\n",
@@ -169,15 +169,15 @@
" topic = TopicNode(\n",
" source_id=\"\",\n",
" title=\"New Topic in English\",\n",
- " language=getlang('en').code,\n",
+ " language=getlang(\"en\").code,\n",
" )\n",
" doc_node = DocumentNode(\n",
" source_id=\"\",\n",
- " title='Some doc in English',\n",
- " description='This is a sample document node in English',\n",
- " files=[DocumentFile(path='samplefiles/documents/doc_EN.pdf')],\n",
+ " title=\"Some doc in English\",\n",
+ " description=\"This is a sample document node in English\",\n",
+ " files=[DocumentFile(path=\"samplefiles/documents/doc_EN.pdf\")],\n",
" license=licenses.PUBLIC_DOMAIN,\n",
- " language=getlang('en').code,\n",
+ " language=getlang(\"en\").code,\n",
" )\n",
" topic.add_child(doc_node)\n",
" channel.add_child(topic)\n",
@@ -186,15 +186,15 @@
" topic = TopicNode(\n",
" source_id=\"\",\n",
" title=\"Topic in Spanish\",\n",
- " language=getlang('es-MX').code,\n",
+ " language=getlang(\"es-MX\").code,\n",
" )\n",
" doc_node = DocumentNode(\n",
" source_id=\"\",\n",
- " title='Some doc in Spanish',\n",
- " description='This is a sample document node in Spanish',\n",
- " files=[DocumentFile(path='samplefiles/documents/doc_ES.pdf')],\n",
+ " title=\"Some doc in Spanish\",\n",
+ " description=\"This is a sample document node in Spanish\",\n",
+ " files=[DocumentFile(path=\"samplefiles/documents/doc_ES.pdf\")],\n",
" license=licenses.PUBLIC_DOMAIN,\n",
- " language=getlang('es-MX').code,\n",
+ " language=getlang(\"es-MX\").code,\n",
" )\n",
" topic.add_child(doc_node)\n",
" channel.add_child(topic)\n",
@@ -203,20 +203,20 @@
" topic = TopicNode(\n",
" source_id=\"\",\n",
" title=\"Topic in French\",\n",
- " language=languages.getlang('fr').code,\n",
+ " language=languages.getlang(\"fr\").code,\n",
" )\n",
" doc_node = DocumentNode(\n",
" source_id=\"\",\n",
- " title='Some doc in French',\n",
- " description='This is a sample document node in French',\n",
- " files=[DocumentFile(path='samplefiles/documents/doc_FR.pdf')],\n",
+ " title=\"Some doc in French\",\n",
+ " description=\"This is a sample document node in French\",\n",
+ " files=[DocumentFile(path=\"samplefiles/documents/doc_FR.pdf\")],\n",
" license=licenses.PUBLIC_DOMAIN,\n",
- " language=getlang('fr').code,\n",
+ " language=getlang(\"fr\").code,\n",
" )\n",
" topic.add_child(doc_node)\n",
" channel.add_child(topic)\n",
"\n",
- " return channel\n"
+ " return channel"
]
},
{
@@ -266,9 +266,9 @@
"source": [
"mychef = MultipleLanguagesChef()\n",
"args = {\n",
- " 'command': 'dryrun', # use 'uploadchannel' for real run\n",
- " 'verbose': True,\n",
- " 'token': 'YOURTOKENHERE9139139f3a23232'\n",
+ " \"command\": \"dryrun\", # use 'uploadchannel' for real run\n",
+ " \"verbose\": True,\n",
+ " \"token\": \"YOURTOKENHERE9139139f3a23232\",\n",
"}\n",
"options = {}\n",
"mychef.run(args, options)"
@@ -315,15 +315,17 @@
"source": [
"import youtube_dl\n",
"\n",
- "ydl = youtube_dl.YoutubeDL({\n",
- " #'quiet': True,\n",
- " 'no_warnings': True,\n",
- " 'writesubtitles': True,\n",
- " 'allsubtitles': True,\n",
- "})\n",
+ "ydl = youtube_dl.YoutubeDL(\n",
+ " {\n",
+ " #'quiet': True,\n",
+ " \"no_warnings\": True,\n",
+ " \"writesubtitles\": True,\n",
+ " \"allsubtitles\": True,\n",
+ " }\n",
+ ")\n",
"\n",
"\n",
- "youtube_id = 'FN12ty5ztAs'\n",
+ "youtube_id = \"FN12ty5ztAs\"\n",
"\n",
"info = ydl.extract_info(youtube_id, download=False)\n",
"subtitle_languages = info[\"subtitles\"].keys()\n",
@@ -336,9 +338,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "\n"
- ]
+ "source": []
},
{
"cell_type": "markdown",
@@ -363,18 +363,20 @@
"\n",
"\n",
"import youtube_dl\n",
- "ydl = youtube_dl.YoutubeDL({\n",
- " 'quiet': True,\n",
- " 'no_warnings': True,\n",
- " 'writesubtitles': True,\n",
- " 'allsubtitles': True,\n",
- "})\n",
+ "\n",
+ "ydl = youtube_dl.YoutubeDL(\n",
+ " {\n",
+ " \"quiet\": True,\n",
+ " \"no_warnings\": True,\n",
+ " \"writesubtitles\": True,\n",
+ " \"allsubtitles\": True,\n",
+ " }\n",
+ ")\n",
"\n",
"\n",
"# Define the license object with necessary info\n",
"TE_LICENSE = licenses.SpecialPermissionsLicense(\n",
- " description='Permission granted by Touchable Earth to distribute through Kolibri.',\n",
- " copyright_holder='Touchable Earth Foundation (New Zealand)'\n",
+ " description=\"Permission granted by Touchable Earth to distribute through Kolibri.\", copyright_holder=\"Touchable Earth Foundation (New Zealand)\"\n",
")\n",
"\n",
"\n",
@@ -382,13 +384,14 @@
" \"\"\"\n",
" A sushi chef that creates a channel with content in EN, FR, and SP.\n",
" \"\"\"\n",
+ "\n",
" channel_info = {\n",
- " 'CHANNEL_SOURCE_DOMAIN': '', # where you got the content\n",
- " 'CHANNEL_SOURCE_ID': '', # channel's unique id CHANGE ME!!\n",
- " 'CHANNEL_TITLE': 'Youtube subtitles downloading chef',\n",
- " 'CHANNEL_LANGUAGE': 'en',\n",
- " 'CHANNEL_THUMBNAIL': 'https://edoc.coe.int/4115/postcard-47-flags.jpg',\n",
- " 'CHANNEL_DESCRIPTION': 'This is a test channel to make sure youtube subtitle languages lookup works'\n",
+ " \"CHANNEL_SOURCE_DOMAIN\": \"\", # where you got the content\n",
+ " \"CHANNEL_SOURCE_ID\": \"\", # channel's unique id CHANGE ME!!\n",
+ " \"CHANNEL_TITLE\": \"Youtube subtitles downloading chef\",\n",
+ " \"CHANNEL_LANGUAGE\": \"en\",\n",
+ " \"CHANNEL_THUMBNAIL\": \"https://edoc.coe.int/4115/postcard-47-flags.jpg\",\n",
+ " \"CHANNEL_DESCRIPTION\": \"This is a test channel to make sure youtube subtitle languages lookup works\",\n",
" }\n",
"\n",
" def construct_channel(self, **kwargs):\n",
@@ -396,15 +399,15 @@
" channel = self.get_channel(**kwargs)\n",
"\n",
" # get all subtitles available for a sample video\n",
- " youtube_id ='FN12ty5ztAs'\n",
+ " youtube_id = \"FN12ty5ztAs\"\n",
" info = ydl.extract_info(youtube_id, download=False)\n",
" subtitle_languages = info[\"subtitles\"].keys()\n",
- " print('Found subtitle_languages = ', subtitle_languages)\n",
- " \n",
+ " print(\"Found subtitle_languages = \", subtitle_languages)\n",
+ "\n",
" # create video node\n",
" video_node = VideoNode(\n",
" source_id=youtube_id,\n",
- " title='Youtube video',\n",
+ " title=\"Youtube video\",\n",
" license=TE_LICENSE,\n",
" derive_thumbnail=True,\n",
" files=[YouTubeVideoFile(youtube_id=youtube_id)],\n",
@@ -413,20 +416,13 @@
" # add subtitles in whichever languages are available.\n",
" for lang_code in subtitle_languages:\n",
" if is_youtube_subtitle_file_supported_language(lang_code):\n",
- " video_node.add_file(\n",
- " YouTubeSubtitleFile(\n",
- " youtube_id=youtube_id,\n",
- " language=lang_code\n",
- " )\n",
- " )\n",
+ " video_node.add_file(YouTubeSubtitleFile(youtube_id=youtube_id, language=lang_code))\n",
" else:\n",
- " print('Unsupported subtitle language code:', lang_code)\n",
+ " print(\"Unsupported subtitle language code:\", lang_code)\n",
"\n",
" channel.add_child(video_node)\n",
"\n",
- " return channel\n",
- "\n",
- " "
+ " return channel"
]
},
{
@@ -482,9 +478,9 @@
"source": [
"chef = YoutubeVideoWithSubtitlesSushiChef()\n",
"args = {\n",
- " 'command': 'dryrun', # use 'uploadchannel' for real run\n",
- " 'verbose': True,\n",
- " 'token': 'YOURTOKENHERE9139139f3a23232'\n",
+ " \"command\": \"dryrun\", # use 'uploadchannel' for real run\n",
+ " \"verbose\": True,\n",
+ " \"token\": \"YOURTOKENHERE9139139f3a23232\",\n",
"}\n",
"options = {}\n",
"chef.run(args, options)"
diff --git a/examples/oldexamples/large_wikipedia_chef.py b/examples/oldexamples/large_wikipedia_chef.py
index f3ffe31b..c31fb31e 100644
--- a/examples/oldexamples/large_wikipedia_chef.py
+++ b/examples/oldexamples/large_wikipedia_chef.py
@@ -73,14 +73,10 @@ def construct_channel(self, *args, **kwargs):
"""
Create ChannelNode and build topic tree.
"""
- channel = self.get_channel(
- *args, **kwargs
- ) # creates ChannelNode from data in self.channel_info
+ channel = self.get_channel(*args, **kwargs) # creates ChannelNode from data in self.channel_info
city_topic = TopicNode(source_id="List_of_largest_cities", title="Cities!")
channel.add_child(city_topic)
- add_subpages_from_wikipedia_list(
- city_topic, "https://en.wikipedia.org/wiki/List_of_largest_cities"
- )
+ add_subpages_from_wikipedia_list(city_topic, "https://en.wikipedia.org/wiki/List_of_largest_cities")
return channel
@@ -94,15 +90,10 @@ def add_subpages_from_wikipedia_list(topic, list_url):
page = get_parsed_html_from_url(list_url)
# extract the main table from the page
- table = page.find(
- lambda tag: tag.name == "table"
- and tag.has_attr("class")
- and "wikitable" in tag["class"]
- )
+ table = page.find(lambda tag: tag.name == "table" and tag.has_attr("class") and "wikitable" in tag["class"])
# loop through all the rows in the table
for row in table.find_all("tr"):
-
# extract the columns (cells, really) within the current row
columns = row.find_all("td")
@@ -128,9 +119,7 @@ def add_subpages_from_wikipedia_list(topic, list_url):
# attempt to extract a thumbnail for the subpage, from the second column in the table
image = columns[1].find("img")
thumbnail_url = make_fully_qualified_url(image["src"]) if image else None
- if thumbnail_url and not (
- thumbnail_url.endswith("jpg") or thumbnail_url.endswith("png")
- ):
+ if thumbnail_url and not (thumbnail_url.endswith("jpg") or thumbnail_url.endswith("png")):
thumbnail_url = None
# download the wikipedia page into an HTML5 app node
@@ -172,9 +161,7 @@ def process_wikipedia_page(content, baseurl, destpath, **kwargs):
page = BeautifulSoup(content, "html.parser")
for image in page.find_all("img"):
- relpath, _ = download_file(
- make_fully_qualified_url(image["src"]), destpath, request_fn=make_request
- )
+ relpath, _ = download_file(make_fully_qualified_url(image["src"]), destpath, request_fn=make_request)
image["src"] = relpath
return str(page)
diff --git a/examples/oldexamples/sample_program.py b/examples/oldexamples/sample_program.py
index 3498a71b..86cb52d7 100644
--- a/examples/oldexamples/sample_program.py
+++ b/examples/oldexamples/sample_program.py
@@ -150,11 +150,7 @@ def guess_content_kind(path=None, web_video_data=None, questions=None):
ext = os.path.splitext(path)[1][1:].lower()
if ext in content_kinds.MAPPING:
return content_kinds.MAPPING[ext]
- raise InvalidFormatException(
- "Invalid file type: Allowed formats are {0}".format(
- [key for key, value in content_kinds.MAPPING.items()]
- )
- )
+ raise InvalidFormatException("Invalid file type: Allowed formats are {0}".format([key for key, value in content_kinds.MAPPING.items()]))
elif web_video_data:
return content_kinds.VIDEO
else:
@@ -185,9 +181,7 @@ def guess_content_kind(path=None, web_video_data=None, questions=None):
"license": licenses.CC_BY_NC_SA,
"copyright_holder": "Learning Equality",
"files": [
- {
- "path": "https://ia600209.us.archive.org/27/items/RiceChef/Rice Chef.mp4"
- },
+ {"path": "https://ia600209.us.archive.org/27/items/RiceChef/Rice Chef.mp4"},
{
"encoding": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAMAAAAoLQ9TAAAABGdBTUEAALGPC/xhBQAAAAFzUkdCAK7OHOkAAAAgY0hSTQAAeiYAAICEAAD6AAAAgOgAAHUwAADqYAAAOpgAABdwnLpRPAAAAmFQTFRF////wN/2I0FiNFFuAAAAxdvsN1RxV3KMnrPFFi9PAB1CVG+KXHaQI0NjttLrEjVchIF4AyNGZXB5V087UUw/EzBMpqWeb2thbmpgpqOceXVsERgfTWeADg8QCAEApKGZBAYIop+XCQkIhZ+2T2mEg5mtnK/AobPDkKO2YXqTAAAAJkBetMraZH2VprjIz9zm4enw7/T47fP3wc7ae5GnAAAAN1BsSmSApLfI1ODq2OHp5Orv8PL09vb38fb5wM/bbISbrL/PfZSpxNPgzdnj2+Pr5evw6+/z6e3w3ePp2OPsma2/ABM5Q197ABk4jKG1yNfjytfh1uDo3eXs4unv1t/nztrjqbzMTmmEXneRES1Ji6CzxtXixdPfztrk1N/n1+Dp1d/oz9vlxdPeq73NVG+KYnyUAAAddIuhwtPhvMzaxtTgytfiy9jjwtHewtHenbDCHT1fS2eCRV52qr7PvM3cucrYv87cv8/cvMzavc3bucvacoyl////ByE8WnKKscXWv9Hguszbu8zbvc7dtcnaiJqrcHZ4f4SHEh0nEitFTWZ+hJqumrDDm7HDj6W5dI2lYGJfmZeQl5SNAAAADRciAAATHjdSOVNsPlhyLklmKCYjW1lUlpOLlZKLFSAqWXSOBQAADA0NAAAAHh0bWlhSk5CIk5CIBAYJDRQbERcdDBAUBgkMAAAEDg4NAAAAHBsZWFZQkY6GAAAAAAAABQUEHBsZAAAAGxoYVlROko+GBAQDZ2RdAAAAGhkYcW9oAgICAAAAExMSDQwLjouDjYuDioiAiIV9hoN7VlRO////Z2DcYwAAAMR0Uk5TAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACRKrJyrZlBQECaNXCsKaqypMGAUDcu7Gpn5mf03gDo8+4saiipKq3xRMBH83Eu7OsqbG61DkDMdbFvrizsbK3wNs9Ax/VysS/vLq/zNwfArDhxMfExMXE3pMCMe7byMjIzd33ZgYGQtnz6+zooeJXBQMFD1yHejZ1+l8FBgEELlOR+GgFCQ0SGxoBGFKg+m0BBwEMR6v+hAEDM6nRASWURVuYQQ4AAAABYktHRACIBR1IAAAACXBIWXMAAAjLAAAIywGEuOmJAAABCklEQVQY02NgUGZUUVVT19DUYtBmYmZhYdBh1dXTNzA0MjYxZTFjAwqwm1tYWlnb2NrZO3A4cgIFGJycXVzd3D08vbx9uHyBAn7+AYFBwSEhoWHhEdyRQIGo6JjYuPiExKTklFSeNKBAekZmVnZObk5efkEhbxFQgK+4pLSsvKKyqrqGoZZfgIVBsK6+obGpuaW1rV2oQ1hEgKFTtKu7p7evf8LEI5PEJotLMEyZyjJt+oyZsxhmzzk6V3KeFIO01vwFMrJyCxctXrL02DL55QwsClorVq5avWbtuvUbNh7fpMjAwsKyWWvLFJatStu279h5YhdIAAJ2s+zZu+/kfoQAy4HNLAcPHQYA5YtSi+k2/WkAAAAldEVYdGRhdGU6Y3JlYXRlADIwMTMtMTAtMDRUMTk6Mzk6MjEtMDQ6MDAwU1uYAAAAJXRFWHRkYXRlOm1vZGlmeQAyMDEzLTEwLTA0VDE5OjM5OjIxLTA0OjAwQQ7jJAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAAASUVORK5CYII="
},
@@ -200,11 +194,7 @@ def guess_content_kind(path=None, web_video_data=None, questions=None):
"license": licenses.CC_BY_NC_SA,
"copyright_holder": "Learning Equality",
"mastery_model": exercises.DO_ALL,
- "files": [
- {
- "path": "http://www.publicdomainpictures.net/pictures/110000/nahled/bowl-of-rice.jpg"
- }
- ],
+ "files": [{"path": "http://www.publicdomainpictures.net/pictures/110000/nahled/bowl-of-rice.jpg"}],
"questions": [
{
"id": "eeeee",
@@ -253,11 +243,7 @@ def guess_content_kind(path=None, web_video_data=None, questions=None):
"license": licenses.CC_BY_NC_SA,
"copyright_holder": "Learning Equality",
"mastery_model": exercises.M_OF_N,
- "files": [
- {
- "path": "https://c1.staticflickr.com/5/4021/4302326650_b11f0f0aaf_b.jpg"
- }
- ],
+ "files": [{"path": "https://c1.staticflickr.com/5/4021/4302326650_b11f0f0aaf_b.jpg"}],
"questions": [
{
"id": "11111",
@@ -289,11 +275,7 @@ def guess_content_kind(path=None, web_video_data=None, questions=None):
"license": licenses.CC_BY_NC_SA,
"copyright_holder": "Learning Equality",
"mastery_model": exercises.M_OF_N,
- "files": [
- {
- "path": "https://upload.wikimedia.org/wikipedia/commons/b/b7/Rice_p1160004.jpg"
- }
- ],
+ "files": [{"path": "https://upload.wikimedia.org/wikipedia/commons/b/b7/Rice_p1160004.jpg"}],
"questions": [
{
"id": "123456",
@@ -331,9 +313,7 @@ def construct_channel(self, *args, **kwargs):
"""
Create ChannelNode and build topic tree.
"""
- channel = self.get_channel(
- *args, **kwargs
- ) # creates ChannelNode from data in self.channel_info
+ channel = self.get_channel(*args, **kwargs) # creates ChannelNode from data in self.channel_info
_build_tree(channel, SAMPLE_TREE)
raise_for_invalid_channel(channel)
@@ -346,9 +326,7 @@ def _build_tree(node, sourcetree):
"""
for child_source_node in sourcetree:
try:
- main_file = (
- child_source_node["files"][0] if "files" in child_source_node else {}
- )
+ main_file = child_source_node["files"][0] if "files" in child_source_node else {}
kind = guess_content_kind(
path=main_file.get("path"),
web_video_data=main_file.get("youtube_id") or main_file.get("web_url"),
@@ -415,10 +393,7 @@ def _build_tree(node, sourcetree):
node.add_child(child_node)
elif kind == content_kinds.EXERCISE:
- mastery_model = (
- child_source_node.get("mastery_model")
- and {"mastery_model": child_source_node["mastery_model"]}
- ) or {}
+ mastery_model = (child_source_node.get("mastery_model") and {"mastery_model": child_source_node["mastery_model"]}) or {}
child_node = nodes.ExerciseNode(
source_id=child_source_node["id"],
title=child_source_node["title"],
@@ -469,12 +444,9 @@ def _build_tree(node, sourcetree):
def add_files(node, file_list):
for f in file_list:
-
path = f.get("path")
if path is not None:
- abspath = get_abspath(
- path
- ) # NEW: expand content:// --> ./content/ in file paths
+ abspath = get_abspath(path)
else:
abspath = None
@@ -509,20 +481,10 @@ def add_files(node, file_list):
elif file_type == FileTypes.BASE64_FILE:
node.add_file(files.Base64ImageFile(encoding=f["encoding"]))
elif file_type == FileTypes.WEB_VIDEO_FILE:
- node.add_file(
- files.WebVideoFile(
- web_url=f["web_url"], high_resolution=f.get("high_resolution")
- )
- )
+ node.add_file(files.WebVideoFile(web_url=f["web_url"], high_resolution=f.get("high_resolution")))
elif file_type == FileTypes.YOUTUBE_VIDEO_FILE:
- node.add_file(
- files.YouTubeVideoFile(
- youtube_id=f["youtube_id"], high_resolution=f.get("high_resolution")
- )
- )
- node.add_file(
- files.YouTubeSubtitleFile(youtube_id=f["youtube_id"], language="en")
- )
+ node.add_file(files.YouTubeVideoFile(youtube_id=f["youtube_id"], high_resolution=f.get("high_resolution")))
+ node.add_file(files.YouTubeSubtitleFile(youtube_id=f["youtube_id"], language="en"))
else:
raise UnknownFileTypeError("Unrecognized file type '{0}'".format(f["path"]))
@@ -530,22 +492,14 @@ def add_files(node, file_list):
def create_question(raw_question):
question = parse_images(raw_question.get("question"))
hints = raw_question.get("hints")
- hints = (
- parse_images(hints)
- if isinstance(hints, str)
- else [parse_images(hint) for hint in hints or []]
- )
+ hints = parse_images(hints) if isinstance(hints, str) else [parse_images(hint) for hint in hints or []]
if raw_question["type"] == exercises.MULTIPLE_SELECTION:
return questions.MultipleSelectQuestion(
id=raw_question["id"],
question=question,
- correct_answers=[
- parse_images(answer) for answer in raw_question["correct_answers"]
- ],
- all_answers=[
- parse_images(answer) for answer in raw_question["all_answers"]
- ],
+ correct_answers=[parse_images(answer) for answer in raw_question["correct_answers"]],
+ all_answers=[parse_images(answer) for answer in raw_question["all_answers"]],
hints=hints,
)
if raw_question["type"] == exercises.SINGLE_SELECTION:
@@ -553,9 +507,7 @@ def create_question(raw_question):
id=raw_question["id"],
question=question,
correct_answer=parse_images(raw_question["correct_answer"]),
- all_answers=[
- parse_images(answer) for answer in raw_question["all_answers"]
- ],
+ all_answers=[parse_images(answer) for answer in raw_question["all_answers"]],
hints=hints,
)
if raw_question["type"] == exercises.INPUT_QUESTION:
@@ -574,9 +526,7 @@ def create_question(raw_question):
)
else:
raise UnknownQuestionTypeError(
- "Unrecognized question type '{0}': accepted types are {1}".format(
- raw_question["type"], [key for key, value in exercises.question_choices]
- )
+ "Unrecognized question type '{0}': accepted types are {1}".format(raw_question["type"], [key for key, value in exercises.question_choices])
)
diff --git a/examples/wikipedia/sushichef.py b/examples/wikipedia/sushichef.py
index e2eed934..c5d7e4f7 100755
--- a/examples/wikipedia/sushichef.py
+++ b/examples/wikipedia/sushichef.py
@@ -77,17 +77,11 @@ def construct_channel(self, *args, **kwargs):
channel = self.get_channel(**kwargs)
citrus_topic = TopicNode(source_id="List_of_citrus_fruits", title="Citrus!")
channel.add_child(citrus_topic)
- add_subpages_from_wikipedia_list(
- citrus_topic, "https://en.wikipedia.org/wiki/List_of_citrus_fruits"
- )
+ add_subpages_from_wikipedia_list(citrus_topic, "https://en.wikipedia.org/wiki/List_of_citrus_fruits")
- potato_topic = TopicNode(
- source_id="List_of_potato_cultivars", title="Potatoes!"
- )
+ potato_topic = TopicNode(source_id="List_of_potato_cultivars", title="Potatoes!")
channel.add_child(potato_topic)
- add_subpages_from_wikipedia_list(
- potato_topic, "https://en.wikipedia.org/wiki/List_of_potato_cultivars"
- )
+ add_subpages_from_wikipedia_list(potato_topic, "https://en.wikipedia.org/wiki/List_of_potato_cultivars")
return channel
@@ -106,7 +100,6 @@ def add_subpages_from_wikipedia_list(topic, list_url):
# loop through all the rows in the table
for row in table.find_all("tr"):
-
# extract the columns (cells, really) within the current row
columns = row.find_all("td")
@@ -130,9 +123,7 @@ def add_subpages_from_wikipedia_list(topic, list_url):
# attempt to extract a thumbnail for the subpage, from the second column in the table
image = columns[1].find("img")
thumbnail_url = make_fully_qualified_url(image["src"]) if image else None
- if thumbnail_url and not (
- thumbnail_url.endswith("jpg") or thumbnail_url.endswith("png")
- ):
+ if thumbnail_url and not (thumbnail_url.endswith("jpg") or thumbnail_url.endswith("png")):
thumbnail_url = None
# download the wikipedia page into an HTML5 app node
@@ -176,9 +167,7 @@ def process_wikipedia_page(content, baseurl, destpath, **kwargs):
page = BeautifulSoup(content, "html.parser")
for image in page.find_all("img"):
- relpath, _ = download_file(
- make_fully_qualified_url(image["src"]), destpath, request_fn=make_request
- )
+ relpath, _ = download_file(make_fully_qualified_url(image["src"]), destpath, request_fn=make_request)
image["src"] = relpath
return str(page)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..27bf24ae
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,114 @@
+[build-system]
+requires = ["setuptools>=75.0", "setuptools-scm>=8"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ricecooker"
+description = "API for adding content to the Kolibri content curation server"
+authors = [
+ {name = "Learning Equality", email = "dev@learningequality.org"},
+]
+license = "MIT"
+readme = "README.md"
+keywords = ["ricecooker"]
+requires-python = ">=3.9, <3.14"
+dynamic = ["version"]
+classifiers = [
+ "Intended Audience :: Developers",
+ "Development Status :: 5 - Production/Stable",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Natural Language :: English",
+ "Topic :: Education",
+]
+dependencies = [
+ "requests>=2.11.1",
+ "le_utils>=0.2.10",
+ "requests_file",
+ "beautifulsoup4>=4.6.3,<4.9.0",
+ "selenium==4.36.0",
+ "yt-dlp>=2024.12.23",
+ "html5lib",
+ "cachecontrol==0.14.3",
+ "filelock==3.19.1",
+ "css-html-js-minify==2.5.5",
+ "pypdf2==1.26.0",
+ "dictdiffer>=0.8.0",
+ "Pillow==11.3.0",
+ "colorlog>=4.1.0,<6.11",
+ "chardet==5.2.0",
+ "ffmpy>=0.2.2",
+ "pdf2image==1.17.0",
+ "le-pycaption>=2.2.0a1",
+ "EbookLib>=0.17.1",
+ "filetype>=1.1.0",
+ "urllib3==2.6.3",
+ "langcodes[data]==3.5.1",
+]
+
+[project.optional-dependencies]
+google_drive = ["google-api-python-client", "google-auth"]
+sentry = ["sentry-sdk>=2.32.0"]
+
+[project.scripts]
+corrections = "ricecooker.utils.corrections:correctionsmain"
+
+[project.urls]
+Homepage = "https://github.com/learningequality/ricecooker"
+
+[dependency-groups]
+test = [
+ "requests-cache==1.2.1",
+ "pytest==8.4.2",
+ "pytest-env==1.1.5",
+    "vcrpy==7.0.0; python_version >= '3.10'",
+ "mock==5.2.0",
+]
+dev = [
+ {include-group = "test"},
+ "ruff>=0.11",
+]
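+# Note: plain `uv sync` installs the dev group by default, which pulls in the test group via include-group.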
+
+[tool.setuptools-scm]
+
+[tool.setuptools.packages.find]
+include = ["ricecooker*"]
+
+[tool.uv]
+# Only resolve for CPython (vcrpy has urllib3<2 constraint on PyPy that conflicts)
+environments = ["platform_python_implementation == 'CPython'"]
+# Update this date periodically to allow new dependency versions
+exclude-newer = "2026-03-24T00:00:00Z"
+# Exempt le-utils from the exclude-newer cooldown
+exclude-newer-package = { le-utils = "2099-12-31T00:00:00Z" }
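+# One possible refresh workflow: bump the date above, then run `uv lock --upgrade` to re-resolve.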
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+norecursedirs = ["docs", "examples", "resources"]
+env = [
+ "RICECOOKER_STORAGE=./.pytest_storage",
+ "RICECOOKER_FILECACHE=./.pytest_filecache",
+]
+
+[tool.ruff]
+target-version = "py39"
+line-length = 160
+exclude = ["docs", "examples"]
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "C90"]
+ignore = ["E226", "E203", "E741"]
+
+[tool.ruff.lint.mccabe]
+max-complexity = 10
+
+[tool.ruff.lint.per-file-ignores]
+"tests/**" = ["F811"]
+"ricecooker/utils/downloader.py" = ["C901"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["ricecooker"]
+force-single-line = true
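+# Local usage (one option): `uvx ruff check .` and `uvx ruff format .`.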
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 0eb29657..00000000
--- a/pytest.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[pytest]
-testpaths = tests/
-norecursedirs = docs examples resources
-env =
- # cleaned up in conftest.py fixture
- RICECOOKER_STORAGE=./.pytest_storage
- RICECOOKER_FILECACHE=./.pytest_filecache
diff --git a/ricecooker/__init__.py b/ricecooker/__init__.py
index e96ce1ac..a546862e 100644
--- a/ricecooker/__init__.py
+++ b/ricecooker/__init__.py
@@ -1,11 +1,17 @@
# -*- coding: utf-8 -*-
+import sys
+
__author__ = "Learning Equality"
__email__ = "info@learningequality.org"
-__version__ = "0.8.0"
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version
-import sys
+try:
+ __version__ = version("ricecooker")
+except PackageNotFoundError:
+ __version__ = "0.0.0.dev0"
if sys.version_info < (3, 9, 0):
raise RuntimeError("Ricecooker only supports Python 3.9+")
diff --git a/ricecooker/chefs.py b/ricecooker/chefs.py
index 310877a7..165c197f 100644
--- a/ricecooker/chefs.py
+++ b/ricecooker/chefs.py
@@ -10,6 +10,10 @@
import requests
+from ricecooker.utils.images import convert_image
+from ricecooker.utils.pipeline import FilePipeline
+from ricecooker.utils.request_utils import DomainSpecificAuth
+
from . import config
from .classes import files
from .classes import nodes
@@ -21,20 +25,16 @@
from .utils.jsontrees import build_tree_from_json
from .utils.jsontrees import get_channel_node_from_json
from .utils.jsontrees import read_tree_from_json
-from .utils.linecook import build_ricecooker_json_tree
from .utils.linecook import FolderExistsAction
-from .utils.metadata_provider import CsvMetadataProvider
+from .utils.linecook import build_ricecooker_json_tree
from .utils.metadata_provider import DEFAULT_CHANNEL_INFO_FILENAME
from .utils.metadata_provider import DEFAULT_CONTENT_INFO_FILENAME
from .utils.metadata_provider import DEFAULT_EXERCISE_QUESTIONS_INFO_FILENAME
from .utils.metadata_provider import DEFAULT_EXERCISES_INFO_FILENAME
+from .utils.metadata_provider import CsvMetadataProvider
from .utils.tokens import get_content_curation_token
from .utils.youtube import YouTubePlaylistUtils
from .utils.youtube import YouTubeVideoUtils
-from ricecooker.utils.images import convert_image
-from ricecooker.utils.pipeline import FilePipeline
-from ricecooker.utils.request_utils import DomainSpecificAuth
-
# SUSHI CHEF BASE CLASS
################################################################################
@@ -49,9 +49,7 @@ class SushiChef(object):
CHEF_RUN_DATA = config.CHEF_DATA_DEFAULT # loaded from chefdata/chef_data.json
TREES_DATA_DIR = config.TREES_DATA_DIR # tree archives and JsonTreeChef inputs
- DOMAIN_AUTH_HEADERS = (
- {}
- ) # dict of {domain: {header: env var name}} for requests auth
+ DOMAIN_AUTH_HEADERS = {} # dict of {domain: {header: env var name}} for requests auth
channel_node_class = nodes.ChannelNode
@@ -70,15 +68,13 @@ def __init__(self, *args, **kwargs):
warning_text = "thumbnails setting is deprecated and will be replaced by thumbnails in version 0.8 please update"
config.LOGGER.warn(warning_text)
warn(warning_text, DeprecationWarning)
- self.SETTINGS["thumbnails"] == self.SETTINGS[
- "generate-missing-thumbnails"
- ]
+ self.SETTINGS["thumbnails"] = self.SETTINGS["generate-missing-thumbnails"]
if "compress-videos" in self.SETTINGS:
warning_text = "compress-videos setting is deprecated and will be replaced by compress in version 0.8 please update"
config.LOGGER.warn(warning_text)
warn(warning_text, DeprecationWarning)
- self.SETTINGS["compress"] == self.SETTINGS["compress-videos"]
+ self.SETTINGS["compress"] = self.SETTINGS["compress-videos"]
# these will be assigned to later by the argparse handling.
self.args = None
@@ -114,9 +110,7 @@ def __init__(self, *args, **kwargs):
action="store_true",
help="Force file re-download (skip .ricecookerfilecache/).",
)
- parser.add_argument(
- "--debug", action="store_true", help="Print extra debugging infomation."
- )
+        parser.add_argument("--debug", action="store_true", help="Print extra debugging information.")
parser.add_argument(
"-v",
"--verbose",
@@ -124,9 +118,7 @@ def __init__(self, *args, **kwargs):
default=True,
help="Verbose mode (default).",
)
- parser.add_argument(
- "--warn", action="store_true", help="Print errors and warnings."
- )
+ parser.add_argument("--warn", action="store_true", help="Print errors and warnings.")
parser.add_argument("--quiet", action="store_true", help="Print only errors.")
parser.add_argument(
"--compress",
@@ -189,8 +181,7 @@ def __init__(self, *args, **kwargs):
dest="stage_deprecated",
action="store_true",
help=(
- "(deprecated) Stage updated content for review."
- " Uploading a staging tree is now the default behavior. Use --deploy to upload to the main tree."
+ "(deprecated) Stage updated content for review. Uploading a staging tree is now the default behavior. Use --deploy to upload to the main tree."
),
)
@@ -261,14 +252,10 @@ def parse_args_and_options(self):
"DEPRECATION WARNING: --reset is now the default bevavior. The --reset flag has been deprecated and will be removed in ricecooker 1.0."
)
if args["publish"] and args["stage"]:
- raise InvalidUsageException(
- "The --publish argument must be used together with --deploy argument."
- )
+            raise InvalidUsageException("The --publish argument must be used together with the --deploy argument.")
logging_args = [key for key in ["quiet", "warn", "debug"] if args[key]]
if len(logging_args) > 1:
- raise InvalidUsageException(
- "Agruments --quiet, --warn, and --debug cannot be used together."
- )
+            raise InvalidUsageException("Arguments --quiet, --warn, and --debug cannot be used together.")
if args["command"] == "uploadchannel":
# Make sure token is provided. There are four ways to specify:
@@ -287,9 +274,7 @@ def parse_args_and_options(self):
option_key, option_value = preoption.split("=")
options.update({option_key.strip(): option_value.strip()})
except IndexError:
- msg = "Invalid option '{0}': use [key]=[value] format (no whitespace)".format(
- preoption
- )
+ msg = "Invalid option '{0}': use [key]=[value] format (no whitespace)".format(preoption)
raise InvalidUsageException(msg)
self.args = args
@@ -340,22 +325,14 @@ def get_channel(self, **kwargs):
if hasattr(self, "channel_info"):
# Make sure we're not using the template id values in `channel_info`
template_domains = [""]
- using_template_domain = (
- self.channel_info["CHANNEL_SOURCE_DOMAIN"] in template_domains
- )
+ using_template_domain = self.channel_info["CHANNEL_SOURCE_DOMAIN"] in template_domains
if using_template_domain:
- config.LOGGER.error(
- "Template source domain detected. Please change CHANNEL_SOURCE_DOMAIN before running this chef."
- )
+ config.LOGGER.error("Template source domain detected. Please change CHANNEL_SOURCE_DOMAIN before running this chef.")
template_ids = ["", ""]
- using_template_source_id = (
- self.channel_info["CHANNEL_SOURCE_ID"] in template_ids
- )
+ using_template_source_id = self.channel_info["CHANNEL_SOURCE_ID"] in template_ids
if using_template_source_id:
- config.LOGGER.error(
- "Template channel source ID detected. Please change CHANNEL_SOURCE_ID before running this chef."
- )
+ config.LOGGER.error("Template channel source ID detected. Please change CHANNEL_SOURCE_ID before running this chef.")
if using_template_domain or using_template_source_id:
sys.exit(1)
@@ -373,9 +350,7 @@ def get_channel(self, **kwargs):
)
return channel
else:
- raise NotImplementedError(
- "Subclass must define get_channel method or have a channel_info (dict) attribute."
- )
+ raise NotImplementedError("Subclass must define get_channel method or have a channel_info (dict) attribute.")
def construct_channel(self, **kwargs):
"""
@@ -391,17 +366,11 @@ def load_chef_data(self):
self.CHEF_RUN_DATA = json.load(open(config.DATA_PATH))
def save_channel_tree_as_json(self, channel):
- filename = os.path.join(
- self.TREES_DATA_DIR, "{}.json".format(self.CHEF_RUN_DATA["current_run"])
- )
+ filename = os.path.join(self.TREES_DATA_DIR, "{}.json".format(self.CHEF_RUN_DATA["current_run"]))
os.makedirs(self.TREES_DATA_DIR, exist_ok=True)
json.dump(channel.get_json_tree(), open(filename, "w"), indent=2)
- self.CHEF_RUN_DATA["tree_archives"]["previous"] = self.CHEF_RUN_DATA[
- "tree_archives"
- ]["current"]
- self.CHEF_RUN_DATA["tree_archives"]["current"] = filename.replace(
- os.getcwd() + "/", ""
- )
+ self.CHEF_RUN_DATA["tree_archives"]["previous"] = self.CHEF_RUN_DATA["tree_archives"]["current"]
+ self.CHEF_RUN_DATA["tree_archives"]["current"] = filename.replace(os.getcwd() + "/", "")
self.save_chef_data()
def save_channel_metadata_as_csv(self, channel):
@@ -432,18 +401,12 @@ def load_channel_metadata_from_csv(self):
line_new_title = line["New Title"]
line_new_description = line["New Description"]
line_new_tags = line["New Tags"]
- if (
- line_new_title != ""
- or line_new_description != ""
- or line_new_tags != ""
- ):
+ if line_new_title != "" or line_new_description != "" or line_new_tags != "":
metadata_dict[line_source_id] = {}
if line_new_title != "":
metadata_dict[line_source_id]["New Title"] = line_new_title
if line_new_description != "":
- metadata_dict[line_source_id][
- "New Description"
- ] = line_new_description
+ metadata_dict[line_source_id]["New Description"] = line_new_description
if line_new_tags != "":
tags_arr = re.split(",| ,", line_new_tags)
metadata_dict[line_source_id]["New Tags"] = tags_arr
@@ -485,12 +448,7 @@ def run(self, args, options):
"""
args_copy = args.copy()
args_copy["token"] = args_copy["token"][0:6] + "..."
- config.LOGGER.info(
- "In SushiChef.run method. args="
- + str(args_copy)
- + " options="
- + str(options)
- )
+ config.LOGGER.info("In SushiChef.run method. args=" + str(args_copy) + " options=" + str(options))
run_id = datetime.now().strftime("%Y-%m-%d__%H%M")
self.CHEF_RUN_DATA["current_run"] = run_id
@@ -562,9 +520,7 @@ def pre_run(self, args, options):
"""
This function is called before `run` to create the json tree file.
"""
- raise NotImplementedError(
- "JsonTreeChef subclass must implement the `pre_run` method."
- )
+ raise NotImplementedError("JsonTreeChef subclass must implement the `pre_run` method.")
def get_json_tree_path(self, *args, **kwargs):
"""
@@ -708,9 +664,7 @@ def pre_run(self, args, options):
kwargs.update(args)
kwargs.update(options)
json_tree_path = self.get_json_tree_path(**kwargs)
- build_ricecooker_json_tree(
- args, options, self.metadata_provider, json_tree_path
- )
+ build_ricecooker_json_tree(args, options, self.metadata_provider, json_tree_path)
class YouTubeSushiChef(SushiChef):
@@ -725,9 +679,7 @@ class YouTubeSushiChef(SushiChef):
DATA_DIR = os.path.abspath("chefdata")
YOUTUBE_CACHE_DIR = os.path.join(DATA_DIR, "youtubecache")
DOWNLOADS_DIR = os.path.join(DATA_DIR, "downloads")
- ARCHIVE_DIR = os.path.join(
- DOWNLOADS_DIR, "archive_{}".format(CONTENT_ARCHIVE_VERSION)
- )
+ ARCHIVE_DIR = os.path.join(DOWNLOADS_DIR, "archive_{}".format(CONTENT_ARCHIVE_VERSION))
USE_PROXY = False
def get_playlist_ids(self):
@@ -792,10 +744,7 @@ def create_nodes_for_playlists(self):
playlist_nodes = []
for playlist_id in self.get_playlist_ids():
-
- playlist = YouTubePlaylistUtils(
- id=playlist_id, cache_dir=self.YOUTUBE_CACHE_DIR
- )
+ playlist = YouTubePlaylistUtils(id=playlist_id, cache_dir=self.YOUTUBE_CACHE_DIR)
playlist_info = playlist.get_playlist_info(use_proxy=self.USE_PROXY)
@@ -818,9 +767,7 @@ def create_nodes_for_playlists(self):
for child in playlist_info["children"]:
# check for duplicate videos
if child["id"] not in video_ids:
- video_node = self.create_video_node(
- child, parent_id=topic_source_id
- )
+ video_node = self.create_video_node(child, parent_id=topic_source_id)
if video_node:
topic_node.add_child(video_node)
video_ids.append(child["id"])
@@ -841,9 +788,7 @@ def create_video_node(self, video_id, parent_id="", playlist_id=None):
# Check youtube thumbnail extension as some are not supported formats
thumbnail_link = video_details["thumbnail"]
config.LOGGER.info("thumbnail = {}".format(thumbnail_link))
- archive_filename = get_archive_filename(
- thumbnail_link, download_root=self.ARCHIVE_DIR
- )
+ archive_filename = get_archive_filename(thumbnail_link, download_root=self.ARCHIVE_DIR)
dest_file = os.path.join(self.ARCHIVE_DIR, archive_filename)
os.makedirs(os.path.dirname(dest_file), exist_ok=True)
@@ -853,10 +798,7 @@ def create_video_node(self, video_id, parent_id="", playlist_id=None):
response = requests.get(thumbnail_link, stream=True)
# Some images that YT returns are actually webp despite their extension,
# so make sure we update our file extension to match.
- if (
- "Content-Type" in response.headers
- and response.headers["Content-Type"] == "image/webp"
- ):
+ if "Content-Type" in response.headers and response.headers["Content-Type"] == "image/webp":
base_path, ext = os.path.splitext(dest_file)
dest_file = base_path + ".webp"
@@ -874,18 +816,14 @@ def create_video_node(self, video_id, parent_id="", playlist_id=None):
description=video_details["description"],
language=self.channel_info["CHANNEL_LANGUAGE"],
author=self.get_metadata_for_video("author", video_id, playlist_id) or "",
- provider=self.get_metadata_for_video("provider", video_id, playlist_id)
- or "",
+ provider=self.get_metadata_for_video("provider", video_id, playlist_id) or "",
thumbnail=dest_file,
license=self.get_metadata_for_video("license", video_id, playlist_id),
files=[
files.YouTubeVideoFile(
youtube_id=video_id,
language="en",
- high_resolution=self.get_metadata_for_video(
- "high_resolution", video_id, playlist_id
- )
- or False,
+ high_resolution=self.get_metadata_for_video("high_resolution", video_id, playlist_id) or False,
)
],
)
@@ -908,9 +846,7 @@ def construct_channel(self, *args, **kwargs):
channel = self.get_channel(*args, **kwargs)
if len(self.get_playlist_ids()) == 0 and len(self.get_video_ids()) == 0:
- raise NotImplementedError(
- "Either get_playlist_ids() or get_video_ids() must be implemented."
- )
+ raise NotImplementedError("Either get_playlist_ids() or get_video_ids() must be implemented.")
# TODO: Replace next line with chef code
nodes = self.create_nodes_for_playlists()
diff --git a/ricecooker/classes/curriculum.py b/ricecooker/classes/curriculum.py
index 9c59f6e4..4ed3f130 100644
--- a/ricecooker/classes/curriculum.py
+++ b/ricecooker/classes/curriculum.py
@@ -5,6 +5,7 @@
learning objectives, which can be associated with lessons and assessment
questions.
"""
+
import uuid
# Fixed namespace for generating deterministic learning objective UUIDs.
diff --git a/ricecooker/classes/files.py b/ricecooker/classes/files.py
index c824a829..71f20a20 100644
--- a/ricecooker/classes/files.py
+++ b/ricecooker/classes/files.py
@@ -8,15 +8,13 @@
from le_utils.constants import languages
from requests import HTTPError
-from .. import config
-from ..exceptions import UnknownFileTypeError
from ricecooker.utils.caching import FILECACHE
from ricecooker.utils.caching import get_cache_filename
+from ricecooker.utils.images import ThumbnailGenerationError
from ricecooker.utils.images import create_image_from_epub
from ricecooker.utils.images import create_image_from_pdf_page
from ricecooker.utils.images import create_image_from_zip
from ricecooker.utils.images import create_tiled_image
-from ricecooker.utils.images import ThumbnailGenerationError
from ricecooker.utils.pipeline import FilePipeline
from ricecooker.utils.pipeline.convert import AudioCompressionHandler
from ricecooker.utils.pipeline.convert import ImageConversionHandler
@@ -30,6 +28,9 @@
from ricecooker.utils.videos import extract_thumbnail_from_video
from ricecooker.utils.youtube import get_language_with_alpha2_fallback
+from .. import config
+from ..exceptions import UnknownFileTypeError
+
fallback_pipeline = FilePipeline()
# Lookup table for convertible file formats for a given preset
@@ -116,30 +117,19 @@ def size(self):
return os.path.getsize(config.get_storage_path(self.get_filename()))
def truncate_fields(self):
- if (
- self.original_filename
- and len(self.original_filename) > config.MAX_ORIGINAL_FILENAME_LENGTH
- ):
- config.print_truncate(
- "original_filename", self.node.source_id, self.original_filename
- )
+ if self.original_filename and len(self.original_filename) > config.MAX_ORIGINAL_FILENAME_LENGTH:
+ config.print_truncate("original_filename", self.node.source_id, self.original_filename)
original_extension = self.original_filename.split(".")[-1]
if original_extension == self.original_filename:
original_extension = ""
self.original_filename = self.original_filename.split(".")[0]
- extension_length = (
- 0 if not original_extension else len(original_extension) + 1
- )
- self.original_filename = self.original_filename[
- : config.MAX_ORIGINAL_FILENAME_LENGTH - extension_length
- ]
+ extension_length = 0 if not original_extension else len(original_extension) + 1
+ self.original_filename = self.original_filename[: config.MAX_ORIGINAL_FILENAME_LENGTH - extension_length]
if original_extension:
self.original_filename += "." + original_extension
if self.source_url and len(self.source_url) > config.MAX_SOURCE_URL_LENGTH:
- config.print_truncate(
- "file_source_url", self.node.source_id, self.source_url
- )
+ config.print_truncate("file_source_url", self.node.source_id, self.source_url)
self.source_url = self.source_url[: config.MAX_SOURCE_URL_LENGTH]
def file_dict(self, filename=None):
@@ -164,9 +154,7 @@ def to_dict(self):
if os.path.isfile(config.get_storage_path(filename)):
return self.file_dict(filename=filename)
else:
- config.LOGGER.warning(
- "File not found: {}".format(config.get_storage_path(filename))
- )
+ config.LOGGER.warning("File not found: {}".format(config.get_storage_path(filename)))
return None
@@ -195,9 +183,7 @@ def validate(self):
if not extension:
extension = extract_path_ext(self.path, default_ext=self.default_ext)
if self.allowed_formats is not None and extension not in self.allowed_formats:
- raise ValueError(
- f"Incompatible extension {extension} for {self.__class__.__name__} at {self.path}"
- )
+ raise ValueError(f"Incompatible extension {extension} for {self.__class__.__name__} at {self.path}")
def __str__(self):
return self.path
@@ -209,9 +195,7 @@ def process_file(self):
except ValueError as ve:
raise InvalidFileException from ve
pipeline = config.FILE_PIPELINE or fallback_pipeline
- metadata = pipeline.execute(
- self.path, context=self.context, skip_cache=config.UPDATE
- )[0]
+ metadata = pipeline.execute(self.path, context=self.context, skip_cache=config.UPDATE)[0]
metadata = metadata.to_dict()
for key in metadata:
if key == "path":
@@ -255,9 +239,7 @@ class AudioFile(DownloadFile):
default_preset = format_presets.AUDIO
def __init__(self, path, ffmpeg_settings=None, **kwargs):
- super(AudioFile, self).__init__(
- path, context={"audio_settings": ffmpeg_settings or {}}, **kwargs
- )
+ super(AudioFile, self).__init__(path, context={"audio_settings": ffmpeg_settings or {}}, **kwargs)
class DocumentFile(DownloadFile):
@@ -301,9 +283,7 @@ class VideoFile(DownloadFile):
is_primary = True
def __init__(self, path, ffmpeg_settings=None, **kwargs):
- super(VideoFile, self).__init__(
- path, context={"video_settings": ffmpeg_settings or {}}, **kwargs
- )
+ super(VideoFile, self).__init__(path, context={"video_settings": ffmpeg_settings or {}}, **kwargs)
class WebVideoFile(DownloadFile):
@@ -328,9 +308,7 @@ def __init__(
class YouTubeVideoFile(WebVideoFile):
def __init__(self, youtube_id, **kwargs):
- super(YouTubeVideoFile, self).__init__(
- "http://www.youtube.com/watch?v={}".format(youtube_id), **kwargs
- )
+ super(YouTubeVideoFile, self).__init__("http://www.youtube.com/watch?v={}".format(youtube_id), **kwargs)
class YouTubeSubtitleFile(File):
@@ -349,9 +327,7 @@ def __init__(self, youtube_id, language=None, **kwargs):
self.youtube_url = "http://www.youtube.com/watch?v={}".format(youtube_id)
if isinstance(language, languages.Language):
language = language.code
- self.youtube_language = (
- language # save youtube language code (can differ from internal repr.)
- )
+ self.youtube_language = language # save youtube language code (can differ from internal repr.)
language_obj = get_language_with_alpha2_fallback(language)
super(YouTubeSubtitleFile, self).__init__(language=language_obj.code, **kwargs)
self.context = {
@@ -445,17 +421,13 @@ def process_file(self):
def generate_graphie_file(self):
if self.ka_language is None:
raise ValueError("ka_language must be specified")
- key = "GRAPHIE: {}".format(
- self.path + (self.ka_language if self.ka_language != "en" else "")
- )
+ key = "GRAPHIE: {}".format(self.path + (self.ka_language if self.ka_language != "en" else ""))
cache_file = get_cache_filename(key)
if not config.UPDATE and cache_file:
return cache_file
- tempf = tempfile.NamedTemporaryFile(
- suffix=".{}".format(file_formats.GRAPHIE), delete=False
- )
+ tempf = tempfile.NamedTemporaryFile(suffix=".{}".format(file_formats.GRAPHIE), delete=False)
# Initialize hash and files
delimiter = bytes(exercises.GRAPHIE_DELIMITER, "UTF-8")
config.LOGGER.info("\tDownloading graphie {}".format(self.original_filename))
@@ -507,9 +479,7 @@ def process_file(self):
Returns: filename or None
"""
config.LOGGER.info("\t--- Extracting thumbnail from {}".format(self.path))
- tempf = tempfile.NamedTemporaryFile(
- suffix=".{}".format(file_formats.PNG), delete=False
- )
+ tempf = tempfile.NamedTemporaryFile(suffix=".{}".format(file_formats.PNG), delete=False)
tempf.close()
try:
self.extractor_fun(self.path, tempf.name, **self.extractor_kwargs)
@@ -573,9 +543,7 @@ class TiledThumbnailFile(ThumbnailPresetMixin, File):
def __init__(self, source_nodes, **kwargs):
self.sources = []
for n in source_nodes:
- images = [
- f for f in n.files if isinstance(f, ThumbnailFile) and f.get_filename()
- ]
+ images = [f for f in n.files if isinstance(f, ThumbnailFile) and f.get_filename()]
if len(images) > 0:
self.sources.append(images[0])
super(TiledThumbnailFile, self).__init__(**kwargs)
@@ -594,13 +562,8 @@ def generate_tiled_image(self):
else:
return None
config.LOGGER.info("\tGenerating tiled thumbnail.")
- images = [
- config.get_storage_path(f.get_filename())
- for f in self.sources[:num_pictures]
- ]
- with tempfile.NamedTemporaryFile(
- suffix=".{}".format(file_formats.PNG)
- ) as tempf:
+ images = [config.get_storage_path(f.get_filename()) for f in self.sources[:num_pictures]]
+ with tempfile.NamedTemporaryFile(suffix=".{}".format(file_formats.PNG)) as tempf:
tempf.close()
create_tiled_image(images, tempf.name)
filename = copy_file_to_storage(tempf.name, ext=file_formats.PNG)
@@ -630,11 +593,7 @@ def validate(self):
try:
response.raise_for_status()
except Exception as e:
- raise ValueError(
- "Could not find remote file {} for reason {}".format(
- self.filename, e
- )
- )
+ raise ValueError("Could not find remote file {} for reason {}".format(self.filename, e))
self.size = int(response.headers.get("Content-Length", 0))
self._validated = True
diff --git a/ricecooker/classes/licenses.py b/ricecooker/classes/licenses.py
index 15e3bd88..9217cc34 100644
--- a/ricecooker/classes/licenses.py
+++ b/ricecooker/classes/licenses.py
@@ -23,22 +23,14 @@ def get_license(license_id, copyright_holder=None, description=None):
elif license_id == licenses.PUBLIC_DOMAIN:
return PublicDomainLicense(copyright_holder=copyright_holder)
elif license_id == licenses.SPECIAL_PERMISSIONS:
- return SpecialPermissionsLicense(
- copyright_holder=copyright_holder, description=description
- )
+ return SpecialPermissionsLicense(copyright_holder=copyright_holder, description=description)
else:
- raise UnknownLicenseError(
- "{} is not a valid license id. (Valid license are {})".format(
- license_id, [_list[0] for _list in licenses.choices]
- )
- )
+        raise UnknownLicenseError("{} is not a valid license id. (Valid licenses are {})".format(license_id, [_list[0] for _list in licenses.choices]))
class License(object):
license_id = None # (str): content's license based on le_utils.constants.licenses
- copyright_holder = (
- None # (str): name of person or organization who owns license (optional)
- )
+ copyright_holder = None # (str): name of person or organization who owns license (optional)
description = None # (str): description of the license (optional)
require_copyright_holder = True
@@ -50,33 +42,17 @@ def get_id(self):
return self.license_id
def validate(self):
- assert (
- not self.require_copyright_holder or self.copyright_holder != ""
- ), "{} License requires a copyright holder".format(self.license_id)
- assert isinstance(
- self.copyright_holder, str
- ), "Copyright holder must be a string"
+ assert not self.require_copyright_holder or self.copyright_holder != "", "{} License requires a copyright holder".format(self.license_id)
+ assert isinstance(self.copyright_holder, str), "Copyright holder must be a string"
def truncate_fields(self):
- if (
- self.description
- and len(self.description) > config.MAX_LICENSE_DESCRIPTION_LENGTH
- ):
- config.print_truncate(
- "license_description", self.license_id, self.description
- )
+ if self.description and len(self.description) > config.MAX_LICENSE_DESCRIPTION_LENGTH:
+ config.print_truncate("license_description", self.license_id, self.description)
self.description = self.description[: config.MAX_LICENSE_DESCRIPTION_LENGTH]
- if (
- self.copyright_holder
- and len(self.copyright_holder) > config.MAX_COPYRIGHT_HOLDER_LENGTH
- ):
- config.print_truncate(
- "copyright_holder", self.license_id, self.copyright_holder
- )
- self.copyright_holder = self.copyright_holder[
- : config.MAX_COPYRIGHT_HOLDER_LENGTH
- ]
+ if self.copyright_holder and len(self.copyright_holder) > config.MAX_COPYRIGHT_HOLDER_LENGTH:
+ config.print_truncate("copyright_holder", self.license_id, self.copyright_holder)
+ self.copyright_holder = self.copyright_holder[: config.MAX_COPYRIGHT_HOLDER_LENGTH]
def as_dict(self):
return {
@@ -203,6 +179,4 @@ class SpecialPermissionsLicense(License):
def __init__(self, copyright_holder=None, description=None):
assert description, "Special Permissions licenses must have a description"
- super(SpecialPermissionsLicense, self).__init__(
- copyright_holder=copyright_holder, description=description
- )
+ super(SpecialPermissionsLicense, self).__init__(copyright_holder=copyright_holder, description=description)
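
Editor's sketch: the truncate_fields hunks in this file all follow one warn-then-slice shape. A generic version of that shape, with an illustrative max length and logger rather than ricecooker's actual config values:

    import logging

    logger = logging.getLogger(__name__)

    def truncate_field(field_name, value, max_len):
        # Warn once, then hard-slice to the model's column limit.
        if value and len(value) > max_len:
            logger.warning("%s is too long - max %d characters (truncating)", field_name, max_len)
            value = value[:max_len]
        return value

    description = truncate_field("license_description", "x" * 500, 400)
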
diff --git a/ricecooker/classes/nodes.py b/ricecooker/classes/nodes.py
index fed5ceda..f0097f5f 100644
--- a/ricecooker/classes/nodes.py
+++ b/ricecooker/classes/nodes.py
@@ -18,6 +18,9 @@
from le_utils.constants.labels import resource_type
from le_utils.constants.labels import subjects
+from ricecooker.utils.pipeline.exceptions import ExpectedFileException
+from ricecooker.utils.pipeline.exceptions import InvalidFileException
+
from .. import __version__
from .. import config
from ..exceptions import InvalidNodeException
@@ -32,8 +35,6 @@
from .licenses import License
from .questions import VARIANT_A
from .questions import VARIANT_B
-from ricecooker.utils.pipeline.exceptions import ExpectedFileException
-from ricecooker.utils.pipeline.exceptions import InvalidFileException
MASTERY_MODELS = [id for id, name in exercises.MASTERY_MODELS]
ROLES = [id for id, name in roles.choices]
@@ -167,9 +168,7 @@ def __init__(
self.tags = tags or []
self.domain_ns = domain_ns
self.suggested_duration = suggested_duration
- self.questions = (
- self.questions if hasattr(self, "questions") else []
- ) # Needed for to_dict method
+ self.questions = self.questions if hasattr(self, "questions") else [] # Needed for to_dict method
self.grade_levels = grade_levels or []
self.resource_types = resource_types or []
@@ -179,9 +178,7 @@ def __init__(
self.learner_needs = learner_needs or []
self.role = role
- self.set_license(
- license, copyright_holder=copyright_holder, description=license_description
- )
+ self.set_license(license, copyright_holder=copyright_holder, description=license_description)
def set_language(self, language):
"""Set self.language to internal lang. repr. code from str or Language object."""
@@ -196,9 +193,7 @@ def set_language(self, language):
def __str__(self):
count = self.count()
- metadata = "{0} {1}".format(
- count, "descendant" if count == 1 else "descendants"
- )
+ metadata = "{0} {1}".format(count, "descendant" if count == 1 else "descendants")
return "{title} ({kind}) ({source_id}): {metadata}".format(
title=self.title,
kind=self.__class__.__name__,
@@ -215,9 +210,7 @@ def truncate_fields(self):
self.title = self.title[: config.MAX_TITLE_LENGTH]
if self.source_id and len(self.source_id) > config.MAX_SOURCE_ID_LENGTH:
- config.print_truncate(
- "source_id", self.source_id, self.source_id, kind=self.kind
- )
+ config.print_truncate("source_id", self.source_id, self.source_id, kind=self.kind)
self.source_id = self.source_id[: config.MAX_SOURCE_ID_LENGTH]
for f in self.files:
@@ -245,9 +238,7 @@ def add_file(self, file_to_add):
"""
from .files import File
- assert isinstance(
- file_to_add, File
- ), "Files being added must be instances of a subclass of File class"
+ assert isinstance(file_to_add, File), "Files being added must be instances of a subclass of File class"
file_to_add.node = self
if file_to_add not in self.files:
self.files.append(file_to_add)
@@ -360,9 +351,7 @@ def print_tree(self, indent=2):
Args: indent (int): What level of indentation at which to start printing
Returns: None
"""
- config.LOGGER.info(
- "{indent}{data}".format(indent=" " * indent, data=str(self))
- )
+ config.LOGGER.info("{indent}{data}".format(indent=" " * indent, data=str(self)))
for child in self.children:
child.print_tree(indent + 1)
@@ -423,9 +412,7 @@ def set_license(self, license, copyright_holder=None, description=None):
if isinstance(license, str):
from .licenses import get_license
- license = get_license(
- license, copyright_holder=copyright_holder, description=description
- )
+ license = get_license(license, copyright_holder=copyright_holder, description=description)
self.license = license
def _validate(self): # noqa: C901
@@ -452,9 +439,7 @@ def _validate(self): # noqa: C901
not (isinstance(self.description, str) or self.description is None),
"Description is not a string",
)
- self._validate_values(
- not isinstance(self.children, list), "Children is not a list"
- )
+ self._validate_values(not isinstance(self.children, list), "Children is not a list")
for f in self.files:
self._validate_values(not isinstance(f, File), "Files must be file class")
@@ -468,22 +453,12 @@ def _validate(self): # noqa: C901
)
self.infer_learning_activities()
- self._validate_values(
- not isinstance(self.author, str), "Author is not a string"
- )
- self._validate_values(
- not isinstance(self.aggregator, str), "Aggregator is not a string"
- )
- self._validate_values(
- not isinstance(self.provider, str), "Provider is not a string"
- )
+ self._validate_values(not isinstance(self.author, str), "Author is not a string")
+ self._validate_values(not isinstance(self.aggregator, str), "Aggregator is not a string")
+ self._validate_values(not isinstance(self.provider, str), "Provider is not a string")
self._validate_values(not isinstance(self.files, list), "Files is not a list")
- self._validate_values(
- not isinstance(self.questions, list), "Questions is not a list"
- )
- self._validate_values(
- not isinstance(self.extra_fields, dict), "Extra fields is not a dict"
- )
+ self._validate_values(not isinstance(self.questions, list), "Questions is not a list")
+ self._validate_values(not isinstance(self.extra_fields, dict), "Extra fields is not a dict")
self._validate_values(not isinstance(self.tags, list), "Tags is not a list")
for tag in self.tags:
@@ -494,17 +469,13 @@ def _validate(self): # noqa: C901
)
if self.license is not None:
- self._validate_values(
- not isinstance(self.license, License), "License is not a license object"
- )
+ self._validate_values(not isinstance(self.license, License), "License is not a license object")
try:
self.license.validate()
except AssertionError as e:
self._validate_values(True, str(e))
- self._validate_values(
- self.role not in ROLES, f"Role must be one of the following: {ROLES}"
- )
+ self._validate_values(self.role not in ROLES, f"Role must be one of the following: {ROLES}")
if self.grade_levels is not None:
for grade in self.grade_levels:
@@ -538,15 +509,12 @@ def _validate(self): # noqa: C901
)
for access_label in self.accessibility_labels:
self._validate_values(
- access_label
- not in accessibility_categories.ACCESSIBILITYCATEGORIESLIST,
+ access_label not in accessibility_categories.ACCESSIBILITYCATEGORIESLIST,
f"Accessibility label must be one of the following: {accessibility_categories.ACCESSIBILITYCATEGORIESLIST}",
)
if self.categories is not None:
- self._validate_values(
- not isinstance(self.categories, list), "Categories must be list"
- )
+ self._validate_values(not isinstance(self.categories, list), "Categories must be list")
for category in self.categories:
self._validate_values(
category not in subjects.SUBJECTSLIST,
@@ -554,9 +522,7 @@ def _validate(self): # noqa: C901
)
if self.learner_needs is not None:
- self._validate_values(
- not isinstance(self.learner_needs, list), "Learner needs must be list"
- )
+ self._validate_values(not isinstance(self.learner_needs, list), "Learner needs must be list")
for learner_need in self.learner_needs:
self._validate_values(
learner_need not in needs.NEEDSLIST,
@@ -585,9 +551,7 @@ def get_metadata_dict(self, metadata: dict[str, any]) -> dict[str, any]:
node_values = getattr(self, field)
final_values = set()
# Get a list of all keys in reverse order of length so we can remove any less specific values
- all_values = sorted(
- set(ancestor_values).union(set(node_values)), key=len, reverse=True
- )
+ all_values = sorted(set(ancestor_values).union(set(node_values)), key=len, reverse=True)
for value in all_values:
if not any(k != value and k.startswith(value) for k in final_values):
final_values.add(value)
@@ -631,14 +595,10 @@ def get_node_id(self):
def truncate_fields(self):
if self.description and len(self.description) > config.MAX_DESCRIPTION_LENGTH:
- config.print_truncate(
- "description", self.source_id, self.description, kind=self.kind
- )
+ config.print_truncate("description", self.source_id, self.description, kind=self.kind)
self.description = self.description[: config.MAX_DESCRIPTION_LENGTH]
if self.tagline and len(self.tagline) > config.MAX_TAGLINE_LENGTH:
- config.print_truncate(
- "tagline", self.source_id, self.tagline, kind=self.kind
- )
+ config.print_truncate("tagline", self.source_id, self.tagline, kind=self.kind)
self.tagline = self.tagline[: config.MAX_TAGLINE_LENGTH]
super(ChannelNode, self).truncate_fields()
@@ -659,13 +619,7 @@ def to_dict(self):
"source_id": self.source_id,
"ricecooker_version": __version__,
"extra_fields": json.dumps(self.extra_fields),
- "files": [
- f.to_dict()
- for f in self.files
- if f
- and f.filename
- and not (self.thumbnail and self.thumbnail.filename is f.filename)
- ],
+ "files": [f.to_dict() for f in self.files if f and f.filename and not (self.thumbnail and self.thumbnail.filename is f.filename)],
}
def _validate(self):
@@ -673,9 +627,7 @@ def _validate(self):
Args: None
Returns: boolean indicating if channel is valid
"""
- self._validate_values(
- not isinstance(self.source_domain, str), "Channel domain must be a string"
- )
+ self._validate_values(not isinstance(self.source_domain, str), "Channel domain must be a string")
self._validate_values(self.language is None, "Channel must have a language")
return super(ChannelNode, self)._validate()
@@ -700,13 +652,9 @@ def get_content_id(self):
return self.content_id
def get_node_id(self):
- assert (
- self.parent
- ), "Parent not found: node id must be calculated based on parent"
+ assert self.parent, "Parent not found: node id must be calculated based on parent"
if not self.node_id:
- self.node_id = uuid.uuid5(
- self.parent.get_node_id(), self.get_content_id().hex
- )
+ self.node_id = uuid.uuid5(self.parent.get_node_id(), self.get_content_id().hex)
return self.node_id
def truncate_fields(self):
@@ -715,15 +663,11 @@ def truncate_fields(self):
self.author = self.author[: config.MAX_AUTHOR_LENGTH]
if self.aggregator and len(self.aggregator) > config.MAX_AGGREGATOR_LENGTH:
- config.print_truncate(
- "aggregator", self.source_id, self.aggregator, kind=self.kind
- )
+ config.print_truncate("aggregator", self.source_id, self.aggregator, kind=self.kind)
self.aggregator = self.aggregator[: config.MAX_AGGREGATOR_LENGTH]
if self.provider and len(self.provider) > config.MAX_PROVIDER_LENGTH:
- config.print_truncate(
- "provider", self.source_id, self.provider, kind=self.kind
- )
+ config.print_truncate("provider", self.source_id, self.provider, kind=self.kind)
self.provider = self.provider[: config.MAX_PROVIDER_LENGTH]
self.license and self.license.truncate_fields()
@@ -743,10 +687,7 @@ def convert(text):
return int(text) if text.isdigit() else text.lower()
def key(key):
- return [
- convert(re.sub(r"[^A-Za-z0-9]+", "", c.replace("&", "and")))
- for c in re.split("([0-9]+)", key.title)
- ]
+ return [convert(re.sub(r"[^A-Za-z0-9]+", "", c.replace("&", "and"))) for c in re.split("([0-9]+)", key.title)]
self.children = sorted(self.children, key=key, reverse=reverse)
return self.children
@@ -759,8 +700,7 @@ def to_dict(self):
return {
"title": self.node_modifications.get("New Title") or self.title,
"language": self.language,
- "description": self.node_modifications.get("New Description")
- or self.description,
+ "description": self.node_modifications.get("New Description") or self.description,
"node_id": self.get_node_id().hex,
"content_id": self.get_content_id().hex,
"source_domain": self.domain_ns.hex,
@@ -768,9 +708,7 @@ def to_dict(self):
"author": self.author,
"aggregator": self.aggregator,
"provider": self.provider,
- "files": [
- f.to_dict() for f in self.files if f and f.filename
- ], # Filter out failed downloads
+ "files": [f.to_dict() for f in self.files if f and f.filename], # Filter out failed downloads
"tags": self.node_modifications.get("New Tags") or self.tags,
"kind": self.kind,
"license": None,
@@ -789,9 +727,7 @@ def to_dict(self):
def gather_ancestor_metadata(self):
if not self.parent:
- raise InvalidNodeException(
- "Parent not found: cannot gather ancestor metadata if no parent exists"
- )
+ raise InvalidNodeException("Parent not found: cannot gather ancestor metadata if no parent exists")
metadata = self.parent.gather_ancestor_metadata()
return self.get_metadata_dict(metadata)
@@ -851,12 +787,8 @@ def __str__(self):
if len(self.files) == 0 and self.uri:
metadata = "uri: {}".format(self.uri)
else:
- metadata = "{0} {1}".format(
- len(self.files), "file" if len(self.files) == 1 else "files"
- )
- return "{title} ({kind}): {metadata}".format(
- title=self.title, kind=self.__class__.__name__, metadata=metadata
- )
+ metadata = "{0} {1}".format(len(self.files), "file" if len(self.files) == 1 else "files")
+ return "{title} ({kind}): {metadata}".format(title=self.title, kind=self.__class__.__name__, metadata=metadata)
def _validate_uri(self):
try:
@@ -864,9 +796,7 @@ def _validate_uri(self):
except InvalidFileException:
should_handle = False
if not should_handle:
- raise InvalidNodeException(
- "Invalid node: pipeline cannot handle uri {}".format(self.uri)
- )
+ raise InvalidNodeException("Invalid node: pipeline cannot handle uri {}".format(self.uri))
def _validate(self):
"""validate: Makes sure content node is valid
@@ -879,16 +809,7 @@ def _validate(self):
if self.required_presets:
num_required_presets = 0
for f in self.files:
- num_required_presets += (
- 1
- if (
- any(
- f.filename and f.get_preset() == preset
- for preset in self.required_presets
- )
- )
- else 0
- )
+ num_required_presets += 1 if (any(f.filename and f.get_preset() == preset for preset in self.required_presets)) else 0
self._validate_values(
num_required_presets == 0,
f"No required format preset found out of {self.required_presets}",
@@ -914,9 +835,7 @@ def _validate(self):
def _process_uri(self):
try:
- file_metadata_list = self.pipeline.execute(
- self.uri, skip_cache=config.UPDATE
- )
+ file_metadata_list = self.pipeline.execute(self.uri, skip_cache=config.UPDATE)
except (InvalidFileException, ExpectedFileException) as e:
config.LOGGER.error(f"Error processing path: {self.uri} with error: {e}")
return None
@@ -934,9 +853,7 @@ def _process_uri(self):
self.extra_fields.update(value)
else:
if key == "kind" and self.kind is not None and self.kind != value:
- raise InvalidNodeException(
- "Inferred kind is different from content node class kind."
- )
+ raise InvalidNodeException("Inferred kind is different from content node class kind.")
setattr(self, key, value)
def process_files(self):
@@ -991,13 +908,11 @@ class VideoNode(ContentNode):
required_presets = (format_presets.VIDEO_HIGH_RES, format_presets.VIDEO_LOW_RES)
def generate_thumbnail(self):
- from .files import VideoFile, WebVideoFile, ExtractedVideoThumbnailFile
+ from .files import ExtractedVideoThumbnailFile
+ from .files import VideoFile
+ from .files import WebVideoFile
- video_files = [
- f
- for f in self.files
- if isinstance(f, VideoFile) or isinstance(f, WebVideoFile)
- ]
+ video_files = [f for f in self.files if isinstance(f, VideoFile) or isinstance(f, WebVideoFile)]
if video_files:
video_file = video_files[0]
if video_file.filename and not video_file.error:
@@ -1022,12 +937,7 @@ def _validate(self):
language_codes_seen.add(language_code)
else:
file_info = file.path if hasattr(file, "path") else file.youtube_url
- config.LOGGER.warning(
- "Skipping duplicate subs for "
- + language_code
- + " from "
- + file_info
- )
+ config.LOGGER.warning("Skipping duplicate subs for " + language_code + " from " + file_info)
else:
new_files.append(file)
self.files = new_files
@@ -1111,10 +1021,7 @@ def __init__(self, *args, entrypoint=None, **kwargs):
super().__init__(*args, **kwargs)
def generate_thumbnail(self):
-
- html5_files = [
- f for f in self.files if f.get_preset() == format_presets.HTML5_ZIP
- ]
+ html5_files = [f for f in self.files if f.get_preset() == format_presets.HTML5_ZIP]
if html5_files:
html_file = html5_files[0]
if html_file.filename and not html_file.error:
@@ -1171,12 +1078,8 @@ def __init__(self, *args, questions=None, exercise_data=None, **kwargs):
super(ExerciseNode, self).__init__(*args, extra_fields=exercise_data, **kwargs)
def __str__(self):
- metadata = "{0} {1}".format(
- len(self.questions), "question" if len(self.questions) == 1 else "questions"
- )
- return "{title} ({kind}): {metadata}".format(
- title=self.title, kind=self.__class__.__name__, metadata=metadata
- )
+ metadata = "{0} {1}".format(len(self.questions), "question" if len(self.questions) == 1 else "questions")
+ return "{title} ({kind}): {metadata}".format(title=self.title, kind=self.__class__.__name__, metadata=metadata)
def add_question(self, question):
"""add_question: adds question to question list
@@ -1189,9 +1092,7 @@ def process_files(self):
"""Goes through question fields and replaces image strings
Returns: content-hash based filenames of all the required image files
"""
- config.LOGGER.info(
- "\t*** Processing images for exercise: {}".format(self.title)
- )
+ config.LOGGER.info("\t*** Processing images for exercise: {}".format(self.title))
downloaded = super(ExerciseNode, self).process_files()
for question in self.questions:
downloaded += question.process_question()
@@ -1240,9 +1141,7 @@ def _validate(self):
"""
# Check if questions are correct
- self._validate_values(
- not self.questions, "Exercise does not have any questions"
- )
+ self._validate_values(not self.questions, "Exercise does not have any questions")
self._validate_values(
any(not q.validate() for q in self.questions),
"Exercise has invalid question",
@@ -1252,12 +1151,8 @@ def _validate(self):
"Unrecognized mastery model {}".format(self.extra_fields["mastery_model"]),
)
if self.extra_fields["mastery_model"] == exercises.M_OF_N:
- self._validate_values(
- "m" not in self.extra_fields, "M of N mastery model is missing M value"
- )
- self._validate_values(
- "n" not in self.extra_fields, "M of N mastery model is missing N value"
- )
+ self._validate_values("m" not in self.extra_fields, "M of N mastery model is missing M value")
+ self._validate_values("n" not in self.extra_fields, "M of N mastery model is missing N value")
try:
int(self.extra_fields["m"])
except ValueError:
@@ -1316,11 +1211,12 @@ def add_file(self, file_to_add):
Args: file (SlideshowNode or ThumbnailFile): file model to add to node
Returns: None
"""
- from .files import ThumbnailFile, SlideImageFile
+ from .files import SlideImageFile
+ from .files import ThumbnailFile
- assert isinstance(file_to_add, ThumbnailFile) or isinstance(
- file_to_add, SlideImageFile
- ), "Files being added must be instances of a subclass of File class"
+ assert isinstance(file_to_add, ThumbnailFile) or isinstance(file_to_add, SlideImageFile), (
+ "Files being added must be instances of a subclass of File class"
+ )
if file_to_add not in self.files:
filename = file_to_add.get_filename()
@@ -1334,12 +1230,8 @@ def add_file(self, file_to_add):
if isinstance(file_to_add, SlideImageFile):
#
# Find the idx of sort_order.next()
- slideshow_image_files = [
- f for f in self.files if isinstance(f, SlideImageFile)
- ]
- idx = len(
- slideshow_image_files
- ) # next available index, assuming added in desired order
+ slideshow_image_files = [f for f in self.files if isinstance(f, SlideImageFile)]
+ idx = len(slideshow_image_files) # next available index, assuming added in desired order
#
# Add slideshow data to extra_fields['slideshow_data'] (aka manifest)
slideshow_data = self.extra_fields["slideshow_data"]
@@ -1373,9 +1265,7 @@ def __init__(self, *args, entrypoint=None, **kwargs):
super(CustomNavigationNode, self).__init__(*args, **kwargs)
def generate_thumbnail(self):
- html5_files = [
- f for f in self.files if f.get_preset() == format_presets.HTML5_ZIP
- ]
+ html5_files = [f for f in self.files if f.get_preset() == format_presets.HTML5_ZIP]
if html5_files:
html_file = html5_files[0]
if html_file.filename and not html_file.error:
@@ -1443,40 +1333,22 @@ class StudioContentNode(TreeNode):
"suggested_duration",
]
- def __init__(
- self, source_channel_id, source_node_id=None, source_content_id=None, **kwargs
- ):
- self.source_channel_id = (
- source_channel_id if is_valid_uuid_string(source_channel_id) else None
- )
- self.source_node_id = (
- source_node_id if is_valid_uuid_string(source_node_id) else None
- )
- self.source_content_id = (
- source_content_id if is_valid_uuid_string(source_content_id) else None
- )
+ def __init__(self, source_channel_id, source_node_id=None, source_content_id=None, **kwargs):
+ self.source_channel_id = source_channel_id if is_valid_uuid_string(source_channel_id) else None
+ self.source_node_id = source_node_id if is_valid_uuid_string(source_node_id) else None
+ self.source_content_id = source_content_id if is_valid_uuid_string(source_content_id) else None
self.overrides = kwargs.copy()
overriden_title = kwargs.pop("title", "")
- super(StudioContentNode, self).__init__(
- source_node_id or source_content_id, overriden_title, **kwargs
- )
+ super(StudioContentNode, self).__init__(source_node_id or source_content_id, overriden_title, **kwargs)
def _validate(self):
if not self.source_channel_id:
- raise InvalidNodeException(
- "Invalid node: source_channel_id must be specified, and be a valid UUID string."
- )
+ raise InvalidNodeException("Invalid node: source_channel_id must be specified, and be a valid UUID string.")
if not self.source_node_id and not self.source_content_id:
- raise InvalidNodeException(
- "Invalid node: at least one of source_node_id or source_content_id must be specified, and be a valid UUID string."
- )
+ raise InvalidNodeException("Invalid node: at least one of source_node_id or source_content_id must be specified, and be a valid UUID string.")
for key in self.overrides:
if key not in self.ALLOWED_OVERRIDES:
- raise InvalidNodeException(
- "Invalid node: '{}' cannot be overriden on a StudioContentNode.".format(
- key
- )
- )
+ raise InvalidNodeException("Invalid node: '{}' cannot be overriden on a StudioContentNode.".format(key))
return super(StudioContentNode, self)._validate()
def to_dict(self):
@@ -1487,9 +1359,7 @@ def to_dict(self):
"source_content_id": self.source_content_id,
}
if "thumbnail" in self.overrides:
- self.overrides["files"] = [
- f.to_dict() for f in self.files if f and f.filename
- ]
+ self.overrides["files"] = [f.to_dict() for f in self.files if f and f.filename]
del self.overrides["thumbnail"]
data.update(self.overrides)
return data
@@ -1523,9 +1393,7 @@ def __init__(self, *args, **kwargs):
def _validate_child(self, node):
"""Validate that node is an instance of the allowed child class."""
if not isinstance(node, self.CHILD_CLASS):
- raise InvalidNodeException(
- f"{self.__class__.__name__} can only have {self.CHILD_CLASS.__name__} children"
- )
+ raise InvalidNodeException(f"{self.__class__.__name__} can only have {self.CHILD_CLASS.__name__} children")
def add_child(self, node):
"""Add a child node after validating its type."""
@@ -1582,9 +1450,7 @@ def _validate_learning_objectives(self, learning_objectives):
raise InvalidNodeException("Must have at least one learning objective")
for lo in learning_objectives:
if not isinstance(lo, LearningObjective):
- raise InvalidNodeException(
- f"Expected LearningObjective, got {type(lo).__name__}"
- )
+ raise InvalidNodeException(f"Expected LearningObjective, got {type(lo).__name__}")
def add_child(self, node, learning_objectives):
"""
@@ -1601,9 +1467,7 @@ def add_child(self, node, learning_objectives):
"""
self._validate_learning_objectives(learning_objectives)
if node.source_id in self.lesson_objectives:
- raise InvalidNodeException(
- f"Duplicate source_id '{node.source_id}' in {self.__class__.__name__}"
- )
+ raise InvalidNodeException(f"Duplicate source_id '{node.source_id}' in {self.__class__.__name__}")
self.lesson_objectives[node.source_id] = learning_objectives
super().add_child(node)
@@ -1627,21 +1491,15 @@ def _validate(self):
variant_a = []
variant_b = []
for question, variant, los in self.test_questions:
- self._validate_values(
- not question.validate(), "UnitNode has invalid question"
- )
+ self._validate_values(not question.validate(), "UnitNode has invalid question")
if variant == VARIANT_A:
variant_a.append((question, los))
else:
variant_b.append((question, los))
# Minimum 2 questions per variant
- self._validate_values(
- len(variant_a) < 2, "Must have at least 2 VARIANT_A questions"
- )
- self._validate_values(
- len(variant_b) < 2, "Must have at least 2 VARIANT_B questions"
- )
+ self._validate_values(len(variant_a) < 2, "Must have at least 2 VARIANT_A questions")
+ self._validate_values(len(variant_b) < 2, "Must have at least 2 VARIANT_B questions")
# Equal total counts
self._validate_values(
@@ -1660,12 +1518,8 @@ def _validate(self):
# Each LO equally represented across variants and across LOs
lo_totals = {}
for lo_id in lesson_los:
- a_count = sum(
- 1 for _, los in variant_a if any(lo.id == lo_id for lo in los)
- )
- b_count = sum(
- 1 for _, los in variant_b if any(lo.id == lo_id for lo in los)
- )
+ a_count = sum(1 for _, los in variant_a if any(lo.id == lo_id for lo in los))
+ b_count = sum(1 for _, los in variant_b if any(lo.id == lo_id for lo in los))
self._validate_values(
a_count != b_count,
"Learning objective must have equal questions in each variant",
@@ -1705,14 +1559,9 @@ def _get_learning_objectives_data(self):
return {
"learning_objectives": [lo.to_dict() for lo in all_los.values()],
- "assessment_objectives": {
- q.assessment_id: [lo.id for lo in los]
- for q, _, los in self.test_questions
- },
+ "assessment_objectives": {q.assessment_id: [lo.id for lo in los] for q, _, los in self.test_questions},
"lesson_objectives": {
- child.get_node_id().hex: [
- lo.id for lo in self.lesson_objectives[child.source_id]
- ]
+ child.get_node_id().hex: [lo.id for lo in self.lesson_objectives[child.source_id]]
for child in self.children
if child.source_id in self.lesson_objectives
},
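
Editor's sketch: the get_node_id hunk above shows the id scheme used throughout the tree — each node_id is a uuid5 of the parent's node_id namespace and the node's content_id hex, so ids are stable across runs. A toy illustration (all inputs made up):

    import uuid

    domain_ns = uuid.uuid5(uuid.NAMESPACE_DNS, "example.org")   # channel source domain
    content_id = uuid.uuid5(domain_ns, "lesson-1-source-id")    # stable per source_id
    root_id = uuid.uuid5(domain_ns, "my-channel-source-id")     # channel acts as root namespace
    node_id = uuid.uuid5(root_id, content_id.hex)               # stable per position in tree
    print(node_id.hex)
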
diff --git a/ricecooker/classes/questions.py b/ricecooker/classes/questions.py
index b795f931..ac686076 100644
--- a/ricecooker/classes/questions.py
+++ b/ricecooker/classes/questions.py
@@ -7,25 +7,22 @@
from bs4 import BeautifulSoup
from le_utils.constants import exercises
+from ricecooker.utils.encodings import get_base64_encoding
+
from .. import config
from ..exceptions import InvalidQuestionException
from .files import _ExerciseBase64ImageFile
from .files import _ExerciseGraphieFile
from .files import _ExerciseImageFile
-from ricecooker.utils.encodings import get_base64_encoding
# Reusable protocol and path pattern for Perseus questions
-PERSEUS_PROTOCOL_PATH = (
- r"(?P<protocol>web\+graphie|https?|file|data):(?P<path>[^\)\"]+)"
-)
+PERSEUS_PROTOCOL_PATH = r"(?P<protocol>web\+graphie|https?|file|data):(?P<path>[^\)\"]+)"
# match protocol:{{path}} in quotation marks for Perseus
PERSEUS_QUOTED_IMAGE_REGEX = rf"(?P<open>\"){PERSEUS_PROTOCOL_PATH}(?P<close>\")"
# match protocol:{{path}} in markdown images ![...](protocol:path) for Perseus - captures the URL part only
-PERSEUS_MARKDOWN_IMAGE_REGEX = (
- rf"(?P<open>!\[[^\]]*\]\(){PERSEUS_PROTOCOL_PATH}(?P<close>\))"
-)
+PERSEUS_MARKDOWN_IMAGE_REGEX = rf"(?P<open>!\[[^\]]*\]\(){PERSEUS_PROTOCOL_PATH}(?P<close>\))"
# match protocol:{{path}} either wrapped in parentheses or quotes (original regex)
MARKDOWN_IMAGE_REGEX = r"!\[([^\]]+)?\]\(([^\)]+?)\)" # match ![...](...)
@@ -64,9 +61,7 @@ def __init__(
self.question_type = question_type
self.files = []
self.answers = answers if answers is not None else []
- self.hints = (
- [] if hints is None else [hints] if isinstance(hints, str) else hints
- )
+ self.hints = [] if hints is None else [hints] if isinstance(hints, str) else hints
self.raw_data = raw_data
self.source_id = id
self.source_url = source_url
@@ -80,9 +75,7 @@ def assessment_id(self):
def truncate_fields(self):
if self.source_url and len(self.source_url) > config.MAX_SOURCE_URL_LENGTH:
- config.print_truncate(
- "question_source_url", self.source_id, self.source_url
- )
+ config.print_truncate("question_source_url", self.source_id, self.source_url)
self.source_url = self.source_url[: config.MAX_SOURCE_URL_LENGTH]
def to_dict(self):
@@ -93,9 +86,7 @@ def to_dict(self):
return {
"assessment_id": self.assessment_id,
"type": self.question_type,
- "files": [
- f.to_dict() for f in filter(lambda x: x and x.filename, self.files)
- ],
+ "files": [f.to_dict() for f in filter(lambda x: x and x.filename, self.files)],
"question": self.question,
"hints": json.dumps(self.hints, ensure_ascii=False),
"answers": json.dumps(self.answers, ensure_ascii=False),
@@ -218,9 +209,7 @@ def set_image(self, text):
# Process file to make the replacement_str available
exercise_image_file.process_file()
# Get `new_text` = the replacement path for the image resource
- new_text = exercises.CONTENT_STORAGE_FORMAT.format(
- exercise_image_file.get_replacement_str()
- )
+ new_text = exercises.CONTENT_STORAGE_FORMAT.format(exercise_image_file.get_replacement_str())
return new_text, [exercise_image_file]
def validate(self):
@@ -229,24 +218,14 @@ def validate(self):
Returns: boolean indicating if question is valid
"""
assert self.id is not None, "Assumption Failed: Question must have an id"
- assert (
- isinstance(self.question, str) or self.question is None
- ), "Assumption Failed: Question must be a string"
- assert isinstance(
- self.question_type, str
- ), "Assumption Failed: Question type must be a string"
- assert isinstance(
- self.answers, list
- ), "Assumption Failed: Answers must be a list"
+ assert isinstance(self.question, str) or self.question is None, "Assumption Failed: Question must be a string"
+ assert isinstance(self.question_type, str), "Assumption Failed: Question type must be a string"
+ assert isinstance(self.answers, list), "Assumption Failed: Answers must be a list"
assert isinstance(self.hints, list), "Assumption Failed: Hints must be a list"
for a in self.answers:
- assert isinstance(
- a, dict
- ), "Assumption Failed: Answer in answer list is not a dict"
+ assert isinstance(a, dict), "Assumption Failed: Answer in answer list is not a dict"
for h in self.hints:
- assert isinstance(
- h, str
- ), "Assumption Failed: Hint in hints list is not a string"
+ assert isinstance(h, str), "Assumption Failed: Hint in hints list is not a string"
return True
@@ -284,23 +263,13 @@ def validate(self):
Returns: boolean indicating if perseus question is valid
"""
try:
- assert (
- self.question == ""
- ), "Assumption Failed: Perseus question should not have a question"
- assert (
- self.question_type == exercises.PERSEUS_QUESTION
- ), "Assumption Failed: Question should be perseus type"
- assert (
- self.answers == []
- ), "Assumption Failed: Answer list should be empty for perseus question"
- assert (
- self.hints == []
- ), "Assumption Failed: Hints list should be empty for perseus question"
+ assert self.question == "", "Assumption Failed: Perseus question should not have a question"
+ assert self.question_type == exercises.PERSEUS_QUESTION, "Assumption Failed: Question should be perseus type"
+ assert self.answers == [], "Assumption Failed: Answer list should be empty for perseus question"
+ assert self.hints == [], "Assumption Failed: Hints list should be empty for perseus question"
return super(PerseusQuestion, self).validate()
except AssertionError:
- raise InvalidQuestionException(
- "Invalid question: {0}".format(self.__dict__)
- )
+ raise InvalidQuestionException("Invalid question: {0}".format(self.__dict__))
def _replace_image(self, match):
protocol = match.group("protocol")
@@ -330,9 +299,7 @@ def _replace_image(self, match):
exercise_image_file.process_file()
self.files.append(exercise_image_file)
# Get `new_path` = the replacement path for the image resource
- new_path = exercises.CONTENT_STORAGE_FORMAT.format(
- exercise_image_file.get_replacement_str()
- )
+ new_path = exercises.CONTENT_STORAGE_FORMAT.format(exercise_image_file.get_replacement_str())
if protocol == "web+graphie": # need to put back the `web+graphie:` prefix
new_path = "web+graphie:" + new_path
return f"{open}{new_path}{close}"
@@ -344,13 +311,9 @@ def process_question(self):
Returns: list of all files needed to render this question.
"""
# First pass: handle quoted images
- self.raw_data = re.sub(
- PERSEUS_QUOTED_IMAGE_REGEX, self._replace_image, self.raw_data
- )
+ self.raw_data = re.sub(PERSEUS_QUOTED_IMAGE_REGEX, self._replace_image, self.raw_data)
# Second pass: handle markdown images (excluding those already processed)
- self.raw_data = re.sub(
- PERSEUS_MARKDOWN_IMAGE_REGEX, self._replace_image, self.raw_data
- )
+ self.raw_data = re.sub(PERSEUS_MARKDOWN_IMAGE_REGEX, self._replace_image, self.raw_data)
# Return all filenames
return [f.filename for f in self.files]
@@ -376,23 +339,12 @@ class MultipleSelectQuestion(BaseQuestion):
def __init__(self, id, question, correct_answers, all_answers, **kwargs):
# Put answers into standard format
set_all_answers = set(all_answers)
- all_answers += [
- answer for answer in correct_answers if answer not in set_all_answers
- ]
- answers = [
- self.create_answer(answer, answer in correct_answers)
- for answer in all_answers
- ]
+ all_answers += [answer for answer in correct_answers if answer not in set_all_answers]
+ answers = [self.create_answer(answer, answer in correct_answers) for answer in all_answers]
if len(answers) == 0:
answers = [self.create_answer("No answers provided.")]
- config.LOGGER.warning(
- "\tWARNING: Question {id} does not have any answers (set to default)".format(
- id=id
- )
- )
- super(MultipleSelectQuestion, self).__init__(
- id, question, exercises.MULTIPLE_SELECTION, answers, **kwargs
- )
+ config.LOGGER.warning("\tWARNING: Question {id} does not have any answers (set to default)".format(id=id))
+ super(MultipleSelectQuestion, self).__init__(id, question, exercises.MULTIPLE_SELECTION, answers, **kwargs)
def validate(self):
"""validate: Makes sure multiple selection question is valid
@@ -400,28 +352,16 @@ def validate(self):
Returns: boolean indicating if multiple selection question is valid
"""
try:
- assert (
- self.question_type == exercises.MULTIPLE_SELECTION
- ), "Assumption Failed: Question should be multiple selection type"
- assert (
- len(self.answers) > 0
- ), "Assumption Failed: Multiple selection question should have answers"
+ assert self.question_type == exercises.MULTIPLE_SELECTION, "Assumption Failed: Question should be multiple selection type"
+ assert len(self.answers) > 0, "Assumption Failed: Multiple selection question should have answers"
for a in self.answers:
- assert "answer" in a and isinstance(
- a["answer"], str
- ), "Assumption Failed: Answer in answer list is not a string"
- assert "correct" in a and isinstance(
- a["correct"], bool
- ), "Assumption Failed: Correct indicator is not a boolean in answer list"
+ assert "answer" in a and isinstance(a["answer"], str), "Assumption Failed: Answer in answer list is not a string"
+ assert "correct" in a and isinstance(a["correct"], bool), "Assumption Failed: Correct indicator is not a boolean in answer list"
for h in self.hints:
- assert isinstance(
- h, str
- ), "Assumption Failed: Hint in hint list is not a string"
+ assert isinstance(h, str), "Assumption Failed: Hint in hint list is not a string"
return super(MultipleSelectQuestion, self).validate()
except AssertionError:
- raise InvalidQuestionException(
- "Invalid question: {0}".format(self.__dict__)
- )
+ raise InvalidQuestionException("Invalid question: {0}".format(self.__dict__))
class SingleSelectQuestion(BaseQuestion):
@@ -443,20 +383,11 @@ def __init__(self, id, question, correct_answer, all_answers, **kwargs):
# Put answers into standard format
if correct_answer not in all_answers:
all_answers += [correct_answer]
- answers = [
- self.create_answer(answer, answer == correct_answer)
- for answer in all_answers
- ]
+ answers = [self.create_answer(answer, answer == correct_answer) for answer in all_answers]
if len(answers) == 0:
answers = [self.create_answer("No answers provided.")]
- config.LOGGER.warning(
- "\tWARNING: Question {id} does not have any answers (set to default)".format(
- id=id
- )
- )
- super(SingleSelectQuestion, self).__init__(
- id, question, exercises.SINGLE_SELECTION, answers, **kwargs
- )
+ config.LOGGER.warning("\tWARNING: Question {id} does not have any answers (set to default)".format(id=id))
+ super(SingleSelectQuestion, self).__init__(id, question, exercises.SINGLE_SELECTION, answers, **kwargs)
def validate(self):
"""validate: Makes sure single selection question is valid
@@ -464,33 +395,19 @@ def validate(self):
Returns: boolean indicating if single selection question is valid
"""
try:
- assert (
- self.question_type == exercises.SINGLE_SELECTION
- ), "Assumption Failed: Question should be single selection type"
- assert (
- len(self.answers) > 0
- ), "Assumption Failed: Multiple selection question should have answers"
+ assert self.question_type == exercises.SINGLE_SELECTION, "Assumption Failed: Question should be single selection type"
+ assert len(self.answers) > 0, "Assumption Failed: Single selection question should have answers"
correct_answers = 0
for a in self.answers:
- assert "answer" in a and isinstance(
- a["answer"], str
- ), "Assumption Failed: Answer in answer list is not a string"
- assert "correct" in a and isinstance(
- a["correct"], bool
- ), "Assumption Failed: Correct indicator is not a boolean in answer list"
+ assert "answer" in a and isinstance(a["answer"], str), "Assumption Failed: Answer in answer list is not a string"
+ assert "correct" in a and isinstance(a["correct"], bool), "Assumption Failed: Correct indicator is not a boolean in answer list"
correct_answers += 1 if a["correct"] else 0
- assert (
- correct_answers == 1
- ), "Assumption Failed: Single selection question should have only one correct answer"
+ assert correct_answers == 1, "Assumption Failed: Single selection question should have only one correct answer"
for h in self.hints:
- assert isinstance(
- h, str
- ), "Assumption Failed: Hint in hints list is not a string"
+ assert isinstance(h, str), "Assumption Failed: Hint in hints list is not a string"
return super(SingleSelectQuestion, self).validate()
except AssertionError:
- raise InvalidQuestionException(
- "Invalid question: {0}".format(self.__dict__)
- )
+ raise InvalidQuestionException("Invalid question: {0}".format(self.__dict__))
class InputQuestion(BaseQuestion):
@@ -511,14 +428,8 @@ def __init__(self, id, question, answers, **kwargs):
answers = [self.create_answer(answer) for answer in answers]
if len(answers) == 0:
answers = [self.create_answer("No answers provided.")]
- config.LOGGER.warning(
- "\tWARNING: Question {id} does not have any answers (set to default)".format(
- id=id
- )
- )
- super(InputQuestion, self).__init__(
- id, question, exercises.INPUT_QUESTION, answers, **kwargs
- )
+ config.LOGGER.warning("\tWARNING: Question {id} does not have any answers (set to default)".format(id=id))
+ super(InputQuestion, self).__init__(id, question, exercises.INPUT_QUESTION, answers, **kwargs)
def validate(self):
"""validate: Makes sure input question is valid
@@ -526,28 +437,16 @@ def validate(self):
Returns: boolean indicating if input question is valid
"""
try:
- assert (
- self.question_type == exercises.INPUT_QUESTION
- ), "Assumption Failed: Question should be input answer type"
- assert (
- len(self.answers) > 0
- ), "Assumption Failed: Multiple selection question should have answers"
+ assert self.question_type == exercises.INPUT_QUESTION, "Assumption Failed: Question should be input answer type"
+ assert len(self.answers) > 0, "Assumption Failed: Input question should have answers"
for a in self.answers:
- assert (
- "answer" in a
- ), "Assumption Failed: Answers must have an answer field"
+ assert "answer" in a, "Assumption Failed: Answers must have an answer field"
try:
float(a["answer"])
except ValueError:
- assert False, "Assumption Failed: Answer {} must be numeric".format(
- a["answer"]
- )
+ assert False, "Assumption Failed: Answer {} must be numeric".format(a["answer"])
for h in self.hints:
- assert isinstance(
- h, str
- ), "Assumption Failed: Hint in hints list is not a string"
+ assert isinstance(h, str), "Assumption Failed: Hint in hints list is not a string"
return super(InputQuestion, self).validate()
except AssertionError:
- raise InvalidQuestionException(
- "Invalid question: {0}".format(self.__dict__)
- )
+ raise InvalidQuestionException("Invalid question: {0}".format(self.__dict__))
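
Editor's sketch: the PerseusQuestion hunks above funnel two regexes (quoted image URLs and markdown images) through one named-group replacer; the open/close groups let a single callback reassemble either syntax. A self-contained demo with an illustrative replacement path in place of the real content-hash path:

    import re

    PROTOCOL_PATH = r"(?P<protocol>web\+graphie|https?|file|data):(?P<path>[^\)\"]+)"
    QUOTED = rf"(?P<open>\"){PROTOCOL_PATH}(?P<close>\")"
    MARKDOWN = rf"(?P<open>!\[[^\]]*\]\(){PROTOCOL_PATH}(?P<close>\))"

    def replace_image(match):
        new_path = "images/local-copy.png"  # illustrative; the real code uses a content-hash path
        if match.group("protocol") == "web+graphie":
            new_path = "web+graphie:" + new_path  # put the prefix back for graphie resources
        return "{}{}{}".format(match.group("open"), new_path, match.group("close"))

    raw = 'see ![graph](https://example.org/a.svg) and "web+graphie://example.org/b"'
    raw = re.sub(QUOTED, replace_image, raw)    # first pass: quoted images
    raw = re.sub(MARKDOWN, replace_image, raw)  # second pass: markdown images
    print(raw)
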
diff --git a/ricecooker/commands.py b/ricecooker/commands.py
index 5a2766d3..750584ce 100644
--- a/ricecooker/commands.py
+++ b/ricecooker/commands.py
@@ -70,12 +70,8 @@ def uploadchannel( # noqa: C901
config.FILE_PIPELINE = chef.file_pipeline
# Set max retries for downloading
- config.DOWNLOAD_SESSION.mount(
- "http://", requests.adapters.HTTPAdapter(max_retries=int(download_attempts))
- )
- config.DOWNLOAD_SESSION.mount(
- "https://", requests.adapters.HTTPAdapter(max_retries=int(download_attempts))
- )
+ config.DOWNLOAD_SESSION.mount("http://", requests.adapters.HTTPAdapter(max_retries=int(download_attempts)))
+ config.DOWNLOAD_SESSION.mount("https://", requests.adapters.HTTPAdapter(max_retries=int(download_attempts)))
config.DOWNLOAD_SESSION.auth = chef.auth
@@ -86,9 +82,7 @@ def uploadchannel( # noqa: C901
# Authenticate user and check current Ricecooker version
username, token = authenticate_user(token)
config.LOGGER.info("Logged in with username {0}".format(username))
- config.DOWNLOAD_SESSION.headers.update(
- {"User-Agent": f"Ricecooker/{__version__} bot ({username})"}
- )
+ config.DOWNLOAD_SESSION.headers.update({"User-Agent": f"Ricecooker/{__version__} bot ({username})"})
check_version_number()
else:
username = ""
@@ -98,19 +92,13 @@ def uploadchannel( # noqa: C901
# Set up progress tracker
config.PROGRESS_MANAGER = RestoreManager()
- if (
- not resume or not config.PROGRESS_MANAGER.check_for_session()
- ) and step.upper() != Status.DONE.name:
+ if (not resume or not config.PROGRESS_MANAGER.check_for_session()) and step.upper() != Status.DONE.name:
config.PROGRESS_MANAGER.init_session()
else:
- if resume or prompt_yes_or_no(
- "Previous session detected. Would you like to resume your last session?"
- ):
+ if resume or prompt_yes_or_no("Previous session detected. Would you like to resume your last session?"):
config.LOGGER.info("Resuming your last session...")
step = Status.LAST.name if step is None else step
- config.PROGRESS_MANAGER = config.PROGRESS_MANAGER.load_progress(
- step.upper()
- )
+ config.PROGRESS_MANAGER = config.PROGRESS_MANAGER.load_progress(step.upper())
else:
config.PROGRESS_MANAGER.init_session()
@@ -137,10 +125,7 @@ def uploadchannel( # noqa: C901
# Early permission check: Try creating the channel before downloading/uploading files
# This will fail fast if the user lacks edit permissions
# Fixes issues #95 and #434 by avoiding wasted downloads/uploads
- if (
- config.PROGRESS_MANAGER.get_status_val() <= Status.CREATE_TREE.value
- and command != "dryrun"
- ):
+ if config.PROGRESS_MANAGER.get_status_val() <= Status.CREATE_TREE.value and command != "dryrun":
config.LOGGER.info("Checking channel permissions...")
try:
tree.root_id, tree.channel_id = tree.add_channel()
@@ -193,10 +178,7 @@ def uploadchannel( # noqa: C901
channel_id = config.PROGRESS_MANAGER.channel_id
# Publish tree if flag is set to True
- if (
- config.PUBLISH
- and config.PROGRESS_MANAGER.get_status_val() <= Status.PUBLISH_CHANNEL.value
- ):
+ if config.PUBLISH and config.PROGRESS_MANAGER.get_status_val() <= Status.PUBLISH_CHANNEL.value:
config.LOGGER.info("")
config.LOGGER.info("Publishing channel...")
publish_tree(tree, channel_id)
@@ -237,9 +219,7 @@ def authenticate_user(token):
def check_version_number():
- response = config.SESSION.post(
- config.check_version_url(), data=json.dumps({"version": __version__})
- )
+ response = config.SESSION.post(config.check_version_url(), data=json.dumps({"version": __version__}))
response.raise_for_status()
result = json.loads(response._content.decode("utf-8"))
@@ -321,9 +301,7 @@ def upload_files(tree, file_diff):
Returns: None
"""
# Upload new files to CC
- config.LOGGER.info(
- " Uploading {0} new file(s) to Kolibri Studio...".format(len(file_diff))
- )
+ config.LOGGER.info(" Uploading {0} new file(s) to Kolibri Studio...".format(len(file_diff)))
tree.upload_files(file_diff)
tree.reattempt_upload_fails()
return file_diff
@@ -398,18 +376,14 @@ def attach(parent, node_path):
try:
parent.add_child(node_path[0])
except TypeError:
- raise NotImplementedError(
- "--sample mode is not supported for channels with curriculum structure nodes"
- )
+ raise NotImplementedError("--sample mode is not supported for channels with curriculum structure nodes")
else:
child = node_path[0]
if not any(c.source_id == child.source_id for c in parent.children):
try:
parent.add_child(child)
except TypeError:
- raise NotImplementedError(
- "--sample mode is not supported for channels with curriculum structure nodes"
- )
+ raise NotImplementedError("--sample mode is not supported for channels with curriculum structure nodes")
attach(child, node_path[1:])
for node_path in sample_paths:
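
Editor's sketch: the download-retry hunk above mounts one HTTPAdapter per scheme on the shared session. A minimal standalone equivalent (the retry count and User-Agent string are illustrative):

    import requests
    from requests.adapters import HTTPAdapter

    session = requests.Session()
    adapter = HTTPAdapter(max_retries=3)  # retry transient connection failures
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    session.headers.update({"User-Agent": "Ricecooker/0.0.0 bot (username)"})
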
diff --git a/ricecooker/config.py b/ricecooker/config.py
index 6a60be2c..9390c19b 100644
--- a/ricecooker/config.py
+++ b/ricecooker/config.py
@@ -1,6 +1,7 @@
"""
Settings and global config values for ricecooker.
"""
+
import atexit
import hashlib
import logging.config
@@ -12,7 +13,6 @@
import requests
from requests_file import FileAdapter
-
UPDATE = False
COMPRESS = False
VIDEO_HEIGHT = None
@@ -141,9 +141,9 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers
# Domain and file store location for uploading to production Studio server
DEFAULT_DOMAIN = "https://api.studio.learningequality.org"
-DOMAIN_ENV = os.getenv("STUDIO_URL", None)
+DOMAIN_ENV = os.getenv("STUDIO_URL")
if DOMAIN_ENV is None: # check old ENV variable for backward compatibility
- DOMAIN_ENV = os.getenv("CONTENTWORKSHOP_URL", None)
+ DOMAIN_ENV = os.getenv("CONTENTWORKSHOP_URL")
DOMAIN = DOMAIN_ENV if DOMAIN_ENV else DEFAULT_DOMAIN
if DOMAIN.endswith("/"):
DOMAIN = DOMAIN.rstrip("/")
@@ -157,7 +157,7 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers
CURRENT_CWD = os.getcwd()
# Allow users to choose which phantomjs they use
-PHANTOMJS_PATH = os.getenv("PHANTOMJS_PATH", None)
+PHANTOMJS_PATH = os.getenv("PHANTOMJS_PATH")
# URL for authenticating user on Kolibri Studio
AUTHENTICATION_URL = "{domain}/api/internal/authenticate_user_internal"
@@ -187,9 +187,7 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers
PUBLISH_CHANNEL_URL = "{domain}/api/internal/publish_channel"
# Folder to store downloaded files
-STORAGE_DIRECTORY = os.getenv(
- "RICECOOKER_STORAGE", os.path.join(CURRENT_CWD, "storage")
-)
+STORAGE_DIRECTORY = os.getenv("RICECOOKER_STORAGE", os.path.join(CURRENT_CWD, "storage"))
# Folder to store progress tracking information
RESTORE_DIRECTORY = "restore"
@@ -198,9 +196,7 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers
SESSION = requests.Session()
# Cache for filenames
-FILECACHE_DIRECTORY = os.getenv(
- "RICECOOKER_FILECACHE", os.path.join(CURRENT_CWD, ".ricecookerfilecache")
-)
+FILECACHE_DIRECTORY = os.getenv("RICECOOKER_FILECACHE", os.path.join(CURRENT_CWD, ".ricecookerfilecache"))
FAILED_FILES = []
@@ -209,16 +205,9 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers
DOWNLOAD_SESSION.mount("file://", FileAdapter())
# Environment variable indicating we should use a proxy for yt_dlp downloads
-USEPROXY = False
-USEPROXY = (
- True
- if os.getenv("USEPROXY") is not None or os.getenv("PROXY_LIST") is not None
- else False
-)
+USEPROXY = os.getenv("USEPROXY") is not None or os.getenv("PROXY_LIST") is not None
-GOOGLE_SERVICE_ACCOUNT_CREDENTIALS_PATH = os.getenv(
- "GOOGLE_SERVICE_ACCOUNT_CREDENTIALS_PATH", None
-)
+GOOGLE_SERVICE_ACCOUNT_CREDENTIALS_PATH = os.getenv("GOOGLE_SERVICE_ACCOUNT_CREDENTIALS_PATH")
if GOOGLE_SERVICE_ACCOUNT_CREDENTIALS_PATH:
TASK_THREADS = 1 # If using service account, only one thread is allowed - random errors happen otherwise.
@@ -270,13 +259,9 @@ def delete_temp_dir():
pass
# Slack webhook URL for channel upload notifications
-SLACK_WEBHOOK_URL = os.getenv("SLACK_WEBHOOK_URL", None)
-if SLACK_WEBHOOK_URL and not SLACK_WEBHOOK_URL.startswith(
- "https://hooks.slack.com/services/"
-):
- LOGGER.warning(
- "Invalid Slack webhook URL provided. Notifications will be disabled."
- )
+SLACK_WEBHOOK_URL = os.getenv("SLACK_WEBHOOK_URL")
+if SLACK_WEBHOOK_URL and not SLACK_WEBHOOK_URL.startswith("https://hooks.slack.com/services/"):
+ LOGGER.warning("Invalid Slack webhook URL provided. Notifications will be disabled.")
SLACK_WEBHOOK_URL = None
@@ -293,9 +278,7 @@ def delete_temp_dir():
# Character limits based on Kolibri models
-TRUNCATE_MSG = (
- "\t\t{kind} {id}: {field} {value} is too long - max {max} characters (truncating)"
-)
+TRUNCATE_MSG = "\t\t{kind} {id}: {field} {value} is too long - max {max} characters (truncating)"
MAX_TITLE_LENGTH = 200
MAX_SOURCE_ID_LENGTH = 200
@@ -371,12 +354,8 @@ def get_storage_path(filename):
Args: filename (str): Name of file to store
Returns: string path to file
"""
- directory = os.path.abspath(
- os.path.join(STORAGE_DIRECTORY, filename[0], filename[1])
- )
- # Make storage directory for downloaded files if it doesn't already exist
- if not os.path.exists(directory):
- os.makedirs(directory)
+ directory = os.path.abspath(os.path.join(STORAGE_DIRECTORY, filename[0], filename[1]))
+ os.makedirs(directory, exist_ok=True)
return os.path.join(directory, filename)
@@ -432,9 +411,7 @@ def get_storage_url(filename):
Args: filename (str): Name of file
Returns: string URL for file
"""
- file_url = FILE_STORAGE_URL.format(
- domain=DOMAIN, f=filename[0], s=filename[1], filename=filename
- )
+ file_url = FILE_STORAGE_URL.format(domain=DOMAIN, f=filename[0], s=filename[1], filename=filename)
if DOMAIN == DEFAULT_DOMAIN:
# If we are targeting the default domain, don't make content storage requests
# to api.studio because it will skip cloudflare.
@@ -472,9 +449,7 @@ def open_channel_url(channel, staging=False):
channel (str): channel id of uploaded channel
Returns: string url to open channel
"""
- frontend_domain = DOMAIN.replace(
- "api.", ""
- ) # Don't send them to the API domain for preview / review.
+ frontend_domain = DOMAIN.replace("api.", "") # Don't send them to the API domain for preview / review.
return OPEN_CHANNEL_URL.format(
domain=frontend_domain,
channel_id=channel,
diff --git a/ricecooker/managers/progress.py b/ricecooker/managers/progress.py
index dce18f8a..9056b47e 100644
--- a/ricecooker/managers/progress.py
+++ b/ricecooker/managers/progress.py
@@ -73,10 +73,8 @@ def check_for_session(self, status=None):
Returns: boolean indicating if session exists
"""
status = Status.LAST if status is None else status
- return (
- os.path.isfile(self.get_restore_path(status))
- and os.path.getsize(self.get_restore_path(status)) > 0
- )
+ restore_path = self.get_restore_path(status)
+ return os.path.isfile(restore_path) and os.path.getsize(restore_path) > 0
def get_restore_path(self, status=None):
"""get_restore_path: get path to restoration file
@@ -92,9 +90,7 @@ def __record_progress(self, next_step=None):
Args: None
Returns: None
"""
- with open(self.get_restore_path(Status.LAST), "wb") as handle, open(
- self.get_restore_path(), "wb"
- ) as step_handle:
+ with open(self.get_restore_path(Status.LAST), "wb") as handle, open(self.get_restore_path(), "wb") as step_handle:
pickle.dump(self, handle)
pickle.dump(self, step_handle)
@@ -108,11 +104,7 @@ def load_progress(self, resume_step):
# If progress is corrupted, revert to step before
while not self.check_for_session(resume_step):
- config.LOGGER.error(
- "Ricecooker has not reached {0} status. Reverting to earlier step...".format(
- resume_step.name
- )
- )
+ config.LOGGER.error("Ricecooker has not reached {0} status. Reverting to earlier step...".format(resume_step.name))
# All files are corrupted or absent, restart process
if resume_step.value - 1 < 0:
self.init_session()
@@ -217,9 +209,7 @@ def set_channel_created(self, channel_link, channel_id):
"""
self.channel_link = channel_link
self.channel_id = channel_id
- self.__record_progress(
- Status.PUBLISH_CHANNEL if config.PUBLISH else Status.DONE
- )
+ self.__record_progress(Status.PUBLISH_CHANNEL if config.PUBLISH else Status.DONE)
def set_published(self):
"""set_published: records progress after channel has been published
diff --git a/ricecooker/managers/tree.py b/ricecooker/managers/tree.py
index 1bd6c8db..13c2ef56 100644
--- a/ricecooker/managers/tree.py
+++ b/ricecooker/managers/tree.py
@@ -6,9 +6,10 @@
from requests.exceptions import RequestException
-from .. import config
from ricecooker.exceptions import InvalidNodeException
+from .. import config
+
class InsufficientStorageException(Exception):
"""Raised when there is not enough storage space."""
@@ -41,9 +42,7 @@ def validate(self):
if not self.all_nodes:
self.all_nodes = self.gather_tree_recur([], self.channel)
valid = True
- with concurrent.futures.ThreadPoolExecutor(
- max_workers=config.TASK_THREADS
- ) as executor:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=config.TASK_THREADS) as executor:
for result in executor.map(self.validate_node, self.all_nodes):
valid = valid and result
return valid
@@ -67,9 +66,7 @@ def process_tree(self):
"""
if not self.all_nodes:
self.all_nodes = self.gather_tree_recur([], self.channel)
- with concurrent.futures.ThreadPoolExecutor(
- max_workers=config.TASK_THREADS
- ) as executor:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=config.TASK_THREADS) as executor:
for data in executor.map(self.process_node, self.all_nodes):
self.file_map.update(data)
return list(self.file_map.keys())
@@ -77,9 +74,7 @@ def process_tree(self):
def gather_tree_recur(self, nodes, node):
# Process node's children
for child_node in node.children:
- self.gather_tree_recur(
- nodes, child_node
- ) # Defer insert until after all descendants in case a tiled thumbnail is needed
+ self.gather_tree_recur(nodes, child_node) # Defer insert until after all descendants in case a tiled thumbnail is needed
nodes.append(node)
return nodes
@@ -115,14 +110,10 @@ def check_for_files_failed(self):
Returns: None
"""
if len(config.FAILED_FILES) > 0:
- config.LOGGER.error(
- " {} file(s) have failed to download".format(len(config.FAILED_FILES))
- )
+ config.LOGGER.error(" {} file(s) have failed to download".format(len(config.FAILED_FILES)))
for f in config.FAILED_FILES:
if f.node: # files associated with a content node
- info = "{0} {id}".format(
- f.node.kind.capitalize(), id=f.node.source_id
- )
+ info = "{0} {id}".format(f.node.kind.capitalize(), id=f.node.source_id)
elif f.assessment_item: # files associated with an assessment item
info = "{0} {id}".format("Question", id=f.assessment_item.source_id)
else: # files not associated with a node or an assessment item
@@ -132,11 +123,7 @@ def check_for_files_failed(self):
file_identifier = f.path
elif hasattr(f, "youtube_url") and f.youtube_url:
file_identifier = f.youtube_url
- config.LOGGER.warning(
- "\t{0}: {id} \n\t {err}".format(
- info, id=file_identifier, err=f.error
- )
- )
+ config.LOGGER.warning("\t{0}: {id} \n\t {err}".format(info, id=file_identifier, err=f.error))
else:
config.LOGGER.info(" All files were successfully downloaded")
@@ -149,16 +136,8 @@ def get_file_diff(self, files_to_diff):
Args: None
Returns: list of files that are not on server
"""
- with concurrent.futures.ThreadPoolExecutor(
- max_workers=config.TASK_THREADS
- ) as executor:
- return [
- filename
- for filename, exists in zip(
- files_to_diff, executor.map(self.check_file_exists, files_to_diff)
- )
- if not exists
- ]
+ with concurrent.futures.ThreadPoolExecutor(max_workers=config.TASK_THREADS) as executor:
+ return [filename for filename, exists in zip(files_to_diff, executor.map(self.check_file_exists, files_to_diff)) if not exists]
def do_file_upload(self, filename):
file_data = self.file_map[filename]
@@ -189,27 +168,15 @@ def do_file_upload(self, filename):
might_skip = response_data["might_skip"]
if might_skip and self.check_file_exists(filename):
return
- b64checksum = (
- codecs.encode(codecs.decode(file_data.checksum, "hex"), "base64")
- .decode()
- .strip()
- )
+ b64checksum = codecs.encode(codecs.decode(file_data.checksum, "hex"), "base64").decode().strip()
headers = {"Content-Type": content_type, "Content-MD5": b64checksum}
- response = config.SESSION.put(
- upload_url, headers=headers, data=file_obj
- )
+ response = config.SESSION.put(upload_url, headers=headers, data=file_obj)
if response.status_code == 200:
return
- raise RequestException(
- "Error uploading file {}, response code: {} - {}".format(
- filename, response.status_code, response.text
- )
- )
+ raise RequestException("Error uploading file {}, response code: {} - {}".format(filename, response.status_code, response.text))
else:
raise RequestException(
- "Error retrieving upload URL for file {}, response code: {} - {}".format(
- filename, url_response.status_code, url_response.text
- )
+ "Error retrieving upload URL for file {}, response code: {} - {}".format(filename, url_response.status_code, url_response.text)
)
def _handle_upload(self, f):
@@ -231,22 +198,14 @@ def upload_files(self, file_list):
Returns: None
"""
counter = 0
- files_to_upload = list(
- set(file_list) - set(self.uploaded_files)
- ) # In case restoring from previous session
+ files_to_upload = list(set(file_list) - set(self.uploaded_files)) # In case restoring from previous session
try:
- with concurrent.futures.ThreadPoolExecutor(
- max_workers=config.TASK_THREADS
- ) as executor:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=config.TASK_THREADS) as executor:
# Start the upload operations
for filename in executor.map(self._handle_upload, files_to_upload):
if filename is not None:
counter += 1
- config.LOGGER.info(
- "\tUploaded {0} ({count}/{total}) ".format(
- filename, count=counter, total=len(files_to_upload)
- )
- )
+ config.LOGGER.info("\tUploaded {0} ({count}/{total}) ".format(filename, count=counter, total=len(files_to_upload)))
finally:
config.PROGRESS_MANAGER.set_uploading(self.uploaded_files)
@@ -256,9 +215,7 @@ def reattempt_upload_fails(self):
Returns: None
"""
if len(self.failed_uploads) > 0:
- config.LOGGER.info(
- "Reattempting to upload {0} file(s)...".format(len(self.failed_uploads))
- )
+ config.LOGGER.info("Reattempting to upload {0} file(s)...".format(len(self.failed_uploads)))
current_fails = [k for k in self.failed_uploads]
self.failed_uploads = {}
self.upload_files(current_fails)
@@ -285,9 +242,7 @@ def upload_tree(self):
self.check_failed()
channel_id, channel_link = self.commit_channel(channel_id)
end_time = datetime.now()
- config.LOGGER.info(
- "Upload time: {time}s".format(time=(end_time - start_time).total_seconds())
- )
+ config.LOGGER.info("Upload time: {time}s".format(time=(end_time - start_time).total_seconds()))
return channel_id, channel_link
def truncate_fields(self, node):
@@ -300,9 +255,7 @@ def check_failed(self):
config.LOGGER.warning("WARNING: The following nodes could not be created:")
for node_id in self.failed_node_builds:
node = self.failed_node_builds[node_id]
- config.LOGGER.warning(
- "\t{} ({})".format(str(node["node"]), node["error"])
- )
+ config.LOGGER.warning("\t{} ({})".format(str(node["node"]), node["error"]))
if "content" in node:
config.LOGGER.warning(node["content"][:80])
else:
@@ -316,9 +269,7 @@ def add_channel(self):
config.LOGGER.info(" Creating channel {0}".format(self.channel.title))
self.channel.truncate_fields()
payload = {"channel_data": self.channel.to_dict()}
- response = config.SESSION.post(
- config.create_channel_url(), data=json.dumps(payload)
- )
+ response = config.SESSION.post(config.create_channel_url(), data=json.dumps(payload))
try:
response.raise_for_status()
except Exception:
@@ -352,32 +303,18 @@ def add_nodes(self, root_id, current_node, indent=1): # noqa: C901
# Send children in chunks to avoid gateway errors
try:
- chunks = [
- current_node.children[x : x + 10]
- for x in range(0, len(current_node.children), 10)
- ]
+ chunks = [current_node.children[x : x + 10] for x in range(0, len(current_node.children), 10)]
for chunk in chunks:
payload_children = []
for child in chunk:
- failed = [
- f
- for f in child.files
- if f.is_primary
- and (not f.filename or self.failed_uploads.get(f.filename))
- ]
+ failed = [f for f in child.files if f.is_primary and (not f.filename or self.failed_uploads.get(f.filename))]
if failed or not child.valid:
node_id = child.get_node_id().hex
if not self.failed_node_builds.get(node_id):
error_message = ""
for fail in failed:
- reason = (
- fail.filename
- + ": "
- + self.failed_uploads.get(fail.filename)
- if fail.filename
- else "File failed to download"
- )
+ reason = fail.filename + ": " + self.failed_uploads.get(fail.filename) if fail.filename else "File failed to download"
error_message = error_message + reason + ", "
if hasattr(child, "_error"):
error_message = error_message + child._error + ", "
@@ -389,9 +326,7 @@ def add_nodes(self, root_id, current_node, indent=1): # noqa: C901
payload_children.append(child.to_dict())
payload = {"root_id": root_id, "content_data": payload_children}
- response = config.SESSION.post(
- config.add_nodes_url(), data=json.dumps(payload)
- )
+ response = config.SESSION.post(config.add_nodes_url(), data=json.dumps(payload))
if response.status_code != 200:
self.failed_node_builds[root_id] = {
"node": current_node,
@@ -419,22 +354,12 @@ def commit_channel(self, channel_id):
Returns: channel id and link to uploaded channel
"""
payload = {"channel_id": channel_id, "stage": config.STAGE}
- response = config.SESSION.post(
- config.finish_channel_url(), data=json.dumps(payload)
- )
+ response = config.SESSION.post(config.finish_channel_url(), data=json.dumps(payload))
if response.status_code != 200:
config.LOGGER.error("")
- config.LOGGER.error(
- "Could not activate channel: {}\n".format(
- response._content.decode("utf-8")
- )
- )
+ config.LOGGER.error("Could not activate channel: {}\n".format(response._content.decode("utf-8")))
if response.status_code == 403:
- config.LOGGER.error(
- "Channel can be viewed at {}\n\n".format(
- config.open_channel_url(channel_id, staging=True)
- )
- )
+ config.LOGGER.error("Channel can be viewed at {}\n\n".format(config.open_channel_url(channel_id, staging=True)))
sys.exit()
response.raise_for_status()
new_channel = json.loads(response._content.decode("utf-8"))
@@ -448,7 +373,5 @@ def publish(self, channel_id):
Returns: None
"""
payload = {"channel_id": channel_id}
- response = config.SESSION.post(
- config.publish_channel_url(), data=json.dumps(payload)
- )
+ response = config.SESSION.post(config.publish_channel_url(), data=json.dumps(payload))
response.raise_for_status()
diff --git a/ricecooker/utils/caching.py b/ricecooker/utils/caching.py
index ebb7fb83..46048240 100644
--- a/ricecooker/utils/caching.py
+++ b/ricecooker/utils/caching.py
@@ -13,7 +13,6 @@
from ricecooker.utils.utils import get_hash
from ricecooker.utils.utils import is_valid_url
-
# Cache for filenames
FILECACHE = FileCache(config.FILECACHE_DIRECTORY, forever=True)
@@ -50,12 +49,9 @@ class InvalidatingCacheControlAdapter(CacheControlAdapter):
def __init__(self, heuristic=None, *args, **kw):
if not heuristic:
heuristic = NeverCache()
- super(InvalidatingCacheControlAdapter, self).__init__(
- *args, heuristic=heuristic, **kw
- )
+ super(InvalidatingCacheControlAdapter, self).__init__(*args, heuristic=heuristic, **kw)
def send(self, request, **kw):
-
# delete any existing cached value from the cache
try:
cache_url = self.controller.cache_url(request.url)
@@ -82,9 +78,7 @@ def generate_key(action, path_or_id, settings=None, default=" (default)"):
settings_str = json.dumps(settings, sort_keys=True)
else:
# keep using old strategy to avoid invalidating all chef caches
- settings_str = (
- "{}".format(str(sorted(settings.items()))) if settings else default
- )
+ settings_str = "{}".format(str(sorted(settings.items()))) if settings else default
return "{}: {} {}".format(action.upper(), path_or_id, settings_str)
diff --git a/ricecooker/utils/corrections.py b/ricecooker/utils/corrections.py
index 299a9806..5938460e 100755
--- a/ricecooker/utils/corrections.py
+++ b/ricecooker/utils/corrections.py
@@ -12,7 +12,6 @@
from ricecooker.config import LOGGER
from ricecooker.utils.libstudio import StudioApi
-
# CONFIG CONSTANTS for data directories
################################################################################
STUDIO_CREDENTIALS = "credentials/studio.json"
@@ -90,9 +89,7 @@ def get_channel_tree(api, channel_id, suffix="", update=True):
channel_tree = json.load(open(filename, "r"))
return channel_tree
else:
- print(
- " Downloading tree for channel_id=", channel_id, " and saving to", filename
- )
+ print(" Downloading tree for channel_id=", channel_id, " and saving to", filename)
root_studio_id = api.get_channel_root_studio_id(channel_id)
# next step takes long since recursively making O(n) API calls!
channel_tree = api.get_tree_for_studio_id(root_studio_id)
@@ -144,9 +141,7 @@ def print_tree(subtree, indent=""):
class CorretionsCsvFileExporter(object):
- def __init__(
- self, csvfilepath="corrections-export.csv", exportattrs=default_export
- ):
+ def __init__(self, csvfilepath="corrections-export.csv", exportattrs=default_export):
self.csvfilepath = csvfilepath
self.exportattrs = exportattrs
@@ -179,9 +174,7 @@ def _write_subtree(path_tuple, subtree, is_root=False):
# TOPIC ############################################################
if kind == "topic":
if is_root:
- self.write_topic_row_from_studio_dict(
- path_tuple, subtree, is_root=is_root
- )
+ self.write_topic_row_from_studio_dict(path_tuple, subtree, is_root=is_root)
for child in subtree["children"]:
_write_subtree(path_tuple, child)
else:
@@ -359,9 +352,7 @@ def get_corrections_by_node_id(csvfilepath, modifyattrs):
# TODO: Additions
# TODO: Moves
datetimesuffix = datetime.now().strftime("%Y-%m-%d__%H%M")
- correctionspath = os.path.join(
- CORRECTIONS_DIR, "imported-" + datetimesuffix + ".json"
- )
+ correctionspath = os.path.join(CORRECTIONS_DIR, "imported-" + datetimesuffix + ".json")
json.dump(
corrections_by_node_id,
open(correctionspath, "w"),
@@ -400,9 +391,7 @@ def find_nodes_by_node_id(subtree, node_id):
def find_nodes_by_original_source_node_id(subtree, original_source_node_id):
- return find_nodes_by_attr(
- subtree, "original_source_node_id", original_source_node_id
- )
+ return find_nodes_by_attr(subtree, "original_source_node_id", original_source_node_id)
def unresolve_children(node):
@@ -422,9 +411,7 @@ def unresolve_children(node):
################################################################################
-def remap_original_source_node_id_to_node_id(
- channel_tree, corrections_by_original_source_node_id
-):
+def remap_original_source_node_id_to_node_id(channel_tree, corrections_by_original_source_node_id):
ALL_COORECTIONS_KINDS = [
"nodes_modified",
"nodes_added",
@@ -437,9 +424,7 @@ def remap_original_source_node_id_to_node_id(
corrections_by_node_id[correction_kind] = {}
corrections_dict = corrections_by_original_source_node_id[correction_kind]
for original_source_node_id, correction in corrections_dict.items():
- results = find_nodes_by_original_source_node_id(
- channel_tree, original_source_node_id
- )
+ results = find_nodes_by_original_source_node_id(channel_tree, original_source_node_id)
assert results, "no match found based on original_source_node_id search"
assert len(results) == 1, "multiple matches found..."
tree_node = results[0]
@@ -578,9 +563,7 @@ def apply_corrections_by_node_id(api, channel_tree, channel_id, corrections_by_n
#
# Deletions
for node_id, deletion_dict in corrections_by_node_id["nodes_deleted"].items():
- apply_deletion_for_node_id(
- api, channel_tree, channel_id, node_id, deletion_dict
- )
+ apply_deletion_for_node_id(api, channel_tree, channel_id, node_id, deletion_dict)
# TODO: Additions
# TODO: Moves
@@ -606,9 +589,7 @@ def get_studio_api(studio_creds=None):
token=studio_creds["token"],
username=studio_creds["username"],
password=studio_creds["password"],
- studio_url=studio_creds.get(
- "studio_url", "https://studio.learningequality.org"
- ),
+ studio_url=studio_creds.get("studio_url", "https://studio.learningequality.org"),
)
return api
@@ -640,9 +621,7 @@ def apply_corrections(args):
# of the nodes in the derivative channel so we must do a remapping:
if args.primarykey == "original_source_node_id":
corrections_by_original_source_node_id = json.load(open(correctionspath))
- corrections_by_node_id = remap_original_source_node_id_to_node_id(
- channel_tree, corrections_by_original_source_node_id
- )
+ corrections_by_node_id = remap_original_source_node_id_to_node_id(channel_tree, corrections_by_original_source_node_id)
json.dump(
corrections_by_node_id,
open(correctionspath, "w"),
@@ -663,9 +642,7 @@ def apply_corrections(args):
corrections_by_node_id = json.load(open(correctionspath))
#
# 5. Apply the corrections
- apply_corrections_by_node_id(
- api, channel_tree, args.channel_id, corrections_by_node_id
- )
+ apply_corrections_by_node_id(api, channel_tree, args.channel_id, corrections_by_node_id)
#
# 6. SAVE the Studio tree after corrections for review of what was changed
channel_tree = get_channel_tree(api, args.channel_id, suffix="-after")
@@ -689,9 +666,7 @@ def correctionsmain():
default="node_id",
)
parser.add_argument("--gsheet_id", help="Google spreadsheets sheet ID (public)")
- parser.add_argument(
- "--gid", help="The gid argument to indicate which sheet", default="0"
- )
+ parser.add_argument("--gid", help="The gid argument to indicate which sheet", default="0")
parser.add_argument(
"--modifyattrs",
help="Which attributes to modify",
diff --git a/ricecooker/utils/downloader.py b/ricecooker/utils/downloader.py
index e524e991..6f3df10b 100644
--- a/ricecooker/utils/downloader.py
+++ b/ricecooker/utils/downloader.py
@@ -58,7 +58,9 @@
# Flake8 thinks this is too complex.
try: # noqa: C901
import asyncio
- from pyppeteer import launch, errors
+
+ from pyppeteer import errors
+ from pyppeteer import launch
async def load_page(path, timeout=30, strict=True):
browser = await launch({"headless": True})
@@ -79,9 +81,7 @@ async def load_page(path, timeout=30, strict=True):
# some sites have API calls running regularly, so the timeout may occur because there's never any true
# network idle time. Try 'networkidle2' option instead before determining we can't scrape.
if not strict:
- LOGGER.info(
- "Attempting to download URL with networkidle2 instead of networkidle0..."
- )
+ LOGGER.info("Attempting to download URL with networkidle2 instead of networkidle0...")
await page.goto(
path,
{
@@ -116,9 +116,7 @@ async def take_screenshot(url, filename, element=None, timeout=30):
await page.waitForSelector(element, {"timeout": 10000})
elements = await page.querySelectorAll(element)
if len(list(elements)) > 1:
- LOGGER.warning(
- "Multiple elements matched screenshot element, using first..."
- )
+ LOGGER.warning("Multiple elements matched screenshot element, using first...")
screenshot_element = elements[0]
LOGGER.info("Saving screenshot to {}".format(filename))
@@ -190,15 +188,11 @@ def read(
return response.content
except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
- with open(
- path, "rb"
- ) as fobj: # If path is a local file path, try to open the file
+ with open(path, "rb") as fobj: # If path is a local file path, try to open the file
return fobj.read()
-def make_request(
- url, clear_cookies=False, headers=None, timeout=60, session=None, *args, **kwargs
-):
+def make_request(url, clear_cookies=False, headers=None, timeout=60, session=None, *args, **kwargs):
sess = session or DOWNLOAD_SESSION
if clear_cookies:
@@ -213,20 +207,9 @@ def make_request(
while retry_count <= max_retries:
try:
- response = sess.get(
- url,
- headers=request_headers,
- stream=True,
- timeout=timeout,
- *args,
- **kwargs
- )
+ response = sess.get(url, headers=request_headers, stream=True, timeout=timeout, *args, **kwargs)
if response.status_code != 200:
- LOGGER.error(
- "{} error while trying to download {}".format(
- response.status_code, url
- )
- )
+ LOGGER.error("{} error while trying to download {}".format(response.status_code, url))
if STRICT:
response.raise_for_status()
return response
@@ -236,9 +219,7 @@ def make_request(
) as e:
retry_count += 1
LOGGER.warning(
- "Error with connection ('{msg}'); about to perform retry {count} of {trymax}.".format(
- msg=str(e), count=retry_count, trymax=max_retries
- )
+ "Error with connection ('{msg}'); about to perform retry {count} of {trymax}.".format(msg=str(e), count=retry_count, trymax=max_retries)
)
time.sleep(retry_count * 1)
if retry_count > max_retries:
@@ -321,9 +302,7 @@ def download_srcset(selector, attr, content_middleware=None):
new_url = filename
if relative_links and base_url:
base_filename = derive_filename(base_url)
- new_url = get_relative_url_for_archive_filename(
- filename, base_filename
- )
+ new_url = get_relative_url_for_archive_filename(filename, base_filename)
fullpath = os.path.join(destination, filename)
if not os.path.exists(fullpath):
@@ -342,13 +321,10 @@ def download_srcset(selector, attr, content_middleware=None):
node[attr] = ", ".join(new_sources)
# Helper function to download all assets for a given CSS selector.
- def download_assets(
- selector, attr, url_middleware=None, content_middleware=None, node_filter=None
- ):
+ def download_assets(selector, attr, url_middleware=None, content_middleware=None, node_filter=None):
nodes = doc.select(selector)
for i, node in enumerate(nodes):
-
if node_filter:
if not node_filter(node):
src = node[attr]
@@ -455,9 +431,7 @@ def repl(match):
if parts.scheme and parts.netloc:
src_url = src
elif parts.path.startswith("/") and url:
- src_url = "{}://{}{}".format(
- root_parts.scheme, root_parts.netloc, parts.path
- )
+ src_url = "{}://{}{}".format(root_parts.scheme, root_parts.netloc, parts.path)
elif url and root_url:
src_url = urljoin(root_url, src)
else:
@@ -472,9 +446,7 @@ def repl(match):
new_url = src
if url and parts.path.startswith("/") or relative_links:
page_filename = derive_filename(url)
- new_url = get_relative_url_for_archive_filename(
- derived_filename, page_filename
- )
+ new_url = get_relative_url_for_archive_filename(derived_filename, page_filename)
elif derive_filename == _derive_filename:
# The _derive_filename function puts all files in the root, so all URLs need to be
# rewritten. When using get_archive_filename, relative URLs will still work.
@@ -489,9 +461,7 @@ def repl(match):
filename=derived_filename,
)
else:
- LOGGER.debug(
- "Resource already downloaded, skipping: {}".format(src_url)
- )
+ LOGGER.debug("Resource already downloaded, skipping: {}".format(src_url))
return 'url("%s")' % new_url
return _CSS_URL_RE.sub(repl, content)
@@ -512,9 +482,7 @@ def repl(match):
content_middleware=css_content_middleware,
node_filter=css_node_filter,
) # CSS
- download_assets(
- "script[src]", "src", content_middleware=js_content_middleware
- ) # JS
+ download_assets("script[src]", "src", content_middleware=js_content_middleware) # JS
download_assets("source[src]", "src") # Potentially audio
download_srcset("source[srcset]", "srcset") # Potentially audio
@@ -536,9 +504,7 @@ def repl(match):
continue
parts = urlparse(download_url)
# if we're scraping links, always scrape relative links regardless of setting.
- should_scrape = "all" in link_policy["scope"] or (
- not parts.scheme and not parts.netloc
- )
+ should_scrape = "all" in link_policy["scope"] or (not parts.scheme and not parts.netloc)
if not parts.scheme or parts.scheme.startswith("http"):
LOGGER.debug("checking url: {}".format(url))
if not parts.netloc:
@@ -570,18 +536,12 @@ def repl(match):
new_url = derived_filename
if is_html:
if download_url not in downloaded_pages:
- LOGGER.info(
- "Downloading linked HTML page {}".format(download_url)
- )
+ LOGGER.info("Downloading linked HTML page {}".format(download_url))
global archiver
if archiver:
- info = archiver.get_page(
- download_url, link_policy=policy, run_js=run_js
- )
- filename = info["index_path"].replace(
- archiver.root_dir + os.sep, ""
- )
+ info = archiver.get_page(download_url, link_policy=policy, run_js=run_js)
+ filename = info["index_path"].replace(archiver.root_dir + os.sep, "")
else:
info = archive_page(
download_url,
@@ -590,9 +550,7 @@ def repl(match):
run_js=run_js,
relative_links=relative_links,
)
- filename = info["index_path"].replace(
- destination + os.sep, ""
- )
+ filename = info["index_path"].replace(destination + os.sep, "")
new_url = filename
downloaded_pages[download_url] = new_url
@@ -605,9 +563,7 @@ def repl(match):
if relative_links and base_url:
page_filename = derive_filename(base_url)
- new_url = get_relative_url_for_archive_filename(
- new_url, page_filename
- )
+ new_url = get_relative_url_for_archive_filename(new_url, page_filename)
else:
full_path = os.path.join(destination, derived_filename)
new_url = derived_filename
@@ -658,18 +614,14 @@ def get_archive_filename(url, page_url=None, download_root=None, resource_urls=N
if file_url_parsed.query:
# Append the query to the filename, so that the filename is unique for each set of params.
- query_string = "_{}".format(
- file_url_parsed.query.replace("=", "_").replace("&", "_")
- )
+ query_string = "_{}".format(file_url_parsed.query.replace("=", "_").replace("&", "_"))
local_path = _path + query_string + ext
LOGGER.debug("local_path is now {}".format(local_path))
local_dir_name = local_path
if ext != "":
local_dir_name = os.path.dirname(local_path)
- LOGGER.debug(
- "local_path = {}, local_dir_name = {}".format(local_path, local_dir_name)
- )
+ LOGGER.debug("local_path = {}, local_dir_name = {}".format(local_path, local_dir_name))
if local_dir_name != local_path and resource_urls is not None:
full_dir = os.path.join(download_root, local_dir_name)
@@ -714,9 +666,7 @@ def archive_page(
os.makedirs(download_root, exist_ok=True)
if run_js:
- content, props = asyncio.get_event_loop().run_until_complete(
- load_page(url, strict=strict)
- )
+ content, props = asyncio.get_event_loop().run_until_complete(load_page(url, strict=strict))
else:
response = make_request(url)
props = {
@@ -763,9 +713,7 @@ def get_resource_filename(url):
relative_links=relative_links,
)
- download_path = os.path.join(
- download_root, get_archive_filename(url, page_url, download_root)
- )
+ download_path = os.path.join(download_root, get_archive_filename(url, page_url, download_root))
_path, ext = os.path.splitext(download_path)
index_path = download_path
if ".htm" not in ext:
@@ -870,9 +818,7 @@ def clear_cache_data(self):
self.cache_data = {}
self.save_cache_data()
- def get_page(
- self, url, refresh=False, link_policy=None, run_js=False, strict=False
- ):
+ def get_page(self, url, refresh=False, link_policy=None, run_js=False, strict=False):
if refresh or url not in self.cache_data:
self.cache_data[url] = archive_page(
url,
@@ -888,9 +834,7 @@ def get_page(
def get_relative_index_path(self, url):
if url in self.cache_data and "index_path" in self.cache_data[url]:
- return self.cache_data[url]["index_path"].replace(
- self.root_dir + os.sep, ""
- )
+ return self.cache_data[url]["index_path"].replace(self.root_dir + os.sep, "")
return None
@@ -903,9 +847,7 @@ def find_page_by_index_path(self, index_path):
def get_page_soup(self, url):
if url not in self.cache_data:
- raise KeyError(
- "Unable to find page {} in archive. Did you call get_page?".format(url)
- )
+ raise KeyError("Unable to find page {} in archive. Did you call get_page?".format(url))
info = self.cache_data[url]
# lxml enables some nice features like being able to search for individual
@@ -949,9 +891,7 @@ def _copy_resources_to_dir(self, base_dir, resources):
def create_zip_dir_for_page(self, url):
if url not in self.cache_data:
- raise KeyError(
- "Please ensure you call get_page before calling this function to download the content."
- )
+ raise KeyError("Please ensure you call get_page before calling this function to download the content.")
temp_dir = tempfile.mkdtemp()
info = self.cache_data[url]
diff --git a/ricecooker/utils/html.py b/ricecooker/utils/html.py
index 77f4e618..46a3a7ca 100644
--- a/ricecooker/utils/html.py
+++ b/ricecooker/utils/html.py
@@ -11,12 +11,12 @@
import requests
from selenium import webdriver
-from .caching import CacheControlAdapter
-from .caching import FileCache
from ricecooker.config import LOGGER
from ricecooker.config import PHANTOMJS_PATH
from ricecooker.config import STRICT
+from .caching import CacheControlAdapter
+from .caching import FileCache
# create a default session with basic caching mechanisms (similar to what a browser would do)
sess = requests.Session()
@@ -26,9 +26,7 @@
sess.mount("https://", basic_adapter)
if PHANTOMJS_PATH is None:
- PHANTOMJS_PATH = os.path.join(
- os.getcwd(), "node_modules", "phantomjs-prebuilt", "bin", "phantomjs"
- )
+ PHANTOMJS_PATH = os.path.join(os.getcwd(), "node_modules", "phantomjs-prebuilt", "bin", "phantomjs")
class WebDriver(object):
@@ -57,11 +55,7 @@ def __exit__(self, type, value, traceback):
def get_generated_html_from_driver(driver, tagname="html"):
- driver.execute_script(
- "return document.getElementsByTagName('{tagname}')[0].innerHTML".format(
- tagname=tagname
- )
- )
+ return driver.execute_script("return document.getElementsByTagName('{tagname}')[0].innerHTML".format(tagname=tagname))
def replace_links( # noqa: C901
@@ -85,9 +79,7 @@ def replace_links( # noqa: C901
rel_path = pathname2url(rel_path)
if relative_links:
- value = pathname2url(
- os.path.relpath(os.path.join(download_root, value), content_dir)
- )
+ value = pathname2url(os.path.relpath(os.path.join(download_root, value), content_dir))
# When we get an absolute URL, it may appear in one of three different ways in the page:
key_variants = [
@@ -118,9 +110,7 @@ def replace_links( # noqa: C901
# we avoid using BeautifulSoup because Python HTML parsers can be destructive and
# do things like strip out the doctype.
content = content.replace('="{}"'.format(variant), '="{}"'.format(value))
- content = content.replace(
- "url({})".format(variant), "url({})".format(value)
- )
+ content = content.replace("url({})".format(variant), "url({})".format(value))
for match in srcset_links:
url = match[1]
@@ -158,12 +148,8 @@ def calculate_relative_url(url, filename=None, baseurl=None, subpath=None):
# if a base path was supplied, calculate the file's subpath relative to it
if baseurl:
- baseurl = urllib.parse.urljoin(
- baseurl, "."
- ) # ensure baseurl is normalized (to remove '/./' and '/../')
- assert url.startswith(baseurl), "URL {} must start with baseurl {}".format(
- url, baseurl
- )
+ baseurl = urllib.parse.urljoin(baseurl, ".") # ensure baseurl is normalized (to remove '/./' and '/../')
+ assert url.startswith(baseurl), "URL {} must start with baseurl {}".format(url, baseurl)
subpath = subpath + url[len(baseurl) :].strip("/").split("/")[:-1]
# if we don't have a filename, extract it from the URL
@@ -196,9 +182,7 @@ def download_file( # noqa: C901
- If `middleware_kwargs` are also specified, they will also be passed in to each function in middleware_callbacks.
"""
- relative_file_url, subpath, filename = calculate_relative_url(
- url, filename=filename, baseurl=baseurl, subpath=subpath
- )
+ relative_file_url, subpath, filename = calculate_relative_url(url, filename=filename, baseurl=baseurl, subpath=subpath)
LOGGER.info("Download called for {}".format(url))
# ensure that the destination directory exists
@@ -227,9 +211,7 @@ def download_file( # noqa: C901
encoding = chardet.detect(response.content)
if encoding and "encoding" in encoding:
response.encoding = encoding["encoding"]
- LOGGER.warning(
- "encoding for {} = {}".format(url, response.encoding)
- )
+ LOGGER.warning("encoding for {} = {}".format(url, response.encoding))
content = response.text
if not isinstance(middleware_callbacks, list):
diff --git a/ricecooker/utils/html_writer.py b/ricecooker/utils/html_writer.py
index e51922c8..7ccc3eea 100644
--- a/ricecooker/utils/html_writer.py
+++ b/ricecooker/utils/html_writer.py
@@ -57,11 +57,7 @@ def close(self):
index_present = self.contains("index.html")
self.zf.close() # Make sure zipfile closes no matter what
if not index_present:
- raise ReferenceError(
- "Invalid Zip at {}: missing index.html file (use write_index_contents method)".format(
- self.write_to_path
- )
- )
+ raise ReferenceError("Invalid Zip at {}: missing index.html file (use write_index_contents method)".format(self.write_to_path))
def contains(self, filename):
"""contains: Checks if filename is in the zipfile
@@ -78,9 +74,7 @@ def write_contents(self, filename, contents, directory=None):
directory: (str) directory in zipfile to write file to (optional)
Returns: path to file in zip
"""
- filepath = (
- "{}/{}".format(directory.rstrip("/"), filename) if directory else filename
- )
+ filepath = "{}/{}".format(directory.rstrip("/"), filename) if directory else filename
self._write_to_zipfile(filepath, contents)
return filepath
@@ -109,9 +103,7 @@ def write_url(self, url, filename, directory=None):
directory: (str) directory in zipfile to write file to (optional)
Returns: path to file in zip
"""
- filepath = (
- "{}/{}".format(directory.rstrip("/"), filename) if directory else filename
- )
+ filepath = "{}/{}".format(directory.rstrip("/"), filename) if directory else filename
if not self.contains(filepath):
self._write_to_zipfile(filepath, read(url))
return filepath
diff --git a/ricecooker/utils/images.py b/ricecooker/utils/images.py
index 565334a9..34b46a2d 100644
--- a/ricecooker/utils/images.py
+++ b/ricecooker/utils/images.py
@@ -8,7 +8,6 @@
from .thumbscropping import scale_and_crop
-
# SMARTCROP UTILS
################################################################################
@@ -56,9 +55,7 @@ def create_image_from_epub(epubfile, fpath_out, crop=None):
# 2. fallback to get first image in the ePub file
images = list(book.get_items_of_type(ebooklib.ITEM_IMAGE))
if not images:
- raise ThumbnailGenerationError(
- "ePub file {} contains no images.".format(epubfile)
- )
+ raise ThumbnailGenerationError("ePub file {} contains no images.".format(epubfile))
# TODO: get largest image of the bunch
image_data = BytesIO(images[0].get_content())
@@ -94,9 +91,7 @@ def create_image_from_zip(htmlfile, fpath_out, crop="smart"):
biggest_name = filename
size = img_size
if biggest_name is None:
- raise ThumbnailGenerationError(
- "HTML5 zip file {} contains no images.".format(htmlfile)
- )
+ raise ThumbnailGenerationError("HTML5 zip file {} contains no images.".format(htmlfile))
with zf.open(biggest_name) as fhandle:
image_data = fhandle.read()
with BytesIO(image_data) as bhandle:
@@ -113,9 +108,7 @@ def create_image_from_pdf_page(fpath_in, fpath_out, page_number=0, crop=None):
"""
try:
assert fpath_in.endswith("pdf"), "File must be in pdf format"
- pages = convert_from_path(
- fpath_in, 500, first_page=page_number, last_page=page_number + 1
- )
+ pages = convert_from_path(fpath_in, 500, first_page=page_number, last_page=page_number + 1)
page = pages[0]
# resize
page = scale_and_crop_thumbnail(page, zoom=10, crop=crop)
@@ -135,9 +128,7 @@ def create_tiled_image(source_images, fpath_out):
"""
try:
sizes = {1: 1, 4: 2, 9: 3, 16: 4, 25: 5, 36: 6, 49: 7}
- assert (
- len(source_images) in sizes.keys()
- ), "Number of images must be a perfect square <= 49"
+ assert len(source_images) in sizes.keys(), "Number of images must be a perfect square <= 49"
root = sizes[len(source_images)]
images = list(map(Image.open, source_images))
@@ -171,9 +162,7 @@ def convert_image(filename, dest_dir=None, size=None, format="PNG"):
:returns: Path to converted file.
"""
- assert os.path.exists(filename), "Image file not found: {}".format(
- os.path.abspath(filename)
- )
+ assert os.path.exists(filename), "Image file not found: {}".format(os.path.abspath(filename))
if not dest_dir:
dest_dir = os.path.dirname(os.path.abspath(filename))
diff --git a/ricecooker/utils/jsontrees.py b/ricecooker/utils/jsontrees.py
index 0694382e..e506a223 100644
--- a/ricecooker/utils/jsontrees.py
+++ b/ricecooker/utils/jsontrees.py
@@ -180,9 +180,7 @@ def build_tree_from_json(parent_node, sourcetree):
role=source_node.get("role", roles.LEARNER),
language=source_node.get("language"),
thumbnail=source_node.get("thumbnail"),
- derive_thumbnail=source_node.get(
- "derive_thumbnail", False
- ), # not supported yet
+ derive_thumbnail=source_node.get("derive_thumbnail", False), # not supported yet
tags=source_node.get("tags"),
exercise_data=source_node.get("exercise_data"),
questions=[],
@@ -305,48 +303,26 @@ def add_files(node, file_list): # noqa: C901
node.add_file(video_file)
elif file_type == AUDIO_FILE:
- node.add_file(
- files.AudioFile(
- path=f["path"], language=f.get("language", None), preset=preset
- )
- )
+ node.add_file(files.AudioFile(path=f["path"], language=f.get("language", None), preset=preset))
elif file_type == DOCUMENT_FILE:
- node.add_file(
- files.DocumentFile(
- path=path, language=f.get("language", None), preset=preset
- )
- )
+ node.add_file(files.DocumentFile(path=path, language=f.get("language", None), preset=preset))
elif file_type == EPUB_FILE:
- node.add_file(
- files.EPubFile(
- path=path, language=f.get("language", None), preset=preset
- )
- )
+ node.add_file(files.EPubFile(path=path, language=f.get("language", None), preset=preset))
elif file_type == HTML5_FILE:
- node.add_file(
- files.HTMLZipFile(
- path=path, language=f.get("language", None), preset=preset
- )
- )
+ node.add_file(files.HTMLZipFile(path=path, language=f.get("language", None), preset=preset))
elif file_type == THUMBNAIL_FILE:
if "encoding" in f:
node.add_file(files.Base64ImageFile(encoding=f["encoding"]))
else:
- node.add_file(
- files.ThumbnailFile(path=path, language=f.get("language", None))
- )
+ node.add_file(files.ThumbnailFile(path=path, language=f.get("language", None)))
elif file_type == SUBTITLES_FILE:
if "youtube_id" in f:
- node.add_file(
- files.YouTubeSubtitleFile(
- youtube_id=f["youtube_id"], language=f["language"]
- )
- )
+ node.add_file(files.YouTubeSubtitleFile(youtube_id=f["youtube_id"], language=f["language"]))
else:
keys = ["language", "subtitlesformat"]
params = {"path": path}
@@ -440,7 +416,5 @@ def add_questions(exercise_node, question_list):
else:
raise UnknownQuestionTypeError(
- "Unrecognized question type {0}: accepted types are {1}".format(
- question_type, [key for key, value in exercises.question_choices]
- )
+ "Unrecognized question type {0}: accepted types are {1}".format(question_type, [key for key, value in exercises.question_choices])
)
diff --git a/ricecooker/utils/kolibripreview.py b/ricecooker/utils/kolibripreview.py
index 5e312432..3ef6e360 100755
--- a/ricecooker/utils/kolibripreview.py
+++ b/ricecooker/utils/kolibripreview.py
@@ -33,9 +33,7 @@ def main(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=main.__doc__)
- parser.add_argument(
- "--srcdir", help="HTML5 webroot (source directory)", default="."
- )
+ parser.add_argument("--srcdir", help="HTML5 webroot (source directory)", default=".")
parser.add_argument(
"--destzip",
help="Path to a HTML5 zip file in local Kolibri installation",
diff --git a/ricecooker/utils/libstudio.py b/ricecooker/utils/libstudio.py
index fd5c7787..0d1c5b72 100644
--- a/ricecooker/utils/libstudio.py
+++ b/ricecooker/utils/libstudio.py
@@ -2,7 +2,6 @@
from ricecooker.config import LOGGER
-
# DEFAULT_STUDIO_URL = 'https://develop.studio.learningequality.org'
# DEFAULT_STUDIO_URL = 'http://127.0.0.1:8080'
DEFAULT_STUDIO_URL = "https://studio.learningequality.org"
@@ -19,9 +18,7 @@ class StudioApi(object):
corrections, and other automation.
"""
- def __init__(
- self, token, username=None, password=None, studio_url=DEFAULT_STUDIO_URL
- ):
+ def __init__(self, token, username=None, password=None, studio_url=DEFAULT_STUDIO_URL):
self.studio_url = studio_url.rstrip("/")
self.token = token
self.licenses_by_id = self.get_licenses()
@@ -109,10 +106,7 @@ def get_nodes_by_ids_bulk(self, studio_ids):
NODES_ENDPOINT = self.studio_url + "/api/get_nodes_by_ids_complete/"
headers = {"Authorization": "Token {0}".format(self.token)}
studio_nodes = []
- studio_ids_chunks = [
- studio_ids[i : i + CHUNK_SIZE]
- for i in range(0, len(studio_ids), CHUNK_SIZE)
- ]
+ studio_ids_chunks = [studio_ids[i : i + CHUNK_SIZE] for i in range(0, len(studio_ids), CHUNK_SIZE)]
for studio_ids_chunk in studio_ids_chunks:
studio_ids_csv = ",".join(studio_ids_chunk)
url = NODES_ENDPOINT + studio_ids_csv
@@ -149,9 +143,7 @@ def put_contentnode(self, data):
"""
CONTENTNODE_ENDPOINT = self.studio_url + "/api/contentnode"
REQUIRED_FIELDS = ["id", "tags", "prerequisite", "parent"]
- assert data_has_required_keys(
- data, REQUIRED_FIELDS
- ), "missing necessary attributes"
+ assert data_has_required_keys(data, REQUIRED_FIELDS), "missing necessary attributes"
# studio_id = data['id']
url = CONTENTNODE_ENDPOINT
# print(' semantic PATCH using PUT ' + url)
@@ -170,9 +162,7 @@ def delete_contentnode(self, data, channel_id, trash_studio_id=None):
"""
MOVE_NODES_ENDPOINT = self.studio_url + "/api/move_nodes/"
REQUIRED_FIELDS = ["id"]
- assert data_has_required_keys(
- data, REQUIRED_FIELDS
- ), "missing necessary attributes"
+ assert data_has_required_keys(data, REQUIRED_FIELDS), "missing necessary attributes"
if trash_studio_id is None:
channel_data = self.get_channel(channel_id)
trash_studio_id = channel_data["trash_tree"]["id"]
diff --git a/ricecooker/utils/linecook.py b/ricecooker/utils/linecook.py
index 2a004de8..43e6e887 100644
--- a/ricecooker/utils/linecook.py
+++ b/ricecooker/utils/linecook.py
@@ -3,6 +3,8 @@
from le_utils.constants import content_kinds
+from ricecooker.config import LOGGER
+
from .jsontrees import AUDIO_FILE
from .jsontrees import AUDIO_NODE
from .jsontrees import DOCUMENT_FILE
@@ -16,8 +18,6 @@
from .jsontrees import VIDEO_NODE
from .jsontrees import write_tree_to_json_tree
from .metadata_provider import path_to_tuple
-from ricecooker.config import LOGGER
-
# LINECOOK CONFIGS
################################################################################
@@ -30,9 +30,7 @@
".gitkeep",
]
FILE_SKIP_PATTENRS = []
-FILE_SKIP_THUMBNAILS = (
- []
-) # global list of paths that correspond to thumbails for other content nodes
+FILE_SKIP_THUMBNAILS = [] # global list of paths that correspond to thumbnails for other content nodes
# LINECOOK HELPER FUNCTIONS
@@ -126,14 +124,8 @@ def filter_thumbnail_files(chan_path, filenames, metadata_provider):
"""
We don't want to create `ContentNode` from thumbnail files.
"""
- thumbnail_files_to_skip = set(
- os.path.join(*p) for p in metadata_provider.get_thumbnail_paths()
- )
- return [
- filename
- for filename in filenames
- if os.path.join(chan_path, filename) not in thumbnail_files_to_skip
- ]
+ thumbnail_files_to_skip = set(os.path.join(*p) for p in metadata_provider.get_thumbnail_paths())
+ return [filename for filename in filenames if os.path.join(chan_path, filename) not in thumbnail_files_to_skip]
def keep_folder(raw_path):
@@ -166,24 +158,18 @@ def process_folder(channel, rel_path, filenames, metadata_provider):
if len(chan_path_list) == 1:
# CASE CHANNEL ROOT: `rel_path` points to `channeldir`
# No need to create a topic node here since channel already exists
- containing_node = (
- channel # attach content nodes in filenames directly to channel
- )
+ containing_node = channel # attach content nodes in filenames directly to channel
else:
# CASE TOPIC FOLDER: `rel_path` points to a channelroot subfolder (a.k.a TopicNode)
- dirname = (
- chan_path_list.pop()
- ) # name of the folder (used as ID for internal lookup)
+ dirname = chan_path_list.pop() # name of the folder (used as ID for internal lookup)
topic_parent_node = get_topic_for_path(channel, chan_path_list)
# read topic metadata to get title and description for the TopicNode
topic_metadata = metadata_provider.get(chan_path_tuple)
thumbnail_chan_path = topic_metadata.get("thumbnail_chan_path", None)
if thumbnail_chan_path:
- thumbnail_rel_path = rel_path_from_chan_path(
- thumbnail_chan_path, metadata_provider.channeldir
- )
+ thumbnail_rel_path = rel_path_from_chan_path(thumbnail_chan_path, metadata_provider.channeldir)
else:
thumbnail_rel_path = None
# create TopicNode for this folder
@@ -200,27 +186,19 @@ def process_folder(channel, rel_path, filenames, metadata_provider):
children=[],
)
topic_parent_node["children"].append(topic)
- containing_node = (
- topic # attach content nodes in filenames to the newly created topic
- )
+ containing_node = topic # attach content nodes in filenames to the newly created topic
# filter filenames
filenames_cleaned = filter_filenames(filenames)
- filenames_cleaned2 = filter_thumbnail_files(
- chan_path, filenames_cleaned, metadata_provider
- )
+ filenames_cleaned2 = filter_thumbnail_files(chan_path, filenames_cleaned, metadata_provider)
# PROCESS FILES
for filename in filenames_cleaned2:
chan_filepath = os.path.join(chan_path, filename)
chan_filepath_tuple = path_to_tuple(chan_filepath)
metadata = metadata_provider.get(chan_filepath_tuple)
- node = make_content_node(
- metadata_provider.channeldir, rel_path, filename, metadata
- )
- containing_node["children"].append(
- node
- ) # attach content node to containing_node
+ node = make_content_node(metadata_provider.channeldir, rel_path, filename, metadata)
+ containing_node["children"].append(node) # attach content node to containing_node
def build_ricecooker_json_tree(args, options, metadata_provider, json_tree_path):
@@ -239,9 +217,7 @@ def build_ricecooker_json_tree(args, options, metadata_provider, json_tree_path)
channel_info = metadata_provider.get_channel_info()
thumbnail_chan_path = channel_info.get("thumbnail_chan_path", None)
if thumbnail_chan_path:
- thumbnail_rel_path = rel_path_from_chan_path(
- thumbnail_chan_path, metadata_provider.channeldir
- )
+ thumbnail_rel_path = rel_path_from_chan_path(thumbnail_chan_path, metadata_provider.channeldir)
else:
thumbnail_rel_path = None
@@ -271,19 +247,13 @@ def build_ricecooker_json_tree(args, options, metadata_provider, json_tree_path)
# order of nodes within a given topic. Since alphabetical order is used to
# walk the files in the `channeldir`, we must "splice in" the exercises here
if metadata_provider.has_exercises():
- dir_chan_path = chan_path_from_rel_path(
- rel_path, metadata_provider.channeldir
- )
+ dir_chan_path = chan_path_from_rel_path(rel_path, metadata_provider.channeldir)
dir_path_tuple = path_to_tuple(dir_chan_path)
- exercises_filenames = metadata_provider.get_exercises_for_dir(
- dir_path_tuple
- )
+ exercises_filenames = metadata_provider.get_exercises_for_dir(dir_path_tuple)
filenames.extend(exercises_filenames)
sorted_filenames = sorted(filenames)
- process_folder(
- ricecooker_json_tree, rel_path, sorted_filenames, metadata_provider
- )
+ process_folder(ricecooker_json_tree, rel_path, sorted_filenames, metadata_provider)
# Write out ricecooker_json_tree.json
write_tree_to_json_tree(json_tree_path, ricecooker_json_tree)
@@ -302,11 +272,7 @@ def make_content_node(channeldir, rel_path, filename, metadata): # noqa: C901
elif "questions" in metadata:
kind = content_kinds.EXERCISE
else:
- raise ValueError(
- "Could not find kind for extension "
- + str(ext)
- + " in content_kinds.MAPPING"
- )
+ raise ValueError("Could not find kind for extension " + str(ext) + " in content_kinds.MAPPING")
# Extract metadata fields
source_id = metadata.get("source_id", None)
@@ -336,9 +302,7 @@ def make_content_node(channeldir, rel_path, filename, metadata): # noqa: C901
license=license_dict,
derive_thumbnail=True,
thumbnail=thumbnail_rel_path,
- files=[
- {"file_type": VIDEO_FILE, "path": filepath, "language": lang}
- ], # ffmpeg_settings={"crf": 24},
+ files=[{"file_type": VIDEO_FILE, "path": filepath, "language": lang}], # ffmpeg_settings={"crf": 24},
)
elif kind == AUDIO_NODE:
diff --git a/ricecooker/utils/metadata_provider.py b/ricecooker/utils/metadata_provider.py
index a13eba37..8f621eb2 100644
--- a/ricecooker/utils/metadata_provider.py
+++ b/ricecooker/utils/metadata_provider.py
@@ -13,7 +13,6 @@
from ricecooker.config import LOGGER
from ricecooker.utils.libstudio import StudioApi
-
# CONSTANTS
################################################################################
DEFAULT_EXTRA_ITEMS_SEPARATOR = "🍣" # used to separate list-like data in CSV
@@ -82,13 +81,9 @@
]
DEFAULT_EXERCISE_QUESTIONS_INFO_FILENAME = "ExerciseQuestions.csv"
-EXERCISE_QUESTIONS_QUESTIONID_KEY = (
- "Question ID *" # unique idendifier for this question
-)
+EXERCISE_QUESTIONS_QUESTIONID_KEY = "Question ID *" # unique identifier for this question
EXERCISE_QUESTIONS_TYPE_KEY = "Question type *" # one of ['SingleSelectQuestion', 'MultipleSelectQuestion', 'InputQuestion']
-EXERCISE_QUESTIONS_QUESTION_KEY = (
- "Question *" # string that contains the question setup and the prompt
-)
+EXERCISE_QUESTIONS_QUESTION_KEY = "Question *" # string that contains the question setup and the prompt
EXERCISE_QUESTIONS_OPTION_A_KEY = "Option A"
EXERCISE_QUESTIONS_OPTION_B_KEY = "Option B"
EXERCISE_QUESTIONS_OPTION_C_KEY = "Option C"
@@ -96,9 +91,7 @@
EXERCISE_QUESTIONS_OPTION_E_KEY = "Option E"
EXERCISE_QUESTIONS_OPTION_FGHI_KEY = "Options F..." # This field can contain a list of multiple '🍣'-separated string values,
# e.g., 'Answer F🍣Answer G🍣Answer H' (or other suitable unicode character)
-EXERCISE_QUESTIONS_CORRECT_ANSWER_KEY = (
- "Correct Answer *" # A string that equals one of the options strings
-)
+EXERCISE_QUESTIONS_CORRECT_ANSWER_KEY = "Correct Answer *" # A string that equals one of the options strings
EXERCISE_QUESTIONS_CORRECT_ANSWER2_KEY = "Correct Answer 2" # (for multiple select)
EXERCISE_QUESTIONS_CORRECT_ANSWER3_KEY = "Correct Answer 3" # (for multiple select)
EXERCISE_QUESTIONS_HINT_1_KEY = "Hint 1"
@@ -106,9 +99,7 @@
EXERCISE_QUESTIONS_HINT_3_KEY = "Hint 3"
EXERCISE_QUESTIONS_HINT_4_KEY = "Hint 4"
EXERCISE_QUESTIONS_HINT_5_KEY = "Hint 5"
-EXERCISE_QUESTIONS_HINT_6789_KEY = (
- "Hint 6+" # This field can contain a list of multiple '🍣'-separated string values,
-)
+EXERCISE_QUESTIONS_HINT_6789_KEY = "Hint 6+" # This field can contain a list of multiple '🍣'-separated string values,
# e.g., 'Hint 6 text🍣Hint 7 text🍣Hint 8 text'
EXERCISE_QUESTIONS_INFO_HEADER = [
EXERCISE_SOURCEID_KEY,
@@ -216,12 +207,8 @@ def __init__(
self.contentinfo = contentinfo
self.exercisesinfo = exercisesinfo
self.questionsinfo = questionsinfo
- self.contentcache = (
- {}
- ) # { ('chan', 'path','as','tuple's) --> node metadata dict
- self.exercise_filenames_in_dir = defaultdict(
- list
- ) # { ('chan', 'path','some','dir) --> list of exercises (virtual filenames)
+ self.contentcache = {} # { ('chan', 'path', 'as', 'tuples') --> node metadata dict }
+ self.exercise_filenames_in_dir = defaultdict(list) # { ('chan', 'path', 'some', 'dir') --> list of exercises (virtual filenames) }
self.winpaths = winpaths # paths separator in .csv is windows '\'
if validate_and_cache:
self.validate_headers()
@@ -242,9 +229,7 @@ def cache_contentinfo(self):
dict_reader = csv.DictReader(csv_lines)
for row in dict_reader:
row_dict = self._map_content_row_to_dict(row)
- path_tuple = input_path_to_tuple(
- row_dict["chan_path"], windows=self.winpaths
- )
+ path_tuple = input_path_to_tuple(row_dict["chan_path"], windows=self.winpaths)
self.contentcache[path_tuple] = row_dict
# Additional handling of data in Exercises.csv and ExerciseQuestions.txt
@@ -266,9 +251,7 @@ def cache_contentinfo(self):
dict_reader = csv.DictReader(csv_lines)
for exercise_row in dict_reader:
exercise_dict = self._map_exercise_row_to_dict(exercise_row)
- path_tuple = input_path_to_tuple(
- exercise_dict["chan_path"], windows=self.winpaths
- )
+ path_tuple = input_path_to_tuple(exercise_dict["chan_path"], windows=self.winpaths)
question_source_id = exercise_dict["source_id"]
exercise_dict["questions"] = questions_by_source_id[question_source_id]
# B1: exercises are standard content nodes, so add to contentcache
@@ -298,9 +281,7 @@ def get_channel_info(self):
"""
Returns the first data row from Channel.csv
"""
- csv_filename = get_metadata_file_path(
- channeldir=self.channeldir, filename=self.channelinfo
- )
+ csv_filename = get_metadata_file_path(channeldir=self.channeldir, filename=self.channelinfo)
csv_lines = _read_csv_lines(csv_filename)
dict_reader = csv.DictReader(csv_lines)
channel_csvs_list = list(dict_reader)
@@ -320,17 +301,13 @@ def get_thumbnail_paths(self):
channel_info = self.get_channel_info()
chthumbnail_path = channel_info.get("thumbnail_chan_path", None)
if chthumbnail_path:
- chthumbnail_path_tuple = input_path_to_tuple(
- chthumbnail_path, windows=self.winpaths
- )
+ chthumbnail_path_tuple = input_path_to_tuple(chthumbnail_path, windows=self.winpaths)
thumbnail_path_tuples.append(chthumbnail_path_tuple)
# content thumbnails
for content_file_path_tuple, row in self.contentcache.items():
thumbnail_path = row.get("thumbnail_chan_path", None)
if thumbnail_path:
- thumbnail_path_tuple = input_path_to_tuple(
- thumbnail_path, windows=self.winpaths
- )
+ thumbnail_path_tuple = input_path_to_tuple(thumbnail_path, windows=self.winpaths)
thumbnail_path_tuples.append(thumbnail_path_tuple)
return thumbnail_path_tuples
@@ -364,9 +341,7 @@ def _map_content_row_to_dict(self, row):
license_dict = dict(
license_id=row_cleaned[CONTENT_LICENSE_ID_KEY],
description=row_cleaned.get(CONTENT_LICENSE_DESCRIPTION_KEY, None),
- copyright_holder=row_cleaned.get(
- CONTENT_LICENSE_COPYRIGHT_HOLDER_KEY, None
- ),
+ copyright_holder=row_cleaned.get(CONTENT_LICENSE_COPYRIGHT_HOLDER_KEY, None),
)
else:
license_dict = None
@@ -411,9 +386,7 @@ def _map_exercise_row_to_dict(self, row):
license_dict = dict(
license_id=row_cleaned[CONTENT_LICENSE_ID_KEY],
description=row_cleaned.get(CONTENT_LICENSE_DESCRIPTION_KEY, None),
- copyright_holder=row_cleaned.get(
- CONTENT_LICENSE_COPYRIGHT_HOLDER_KEY, None
- ),
+ copyright_holder=row_cleaned.get(CONTENT_LICENSE_COPYRIGHT_HOLDER_KEY, None),
)
else:
license_dict = None
@@ -425,9 +398,7 @@ def _map_exercise_row_to_dict(self, row):
elif randomize_raw.lower() in CSV_STR_FALSE_VALUES:
randomize = False
else:
- raise ValueError(
- "Unrecognized value " + randomize_raw + " for randomzied key"
- )
+ raise ValueError("Unrecognized value " + randomize_raw + " for randomized key")
exercise_data = dict(mastery_model=exercises.M_OF_N, randomize=randomize)
m_value = row_cleaned.get(EXERCISE_M_KEY, None)
if m_value:
@@ -541,9 +512,7 @@ def _map_exercise_question_row_to_dict(self, row): # noqa: C901
hints=hints,
)
elif question_type == exercises.PERSEUS_QUESTION:
- raise ValueError(
- "Perseus questions not currently supported in CSV workflow."
- )
+ raise ValueError("Perseus questions not currently supported in CSV workflow.")
return question_dict
@@ -558,12 +527,8 @@ def validate_headers(self):
self.validate_header(self.channeldir, self.channelinfo, CHANNEL_INFO_HEADER)
self.validate_header(self.channeldir, self.contentinfo, CONTENT_INFO_HEADER)
if self.has_exercises():
- self.validate_header(
- self.channeldir, self.exercisesinfo, EXERCISE_INFO_HEADER
- )
- self.validate_header(
- self.channeldir, self.questionsinfo, EXERCISE_QUESTIONS_INFO_HEADER
- )
+ self.validate_header(self.channeldir, self.exercisesinfo, EXERCISE_INFO_HEADER)
+ self.validate_header(self.channeldir, self.questionsinfo, EXERCISE_QUESTIONS_INFO_HEADER)
def validate_header(self, channeldir, filename, expected_header):
"""
@@ -575,12 +540,7 @@ def validate_header(self, channeldir, filename, expected_header):
dict_reader = csv.DictReader(csv_lines)
actual = set(dict_reader.fieldnames)
if not actual == expected:
- raise ValueError(
- "Unexpected CSV file header in "
- + csv_filename
- + " Expected header:"
- + str(expected)
- )
+ raise ValueError("Unexpected CSV file header in " + csv_filename + ". Expected header: " + str(expected))
def validate(self):
"""
@@ -596,9 +556,7 @@ def generate_exercises_from_importstudioid(self, args, options):
Create rows in Exercises.csv and ExerciseQuestions.csv from a Studio channel,
specified based on a studio_id (e.g. studio_id of main_tree for some channel)'
"""
- print(
- "Generating Exercises.csv and ExerciseQuestions.csv from a Studio channel"
- )
+ print("Generating Exercises.csv and ExerciseQuestions.csv from a Studio channel")
self.studioapi = StudioApi(token=args["token"])
channel_dict = self.studioapi.get_tree_for_studio_id(args["importstudioid"])
json.dump(
@@ -630,11 +588,8 @@ def _write_subtree(path_tuple, subtree, is_root=False):
# TOPIC ############################################################
if kind == "topic":
-
if is_root:
- self.write_topic_row_from_studio_dict(
- path_tuple, subtree, is_root=is_root
- )
+ self.write_topic_row_from_studio_dict(path_tuple, subtree, is_root=is_root)
for child in subtree["children"]:
_write_subtree(path_tuple, child)
else:
@@ -730,18 +685,11 @@ def _process_string(string):
for match in matches:
file_result = match[1]
file_name = file_result.replace(contentstorage_prefix, "")
- file_url = (
- studio_storage + file_name[0] + "/" + file_name[1] + "/" + file_name
- )
+ file_url = studio_storage + file_name[0] + "/" + file_name[1] + "/" + file_name
file_local_path = os.path.join(dest_path, file_name)
response = requests.get(file_url)
if response.status_code != 200:
- print(
- "Failed for image "
- + str(response.status_code)
- + " >> "
- + file_url
- )
+ print("Failed for image " + str(response.status_code) + " >> " + file_url)
return string
with open(file_local_path, "wb") as local_file:
local_file.write(response.content)
@@ -806,44 +754,21 @@ def _safe_list_get(_list, idx, default):
if hints_raw:
raise ValueError("Found hints but not handled..")
- LOGGER.info(
- " - writing question with studio_id="
- + question_dict["assessment_id"]
- )
+ LOGGER.info(" - writing question with studio_id=" + question_dict["assessment_id"])
question_row = {}
question_row[EXERCISE_SOURCEID_KEY] = source_id
- question_row[EXERCISE_QUESTIONS_QUESTIONID_KEY] = question_dict[
- "assessment_id"
- ] # question_dict['assessment_id']
- question_row[EXERCISE_QUESTIONS_TYPE_KEY] = type_lookup[
- question_dict["type"]
- ]
+ question_row[EXERCISE_QUESTIONS_QUESTIONID_KEY] = question_dict["assessment_id"]
+ question_row[EXERCISE_QUESTIONS_TYPE_KEY] = type_lookup[question_dict["type"]]
question_row[EXERCISE_QUESTIONS_QUESTION_KEY] = question_dict["question"]
- question_row[EXERCISE_QUESTIONS_OPTION_A_KEY] = _safe_list_get(
- options, 0, None
- )
- question_row[EXERCISE_QUESTIONS_OPTION_B_KEY] = _safe_list_get(
- options, 1, None
- )
- question_row[EXERCISE_QUESTIONS_OPTION_C_KEY] = _safe_list_get(
- options, 2, None
- )
- question_row[EXERCISE_QUESTIONS_OPTION_D_KEY] = _safe_list_get(
- options, 3, None
- )
- question_row[EXERCISE_QUESTIONS_OPTION_E_KEY] = _safe_list_get(
- options, 4, None
- )
+ question_row[EXERCISE_QUESTIONS_OPTION_A_KEY] = _safe_list_get(options, 0, None)
+ question_row[EXERCISE_QUESTIONS_OPTION_B_KEY] = _safe_list_get(options, 1, None)
+ question_row[EXERCISE_QUESTIONS_OPTION_C_KEY] = _safe_list_get(options, 2, None)
+ question_row[EXERCISE_QUESTIONS_OPTION_D_KEY] = _safe_list_get(options, 3, None)
+ question_row[EXERCISE_QUESTIONS_OPTION_E_KEY] = _safe_list_get(options, 4, None)
question_row[EXERCISE_QUESTIONS_OPTION_FGHI_KEY] = extra_options
- question_row[EXERCISE_QUESTIONS_CORRECT_ANSWER_KEY] = _safe_list_get(
- correct, 0, None
- )
- question_row[EXERCISE_QUESTIONS_CORRECT_ANSWER2_KEY] = _safe_list_get(
- correct, 1, None
- )
- question_row[EXERCISE_QUESTIONS_CORRECT_ANSWER3_KEY] = _safe_list_get(
- correct, 2, None
- )
+ question_row[EXERCISE_QUESTIONS_CORRECT_ANSWER_KEY] = _safe_list_get(correct, 0, None)
+ question_row[EXERCISE_QUESTIONS_CORRECT_ANSWER2_KEY] = _safe_list_get(correct, 1, None)
+ question_row[EXERCISE_QUESTIONS_CORRECT_ANSWER3_KEY] = _safe_list_get(correct, 2, None)
question_row[EXERCISE_QUESTIONS_HINT_1_KEY] = None # TODO
question_row[EXERCISE_QUESTIONS_HINT_2_KEY] = None # TODO
question_row[EXERCISE_QUESTIONS_HINT_3_KEY] = None # TODO
@@ -881,14 +806,8 @@ def generate_contentinfo_from_channeldir(self, args, options):
for rel_path, _subfolders, filenames in content_folders:
LOGGER.info("processing folder " + str(rel_path))
sorted_filenames = sorted(filenames)
- self.generate_contentinfo_from_folder(
- csvwriter, rel_path, sorted_filenames
- )
- LOGGER.info(
- "Generted {} row for all folders and files in {}".format(
- self.contentinfo, self.channeldir
- )
- )
+ self.generate_contentinfo_from_folder(csvwriter, rel_path, sorted_filenames)
+ LOGGER.info("Generted {} row for all folders and files in {}".format(self.contentinfo, self.channeldir))
def generate_contentinfo_from_folder(self, csvwriter, rel_path, filenames):
"""
@@ -896,9 +815,7 @@ def generate_contentinfo_from_folder(self, csvwriter, rel_path, filenames):
add content node rows for all the files in the `rel_path` folder.
"""
LOGGER.debug("IN process_folder " + str(rel_path) + " " + str(filenames))
- from ricecooker.utils.linecook import (
- filter_filenames,
- )
+ from ricecooker.utils.linecook import filter_filenames
# WRITE TOPIC ROW
topicrow = self.channeldir_node_to_row(rel_path.split(os.path.sep))
@@ -920,9 +837,7 @@ def channeldir_node_to_row(self, path_tuple):
row = dict()
for key in CONTENT_INFO_HEADER:
row[key] = None
- row[CONTENT_PATH_KEY] = "/".join(
- path_tuple
- ) # use / in .csv on Windows and UNIX
+ row[CONTENT_PATH_KEY] = "/".join(path_tuple) # use / in .csv on Windows and UNIX
title = path_tuple[-1].replace("_", " ")
for ext in content_kinds.MAPPING.keys():
if title.endswith(ext):
diff --git a/ricecooker/utils/pdf.py b/ricecooker/utils/pdf.py
index c15d6935..20704b81 100644
--- a/ricecooker/utils/pdf.py
+++ b/ricecooker/utils/pdf.py
@@ -98,11 +98,8 @@ def get_toc(self, subchapters=False): # noqa: C901
index = 0
for dest in self.pdf.getOutlines():
-
# Process chapters
- if isinstance(dest, CustomDestination) and not isinstance(
- dest["/Page"], NullObject
- ):
+ if isinstance(dest, CustomDestination) and not isinstance(dest["/Page"], NullObject):
page_num = self.pdf.getDestinationPageNumber(dest)
chapter_pagerange = {
"title": dest["/Title"].replace("\xa0", " "),
@@ -128,9 +125,7 @@ def get_toc(self, subchapters=False): # noqa: C901
parent = chapters[index - 1]
subindex = 0
for subdest in dest:
- if isinstance(subdest, CustomDestination) and not isinstance(
- subdest["/Page"], NullObject
- ):
+ if isinstance(subdest, CustomDestination) and not isinstance(subdest["/Page"], NullObject):
subpage_num = self.pdf.getDestinationPageNumber(subdest)
parent["children"].append(
{
@@ -151,12 +146,8 @@ def write_pagerange(self, pagerange, prefix=""):
e.g. pagerange = {'title':'First chapter', 'page_start':0, 'page_end':5}
"""
writer = PdfFileWriter()
- slug = "".join(
- [c for c in pagerange["title"].replace(" ", "-") if c.isalnum() or c == "-"]
- )
- write_to_path = os.path.sep.join(
- [self.directory, "{}{}.pdf".format(prefix, slug)]
- )
+ slug = "".join([c for c in pagerange["title"].replace(" ", "-") if c.isalnum() or c == "-"])
+ write_to_path = os.path.sep.join([self.directory, "{}{}.pdf".format(prefix, slug)])
for page in range(pagerange["page_start"], pagerange["page_end"]):
writer.addPage(self.pdf.getPage(page))
writer.removeLinks() # must be done every page
@@ -214,17 +205,11 @@ def split_subchapters(self, jsondata=None):
"page_start": chpagerange["page_start"],
"page_end": first_subchapter["page_start"],
}
- write_to_path = self.write_pagerange(
- chintro_pagerange, prefix=chprefix
- )
- chapter_topic["children"].append(
- {"title": chpagerange["title"], "path": write_to_path}
- )
+ write_to_path = self.write_pagerange(chintro_pagerange, prefix=chprefix)
+ chapter_topic["children"].append({"title": chpagerange["title"], "path": write_to_path})
# Handle all subchapters
- subchapter_nodes = self.split_chapters(
- jsondata=subchpageranges, prefix=chprefix
- )
+ subchapter_nodes = self.split_chapters(jsondata=subchpageranges, prefix=chprefix)
chapter_topic["children"].extend(subchapter_nodes)
chapters.append(chapter_topic)
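
For reference, the reflowed write_pagerange above is the classic pre-3.0 PyPDF2 splitting pattern. A self-contained sketch; the input filename is assumed:

    from PyPDF2 import PdfFileReader, PdfFileWriter

    reader = PdfFileReader("book.pdf")  # assumed input file
    pagerange = {"title": "First chapter", "page_start": 0, "page_end": 5}

    # Slugify the title so it is safe to embed in a filename.
    slug = "".join(c for c in pagerange["title"].replace(" ", "-") if c.isalnum() or c == "-")
    writer = PdfFileWriter()
    for page in range(pagerange["page_start"], pagerange["page_end"]):
        writer.addPage(reader.getPage(page))
        writer.removeLinks()  # per the source comment, must be done every page
    with open("{}.pdf".format(slug), "wb") as f:
        writer.write(f)
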
diff --git a/ricecooker/utils/pipeline/__init__.py b/ricecooker/utils/pipeline/__init__.py
index f73386d0..f0e08ffa 100644
--- a/ricecooker/utils/pipeline/__init__.py
+++ b/ricecooker/utils/pipeline/__init__.py
@@ -4,12 +4,12 @@
from typing import Dict
from typing import Optional
+from ricecooker.utils.pipeline.context import FileMetadata
+
from .convert import ConversionStageHandler
from .extract_metadata import ExtractMetadataStageHandler
from .file_handler import CompositeHandler
from .transfer import DownloadStageHandler
-from ricecooker.utils.pipeline.context import FileMetadata
-
# Do this to prevent import of broken Windows filetype registry that makes guesstype not work.
# https://www.thecodingforums.com/threads/mimetypes-guess_type-broken-in-windows-on-py2-7-and-python-3-x.952693/
@@ -80,9 +80,7 @@ def execute(
# For each new metadata in the returned list
# make a unique copy of the existing metadata and
# merge the new metadata into the existing metadata
- updated_file_metadata_list.append(
- file_metadata.merge(new_metadata)
- )
+ updated_file_metadata_list.append(file_metadata.merge(new_metadata))
else:
# Otherwise, it's a noop
updated_file_metadata_list.append(file_metadata)
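
The branch above fans one FileMetadata out into several: each entry the handler returns is merged into its own copy of the existing metadata, so downstream stages never share mutable state. FileMetadata.merge lives in context.py; a simplified stand-in showing the copy-then-merge idea (Meta is hypothetical, not the real class):

    from dataclasses import dataclass, replace
    from typing import Optional

    @dataclass(frozen=True)
    class Meta:  # hypothetical stand-in for FileMetadata
        path: Optional[str] = None
        language: Optional[str] = None

        def merge(self, other):
            # Fields set on `other` win; None fields keep this instance's value.
            return replace(self, **{k: v for k, v in vars(other).items() if v is not None})

    base = Meta(path="video.mp4")
    updated = [base.merge(new) for new in (Meta(language="en"), Meta(language="es"))]
    # Two independent copies; `base` itself is unchanged.
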
diff --git a/ricecooker/utils/pipeline/context.py b/ricecooker/utils/pipeline/context.py
index 386e177b..0001fe71 100644
--- a/ricecooker/utils/pipeline/context.py
+++ b/ricecooker/utils/pipeline/context.py
@@ -59,9 +59,7 @@ class FileMetadata:
content_node_metadata: Optional[ContentNodeMetadata] = None
def to_dict(self):
- return asdict(
- self, dict_factory=lambda x: {k: v for k, v in x if v is not None}
- )
+ return asdict(self, dict_factory=lambda x: {k: v for k, v in x if v is not None})
def merge(self, other):
"""
diff --git a/ricecooker/utils/pipeline/convert.py b/ricecooker/utils/pipeline/convert.py
index 2c3c729a..03229af8 100644
--- a/ricecooker/utils/pipeline/convert.py
+++ b/ricecooker/utils/pipeline/convert.py
@@ -2,6 +2,7 @@
To avoid making the pipeline overly convoluted, these handlers
both validate and convert files.
"""
+
import json
import os
import shutil
@@ -27,8 +28,6 @@
from PyPDF2 import PdfFileReader
from PyPDF2.utils import PdfReadError
-from .file_handler import ExtensionMatchingHandler
-from .file_handler import StageHandler
from ricecooker import config
from ricecooker.exceptions import UnknownFileTypeError
from ricecooker.utils.audio import AudioCompressionError
@@ -37,17 +36,19 @@
from ricecooker.utils.pipeline.context import ContextMetadata
from ricecooker.utils.pipeline.context import FileMetadata
from ricecooker.utils.pipeline.exceptions import InvalidFileException
-from ricecooker.utils.subtitles import build_subtitle_converter_from_file
+from ricecooker.utils.subtitles import LANGUAGE_CODE_UNKNOWN
from ricecooker.utils.subtitles import InvalidSubtitleFormatError
from ricecooker.utils.subtitles import InvalidSubtitleLanguageError
-from ricecooker.utils.subtitles import LANGUAGE_CODE_UNKNOWN
+from ricecooker.utils.subtitles import build_subtitle_converter_from_file
from ricecooker.utils.utils import extract_path_ext
+from ricecooker.utils.videos import VideoCompressionError
from ricecooker.utils.videos import compress_video
from ricecooker.utils.videos import validate_media_file
-from ricecooker.utils.videos import VideoCompressionError
from ricecooker.utils.youtube import get_language_with_alpha2_fallback
from ricecooker.utils.zip import create_predictable_zip
+from .file_handler import ExtensionMatchingHandler
+from .file_handler import StageHandler
CONVERTIBLE_FORMATS = {p.id: p.convertible_formats for p in format_presets.PRESETLIST}
@@ -81,9 +82,7 @@ class VideoCompressionHandler(MediaCompressionHandler):
file_formats.WEBM,
}
- EXTENSIONS = SUPPORTED_VIDEO_EXTS | set(
- CONVERTIBLE_FORMATS[format_presets.VIDEO_HIGH_RES]
- )
+ EXTENSIONS = SUPPORTED_VIDEO_EXTS | set(CONVERTIBLE_FORMATS[format_presets.VIDEO_HIGH_RES])
HANDLED_EXCEPTIONS = [VideoCompressionError]
@@ -91,7 +90,6 @@ def get_file_kwargs(self, context):
return [{"ffmpeg_settings": context.video_settings}]
def handle_file(self, path, ffmpeg_settings=None):
-
ffmpeg_settings = ffmpeg_settings or {}
input_ext = extract_path_ext(path)
@@ -102,9 +100,7 @@ def handle_file(self, path, ffmpeg_settings=None):
# If we're not compressing, just validate the file.
is_valid, error = validate_media_file(path)
if not is_valid:
- raise InvalidFileException(
- f"Video file {path} did not pass verification with error: {error}"
- )
+ raise InvalidFileException(f"Video file {path} did not pass verification with error: {error}")
return
else:
output_ext = file_formats.WEBM
@@ -148,9 +144,7 @@ def handle_file(self, path, ffmpeg_settings=None):
# If we're not compressing, just validate the file.
is_valid, error = validate_media_file(path)
if not is_valid:
- raise InvalidFileException(
- f"Audio file {path} did not pass verification with error: {error}"
- )
+ raise InvalidFileException(f"Audio file {path} did not pass verification with error: {error}")
return
output_ext = file_formats.MP3
@@ -165,7 +159,6 @@ class ArchiveProcessingContextMetadata(ContextMetadata):
class ArchiveProcessingBaseHandler(ExtensionMatchingHandler):
-
CONTEXT_CLASS = ArchiveProcessingContextMetadata
def get_cache_key(self, path, audio_settings=None, video_settings=None) -> str:
@@ -207,9 +200,7 @@ def handle_file(self, path, audio_settings=None, video_settings=None):
ext=ext,
)
# create_predictable_zip will iterate over subfiles, call file_converter
- processed_zip_path = create_predictable_zip(
- path, file_converter=file_converter if config.COMPRESS else None
- )
+ processed_zip_path = create_predictable_zip(path, file_converter=file_converter if config.COMPRESS else None)
with self.write_file(ext) as fh:
with open(processed_zip_path, "rb") as zf:
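
The comment in the hunk above carries the design: create_predictable_zip walks every archive member and, when given a file_converter, routes each member through it before writing, which is why passing None when config.COMPRESS is off turns the whole step into a plain copy. The callback's exact signature is not shown in this patch; a hedged sketch of the wiring, assuming the converter maps a path to a (possibly re-encoded) path:

    from functools import partial

    def convert_member(path, audio_settings=None, video_settings=None):
        # Hypothetical callback: return the replacement path for this archive
        # member, or the original path to pass it through untouched.
        return path

    COMPRESS = True  # stand-in for config.COMPRESS
    converter = partial(convert_member, audio_settings={}, video_settings={}) if COMPRESS else None
    # processed_zip_path = create_predictable_zip(path, file_converter=converter)
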
@@ -224,17 +215,13 @@ def open_and_verify_archive(self, path):
with zipfile.ZipFile(path) as zf:
yield zf
except zipfile.BadZipFile:
- raise InvalidFileException(
- f"File {path} is not a valid {self.FILE_TYPE} file, it is not a valid zip archive."
- )
+ raise InvalidFileException(f"File {path} is not a valid {self.FILE_TYPE} file, it is not a valid zip archive.")
def read_file_from_archive(self, zf, filepath):
try:
return zf.read(filepath)
except KeyError:
- raise InvalidFileException(
- f"File {zf.filename} is not a valid {self.FILE_TYPE} file, {filepath} is missing."
- )
+ raise InvalidFileException(f"File {zf.filename} is not a valid {self.FILE_TYPE} file, {filepath} is missing.")
def _validate_index_html_body(self, zf, path):
"""Validate that index.html exists and has a non-empty body."""
@@ -243,27 +230,17 @@ def _validate_index_html_body(self, zf, path):
dom = html5lib.parse(index_html, namespaceHTMLElements=False)
body = dom.find("body")
if body is None:
- raise InvalidFileException(
- f"File {path} is not a valid {self.FILE_TYPE} file, index.html is missing a body element."
- )
+ raise InvalidFileException(f"File {path} is not a valid {self.FILE_TYPE} file, index.html is missing a body element.")
# Check that the body has at least one child element
# for some reason it seems like comments don't get a string tag attribute
- body_children = [
- c for c in body.iter() if isinstance(c.tag, str) and c.tag != "body"
- ]
+ body_children = [c for c in body.iter() if isinstance(c.tag, str) and c.tag != "body"]
if not (body.text and body.text.strip()) and not body_children:
- raise InvalidFileException(
- f"File {path} is not a valid {self.FILE_TYPE} file, index.html is empty."
- )
+ raise InvalidFileException(f"File {path} is not a valid {self.FILE_TYPE} file, index.html is empty.")
return dom
except ParseError:
- raise InvalidFileException(
- f"File {path} is not a valid {self.FILE_TYPE} file, index.html is not well-formed."
- )
+ raise InvalidFileException(f"File {path} is not a valid {self.FILE_TYPE} file, index.html is not well-formed.")
- def _read_and_compress_archive_file(
- self, filepath, reader, audio_settings=None, video_settings=None, ext=None
- ):
+ def _read_and_compress_archive_file(self, filepath, reader, audio_settings=None, video_settings=None, ext=None):
extension = extract_path_ext(filepath, default_ext=ext)
# If it's mp4, webm, or mp3, compress it; else pass it through
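
The index.html validation reflowed above has to work around html5lib's leniency: the parser builds a body for almost any input, so emptiness is checked explicitly, and comment nodes are screened out because their .tag is not a string. A standalone version of that check:

    import html5lib

    def body_is_nonempty(index_html):
        dom = html5lib.parse(index_html, namespaceHTMLElements=False)
        body = dom.find("body")
        if body is None:
            return False
        # Comment nodes have a non-string tag, so filter on isinstance(..., str).
        children = [c for c in body.iter() if isinstance(c.tag, str) and c.tag != "body"]
        return bool(body.text and body.text.strip()) or bool(children)

    assert body_is_nonempty("<html><body><p>hi</p></body></html>")
    assert not body_is_nonempty("<html><body><!-- only a comment --></body></html>")
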
@@ -276,9 +253,7 @@ def _read_and_compress_archive_file(
try:
# Create a temp out for compressed result
- with tempfile.NamedTemporaryFile(
- suffix=f".{extension}", delete=False
- ) as temp_out:
+ with tempfile.NamedTemporaryFile(suffix=f".{extension}", delete=False) as temp_out:
temp_out.close()
if extension == file_formats.MP3:
@@ -310,7 +285,6 @@ def _read_and_compress_archive_file(
class HTML5ConversionHandler(ArchiveProcessingBaseHandler):
-
EXTENSIONS = {file_formats.HTML5}
FILE_TYPE = "HTML5"
@@ -320,7 +294,6 @@ def validate_archive(self, path: str):
class H5PConversionHandler(ArchiveProcessingBaseHandler):
-
EXTENSIONS = {file_formats.H5P}
FILE_TYPE = "H5P"
@@ -330,20 +303,15 @@ def validate_archive(self, path: str):
try:
json.loads(h5p_json)
except json.JSONDecodeError:
- raise InvalidFileException(
- f"File {path} is not a valid H5P file, h5p.json is not valid JSON."
- )
+ raise InvalidFileException(f"File {path} is not a valid H5P file, h5p.json is not valid JSON.")
content_json = self.read_file_from_archive(zf, "content/content.json")
try:
json.loads(content_json)
except json.JSONDecodeError:
- raise InvalidFileException(
- f"File {path} is not a valid H5P file, content/content.json is not valid JSON."
- )
+ raise InvalidFileException(f"File {path} is not a valid H5P file, content/content.json is not valid JSON.")
class EPUBConversionHandler(ArchiveProcessingBaseHandler):
-
EXTENSIONS = {file_formats.EPUB}
FILE_TYPE = "EPUB"
@@ -352,13 +320,9 @@ def _validate_mimetype(self, zf, path):
try:
mimetype = mimetype.decode("utf-8").strip()
except UnicodeDecodeError:
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, mimetype file is not UTF-8 encoded."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, mimetype file is not UTF-8 encoded.")
if mimetype != "application/epub+zip":
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, mimetype is incorrect."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, mimetype is incorrect.")
def _get_opf_path(self, zf, path):
# Then read the container manifest to confirm it exists and get the path to the OPF file.
@@ -370,36 +334,24 @@ def _get_opf_path(self, zf, path):
{"ns": "urn:oasis:names:tc:opendocument:xmlns:container"},
)
if not rootfiles:
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, rootfile is missing from container manifest."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, rootfile is missing from container manifest.")
opf_path = rootfiles[0].get("full-path")
if not opf_path:
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, rootfile path is empty."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, rootfile path is empty.")
return opf_path
except ET.ParseError:
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, container manifest is not well-formed."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, container manifest is not well-formed.")
def _validate_opf(self, zf, path, opf_path):
# If the container manifest is valid, read the OPF file and confirm it exists and has a manifest.
opf_file = self.read_file_from_archive(zf, opf_path)
try:
opf = ET.fromstring(opf_file)
- manifest = opf.find(
- ".//ns:manifest", {"ns": "http://www.idpf.org/2007/opf"}
- )
+ manifest = opf.find(".//ns:manifest", {"ns": "http://www.idpf.org/2007/opf"})
if manifest is None:
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, manifest is missing from OPF."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, manifest is missing from OPF.")
except ET.ParseError:
- raise InvalidFileException(
- f"File {path} is not a valid EPUB file, OPF file is not well-formed."
- )
+ raise InvalidFileException(f"File {path} is not a valid EPUB file, OPF file is not well-formed.")
def validate_archive(self, path: str):
with self.open_and_verify_archive(path) as zf:
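
For reference, the EPUB checks reflowed above follow the OCF layout: META-INF/container.xml declares rootfile entries whose full-path points at the OPF package document, which must in turn contain a manifest in the http://www.idpf.org/2007/opf namespace. A minimal standalone sketch of locating the OPF path; the filename is assumed:

    import xml.etree.ElementTree as ET
    import zipfile

    NS = {"ns": "urn:oasis:names:tc:opendocument:xmlns:container"}

    with zipfile.ZipFile("book.epub") as zf:  # assumed file
        container = ET.fromstring(zf.read("META-INF/container.xml"))
        rootfiles = container.findall(".//ns:rootfile", NS)
        if not rootfiles:
            raise ValueError("rootfile is missing from container manifest")
        print("OPF package document:", rootfiles[0].get("full-path"))
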
@@ -409,7 +361,6 @@ def validate_archive(self, path: str):
class KPUBConversionHandler(ArchiveProcessingBaseHandler):
-
EXTENSIONS = {file_formats.HTML5_ARTICLE}
FILE_TYPE = "KPUB"
@@ -419,25 +370,18 @@ def validate_archive(self, path: str):
# Check for inline