diff --git a/.gitignore b/.gitignore index 190d698..9ecb4cb 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,8 @@ docs/_build # Unit test output *.log + +# Fixture output files (written by the fixture tests for inspection) +test/fixtures/output/ +test/fixtures/hide/output/ +test/fixtures/restore/output/ diff --git a/README.md b/README.md index e8f2214..baeb348 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,19 @@ Note - NTTT will work on Windows, macOS and Linux. For maintainers, [doc/transformations.md](doc/transformations.md) describes what NTTT changes in `meta.yml` and Markdown files (sections, HTML, formatting, URLs, and related behaviour). +NTTT supports both the legacy (`--- task ---`) and the Raspberry Flavoured Markdown (`> [!TASK]`) syntaxes, which may be mixed in a single file. The structural markers for both syntaxes are defined in one editable data file — see [doc/markers.md](doc/markers.md) — which also drives the hide-strings mode. The design rationale for this dual-syntax + hide-strings work is recorded in [doc/plan-dual-syntax-hide-strings.md](doc/plan-dual-syntax-hide-strings.md). + +### Hide-strings mode + +NTTT can generate the list of Crowdin string IDs to hide from translators (markers from [`nttt/markers.yml`](nttt/markers.yml)): + +```bash +crowdin string list --verbose | nttt --hide-strings > ids.txt +while read -r id; do crowdin string edit "$id" --hidden; done < ids.txt +``` + +An example CI workflow for content repositories is in [doc/workflows/hide-strings.yml](doc/workflows/hide-strings.yml). + ## Prerequisites The tool requires having Python 3.7 or newer. 
@@ -114,28 +127,6 @@ You can specify different directories for the input and output folder using the nttt --input c:\path\to\project\de-DE --output c:\path\to\project\de-DE-tidy ``` -### Crowdin marker stripping and restoring - -NTTT has three processing modes: - -- `tidy` (default): restore stripped Markdown markers for non-English locale folders, then run the existing tidy-up transforms. -- `strip`: remove non-translatable Markdown markers before uploading English source files to Crowdin. -- `restore`: reinsert stripped Markdown markers into translated files after downloading from Crowdin. - -Use `strip` on the English source folder before Crowdin upload: - -```bash -nttt --mode strip -i en -o en -Y on -``` - -Use `restore` on a translated locale folder after Crowdin download: - -```bash -nttt --mode restore -i de-DE -e en -o de-DE -Y on -``` - -Modern bare markers such as `> [!TASK]` are removed entirely, along with their paired empty `>` line. Modern labelled markers such as `> [!ACCORDION] Where are my voice recordings stored?` keep the label available for translation by becoming `> Where are my voice recordings stored?`; restore reinserts `[!ACCORDION]` before the translated label. Legacy markers such as `--- task ---` and `--- /task ---` are also removed and restored by line alignment against `en/`. - ### Help To bring up full usage information use the `-h`/`--help` option. diff --git a/doc/markers.md b/doc/markers.md new file mode 100644 index 0000000..ee53a33 --- /dev/null +++ b/doc/markers.md @@ -0,0 +1,81 @@ +# NTTT: marker registry + +NTTT supports two markdown syntaxes for Raspberry Pi project content, which may +appear **in the same file**: + +- **legacy** (`kramdown-rpf`): `--- task ---` … `--- /task ---` +- **RFM** (Raspberry Flavoured Markdown / GFM alerts): `> [!TASK]`, `> [!HINT]`, `> [!ACCORDION] Title` + +The list of markers lives in one data file — [`nttt/markers.yml`](../nttt/markers.yml) — +so it can be changed **without editing Python**. 
It is the single source of truth for: + +1. **Hiding** — which marker strings NTTT lists for Crowdin to hide from translators + (`nttt --hide-strings`, see below). +2. **Restoring** — which RFM alert keywords NTTT reverts back to English on download + (see [`nttt/cleanup_alerts.py`](../nttt/cleanup_alerts.py)). + +> The legacy `--- … ---` normalisation in [`nttt/cleanup_sections.py`](../nttt/cleanup_sections.py) +> is intentionally **syntax-generic** (it must cope with arbitrary translated tag +> names), so it does not read the registry. The registry drives hiding and the RFM +> alert handling. + +## Editing `markers.yml` (no Python needed) + +Each block type is one list entry: + +```yaml + - name: task + hide: true + legacy: { open: "--- task ---", close: "--- /task ---" } + rfm: { alert: "[!TASK]" } +``` + +- `hide: true` lists this marker for Crowdin to hide; `false` keeps it visible. +- `legacy.open` / `legacy.close` are the exact marker lines (`close` is optional — + some blocks, e.g. `save`, have no closing marker). +- `rfm.alert` is the alert token exactly as written, including the brackets. +- Include only the syntaxes a block has (some are RFM-only, e.g. `info`/`tip`/`debug`). + +`raw_patterns:` holds non-block strings to hide (matched as plain substrings), e.g. +`hero_image images/`. 
+ +**To add a block type:** copy an entry, change the values, run the tests: + +```bash +python -m unittest discover -s unit_test +``` + +## Legacy ↔ RFM mapping + +| Block | Legacy | RFM alert | Hidden | +|--------------|--------------------------------|-------------------------------|:------:| +| task | `--- task ---` | `[!TASK]` | yes | +| hints | `--- hints ---` | *(grouped hints)* | yes | +| hint | `--- hint ---` | `[!HINT]` | yes | +| collapse | `--- collapse ---` | `[!ACCORDION]` *(+ title)* | yes | +| challenge | `--- challenge ---` | `[!CHALLENGE]` | yes | +| code | `--- code ---` | *(fenced-code attributes)* | yes | +| save | `--- save ---` | `[!SAVE]` | yes | +| new-page | `--- new-page ---` | `
` | yes | +| no-print | `--- no-print ---` | `[!NOPRINT]` | yes | +| print-only | `--- print-only ---` | `[!PRINTONLY]` | yes | +| quiz | `--- quiz ---` | — | yes | +| question | `--- question ---` | — | yes | +| choices | `--- choices ---` | — | yes | +| feedback | `--- feedback ---` | — | yes | +| info | — | `[!INFO]` | yes | +| tip | — | `[!TIP]` | yes | +| debug | — | `[!DEBUG]` | yes | + +## Hide-strings mode + +NTTT generates the Crowdin hide-list itself (replacing the old grep pipeline). It +reads `crowdin string list --verbose` on stdin and prints the IDs of any string +whose source text contains a hideable marker: + +```bash +crowdin string list --verbose | nttt --hide-strings > ids.txt +while read -r id; do crowdin string edit "$id" --hidden; done < ids.txt +``` + +See [`doc/workflows/hide-strings.yml`](workflows/hide-strings.yml) for the CI version. diff --git a/doc/plan-dual-syntax-hide-strings.md b/doc/plan-dual-syntax-hide-strings.md new file mode 100644 index 0000000..32ece71 --- /dev/null +++ b/doc/plan-dual-syntax-hide-strings.md @@ -0,0 +1,203 @@ +# Plan: Dual-syntax support + marker hide-list generation for NTTT + +## Context + +NTTT ("Nina's Translation Tidy-up Tool") cleans up Crowdin-translated Raspberry Pi +project content. Today it: + +- Runs **only on the download side** (the `nttt-processing.yml` step), normalising and + reverting translated content after it comes back from Crowdin. +- Understands **only the legacy `kramdown-rpf` syntax** (`--- task ---`, `--- hint ---`, + `--- /no-print ---`, …) — see [cleanup_sections.py](nttt/cleanup_sections.py). + +Two things are changing: + +1. A **new "Raspberry Flavoured Markdown" (RFM)** syntax is being introduced — GFM + blockquote alerts (`> [!TASK]`, `> [!HINT]`, `> [!ACCORDION] Title`, `> [!NOPRINT]`, + page breaks, fenced-code attributes). NTTT must support **both** syntaxes, and **a single + file may mix the two** (confirmed with the user). +2. 
Marker hiding (so translators never translate structural markers) is currently done by a + brittle `grep` pipeline in the content repos' `hide-strings.yml`. We are moving that logic + into NTTT (branch name: `nttt-no-hide-strings`). **NTTT will generate the Crowdin hide-list** + (the chosen mechanism); markers stay in the files, and the existing download-side + fix/revert pipeline puts any mangled markers back to their English form. + +**Decisions confirmed with the user:** +- Mechanism = **generate Crowdin hide-list** (markers are hidden in Crowdin, not stripped from files). +- Marker set = **configurable** (ship a sensible default = all structural markers, editable by non-devs). +- Files **may mix** legacy + RFM markers — handle both within one file. +- Deliver **NTTT tool changes + example workflow ymls**. + +**Outcome:** NTTT supports legacy and RFM content, owns the hide-list generation (retiring the +grep in `hide-strings.yml`), and the marker set lives in one declarative data file that a +non-Python maintainer can edit. + +--- + +## Design overview — a single declarative marker registry + +The centrepiece (and the answer to "modular, maintainable by non-Python devs") is **one data +file** describing every block type and its legacy + RFM spellings, plus whether it should be +hidden. All code reads from it; adding/removing a block type or toggling hiding is a YAML edit, +no Python. + +`nttt/markers.yml` (ruamel.yaml is already a dependency): + +```yaml +# Edit this file to add/remove block types or change what gets hidden from translators. +# 'hide: true' => NTTT lists this marker's strings for Crowdin to hide. 
+markers: + - name: task + hide: true + legacy: { open: "--- task ---", close: "--- /task ---" } + rfm: { alert: "[!TASK]" } + - name: hint + hide: true + legacy: { open: "--- hint ---", close: "--- /hint ---" } + rfm: { alert: "[!HINT]" } + - name: collapse # RFM calls this ACCORDION; title is translatable + hide: true + legacy: { open: "--- collapse ---", close: "--- /collapse ---" } + rfm: { alert: "[!ACCORDION]" } + - name: no-print + hide: true + legacy: { open: "--- no-print ---", close: "--- /no-print ---" } + rfm: { alert: "[!NOPRINT]" } + # … save, new-page/page-break, print-only, challenge, code, quiz, question, + # choices, feedback, info, tip, debug … +raw_patterns: # non-block strings to hide (e.g. asset paths) + - "hero_image images/" +``` + +The full marker set is derived from the two attached specs (legacy `kramdown-rpf` and RFM draft). +Entries with only one of `legacy`/`rfm` are fine (e.g. `info`/`tip`/`debug` are RFM-only). + +`nttt/markers.py` — loader/accessor (single source of truth): +- `load_markers()` → parsed registry (cached). +- `hideable_strings()` → list of literal marker strings + raw patterns to match against + Crowdin's `string list` output (both syntaxes). +- `alert_keywords()` / legacy tag helpers for the cleanup modules. + +--- + +## Work items + +### 1. Marker registry (new) +- **`nttt/markers.yml`** — the declarative data file above (full set from both specs). +- **`nttt/markers.py`** — loader + accessors described above. Package the `.yml` via + `setup.py` (`package_data` / `include_package_data`). + +### 2. Hide-list generation mode (new) — replaces the grep in `hide-strings.yml` +- **`nttt/hide_strings.py`** — reads `crowdin string list --verbose` output (stdin or file), + filters rows whose source text contains a hideable marker string (from `markers.hideable_strings()`, + covering legacy **and** RFM), and prints the numeric string IDs (one per line). 
+- **CLI wiring** in [arguments.py](nttt/arguments.py) + [__init__.py](nttt/__init__.py): + add a `--hide-strings` mode flag. When present, `main()` dispatches to `hide_strings` and + reads stdin instead of running `tidyup_translations`. **Default behaviour (`nttt -Y YES`) is + unchanged** so the existing download workflow keeps working. + +### 3. RFM download-side cleanup (new) — mirrors the legacy section logic +- **`nttt/cleanup_alerts.py`** — `fix_alerts(content, logging)` and + `revert_alert_translation(name, content, en_content, logging)`: + - Normalise blockquote alert headers (`>[!TASK]` → `> [!TASK]`, spacing, Crowdin escape quirks). + - Revert translated alert keywords/`ACCORDION` titles to the English form **by position + against the English file**, reusing the proven algorithm in + [`revert_section_translation`](nttt/cleanup_sections.py) (extract → count-match → replace). + - Keyword set comes from `markers.py`. +- **Wire into [`fix_md_step`](nttt/tidyup.py:55)** alongside the existing legacy steps. Because + legacy (`--- x ---`) and RFM (`> [!X]`) patterns are disjoint, running both on every file + safely supports mixed files. Add matching `--disable` flags (`fix_alerts`, + `revert_alert_translation`) following the existing pattern in [arguments.py](nttt/arguments.py:54). + +### 4. Make legacy `cleanup_sections.py` registry-aware (light touch) +- Keep its generic `\w+` regexes, but source the **known legacy tag list and hide flags** from + `markers.py` so there is one source of truth. Avoid behavioural change to existing tests. + +### 5. Example workflows (deliver alongside the tool) +- Add **`doc/workflows/`** with updated copies for content repos to adopt: + - **`hide-strings.yml`** — install NTTT, then + `crowdin string list --verbose | nttt --hide-strings > ids.txt` and loop + `crowdin string edit "$id" --hidden < ids.txt`. (Replaces the grep/awk/sed pipeline and + fixes the existing bug where the `while read` loop receives no piped input.) 
+ - `nttt-processing.yml` / `upload-sources.yml` — carried over; note any version bump. +- Reference them from the README. + +### 6. Tests (follow existing two-layer pattern) +- **Unit tests** in `unit_test/`: `test_markers.py` (registry load + hideable strings), + `test_hide_strings.py` (filter sample `crowdin string list` text → expected IDs, legacy + RFM + + raw pattern rows), `test_cleanup_alerts.py` (normalise + revert, mirroring + [test_cleanup_sections.py]). +- **Fixture tests** in `test/`: add an RFM/mixed fixture (e.g. `step_7.md` across + `fixtures/{input,en,output}`) exercising `> [!TASK]`/`> [!HINT]`/`> [!ACCORDION]` reverts plus + a legacy marker in the same file. Reuse the `_run`/`INSPECT` harness in + [test_fixtures.py](test/test_fixtures.py). + +### 7. Local round-trip fixtures — inspect hide + restore by eye + +Beyond pass/fail unit tests, add **inspectable input→output fixtures** (same spirit as the +existing `test/fixtures/{input,en,output}` + `NTTT_INSPECT` harness) so a maintainer can open +the before/after files locally and confirm hiding and restoring look right. Two flows: + +**(a) Hide flow — "what would get hidden":** +- `test/fixtures/hide/input/` — sample English source files (legacy, RFM, and mixed) **and** a + captured `crowdin_string_list.txt` (real `crowdin string list --verbose` output saved once). +- `test/fixtures/hide/output/` (gitignored) — the generated hide-list IDs and a human-readable + report listing each matched source string next to the marker that matched it, so input vs + output is reviewable at a glance. +- Test runs `nttt --hide-strings` over the sample and writes both files; assertions check the + expected IDs/markers are present (legacy + RFM + `hero_image`) and unrelated prose is absent. 
+ +**(b) Restore flow — "translated → restored":** +- `test/fixtures/restore/input/` — **translated** step files where markers have been mangled the + way Crowdin/translators do it (`\---`, jammed lines, translated `--- taak ---`, translated + `> [!TAREA]`, bad spacing `>[!task]`, mixed legacy+RFM in one file). +- `test/fixtures/restore/en/` — the English reference files (the structural template). +- `test/fixtures/restore/expected/` — the **hand-authored correct restored** version, committed + so we have a clear oracle. +- `test/fixtures/restore/output/` (gitignored) — what NTTT actually produced. +- Test runs `fix_md_step` and (in normal mode) diffs `output` vs `expected`; in + `NTTT_INSPECT=1` mode it skips the diff and just writes `output` so you can open + `input` → `output` → `expected` side by side and eyeball the round-trip. + +Document both flows in `doc/transformations.md` so the local-check workflow is discoverable. + +### 8. Docs +- Update [doc/transformations.md](doc/transformations.md): add the RFM alert step and the + hide-list mode to the pipeline description and code map. +- New **`doc/markers.md`**: explains the registry, the legacy↔RFM mapping table, and + step-by-step "how to add a new block type" for non-Python maintainers. +- Update [README.md](README.md): document `--hide-strings` mode and link the new docs/workflows. +- Bump `nttt/_version.py`. + +--- + +## Verification + +1. **Unit + fixture tests:** + ```bash + python -m unittest discover -s unit_test -v + python -m unittest discover -s test -p "test_fixtures.py" -v + ``` + Inspect mode for the new RFM fixture before locking assertions: + ```bash + NTTT_INSPECT=1 python -m unittest discover -s test -p "test_fixtures.py" -v + ``` +2. **Hide-list mode** against a captured sample of `crowdin string list --verbose` output + (saved as a test fixture): confirm it emits the IDs of legacy markers, RFM alert lines, and + `hero_image images/` rows — and nothing else. +3. 
**Mixed-syntax round-trip:** run `fix_md_step` on a file containing both `--- task ---` and + `> [!TASK]` with a translated copy; confirm both are reverted to English and unrelated prose + is untouched. +4. **Backward compatibility:** `nttt -Y YES` (default tidyup) still processes legacy-only + content identically (existing `step_1`–`step_6` fixtures pass unchanged). +5. **Registry editability:** add a dummy block type to `markers.yml`, re-run the hide-list mode, + confirm it appears with no code change. + +--- + +## Notes / non-goals +- We are **not** stripping markers from files or using placeholder tokens (per the chosen + "generate hide-list" mechanism). Markers remain in source; Crowdin hides them. +- Renderer HTML output (the two spec docs) is **reference for marker syntax only** — NTTT does + not render HTML, so those HTML blocks are not test oracles here. +- Workflow ymls live in the content repos; we ship updated **examples**, the team wires them in. diff --git a/doc/transformations.md b/doc/transformations.md index 629888a..a0ad9da 100644 --- a/doc/transformations.md +++ b/doc/transformations.md @@ -16,42 +16,25 @@ NTTT does **not** process standalone `.html` files. HTML-related steps run on ** For each `.md` file, [`nttt/tidyup.py`](../nttt/tidyup.py) applies, in order: -1. **`restore_tree`** — for non-English locale folders, restore Markdown markers stripped before Crowdin upload. -2. **`fix_sections`** — normalise `---` section lines (Crowdin quirks). -3. **`revert_section_translation`** — optional; restore English section tag lines when structure matches. -4. **`trim_md_tags`** — strip padding inside paired Markdown delimiters (outside ` ``` ` fences). -5. **`trim_html_tags`** — strip padding inside simple inline HTML tags (outside single `` ` `` spans). -6. **`trim_formatting_tags`** — normalise `{ … }` attribute blocks after a word (Scratch/Pico-style). -7. **URL rewrite:** replace `/en/` with `//` everywhere in the file body. +1. 
**`fix_sections`** — normalise legacy `---` section lines (Crowdin quirks). +2. **`revert_section_translation`** — optional; restore English section tag lines when structure matches. +3. **`fix_alerts`** — normalise RFM blockquote alert headers (`>[! task ]` → `> [!TASK]`). +4. **`revert_alert_translation`** — optional; revert translated RFM alert keywords to English when structure matches (keeps translated titles, e.g. ACCORDION). +5. **`trim_md_tags`** — strip padding inside paired Markdown delimiters (outside ` ``` ` fences). +6. **`trim_html_tags`** — strip padding inside simple inline HTML tags (outside single `` ` `` spans). +7. **`trim_formatting_tags`** — normalise `{ … }` attribute blocks after a word (Scratch/Pico-style). +8. **URL rewrite:** replace `/en/` with `//` everywhere in the file body. -Steps 1–5 can be skipped via **`--disable`** (see [`nttt/arguments.py`](../nttt/arguments.py)). +Both syntaxes are handled on every file, so a file may freely **mix** legacy and RFM markers. + +Steps 1–7 can be skipped via **`--disable`** (see [`nttt/arguments.py`](../nttt/arguments.py)): +`fix_sections`, `revert_section_translation`, `fix_alerts`, `revert_alert_translation`, +`fix_md`, `fix_html`, `fix_formatting`. `meta.yml` is handled separately by **`fix_meta`** (YAML round-trip, revert non-translatable keys from English). This doc focuses on Markdown/HTML-style transforms. --- -## Crowdin marker strip/restore (`nttt/strip.py`, `nttt/restore.py`) - -**Modes:** `--mode strip`, `--mode restore`, and default `--mode tidy`. - -| Mode | Behaviour | -|------|-----------| -| `strip` | Runs on `en/` before Crowdin upload. Removes structural-only markers and keeps labelled marker text translatable. | -| `restore` | Runs on a locale folder after Crowdin download. Rebuilds markers from the matching English file. | -| `tidy` | For non-English locale folders, runs restore first, then the existing tidy transforms. 
| - -**Marker classification (`nttt/markers.py`):** - -| Kind | Pattern | Strip output | Restore output | -|------|---------|--------------|----------------| -| Modern bare | `> [!TASK]`, `> [!SAVE]`, nested forms like `> > [!HINT]` | Dropped. A following empty blockquote line (`>`, `> >`) is also dropped. | Copied back from `en/`. | -| Modern labelled | `> [!ACCORDION] Where are my voice recordings stored?` | Rewritten to `> Where are my voice recordings stored?`. | Rewritten to `> [!ACCORDION] `. | -| Legacy bare | `--- task ---`, `--- /task ---`, `--- print-only ---`, `--- feedback ---` | Dropped. | Copied back from `en/`. | - -Restore uses line-index alignment against the stripped English file. If the translated file already contains at least as many legacy bare marker lines as the English reference, restore is skipped for that file to avoid duplicating markers. If the translated file has a different number of lines from the stripped English reference, NTTT logs a warning and leaves that file unchanged for this step. - -Fenced code blocks split by ` ``` ` are not stripped. - ## 1. 
Section markers (`nttt/cleanup_sections.py`) **Function:** `fix_sections` @@ -127,8 +110,57 @@ After cleanup: **replace every `/en/` with `//`** in the Markdown file |---------|--------| | Orchestration | `nttt/tidyup.py`, `nttt/__init__.py` | | CLI / disable flags | `nttt/arguments.py` | -| Sections | `nttt/cleanup_sections.py` | +| Sections (legacy `--- … ---`) | `nttt/cleanup_sections.py` | +| Alerts (RFM `> [!…]`) | `nttt/cleanup_alerts.py` | +| Marker registry (both syntaxes) | `nttt/markers.yml`, `nttt/markers.py` | +| Hide-list generation | `nttt/hide_strings.py` | | Markdown emphasis / code delimiters | `nttt/cleanup_markdown.py` | | Inline HTML | `nttt/cleanup_html.py` | | Brace attributes | `nttt/cleanup_formatting.py` | | Split "every other segment" | `nttt/utilities.py` → `apply_to_every_other_part` | + +--- + +## Running the fixture tests + +`test/test_fixtures.py` contains six integration tests, one per transformation type. Each test runs a real Dutch-translation `.md` file through `fix_md_step` and writes the result to `test/fixtures/output/` (gitignored) so you can open it and compare it with the input. 
+ +| Fixture | What it covers | +|---------|---------------| +| `step_1.md` | Section markers — escaped `\---`, jammed lines, section name revert | +| `step_2.md` | Markdown delimiters — `_ text _`, `** text **`; code block preserved | +| `step_3.md` | Inline HTML — ` Enter ` → `Enter`; backtick spans preserved | +| `step_4.md` | Formatting braces — `{ : class = "..."}`, `_blank` target | +| `step_5.md` | URL rewrite — `/en/` → `/nl/` | +| `step_6.md` | All of the above combined | + +**Normal run** (assertions on — use in CI or to catch regressions): + +```bash +python -m unittest discover -s test -p "test_fixtures.py" -v +``` + +**Inspect mode** (assertions off — use when adding new input to see the raw output before writing assertions): + +```bash +NTTT_INSPECT=1 python -m unittest discover -s test -p "test_fixtures.py" -v +``` + +After either run, open any file in `test/fixtures/output/` alongside its counterpart in `test/fixtures/input/` to see before and after. + +## Round-trip checks (hide + restore) + +`test/test_roundtrip.py` lets you inspect the two hide/restore flows locally (same +`NTTT_INSPECT` convention): + +| Flow | Fixtures | What it shows | +|------|----------|---------------| +| **Hide** | `test/fixtures/hide/` | A captured `crowdin string list --verbose` (`input/`) → the IDs to hide and a `report.txt` matching each ID to its marker (`output/`). | +| **Restore** | `test/fixtures/restore/` | A mangled translation (`input/`) + the English template (`en/`) → the restored file (`output/`), diffed against the committed oracle (`expected/`). Exercises a file that **mixes** legacy `--- task ---` with RFM `> [!HINT]` / `> [!ACCORDION]`. | + +```bash +python -m unittest discover -s test -p "test_roundtrip.py" -v # assertions on +NTTT_INSPECT=1 python -m unittest discover -s test -p "test_roundtrip.py" -v # write outputs only +``` + +See [doc/markers.md](markers.md) for the marker registry and hide-strings mode. 
diff --git a/doc/workflows/hide-strings.yml b/doc/workflows/hide-strings.yml new file mode 100644 index 0000000..7beed73 --- /dev/null +++ b/doc/workflows/hide-strings.yml @@ -0,0 +1,59 @@ +# Example workflow for CONTENT repositories (not this tool repo). +# +# Hides structural markers from translators in Crowdin. The list of markers is +# generated by NTTT from its marker registry (markers.yml), covering both the +# legacy (--- task ---) and RFM (> [!TASK]) syntaxes. This replaces the previous +# hand-written grep/awk/sed pipeline. +# +# Copy this into the content repo's .github/workflows/ directory. + +name: Crowdin Hide Strings + +on: + workflow_run: + workflows: ["Crowdin Upload Action"] + types: + - completed + +jobs: + crowdin-hide: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install NTTT + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/raspberrypilearning/nttt.git + + - name: Install Crowdin CLI + run: | + curl -L https://github.com/crowdin/crowdin-cli/releases/latest/download/crowdin-cli.zip -o crowdin-cli.zip + unzip crowdin-cli.zip -d crowdin-cli + mkdir -p ~/bin + mv crowdin-cli/*/crowdin ~/bin/crowdin + cp crowdin-cli/*/crowdin-cli.jar ~/bin/crowdin-cli.jar + chmod +x ~/bin/crowdin + echo "PATH=$HOME/bin:$PATH" >> $GITHUB_ENV + + - name: Hide matching strings + run: | + set -euo pipefail + crowdin --version + # NTTT reads the Crowdin listing and prints the IDs of strings to hide + # (those containing a marker from markers.yml, legacy or RFM). 
+ crowdin string list --verbose | nttt --hide-strings > ids.txt + echo "Hiding $(wc -l < ids.txt) strings" + while read -r id; do + crowdin string edit "$id" --hidden + done < ids.txt + env: + CROWDIN_PROJECT_ID: ${{ secrets.CROWDIN_PROJECT_ID }} + CROWDIN_API_TOKEN: ${{ secrets.CROWDIN_API_TOKEN }} diff --git a/nttt/__init__.py b/nttt/__init__.py index 546ab30..42b046e 100644 --- a/nttt/__init__.py +++ b/nttt/__init__.py @@ -1,24 +1,17 @@ from .arguments import parse_command_line, resolve_arguments, check_arguments, show_arguments -from .constants import ArgumentKeyConstants, Modes -from .restore import restore_tree -from .strip import strip_tree from .tidyup import tidyup_translations +from .hide_strings import run as run_hide_strings from ._version import __version__ def main(): command_line_args = parse_command_line(__version__) + + # Hide-strings mode: generate the Crowdin hide-list from stdin and exit. + if getattr(command_line_args, "hide_strings", False): + run_hide_strings() + return + resolved_arguments = resolve_arguments(command_line_args) show_arguments(resolved_arguments) if (check_arguments(resolved_arguments)): - mode = resolved_arguments[ArgumentKeyConstants.MODE] - if mode == Modes.STRIP: - strip_tree( - resolved_arguments[ArgumentKeyConstants.INPUT], - resolved_arguments[ArgumentKeyConstants.OUTPUT]) - elif mode == Modes.RESTORE: - restore_tree( - resolved_arguments[ArgumentKeyConstants.INPUT], - resolved_arguments[ArgumentKeyConstants.ENGLISH], - resolved_arguments[ArgumentKeyConstants.OUTPUT]) - else: - tidyup_translations(resolved_arguments) + tidyup_translations(resolved_arguments) diff --git a/nttt/_version.py b/nttt/_version.py index a4cb70e..a42cda7 100644 --- a/nttt/_version.py +++ b/nttt/_version.py @@ -1,2 +1,2 @@ # The approach is taken from https://packaging.python.org/guides/single-sourcing-package-version/ -__version__ = "0.5.3" +__version__ = "0.6.0" diff --git a/nttt/arguments.py b/nttt/arguments.py index 6e2ca09..b8091ef 100644 
--- a/nttt/arguments.py +++ b/nttt/arguments.py @@ -1,4 +1,4 @@ -from .constants import ArgumentKeyConstants, Modes +from .constants import ArgumentKeyConstants import os from pathlib import Path from argparse import ArgumentParser @@ -51,22 +51,24 @@ def parse_command_line(version): parser.add_argument("-l", "--language", help="The language of the content to be tidied up, defaults to basename(INPUT).") parser.add_argument("-v", "--volunteers", help="The list of volunteers as a comma separated list, defaults to an empty list.") parser.add_argument("-f", "--final", help="The number of the final step file, defaults to the step file with the highest number.") - parser.add_argument("-m", "--mode", choices=[Modes.TIDY, Modes.STRIP, Modes.RESTORE], - help="The processing mode. Options are: tidy (default cleanup), " - "strip (remove non-translatable structural markers before Crowdin upload), " - "restore (restore stripped structural markers after Crowdin download). " - "Default is tidy.") parser.add_argument("-D", "--Disable", help="The risky features to be disabled, separated by commas. " "Options are: fix_md (fix common markdown-related issues), " "fix_html (fix common issues in HTML-like tags (Return)), " "fix_sections (fix common issues in section tags (--- hint ---)), " "revert_section_translation (revert translation for section tags), " + "fix_alerts (fix common issues in RFM alert tags (> [!HINT])), " + "revert_alert_translation (revert translation for RFM alert tags), " "fix_formatting (fix common issues in formatting tags ({:class=\"block3motion\"})). " "Defaults to all risky features to be enabled.") parser.add_argument("-L", "--Logging", help="Logging of modifications. Options are on and off. Default is off.") parser.add_argument("-Y", "--Yes", help="Automatic yes to prompts. " "If enabled assume 'yes' as answer to all prompts and run non-interactively. " "Options are on and off. 
Default is off.") + parser.add_argument("--hide-strings", action="store_true", + help="Hide-strings mode. Reads 'crowdin string list --verbose' " + "output on stdin and prints the IDs of strings to hide " + "(those containing a marker from markers.yml), one per line. " + "Does not tidy up any files.") return parser.parse_args() @@ -125,11 +127,6 @@ def resolve_arguments(command_line_args): else: arguments[ArgumentKeyConstants.YES] = "off" - if hasattr(command_line_args, "mode") and command_line_args.mode: - arguments[ArgumentKeyConstants.MODE] = command_line_args.mode - else: - arguments[ArgumentKeyConstants.MODE] = Modes.TIDY - return arguments @@ -148,7 +145,6 @@ def show_arguments(arguments): print("Disabled functions - '{}'".format(arguments[ArgumentKeyConstants.DISABLE])) print("Logging - '{}'".format(arguments[ArgumentKeyConstants.LOGGING])) print("Yes - '{}'".format(arguments[ArgumentKeyConstants.YES])) - print("Mode - '{}'".format(arguments[ArgumentKeyConstants.MODE])) def check_folder(folder): diff --git a/nttt/cleanup_alerts.py b/nttt/cleanup_alerts.py new file mode 100644 index 0000000..bece627 --- /dev/null +++ b/nttt/cleanup_alerts.py @@ -0,0 +1,86 @@ +""" +Clean-up for Raspberry Flavoured Markdown (RFM) blockquote alerts, e.g. + + > [!TASK] + > [!HINT] + > [!ACCORDION] Downloading the software + +This mirrors ``cleanup_sections.py`` (which handles the legacy ``--- task ---`` +syntax). Both run on every file, so a file may freely mix the two syntaxes. +""" +import re +import sys +from .nttt_logging import log_replacement + + +# Matches a blockquote alert header line. Tolerates: +# * a missing/extra space after '>' ( ">[!TASK]" ) +# * spaces inside the brackets ( "> [! TASK ]" ) +# * a Crowdin backslash escape ( "> \[!TASK]" ) +# * nested blockquote levels ( "> > [!HINT]" ) +# Captures the blockquote prefix, the keyword, and any trailing title text +# (the title is translatable, e.g. for ACCORDION, so it is preserved). 
+_ALERT_HEADER_RE = re.compile( + r"^(?P<prefix>[ \t]*(?:>[ \t]*)+)\\?\[!\s*(?P<kw>[^\]\r\n]+?)\s*\](?P<title>[ \t]*[^\r\n]*)$", + re.MULTILINE, +) + + +def _normalise_prefix(prefix): + """Collapse a blockquote prefix to one space after each '>' ("> > ").""" + levels = prefix.count(">") + return "> " * levels + + +def _format_alert(prefix, keyword, title): + new_prefix = _normalise_prefix(prefix) + title = title.strip() + new_title = (" " + title) if title else "" + return f"{new_prefix}[!{keyword}]{new_title}" + + +def fix_alerts(md_file_content, logging): + """Normalise RFM alert header spacing/case (e.g. ">[! task ]" -> "> [!TASK]").""" + + def replacement(matchobj): + keyword = matchobj.group("kw").strip().upper() + new_line = _format_alert(matchobj.group("prefix"), keyword, matchobj.group("title")) + log_replacement(matchobj.group(0), new_line, logging) + return new_line + + return _ALERT_HEADER_RE.sub(replacement, md_file_content) + + +def revert_alert_translation(md_file_name, md_file_content, en_file_content, logging): + """ + Reverts translated alert keywords back to English (e.g. "> [!TAREA]" -> + "> [!TASK]") by position against the English file, keeping any translated + title text. Only runs when the alert counts match, mirroring + ``revert_section_translation``. + """ + md_lines = md_file_content.split("\n") + en_lines = en_file_content.split("\n") + + md_indices = [i for i, line in enumerate(md_lines) if _ALERT_HEADER_RE.match(line)] + en_keywords = [ + _ALERT_HEADER_RE.match(line).group("kw").strip().upper() + for line in en_lines + if _ALERT_HEADER_RE.match(line) + ] + + if len(md_indices) != len(en_keywords): + print( + "Warning ({}): Different alert structure in the original (en) and the " + "translated pages. 
Reverting of translated alert keywords will not be " + "performed".format(md_file_name), + file=sys.stderr, + ) + return md_file_content + + for position, line_index in enumerate(md_indices): + match = _ALERT_HEADER_RE.match(md_lines[line_index]) + new_line = _format_alert(match.group("prefix"), en_keywords[position], match.group("title")) + log_replacement(md_lines[line_index], new_line, logging) + md_lines[line_index] = new_line + + return "\n".join(md_lines) diff --git a/nttt/constants.py b/nttt/constants.py index 1b08b17..ce14cee 100644 --- a/nttt/constants.py +++ b/nttt/constants.py @@ -17,13 +17,6 @@ class ArgumentKeyConstants: DISABLE = 'DISABLE' LOGGING = 'LOGGING' YES = 'YES' - MODE = 'MODE' - - -class Modes: - TIDY = "tidy" - STRIP = "strip" - RESTORE = "restore" class RegexConstants: diff --git a/nttt/hide_strings.py b/nttt/hide_strings.py new file mode 100644 index 0000000..d310a3f --- /dev/null +++ b/nttt/hide_strings.py @@ -0,0 +1,72 @@ +""" +Generates the list of Crowdin string IDs that should be hidden from translators. + +Reads the output of ``crowdin string list --verbose`` (on stdin) and prints, one +per line, the numeric ID of every string whose source text contains a marker +listed in the registry (see ``markers.py`` / ``markers.yml``). This replaces the +hand-written grep/awk/sed pipeline that used to live in ``hide-strings.yml`` and +covers both the legacy and RFM syntaxes. + +Typical use in CI: + + crowdin string list --verbose | nttt --hide-strings > ids.txt + while read -r id; do crowdin string edit "$id" --hidden; done < ids.txt +""" +import re +import sys +from .markers import hideable_strings + + +# The verbose listing puts the string ID first, e.g. "#12345 source text ...". +_ID_RE = re.compile(r"^#?(\d+)\b") + + +def find_hidden_strings(string_list_text, markers=None): + """ + Returns a list of dicts ``{"id", "marker", "source"}`` for each line of the + Crowdin listing whose source text contains a hideable marker. 
+ """ + markers = hideable_strings() if markers is None else markers + results = [] + + for line in string_list_text.splitlines(): + matched = next((marker for marker in markers if marker in line), None) + if matched is None: + continue + + tokens = line.split() + if not tokens: + continue + id_match = _ID_RE.match(tokens[0]) + if id_match: + results.append({"id": id_match.group(1), "marker": matched, "source": line.strip()}) + + return results + + +def unique_ids(results): + """Returns the IDs from ``find_hidden_strings`` de-duplicated, order preserved.""" + seen = set() + ids = [] + for result in results: + if result["id"] not in seen: + seen.add(result["id"]) + ids.append(result["id"]) + return ids + + +def format_report(results): + """Human-readable 'id <- matched marker <- source' report, for inspection.""" + return "\n".join( + "{id}\t{marker}\t{source}".format(**result) for result in results + ) + + +def run(input_stream=None, output_stream=None): + """Reads a Crowdin listing from ``input_stream`` and prints IDs to hide.""" + input_stream = input_stream if input_stream is not None else sys.stdin + output_stream = output_stream if output_stream is not None else sys.stdout + + results = find_hidden_strings(input_stream.read()) + for string_id in unique_ids(results): + print(string_id, file=output_stream) diff --git a/nttt/markers.py b/nttt/markers.py index 81e5366..a07a40e 100644 --- a/nttt/markers.py +++ b/nttt/markers.py @@ -1,96 +1,99 @@ +""" +Loads and exposes the marker registry (``markers.yml``). + +This is the single source of truth for the structural markers used in both the +legacy (kramdown-rpf) and the Raspberry Flavoured Markdown (RFM) syntaxes. Other +modules ask this module *what* the markers are; the actual list lives in the data +file so it can be edited without touching Python. 
+""" +import os import re - - -LINE_KIND_BARE_MARKER = "bare" -LINE_KIND_LABELLED_MARKER = "labelled" -LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE = "paired_empty_blockquote" -LINE_KIND_REGULAR = "regular" - - -RFM_BARE_MARKER_PATTERN = re.compile( - r'^(?P<prefix>\s*(?:>\s*)+)\[!(?P<tag>[A-Z][A-Z0-9_-]*)\]\s*$' -) - -RFM_LABELLED_MARKER_PATTERN = re.compile( - r'^(?P<prefix>\s*(?:>\s*)+)\[!(?P<tag>[A-Z][A-Z0-9_-]*)\]\s+(?P<label>\S.*?)\s*$' -) - -LEGACY_BARE_MARKER_PATTERN = re.compile( - r'^\s*---\s+/?[\w-]+\s+---\s*$' -) - -EMPTY_BLOCKQUOTE_PATTERN = re.compile(r'^\s*(?:>\s*)+$') -FENCE_LINE_PREFIX_PATTERN = re.compile(r'^\s*(?:>\s*)*') -SAME_LINE_FENCE_PATTERN = re.compile(r'^```[^`]*```$') - - -def remove_eol(line): - return line.rstrip("\r\n") - - -def get_eol(line): - if line.endswith("\r\n"): - return "\r\n" - if line.endswith("\n"): - return "\n" - if line.endswith("\r"): - return "\r" - return "" - - -def classify_line(line): - line_without_eol = remove_eol(line) - - match = RFM_LABELLED_MARKER_PATTERN.match(line_without_eol) - if match: - return LINE_KIND_LABELLED_MARKER, match - - match = RFM_BARE_MARKER_PATTERN.match(line_without_eol) - if match: - return LINE_KIND_BARE_MARKER, match - - match = LEGACY_BARE_MARKER_PATTERN.match(line_without_eol) - if match: - return LINE_KIND_BARE_MARKER, match - - match = EMPTY_BLOCKQUOTE_PATTERN.match(line_without_eol) - if match: - return LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE, match - - return LINE_KIND_REGULAR, None - - -def is_marker_line(line): - line_kind, _ = classify_line(line) - return line_kind in (LINE_KIND_BARE_MARKER, LINE_KIND_LABELLED_MARKER) - - -def is_rfm_bare_marker_line(line): - return RFM_BARE_MARKER_PATTERN.match(remove_eol(line)) is not None - - -def is_paired_empty_blockquote(line): - line_kind, _ = classify_line(line) - return line_kind == LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE - - -def iter_lines_with_fence_state(content): - inside_fenced_code = False - - for line in content.splitlines(keepends=True): - yield 
line, inside_fenced_code - if _count_fence_markers(line) % 2 == 1: - inside_fenced_code = not inside_fenced_code - - -def _count_fence_markers(line): - content = remove_eol(line) - content_without_prefix = content[FENCE_LINE_PREFIX_PATTERN.match(content).end():] - - if not content_without_prefix.startswith("```"): - return 0 - - if SAME_LINE_FENCE_PATTERN.match(content_without_prefix): - return 2 - - return 1 +import ruamel.yaml + + +_MARKERS_FILE = os.path.join(os.path.dirname(__file__), "markers.yml") +_registry_cache = None + +# Matches an RFM alert token such as "[!TASK]" and captures the keyword. +_ALERT_TOKEN_RE = re.compile(r"\[!\s*([^\]\s][^\]]*?)\s*\]") + + +def load_markers(markers_file=_MARKERS_FILE): + """Returns the parsed marker registry as a dict, caching the default file.""" + global _registry_cache + + if markers_file == _MARKERS_FILE and _registry_cache is not None: + return _registry_cache + + yaml_parser = ruamel.yaml.YAML(typ="safe") + with open(markers_file, encoding="utf-8") as f: + registry = yaml_parser.load(f) or {} + + registry.setdefault("markers", []) + registry.setdefault("raw_patterns", []) + + if markers_file == _MARKERS_FILE: + _registry_cache = registry + return registry + + +def _markers(registry=None): + return (registry or load_markers()).get("markers", []) + + +def hideable_strings(registry=None): + """ + Returns the list of literal strings to hide from translators (both syntaxes + plus raw patterns). Each is matched as a substring against Crowdin source + text. Order is preserved and duplicates removed. 
+ """ + registry = registry or load_markers() + strings = [] + + for marker in _markers(registry): + if not marker.get("hide", False): + continue + legacy = marker.get("legacy") or {} + rfm = marker.get("rfm") or {} + for value in (legacy.get("open"), legacy.get("close"), rfm.get("alert")): + if value: + strings.append(value) + + strings.extend(registry.get("raw_patterns", [])) + + # de-duplicate, preserving order + seen = set() + unique = [] + for s in strings: + if s not in seen: + seen.add(s) + unique.append(s) + return unique + + +def alert_keywords(registry=None): + """Returns the set of canonical English RFM alert keywords (e.g. {"TASK", "HINT"}).""" + keywords = set() + for marker in _markers(registry): + rfm = marker.get("rfm") or {} + alert = rfm.get("alert") + if alert: + match = _ALERT_TOKEN_RE.search(alert) + if match: + keywords.add(match.group(1).strip().upper()) + return keywords + + +def legacy_tag_names(registry=None): + """ + Returns the set of known legacy section tag names (e.g. {"task", "hint"}), + derived from the registry's legacy open markers ("--- task ---" -> "task"). + """ + names = set() + for marker in _markers(registry): + legacy = marker.get("legacy") or {} + opener = legacy.get("open", "") + stripped = opener.strip().strip("-").strip() + if stripped: + names.add(stripped) + return names diff --git a/nttt/markers.yml b/nttt/markers.yml new file mode 100644 index 0000000..ff1f608 --- /dev/null +++ b/nttt/markers.yml @@ -0,0 +1,109 @@ +# NTTT marker registry +# ===================== +# This is the single source of truth for the structural "markers" used in +# Raspberry Pi project content. NTTT uses it for two things: +# +# 1. Hiding - generating the list of marker strings that should be hidden +# from translators in Crowdin (see `nttt --hide-strings`). +# 2. Restoring - recognising and reverting markers that translators or Crowdin +# may have changed, so they go back to their English form. 
+# +# Two markdown syntaxes are supported and may appear in the same file: +# * legacy - kramdown-rpf, e.g. --- task --- ... --- /task --- +# * rfm - Raspberry Flavoured Markdown (GFM alerts), e.g. > [!TASK] +# +# HOW TO EDIT (no Python needed): +# * To add a new block type, copy an existing "- name:" entry and change the +# values. Include whichever of `legacy:` / `rfm:` apply (a block may have +# only one syntax). +# * `hide: true` means NTTT lists this marker for Crowdin to hide. +# `hide: false` keeps it visible to translators. +# * `legacy.open` / `legacy.close` are the exact marker lines. `close` is +# optional (some blocks, e.g. save, have no closing marker). +# * `rfm.alert` is the alert token exactly as it appears, including brackets. +# +# After editing, run the tests: python -m unittest discover -s unit_test + +markers: + - name: task + hide: true + legacy: { open: "--- task ---", close: "--- /task ---" } + rfm: { alert: "[!TASK]" } + + - name: hints + hide: true + legacy: { open: "--- hints ---", close: "--- /hints ---" } + # RFM has no separate "hints" container - individual [!HINT] blocks are grouped. + + - name: hint + hide: true + legacy: { open: "--- hint ---", close: "--- /hint ---" } + rfm: { alert: "[!HINT]" } + + - name: collapse # RFM calls this ACCORDION; its title is translatable. + hide: true + legacy: { open: "--- collapse ---", close: "--- /collapse ---" } + rfm: { alert: "[!ACCORDION]" } + + - name: challenge + hide: true + legacy: { open: "--- challenge ---", close: "--- /challenge ---" } + rfm: { alert: "[!CHALLENGE]" } + + - name: code + hide: true + legacy: { open: "--- code ---", close: "--- /code ---" } + # RFM expresses code options as fenced-code attributes, not a marker line. 
+ + - name: save + hide: true + legacy: { open: "--- save ---" } # no closing marker + rfm: { alert: "[!SAVE]" } + + - name: new-page + hide: true + legacy: { open: "--- new-page ---" } # no closing marker + rfm: { alert: '<br class="page-break" />' } + + - name: no-print + hide: true + legacy: { open: "--- no-print ---", close: "--- /no-print ---" } + rfm: { alert: "[!NOPRINT]" } + + - name: print-only + hide: true + legacy: { open: "--- print-only ---", close: "--- /print-only ---" } + rfm: { alert: "[!PRINTONLY]" } + + - name: quiz + hide: true + legacy: { open: "--- quiz ---", close: "--- /quiz ---" } + + - name: question + hide: true + legacy: { open: "--- question ---", close: "--- /question ---" } + + - name: choices + hide: true + legacy: { open: "--- choices ---", close: "--- /choices ---" } + + - name: feedback + hide: true + legacy: { open: "--- feedback ---", close: "--- /feedback ---" } + + - name: info + hide: true + rfm: { alert: "[!INFO]" } + + - name: tip + hide: true + rfm: { alert: "[!TIP]" } + + - name: debug + hide: true + rfm: { alert: "[!DEBUG]" } + +# Non-block strings that should also be hidden from translators. +# These are matched as plain substrings against the Crowdin source text. 
+raw_patterns: + - "hero_image images/" diff --git a/nttt/restore.py b/nttt/restore.py deleted file mode 100644 index 709527d..0000000 --- a/nttt/restore.py +++ /dev/null @@ -1,242 +0,0 @@ -import os -import re -import sys -from .markers import ( - LEGACY_BARE_MARKER_PATTERN, - LINE_KIND_BARE_MARKER, - LINE_KIND_LABELLED_MARKER, - LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE, - LINE_KIND_REGULAR, - classify_line, - get_eol, - iter_lines_with_fence_state, - is_rfm_bare_marker_line, - is_paired_empty_blockquote, - remove_eol, -) -from .strip import strip_md -from .utilities import find_files, get_file, save_file - - -TRANSLATED_LABEL_PATTERN = re.compile(r'^(?P<prefix>\s*(?:>\s*)+)(?P<label>.*)$') -_CROWDIN_TITLE_HEADING_PATTERN = re.compile(r'^##\s*title:\s*(.+)$') -_CROWDIN_HEADING_JAM_MARKER_PATTERN = re.compile(r'^\s*##\s+\\?---') - - -def _count_legacy_bare_markers(content): - return sum( - 1 for line in content.splitlines() - if LEGACY_BARE_MARKER_PATTERN.match(remove_eol(line)) - ) - - -def _already_has_full_legacy_markers(translated_content, english_content): - english_count = _count_legacy_bare_markers(english_content) - if english_count == 0: - return False - return _count_legacy_bare_markers(translated_content) >= english_count - - -def _normalize_crowdin_stripped(translated_content): - content = translated_content.replace("\\---", "---") - lines = content.splitlines(keepends=True) - normalized_lines = [] - index = 0 - - while index < len(lines): - line = lines[index] - bare_line = remove_eol(line) - - if LEGACY_BARE_MARKER_PATTERN.match(bare_line): - index += 1 - continue - - if _CROWDIN_HEADING_JAM_MARKER_PATTERN.match(bare_line): - index += 1 - continue - - if bare_line.strip() == "---" and index + 1 < len(lines): - lookahead = index + 1 - while lookahead < len(lines) and remove_eol(lines[lookahead]).strip() == "": - lookahead += 1 - title_match = _CROWDIN_TITLE_HEADING_PATTERN.match(remove_eol(lines[lookahead])) - if title_match is not None: - eol = get_eol(line) 
or "\n" - normalized_lines.append(line) - normalized_lines.append("title: {}{}".format( - title_match.group(1).strip(), - get_eol(lines[lookahead]) or eol)) - normalized_lines.append("---{}".format(eol)) - index = lookahead + 1 - continue - - title_match = _CROWDIN_TITLE_HEADING_PATTERN.match(bare_line) - if title_match is not None: - eol = get_eol(line) or "\n" - normalized_lines.append("---{}".format(eol)) - normalized_lines.append("title: {}{}".format(title_match.group(1).strip(), eol)) - normalized_lines.append("---{}".format(eol)) - index += 1 - continue - - normalized_lines.append(line) - index += 1 - - return "".join(normalized_lines) - - -def _align_to_english_blanks(translated_content, english_content): - english_lines = strip_md(english_content).splitlines(keepends=True) - translated_lines = translated_content.splitlines(keepends=True) - aligned_lines = [] - translated_index = 0 - - for english_line in english_lines: - if remove_eol(english_line).strip() == "": - aligned_lines.append(get_eol(english_line) or "\n") - if ( - translated_index < len(translated_lines) - and remove_eol(translated_lines[translated_index]).strip() == "" - ): - translated_index += 1 - continue - - while ( - translated_index < len(translated_lines) - and remove_eol(translated_lines[translated_index]).strip() == "" - ): - translated_index += 1 - - if translated_index >= len(translated_lines): - return None - - aligned_lines.append(translated_lines[translated_index]) - translated_index += 1 - - while ( - translated_index < len(translated_lines) - and remove_eol(translated_lines[translated_index]).strip() == "" - ): - translated_index += 1 - - if translated_index != len(translated_lines): - return None - - return "".join(aligned_lines) - - -def restore_md(translated_content, english_content, file_label): - if _already_has_full_legacy_markers(translated_content, english_content): - return translated_content - - normalized_content = _normalize_crowdin_stripped(translated_content) - 
aligned_content = _align_to_english_blanks(normalized_content, english_content) - translated_content = aligned_content if aligned_content is not None else normalized_content - - translated_lines = translated_content.splitlines(keepends=True) - expected_line_count = len(strip_md(english_content).splitlines(keepends=True)) - - if len(translated_lines) != expected_line_count: - print("Warning ({}): Different stripped structure in the original (en) and translated pages. " - "Restoring stripped markers will not be performed. Expected {} translated lines, found {}.".format( - file_label, - expected_line_count, - len(translated_lines)), - file=sys.stderr) - return translated_content - - restored_lines = [] - translated_index = 0 - english_actions = _build_english_actions(english_content) - - for action_kind, english_line, match in english_actions: - if action_kind in (LINE_KIND_BARE_MARKER, LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE): - restored_lines.append(english_line) - continue - - translated_line = translated_lines[translated_index] - translated_index += 1 - - if action_kind == LINE_KIND_LABELLED_MARKER: - restored_lines.append(_restore_labelled_marker(english_line, match, translated_line)) - else: - restored_lines.append(translated_line) - - return "".join(restored_lines) - - -def restore_tree(input_folder, english_folder, output_folder): - files_to_restore = find_files(input_folder, extensions=[".md"]) - - for source_file_path in files_to_restore: - relative_file_name = os.path.relpath(source_file_path, input_folder) - english_file_path = os.path.join(english_folder, relative_file_name) - output_file_path = os.path.join(output_folder, relative_file_name) - output_file_folder = os.path.dirname(output_file_path) - - if not os.path.exists(output_file_folder): - os.makedirs(output_file_folder) - - content, suggested_eol = get_file(source_file_path) - if os.path.isfile(english_file_path): - english_content, _ = get_file(english_file_path) - content = restore_md(content, 
english_content, relative_file_name) - - save_file(output_file_path, content, suggested_eol) - - -def _build_english_actions(english_content): - lines_with_fence_state = list(iter_lines_with_fence_state(english_content)) - lines = [line for line, _ in lines_with_fence_state] - outside_fence = [not inside for _, inside in lines_with_fence_state] - actions = [] - index = 0 - - while index < len(lines): - line = lines[index] - - if not outside_fence[index]: - actions.append((LINE_KIND_REGULAR, line, None)) - index += 1 - continue - - line_kind, match = classify_line(line) - - if line_kind == LINE_KIND_BARE_MARKER: - actions.append((LINE_KIND_BARE_MARKER, line, match)) - if ( - is_rfm_bare_marker_line(line) - and index + 1 < len(lines) - and outside_fence[index + 1] - and is_paired_empty_blockquote(lines[index + 1]) - ): - actions.append((LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE, lines[index + 1], None)) - index += 2 - else: - index += 1 - continue - - if line_kind == LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE: - actions.append((LINE_KIND_REGULAR, line, None)) - else: - actions.append((line_kind, line, match)) - index += 1 - - return actions - - -def _restore_labelled_marker(english_line, english_match, translated_line): - translated_label = _extract_translated_label(translated_line) - return "{}[!{}] {}{}".format( - english_match.group("prefix"), - english_match.group("tag"), - translated_label, - get_eol(translated_line) or get_eol(english_line)) - - -def _extract_translated_label(translated_line): - line_without_eol = remove_eol(translated_line) - match = TRANSLATED_LABEL_PATTERN.match(line_without_eol) - if match: - return match.group("label") - return line_without_eol diff --git a/nttt/strip.py b/nttt/strip.py deleted file mode 100644 index a5b6844..0000000 --- a/nttt/strip.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -from .markers import ( - LINE_KIND_BARE_MARKER, - LINE_KIND_LABELLED_MARKER, - classify_line, - get_eol, - iter_lines_with_fence_state, - is_rfm_bare_marker_line, 
- is_paired_empty_blockquote, -) -from .utilities import find_files, get_file, save_file - - -def strip_md(content): - lines_with_fence_state = list(iter_lines_with_fence_state(content)) - lines = [line for line, _ in lines_with_fence_state] - outside_fence = [not inside for _, inside in lines_with_fence_state] - stripped_lines = [] - index = 0 - - while index < len(lines): - line = lines[index] - if not outside_fence[index]: - stripped_lines.append(line) - index += 1 - continue - - line_kind, match = classify_line(line) - - if line_kind == LINE_KIND_BARE_MARKER: - if ( - is_rfm_bare_marker_line(line) - and index + 1 < len(lines) - and outside_fence[index + 1] - and is_paired_empty_blockquote(lines[index + 1]) - ): - index += 2 - else: - index += 1 - continue - - if line_kind == LINE_KIND_LABELLED_MARKER: - stripped_lines.append("{}{}{}".format( - match.group("prefix"), - match.group("label"), - get_eol(line))) - index += 1 - continue - - stripped_lines.append(line) - index += 1 - - return "".join(stripped_lines) - - -def strip_tree(input_folder, output_folder): - files_to_strip = find_files(input_folder, extensions=[".md"]) - - for source_file_path in files_to_strip: - relative_file_name = os.path.relpath(source_file_path, input_folder) - output_file_path = os.path.join(output_folder, relative_file_name) - output_file_folder = os.path.dirname(output_file_path) - - if not os.path.exists(output_file_folder): - os.makedirs(output_file_folder) - - content, suggested_eol = get_file(source_file_path) - save_file(output_file_path, strip_md(content), suggested_eol) diff --git a/nttt/tidyup.py b/nttt/tidyup.py index 6d94b56..b4b1b11 100644 --- a/nttt/tidyup.py +++ b/nttt/tidyup.py @@ -10,7 +10,7 @@ from .cleanup_formatting import trim_formatting_tags from .cleanup_sections import fix_sections from .cleanup_sections import revert_section_translation -from .restore import restore_tree +from .cleanup_alerts import fix_alerts, revert_alert_translation def fix_meta(src, 
english_src, dst): @@ -64,6 +64,11 @@ def fix_md_step(src, lang, english_src, dst, disable, logging): if en_md_content is not None and "revert_section_translation" not in disable: md_content = revert_section_translation(src, md_content, en_md_content, logging) + if "fix_alerts" not in disable: + md_content = fix_alerts(md_content, logging) + if en_md_content is not None and "revert_alert_translation" not in disable: + md_content = revert_alert_translation(src, md_content, en_md_content, logging) + if "fix_md" not in disable: md_content = trim_md_tags(md_content, logging) @@ -105,10 +110,6 @@ def tidyup_translations(arguments): continue_with_cleanup = (process_yn.casefold() == "y") if continue_with_cleanup: - if language != "en" and os.path.isdir(english_folder): - print("Restoring stripped markers ...") - restore_tree(input_folder, english_folder, output_folder) - for source_file_path in files_to_update: relative_input_file_name = os.path.relpath(source_file_path, input_folder) diff --git a/setup.py b/setup.py index e2b9a23..18e5821 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,8 @@ author = __author__, author_email = __author_email__, packages = [__project__], + package_data={__project__: ['markers.yml']}, + include_package_data=True, entry_points={ 'console_scripts': [ 'nttt = nttt:main' diff --git a/test/fixtures/en/step_1.md b/test/fixtures/en/step_1.md new file mode 100644 index 0000000..93bbcae --- /dev/null +++ b/test/fixtures/en/step_1.md @@ -0,0 +1,14 @@ +--- task --- +--- hints --- +--- hint --- + +Try it! See if you can control the robot. + +--- /hint --- +--- hint --- + +Here is the code you need: + +--- /hint --- +--- /hints --- +--- /task --- diff --git a/test/fixtures/en/step_2.md b/test/fixtures/en/step_2.md new file mode 100644 index 0000000..b3889bf --- /dev/null +++ b/test/fixtures/en/step_2.md @@ -0,0 +1,15 @@ +Click the _green flag_ to start the project. + +Use **seven** patterns to play the music. 
+ +Type `your name`{:class="block3variables"} below. + +* List item +* List item with *italic* word + +Here is a code block: + +```python +x = 3 * 2 * 1 +y = _ unchanged _ +``` diff --git a/test/fixtures/en/step_3.md b/test/fixtures/en/step_3.md new file mode 100644 index 0000000..7d6ee6a --- /dev/null +++ b/test/fixtures/en/step_3.md @@ -0,0 +1,5 @@ +Press <kbd>Enter</kbd> to confirm. + +Click <strong>OK</strong> to continue. + +Use the code `<code> uncrossed </code>` (this stays the same). diff --git a/test/fixtures/en/step_4.md b/test/fixtures/en/step_4.md new file mode 100644 index 0000000..82d4d08 --- /dev/null +++ b/test/fixtures/en/step_4.md @@ -0,0 +1,5 @@ +Use `if`{:class="block3control"} blocks. + +Click `the mouse pointer`{:class="block3sensing"}. + +Open the [print-friendly version](https://projects.raspberrypi.org/en/projects/boat-race/print){:target="_blank"}. diff --git a/test/fixtures/en/step_5.md b/test/fixtures/en/step_5.md new file mode 100644 index 0000000..9ca6b33 --- /dev/null +++ b/test/fixtures/en/step_5.md @@ -0,0 +1,5 @@ +View the project on [this page](https://projects.raspberrypi.org/en/projects/boat-race). + +Click [here](https://projects.raspberrypi.org/en/projects/another-project/step_1) for more information. + +This is an absolute URL without a language code that does not change: https://www.raspberrypi.org/help. diff --git a/test/fixtures/en/step_6.md b/test/fixtures/en/step_6.md new file mode 100644 index 0000000..28672d9 --- /dev/null +++ b/test/fixtures/en/step_6.md @@ -0,0 +1,18 @@ +--- task --- +--- hints --- +--- hint --- + +Click _start_ to open the project. + +Press <kbd>Enter</kbd> after entering. + +Use `if`{:class="block3control"} to decide. + +--- /hint --- +--- hint --- + +Look at [the project](https://projects.raspberrypi.org/en/projects/scratchpad). 
+ +--- /hint --- +--- /hints --- +--- /task --- diff --git a/test/fixtures/hide/input/crowdin_string_list.txt b/test/fixtures/hide/input/crowdin_string_list.txt new file mode 100644 index 0000000..9786c8b --- /dev/null +++ b/test/fixtures/hide/input/crowdin_string_list.txt @@ -0,0 +1,13 @@ +#5001 Welcome to this project. You will build a game. +#5002 --- task --- +#5003 Complete the activity below. +#5004 --- /task --- +#5005 --- no-print --- +#5006 This content is only shown on screen. +#5007 --- /no-print --- +#5008 > [!HINT] +#5009 Try clicking the green flag first. +#5010 > [!ACCORDION] Downloading the Raspberry Pi software +#5011 > [!SAVE] +#5012 hero_image images/cover.png +#5013 Well done, you have finished the project! diff --git a/test/fixtures/input/step_1.md b/test/fixtures/input/step_1.md new file mode 100644 index 0000000..1ec8803 --- /dev/null +++ b/test/fixtures/input/step_1.md @@ -0,0 +1,9 @@ +\--- taak \--- \--- tips \--- \--- tip \--- + +Probeer het! Kijk of je de robot kunt besturen. + +\--- /tip \--- \--- tip \--- + +Hier is de code die je nodig hebt: + +\--- /tip \--- \--- /tips \--- \--- /taak \--- diff --git a/test/fixtures/input/step_2.md b/test/fixtures/input/step_2.md new file mode 100644 index 0000000..12234c0 --- /dev/null +++ b/test/fixtures/input/step_2.md @@ -0,0 +1,15 @@ +Klik op de _ groene vlag _ om het project te starten. + +Gebruik ** zeven ** patronen om de muziek te spelen. + +Schrijf ` je naam `{:class="block3variables"} hieronder. + +* Lijst item +* Lijst item met * cursief * woord + +Hier is een codeblok (inhoud wordt niet aangeraakt): + +```python +x = 3 * 2 * 1 +y = _ onveranderd _ +``` diff --git a/test/fixtures/input/step_3.md b/test/fixtures/input/step_3.md new file mode 100644 index 0000000..0e64b90 --- /dev/null +++ b/test/fixtures/input/step_3.md @@ -0,0 +1,5 @@ +Druk op <kbd> Enter </kbd> om te bevestigen. + +Klik op <strong> OK </strong> om door te gaan. 
+ +Gebruik de code `<code> ongekruist </code>` (dit blijft hetzelfde). diff --git a/test/fixtures/input/step_4.md b/test/fixtures/input/step_4.md new file mode 100644 index 0000000..d097a0b --- /dev/null +++ b/test/fixtures/input/step_4.md @@ -0,0 +1,5 @@ +Gebruik `als`{ : class = "block3control"} blokken. + +Klik op `de muisaanwijzer`{: CLASS = "block3sensing" }. + +Open de [printvriendelijke versie](https://projects.raspberrypi.org/en/projects/boat-race/print) {:target=" _ blank"}. diff --git a/test/fixtures/input/step_5.md b/test/fixtures/input/step_5.md new file mode 100644 index 0000000..05dd698 --- /dev/null +++ b/test/fixtures/input/step_5.md @@ -0,0 +1,5 @@ +Bekijk het project op [deze pagina](https://projects.raspberrypi.org/en/projects/boat-race). + +Klik [hier](https://projects.raspberrypi.org/en/projects/another-project/step_1) voor meer informatie. + +Dit is een absolute URL zonder taalcode die niet verandert: https://www.raspberrypi.org/help. diff --git a/test/fixtures/input/step_6.md b/test/fixtures/input/step_6.md new file mode 100644 index 0000000..319f302 --- /dev/null +++ b/test/fixtures/input/step_6.md @@ -0,0 +1,13 @@ +\--- taak \--- \--- tips \--- \--- tip \--- + +Klik op _ starten _ om het project te openen. + +Druk op <kbd> Enter </kbd> na het invoeren. + +Gebruik `als`{ : class = "block3control"} om te beslissen. + +\--- /tip \--- \--- tip \--- + +Bekijk [het project](https://projects.raspberrypi.org/en/projects/scratchpad). + +\--- /tip \--- \--- /tips \--- \--- /taak \--- diff --git a/test/fixtures/restore/en/step_7.md b/test/fixtures/restore/en/step_7.md new file mode 100644 index 0000000..39526a4 --- /dev/null +++ b/test/fixtures/restore/en/step_7.md @@ -0,0 +1,13 @@ +--- task --- + +Complete the activity at /en/projects/example. + +> [!HINT] +> +> Try the green flag. + +> [!ACCORDION] Downloading the software +> +> Follow these steps. 
+ +--- /task --- diff --git a/test/fixtures/restore/expected/step_7.md b/test/fixtures/restore/expected/step_7.md new file mode 100644 index 0000000..495ddca --- /dev/null +++ b/test/fixtures/restore/expected/step_7.md @@ -0,0 +1,13 @@ +--- task --- + +Completa la actividad en /es/projects/example. + +> [!HINT] +> +> Prueba la bandera verde. + +> [!ACCORDION] Descargar el software +> +> Sigue estos pasos. + +--- /task --- diff --git a/test/fixtures/restore/input/step_7.md b/test/fixtures/restore/input/step_7.md new file mode 100644 index 0000000..35e91ff --- /dev/null +++ b/test/fixtures/restore/input/step_7.md @@ -0,0 +1,13 @@ +\--- tarea \--- + +Completa la actividad en /en/projects/example. + +>[! PISTA ] +> +> Prueba la bandera verde. + +> [!ACORDEON] Descargar el software +> +> Sigue estos pasos. + +\--- /tarea \--- diff --git a/test/test_fixtures.py b/test/test_fixtures.py new file mode 100644 index 0000000..732d90a --- /dev/null +++ b/test/test_fixtures.py @@ -0,0 +1,182 @@ +""" +Integration fixture tests for NTTT. + +Each test processes a real .md fixture file through fix_md_step and writes the +output to test/fixtures/output/ — open those files to inspect the before/after. 
+ +Normal run (assertions enabled): + python -m unittest discover -s test -p "test_fixtures.py" -v + +Inspect mode (writes outputs, skips assertions — useful when adding new +transformations and you want to see the raw output before writing assertions): + NTTT_INSPECT=1 python -m unittest discover -s test -p "test_fixtures.py" -v +""" +import os +import unittest +import nttt.tidyup + +FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') +INPUT = os.path.join(FIXTURES, 'input') +EN = os.path.join(FIXTURES, 'en') +OUTPUT = os.path.join(FIXTURES, 'output') + +INSPECT = os.environ.get('NTTT_INSPECT') == '1' + + +class TestFixtures(unittest.TestCase): + + def setUp(self): + os.makedirs(OUTPUT, exist_ok=True) + + def _assertIn(self, member, container): + if not INSPECT: + super().assertIn(member, container) + + def _assertNotIn(self, member, container): + if not INSPECT: + super().assertNotIn(member, container) + + def _run(self, step, lang='nl'): + src = os.path.join(INPUT, step) + en_src = os.path.join(EN, step) + dst = os.path.join(OUTPUT, step) + nttt.tidyup.fix_md_step(src, lang, en_src, dst, (), 'off') + with open(dst, encoding='utf-8') as f: + return f.read() + + def test_step_1_section_markers(self): + """ + Verifies that backslash-escaped section markers are normalised and + translated section tag names are reverted to their English equivalents. 
+ + Input: \\--- taak \\--- \\--- tips \\--- \\--- tip \\--- (jammed, escaped) + Output: --- task --- / --- hints --- / --- hint --- (split, English) + """ + result = self._run('step_1.md') + + self._assertIn('--- task ---', result) + self._assertIn('--- hints ---', result) + self._assertIn('--- hint ---', result) + self._assertIn('--- /hint ---', result) + self._assertIn('--- /hints ---', result) + self._assertIn('--- /task ---', result) + + self._assertNotIn('\\---', result) + self._assertNotIn('--- taak ---', result) + self._assertNotIn('--- tips ---', result) + self._assertNotIn('--- tip ---', result) + + print(f'\n Output: {os.path.join(OUTPUT, "step_1.md")}') + + def test_step_2_markdown_delimiters(self): + """ + Verifies that extra whitespace inside markdown emphasis delimiters is + stripped, while code-block content is left untouched. + + Input: _ groene vlag _ / ** zeven ** / ` je naam ` + Output: _groene vlag_ / **zeven** / `je naam` + Code block interior (3 * 2 * 1) preserved unchanged. + """ + result = self._run('step_2.md') + + self._assertIn('_groene vlag_', result) + self._assertIn('**zeven**', result) + self._assertIn('`je naam`', result) + + self._assertNotIn('_ groene vlag _', result) + self._assertNotIn('** zeven **', result) + self._assertNotIn('` je naam `', result) + + self._assertIn('3 * 2 * 1', result) + + print(f'\n Output: {os.path.join(OUTPUT, "step_2.md")}') + + def test_step_3_html_tags(self): + """ + Verifies that padding inside simple inline HTML tags is stripped, while + content wrapped in backtick spans is left untouched. + + Input: <kbd> Enter </kbd> / <strong> OK </strong> + Output: <kbd>Enter</kbd> / <strong>OK</strong> + Backtick span `<code> ongekruist </code>` preserved unchanged. 
+ """ + result = self._run('step_3.md') + + self._assertIn('<kbd>Enter</kbd>', result) + self._assertIn('<strong>OK</strong>', result) + + self._assertNotIn('<kbd> Enter </kbd>', result) + self._assertNotIn('<strong> OK </strong>', result) + + self._assertIn('`<code> ongekruist </code>`', result) + + print(f'\n Output: {os.path.join(OUTPUT, "step_3.md")}') + + def test_step_4_formatting_braces(self): + """ + Verifies that { :class = "..." } attribute blocks are normalised: + extra spaces removed, attribute name and value lowercased, and the + _blank target shorthand fixed. + + Input: { : class = "block3control"} / {: CLASS = "block3sensing" } / {:target=" _ blank"} + Output: {:class="block3control"} / {:class="block3sensing"} / {:target="_blank"} + """ + result = self._run('step_4.md') + + self._assertIn('{:class="block3control"}', result) + self._assertIn('{:class="block3sensing"}', result) + self._assertIn('{:target="_blank"}', result) + + self._assertNotIn('{ : class', result) + self._assertNotIn('CLASS', result) + self._assertNotIn('" _ blank"', result) + + print(f'\n Output: {os.path.join(OUTPUT, "step_4.md")}') + + def test_step_5_url_rewrite(self): + """ + Verifies that every /en/ path segment in the file is rewritten to the + target language code. + + Input (lang=nl): /en/projects/boat-race / /en/projects/another-project/step_1 + Output: /nl/projects/boat-race / /nl/projects/another-project/step_1 + """ + result = self._run('step_5.md', lang='nl') + + self._assertIn('/nl/projects/boat-race', result) + self._assertIn('/nl/projects/another-project', result) + + self._assertNotIn('/en/projects/', result) + + print(f'\n Output: {os.path.join(OUTPUT, "step_5.md")}') + + def test_step_6_combined(self): + """ + Verifies that all transformations work together on a single file: + section markers fixed and reverted, markdown delimiters trimmed, + HTML tags trimmed, formatting braces normalised, and URLs rewritten. 
+ """ + result = self._run('step_6.md', lang='nl') + + self._assertIn('--- task ---', result) + self._assertIn('--- hint ---', result) + self._assertNotIn('--- taak ---', result) + self._assertNotIn('\\---', result) + + self._assertIn('_starten_', result) + self._assertNotIn('_ starten _', result) + + self._assertIn('<kbd>Enter</kbd>', result) + self._assertNotIn('<kbd> Enter </kbd>', result) + + self._assertIn('{:class="block3control"}', result) + self._assertNotIn('{ : class', result) + + self._assertIn('/nl/projects/', result) + self._assertNotIn('/en/projects/', result) + + print(f'\n Output: {os.path.join(OUTPUT, "step_6.md")}') + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py new file mode 100644 index 0000000..29088b6 --- /dev/null +++ b/test/test_roundtrip.py @@ -0,0 +1,86 @@ +""" +Local round-trip fixtures for inspecting the hide + restore flows by eye. + +Like test_fixtures.py, these write their results under test/fixtures/.../output/ +(gitignored) so a maintainer can open the before/after files. They are NOT part +of the CI unit-test run; run them locally: + +Normal run (assertions enabled): + python -m unittest discover -s test -p "test_roundtrip.py" -v + +Inspect mode (writes outputs, skips assertions): + NTTT_INSPECT=1 python -m unittest discover -s test -p "test_roundtrip.py" -v + +Hide flow - test/fixtures/hide/: a captured `crowdin string list` -> the IDs to hide. +Restore flow - test/fixtures/restore/: a mangled translation + English template -> restored file. 
+""" +import os +import unittest + +import nttt.tidyup +from nttt import hide_strings + +FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') +INSPECT = os.environ.get('NTTT_INSPECT') == '1' + +HIDE = os.path.join(FIXTURES, 'hide') +RESTORE = os.path.join(FIXTURES, 'restore') + + +class TestHideFlow(unittest.TestCase): + """Given a Crowdin listing, check which string IDs NTTT would hide.""" + + def setUp(self): + os.makedirs(os.path.join(HIDE, 'output'), exist_ok=True) + + def test_hide_list(self): + listing_path = os.path.join(HIDE, 'input', 'crowdin_string_list.txt') + with open(listing_path, encoding='utf-8') as f: + listing = f.read() + + results = hide_strings.find_hidden_strings(listing) + ids = hide_strings.unique_ids(results) + + # Write the IDs and a human-readable report for inspection. + with open(os.path.join(HIDE, 'output', 'ids.txt'), 'w', encoding='utf-8') as f: + f.write("\n".join(ids) + "\n") + with open(os.path.join(HIDE, 'output', 'report.txt'), 'w', encoding='utf-8') as f: + f.write(hide_strings.format_report(results) + "\n") + + if not INSPECT: + # markers (legacy + RFM + raw) are hidden ... + for expected in ['5002', '5004', '5005', '5007', '5008', '5010', '5011', '5012']: + self.assertIn(expected, ids) + # ... plain prose is not. 
+ for prose in ['5001', '5003', '5006', '5009', '5013']: + self.assertNotIn(prose, ids) + + print(f'\n Output: {os.path.join(HIDE, "output", "ids.txt")}') + + +class TestRestoreFlow(unittest.TestCase): + """Given a mangled translation + the English template, check the restored file.""" + + def setUp(self): + os.makedirs(os.path.join(RESTORE, 'output'), exist_ok=True) + + def test_restore_step_7(self): + src = os.path.join(RESTORE, 'input', 'step_7.md') + en = os.path.join(RESTORE, 'en', 'step_7.md') + dst = os.path.join(RESTORE, 'output', 'step_7.md') + + nttt.tidyup.fix_md_step(src, 'es', en, dst, (), 'off') + + with open(dst, encoding='utf-8') as f: + result = f.read() + + if not INSPECT: + with open(os.path.join(RESTORE, 'expected', 'step_7.md'), encoding='utf-8') as f: + expected = f.read() + self.assertEqual(result, expected) + + print(f'\n Output: {dst}') + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/unit_test/test_arguments.py b/unit_test/test_arguments.py index 63228e7..1a3acf8 100644 --- a/unit_test/test_arguments.py +++ b/unit_test/test_arguments.py @@ -59,7 +59,6 @@ def __init__(self): self.Disable = False self.Logging = False self.Yes = False - self.mode = None # Using the os.chdir function for a subdirectory of a directory created # with TemporaryDirectory doesn't work on Windows and macOS. 
Therefore, @@ -86,7 +85,6 @@ def __init__(self): self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.DISABLE], []) self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.LOGGING], "off") self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.YES], "off") - self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.MODE], "tidy") input_folder = Path(data_folder, "da-DK") output_folder = Path(data_folder, "output") @@ -103,7 +101,6 @@ def __init__(self): command_line_args.Disable = "fix_md,fix_html" command_line_args.Logging = "on" command_line_args.Yes = "on" - command_line_args.mode = "strip" arguments = nttt.arguments.resolve_arguments(command_line_args) self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.INPUT], input_folder) self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.OUTPUT], output_folder) @@ -114,7 +111,6 @@ def __init__(self): self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.DISABLE], ["fix_md", "fix_html"]) self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.LOGGING], "on") self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.YES], "on") - self.assertEqual(arguments[nttt.arguments.ArgumentKeyConstants.MODE], "strip") def test_check_folder(self): ''' Test case for the check_folder function: diff --git a/unit_test/test_cleanup_alerts.py b/unit_test/test_cleanup_alerts.py new file mode 100644 index 0000000..967032f --- /dev/null +++ b/unit_test/test_cleanup_alerts.py @@ -0,0 +1,77 @@ +import unittest +from nttt import cleanup_alerts + + +class TestCleanupAlerts(unittest.TestCase): + logging = "off" + + def _fix(self, content): + return cleanup_alerts.fix_alerts(content, self.logging) + + def test_adds_missing_space_after_gt(self): + self.assertEqual(self._fix(">[!TASK]"), "> [!TASK]") + + def test_strips_spaces_inside_brackets(self): + self.assertEqual(self._fix("> [! 
TASK ]"), "> [!TASK]") + + def test_uppercases_keyword(self): + self.assertEqual(self._fix("> [!task]"), "> [!TASK]") + + def test_unescapes_crowdin_backslash(self): + self.assertEqual(self._fix("> \\[!HINT]"), "> [!HINT]") + + def test_preserves_accordion_title(self): + self.assertEqual( + self._fix("> [!ACCORDION] Downloading the software "), + "> [!ACCORDION] Downloading the software", + ) + + def test_nested_blockquote_levels(self): + self.assertEqual(self._fix("> >[!hint]"), "> > [!HINT]") + + def test_leaves_normal_blockquote_untouched(self): + content = "> Just a quote, not an alert.\n> Another line." + self.assertEqual(self._fix(content), content) + + def test_multiline_block(self): + content = ">[!TASK]\n>\n> Do the thing.\n" + expected = "> [!TASK]\n>\n> Do the thing.\n" + self.assertEqual(self._fix(content), expected) + + def test_revert_translated_keyword(self): + translated = "> [!TAREA]\n>\n> Hazlo.\n" + english = "> [!TASK]\n>\n> Do it.\n" + self.assertEqual( + cleanup_alerts.revert_alert_translation("step.md", translated, english, self.logging), + "> [!TASK]\n>\n> Hazlo.\n", + ) + + def test_revert_preserves_translated_accordion_title(self): + translated = "> [!ACORDEON] Titulo traducido\n" + english = "> [!ACCORDION] English title\n" + self.assertEqual( + cleanup_alerts.revert_alert_translation("step.md", translated, english, self.logging), + "> [!ACCORDION] Titulo traducido\n", + ) + + def test_revert_skips_on_count_mismatch(self): + translated = "> [!TAREA]\n> [!PISTA]\n" + english = "> [!TASK]\n" + # counts differ -> content returned unchanged + self.assertEqual( + cleanup_alerts.revert_alert_translation("step.md", translated, english, self.logging), + translated, + ) + + def test_revert_handles_mixed_with_legacy(self): + # legacy "--- task ---" lines are not alert headers and must be ignored here + translated = "--- taak ---\n> [!PISTA]\n" + english = "--- task ---\n> [!HINT]\n" + self.assertEqual( + 
cleanup_alerts.revert_alert_translation("step.md", translated, english, self.logging), + "--- taak ---\n> [!HINT]\n", + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/unit_test/test_cleanup_sections.py b/unit_test/test_cleanup_sections.py index ced5177..dba5d3a 100644 --- a/unit_test/test_cleanup_sections.py +++ b/unit_test/test_cleanup_sections.py @@ -1,6 +1,5 @@ import unittest from nttt import cleanup_sections -from nttt.restore import restore_md class TestCleanupSections(unittest.TestCase): @@ -306,99 +305,6 @@ def test_tag_with_dash(self): self.assertEqual(cleanup_sections.fix_sections(c_initial, self.logging), c_target) - def test_no_collapse_doubling_for_crowdin_broken_with_matching_line_count(self): - """ - Regression for the user-reported bug: in default `tidy` mode, when a - Crowdin-broken locale file happens to have the same line count as the - stripped English reference, `restore_md` would insert legacy bare - markers from English while `fix_sections` separately rebuilt them from - the `## --- collapse ---` / `## title:` Crowdin pattern, producing - duplicated `--- collapse ---` and `--- /collapse ---` lines. - - The pipeline (restore_md -> fix_sections) must produce a single, - canonical `--- collapse ---` block. 
- """ - en = ("## What you will make\n" - "\n" - "Use the face recognition tools.\n" - "\n" - "You will need a **webcam**.\n" - "\n" - "![Image.](images/foo.png)\n" - "\n" - "--- collapse ---\n" - "---\n" - "title: Where are my images stored?\n" - "---\n" - "\n" - "- This project uses a technology.\n" - "- No images from your webcam.\n" - "\n" - "--- /collapse ---\n" - "\n" - "\n" - "--- collapse ---\n" - "---\n" - "title: No YouTube?\n" - "---\n" - "\n" - "You can [download].\n" - "\n" - "\n" - "--- /collapse ---\n") - - fr = ("## Ce que tu vas faire\n" - "\n" - "Utilise les outils.\n" - "\n" - "Tu auras besoin d'une **webcam**.\n" - "\n" - "![Image.](images/foo.png)\n" - "\n" - "## \\--- collapse \\---\n" - "\n" - "## title: Ou sont stockees mes images ?\n" - "\n" - "- Ce projet.\n" - "- Aucune image.\n" - "\n" - "\\--- /collapse \\---\n" - "\n" - "## \\--- collapse \\---\n" - "\n" - "## title: Pas de YouTube ?\n" - "\n" - "Tu peux [telecharger].\n" - "\n" - "\\--- /collapse \\---\n") - - # Mimic the default tidy pipeline: restore first, then fix_sections. - restored = restore_md(fr, en, "step_1.md") - fixed = cleanup_sections.fix_sections(restored, self.logging) - - # No duplicate markers anywhere. - self.assertNotIn("--- collapse ---\n--- collapse ---", fixed) - self.assertNotIn("--- /collapse ---\n--- /collapse ---", fixed) - - # Exactly two opening and two closing canonical legacy markers (one per - # collapse block, and no Crowdin-broken `## ---` patterns left over). - opening_lines = [line for line in fixed.splitlines() if line == "--- collapse ---"] - closing_lines = [line for line in fixed.splitlines() if line == "--- /collapse ---"] - self.assertEqual(len(opening_lines), 2) - self.assertEqual(len(closing_lines), 2) - self.assertNotIn("## ---", fixed) - self.assertNotIn("\\---", fixed) - - # And the title blocks were rebuilt in canonical YAML form. 
- self.assertIn("--- collapse ---\n" - "---\n" - "title: Ou sont stockees mes images ?\n" - "---\n", fixed) - self.assertIn("--- collapse ---\n" - "---\n" - "title: Pas de YouTube ?\n" - "---\n", fixed) - if __name__ == '__main__': unittest.main() diff --git a/unit_test/test_hide_strings.py b/unit_test/test_hide_strings.py new file mode 100644 index 0000000..9fbc4c2 --- /dev/null +++ b/unit_test/test_hide_strings.py @@ -0,0 +1,52 @@ +import io +import unittest +from nttt import hide_strings + + +SAMPLE_LISTING = ( + "#101 Just some ordinary prose that should stay visible.\n" + "#102 --- task ---\n" + "#103 --- /no-print ---\n" + "#104 > [!HINT]\n" + "#105 > [!ACCORDION] Downloading the software\n" + "#106 hero_image images/cover.png\n" + "#107 Another translatable sentence.\n" +) + + +class TestHideStrings(unittest.TestCase): + + def test_finds_legacy_rfm_and_raw(self): + results = hide_strings.find_hidden_strings(SAMPLE_LISTING) + ids = hide_strings.unique_ids(results) + self.assertEqual(ids, ["102", "103", "104", "105", "106"]) + + def test_does_not_match_prose(self): + results = hide_strings.find_hidden_strings(SAMPLE_LISTING) + ids = hide_strings.unique_ids(results) + self.assertNotIn("101", ids) + self.assertNotIn("107", ids) + + def test_records_matched_marker(self): + results = hide_strings.find_hidden_strings(SAMPLE_LISTING) + by_id = {r["id"]: r["marker"] for r in results} + self.assertEqual(by_id["102"], "--- task ---") + self.assertEqual(by_id["105"], "[!ACCORDION]") + self.assertEqual(by_id["106"], "hero_image images/") + + def test_id_without_hash_prefix(self): + results = hide_strings.find_hidden_strings("102\t--- task ---\n") + self.assertEqual(hide_strings.unique_ids(results), ["102"]) + + def test_unique_ids_dedupes(self): + results = [{"id": "5"}, {"id": "5"}, {"id": "9"}] + self.assertEqual(hide_strings.unique_ids(results), ["5", "9"]) + + def test_run_prints_ids(self): + out = io.StringIO() + hide_strings.run(io.StringIO(SAMPLE_LISTING), out) + 
self.assertEqual(out.getvalue().split(), ["102", "103", "104", "105", "106"]) + + +if __name__ == '__main__': + unittest.main() diff --git a/unit_test/test_markers.py b/unit_test/test_markers.py index 418a9cf..468d258 100644 --- a/unit_test/test_markers.py +++ b/unit_test/test_markers.py @@ -1,125 +1,56 @@ import unittest -from nttt.markers import ( - LINE_KIND_BARE_MARKER, - LINE_KIND_LABELLED_MARKER, - LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE, - LINE_KIND_REGULAR, - classify_line, - iter_lines_with_fence_state, - is_marker_line, - is_paired_empty_blockquote, -) +from nttt import markers class TestMarkers(unittest.TestCase): - def assert_line_kind(self, line, expected_kind): - line_kind, _ = classify_line(line) - self.assertEqual(line_kind, expected_kind) - def test_bare_rfm_markers(self): - self.assert_line_kind("> [!TASK]", LINE_KIND_BARE_MARKER) - self.assert_line_kind("> [!NOPRINT]", LINE_KIND_BARE_MARKER) - self.assert_line_kind("> [!PRINTONLY]", LINE_KIND_BARE_MARKER) - self.assert_line_kind("> [!HINT]", LINE_KIND_BARE_MARKER) - self.assert_line_kind("> [!CHALLENGE]", LINE_KIND_BARE_MARKER) - self.assert_line_kind("> [!SAVE]", LINE_KIND_BARE_MARKER) - self.assert_line_kind("> > [!TASK]", LINE_KIND_BARE_MARKER) - - def test_labelled_rfm_markers(self): - line_kind, match = classify_line("> [!ACCORDION] Where are my voice recordings stored?") - self.assertEqual(line_kind, LINE_KIND_LABELLED_MARKER) - self.assertEqual(match.group("tag"), "ACCORDION") - self.assertEqual(match.group("label"), "Where are my voice recordings stored?") - - def test_empty_blockquote(self): - self.assert_line_kind(">", LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE) - self.assert_line_kind("> >", LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE) - self.assertTrue(is_paired_empty_blockquote(">")) - - def test_bare_legacy_markers(self): - self.assert_line_kind("--- task ---", LINE_KIND_BARE_MARKER) - self.assert_line_kind("--- /task ---", LINE_KIND_BARE_MARKER) - self.assert_line_kind(" --- feedback ---", 
LINE_KIND_BARE_MARKER) - self.assert_line_kind("--- print-only ---", LINE_KIND_BARE_MARKER) - - def test_negative_cases(self): - self.assert_line_kind("This paragraph contains --- task --- text.", LINE_KIND_REGULAR) - self.assert_line_kind("\\--- task \\---", LINE_KIND_REGULAR) - self.assert_line_kind("> Quote text", LINE_KIND_REGULAR) - self.assertFalse(is_marker_line("> Quote text")) - - def test_iter_lines_with_fence_state_handles_two_fences_on_one_line(self): - content = ( - "```inline```\n" - "> [!TASK]\n" - ">\n" - "> Body\n") - - line_states = list(iter_lines_with_fence_state(content)) - - self.assertEqual( - line_states, - [ - ("```inline```\n", False), - ("> [!TASK]\n", False), - (">\n", False), - ("> Body\n", False), - ]) - - def test_iter_lines_with_fence_state_ignores_inline_triple_backticks(self): - content = ( - "Text with ` ``` ` inline\n" - "> [!TASK]\n" - ">\n" - "> Body\n") - - line_states = list(iter_lines_with_fence_state(content)) - - self.assertEqual( - line_states, - [ - ("Text with ` ``` ` inline\n", False), - ("> [!TASK]\n", False), - (">\n", False), - ("> Body\n", False), - ]) - - def test_iter_lines_with_fence_state_ignores_inline_triple_backticks_without_spaces(self): - content = ( - "Text with ```inline``` marker\n" - "> [!TASK]\n" - ">\n" - "> Body\n") - - line_states = list(iter_lines_with_fence_state(content)) - - self.assertEqual( - line_states, - [ - ("Text with ```inline``` marker\n", False), - ("> [!TASK]\n", False), - (">\n", False), - ("> Body\n", False), - ]) - - def test_iter_lines_with_fence_state_handles_language_fence_open_and_close(self): - content = ( - "```python\n" - "> [!TASK]\n" - "```\n" - "> Body\n") - - line_states = list(iter_lines_with_fence_state(content)) - - self.assertEqual( - line_states, - [ - ("```python\n", False), - ("> [!TASK]\n", True), - ("```\n", True), - ("> Body\n", False), - ]) - - -if __name__ == "__main__": + def test_loads_default_registry(self): + registry = markers.load_markers() + 
self.assertIn("markers", registry) + self.assertIn("raw_patterns", registry) + self.assertTrue(len(registry["markers"]) > 0) + + def test_hideable_strings_cover_both_syntaxes(self): + strings = markers.hideable_strings() + # legacy + self.assertIn("--- task ---", strings) + self.assertIn("--- /task ---", strings) + self.assertIn("--- no-print ---", strings) + # rfm + self.assertIn("[!TASK]", strings) + self.assertIn("[!HINT]", strings) + self.assertIn("[!ACCORDION]", strings) + # raw pattern + self.assertIn("hero_image images/", strings) + + def test_hideable_strings_are_unique(self): + strings = markers.hideable_strings() + self.assertEqual(len(strings), len(set(strings))) + + def test_hide_false_is_excluded(self): + registry = { + "markers": [ + {"name": "shown", "hide": False, "rfm": {"alert": "[!SHOWN]"}}, + {"name": "hidden", "hide": True, "rfm": {"alert": "[!HIDDEN]"}}, + ], + "raw_patterns": [], + } + strings = markers.hideable_strings(registry) + self.assertIn("[!HIDDEN]", strings) + self.assertNotIn("[!SHOWN]", strings) + + def test_alert_keywords(self): + keywords = markers.alert_keywords() + self.assertIn("TASK", keywords) + self.assertIn("ACCORDION", keywords) + self.assertNotIn("QUIZ", keywords) # legacy-only, no RFM alert + + def test_legacy_tag_names(self): + names = markers.legacy_tag_names() + self.assertIn("task", names) + self.assertIn("no-print", names) + self.assertNotIn("info", names) # RFM-only, no legacy marker + + +if __name__ == '__main__': unittest.main() diff --git a/unit_test/test_restore.py b/unit_test/test_restore.py deleted file mode 100644 index 6ddc034..0000000 --- a/unit_test/test_restore.py +++ /dev/null @@ -1,249 +0,0 @@ -import io -import unittest -from unittest.mock import patch -from nttt.restore import restore_md - - -class TestRestore(unittest.TestCase): - def test_restore_bare_marker(self): - english = ( - "Intro\n" - "> [!TASK]\n" - ">\n" - "> Do this.\n") - translated = ( - "Intro translated\n" - "> Do this 
translated.\n") - expected = ( - "Intro translated\n" - "> [!TASK]\n" - ">\n" - "> Do this translated.\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_labelled_marker_preserves_translated_label(self): - english = "> [!ACCORDION] Where are my voice recordings stored?\n>\n> Body\n" - translated = "> Wo werden meine Sprachaufnahmen gespeichert?\n>\n> Inhalt\n" - expected = "> [!ACCORDION] Wo werden meine Sprachaufnahmen gespeichert?\n>\n> Inhalt\n" - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_nested_labelled_marker(self): - english = "> > [!ACCORDION] Teacher notes\n> >\n> > Body\n" - translated = "> > Notizen fuer Lehrende\n> >\n> > Inhalt\n" - expected = "> > [!ACCORDION] Notizen fuer Lehrende\n> >\n> > Inhalt\n" - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_bare_marker_after_inline_fences_on_same_line(self): - english = ( - "```inline```\n" - "> [!TASK]\n" - ">\n" - "> Do this.\n") - translated = ( - "```inline```\n" - "> Do this translated.\n") - expected = ( - "```inline```\n" - "> [!TASK]\n" - ">\n" - "> Do this translated.\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_warns_and_skips_on_count_mismatch(self): - english = "> [!TASK]\n>\n> Do this.\n" - translated = "> Do this.\n> Extra line.\n" - - with patch("sys.stderr", new_callable=io.StringIO) as stderr: - result = restore_md(translated, english, "step_1.md") - - self.assertEqual(result, translated) - self.assertIn("Different stripped structure", stderr.getvalue()) - - def test_restore_noop_for_file_without_markers(self): - english = "Intro\nBody\n" - translated = "Intro translated\nBody translated\n" - - self.assertEqual(restore_md(translated, english, "step_1.md"), translated) - - def test_restore_crowdin_escape_and_heading_jam(self): - english = ( - "Intro\n" - "\n" - "--- collapse ---\n" - 
"\n" - "---\n" - "title: Notes\n" - "---\n" - "\n" - "Body\n" - "\n" - "--- /collapse ---\n") - translated = ( - "Intro translated\n" - "\n" - "\\--- collapse \\---\n" - "\n" - "---\n" - "\n" - "## title: Notes translated\n" - "\n" - "Body translated\n" - "\n" - "\\--- /collapse \\---\n") - expected = ( - "Intro translated\n" - "\n" - "--- collapse ---\n" - "\n" - "---\n" - "title: Notes translated\n" - "---\n" - "\n" - "Body translated\n" - "\n" - "--- /collapse ---\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_crowdin_title_headings(self): - english = ( - "Intro\n" - "\n" - "--- collapse ---\n" - "\n" - "---\n" - "title: Where are my images stored?\n" - "---\n" - "\n" - "Body\n" - "\n" - "--- /collapse ---\n") - translated = ( - "Intro translated\n" - "\n" - "---\n" - "\n" - "## title: Wo werden meine Bilder gespeichert?\n" - "\n" - "Body translated\n") - expected = ( - "Intro translated\n" - "\n" - "--- collapse ---\n" - "\n" - "---\n" - "title: Wo werden meine Bilder gespeichert?\n" - "---\n" - "\n" - "Body translated\n" - "\n" - "--- /collapse ---\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_skips_when_translated_already_has_legacy_marker(self): - # If the translated file already has canonical `--- collapse ---` - # markers, restoration would duplicate them. - english = ( - "Intro\n" - "--- collapse ---\n" - "Body\n" - "--- /collapse ---\n") - translated = ( - "Intro translated\n" - "--- collapse ---\n" - "Body translated\n" - "--- /collapse ---\n") - - with patch("sys.stderr", new_callable=io.StringIO) as stderr: - result = restore_md(translated, english, "step_1.md") - - self.assertEqual(result, translated) - self.assertEqual(stderr.getvalue(), "") - - def test_restore_still_inserts_when_stray_legacy_marker_present(self): - # A single orphaned legacy marker from Crowdin TM must not block restore. 
- english = ( - "Intro\n" - "--- collapse ---\n" - "Body\n" - "--- /collapse ---\n") - translated = ( - "--- task ---\n" - "Intro translated\n" - "Body translated\n") - expected = ( - "Intro translated\n" - "--- collapse ---\n" - "Body translated\n" - "--- /collapse ---\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - def test_restore_still_inserts_when_pure_stripped(self): - # Regression: a pure-stripped translated file (no `\---`, no `## ---`, - # no canonical legacy markers) must still get its markers restored. - english = ( - "Intro\n" - "--- collapse ---\n" - "Body\n" - "--- /collapse ---\n") - translated = ( - "Intro translated\n" - "Body translated\n") - expected = ( - "Intro translated\n" - "--- collapse ---\n" - "Body translated\n" - "--- /collapse ---\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - - def test_restore_aligns_when_translated_has_extra_blank_lines(self): - english = ( - "Line one\n" - "\n" - "--- collapse ---\n" - "\n" - "---\n" - "title: Notes\n" - "---\n" - "\n" - "Body\n" - "\n" - "--- /collapse ---\n") - translated = ( - "Line one translated\n" - "\n" - "\n" - "---\n" - "\n" - "## title: Notes translated\n" - "\n" - "\n" - "Body translated\n" - "\n" - "\n") - expected = ( - "Line one translated\n" - "\n" - "--- collapse ---\n" - "\n" - "---\n" - "title: Notes translated\n" - "---\n" - "\n" - "Body translated\n" - "\n" - "--- /collapse ---\n") - - self.assertEqual(restore_md(translated, english, "step_1.md"), expected) - - -if __name__ == "__main__": - unittest.main() diff --git a/unit_test/test_strip.py b/unit_test/test_strip.py deleted file mode 100644 index 600cb20..0000000 --- a/unit_test/test_strip.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest -from nttt.strip import strip_md - - -class TestStrip(unittest.TestCase): - def test_strip_bare_rfm_marker_and_paired_empty_blockquote(self): - content = ( - "Intro\n" - "> [!TASK]\n" - ">\n" - "> Do this.\n") - - 
expected = ( - "Intro\n" - "> Do this.\n") - - self.assertEqual(strip_md(content), expected) - - def test_strip_labelled_rfm_marker_keeps_label(self): - content = "> [!ACCORDION] Where are my voice recordings stored?\n>\n> Body\n" - expected = "> Where are my voice recordings stored?\n>\n> Body\n" - - self.assertEqual(strip_md(content), expected) - - def test_strip_nested_blockquote_marker(self): - content = ( - "> [!NOPRINT]\n" - ">\n" - "> > [!TASK]\n" - "> >\n" - "> > ### Play\n") - - expected = ( - "> > ### Play\n") - - self.assertEqual(strip_md(content), expected) - - def test_strip_legacy_marker(self): - content = "--- task ---\nDo this.\n--- /task ---\n" - expected = "Do this.\n" - - self.assertEqual(strip_md(content), expected) - - def test_preserve_code_fences(self): - content = ( - "Before\n" - "```\n" - "> [!TASK]\n" - "--- task ---\n" - "```\n" - "> [!SAVE]\n" - ">\n" - "After\n") - - expected = ( - "Before\n" - "```\n" - "> [!TASK]\n" - "--- task ---\n" - "```\n" - "After\n") - - self.assertEqual(strip_md(content), expected) - - -if __name__ == "__main__": - unittest.main() diff --git a/unit_test/test_strip_restore_roundtrip.py b/unit_test/test_strip_restore_roundtrip.py deleted file mode 100644 index 7f2cb24..0000000 --- a/unit_test/test_strip_restore_roundtrip.py +++ /dev/null @@ -1,26 +0,0 @@ -import unittest -from pathlib import Path -from nttt.restore import restore_md -from nttt.strip import strip_md - - -class TestStripRestoreRoundtrip(unittest.TestCase): - def assert_roundtrip_file(self, file_path): - content = file_path.read_text(encoding="utf-8") - self.assertEqual(restore_md(strip_md(content), content, str(file_path)), content) - - def test_labelled_accordions(self): - data_folder = Path(__file__).resolve().parent / "data" / "markdown" - self.assert_roundtrip_file(data_folder / "labelled_accordions.md") - - def test_nested_rfm_markers(self): - data_folder = Path(__file__).resolve().parent / "data" / "markdown" - 
self.assert_roundtrip_file(data_folder / "nested_modern_markers.md") - - def test_legacy_quiz_markers(self): - data_folder = Path(__file__).resolve().parent / "data" / "markdown" - self.assert_roundtrip_file(data_folder / "legacy_quiz_markers.md") - - -if __name__ == "__main__": - unittest.main()