diff --git a/.gitignore b/.gitignore
index 190d698..9ecb4cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,8 @@ docs/_build
# Unit test output
*.log
+
+# Fixture output files (written by the fixture tests for inspection)
+test/fixtures/output/
+test/fixtures/hide/output/
+test/fixtures/restore/output/
diff --git a/README.md b/README.md
index e8f2214..baeb348 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,19 @@ Note - NTTT will work on Windows, macOS and Linux.
For maintainers, [doc/transformations.md](doc/transformations.md) describes what NTTT changes in `meta.yml` and Markdown files (sections, HTML, formatting, URLs, and related behaviour).
+NTTT supports both the legacy (`--- task ---`) and the Raspberry Flavoured Markdown (`> [!TASK]`) syntaxes, which may be mixed in a single file. The structural markers for both syntaxes are defined in one editable data file — see [doc/markers.md](doc/markers.md) — which also drives the hide-strings mode. The design rationale for this dual-syntax + hide-strings work is recorded in [doc/plan-dual-syntax-hide-strings.md](doc/plan-dual-syntax-hide-strings.md).
+
+### Hide-strings mode
+
+NTTT can generate the list of Crowdin string IDs to hide from translators (markers from [`nttt/markers.yml`](nttt/markers.yml)):
+
+```bash
+crowdin string list --verbose | nttt --hide-strings > ids.txt
+while read -r id; do crowdin string edit "$id" --hidden; done < ids.txt
+```
+
+An example CI workflow for content repositories is in [doc/workflows/hide-strings.yml](doc/workflows/hide-strings.yml).
+
## Prerequisites
The tool requires having Python 3.7 or newer.
@@ -114,28 +127,6 @@ You can specify different directories for the input and output folder using the
nttt --input c:\path\to\project\de-DE --output c:\path\to\project\de-DE-tidy
```
-### Crowdin marker stripping and restoring
-
-NTTT has three processing modes:
-
-- `tidy` (default): restore stripped Markdown markers for non-English locale folders, then run the existing tidy-up transforms.
-- `strip`: remove non-translatable Markdown markers before uploading English source files to Crowdin.
-- `restore`: reinsert stripped Markdown markers into translated files after downloading from Crowdin.
-
-Use `strip` on the English source folder before Crowdin upload:
-
-```bash
-nttt --mode strip -i en -o en -Y on
-```
-
-Use `restore` on a translated locale folder after Crowdin download:
-
-```bash
-nttt --mode restore -i de-DE -e en -o de-DE -Y on
-```
-
-Modern bare markers such as `> [!TASK]` are removed entirely, along with their paired empty `>` line. Modern labelled markers such as `> [!ACCORDION] Where are my voice recordings stored?` keep the label available for translation by becoming `> Where are my voice recordings stored?`; restore reinserts `[!ACCORDION]` before the translated label. Legacy markers such as `--- task ---` and `--- /task ---` are also removed and restored by line alignment against `en/`.
-
### Help
To bring up full usage information use the `-h`/`--help` option.
diff --git a/doc/markers.md b/doc/markers.md
new file mode 100644
index 0000000..ee53a33
--- /dev/null
+++ b/doc/markers.md
@@ -0,0 +1,81 @@
+# NTTT: marker registry
+
+NTTT supports two markdown syntaxes for Raspberry Pi project content, which may
+appear **in the same file**:
+
+- **legacy** (`kramdown-rpf`): `--- task ---` … `--- /task ---`
+- **RFM** (Raspberry Flavoured Markdown / GFM alerts): `> [!TASK]`, `> [!HINT]`, `> [!ACCORDION] Title`
+
+The list of markers lives in one data file — [`nttt/markers.yml`](../nttt/markers.yml) —
+so it can be changed **without editing Python**. It is the single source of truth for:
+
+1. **Hiding** — which marker strings NTTT lists for Crowdin to hide from translators
+ (`nttt --hide-strings`, see below).
+2. **Restoring** — which RFM alert keywords NTTT reverts back to English on download
+ (see [`nttt/cleanup_alerts.py`](../nttt/cleanup_alerts.py)).
+
+> The legacy `--- … ---` normalisation in [`nttt/cleanup_sections.py`](../nttt/cleanup_sections.py)
+> is intentionally **syntax-generic** (it must cope with arbitrary translated tag
+> names), so it does not read the registry. The registry drives hiding and the RFM
+> alert handling.
+
+## Editing `markers.yml` (no Python needed)
+
+Each block type is one list entry:
+
+```yaml
+ - name: task
+ hide: true
+ legacy: { open: "--- task ---", close: "--- /task ---" }
+ rfm: { alert: "[!TASK]" }
+```
+
+- `hide: true` lists this marker for Crowdin to hide; `false` keeps it visible.
+- `legacy.open` / `legacy.close` are the exact marker lines (`close` is optional —
+ some blocks, e.g. `save`, have no closing marker).
+- `rfm.alert` is the alert token exactly as written, including the brackets.
+- Include only the syntaxes a block has (some are RFM-only, e.g. `info`/`tip`/`debug`).
+
+`raw_patterns:` holds non-block strings to hide (matched as plain substrings), e.g.
+`hero_image images/`.
+
+**To add a block type:** copy an entry, change the values, run the tests:
+
+```bash
+python -m unittest discover -s unit_test
+```
+
+## Legacy ↔ RFM mapping
+
+| Block | Legacy | RFM alert | Hidden |
+|--------------|--------------------------------|-------------------------------|:------:|
+| task | `--- task ---` | `[!TASK]` | yes |
+| hints | `--- hints ---` | *(grouped hints)* | yes |
+| hint | `--- hint ---` | `[!HINT]` | yes |
+| collapse | `--- collapse ---` | `[!ACCORDION]` *(+ title)* | yes |
+| challenge | `--- challenge ---` | `[!CHALLENGE]` | yes |
+| code | `--- code ---` | *(fenced-code attributes)* | yes |
+| save | `--- save ---` | `[!SAVE]` | yes |
+| new-page | `--- new-page ---` | *(page break)* | yes |
+| no-print | `--- no-print ---` | `[!NOPRINT]` | yes |
+| print-only | `--- print-only ---` | `[!PRINTONLY]` | yes |
+| quiz | `--- quiz ---` | — | yes |
+| question | `--- question ---` | — | yes |
+| choices | `--- choices ---` | — | yes |
+| feedback | `--- feedback ---` | — | yes |
+| info | — | `[!INFO]` | yes |
+| tip | — | `[!TIP]` | yes |
+| debug | — | `[!DEBUG]` | yes |
+
+## Hide-strings mode
+
+NTTT generates the Crowdin hide-list itself (replacing the old grep pipeline). It
+reads `crowdin string list --verbose` on stdin and prints the IDs of any string
+whose source text contains a hideable marker:
+
+```bash
+crowdin string list --verbose | nttt --hide-strings > ids.txt
+while read -r id; do crowdin string edit "$id" --hidden; done < ids.txt
+```
+
+See [`doc/workflows/hide-strings.yml`](workflows/hide-strings.yml) for the CI version.
diff --git a/doc/plan-dual-syntax-hide-strings.md b/doc/plan-dual-syntax-hide-strings.md
new file mode 100644
index 0000000..32ece71
--- /dev/null
+++ b/doc/plan-dual-syntax-hide-strings.md
@@ -0,0 +1,203 @@
+# Plan: Dual-syntax support + marker hide-list generation for NTTT
+
+## Context
+
+NTTT ("Nina's Translation Tidy-up Tool") cleans up Crowdin-translated Raspberry Pi
+project content. Today it:
+
+- Runs **only on the download side** (the `nttt-processing.yml` step), normalising and
+ reverting translated content after it comes back from Crowdin.
+- Understands **only the legacy `kramdown-rpf` syntax** (`--- task ---`, `--- hint ---`,
+ `--- /no-print ---`, …) — see [cleanup_sections.py](../nttt/cleanup_sections.py).
+
+Two things are changing:
+
+1. A **new "Raspberry Flavoured Markdown" (RFM)** syntax is being introduced — GFM
+ blockquote alerts (`> [!TASK]`, `> [!HINT]`, `> [!ACCORDION] Title`, `> [!NOPRINT]`,
+ page breaks, fenced-code attributes). NTTT must support **both** syntaxes, and **a single
+ file may mix the two** (confirmed with the user).
+2. Marker hiding (so translators never translate structural markers) is currently done by a
+ brittle `grep` pipeline in the content repos' `hide-strings.yml`. We are moving that logic
+ into NTTT (branch name: `nttt-no-hide-strings`). **NTTT will generate the Crowdin hide-list**
+ (the chosen mechanism); markers stay in the files, and the existing download-side
+ fix/revert pipeline puts any mangled markers back to their English form.
+
+**Decisions confirmed with the user:**
+- Mechanism = **generate Crowdin hide-list** (markers are hidden in Crowdin, not stripped from files).
+- Marker set = **configurable** (ship a sensible default = all structural markers, editable by non-devs).
+- Files **may mix** legacy + RFM markers — handle both within one file.
+- Deliver **NTTT tool changes + example workflow ymls**.
+
+**Outcome:** NTTT supports legacy and RFM content, owns the hide-list generation (retiring the
+grep in `hide-strings.yml`), and the marker set lives in one declarative data file that a
+non-Python maintainer can edit.
+
+---
+
+## Design overview — a single declarative marker registry
+
+The centrepiece (and the answer to "modular, maintainable by non-Python devs") is **one data
+file** describing every block type and its legacy + RFM spellings, plus whether it should be
+hidden. All code reads from it; adding/removing a block type or toggling hiding is a YAML edit,
+no Python.
+
+`nttt/markers.yml` (ruamel.yaml is already a dependency):
+
+```yaml
+# Edit this file to add/remove block types or change what gets hidden from translators.
+# 'hide: true' => NTTT lists this marker's strings for Crowdin to hide.
+markers:
+ - name: task
+ hide: true
+ legacy: { open: "--- task ---", close: "--- /task ---" }
+ rfm: { alert: "[!TASK]" }
+ - name: hint
+ hide: true
+ legacy: { open: "--- hint ---", close: "--- /hint ---" }
+ rfm: { alert: "[!HINT]" }
+ - name: collapse # RFM calls this ACCORDION; title is translatable
+ hide: true
+ legacy: { open: "--- collapse ---", close: "--- /collapse ---" }
+ rfm: { alert: "[!ACCORDION]" }
+ - name: no-print
+ hide: true
+ legacy: { open: "--- no-print ---", close: "--- /no-print ---" }
+ rfm: { alert: "[!NOPRINT]" }
+ # … save, new-page/page-break, print-only, challenge, code, quiz, question,
+ # choices, feedback, info, tip, debug …
+raw_patterns: # non-block strings to hide (e.g. asset paths)
+ - "hero_image images/"
+```
+
+The full marker set is derived from the two attached specs (legacy `kramdown-rpf` and RFM draft).
+Entries with only one of `legacy`/`rfm` are fine (e.g. `info`/`tip`/`debug` are RFM-only).
+
+`nttt/markers.py` — loader/accessor (single source of truth):
+- `load_markers()` → parsed registry (cached).
+- `hideable_strings()` → list of literal marker strings + raw patterns to match against
+ Crowdin's `string list` output (both syntaxes).
+- `alert_keywords()` / legacy tag helpers for the cleanup modules.
+
+---
+
+## Work items
+
+### 1. Marker registry (new)
+- **`nttt/markers.yml`** — the declarative data file above (full set from both specs).
+- **`nttt/markers.py`** — loader + accessors described above. Package the `.yml` via
+ `setup.py` (`package_data` / `include_package_data`).
+
+### 2. Hide-list generation mode (new) — replaces the grep in `hide-strings.yml`
+- **`nttt/hide_strings.py`** — reads `crowdin string list --verbose` output (stdin or file),
+ filters rows whose source text contains a hideable marker string (from `markers.hideable_strings()`,
+ covering legacy **and** RFM), and prints the numeric string IDs (one per line).
+- **CLI wiring** in [arguments.py](../nttt/arguments.py) + [__init__.py](../nttt/__init__.py):
+ add a `--hide-strings` mode flag. When present, `main()` dispatches to `hide_strings` and
+ reads stdin instead of running `tidyup_translations`. **Default behaviour (`nttt -Y YES`) is
+ unchanged** so the existing download workflow keeps working.
+
+### 3. RFM download-side cleanup (new) — mirrors the legacy section logic
+- **`nttt/cleanup_alerts.py`** — `fix_alerts(content, logging)` and
+ `revert_alert_translation(name, content, en_content, logging)`:
+ - Normalise blockquote alert headers (`>[!TASK]` → `> [!TASK]`, spacing, Crowdin escape quirks).
+ - Revert translated alert keywords/`ACCORDION` titles to the English form **by position
+ against the English file**, reusing the proven algorithm in
+ [`revert_section_translation`](../nttt/cleanup_sections.py) (extract → count-match → replace).
+ - Keyword set comes from `markers.py`.
+- **Wire into [`fix_md_step`](../nttt/tidyup.py)** alongside the existing legacy steps. Because
+ legacy (`--- x ---`) and RFM (`> [!X]`) patterns are disjoint, running both on every file
+ safely supports mixed files. Add matching `--disable` flags (`fix_alerts`,
+ `revert_alert_translation`) following the existing pattern in [arguments.py](../nttt/arguments.py).
+
+### 4. Make legacy `cleanup_sections.py` registry-aware (light touch)
+- Keep its generic `\w+` regexes, but source the **known legacy tag list and hide flags** from
+ `markers.py` so there is one source of truth. Avoid behavioural change to existing tests.
+
+### 5. Example workflows (deliver alongside the tool)
+- Add **`doc/workflows/`** with updated copies for content repos to adopt:
+ - **`hide-strings.yml`** — install NTTT, then
+ `crowdin string list --verbose | nttt --hide-strings > ids.txt` and loop
+ `crowdin string edit "$id" --hidden < ids.txt`. (Replaces the grep/awk/sed pipeline and
+ fixes the existing bug where the `while read` loop receives no piped input.)
+ - `nttt-processing.yml` / `upload-sources.yml` — carried over; note any version bump.
+- Reference them from the README.
+
+### 6. Tests (follow existing two-layer pattern)
+- **Unit tests** in `unit_test/`: `test_markers.py` (registry load + hideable strings),
+ `test_hide_strings.py` (filter sample `crowdin string list` text → expected IDs, legacy + RFM
+ + raw pattern rows), `test_cleanup_alerts.py` (normalise + revert, mirroring
+ [test_cleanup_sections.py](../unit_test/test_cleanup_sections.py)).
+- **Fixture tests** in `test/`: add an RFM/mixed fixture (e.g. `step_7.md` across
+ `fixtures/{input,en,output}`) exercising `> [!TASK]`/`> [!HINT]`/`> [!ACCORDION]` reverts plus
+ a legacy marker in the same file. Reuse the `_run`/`INSPECT` harness in
+ [test_fixtures.py](../test/test_fixtures.py).
+
+### 7. Local round-trip fixtures — inspect hide + restore by eye
+
+Beyond pass/fail unit tests, add **inspectable input→output fixtures** (same spirit as the
+existing `test/fixtures/{input,en,output}` + `NTTT_INSPECT` harness) so a maintainer can open
+the before/after files locally and confirm hiding and restoring look right. Two flows:
+
+**(a) Hide flow — "what would get hidden":**
+- `test/fixtures/hide/input/` — sample English source files (legacy, RFM, and mixed) **and** a
+ captured `crowdin_string_list.txt` (real `crowdin string list --verbose` output saved once).
+- `test/fixtures/hide/output/` (gitignored) — the generated hide-list IDs and a human-readable
+ report listing each matched source string next to the marker that matched it, so input vs
+ output is reviewable at a glance.
+- Test runs `nttt --hide-strings` over the sample and writes both files; assertions check the
+ expected IDs/markers are present (legacy + RFM + `hero_image`) and unrelated prose is absent.
+
+**(b) Restore flow — "translated → restored":**
+- `test/fixtures/restore/input/` — **translated** step files where markers have been mangled the
+ way Crowdin/translators do it (`\---`, jammed lines, translated `--- taak ---`, translated
+ `> [!TAREA]`, bad spacing `>[!task]`, mixed legacy+RFM in one file).
+- `test/fixtures/restore/en/` — the English reference files (the structural template).
+- `test/fixtures/restore/expected/` — the **hand-authored correct restored** version, committed
+ so we have a clear oracle.
+- `test/fixtures/restore/output/` (gitignored) — what NTTT actually produced.
+- Test runs `fix_md_step` and (in normal mode) diffs `output` vs `expected`; in
+ `NTTT_INSPECT=1` mode it skips the diff and just writes `output` so you can open
+ `input` → `output` → `expected` side by side and eyeball the round-trip.
+
+Document both flows in `doc/transformations.md` so the local-check workflow is discoverable.
+
+### 8. Docs
+- Update [doc/transformations.md](transformations.md): add the RFM alert step and the
+ hide-list mode to the pipeline description and code map.
+- New **`doc/markers.md`**: explains the registry, the legacy↔RFM mapping table, and
+ step-by-step "how to add a new block type" for non-Python maintainers.
+- Update [README.md](../README.md): document `--hide-strings` mode and link the new docs/workflows.
+- Bump `nttt/_version.py`.
+
+---
+
+## Verification
+
+1. **Unit + fixture tests:**
+ ```bash
+ python -m unittest discover -s unit_test -v
+ python -m unittest discover -s test -p "test_fixtures.py" -v
+ ```
+ Inspect mode for the new RFM fixture before locking assertions:
+ ```bash
+ NTTT_INSPECT=1 python -m unittest discover -s test -p "test_fixtures.py" -v
+ ```
+2. **Hide-list mode** against a captured sample of `crowdin string list --verbose` output
+ (saved as a test fixture): confirm it emits the IDs of legacy markers, RFM alert lines, and
+ `hero_image images/` rows — and nothing else.
+3. **Mixed-syntax round-trip:** run `fix_md_step` on a file containing both `--- task ---` and
+ `> [!TASK]` with a translated copy; confirm both are reverted to English and unrelated prose
+ is untouched.
+4. **Backward compatibility:** `nttt -Y YES` (default tidyup) still processes legacy-only
+ content identically (existing `step_1`–`step_6` fixtures pass unchanged).
+5. **Registry editability:** add a dummy block type to `markers.yml`, re-run the hide-list mode,
+ confirm it appears with no code change.
+
+---
+
+## Notes / non-goals
+- We are **not** stripping markers from files or using placeholder tokens (per the chosen
+ "generate hide-list" mechanism). Markers remain in source; Crowdin hides them.
+- Renderer HTML output (the two spec docs) is **reference for marker syntax only** — NTTT does
+ not render HTML, so those HTML blocks are not test oracles here.
+- Workflow ymls live in the content repos; we ship updated **examples**, the team wires them in.
diff --git a/doc/transformations.md b/doc/transformations.md
index 629888a..a0ad9da 100644
--- a/doc/transformations.md
+++ b/doc/transformations.md
@@ -16,42 +16,25 @@ NTTT does **not** process standalone `.html` files. HTML-related steps run on **
For each `.md` file, [`nttt/tidyup.py`](../nttt/tidyup.py) applies, in order:
-1. **`restore_tree`** — for non-English locale folders, restore Markdown markers stripped before Crowdin upload.
-2. **`fix_sections`** — normalise `---` section lines (Crowdin quirks).
-3. **`revert_section_translation`** — optional; restore English section tag lines when structure matches.
-4. **`trim_md_tags`** — strip padding inside paired Markdown delimiters (outside ` ``` ` fences).
-5. **`trim_html_tags`** — strip padding inside simple inline HTML tags (outside single `` ` `` spans).
-6. **`trim_formatting_tags`** — normalise `{ … }` attribute blocks after a word (Scratch/Pico-style).
-7. **URL rewrite:** replace `/en/` with `//` everywhere in the file body.
+1. **`fix_sections`** — normalise legacy `---` section lines (Crowdin quirks).
+2. **`revert_section_translation`** — optional; restore English section tag lines when structure matches.
+3. **`fix_alerts`** — normalise RFM blockquote alert headers (`>[! task ]` → `> [!TASK]`).
+4. **`revert_alert_translation`** — optional; revert translated RFM alert keywords to English when structure matches (keeps translated titles, e.g. ACCORDION).
+5. **`trim_md_tags`** — strip padding inside paired Markdown delimiters (outside ` ``` ` fences).
+6. **`trim_html_tags`** — strip padding inside simple inline HTML tags (outside single `` ` `` spans).
+7. **`trim_formatting_tags`** — normalise `{ … }` attribute blocks after a word (Scratch/Pico-style).
+8. **URL rewrite:** replace `/en/` with the target-language path segment (e.g. `/nl/`) everywhere in the file body.
-Steps 1–5 can be skipped via **`--disable`** (see [`nttt/arguments.py`](../nttt/arguments.py)).
+Both syntaxes are handled on every file, so a file may freely **mix** legacy and RFM markers.
+
+Steps 1–7 can be skipped via **`-D`/`--Disable`** (see [`nttt/arguments.py`](../nttt/arguments.py)):
+`fix_sections`, `revert_section_translation`, `fix_alerts`, `revert_alert_translation`,
+`fix_md`, `fix_html`, `fix_formatting`.
`meta.yml` is handled separately by **`fix_meta`** (YAML round-trip, revert non-translatable keys from English). This doc focuses on Markdown/HTML-style transforms.
---
-## Crowdin marker strip/restore (`nttt/strip.py`, `nttt/restore.py`)
-
-**Modes:** `--mode strip`, `--mode restore`, and default `--mode tidy`.
-
-| Mode | Behaviour |
-|------|-----------|
-| `strip` | Runs on `en/` before Crowdin upload. Removes structural-only markers and keeps labelled marker text translatable. |
-| `restore` | Runs on a locale folder after Crowdin download. Rebuilds markers from the matching English file. |
-| `tidy` | For non-English locale folders, runs restore first, then the existing tidy transforms. |
-
-**Marker classification (`nttt/markers.py`):**
-
-| Kind | Pattern | Strip output | Restore output |
-|------|---------|--------------|----------------|
-| Modern bare | `> [!TASK]`, `> [!SAVE]`, nested forms like `> > [!HINT]` | Dropped. A following empty blockquote line (`>`, `> >`) is also dropped. | Copied back from `en/`. |
-| Modern labelled | `> [!ACCORDION] Where are my voice recordings stored?` | Rewritten to `> Where are my voice recordings stored?`. | Rewritten to `> [!ACCORDION] `. |
-| Legacy bare | `--- task ---`, `--- /task ---`, `--- print-only ---`, `--- feedback ---` | Dropped. | Copied back from `en/`. |
-
-Restore uses line-index alignment against the stripped English file. If the translated file already contains at least as many legacy bare marker lines as the English reference, restore is skipped for that file to avoid duplicating markers. If the translated file has a different number of lines from the stripped English reference, NTTT logs a warning and leaves that file unchanged for this step.
-
-Fenced code blocks split by ` ``` ` are not stripped.
-
## 1. Section markers (`nttt/cleanup_sections.py`)
**Function:** `fix_sections`
@@ -127,8 +110,57 @@ After cleanup: **replace every `/en/` with `//`** in the Markdown file
|---------|--------|
| Orchestration | `nttt/tidyup.py`, `nttt/__init__.py` |
| CLI / disable flags | `nttt/arguments.py` |
-| Sections | `nttt/cleanup_sections.py` |
+| Sections (legacy `--- … ---`) | `nttt/cleanup_sections.py` |
+| Alerts (RFM `> [!…]`) | `nttt/cleanup_alerts.py` |
+| Marker registry (both syntaxes) | `nttt/markers.yml`, `nttt/markers.py` |
+| Hide-list generation | `nttt/hide_strings.py` |
| Markdown emphasis / code delimiters | `nttt/cleanup_markdown.py` |
| Inline HTML | `nttt/cleanup_html.py` |
| Brace attributes | `nttt/cleanup_formatting.py` |
| Split "every other segment" | `nttt/utilities.py` → `apply_to_every_other_part` |
+
+---
+
+## Running the fixture tests
+
+`test/test_fixtures.py` contains six integration tests: one per transformation type, plus one that combines them all. Each test runs a real Dutch-translation `.md` file through `fix_md_step` and writes the result to `test/fixtures/output/` (gitignored) so you can open it and compare it with the input.
+
+| Fixture | What it covers |
+|---------|---------------|
+| `step_1.md` | Section markers — escaped `\---`, jammed lines, section name revert |
+| `step_2.md` | Markdown delimiters — `_ text _`, `** text **`; code block preserved |
+| `step_3.md` | Inline HTML — `<kbd> Enter </kbd>` → `<kbd>Enter</kbd>`; backtick spans preserved |
+| `step_4.md` | Formatting braces — `{ : class = "..."}`, `_blank` target |
+| `step_5.md` | URL rewrite — `/en/` → `/nl/` |
+| `step_6.md` | All of the above combined |
+
+**Normal run** (assertions on — use in CI or to catch regressions):
+
+```bash
+python -m unittest discover -s test -p "test_fixtures.py" -v
+```
+
+**Inspect mode** (assertions off — use when adding new input to see the raw output before writing assertions):
+
+```bash
+NTTT_INSPECT=1 python -m unittest discover -s test -p "test_fixtures.py" -v
+```
+
+After either run, open any file in `test/fixtures/output/` alongside its counterpart in `test/fixtures/input/` to see before and after.
+
+## Round-trip checks (hide + restore)
+
+`test/test_roundtrip.py` lets you inspect the two hide/restore flows locally (same
+`NTTT_INSPECT` convention):
+
+| Flow | Fixtures | What it shows |
+|------|----------|---------------|
+| **Hide** | `test/fixtures/hide/` | A captured `crowdin string list --verbose` (`input/`) → the IDs to hide and a `report.txt` matching each ID to its marker (`output/`). |
+| **Restore** | `test/fixtures/restore/` | A mangled translation (`input/`) + the English template (`en/`) → the restored file (`output/`), diffed against the committed oracle (`expected/`). Exercises a file that **mixes** legacy `--- task ---` with RFM `> [!HINT]` / `> [!ACCORDION]`. |
+
+```bash
+python -m unittest discover -s test -p "test_roundtrip.py" -v # assertions on
+NTTT_INSPECT=1 python -m unittest discover -s test -p "test_roundtrip.py" -v # write outputs only
+```
+
+See [doc/markers.md](markers.md) for the marker registry and hide-strings mode.
diff --git a/doc/workflows/hide-strings.yml b/doc/workflows/hide-strings.yml
new file mode 100644
index 0000000..7beed73
--- /dev/null
+++ b/doc/workflows/hide-strings.yml
@@ -0,0 +1,59 @@
+# Example workflow for CONTENT repositories (not this tool repo).
+#
+# Hides structural markers from translators in Crowdin. The list of markers is
+# generated by NTTT from its marker registry (markers.yml), covering both the
+# legacy (--- task ---) and RFM (> [!TASK]) syntaxes. This replaces the previous
+# hand-written grep/awk/sed pipeline.
+#
+# Copy this into the content repo's .github/workflows/ directory.
+
+name: Crowdin Hide Strings
+
+on:
+ workflow_run:
+ workflows: ["Crowdin Upload Action"]
+ types:
+ - completed
+
+jobs:
+ crowdin-hide:
+ if: ${{ github.event.workflow_run.conclusion == 'success' }}
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Set up Python 3.11
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+
+ - name: Install NTTT
+ run: |
+ python -m pip install --upgrade pip
+ pip install git+https://github.com/raspberrypilearning/nttt.git
+
+ - name: Install Crowdin CLI
+ run: |
+ curl -L https://github.com/crowdin/crowdin-cli/releases/latest/download/crowdin-cli.zip -o crowdin-cli.zip
+ unzip crowdin-cli.zip -d crowdin-cli
+ mkdir -p ~/bin
+ mv crowdin-cli/*/crowdin ~/bin/crowdin
+ cp crowdin-cli/*/crowdin-cli.jar ~/bin/crowdin-cli.jar
+ chmod +x ~/bin/crowdin
+ echo "PATH=$HOME/bin:$PATH" >> $GITHUB_ENV
+
+ - name: Hide matching strings
+ run: |
+ set -euo pipefail
+ crowdin --version
+ # NTTT reads the Crowdin listing and prints the IDs of strings to hide
+ # (those containing a marker from markers.yml, legacy or RFM).
+ crowdin string list --verbose | nttt --hide-strings > ids.txt
+ echo "Hiding $(wc -l < ids.txt) strings"
+ while read -r id; do
+ crowdin string edit "$id" --hidden
+ done < ids.txt
+ env:
+ CROWDIN_PROJECT_ID: ${{ secrets.CROWDIN_PROJECT_ID }}
+ CROWDIN_API_TOKEN: ${{ secrets.CROWDIN_API_TOKEN }}
diff --git a/nttt/__init__.py b/nttt/__init__.py
index 546ab30..42b046e 100644
--- a/nttt/__init__.py
+++ b/nttt/__init__.py
@@ -1,24 +1,17 @@
from .arguments import parse_command_line, resolve_arguments, check_arguments, show_arguments
-from .constants import ArgumentKeyConstants, Modes
-from .restore import restore_tree
-from .strip import strip_tree
from .tidyup import tidyup_translations
+from .hide_strings import run as run_hide_strings
from ._version import __version__
def main():
command_line_args = parse_command_line(__version__)
+
+ # Hide-strings mode: generate the Crowdin hide-list from stdin and exit.
+ if getattr(command_line_args, "hide_strings", False):
+ run_hide_strings()
+ return
+
resolved_arguments = resolve_arguments(command_line_args)
show_arguments(resolved_arguments)
if (check_arguments(resolved_arguments)):
- mode = resolved_arguments[ArgumentKeyConstants.MODE]
- if mode == Modes.STRIP:
- strip_tree(
- resolved_arguments[ArgumentKeyConstants.INPUT],
- resolved_arguments[ArgumentKeyConstants.OUTPUT])
- elif mode == Modes.RESTORE:
- restore_tree(
- resolved_arguments[ArgumentKeyConstants.INPUT],
- resolved_arguments[ArgumentKeyConstants.ENGLISH],
- resolved_arguments[ArgumentKeyConstants.OUTPUT])
- else:
- tidyup_translations(resolved_arguments)
+ tidyup_translations(resolved_arguments)
diff --git a/nttt/_version.py b/nttt/_version.py
index a4cb70e..a42cda7 100644
--- a/nttt/_version.py
+++ b/nttt/_version.py
@@ -1,2 +1,2 @@
# The approach is taken from https://packaging.python.org/guides/single-sourcing-package-version/
-__version__ = "0.5.3"
+__version__ = "0.6.0"
diff --git a/nttt/arguments.py b/nttt/arguments.py
index 6e2ca09..b8091ef 100644
--- a/nttt/arguments.py
+++ b/nttt/arguments.py
@@ -1,4 +1,4 @@
-from .constants import ArgumentKeyConstants, Modes
+from .constants import ArgumentKeyConstants
import os
from pathlib import Path
from argparse import ArgumentParser
@@ -51,22 +51,24 @@ def parse_command_line(version):
parser.add_argument("-l", "--language", help="The language of the content to be tidied up, defaults to basename(INPUT).")
parser.add_argument("-v", "--volunteers", help="The list of volunteers as a comma separated list, defaults to an empty list.")
parser.add_argument("-f", "--final", help="The number of the final step file, defaults to the step file with the highest number.")
- parser.add_argument("-m", "--mode", choices=[Modes.TIDY, Modes.STRIP, Modes.RESTORE],
- help="The processing mode. Options are: tidy (default cleanup), "
- "strip (remove non-translatable structural markers before Crowdin upload), "
- "restore (restore stripped structural markers after Crowdin download). "
- "Default is tidy.")
parser.add_argument("-D", "--Disable", help="The risky features to be disabled, separated by commas. "
"Options are: fix_md (fix common markdown-related issues), "
"fix_html (fix common issues in HTML-like tags (Return)), "
"fix_sections (fix common issues in section tags (--- hint ---)), "
"revert_section_translation (revert translation for section tags), "
+ "fix_alerts (fix common issues in RFM alert tags (> [!HINT])), "
+ "revert_alert_translation (revert translation for RFM alert tags), "
"fix_formatting (fix common issues in formatting tags ({:class=\"block3motion\"})). "
"Defaults to all risky features to be enabled.")
parser.add_argument("-L", "--Logging", help="Logging of modifications. Options are on and off. Default is off.")
parser.add_argument("-Y", "--Yes", help="Automatic yes to prompts. "
"If enabled assume 'yes' as answer to all prompts and run non-interactively. "
"Options are on and off. Default is off.")
+ parser.add_argument("--hide-strings", action="store_true",
+ help="Hide-strings mode. Reads 'crowdin string list --verbose' "
+ "output on stdin and prints the IDs of strings to hide "
+ "(those containing a marker from markers.yml), one per line. "
+ "Does not tidy up any files.")
return parser.parse_args()
@@ -125,11 +127,6 @@ def resolve_arguments(command_line_args):
else:
arguments[ArgumentKeyConstants.YES] = "off"
- if hasattr(command_line_args, "mode") and command_line_args.mode:
- arguments[ArgumentKeyConstants.MODE] = command_line_args.mode
- else:
- arguments[ArgumentKeyConstants.MODE] = Modes.TIDY
-
return arguments
@@ -148,7 +145,6 @@ def show_arguments(arguments):
print("Disabled functions - '{}'".format(arguments[ArgumentKeyConstants.DISABLE]))
print("Logging - '{}'".format(arguments[ArgumentKeyConstants.LOGGING]))
print("Yes - '{}'".format(arguments[ArgumentKeyConstants.YES]))
- print("Mode - '{}'".format(arguments[ArgumentKeyConstants.MODE]))
def check_folder(folder):
diff --git a/nttt/cleanup_alerts.py b/nttt/cleanup_alerts.py
new file mode 100644
index 0000000..bece627
--- /dev/null
+++ b/nttt/cleanup_alerts.py
@@ -0,0 +1,86 @@
+"""
+Clean-up for Raspberry Flavoured Markdown (RFM) blockquote alerts, e.g.
+
+ > [!TASK]
+ > [!HINT]
+ > [!ACCORDION] Downloading the software
+
+This mirrors ``cleanup_sections.py`` (which handles the legacy ``--- task ---``
+syntax). Both run on every file, so a file may freely mix the two syntaxes.
+"""
+import re
+import sys
+from .nttt_logging import log_replacement
+
+
+# Matches a blockquote alert header line. Tolerates:
+# * a missing/extra space after '>' ( ">[!TASK]" )
+# * spaces inside the brackets ( "> [! TASK ]" )
+# * a Crowdin backslash escape ( "> \[!TASK]" )
+# * nested blockquote levels ( "> > [!HINT]" )
+# Captures the blockquote prefix, the keyword, and any trailing title text
+# (the title is translatable, e.g. for ACCORDION, so it is preserved).
+_ALERT_HEADER_RE = re.compile(
+ r"^(?P<prefix>[ \t]*(?:>[ \t]*)+)\\?\[!\s*(?P<kw>[^\]\r\n]+?)\s*\](?P<title>[ \t]*[^\r\n]*)$",
+ re.MULTILINE,
+)
+
+
+def _normalise_prefix(prefix):
+ """Collapse a blockquote prefix to one space after each '>' ("> > ")."""
+ levels = prefix.count(">")
+ return "> " * levels
+
+
+def _format_alert(prefix, keyword, title):
+ new_prefix = _normalise_prefix(prefix)
+ title = title.strip()
+ new_title = (" " + title) if title else ""
+ return f"{new_prefix}[!{keyword}]{new_title}"
+
+
+def fix_alerts(md_file_content, logging):
+ """Normalise RFM alert header spacing/case (e.g. ">[! task ]" -> "> [!TASK]")."""
+
+ def replacement(matchobj):
+ keyword = matchobj.group("kw").strip().upper()
+ new_line = _format_alert(matchobj.group("prefix"), keyword, matchobj.group("title"))
+ log_replacement(matchobj.group(0), new_line, logging)
+ return new_line
+
+ return _ALERT_HEADER_RE.sub(replacement, md_file_content)
+
+
+def revert_alert_translation(md_file_name, md_file_content, en_file_content, logging):
+ """
+ Reverts translated alert keywords back to English (e.g. "> [!TAREA]" ->
+ "> [!TASK]") by position against the English file, keeping any translated
+ title text. Only runs when the alert counts match, mirroring
+ ``revert_section_translation``.
+ """
+ md_lines = md_file_content.split("\n")
+ en_lines = en_file_content.split("\n")
+
+ md_indices = [i for i, line in enumerate(md_lines) if _ALERT_HEADER_RE.match(line)]
+ en_keywords = [
+ _ALERT_HEADER_RE.match(line).group("kw").strip().upper()
+ for line in en_lines
+ if _ALERT_HEADER_RE.match(line)
+ ]
+
+ if len(md_indices) != len(en_keywords):
+ print(
+ "Warning ({}): Different alert structure in the original (en) and the "
+ "translated pages. Reverting of translated alert keywords will not be "
+ "performed".format(md_file_name),
+ file=sys.stderr,
+ )
+ return md_file_content
+
+ for position, line_index in enumerate(md_indices):
+ match = _ALERT_HEADER_RE.match(md_lines[line_index])
+ new_line = _format_alert(match.group("prefix"), en_keywords[position], match.group("title"))
+ log_replacement(md_lines[line_index], new_line, logging)
+ md_lines[line_index] = new_line
+
+ return "\n".join(md_lines)
diff --git a/nttt/constants.py b/nttt/constants.py
index 1b08b17..ce14cee 100644
--- a/nttt/constants.py
+++ b/nttt/constants.py
@@ -17,13 +17,6 @@ class ArgumentKeyConstants:
DISABLE = 'DISABLE'
LOGGING = 'LOGGING'
YES = 'YES'
- MODE = 'MODE'
-
-
-class Modes:
- TIDY = "tidy"
- STRIP = "strip"
- RESTORE = "restore"
class RegexConstants:
diff --git a/nttt/hide_strings.py b/nttt/hide_strings.py
new file mode 100644
index 0000000..d310a3f
--- /dev/null
+++ b/nttt/hide_strings.py
@@ -0,0 +1,72 @@
+"""
+Generates the list of Crowdin string IDs that should be hidden from translators.
+
+Reads the output of ``crowdin string list --verbose`` (on stdin) and prints, one
+per line, the numeric ID of every string whose source text contains a marker
+listed in the registry (see ``markers.py`` / ``markers.yml``). This replaces the
+hand-written grep/awk/sed pipeline that used to live in ``hide-strings.yml`` and
+covers both the legacy and RFM syntaxes.
+
+Typical use in CI:
+
+ crowdin string list --verbose | nttt --hide-strings > ids.txt
+ while read -r id; do crowdin string edit "$id" --hidden; done < ids.txt
+"""
+import re
+import sys
+from .markers import hideable_strings
+
+
+# The verbose listing puts the string ID first, e.g. "#12345 source text ...".
+_ID_RE = re.compile(r"^#?(\d+)\b")
+
+
+def find_hidden_strings(string_list_text, markers=None):
+ """
+ Returns a list of dicts ``{"id", "marker", "source"}``, one per line of the Crowdin
+ listing that contains a hideable marker (first match wins) and starts with a numeric ID.
+ """
+ markers = hideable_strings() if markers is None else markers
+ results = []
+
+ for line in string_list_text.splitlines():
+ matched = next((marker for marker in markers if marker in line), None)
+ if matched is None:
+ continue
+
+ tokens = line.split()
+ if not tokens:
+ continue
+ id_match = _ID_RE.match(tokens[0])
+ if id_match:
+ results.append({"id": id_match.group(1), "marker": matched, "source": line.strip()})
+
+ return results
+
+
+def unique_ids(results):
+ """Returns the IDs from ``find_hidden_strings`` de-duplicated, order preserved."""
+ seen = set()
+ ids = []
+ for result in results:
+ if result["id"] not in seen:
+ seen.add(result["id"])
+ ids.append(result["id"])
+ return ids
+
+
+def format_report(results):
+ """Human-readable 'id <- matched marker <- source' report, for inspection."""
+ return "\n".join(
+ "{id}\t{marker}\t{source}".format(**result) for result in results
+ )
+
+
+def run(input_stream=None, output_stream=None):
+ """Reads a Crowdin listing from ``input_stream`` and prints IDs to hide."""
+ input_stream = input_stream if input_stream is not None else sys.stdin
+ output_stream = output_stream if output_stream is not None else sys.stdout
+
+ results = find_hidden_strings(input_stream.read())
+ for string_id in unique_ids(results):
+ print(string_id, file=output_stream)
diff --git a/nttt/markers.py b/nttt/markers.py
index 81e5366..a07a40e 100644
--- a/nttt/markers.py
+++ b/nttt/markers.py
@@ -1,96 +1,99 @@
+"""
+Loads and exposes the marker registry (``markers.yml``).
+
+This is the single source of truth for the structural markers used in both the
+legacy (kramdown-rpf) and the Raspberry Flavoured Markdown (RFM) syntaxes. Other
+modules ask this module *what* the markers are; the actual list lives in the data
+file so it can be edited without touching Python.
+"""
+import os
import re
-
-
-LINE_KIND_BARE_MARKER = "bare"
-LINE_KIND_LABELLED_MARKER = "labelled"
-LINE_KIND_PAIRED_EMPTY_BLOCKQUOTE = "paired_empty_blockquote"
-LINE_KIND_REGULAR = "regular"
-
-
-RFM_BARE_MARKER_PATTERN = re.compile(
- r'^(?P\s*(?:>\s*)+)\[!(?P[A-Z][A-Z0-9_-]*)\]\s*$'
-)
-
-RFM_LABELLED_MARKER_PATTERN = re.compile(
- r'^(?P\s*(?:>\s*)+)\[!(?P[A-Z][A-Z0-9_-]*)\]\s+(?P