Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
499 changes: 281 additions & 218 deletions chardesc.tsv

Large diffs are not rendered by default.

Empty file modified chardescupdate
100644 → 100755
Empty file.
117 changes: 109 additions & 8 deletions checktrans
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,109 @@
#!/bin/bash
# Transcription checking script.
# Transcribes checktrans-cases.ssml, compares the result with
# checktrans-expected-results.txt and prints any differences found.

# Work from the script's own directory so the relative paths below resolve.
# The quoting keeps this working when the path contains whitespace, and the
# script stops early if the directory cannot be entered.
pushd "$(dirname "$0")" >/dev/null || exit 1
# diff reads the transcriber output from stdin: -b ignores changes in the
# amount of whitespace, -u0 prints unified hunks without context lines.
# The success message and popd run only when diff reports no differences.
../../../local/bin/RHVoice-transcribe-sentences checktrans-cases.ssml /dev/stdout |
diff -b -u0 checktrans-expected-results.txt /dev/stdin &&
echo "All $(wc -l <checktrans-expected-results.txt) tests passed." &&
popd >/dev/null
#!/usr/bin/env python3

import subprocess
import sys
from pathlib import Path
from xml.sax.saxutils import escape


def normalize_transcription(text: str) -> str:
    """Return *text* in a canonical form suitable for comparison.

    Whitespace is collapsed first (every run of whitespace becomes a single
    space, leading/trailing whitespace is dropped) so that the subsequent
    removal of one leading ``pau`` marker and one trailing ``pau`` marker
    does not depend on which whitespace characters separate the tokens.

    A transcription consisting solely of ``pau`` is returned unchanged.
    """
    collapsed = " ".join(text.split())
    # Only whole tokens are stripped: the separator space is part of the
    # prefix/suffix, so words that merely start or end with "pau" survive.
    return collapsed.removeprefix("pau ").removesuffix(" pau")


def split_test_line(line: str) -> tuple[str, str]:
    """Split *line* at the first ``=`` that is not inside a quoted string.

    A quoted string starts at ``'`` or ``"`` and runs to the next occurrence
    of the same character. The two returned pieces are the raw text before
    and after the separator; they are not stripped or unquoted.

    Raises:
        ValueError: if a quote is still open at the end of the line, or if
            no unquoted ``=`` is present.
    """
    open_quote = None
    for position, symbol in enumerate(line):
        if open_quote is not None:
            # Inside a quoted string only the matching quote is significant.
            if symbol == open_quote:
                open_quote = None
            continue
        if symbol in ("'", '"'):
            open_quote = symbol
        elif symbol == "=":
            return line[:position], line[position + 1 :]

    if open_quote is None:
        raise ValueError("missing '=' separator")
    raise ValueError("unterminated quoted string")


def parse_test_field(field: str) -> str:
    """Return the value of one side of a test line.

    Surrounding whitespace is removed. If the remaining text begins with
    ``'`` or ``"`` it must also end with that same character, and the text
    between the two quotes is returned; otherwise the stripped text itself
    is returned.

    Raises:
        ValueError: if a quoted value lacks its closing quote, or if the
            quote character also appears inside the quoted text.
    """
    value = field.strip()
    # value[:1] is "" for an empty field, which falls through as unquoted.
    if value[:1] not in ("'", '"'):
        return value

    delimiter = value[0]
    if len(value) == 1 or not value.endswith(delimiter):
        raise ValueError("unterminated quoted string")

    body = value[1:-1]
    if delimiter in body:
        raise ValueError(f"unexpected {delimiter} inside quoted string")
    return body


def load_tests(path: Path) -> dict[str, str]:
    """Read the test definitions stored in *path*.

    The file is decoded as UTF-8 and processed line by line. Blank lines and
    lines whose first non-blank character is ``#`` or ``;`` are skipped.
    Every other line must have the form ``case = expected``; either side may
    be quoted (see ``parse_test_field``).

    Returns:
        A mapping from the text to transcribe to its expected transcription,
        in file order.

    Raises:
        ValueError: if a line cannot be parsed, or if a case is repeated
            with an expected transcription that differs after
            normalization. The message is prefixed with ``path:line``.
    """
    tests: dict[str, str] = {}
    lines = path.read_text(encoding="utf-8").splitlines()
    for line_number, raw_line in enumerate(lines, start=1):
        stripped = raw_line.strip()
        if not stripped or stripped.startswith(("#", ";")):
            continue
        try:
            raw_case, raw_expected = split_test_line(raw_line)
            case = parse_test_field(raw_case)
            expected = parse_test_field(raw_expected)
        except ValueError as exc:
            raise ValueError(f"{path}:{line_number}: {exc}") from exc
        # A dict holds one expectation per case, so a repeated case with a
        # different expectation would silently discard one of the tests.
        previous = tests.get(case)
        if previous is not None and (
            normalize_transcription(previous) != normalize_transcription(expected)
        ):
            raise ValueError(
                f"{path}:{line_number}: conflicting expected result "
                f"for duplicate case {case!r}"
            )
        tests[case] = expected
    return tests


def main() -> int:
    """Run every transcription test and report the outcome.

    The cases from ``checktrans-tests.ini`` (next to this script) are wrapped
    in a single SSML document, one ``<s>`` element per case, and piped to
    ``RHVoice-transcribe-sentences``. Each output line is normalized and
    compared with the normalized expected transcription of the case at the
    same position.

    Returns:
        0 when all tests pass; 1 when the tests file cannot be loaded, the
        transcriber cannot be started, or any comparison fails; otherwise
        the transcriber's own non-zero exit status.
    """
    script_dir = Path(__file__).resolve().parent
    tests_path = script_dir / "checktrans-tests.ini"
    transcriber = script_dir / "../../../local/bin/RHVoice-transcribe-sentences"

    try:
        tests = load_tests(tests_path)
    except (OSError, ValueError) as exc:
        # OSError covers a missing or unreadable tests file.
        print(exc, file=sys.stderr)
        return 1

    cases = list(tests)
    ssml_lines = ['<speak xml:lang="pl">']
    ssml_lines.extend(f"<s>{escape(case)}</s>" for case in cases)
    ssml_lines.append("</speak>")
    ssml_input = "\n".join(ssml_lines) + "\n"

    try:
        result = subprocess.run(
            [str(transcriber), "/dev/stdin", "/dev/stdout"],
            cwd=script_dir,
            input=ssml_input,
            text=True,
            # The SSML carries no encoding declaration, so send it as UTF-8
            # regardless of the locale this script happens to run under.
            encoding="utf-8",
            capture_output=True,
        )
    except OSError as exc:
        print(f"Cannot run {transcriber}: {exc}", file=sys.stderr)
        return 1
    if result.returncode != 0:
        if result.stderr:
            sys.stderr.write(result.stderr)
        return result.returncode

    expected = [normalize_transcription(tests[case]) for case in cases]
    actual = [normalize_transcription(line) for line in result.stdout.splitlines()]

    if actual == expected:
        print(f"All {len(expected)} tests passed.")
        return 0

    for case, expected_text, actual_text in zip(cases, expected, actual):
        if expected_text != actual_text:
            print(f"Failed: {case}")
            print(f'Expected: "{expected_text}"')
            print(f'Got: "{actual_text}"')
    # zip() stops at the shorter sequence, so a difference in length would
    # otherwise fail the run without printing any explanation.
    if len(actual) != len(expected):
        print(f"Expected {len(expected)} transcription lines, got {len(actual)}.")
    return 1


# Run the checks only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
280 changes: 0 additions & 280 deletions checktrans-cases.ssml

This file was deleted.

Loading