diff --git a/.github/workflows/noneeditable.yml b/.github/workflows/noneeditable.yml index 574b350b..8e1f1d81 100644 --- a/.github/workflows/noneeditable.yml +++ b/.github/workflows/noneeditable.yml @@ -10,12 +10,11 @@ on: branches: [ main ] jobs: - build: - + build-ubuntu: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python 3.11 uses: actions/setup-python@v4 with: python-version: '3.11' @@ -40,3 +39,26 @@ jobs: - name: Test with pytest run: | python -m pytest tests/test_cli.py # Cli test are here enough + build-windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade cython + python -m pip install --upgrade setuptools + python -m pip install flake8 + # https://github.com/pypa/pip/issues/12030#issuecomment-1546344047 + python -m pip install wheel + pip install -r requirements.txt + python -m pip list + python -m pip install '.[test]' + - name: Test with pytest + run: | + python -m pytest tests/test_cli.py # Cli test are here enough diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f6258c7f..1226edb2 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -10,15 +10,13 @@ on: branches: [ main ] jobs: - build: - + test-ubuntu: runs-on: ubuntu-latest strategy: fail-fast: false matrix: # Remember to update "classifiers" in setup.py when changing Python version - python-version: [3.8, 3.9, '3.10', '3.11', '3.12'] - + python-version: [3.9, '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -34,16 +32,44 @@ jobs: python -m pip install flake8 # https://github.com/pypa/pip/issues/12030#issuecomment-1546344047 python -m pip install wheel - 
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -r requirements.txt python -m pip install pytest-cov codecov python -m pip list python -m pip install -e '.[all]' - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - python -m flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | python -m pytest + test-windows: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + # Remember to update "classifiers" in setup.py when changing Python version + python-version: ['3.12'] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Perl + uses: shogo82148/actions-setup-perl@v1 + with: + perl-version: "5.38" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade cython + python -m pip install --upgrade setuptools + python -m pip install flake8 + # https://github.com/pypa/pip/issues/12030#issuecomment-1546344047 + python -m pip install wheel + pip install -r requirements.txt + python -m pip install pytest-cov codecov + python -m pip list + python -m pip install -e '.[all]' + - name: Test with pytest + run: | + # Ignore doctests on Windows. They are hard to guard w.r.t. 
file paths + python -m pytest tests diff --git a/meeteval/viz/__main__.py b/meeteval/viz/__main__.py index 34d2e917..946ff275 100644 --- a/meeteval/viz/__main__.py +++ b/meeteval/viz/__main__.py @@ -112,7 +112,7 @@ def load_per_reco_file(alignment, f): # Make the save_path relative to the index.html file such that # the links work when moving the folder av.data['save_path'] = str(save_name) - av.data['absolute_path'] = save_path.absolute() + av.data['absolute_path'] = str(save_path.absolute()) avs.append(av.data) dump_overview_table(avs, out / 'index.html') @@ -224,7 +224,15 @@ def resolve_system_names(avs): shutil.copy(av['absolute_path'], f_new) av['absolute_path'] = f_new - av['save_path'] = os.path.relpath(av['absolute_path'], out.parent) + try: + av['save_path'] = str(os.path.relpath(av['absolute_path'], out.parent)) + except ValueError: + # os.path.relpath raises ValueError on Windows when the two paths are + # on different drives. Fall back to the absolute path in that case. + # The relative path exists so that the links keep working when the + # folders are copied or moved; an absolute path breaks after a move! + av['save_path'] = str(av['absolute_path']) + av['absolute_path'] = str(av['absolute_path']) out = Path(out) out.parent.mkdir(parents=True, exist_ok=True) diff --git a/meeteval/viz/overview_table.py b/meeteval/viz/overview_table.py index 836e4f32..3fd309d1 100644 --- a/meeteval/viz/overview_table.py +++ b/meeteval/viz/overview_table.py @@ -62,8 +62,10 @@ def get_average_wer(data): f',\n{indent}]' ) html = (Path(__file__).parent / 'overview_table.html').read_text() - import re + + # Escape backslashes for windows paths. 
re.escape escapes too much + html_data = html_data.replace('\\', '\\\\') html, n = re.subn(f'// DATA START((.|\n)*)// DATA END', f'const data = {html_data};', html) assert n == 1, (html, n) return html diff --git a/meeteval/viz/visualize.py b/meeteval/viz/visualize.py index dede1487..ecc1e257 100644 --- a/meeteval/viz/visualize.py +++ b/meeteval/viz/visualize.py @@ -81,7 +81,9 @@ def nested_round(obj): if isinstance(path, io.IOBase): simplejson.dump(obj, path, indent=indent, - sort_keys=sort_keys, **kwargs) + sort_keys=sort_keys, + for_json=True, + **kwargs) elif isinstance(path, (str, Path)): path = Path(path).expanduser() diff --git a/setup.py b/setup.py index c140e073..9f5d6f85 100644 --- a/setup.py +++ b/setup.py @@ -1,46 +1,55 @@ +import sys from distutils.extension import Extension import numpy from setuptools import setup, find_packages from Cython.Build import cythonize + +is_windows = sys.platform.startswith('win') + +if is_windows: + cythonize_args = dict( + extra_compile_args=['/std:c++20', '/O2'], + extra_link_args=['/std:c++20'], + ) +else: + cythonize_args = dict( + extra_compile_args=['-std=c++11', '-O3'], + extra_link_args=['-std=c++11'], + ) + ext_modules = cythonize( [ Extension( 'meeteval.wer.matching.cy_orc_matching', ['meeteval/wer/matching/cy_orc_matching.pyx'], - extra_compile_args=['-std=c++11'], - extra_link_args=['-std=c++11'], + **cythonize_args, ), Extension( 'meeteval.wer.matching.cy_mimo_matching', ['meeteval/wer/matching/cy_mimo_matching.pyx'], - extra_compile_args=['-std=c++11'], - extra_link_args=['-std=c++11'], + **cythonize_args, ), Extension( 'meeteval.wer.matching.cy_levenshtein', ['meeteval/wer/matching/cy_levenshtein.pyx'], - extra_compile_args=['-std=c++11'], - extra_link_args=['-std=c++11'], + **cythonize_args, ), Extension( 'meeteval.wer.matching.cy_time_constrained_orc_matching', ['meeteval/wer/matching/cy_time_constrained_orc_matching.pyx'], - extra_compile_args=['-std=c++11', '-O3'], - extra_link_args=['-std=c++11'], + 
**cythonize_args, ), Extension( 'meeteval.wer.matching.cy_greedy_combination_matching', ['meeteval/wer/matching/cy_greedy_combination_matching.pyx'], - extra_compile_args=['-std=c++11', '-O3'], - extra_link_args=['-std=c++11'], + **cythonize_args, ), Extension( 'meeteval.wer.matching.cy_time_constrained_mimo_matching', ['meeteval/wer/matching/cy_time_constrained_mimo_matching.pyx'], - extra_compile_args=['-std=c++11'], - extra_link_args=['-std=c++11'], + **cythonize_args, ), ] ) @@ -53,7 +62,6 @@ 'aiohttp', 'soundfile', 'tqdm', # Used in meeteval.viz.__main__.py - 'yattag', # Used in meeteval.viz.__main__.py 'platformdirs', # Used in meeteval.viz.visualize.py ] extras_require['test'] = [ diff --git a/tests/test_cli.py b/tests/test_cli.py index 3f5506b2..04725f01 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,3 +1,4 @@ +import sys import subprocess from pathlib import Path import shutil @@ -5,8 +6,14 @@ example_files = (Path(__file__).parent.parent / 'example_files').absolute() +on_windows = sys.platform.startswith('win') def run(cmd, cwd=example_files): + + # Translate file paths for windows + if on_windows: + cmd = cmd.replace('/', '\\') + cp = subprocess.run( cmd, shell=True, @@ -15,7 +22,7 @@ def run(cmd, cwd=example_files): check=False, universal_newlines=True, cwd=cwd, - executable='bash', # echo "<(cat hyp.stm)" requires bash not sh. + executable=None if on_windows else 'bash', # echo "<(cat hyp.stm)" requires bash not sh. 
) if cp.returncode == 0: @@ -30,18 +37,10 @@ def run(cmd, cwd=example_files): f'\n\nstdout:\n{cp.stdout}' f'\n\nstderr:\n{cp.stderr}' ) - -def test_burn_orc(): - # Normal test with stm files - run(f'python -m meeteval.wer orcwer -h hyp.stm -r ref.stm') - # assert (example_files / 'hyp_orc.json').exists() - # assert (example_files / 'hyp_orc_per_reco.json').exists() - run(f'meeteval-wer orcwer -h hyp.stm -r ref.stm') - - # Multiple stm files - run(f"python -m meeteval.wer orcwer -h hypA.stm -h hypB.stm -r refA.stm -r refB.stm") - run(f"python -m meeteval.wer orcwer -h hyp.stm -h hypA.stm hypB.stm -r refA.stm refB.stm") +@pytest.mark.skipif(on_windows, reason='Bash features do not work on Windows.') +def test_bash(): + """Tests Bash-related features like piping and globbing.""" # Test with glob (backwards compatibility). Note: '?' and '*' are escaped. # Be careful, that the glob only matches the desired files. # The 'hyp*.stm' will here match 'hyp.stm', 'hypA.stm' and 'hypB.stm'. @@ -52,17 +51,10 @@ def test_burn_orc(): run(f"python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm") run(f"python -m meeteval.wer orcwer -h hyp?.stm -r ref?.stm") - # Test with ctm files - run(f'python -m meeteval.wer orcwer -h hyp1.ctm -h hyp2.ctm -r ref.stm') - run(f"python -m meeteval.wer orcwer -h 'hyp*.ctm' -r ref.stm") - run(f'python -m meeteval.wer orcwer -h hyp1.ctm -r ref.stm') + # Test path pattern completion + run("python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out {parent}/{stem}-average-out.yaml") - # Test output formats - run(f"python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out average-out.json") - # assert (example_files / 'average-out.json').exists() - run("python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out '{parent}/{stem}-average-out.yaml'") - # assert (example_files / 'hyp-average-out.yaml').exists() - # Output to stdout. Specifying the format requires = + # Test output to stdout. 
Specifying the format requires = run(f"python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out -") run(f"python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out=-.yaml") run(f"python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out=-.json") @@ -70,6 +62,23 @@ def test_burn_orc(): # Test with pipes. Makes "--average-out" file and "--per-reco-out" file # mandatory. run(f'python -m meeteval.wer orcwer -h <(cat hypA.stm hypB.stm) -r <(cat refA.stm refB.stm) --average-out hyp_orc.json --per-reco-out hyp_orc_per_reco.json') + + # Test output formats + run(f"python -m meeteval.wer orcwer -h hyp*.stm -r ref*.stm --average-out average-out.json") + +def test_burn_orc(): + # Normal test with stm files + run(f'python -m meeteval.wer orcwer -h hyp.stm -r ref.stm') + run(f'meeteval-wer orcwer -h hyp.stm -r ref.stm') + + # Multiple stm files + run(f"python -m meeteval.wer orcwer -h hypA.stm -h hypB.stm -r refA.stm -r refB.stm") + run(f"python -m meeteval.wer orcwer -h hyp.stm -h hypA.stm hypB.stm -r refA.stm refB.stm") + + # Test with ctm files + run(f'python -m meeteval.wer orcwer -h hyp1.ctm -h hyp2.ctm -r ref.stm') + run(f"python -m meeteval.wer orcwer -h hyp1.ctm -h hyp2.ctm -r ref.stm") + run(f'python -m meeteval.wer orcwer -h hyp1.ctm -r ref.stm') # Test with files in SegLST format run(f'python -m meeteval.wer orcwer -h hyp.seglst.json -r ref.seglst.json') @@ -105,7 +114,6 @@ def test_burn_greedy_ditcp(): def test_burn_mimo(): run(f'python -m meeteval.wer mimower -h hyp.stm -r ref.stm') - run(f"python -m meeteval.wer mimower -h 'hyp?.stm' -r 'ref?.stm'") run(f'python -m meeteval.wer mimower -h hyp.seglst.json -r ref.seglst.json') run('python -m meeteval.wer mimower -h hyp.stm -r ref.stm --reference-sort "segment" --hypothesis-sort "false"') @@ -116,25 +124,27 @@ def test_burn_tcmimo(): run(f'python -m meeteval.wer tcmimower -h hyp.seglst.json -r ref.seglst.json --collar 5') run(f'python -m meeteval.wer tcmimower -h hyp.stm -r ref.stm 
--hypothesis-sort true --collar 5') - def test_burn_cp(): run(f'python -m meeteval.wer cpwer -h hyp.stm -r ref.stm') - run(f"python -m meeteval.wer cpwer -h 'hyp?.stm' -r 'ref?.stm'") run(f'python -m meeteval.wer cpwer -h hyp.seglst.json -r ref.seglst.json') run('python -m meeteval.wer cpwer -h hyp.stm -r ref.stm --reference-sort "segment" --hypothesis-sort "false"') + + # Test UEM file run(f'python -m meeteval.wer cpwer -h hyp.stm -r ref.stm --uem uem.uem') def test_burn_tcp(): - # run(f'python -m meeteval.wer tcpwer -h hyp.stm -r ref.stm') # Mar 2025: Disabled, because default collar=0 is too a too special case to be default run(f'python -m meeteval.wer tcpwer -h hyp.stm -r ref.stm --collar 5') run(f'python -m meeteval.wer tcpwer -h hyp.stm -r ref.stm --hyp-pseudo-word-timing equidistant_points --collar 5') run(f'python -m meeteval.wer tcpwer -h hyp.seglst.json -r ref.seglst.json --collar 5') run(f'python -m meeteval.wer tcpwer -h hyp.stm -r ref.stm --reference-sort word --hypothesis-sort true --collar 5') + # Test that the collar option is mandatory + with pytest.raises(Exception, match='.*the following arguments are required: --collar.*'): + run(f'python -m meeteval.wer tcpwer -h hyp.stm -r ref.stm') + def test_burn_tcorc(): - # run(f'python -m meeteval.wer tcorcwer -h hyp.stm -r ref.stm') # Mar 2025: Disabled, because default collar=0 is too a too special case to be default run(f'python -m meeteval.wer tcorcwer -h hyp.stm -r ref.stm --collar 5') run(f'python -m meeteval.wer tcorcwer -h hyp.stm -r ref.stm --hyp-pseudo-word-timing equidistant_points --collar 5') run(f'python -m meeteval.wer tcorcwer -h hyp.seglst.json -r ref.seglst.json --collar 5') @@ -142,7 +152,6 @@ def test_burn_tcorc(): def test_burn_greedy_tcorc(): - # run(f'python -m meeteval.wer greedy_tcorcwer -h hyp.stm -r ref.stm') # Mar 2025: Disabled, because default collar=0 is too a too special case to be default run(f'python -m meeteval.wer greedy_tcorcwer -h hyp.stm -r ref.stm --collar 5') 
run(f'python -m meeteval.wer greedy_tcorcwer -h hyp.stm -r ref.stm --hyp-pseudo-word-timing equidistant_points --collar 5') run(f'python -m meeteval.wer greedy_tcorcwer -h hyp.seglst.json -r ref.seglst.json --collar 5') @@ -216,7 +225,8 @@ def test_viz_html(): run(f'python -m meeteval.viz html -h hyp.stm -r ref.stm --out=viz') run(f'python -m meeteval.viz html -h hyp.stm -r ref.stm --alignment cp tcorc') - # Test loading a precomputed assignment +def test_viz_precomputed(): + """Test loading a precomputed assignment""" run(f'python -m meeteval.wer cpwer -h hyp.stm -r ref.stm --per-reco-out hyp_cpwer_per_reco.json') run(f'python -m meeteval.wer tcorcwer -h hyp.stm -r ref.stm --per-reco-out hyp_tcorcwer_per_reco.json --collar 5') run(f'meeteval-viz html -h hyp.stm -r ref.stm --alignment cp tcorc --per-reco-file hyp_cpwer_per_reco.json hyp_tcorcwer_per_reco.json') @@ -231,11 +241,14 @@ def test_normalize(tmpdir): run(f'python -m meeteval.wer normalize hyp.stm -o - --normalizer="lower,rm(.?!,)"') run(f'python -m meeteval.wer normalize hyp.stm -o {tmpdir / "hyp_normalized.stm"} --normalizer="lower,rm([^a-z0-9 ])"') +@pytest.mark.skipif(on_windows, reason='Piping is not supported on Windows') +def test_normalize_piping(tmpdir): # Test that chaining normalizer and wer scripts is equal to using the normalizer option on the script chained = run('python -m meeteval.wer cpwer -r <(python -m meeteval.wer normalize ref.stm -o - --normalizer "lower,rm(.?!,)") -h <(python -m meeteval.wer normalize hyp.stm -o - --normalizer "lower,rm(.?!,)") --average-out - --per-reco-out -') option = run('python -m meeteval.wer cpwer -r ref.stm -h hyp.stm --average-out - --per-reco-out - --normalizer "lower,rm(.?!,)"') assert chained.stdout == option.stdout assert chained.stderr == option.stderr +@pytest.mark.skipif(on_windows, reason='Piping is not supported on Windows') def test_pipe_cli_commands(): run('cat hyp.stm | python -m meeteval.wer normalize - -o - | python -m meeteval.io stm2seglst -o 
-') diff --git a/tests/test_di_cp.py b/tests/test_di_cp.py index 468ad0ee..ee5f118a 100644 --- a/tests/test_di_cp.py +++ b/tests/test_di_cp.py @@ -1,9 +1,10 @@ from hypothesis import given, strategies as st, assume, settings import pytest import meeteval +from meeteval.io import SegLST seglst = st.builds( - meeteval.io.SegLST, + SegLST, st.lists( st.builds( lambda **x: { diff --git a/tests/test_docs.py b/tests/test_docs.py index 1892d01b..82fbcaf2 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -3,6 +3,8 @@ if sys.version_info < (3, 11): pytest.skip(reason='algorithms.md requires Python 3.11+', allow_module_level=True) +if sys.platform.startswith('win'): + pytest.skip(reason='Does not run on Windows', allow_module_level=True) import re from pathlib import Path diff --git a/tests/test_io_converters.py b/tests/test_io_converters.py index 5838938c..bcdfbaeb 100644 --- a/tests/test_io_converters.py +++ b/tests/test_io_converters.py @@ -1,3 +1,4 @@ +import sys import itertools from pathlib import Path import pytest @@ -13,7 +14,14 @@ 'seglst': 'hyp.seglst.json', } -def run(cmd): +on_windows = sys.platform.startswith('win') + +def run(cmd, cwd=example_files): + + # Translate file paths for windows + if on_windows: + cmd = cmd.replace('/', '\\') + cp = subprocess.run( cmd, shell=True, @@ -21,8 +29,8 @@ def run(cmd): stderr=subprocess.PIPE, check=False, universal_newlines=True, - cwd=example_files, - executable='bash', # echo "<(cat hyp.stm)" requires bash not sh. + cwd=cwd, + executable=None if on_windows else 'bash', # echo "<(cat hyp.stm)" requires bash not sh. 
) if cp.returncode == 0: @@ -62,6 +70,7 @@ def test_merge_ctm_speaker_arg(tmp_path): assert (tmp_path / "hyp.stm").exists() meeteval.io.load(tmp_path / "hyp.stm").to_seglst().unique('speaker') == {'spk-A'} +@pytest.mark.skipif(on_windows, reason='Piping does not work on Windows') def test_piping(tmp_path): run(f'cat {example_files / "hyp.stm"} | meeteval-io stm2rttm {tmp_path / "hyp.rttm"}') run(f'cat {example_files / "hyp.stm"} | meeteval-io stm2rttm -') @@ -92,6 +101,7 @@ def test_convert_file_exists(tmp_path): run(f'meeteval-io stm2rttm --force {example_files / "hyp.stm"} {tmp_path / "hyp.rttm"}') run(f'meeteval-io stm2rttm -f {example_files / "hyp.stm"} {tmp_path / "hyp.rttm"}') +@pytest.mark.skipif(on_windows, reason='Piping does not work on Windows') def test_ctm_piping(): run(f'cat {example_files / "hyp1.ctm"} | meeteval-io ctm2stm --speaker spk-A - > /dev/null') with pytest.raises(Exception, match='.*the following arguments are required: --speaker.*'): diff --git a/tests/test_viz.py b/tests/test_viz.py index efadcdce..59b2028d 100644 --- a/tests/test_viz.py +++ b/tests/test_viz.py @@ -99,7 +99,7 @@ def get_index_html_data(path): import re import yaml content = path.read_text() - data = re.search('data = (\[\n(.|\n)*\n\s*]);', content).groups(1)[0] + data = re.search(r'data = (\[\n(.|\n)*\n\s*\]);', content).groups(1)[0] data = yaml.safe_load(data) # JSON complains about trailing comma return data