From 1e36d673e7d2dd7a24b6cc41bffa8463eb82f272 Mon Sep 17 00:00:00 2001 From: Stanislav Pankevich Date: Thu, 12 Jun 2025 21:21:40 +0200 Subject: [PATCH] Add boilerplate for automated testing of syntax grammar --- .github/workflows/ci-linux-ubuntu-latest.yml | 38 ++++++++ .gitignore | 6 ++ package.json | 17 +++- parse_syntax.js | 46 +++++++++ requirements.txt | 3 + tasks.py | 95 +++++++++++++++++++ tests/integration/lit.cfg.py | 24 +++++ .../syntax/01_basic_document_node/sample.sdoc | 2 + .../syntax/01_basic_document_node/test.itest | 7 ++ 9 files changed, 233 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/ci-linux-ubuntu-latest.yml create mode 100644 parse_syntax.js create mode 100644 requirements.txt create mode 100644 tasks.py create mode 100644 tests/integration/lit.cfg.py create mode 100644 tests/integration/syntax/01_basic_document_node/sample.sdoc create mode 100644 tests/integration/syntax/01_basic_document_node/test.itest diff --git a/.github/workflows/ci-linux-ubuntu-latest.yml b/.github/workflows/ci-linux-ubuntu-latest.yml new file mode 100644 index 0000000..3156386 --- /dev/null +++ b/.github/workflows/ci-linux-ubuntu-latest.yml @@ -0,0 +1,38 @@ +name: "StrictDoc.tmLanguage on Linux" + +on: + pull_request: + branches: [ "**" ] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.12 + + - name: Upgrade pip + run: | + python -m pip install --upgrade pip + + - name: Install minimal Python packages + run: | + pip install -r requirements.txt + + - name: Install Node packages + run: | + npm install + + - name: Run tests + run: | + invoke test diff --git a/.gitignore b/.gitignore index f414e7b..f771ea3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,10 @@ .idea node_modules +package-lock.json *.vsix /_* + +# tests/integration +.lit_test_times.txt +**/Output/** + diff --git a/package.json b/package.json index 3d70163..2bcdb77 100644 --- a/package.json +++ b/package.json @@ -33,10 +33,17 @@ "configuration": "./language-configuration.json" } ], - "grammars": [{ - "language": "sdoc", - "scopeName": "source.sdoc", - "path": "./syntaxes/sdoc.tmLanguage.json" - }] + "grammars": [ + { + "language": "sdoc", + "scopeName": "source.sdoc", + "path": "./syntaxes/sdoc.tmLanguage.json" + } + ] + }, + "dependencies": { + "onigasm": "^2.2.5", + "vscode-textmate": "^9.2.0", + "vscode-oniguruma": "^1.5.1" } } diff --git a/parse_syntax.js b/parse_syntax.js new file mode 100644 index 0000000..4adf460 --- /dev/null +++ b/parse_syntax.js @@ -0,0 +1,46 @@ +const fs = require('fs'); +const path = require('path'); +const vsctm = require('vscode-textmate'); +const oniguruma = require('vscode-oniguruma'); + +const wasmBin = fs.readFileSync(path.join(__dirname, './node_modules/vscode-oniguruma/release/onig.wasm')).buffer; +const vscodeOnigurumaLib = oniguruma.loadWASM(wasmBin).then(() => { + return { + createOnigScanner(patterns) { return new oniguruma.OnigScanner(patterns); }, + createOnigString(s) { return new oniguruma.OnigString(s); } + }; +}); + +const scopeName = "source.sdoc"; +const grammarPath = path.join(__dirname, "syntaxes/sdoc.tmLanguage.json"); +const filePath = process.argv[2]; +if (!fs.existsSync(filePath)) { + throw('File does NOT exist'); +} + +// Create a registry that can create a grammar from a scope name. +const registry = new vsctm.Registry({ + onigLib: vscodeOnigurumaLib, + loadGrammar: (scope) => { + if (scope === scopeName) { + const grammarData = fs.readFileSync(grammarPath, 'utf-8'); + return Promise.resolve(vsctm.parseRawGrammar(grammarData, grammarPath)); + } + return null; + } +}); + +registry.loadGrammar(scopeName).then(grammar => { + const lines = fs.readFileSync(filePath, 'utf-8').split(/\r?\n/); + let ruleStack = vsctm.INITIAL; + + lines.forEach((line, lineIndex) => { + const lineTokens = grammar.tokenizeLine(line, ruleStack); + ruleStack = lineTokens.ruleStack; + + lineTokens.tokens.forEach(token => { + const tokenText = line.slice(token.startIndex, token.endIndex); + console.log(`[${lineIndex + 1}:${token.startIndex}-${token.endIndex}] "${tokenText}" → ${token.scopes.join(' ')}`); + }); + }); + }); diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ae7d6d2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +invoke +lit +filecheck>=0.0.20,<1.0.0 diff --git a/tasks.py b/tasks.py new file mode 100644 index 0000000..048f065 --- /dev/null +++ b/tasks.py @@ -0,0 +1,95 @@ +# Invoke is broken on Python 3.11 +# https://github.com/pyinvoke/invoke/issues/833#issuecomment-1293148106 +import inspect +import os +import re +import shutil +import sys +import tempfile +from enum import Enum +from pathlib import Path +from typing import Dict, Optional + +if not hasattr(inspect, "getargspec"): + inspect.getargspec = inspect.getfullargspec + +import invoke +from invoke import task + +# Specifying encoding because Windows crashes otherwise when running Invoke +# tasks below: +# UnicodeEncodeError: 'charmap' codec can't encode character '\ufffd' +# in position 16: character maps to +# People say, it might also be possible to export PYTHONIOENCODING=utf8 but this +# seems to work. +# FIXME: If you are a Windows user and expert, please advise on how to do this +# properly. +sys.stdout = open(1, "w", encoding="utf-8", closefd=False, buffering=1) + + +def run_invoke( + context, + cmd, + environment: Optional[dict] = None, + pty: bool = False, + warn: bool = False, +) -> invoke.runners.Result: + def one_line_command(string): + return re.sub("\\s+", " ", string).strip() + + return context.run( + one_line_command(cmd), + env=environment, + hide=False, + warn=warn, + pty=pty, + echo=True, + ) + + +@task() +def test( + context, + focus=None, + debug=False, + no_parallelization=False, + fail_first=False, +): + clean_itest_artifacts(context) + + cwd = os.getcwd() + + parse_syntax_script = f'node \\"{cwd}/parse_syntax.js\\"' + + debug_opts = "-vv --show-all" if debug else "" + focus_or_none = f"--filter {focus}" if focus else "" + fail_first_argument = "--max-failures 1" if fail_first else "" + parallelize_opts = "" if not no_parallelization else "--threads 1" + test_folder = f"{cwd}/tests/integration" + + itest_command = f""" + lit + --param PARSE_SYNTAX_EXEC="{parse_syntax_script}" + -v + {debug_opts} + {focus_or_none} + {fail_first_argument} + {parallelize_opts} + {test_folder} + """ + run_invoke( + context, + itest_command, + ) + +@task +def clean_itest_artifacts(context): + # The command sometimes exits with 1 even if the files are deleted. + # warn=True ensures that the execution continues. + run_invoke( + context, + """ + git clean -dX --force --quiet tests/integration/ + """, + warn=True, + ) diff --git a/tests/integration/lit.cfg.py b/tests/integration/lit.cfg.py new file mode 100644 index 0000000..10ad041 --- /dev/null +++ b/tests/integration/lit.cfg.py @@ -0,0 +1,24 @@ +# ruff: noqa: F821 + +import os +import sys +from typing import Any + +import lit.formats + +config: Any +lit_config: Any + +config.name = "StrictDoc integration tests" +config.test_format = lit.formats.ShTest("0") +config.suffixes = [".itest"] + +current_dir = os.getcwd() + +parse_syntax_exec = lit_config.params["PARSE_SYNTAX_EXEC"] + +# NOTE: All substitutions work for the RUN: statements but they don't for CHECK:. +# That's how LLVM LIT works. +config.substitutions.append(("%THIS_TEST_FOLDER", '$(basename "%S")')) + +config.substitutions.append(("%parse_syntax", parse_syntax_exec)) diff --git a/tests/integration/syntax/01_basic_document_node/sample.sdoc b/tests/integration/syntax/01_basic_document_node/sample.sdoc new file mode 100644 index 0000000..045d5d4 --- /dev/null +++ b/tests/integration/syntax/01_basic_document_node/sample.sdoc @@ -0,0 +1,2 @@ +[DOCUMENT] +TITLE: Document Title diff --git a/tests/integration/syntax/01_basic_document_node/test.itest b/tests/integration/syntax/01_basic_document_node/test.itest new file mode 100644 index 0000000..8adf68e --- /dev/null +++ b/tests/integration/syntax/01_basic_document_node/test.itest @@ -0,0 +1,7 @@ +RUN: %parse_syntax %S/sample.sdoc | filecheck %s + +CHECK: [1:0-10] "[DOCUMENT]" → source.sdoc keyword.sdoc +CHECK: [2:0-5] "TITLE" → source.sdoc keyword.control.sdoc keyword.control.sdoc +CHECK: [2:5-7] ": " → source.sdoc keyword.control.sdoc +CHECK: [2:7-22] "Document Title" → source.sdoc keyword.control.sdoc string.sdoc +CHECK: [3:0-1] "" → source.sdoc