runtimeverification · anvacaru · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/kevm-pyk/src/kevm_pyk/__main__.py b/kevm-pyk/src/kevm_pyk/__main__.py
@@ -12,6 +12,7 @@
 from dataclasses import dataclass
 from functools import cached_property
 from pathlib import Path
+from subprocess import CalledProcessError
 from typing import TYPE_CHECKING
 
 from filelock import SoftFileLock
@@ -602,8 +603,8 @@ def exec_run(options: RunOptions) -> None:
         if options.gst_name:
             json_read = {options.gst_name: json_read[options.gst_name]}
         kore_pattern_list = [
-            (name, kore)
-            for (name, kore) in iterate_gst(
+            (name, kore, exception_metadata)
+            for (name, kore, exception_metadata) in iterate_gst(
                 json_read, options.mode, options.chainid, options.usegas, schedule=options.schedule
             )
         ]
@@ -617,21 +618,31 @@ def exec_run(options: RunOptions) -> None:
                 kore_pgm_to_kore(
                     kore_pgm, SORT_ETHEREUM_SIMULATION, options.schedule, options.mode, options.chainid, options.usegas
                 ),
+                (False, False),
             ),
         ]
 
-    for name, kore_pattern in kore_pattern_list:
+    for name, kore_pattern, (exception_expected, _) in kore_pattern_list:
         if name:
             _LOGGER.info(f'Processing test - {name}')
-        kevm.run(
-            kore_pattern,
-            depth=options.depth,
-            term=True,
-            expand_macros=options.expand_macros,
-            output=options.output,
-            check=True,
-            debugger=options.debugger,
-        )
+        if exception_expected:
+            _LOGGER.info(f'Test {name} is expected to fail.')
+        try:
+            kevm.run(
+                kore_pattern,
+                depth=options.depth,
+                term=True,
+                expand_macros=options.expand_macros,
+                output=options.output,
+                check=True,
+                debugger=options.debugger,
+            )
+        except CalledProcessError:
+            if exception_expected:
+                _LOGGER.info(f'Test {name} failed as expected')
+                continue
+            else:
+                raise
 
 
 def exec_kast(options: KastOptions) -> None:
@@ -646,8 +657,8 @@ def exec_kast(options: KastOptions) -> None:
         if options.gst_name:
             json_read = {options.gst_name: json_read[options.gst_name]}
         kore_pattern_list = [
-            (name, kore)
-            for (name, kore) in iterate_gst(
+            (name, kore, exception_metadata)
+            for (name, kore, exception_metadata) in iterate_gst(
                 json_read, options.mode, options.chainid, options.usegas, schedule=options.schedule
             )
         ]
@@ -661,10 +672,11 @@ def exec_kast(options: KastOptions) -> None:
                 kore_pgm_to_kore(
                     kore_pgm, SORT_ETHEREUM_SIMULATION, options.schedule, options.mode, options.chainid, options.usegas
                 ),
+                (False, False),
             ),
         ]
 
-    for name, kore_pattern in kore_pattern_list:
+    for name, kore_pattern, _ in kore_pattern_list:
         if name:
             _LOGGER.info(f'Processing test - {name}')
         output_text = kore_print(kore_pattern, definition_dir=kevm.definition_dir, output=options.output)

diff --git a/kevm-pyk/src/kevm_pyk/gst_to_kore.py b/kevm-pyk/src/kevm_pyk/gst_to_kore.py
@@ -59,6 +59,7 @@
         'transactionSequence',
         'chainname',
         'lastblockhash',
+        'expectException',
         'hasBigInt',
         'config',
         'network',

diff --git a/kevm-pyk/src/kevm_pyk/interpreter.py b/kevm-pyk/src/kevm_pyk/interpreter.py
@@ -22,14 +22,27 @@ def iterate_gst(
     usegas: bool,
     skipped_keys: frozenset[str] = frozenset(),
     schedule: str | None = None,
-) -> Iterator[tuple[str, App]]:
-    """Yield (test_name, kore_pattern) for each test in GST data after filtering discarded keys."""
+) -> Iterator[tuple[str, App, tuple[bool, bool]]]:
+    """Yield (test_name, kore_pattern, metadata) for each test in GST data."""
     for test_name, test in gst_data.items():
         if test_name in skipped_keys:
             continue
+        exception_metadata = _resolve_exception(test)
         test_schedule = _resolve_schedule(test, schedule)
         gst_filtered = {test_name: filter_gst_keys(test)}
-        yield test_name, gst_to_kore(gst_filtered, test_schedule, mode, chainid, usegas)
+        yield test_name, gst_to_kore(gst_filtered, test_schedule, mode, chainid, usegas), exception_metadata
+
+
+def _resolve_exception(test: dict) -> tuple[bool, bool]:
+    """Parse the 'blocks' field of a GST test and check if 'expectException' and 'hasBigInt' fields are present."""
+    exception_expected = False
+    has_big_int = False
+    for block in test.get('blocks', []):
+        exception_expected = exception_expected or 'expectException' in block
+        has_big_int = has_big_int or 'hasBigInt' in block
+        if exception_expected and has_big_int:
+            break
+    return (exception_expected, has_big_int)
 
 
 def _resolve_schedule(test: dict, fallback: str | None) -> str:

diff --git a/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/driver.md b/kevm-pyk/src/kevm_pyk/kproj/evm-semantics/driver.md
@@ -358,28 +358,11 @@ Processing SetCode Transaction Authority Entries
     rule <k> #addAuthority(AUTHORITY, _CID, NONCE, _ADDR) => .K ... </k> requires notBool (#accountExists(AUTHORITY) orBool #asWord(NONCE) ==Int 0)
 ```
 
--   `exception` only clears from the `<k>` cell if there is an exception preceding it.
--   `failure_` holds the name of a test that failed if a test does fail.
 -   `success` sets the `<exit-code>` to `0` and the `<mode>` to `SUCCESS`.
 
 ```k
     syntax Mode ::= "SUCCESS"
  // -------------------------
-
-    syntax EthereumCommand ::= "exception" | "status" StatusCode
- // ------------------------------------------------------------
-    rule <statusCode> _:ExceptionalStatusCode </statusCode>
-         <k> #halt ~> exception => .K ... </k>
-
-    rule <statusCode> _:ExceptionalStatusCode </statusCode>
-         <k> exception ~> check _ => exception ... </k>
-
-    rule <statusCode> _:ExceptionalStatusCode </statusCode>
-         <k> exception ~> run TEST => run TEST ~> exception ... </k>
-
-    rule <statusCode> _:ExceptionalStatusCode </statusCode>
-         <k> exception ~> clear => clear ... </k>
-
     syntax EthereumCommand ::= "success"
  // ------------------------------------
     rule <k> success => .K ... </k>
@@ -396,17 +379,7 @@ Processing SetCode Transaction Authority Entries
     syntax EthereumCommand ::= "run" JSON
 // --------------------------------------
    rule <k> run { .JSONs } => .K ... </k>
-   rule <k> run { TESTID : { TEST:JSONs } } => run { TEST }
-         ~> #if #hasPost?( { TEST } ) #then .K #else exception #fi
-         ~> clear
-        ...
-        </k>
-     requires notBool TESTID in (#loadKeys #execKeys #checkKeys)
-
-    syntax Bool ::= "#hasPost?" "(" JSON ")" [function]
- // ---------------------------------------------------
-    rule #hasPost? ({ .JSONs }) => false
-    rule #hasPost? ({ (KEY:String) : _ , REST }) => (KEY in #postKeys) orBool #hasPost? ({ REST })
+   rule <k> run { TESTID : { TEST:JSONs } } => run { TEST } ~> clear ... </k> requires notBool TESTID in (#loadKeys #execKeys #checkKeys)
 ```
 
 -   `#loadKeys` are all the JSON nodes which should be considered as loads before execution.
@@ -435,15 +408,6 @@ Processing SetCode Transaction Authority Entries
 
     syntax EthereumCommand ::= "process" JSON
  // -----------------------------------------
-    rule <k> process { "expectException" : _ , REST } => exception ~> process { REST } ... </k>
-
-    rule <k> exception ~> process { "rlp_decoded" : { KEY : VAL , REST1 => REST1 }, (REST2 => KEY : VAL , REST2 ) } ... </k>
-    rule <k> exception ~> process { "rlp_decoded" : { .JSONs } , REST   => REST}                                    ... </k>
-
-    rule <k> exception ~> process { KEY : VAL , REST } => load KEY : VAL ~> exception ~> process { REST }             ... </k> requires KEY in #loadKeys
-    rule <k> exception ~> process { KEY : VAL , REST } => exception ~> process { REST } ~> check {KEY : VAL} ... </k> requires KEY in #checkKeys
-    rule <k> exception ~> process { .JSONs }           => #startBlock ~> startTx ~> exception ... </k>
-
     rule <k> process { "rlp_decoded" : { KEY : VAL , REST1 => REST1 }, (REST2 => KEY : VAL , REST2 ) } ... </k>
     rule <k> process { "rlp_decoded" : { .JSONs } , REST => REST}                                      ... </k>
 
@@ -479,7 +443,7 @@ Processing SetCode Transaction Authority Entries
     syntax Set ::= "#postKeys" [function] | "#allPostKeys" [function] | "#checkKeys" [function]
  // -------------------------------------------------------------------------------------------
     rule #postKeys    => ( SetItem("post") SetItem("postState") SetItem("postStateHash") )
-    rule #allPostKeys => ( #postKeys SetItem("expect") SetItem("export") SetItem("expet") )
+    rule #allPostKeys => ( #postKeys SetItem("expect") SetItem("export") )
     rule #checkKeys   => ( #allPostKeys SetItem("logs") SetItem("out") SetItem("gas")
                            SetItem("blockHeader") SetItem("transactions") SetItem("uncleHeaders")
                            SetItem("genesisBlockHeader") SetItem("withdrawals") SetItem("blocknumber")

diff --git a/kevm-pyk/src/tests/integration/test_run.py b/kevm-pyk/src/tests/integration/test_run.py
@@ -32,7 +32,7 @@ def test_run(gst_file: Path, update_expected_output: bool) -> None:
 
     actual = ''
     # When
-    for _, init_kore in iterate_gst(gst_data, 'NORMAL', 1, True):
+    for _, init_kore, _ in iterate_gst(gst_data, 'NORMAL', 1, True):
         pattern = interpret(init_kore, check=False)
         actual += kore_print(pattern, definition_dir=kdist.get('evm-semantics.llvm'), output=PrintOutput.PRETTY)
 

diff --git a/kevm-pyk/src/tests/unit/test_gst_to_kore.py b/kevm-pyk/src/tests/unit/test_gst_to_kore.py
@@ -38,7 +38,7 @@ def test_gst_to_kore(gst_path: str, expected_path: str, update_expected_output:
     expected_file = REPO_ROOT / expected_path
 
     # When
-    actuals = [kore for _, kore in iterate_gst(gst_data, 'NORMAL', 1, True)]
+    actuals = [kore for _, kore, _ in iterate_gst(gst_data, 'NORMAL', 1, True)]
 
     # Then
     if update_expected_output:

diff --git a/kevm-pyk/src/tests/utils.py b/kevm-pyk/src/tests/utils.py
@@ -7,9 +7,9 @@
 from typing import TYPE_CHECKING
 
 import pytest
-from pyk.kdist import kdist
+from pyk.kdist._kdist import kdist
 from pyk.kore.prelude import int_dv
-from pyk.kore.syntax import App
+from pyk.kore.syntax import App, SortApp, SymbolId
 from pyk.kore.tools import PrintOutput, kore_print
 
 from kevm_pyk.interpreter import interpret, iterate_gst
@@ -27,8 +27,11 @@
 REPO_ROOT: Final = Path(__file__).parents[3].resolve(strict=True)
 GOLDEN: Final = (REPO_ROOT / 'tests/templates/output-success-llvm.json').read_text().rstrip()
 
+DOT_STATUS_CODE: Final = App(SymbolId("Lbl'Stop'StatusCode'Unds'NETWORK'Unds'StatusCode"))
+SORT_EXCEPTIONAL_STATUS_CODE: Final = SortApp('SortExceptionalStatusCode')
 
-def _assert_exit_code_zero(pattern: Pattern) -> None:
+
+def _assert_exit_code_zero(pattern: Pattern, exception_expected: bool = False) -> None:
     assert type(pattern) is App
     kevm_cell = pattern.args[0]
     assert type(kevm_cell) is App
@@ -38,23 +41,46 @@ def _assert_exit_code_zero(pattern: Pattern) -> None:
     exit_code = exit_code_cell.args[0]
     if exit_code == int_dv(0):
         return
-
+    elif exception_expected:
+        _assert_exit_code_exception(kevm_cell)
+        return
     pretty = kore_print(pattern, definition_dir=kdist.get('evm-semantics.llvm'), output=PrintOutput.PRETTY)
     assert pretty == GOLDEN
 
 
-def _skipped_tests(test_dir: Path, slow_tests_file: Path, failing_tests_file: Path) -> dict[Path, frozenset[str]]:
+def _assert_exit_code_exception(kevm_cell: App) -> None:
+    """Assert that the status code in a kevm_cell has the ExceptionalStatusCode sort."""
+    status_code = _fetch_status_code(kevm_cell)
+    # Some tests that are expected to fail might get stuck somewhere not related to the exception.
+    # This assert should catch any false negatives
+    assert status_code != DOT_STATUS_CODE
+    status_code_sort = status_code.sorts[0]
+    assert status_code_sort == SORT_EXCEPTIONAL_STATUS_CODE
+
+
+def _fetch_status_code(kevm_cell: App) -> App:
+    """Return the value of the "<statusCode>" cell in a kevm_cell:App."""
+    # <statusCode> is nested under <kevm><ethereum><evm>
+    ethereum_cell = kevm_cell.args[5]  # type: ignore[attr-defined]
+    evm_cell = ethereum_cell.args[0]  # type: ignore[attr-defined]
+    status_code_cell = evm_cell.args[1]  # type: ignore[attr-defined]
+    status_code = status_code_cell.args[0]  # type: ignore[attr-defined]
+    assert type(status_code) is App
+    return status_code
+
+
+def _skipped_tests(test_dir: Path, slow_tests_file: Path, failing_tests_file: Path) -> dict[Path, list[str]]:
     try:
         slow_tests = read_csv_file(slow_tests_file)
     except FileNotFoundError as e:
         _LOGGER.warning(e)
         slow_tests = ()
     failing_tests = read_csv_file(failing_tests_file)
-    skipped: dict[Path, set[str]] = {}
+    skipped: dict[Path, list[str]] = {}
     for test_file, test in slow_tests + failing_tests:
         test_file = test_dir / test_file
-        skipped.setdefault(test_file, set()).add(test)
-    return {k: frozenset(v) for k, v in skipped.items()}
+        skipped.setdefault(test_file, []).append(test)
+    return skipped
 
 
 def read_csv_file(csv_file: Path) -> tuple[tuple[Path, str], ...]:
@@ -70,35 +96,45 @@ def _test(
     usegas: bool,
     save_failing: bool,
     compute_chain_id: Callable[[str], int],
-    skipped_tests: dict[Path, frozenset[str]],
+    skipped_tests: dict[Path, list[str]],
     test_dir: Path,
     failing_tests_file: Path,
 ) -> None:
-    skipped_gst_tests = skipped_tests.get(gst_file, frozenset())
+    skipped_gst_tests = skipped_tests.get(gst_file, [])
     if '*' in skipped_gst_tests:
         pytest.skip()
 
     gst_file_relative_path: Final[str] = str(gst_file.relative_to(test_dir))
 
+    chain_id = compute_chain_id(gst_file_relative_path)
+
     with gst_file.open() as f:
         gst_data = json.load(f)
 
     failing_tests: list[str] = []
 
-    chain_id = compute_chain_id(gst_file_relative_path)
-
-    for test_name, init_kore in iterate_gst(gst_data, mode, chain_id, usegas, skipped_gst_tests):
+    for test_name, init_kore, (exception_expected, has_big_int) in iterate_gst(
+        gst_data, mode, chain_id, usegas, frozenset(skipped_gst_tests)
+    ):
         _LOGGER.info(f'Running test: {gst_file} - {test_name}')
-        res = interpret(init_kore, check=False)
 
         try:
-            _assert_exit_code_zero(res)
+            res = interpret(init_kore, check=False)
+        except RuntimeError:
+            if not has_big_int:
+                if not save_failing:
+                    raise
+                failing_tests.append(test_name)
+            continue
+
+        try:
+            _assert_exit_code_zero(res, exception_expected)
         except AssertionError:
             if not save_failing:
                 raise
             failing_tests.append(test_name)
 
-    if len(failing_tests) == 0:
+    if not failing_tests:
         return
 
     with failing_tests_file.open('a', newline='') as ff: