Skip to content

Commit a2cb09f

Browse files
A lot.
Get a diff of spec.txt. That should tell you everything you need to know. HINT: It involves a REPL
1 parent a5f03f0 commit a2cb09f

File tree

3 files changed

+243
-16
lines changed

3 files changed

+243
-16
lines changed

asmln.exe

8.02 MB
Binary file not shown.

asmln.py

Lines changed: 162 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import argparse
44
import json
55
import math
6+
import os
67
import sys
78
from dataclasses import dataclass
89
from typing import Any, Callable, Dict, Iterable, List, Optional
@@ -83,8 +84,11 @@ def tokenize(self) -> List[Token]:
8384
tokens.append(Token(SYMBOLS[ch],ch,self.line,self.column))
8485
self._advance()
8586
continue
87+
if(ch == "-"):
88+
tokens.append(self._consume_signed_number())
89+
continue
8690
if(ch in "01"):
87-
tokens.append(self._consume_number())
91+
tokens.append(self._consume_unsigned_number())
8892
continue
8993
if(self._is_identifier_start(ch)):
9094
tokens.append(self._consume_identifier())
@@ -95,13 +99,27 @@ def tokenize(self) -> List[Token]:
9599
def _consume_comment(self) -> None:
96100
while(not self._eof and self._peek() != "\n"):
97101
self._advance()
98-
def _consume_number(self) -> Token:
102+
def _consume_unsigned_number(self) -> Token:
    """Lex an unsigned binary literal beginning at the current position.

    Returns:
        A ``NUMBER`` token whose text is the run of binary digits and whose
        location is the line/column recorded before any digit is consumed.
    """
    start_line = self.line
    start_column = self.column
    literal = self._consume_binary_digits()
    return Token("NUMBER", literal, start_line, start_column)
106+
107+
def _consume_signed_number(self) -> Token:
    """Lex a negative binary literal whose '-' is at the current position.

    Spaces, horizontal tabs and carriage returns between the sign and the
    first digit are skipped. The resulting token text is '-' followed by
    the digits, located at the position of the '-'.

    Raises:
        ASMParseError: if no binary digit follows the sign (including when
            the input ends first).
    """
    line = self.line
    col = self.column

    self._advance()  # step over the '-' sign

    # Skip the optional blank characters separating the sign from the digits.
    while not (self._eof or self._peek() not in " \t\r"):
        self._advance()

    digit_follows = (not self._eof) and self._peek() in "01"
    if not digit_follows:
        raise ASMParseError(f"Expected binary digits after '-' at {self.filename}:{line}:{col}")

    magnitude = self._consume_binary_digits()
    return Token("NUMBER", "-" + magnitude, line, col)
116+
117+
def _consume_binary_digits(self) -> str:
100118
digits: List[str] = []
101119
while(not self._eof and self._peek() in "01"):
102120
digits.append(self._peek())
103121
self._advance()
104-
return(Token("NUMBER","".join(digits),line,col))
122+
return "".join(digits)
105123
def _consume_identifier(self) -> Token:
106124
line, col = self.line, self.column
107125
chars: List[str] = []
@@ -480,6 +498,8 @@ def __init__(self) -> None:
480498
self._register_variadic("LEN", 0, lambda vals: len(vals))
481499
self._register_fixed("LOG", 1, self._safe_log)
482500
self._register_fixed("CLOG", 1, self._safe_clog)
501+
self._register_custom("MAIN", 0, 0, self._main)
502+
self._register_custom("IMPORT", 1, 1, self._import)
483503
self._register_custom("INPUT", 0, 0, self._input)
484504
self._register_custom("PRINT", 0, None, self._print)
485505
self._register_custom("ASSERT", 1, 1, self._assert)
@@ -557,6 +577,49 @@ def _safe_clog(self, value: int) -> int:
557577
return value.bit_length() - 1
558578
return value.bit_length()
559579

580+
def _main(
    self,
    interpreter: "Interpreter",
    _: List[int],
    __: List[Expression],
    ___: Environment,
    location: SourceLocation,
) -> int:
    """Tell whether the call site belongs to the primary program file.

    Args:
        interpreter: Supplies ``entry_filename``, the primary program's name.
        location: Source location of the call; only ``location.file`` is read.

    Returns:
        1 if ``location.file`` names the entry file, otherwise 0.
    """
    entry = interpreter.entry_filename
    call_file = location.file
    if entry != "<string>":
        # A real entry file is compared by absolute path; the "<string>"
        # pseudo-file is compared verbatim.
        call_file = os.path.abspath(call_file)
    return int(call_file == entry)
593+
594+
def _import(
    self,
    interpreter: "Interpreter",
    _: List[int],
    arg_nodes: List[Expression],
    env: Environment,
    location: SourceLocation,
) -> int:
    """Load '<name>.asmln' and execute its statements in the caller's environment.

    The module file is looked up next to the file containing the call, or
    in the current working directory when the call comes from "<string>".
    The module's top-level statements are executed directly in ``env``.

    Args:
        interpreter: Interpreter used to execute the module's statements.
        arg_nodes: Unevaluated argument nodes; must be exactly one Identifier.
        env: Environment the module's statements run in.
        location: Source location of the IMPORT call.

    Returns:
        Always 0.

    Raises:
        ASMRuntimeError: if the argument is not a single identifier, or the
            module file cannot be read or is not valid UTF-8.
    """
    if len(arg_nodes) != 1 or not isinstance(arg_nodes[0], Identifier):
        raise ASMRuntimeError("IMPORT expects module name identifier", location=location, rewrite_rule="IMPORT")

    module_name = arg_nodes[0].name
    if location.file == "<string>":
        base_dir = os.getcwd()
    else:
        base_dir = os.path.dirname(os.path.abspath(location.file))
    module_path = os.path.join(base_dir, f"{module_name}.asmln")

    try:
        with open(module_path, "r", encoding="utf-8") as handle:
            source_text = handle.read()
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; without it a badly encoded
        # module would surface as a raw Python exception. Chain the cause so
        # the underlying failure stays visible.
        raise ASMRuntimeError(
            f"Failed to import '{module_name}': {exc}", location=location, rewrite_rule="IMPORT"
        ) from exc

    lexer = Lexer(source_text, module_path)
    tokens = lexer.tokenize()
    parser = Parser(tokens, module_path, source_text.splitlines())
    program = parser.parse()

    interpreter._execute_block(program.statements, env)
    return 0
622+
560623
def _slice(self, a: int, hi: int, lo: int) -> int:
561624
if hi < lo:
562625
raise ASMRuntimeError("SLICE: hi must be >= lo", rewrite_rule="SLICE")
@@ -645,7 +708,9 @@ def __init__(
645708
output_sink: Optional[Callable[[str], None]] = None,
646709
) -> None:
647710
self.source = source
648-
self.filename = filename
711+
normalized_filename = filename if filename == "<string>" else os.path.abspath(filename)
712+
self.filename = normalized_filename
713+
self.entry_filename = normalized_filename
649714
self.verbose = verbose
650715
self.input_provider = input_provider or (lambda: input(">>> "))
651716
self.output_sink = output_sink or (lambda text: print(text))
@@ -742,6 +807,17 @@ def _evaluate_expression(self, expression: Expression, env: Environment) -> int:
742807
return(result)
743808
raise
744809
if(isinstance(expression,CallExpression)):
810+
if(expression.name == "IMPORT"):
811+
module_label = expression.args[0].name if (expression.args and isinstance(expression.args[0], Identifier)) else None
812+
dummy_args:List[int] = [0] * len(expression.args)
813+
try:
814+
result:int = self.builtins.invoke(self,expression.name,dummy_args,expression.args,env,expression.location)
815+
except ASMRuntimeError:
816+
self._log_step(rule="IMPORT",location=expression.location,extra={"module": module_label,"status": "error"})
817+
raise
818+
self._log_step(rule="IMPORT",location=expression.location,extra={"module": module_label,"result": result})
819+
return(result)
820+
745821
args:List[int] = []
746822
for arg in expression.args:
747823
args.append(self._evaluate_expression(arg,env))
@@ -873,12 +949,92 @@ def to_json(self, error: ASMRuntimeError) -> str:
873949
}
874950
return json.dumps(data, indent=2)
875951
def run_cli(argv: Optional[List[str]] = None) -> int:
952+
def _parse_statements_from_source(text: str, filename: str) -> List[Statement]:
    """Lex and parse ``text`` and return the program's top-level statements.

    ``filename`` is handed to both the lexer and the parser; the parser also
    receives the text split into lines.
    """
    token_stream = Lexer(text, filename).tokenize()
    source_lines = text.splitlines()
    return Parser(token_stream, filename, source_lines).parse().statements
958+
959+
def run_repl(verbose: bool) -> int:
    """Run the interactive read-eval-print loop.

    One interpreter, one global environment and one global frame are shared
    by every input for the whole session.

    Input handling:
      * ``.exit`` at any prompt ends the session with exit code 0.
      * With an empty buffer, a blank line is ignored.
      * With an empty buffer, a line that does not look like a block start
        is parsed and executed immediately. If it does not parse, it is
        buffered instead, since the user may be starting a multi-line entry.
      * Otherwise lines are buffered until a blank line, then the buffer is
        parsed and executed as one unit.

    Parse and runtime errors are reported on stderr and the session
    continues.

    Args:
        verbose: Passed to the interpreter and used when formatting
            tracebacks.

    Returns:
        The code carried by an ``ExitSignal`` raised by executed code, or 0
        when the session ends through ``.exit`` or end-of-input.
    """
    print("ASM-Lang REPL. Enter statements, blank line to run buffer.")
    interpreter = Interpreter(source="", filename="<repl>", verbose=verbose)
    global_env = Environment()
    global_frame = interpreter._new_frame("<repl>", global_env, None)
    interpreter.call_stack.append(global_frame)
    buffer: List[str] = []

    # Prefix heuristic only: an identifier that merely starts with one of
    # these words (e.g. ``FORMAT = 1``) is also buffered and runs on the
    # next blank line.
    block_keywords = ("FUNC", "IF", "WHILE", "FOR")
    block_openers = ("[", "{")

    def execute(statements: List[Statement]) -> None:
        """Execute statements in the global environment, reporting errors.

        ExitSignal is deliberately re-raised so the loop below can end the
        session; every other interpreter error is printed and swallowed.
        """
        try:
            interpreter._execute_block(statements, global_env)
        except ExitSignal:
            raise
        except ASMParseError as error:
            # Can surface at run time, e.g. from a module loaded by IMPORT.
            print(f"ParseError: {error}", file=sys.stderr)
            interpreter.call_stack = [global_frame]
        except ASMRuntimeError as error:
            if interpreter.logger.entries:
                error.step_index = interpreter.logger.entries[-1].step_index
            formatter = TracebackFormatter(interpreter)
            print(formatter.format_text(error, verbose=interpreter.verbose), file=sys.stderr)
            # Drop any frames the failed run left behind.
            interpreter.call_stack = [global_frame]

    try:
        while True:
            prompt = "..> " if buffer else ">>> "
            try:
                line = input(prompt)
            except EOFError:
                print()
                break

            stripped = line.strip()

            if stripped == ".exit":
                return 0

            if not buffer:
                if not stripped:
                    # Nothing pending and nothing typed: just prompt again.
                    continue

                starts_block = (
                    stripped.upper().startswith(block_keywords)
                    or stripped.endswith(block_openers)
                )
                if starts_block:
                    buffer.append(line)
                    continue

                try:
                    statements = _parse_statements_from_source(line, "<repl>")
                except ASMParseError:
                    # Possibly the first line of a multi-line entry; the
                    # error is reported when the buffer is run.
                    buffer.append(line)
                    continue

                execute(statements)
                continue

            if stripped:
                buffer.append(line)
                continue

            # Blank line with pending input: run the buffered entry.
            source_text = "\n".join(buffer)
            buffer.clear()
            try:
                statements = _parse_statements_from_source(source_text, "<repl>")
            except ASMParseError as error:
                print(f"ParseError: {error}", file=sys.stderr)
                continue
            execute(statements)
    except ExitSignal as sig:
        return sig.code

    return 0
1024+
8761025
parser = argparse.ArgumentParser(description="ASM-Lang reference interpreter")
877-
parser.add_argument("program", help="Source file path or literal source with -source")
1026+
parser.add_argument("program", nargs="?", help="Source file path or literal source with -source")
8781027
parser.add_argument("-source", "--source", dest="source_mode", action="store_true", help="Treat program argument as literal source text")
8791028
parser.add_argument("-verbose", "--verbose", dest="verbose", action="store_true", help="Emit env snapshots in tracebacks")
8801029
parser.add_argument("--traceback-json", action="store_true", help="Also emit JSON traceback")
8811030
args = parser.parse_args(argv)
1031+
1032+
if args.program is None:
1033+
if args.source_mode:
1034+
print("-source requires a program string", file=sys.stderr)
1035+
return 1
1036+
return run_repl(verbose=args.verbose)
1037+
8821038
if args.source_mode:
8831039
source_text = args.program
8841040
filename = "<string>"
@@ -906,4 +1062,4 @@ def run_cli(argv: Optional[List[str]] = None) -> int:
9061062
return 1
9071063
return 0
9081064
if __name__ == "__main__":
909-
raise SystemExit(run_cli())
1065+
raise SystemExit(run_cli())

spec.txt

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,6 @@ state-transition (rewrite) function. All intermediate states are
1010
exactly, including I/O and nondeterministic choices, which are modeled
1111
explicitly for deterministic replay.
1212

13-
Note: Builds should not refer to this language by any proper name,
14-
as none has been specified yet. If a name is needed
15-
(e.g. for a filename), use ASM-Lang (Abstract State Machine Language) as a
16-
placeholder.
17-
18-
1913
1. Overview
2014
-----------
2115

@@ -58,6 +52,12 @@ function names (Section 2.5); keywords and built-ins (control-flow
5852
keywords, built-in operators and functions; see Sections 4 and 5); and
5953
delimiters, namely '(', ')', '[', ']', '{', '}', ',', and '='.
6054

55+
The character '-' is not an operator token. It is permitted only as the
56+
leading sign of a binary integer literal (Section 3.1). If '-' appears
57+
anywhere else, the lexer must raise a syntax error. When '-' starts a
58+
literal, any spaces, horizontal tabs, or carriage returns between the '-'
59+
and the first digit are ignored and the literal is treated as negative.
60+
6161
Identifiers denote variables and user-defined functions. They must be
6262
non-empty and case-sensitive. An identifier must not contain non-ASCII
6363
characters, nor any of the following characters: '{', '}', '[', ']', '(',
@@ -71,10 +71,13 @@ with the name of any built-in operator or function (see Section 4.1).
7171
-------------
7272

7373
The sole literal data type is the binary integer. A binary integer literal is
74-
a non-empty sequence of characters drawn from the set '{0,1}' (for example,
75-
'0', '1', '1011', '100110'). There is no syntactic sign ('+' or '-') for
76-
literals; sign, if modeled, is an implementation detail (such as two’s
77-
complement) and does not affect the syntax.
74+
either (1) an unsigned non-empty sequence of characters drawn from the set
75+
'{0,1}' (for example, '0', '1', '1011', '100110'), or (2) a signed literal
76+
formed by a leading '-' followed by optional whitespace (spaces, horizontal
77+
tabs, or carriage returns) and then a non-empty sequence of '{0,1}'. The '-'
78+
is part of the literal spelling and is not an operator; it may only appear in
79+
this leading position. A '-' that does not introduce such a literal is
80+
a syntax error.
7881

7982
Every runtime value is a mathematical integer. The language does not support
8083
fractional or real-valued numbers. All operations that would otherwise
@@ -174,6 +177,17 @@ position, and any return value is discarded. The effect on the external
174177
world is recorded in the execution log, ensuring that replay remains
175178
deterministic.
176179

180+
'IMPORT(name)' loads and executes another source file inside the caller's
181+
namespace. The argument must be an identifier naming a module; the
182+
interpreter looks for a file named '<name>.asmln' in the same directory as
183+
the current source file. When the current source is provided via the
184+
'-source' string literal mode, the search directory is the process's current
185+
working directory. The imported file is parsed and run as if its top-level
186+
statements appeared inline at the call site: assignments affect the caller's
187+
environment, and any functions defined in the imported module become
188+
available to the caller. Each 'IMPORT' call executes the target module anew;
189+
there is no automatic caching or cycle detection.
190+
177191
'INPUT()' (or simply 'INPUT' in expression position) reads a value from the
178192
console or input stream. If the incoming text is empty, the result is '0'.
179193
If the text consists solely of the characters '0' and '1', it is interpreted
@@ -185,6 +199,12 @@ log for deterministic replay.
185199
non-zero, execution proceeds normally; if 'a' is '0', the program crashes
186200
with an assertion failure.
187201

202+
'MAIN()' returns '1' when the call site belongs to the primary program file
203+
(the file passed as the interpreter's first argument, or '<string>' when
204+
'-source' is used). It returns '0' when executed from code that came from an
205+
'IMPORT' (including nested imports). The result is determined solely by the
206+
source file that contains the call expression, not by the caller's call stack.
207+
188208
Program termination is exposed via 'EXIT'. 'EXIT()' or 'EXIT(code)' requests
189209
immediate termination of the interpreter. If an integer 'code' is supplied,
190210
it is used as the interpreter's process exit code; otherwise '0' is used.
@@ -550,6 +570,54 @@ full `rewrite_record` describing the DIV operation that failed.
550570
- File mode: `interpreter program.asm`
551571
- Source-string mode: `interpreter -source "foo = INPUT\nPRINT(foo)" -verbose`
552572

573+
- REPL / Interactive mode: `interpreter` (no program argument)
574+
575+
REPL (Interactive Mode)
576+
-----------------------
577+
578+
When the interpreter is invoked without a program path or a `-source` string
579+
argument it enters an interactive read–eval–print loop (REPL). The REPL is a
580+
convenient development and exploration environment that executes ASM-Lang
581+
statements using the same parser, runtime, built-ins, and state-logging
582+
semantics as file-mode execution. The following rules describe REPL
583+
behaviour:
584+
585+
- Invocation: running `interpreter` with no positional `program` argument
586+
launches the REPL. The `-verbose` and `--traceback-json` flags keep the
587+
same meanings in the REPL as in batch mode.
588+
- Prompting and input: the REPL presents a primary prompt for new top-level
589+
input and a continuation prompt while the user is entering a multi-line
590+
block (for example, the body of `FUNC`, `IF`, `WHILE`, or `FOR`).
591+
- Single-line execution: when the user enters a single complete top-level
592+
statement (for example `x = 1010` or `PRINT(x)`), the REPL parses and
593+
executes that statement immediately and prints any side-effect output.
594+
This means `EXIT()` typed as a single-line statement will terminate the
595+
REPL immediately and return the supplied exit code (or `0` if omitted),
596+
identical to the behaviour when `EXIT()` is executed in a file.
597+
- Multi-line buffering: if a statement begins a block (for example a `FUNC`
598+
definition or an `IF(...)[` that spans multiple lines), the REPL buffers
599+
lines until the user enters a blank line, which marks the end of the
600+
entry. When the entry is complete the REPL
601+
parses and executes the collected statements as a unit.
602+
- Environment persistence: variables, function definitions, and the state
603+
logger persist for the duration of the REPL session (that is, top-level
604+
bindings remain available across successive inputs until explicitly
605+
deleted with `DEL(name)`). This lets the user incrementally build up a
606+
program interactively.
607+
- Deterministic logging and tracebacks: all REPL-executed statements are
608+
recorded in the same state log format used for file-mode execution. Errors
609+
produce tracebacks in the same concise and verbose modes; the `-verbose`
610+
flag causes the REPL to attach `env_snapshot` entries in tracebacks when
611+
available.
612+
613+
Notes and examples:
614+
- Start REPL: `interpreter`
615+
- Exit via meta-command: type `.exit` or press Ctrl-D (EOF)
616+
- Exit programmatically: `EXIT()` — this immediately terminates the
617+
interpreter and returns the specified exit code to the shell, just like in
618+
batch execution.
619+
620+
553621
APIREF — Quick Reference
554622
------------------------
555623

@@ -561,8 +629,11 @@ Function / Operator Signatures (expression position)
561629
- 'INPUT()' or 'INPUT' : returns integer from input stream (0 for empty)
562630
- 'PRINT(a1, a2, ..., aN)' : prints arguments (side-effect), return value discarded
563631
- 'ASSERT(a)' : crashes if 'a' is 0
632+
- 'MAIN()' : 1 if the call site is in the primary program file, 0 if it is in imported code
564633
- 'DEL(x)' : delete variable 'x' from environment
565634
- 'EXIT()' or 'EXIT(code)' : terminate program immediately with optional exit code (default 0)
635+
- 'IMPORT(name)' : load and execute '<name>.asmln' beside the caller, sharing the caller's namespace
636+
- Binary literal: optional leading '-' (spaces/tabs/CR allowed after the dash) then '{0,1}+'
566637

567638
Arithmetic
568639
- 'ADD(a, b)' : a + b

0 commit comments

Comments
 (0)