From 4ab6dc58b3fbfe03ffc8449a49744e7edeb981f7 Mon Sep 17 00:00:00 2001 From: minglong51 <56749246+minglong51@users.noreply.github.com> Date: Thu, 12 Feb 2026 23:29:42 -0800 Subject: [PATCH 1/2] Add v0.1 skeleton: spec, parser/runtime, CLI, hello example, tests, CI --- .github/workflows/ci.yml | 27 ++++++ README.md | 13 +++ docs/grammar.ebnf | 23 +++++ docs/spec.md | 66 +++++++++++++ examples/hello.thread | 7 ++ pyproject.toml | 23 +++++ src/threadlang/__init__.py | 6 ++ src/threadlang/ast.py | 43 +++++++++ src/threadlang/cli.py | 47 ++++++++++ src/threadlang/parser.py | 185 +++++++++++++++++++++++++++++++++++++ src/threadlang/runtime.py | 54 +++++++++++ src/threadlang/trace.py | 10 ++ tests/test_golden_hello.py | 13 +++ 13 files changed, 517 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 docs/grammar.ebnf create mode 100644 docs/spec.md create mode 100644 examples/hello.thread create mode 100644 pyproject.toml create mode 100644 src/threadlang/__init__.py create mode 100644 src/threadlang/ast.py create mode 100644 src/threadlang/cli.py create mode 100644 src/threadlang/parser.py create mode 100644 src/threadlang/runtime.py create mode 100644 src/threadlang/trace.py create mode 100644 tests/test_golden_hello.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4820e65 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,27 @@ +name: CI + +on: + push: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install package + run: python -m pip install -e . 
+ + - name: Run tests + run: python -m pytest diff --git a/README.md b/README.md index a3d6231..c382bde 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,15 @@ # threadlang AI-native DSL for structured LLM workflows: context, constraints, steps, and typed outputs. + +## Run locally + +```bash +python -m pip install -e . +python -m pytest +``` + +## Hello example + +```bash +thread run examples/hello.thread --inputs name=world +``` diff --git a/docs/grammar.ebnf b/docs/grammar.ebnf new file mode 100644 index 0000000..af6e053 --- /dev/null +++ b/docs/grammar.ebnf @@ -0,0 +1,23 @@ +(* ThreadLang v0.1 minimal grammar *) + +program = "thread", IDENT, "{", { block }, "}" ; + +block = context_block + | emit_block + | placeholder_block ; + +context_block = "context", "{", { IDENT, "=", STRING }, "}" ; + +emit_block = "emit", "text", "{", expr, "}" ; + +placeholder_block = ("inputs" | "rules" | "steps"), "{", opaque, "}" ; + +expr = term, { "+", term } ; +term = STRING | variable_ref ; +variable_ref = ("context" | "inputs"), ".", IDENT ; + +IDENT = letter, { letter | digit | "_" } ; +STRING = '"', { character }, '"' ; + +(* Opaque placeholder content is accepted by the implementation using brace depth. *) +opaque = { ? any token with balanced braces ? } ; diff --git a/docs/spec.md b/docs/spec.md new file mode 100644 index 0000000..04a174c --- /dev/null +++ b/docs/spec.md @@ -0,0 +1,66 @@ +# ThreadLang v0.1 Specification (Skeleton) + +ThreadLang is a deterministic, traceable DSL for composing threaded prompts and outputs. +This document describes the **minimal v0.1 subset** implemented in this repository. + +## Design goals + +- **Deterministic parsing:** a source file has one unambiguous parse tree. +- **Deterministic runtime:** evaluation order and output are stable for the same source + inputs. +- **Traceability:** runtime emits structured events (`parse_ok`, `context_set`, `emit`). 
+ +## Program shape + +A program starts with a single thread block: + +```threadlang +thread Name { + ...blocks... +} +``` + +Supported block families in v0.1: + +- `context { ... }` +- `inputs { ... }` *(placeholder in v0.1 parser; parsed as opaque block)* +- `rules { ... }` *(placeholder in v0.1 parser; parsed as opaque block)* +- `steps { ... }` *(placeholder in v0.1 parser; parsed as opaque block)* +- `emit text { ... }` + +## context block + +The context block contains string assignments: + +```threadlang +context { + greeting = "Hello" +} +``` + +These values are available via `context.<key>` inside expressions. + +## emit block + +v0.1 supports only `emit text { <expr> }`. + +Expressions support: + +- String literals: `"hello"` +- Variable references: `context.key` and `inputs.key` +- Concatenation: `<expr> + <expr>` + +Example: + +```threadlang +emit text { context.greeting + ", " + inputs.name + "!" } +``` + +## Runtime behavior + +Given a parsed AST and an input dictionary, runtime: + +1. Emits `parse_ok`. +2. Applies context assignments in order and emits `context_set` for each. +3. Evaluates emit blocks in order, concatenates emitted `text`, emits `emit` events. + +Result: `{ output: string, trace: TraceEvent[] }`. diff --git a/examples/hello.thread b/examples/hello.thread new file mode 100644 index 0000000..89ba14d --- /dev/null +++ b/examples/hello.thread @@ -0,0 +1,7 @@ +thread HelloWorld { + context { + greeting = "Hello" + } + + emit text { context.greeting + ", " + inputs.name + "!" 
} +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5051da4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "threadlang" +version = "0.1.0" +description = "ThreadLang v0.1 interpreter skeleton" +readme = "README.md" +requires-python = ">=3.10" +authors = [{ name = "ThreadLang Contributors" }] + +[project.scripts] +thread = "threadlang.cli:main" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/src/threadlang/__init__.py b/src/threadlang/__init__.py new file mode 100644 index 0000000..7e3ada7 --- /dev/null +++ b/src/threadlang/__init__.py @@ -0,0 +1,6 @@ +"""ThreadLang v0.1 package.""" + +from .parser import parse_program +from .runtime import run_program + +__all__ = ["parse_program", "run_program"] diff --git a/src/threadlang/ast.py b/src/threadlang/ast.py new file mode 100644 index 0000000..70b6fa4 --- /dev/null +++ b/src/threadlang/ast.py @@ -0,0 +1,43 @@ +"""AST nodes for the ThreadLang v0.1 grammar.""" + +from dataclasses import dataclass +from typing import List + + +@dataclass(frozen=True) +class Program: + thread_name: str + context: List["ContextAssign"] + emits: List["Emit"] + + +@dataclass(frozen=True) +class ContextAssign: + key: str + value: str + + +@dataclass(frozen=True) +class Emit: + target: str + expression: "Expr" + + +class Expr: + """Marker base class for expressions.""" + + +@dataclass(frozen=True) +class StringLiteral(Expr): + value: str + + +@dataclass(frozen=True) +class VariableRef(Expr): + scope: str + key: str + + +@dataclass(frozen=True) +class Concat(Expr): + parts: List[Expr] diff --git a/src/threadlang/cli.py b/src/threadlang/cli.py new file mode 100644 index 0000000..0006aaf --- /dev/null +++ b/src/threadlang/cli.py @@ -0,0 +1,47 @@ +"""Command-line interface for 
ThreadLang.""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from typing import Dict + +from .parser import parse_program +from .runtime import run_program + + +def main() -> None: + parser = argparse.ArgumentParser(prog="thread") + subparsers = parser.add_subparsers(dest="command", required=True) + + run_parser = subparsers.add_parser("run", help="Run a .thread program") + run_parser.add_argument("file", type=Path, help="Path to .thread source file") + run_parser.add_argument( + "--inputs", + nargs="*", + default=[], + metavar="key=value", + help="Input bindings available via inputs.", + ) + + args = parser.parse_args() + + if args.command == "run": + source = args.file.read_text(encoding="utf-8") + program = parse_program(source) + result = run_program(program, inputs=_parse_inputs(args.inputs)) + print(result.output) + + +def _parse_inputs(items: list[str]) -> Dict[str, str]: + parsed: Dict[str, str] = {} + for item in items: + if "=" not in item: + raise ValueError(f"Input must be key=value, got: {item!r}") + key, value = item.split("=", 1) + parsed[key] = value + return parsed + + +if __name__ == "__main__": + main() diff --git a/src/threadlang/parser.py b/src/threadlang/parser.py new file mode 100644 index 0000000..9a8b5e1 --- /dev/null +++ b/src/threadlang/parser.py @@ -0,0 +1,185 @@ +"""Deterministic parser for the ThreadLang v0.1 subset.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import List + +from .ast import Concat, ContextAssign, Emit, Program, StringLiteral, VariableRef + + +class ParseError(ValueError): + """Raised when source text does not match the v0.1 grammar.""" + + +_TOKEN_RE = re.compile( + r'\s*(?:(?P<STRING>"[^"\\]*(?:\\.[^"\\]*)*")|' + r'(?P<IDENT>[A-Za-z_][A-Za-z0-9_]*)|' + r'(?P<SYMBOL>[{}.=+]))', +) + + +@dataclass +class Token: + kind: str + value: str + + +class TokenStream: + def __init__(self, source: str) -> None: + self.tokens = self._tokenize(source) 
self.pos = 0 + + def _tokenize(self, source: str) -> List[Token]: + tokens: List[Token] = [] + idx = 0 + while idx < len(source): + match = _TOKEN_RE.match(source, idx) + if not match: + if source[idx:].strip() == "": + break + snippet = source[idx : idx + 20] + raise ParseError(f"Unexpected token near: {snippet!r}") + idx = match.end() + kind = "" + value = "" + for cand in ("STRING", "IDENT", "SYMBOL"): + group = match.group(cand) + if group is not None: + kind = cand + value = group + break + if kind: + tokens.append(Token(kind, value)) + return tokens + + def peek(self) -> Token | None: + if self.pos >= len(self.tokens): + return None + return self.tokens[self.pos] + + def expect_symbol(self, symbol: str) -> None: + token = self._next() + if token.kind != "SYMBOL" or token.value != symbol: + raise ParseError(f"Expected symbol {symbol!r}, got {token.value!r}") + + def expect_ident(self, value: str | None = None) -> str: + token = self._next() + if token.kind != "IDENT": + raise ParseError(f"Expected identifier, got {token.value!r}") + if value is not None and token.value != value: + raise ParseError(f"Expected identifier {value!r}, got {token.value!r}") + return token.value + + def expect_string(self) -> str: + token = self._next() + if token.kind != "STRING": + raise ParseError(f"Expected string literal, got {token.value!r}") + raw = token.value[1:-1] + return bytes(raw, "utf-8").decode("unicode_escape") + + def _next(self) -> Token: + token = self.peek() + if token is None: + raise ParseError("Unexpected end of input") + self.pos += 1 + return token + + +def parse_program(source: str) -> Program: + stream = TokenStream(source) + stream.expect_ident("thread") + thread_name = stream.expect_ident() + stream.expect_symbol("{") + + context_items: List[ContextAssign] = [] + emits: List[Emit] = [] + + while True: + token = stream.peek() + if token is None: + raise ParseError("Unclosed thread block") + if token.kind == "SYMBOL" and token.value == "}": + 
stream.expect_symbol("}") + break + + keyword = stream.expect_ident() + if keyword == "context": + context_items.extend(_parse_context_block(stream)) + elif keyword == "emit": + emits.append(_parse_emit_block(stream)) + elif keyword in {"inputs", "rules", "steps"}: + _parse_placeholder_block(stream) + else: + raise ParseError(f"Unsupported block type: {keyword!r}") + + if stream.peek() is not None: + raise ParseError("Unexpected tokens after thread block") + + return Program(thread_name=thread_name, context=context_items, emits=emits) + + +def _parse_context_block(stream: TokenStream) -> List[ContextAssign]: + stream.expect_symbol("{") + assigns: List[ContextAssign] = [] + while True: + token = stream.peek() + if token is None: + raise ParseError("Unclosed context block") + if token.kind == "SYMBOL" and token.value == "}": + stream.expect_symbol("}") + return assigns + key = stream.expect_ident() + stream.expect_symbol("=") + value = stream.expect_string() + assigns.append(ContextAssign(key=key, value=value)) + + +def _parse_emit_block(stream: TokenStream) -> Emit: + target = stream.expect_ident() + stream.expect_symbol("{") + expr = _parse_expression(stream) + stream.expect_symbol("}") + return Emit(target=target, expression=expr) + + +def _parse_placeholder_block(stream: TokenStream) -> None: + stream.expect_symbol("{") + depth = 1 + while depth: + token = stream._next() + if token.kind == "SYMBOL" and token.value == "{": + depth += 1 + elif token.kind == "SYMBOL" and token.value == "}": + depth -= 1 + + +def _parse_expression(stream: TokenStream): + parts = [_parse_term(stream)] + while True: + token = stream.peek() + if token and token.kind == "SYMBOL" and token.value == "+": + stream.expect_symbol("+") + parts.append(_parse_term(stream)) + else: + break + if len(parts) == 1: + return parts[0] + return Concat(parts=parts) + + +def _parse_term(stream: TokenStream): + token = stream.peek() + if token is None: + raise ParseError("Expected expression term, found 
end of input") + + if token.kind == "STRING": + return StringLiteral(stream.expect_string()) + + scope = stream.expect_ident() + if scope not in {"context", "inputs"}: + raise ParseError(f"Unsupported variable scope: {scope!r}") + stream.expect_symbol(".") + key = stream.expect_ident() + return VariableRef(scope=scope, key=key) diff --git a/src/threadlang/runtime.py b/src/threadlang/runtime.py new file mode 100644 index 0000000..0b02e38 --- /dev/null +++ b/src/threadlang/runtime.py @@ -0,0 +1,54 @@ +"""Runtime evaluator for ThreadLang v0.1 AST.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, List + +from .ast import Concat, Emit, Expr, Program, StringLiteral, VariableRef +from .trace import TraceEvent + + +@dataclass(frozen=True) +class RunResult: + output: str + trace: List[TraceEvent] + + +def run_program(program: Program, inputs: Dict[str, str] | None = None) -> RunResult: + inputs = inputs or {} + trace: List[TraceEvent] = [ + TraceEvent("parse_ok", {"thread": program.thread_name}), + ] + + context: Dict[str, str] = {} + for assign in program.context: + context[assign.key] = assign.value + trace.append(TraceEvent("context_set", {"key": assign.key, "value": assign.value})) + + output_parts: List[str] = [] + for emit in program.emits: + value = _eval_emit(emit, context=context, inputs=inputs) + if emit.target == "text": + output_parts.append(value) + trace.append(TraceEvent("emit", {"target": emit.target, "value": value})) + + return RunResult(output="".join(output_parts), trace=trace) + + +def _eval_emit(emit: Emit, context: Dict[str, str], inputs: Dict[str, str]) -> str: + return _eval_expr(emit.expression, context=context, inputs=inputs) + + +def _eval_expr(expr: Expr, context: Dict[str, str], inputs: Dict[str, str]) -> str: + if isinstance(expr, StringLiteral): + return expr.value + if isinstance(expr, VariableRef): + if expr.scope == "context": + return context.get(expr.key, "") + if expr.scope == 
"inputs": + return inputs.get(expr.key, "") + raise ValueError(f"Unsupported variable scope: {expr.scope}") + if isinstance(expr, Concat): + return "".join(_eval_expr(part, context=context, inputs=inputs) for part in expr.parts) + raise ValueError(f"Unsupported expression node: {type(expr).__name__}") diff --git a/src/threadlang/trace.py b/src/threadlang/trace.py new file mode 100644 index 0000000..9d16c68 --- /dev/null +++ b/src/threadlang/trace.py @@ -0,0 +1,10 @@ +"""Trace event structures used by parser/runtime flows.""" + +from dataclasses import dataclass, field +from typing import Any, Dict + + +@dataclass(frozen=True) +class TraceEvent: + name: str + data: Dict[str, Any] = field(default_factory=dict) diff --git a/tests/test_golden_hello.py b/tests/test_golden_hello.py new file mode 100644 index 0000000..c7907e5 --- /dev/null +++ b/tests/test_golden_hello.py @@ -0,0 +1,13 @@ +from pathlib import Path + +from threadlang.parser import parse_program +from threadlang.runtime import run_program + + +def test_golden_hello() -> None: + source = Path("examples/hello.thread").read_text(encoding="utf-8") + program = parse_program(source) + result = run_program(program, inputs={"name": "world"}) + + assert result.output == "Hello, world!" + assert [event.name for event in result.trace] == ["parse_ok", "context_set", "emit"] From 9d182a3f4e39467cbd81ca4790af36d1f07618a2 Mon Sep 17 00:00:00 2001 From: minglong51 <56749246+minglong51@users.noreply.github.com> Date: Thu, 12 Feb 2026 23:34:36 -0800 Subject: [PATCH 2/2] Fix CI by installing pytest before running tests --- .github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4820e65..ae898cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,8 +20,10 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install package - run: python -m pip install -e . 
+ - name: Install package and test runner + run: | + python -m pip install --upgrade pip + python -m pip install -e . pytest - name: Run tests run: python -m pytest