From 489343e143a94dbcbe9d18f0292464cdbaa1bc5a Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Fri, 19 Jun 2026 19:47:05 +0800
Subject: [PATCH] indent: un-overload YAML semantics off generic flags/literals
 (#44)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A non-YAML indentation grammar inherited three YAML behaviors derived
from flags/literals that mean something else, with no opt-out short of
mis-declaring the grammar. Detach each onto its own explicit, mode-neutral
IndentConfig field that defaults OFF; yaml.ts opts into each.

(A) Flow `:` key/value separator carve-out was derived from the `string`
    flag (`stringTokenNames`), silently enlisting every string-region token.
    New `flowSeparatorAfterTokens: string[]` names the membership explicitly
    (carve-out OFF when empty); `string: true` keeps its region-scoping /
    auto-close-derivation jobs without dragging a token into separator
    emission. PR #41's wholesale `flowColonSeparator` boolean is removed —
    an empty list is the neutral-off it provided, without re-overloading.

(B) Plain-scalar continuation folding was derived from `blockPattern`,
    giving YAML folding to any block-pattern token. New `foldTokens:
    string[]` names the fold participants explicitly (folding OFF when
    empty); the last-named token is the catch-all continuation type. A
    grammar can now carry a `blockPattern` token without inheriting the fold.

(C) `keyValueSeparator` was honored by gen-tm but the lexer hardcoded `:`
    (and `-`/`?`) in its key-line sniffs, a latent parser/highlighter split.
    Route every lexer key-separator sniff through `indent.keyValueSeparator`
    (via a shared `keySepAt` helper) and every compact-indicator sniff
    through `compactIndicators`, so the lexer and gen-tm share one source of
    truth for the separator, whatever glyph it is configured to be.

Deferred: (D) the §6.1 tab-in-indentation errors and the value/item-position
classification (seq-item `-` vs explicit-key `?`) still hardcode a few YAML
indicators; cleanly splitting them needs `startsBlockStructuralNode`'s
property/flow/alias indicator set parameterized — a larger sub-task, noted
in-code at each site.

yaml.ts opts in field-by-field (flowSeparatorAfterTokens + foldTokens) and
tokenizes byte-identically: `npm run gen` produces zero generated-file diff
across yaml + ts/js/jsx/tsx/html. test/indent-extensions.ts gains toy
non-YAML grammars proving each un-overload (a `string:true` token that keeps
its `:name` after values; a `blockPattern` token that does not fold; a
`keyValueSeparator:'='` grammar whose lexer treats `=` as structural).
---
 src/gen-lexer.ts          | 104 ++++++++++++++++----------
 src/types.ts              |  37 +++++++--
 test/indent-extensions.ts | 153 ++++++++++++++++++++++++++++++--------
 yaml.ts                   |   9 +++
 4 files changed, 224 insertions(+), 79 deletions(-)

diff --git a/src/gen-lexer.ts b/src/gen-lexer.ts
index 9aac269..7fc06ea 100644
--- a/src/gen-lexer.ts
+++ b/src/gen-lexer.ts
@@ -256,25 +256,28 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
   // exclusions, filtered ONCE here instead of re-tested per matcher per position.
   const scanMatchers = tokenMatchers.filter(tm =>
     tm.name !== templateTokenName && !markupTokenNames.has(tm.name) && !indentTokenNames.has(tm.name));
-  // String-literal token names (the `string`-flagged tokens — quoted scalars in YAML). Used by the
-  // flow mapping-separator guard below: a quoted scalar can never run past its closing quote, so a
-  // `:` immediately after one (inside flow) is ALWAYS the mapping `key: value` separator, never the
-  // start of a plain scalar — derived from the `string` flag, not a hardcoded token name.
-  const stringTokenNames = new Set(grammar.tokens.filter(t => t.string).map(t => t.name));
-  // Plain-scalar token names: the tokens carrying a block-context pattern variant (`blockPattern`).
-  // In YAML these are exactly the UNQUOTED scalar family (plain / key / number / boolean-null) — the
-  // ones whose flow-vs-block forms differ because flow indicators are content in block. Used by the
-  // flow multi-line-plain FOLD post-pass: a plain scalar folded across a flow-internal newline arrives
-  // as ADJACENT plain tokens (a space-separated plain is already one token; only a NEWLINE splits it),
-  // which the post-pass re-merges. Derived from `blockPattern`, not a hardcoded token name.
-  const plainScalarTokenNames = new Set(grammar.tokens.filter(t => tokenBlockPatternSource(t)).map(t => t.name));
-  // The generic (catch-all) plain-scalar token: the LAST-declared blockPattern token. Declaration
-  // order is specific-before-general (YAML: Key, Num, BoolNull, Plain — the typed/key shapes win
-  // earlier, so the broadest string-valued plain is necessarily last). Used as the type emitted for
-  // a folded plain-scalar CONTINUATION line — a more-indented line after a plain LEAF whose leading
-  // glyph (`-`/`&`/`!`/`[`/`?`/`*`) is plain CONTENT here, not structure (so it can't be lexed by
-  // the plain head pattern, which forbids those starts). Null when no blockPattern token exists.
-  const plainContinuationTokenName = [...grammar.tokens].reverse().find(t => tokenBlockPatternSource(t))?.name ?? null;
+  // Flow mapping-separator carve-out MEMBERSHIP (IndentConfig.flowSeparatorAfterTokens). A `:` glued
+  // (inside flow) right after one of these tokens is ALWAYS the mapping `key: value` separator, never
+  // the start of a `:`-led plain scalar — a quoted scalar / flow-close can never run past its closer.
+  // EXPLICIT list now (was derived from the `string` flag, which silently enlisted every string-region
+  // token); the carve-out is OFF when the list is absent. See the flow `:` guard below.
+  const flowSeparatorAfterTokens = new Set(indent?.flowSeparatorAfterTokens ?? []);
+  // Plain-scalar FOLD MEMBERSHIP (IndentConfig.foldTokens). The token TYPES that participate in YAML's
+  // plain-scalar continuation folding — in YAML the UNQUOTED scalar family (plain / key / number /
+  // boolean-null). EXPLICIT list now (was derived from `blockPattern`, which gave folding to ANY
+  // block-pattern token); folding is OFF when the list is absent. Used by: the block-context fold (a
+  // deeper line after a plain leaf), the flow illegal-head continuation, and the flow multi-line merge
+  // post-pass — a plain scalar folded across a flow-internal newline arrives as ADJACENT plain tokens
+  // (a NEWLINE splits it), which the post-pass re-merges.
+  const foldTokens = indent?.foldTokens ?? [];
+  const plainScalarTokenNames = new Set(foldTokens);
+  // The generic (catch-all) plain-scalar token: the LAST-named fold token. List order (no longer
+  // token declaration order) decides it, so name specific-before-general (YAML: Key, Num, BoolNull,
+  // Plain — the broadest string-valued plain goes last). Used as the type emitted for a folded
+  // plain-scalar CONTINUATION line — a more-indented line after a plain LEAF whose leading glyph
+  // (`-`/`&`/`!`/`[`/`?`/`*`) is plain CONTENT here, not structure (so it can't be lexed by the plain
+  // head pattern, which forbids those starts). Null when no fold token is declared.
+  const plainContinuationTokenName = foldTokens.length ? foldTokens[foldTokens.length - 1] : null;
   // The generic plain token's FLOW pattern (its `pattern`, not the block variant) — used by the flow
   // illegal-head continuation fallback: a char that no token can START here (e.g. YAML's `%`/`@`/backtick,
   // illegal as a plain START) is, when it follows a plain scalar inside a flow collection, mid-scalar
@@ -282,7 +285,8 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
   // pattern at the next position), emit it as a plain-continuation token, and let the flow fold post-pass
   // merge it with the preceding scalar. Compiled once; null when no generic plain token exists.
   const plainFlowRe = (() => {
-    const t = [...grammar.tokens].reverse().find(t => tokenBlockPatternSource(t));
+    if (!plainContinuationTokenName) return null;
+    const t = grammar.tokens.find(t => t.name === plainContinuationTokenName);
     return t ? new RegExp(`^(?:${tokenPatternSource(t)})`) : null;
   })();
   // Does the line content starting at `start` carry a KEY SEPARATOR — an unquoted `:` followed by
@@ -306,7 +310,22 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
   const kBlockScalarTok = kOf(indent?.blockScalar?.token ?? null);
   const kRawBlockTok = kOf(indent?.rawBlock?.token ?? null);
   const kPlainCont = kOf(plainContinuationTokenName);
-  const tColon = puLitOf.get(':') ?? 0;
+  // The mapping KEY/VALUE separator (IndentConfig.keyValueSeparator, default `:`) — the ONE source of
+  // truth shared with gen-tm for every "is this a mapping-key line" sniff in the lexer. An EMPTY string
+  // is treated as absent (`||`, not `??`): a zero-length separator would match at every offset and the
+  // flow carve-out's `pos += keyValueSep.length` would never advance. `kKvSep` is its punctuation intern.
+  const keyValueSep = indent?.keyValueSeparator || ':';
+  const kKvSep = puLitOf.get(keyValueSep) ?? 0;
+  // Is `src` at `i` a mapping KEY separator — the `keyValueSeparator` literal followed by whitespace /
+  // EOL / a flow indicator (`,`/`[`/`]`/`{`/`}`)? Shared by the line-scanning key sniffs
+  // (`lineHasKeySeparator`, `startsBlockStructuralNode`) so they read the separator from the config
+  // rather than each hardcoding `:`. Returns the index PAST the separator on a hit, or -1 on no match.
+  function keySepAt(src: string, i: number): number {
+    if (!src.startsWith(keyValueSep, i)) return -1;
+    const n = src[i + keyValueSep.length];
+    return (n === undefined || n === ' ' || n === '\t' || n === '\n' || n === '\r'
+            || n === ',' || n === '[' || n === ']' || n === '{' || n === '}') ? i + keyValueSep.length : -1;
+  }
 
   function lineHasKeySeparator(src: string, start: number): boolean {
     for (let i = start; i < src.length; i++) {
@@ -327,7 +346,7 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
         if (src[i] !== "'") break; continue;
       }
       if ((ch === ' ' || ch === '\t') && src[i + 1] === '#') break;            // trailing comment → any sep would be earlier
-      if (ch === ':') { const n = src[i + 1]; if (n === undefined || n === ' ' || n === '\t' || n === '\n' || n === '\r' || n === ',' || n === '[' || n === ']' || n === '{' || n === '}') return true; }
+      if (keySepAt(src, i) >= 0) return true;
     }
     return false;
   }
@@ -392,18 +411,19 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
   function startsBlockStructuralNode(src: string, start: number, allowProperty = true): boolean {
     const c0 = src[start];
     if (c0 === '[' || c0 === '{' || c0 === '*') return false;               // flow collection / alias → not indentation
-    if ((c0 === '-' || c0 === '?' || c0 === ':') && sepAfter(src[start + 1])) return true; // indicator / empty key
+    if (c0 !== undefined && compactIndicatorSet.has(c0) && sepAfter(src[start + 1])) return true; // compact indicator (`-`/`?`)
+    if (src.startsWith(keyValueSep, start) && sepAfter(src[start + keyValueSep.length])) return true; // empty key (`:` then ws/EOL)
     if ((c0 === '&' || c0 === '!') && allowProperty) return true;          // node property → establishes a node here
     if (c0 === '&' || c0 === '!') return false;                            // property after `:` → inline value, legal
-    // Scalar key sniff: scan the line for an unquoted `:` followed by ws/EOL/flow-indicator (a
-    // block key separator), skipping over "…"/'…' regions and stopping at a ` #` comment / EOL.
+    // Scalar key sniff: scan the line for an unquoted key separator followed by ws/EOL/flow-indicator,
+    // skipping over "…"/'…' regions and stopping at a ` #` comment / EOL.
     for (let i = start; i < src.length; i++) {
       const ch = src[i];
       if (ch === '\n' || ch === '\r') break;
       if (ch === '"') { i++; while (i < src.length && src[i] !== '"' && src[i] !== '\n') { if (src[i] === '\\') i++; i++; } continue; }
       if (ch === "'") { i++; while (i < src.length && src[i] !== '\n') { if (src[i] === "'" && src[i + 1] !== "'") break; if (src[i] === "'") i++; i++; } continue; }
       if ((ch === ' ' || ch === '\t') && src[i + 1] === '#') break;          // trailing comment → key sep would be earlier
-      if (ch === ':') { const n = src[i + 1]; if (n === undefined || n === ' ' || n === '\t' || n === '\n' || n === '\r' || n === ',' || n === '[' || n === ']' || n === '{' || n === '}') return true; }
+      if (keySepAt(src, i) >= 0) return true;
     }
     return false;
   }
@@ -424,8 +444,8 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
       } else break;
     }
     if (i >= src.length || src[i] === '\n' || src[i] === '\r') return false;   // property alone on the line → no nest
-    if ((src[i] === '-' || src[i] === '?') && sepAfter(src[i + 1])) return true; // nested indicator
-    return startsBlockStructuralNode(src, i, false);                            // a mapping key (the `:`-sniff)
+    if (src[i] !== undefined && compactIndicatorSet.has(src[i]) && sepAfter(src[i + 1])) return true; // nested compact indicator
+    return startsBlockStructuralNode(src, i, false);                            // a mapping key (the key-separator sniff)
   }
 
   // Scan from inside a template span to its next boundary: an interpolation hole
@@ -544,7 +564,11 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
             // The §7.4 / multi-line-flow bookkeeping is indent-only (a newline grammar has no stack).
             if (flowDepth === 0 && indent) {
               const prevTok = tokens[tokens.length - 2];   // the token before this just-pushed open
-              flowValueIndent = (prevTok && prevTok.type === '' && (prevTok.text === ':' || prevTok.text === '-'))
+              // value/item position: a flow opened right after the key/value separator (map value) or a
+              // sequence-item indicator. The `-` here is the seq-item lead specifically (NOT every
+              // compactIndicator — `?` is an explicit KEY, not a value position); classifying it from
+              // config is the (D) indicator-role split, deferred — see issue #44.
+              flowValueIndent = (prevTok && prevTok.type === '' && (prevTok.text === keyValueSep || prevTok.text === '-'))
                 ? indentStack[indentStack.length - 1] : -1;
               flowSawNewline = false;                      // start tracking whether this flow spans >1 line
             }
@@ -776,10 +800,10 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
           // while `-`/`?` include them (`-\t&a x` IS an error). Block context only (flowDepth===0).
           if (indent && flowDepth === 0) {   // §6.1 tab-after-indicator error is YAML-specific
             const prev = tokens[tokens.length - 1];
-            const isIndicator = prev && prev.type === '' && (prev.text === '-' || prev.text === '?' || prev.text === ':');
+            const isIndicator = prev && prev.type === '' && (compactIndicatorSet.has(prev.text) || prev.text === keyValueSep);
             if (isIndicator) {
               let q = pos; while (q < source.length && (source[q] === ' ' || source[q] === '\t')) q++;
-              if (source.slice(pos, q).includes('\t') && startsBlockStructuralNode(source, q, prev!.text !== ':')) {
+              if (source.slice(pos, q).includes('\t') && startsBlockStructuralNode(source, q, prev!.text !== keyValueSep)) {
                 throw new Error(`Tab character used in indentation at offset ${pos}`);
               }
             }
@@ -1046,14 +1070,16 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
       // the separator — emit it as the `:` punctuation literal here. Gated on flow (block-context `:`
       // separators are handled by the KEY-position lookaheads). yaml-test-suite 5MUD / 5T43 / 9MMW
       // / C2DT / K3WX (quoted key) and the flow-collection-key cohort.
-      // flowColonSeparator: false disables the YAML `"key":value` / `}: value` flow
-      // separator carve-out, for indentation grammars with `:name`-shaped tokens that
-      // may legally follow a quoted value or a flow-close delimiter.
-      if (indent && indent.flowColonSeparator !== false && flowDepth > 0 && source[pos] === ':') {
+      // Declaring flowSeparatorAfterTokens (a non-empty list — YAML: the quoted-key tokens) ENABLES
+      // the carve-out; it then fires after a NAMED token OR after any flow-CLOSE delimiter (`]`/`}`,
+      // which structurally can't run past its closer either). An indentation grammar that declares no
+      // such tokens gets no carve-out at all, so a `:name`-shaped token survives after values in flow.
+      // The separator glyph is keyValueSeparator (default `:`).
+      if (indent && flowDepth > 0 && flowSeparatorAfterTokens.size && source.startsWith(keyValueSep, pos)) {
         const prevTok = tokens[tokens.length - 1];
-        if (prevTok && (stringTokenNames.has(prevTok.type) || (prevTok.type === '' && flowCloseSet.has(prevTok.text)))) {
-          push(mkPu(':', pos, tColon));
-          pos += 1;
+        if (prevTok && (flowSeparatorAfterTokens.has(prevTok.type) || (prevTok.type === '' && flowCloseSet.has(prevTok.text)))) {
+          push(mkPu(keyValueSep, pos, kKvSep));
+          pos += keyValueSep.length;
           continue;
         }
       }
@@ -1213,7 +1239,7 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
               let q = i; while (q < source.length && source[q] === ' ') q++;
               return q > i && source[q] === '-' && sepAfter(source[q + 1]);
             };
-            const colonPairsExplicit = wasLineLead && lit === ':' && currentLineCol === lastExplicitKeyCol;
+            const colonPairsExplicit = wasLineLead && lit === keyValueSep && currentLineCol === lastExplicitKeyCol;
             const compactColon = colonPairsExplicit && dashAfter(pos);
             // A line-lead `:` at its `?`'s column USES UP that pairing — the explicit entry now has its
             // value, so a SECOND `: …` at the same column (`? a\n: - b\n: - c`, yaml-test-suite cousin) is
diff --git a/src/types.ts b/src/types.ts
index 123b042..21186b0 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -324,10 +324,12 @@ export interface IndentConfig {
     explicitKey?: string;  // the flow `?` explicit-key indicator (e.g. punctuation.definition.key-value)
   };
   comment?: string;       // line-comment introducer ignored for indentation (e.g. '#')
-  // The mapping KEY/VALUE separator literal (YAML `:`). Used by the derived highlighter's multi-line
-  // plain-scalar fold regions (gen-tm §2a′/§2a″) to recognise a `key:`-led line as STRUCTURAL (a
-  // sibling that ends a fold) vs a bare plain-scalar continuation. Declared here (not hardcoded in the
-  // generator) so the YAML region code stays data-driven. Absent → defaults to ':'.
+  // The mapping KEY/VALUE separator literal (YAML `:`). The ONE source of truth for "what glyph
+  // separates a mapping key from its value": BOTH the lexer's key-line sniffs (`lineHasKeySeparator`,
+  // `startsBlockStructuralNode`, the compact-key pairing) AND the derived highlighter's multi-line
+  // plain-scalar fold regions (gen-tm §2a′/§2a″) recognise a `key:`-led line as STRUCTURAL from this
+  // field — so parser and highlighter agree for ANY separator. Declared here (not hardcoded) so the
+  // region code stays data-driven. Absent → defaults to ':'.
   keyValueSeparator?: string;
   // Block scalars (YAML `|` / `>`): when the rest of a line is an introducer + indicators, the
   // following more-indented lines are verbatim content emitted as ONE token (like raw-text, but
@@ -339,10 +341,29 @@ export interface IndentConfig {
   // control sigil, not content; absent → the block-scalar token's own scope (introducer reads as the
   // body string). The body always keeps the token scope; only the introducer capture is re-scoped.
   blockScalar?: { introducers: string[]; token: string; documentMarkers?: string[]; indicatorScope?: string };
-  // Set false to disable the YAML flow `:` key-separator carve-out (a `:` glued after a quoted
-  // scalar / flow-close is forced punctuation). Indentation grammars with `:name`-shaped tokens
-  // (bound-attribute shorthand) need those to survive after values. Default true (YAML behavior).
-  flowColonSeparator?: boolean;
+  // Flow `:` key-separator carve-out MEMBERSHIP: token TYPES after which a `:` glued inside a flow
+  // collection is the `key: value` SEPARATOR (forced `:` punctuation), never the start of a `:`-led
+  // plain scalar. A quoted scalar / flow-close can never run past its closer, so a `:` immediately
+  // after one is unambiguously the separator (YAML: the quoted-key tokens). This is an EXPLICIT,
+  // mode-neutral list — the carve-out is OFF unless a token is named here. (Was derived from the
+  // `string` flag, which silently enlisted every string-region token; an indentation grammar with
+  // `:name`-shaped tokens after values keeps `string: true` for region scoping / auto-close
+  // derivation WITHOUT being dragged into separator emission.) Once the list is non-empty, flow-CLOSE
+  // delimiters (`flowClose`) join the carve-out too — a `:` glued after `]`/`}` is then the separator.
+  // Absent / empty → no carve-out at all, not even after a flow-close (the `:` lexes normally). The
+  // separator glyph itself is `keyValueSeparator`. yaml-test-suite 5MUD / 5T43 / 9MMW / C2DT / K3WX.
+  flowSeparatorAfterTokens?: string[];
+  // Plain-scalar CONTINUATION fold MEMBERSHIP: the token TYPES that participate in YAML's plain-scalar
+  // folding — a more-indented line right after one of these LEAF scalars (or an adjacent one inside a
+  // flow collection) is a CONTINUATION of that scalar, not a new node. Drives the block-context fold
+  // (a deeper line after a plain leaf), the flow illegal-head continuation, and the flow multi-line
+  // merge post-pass. The LAST-named token is the generic catch-all used as the emitted CONTINUATION
+  // token type and whose `pattern` matches a folded body (declaration order is specific-before-general,
+  // so the broadest plain is last). This is an EXPLICIT, mode-neutral list — folding is OFF unless a
+  // token is named here. (Was derived from `blockPattern`, which gave YAML plain-scalar folding to ANY
+  // block-pattern token; an indentation grammar can now carry a `blockPattern` token WITHOUT inheriting
+  // the fold.) Absent / empty → no folding. yaml-test-suite 3MYT / A2M4 / AB8U / FBC9 / JTV5 / UT92.
+  foldTokens?: string[];
   // A comment introducer immediately followed by this string is NOT a comment line — it falls
   // through to ordinary tokenization (e.g. comment '//' + commentExcept '!' → `//!` doc-comment
   // lines lex as real tokens and stay visible to the indent stack, while `//` lines vanish).
diff --git a/test/indent-extensions.ts b/test/indent-extensions.ts
index dc6526e..003041b 100644
--- a/test/indent-extensions.ts
+++ b/test/indent-extensions.ts
@@ -2,22 +2,27 @@
 // engine behavior over TOY grammars (token names and introducer characters
 // deliberately unlike any real language — the behaviors are grammar DATA).
 //
-// Three opt-in IndentConfig fields, each motivated by a Pug-like indentation
-// language (one that nests HTML-ish tag lines rather than key/value scalars):
+// Opt-in IndentConfig fields, each motivated by a non-YAML indentation language
+// (one that nests HTML-ish tag lines or `k=v` entries rather than YAML scalars):
 //
-//   1. `commentExcept`   — two-tier comments: `--` lines vanish (invisible to
-//                          the indent stack, like YAML `#`), but `--!` lines
-//                          are REAL tokens (doc comments that ship to output).
-//   2. `rawBlock`        — verbatim capture introduced from the END of a line
-//                          (`tag:mode` filters/content modes, Pug-style); the
-//                          mirror image of YAML's leading `|`/`>` blockScalar.
-//   3. `flowColonSeparator: false` — languages with `:name`-shaped tokens
-//                          (bound-attribute shorthand) need a `:` after a
-//                          quoted value / flow-close to stay a token start,
-//                          not YAML's forced `key: value` separator punct.
+//   1. `commentExcept`             — two-tier comments: `--` lines vanish (like
+//                                    YAML `#`), but `--!` lines are REAL tokens.
+//   2. `rawBlock`                  — verbatim capture introduced from the END of
+//                                    a line (Pug-style); mirror of `blockScalar`.
+//   3. `flowSeparatorAfterTokens`  — EXPLICIT membership for the flow `:` key/
+//                                    value carve-out, decoupled from `string`
+//                                    (issue #44 (A)). OFF unless declared.
+//   4. `foldTokens`                — EXPLICIT membership for plain-scalar
+//                                    continuation folding, decoupled from
+//                                    `blockPattern` (issue #44 (B)). OFF unless
+//                                    declared.
+//   5. `keyValueSeparator`         — the separator glyph the LEXER (not just
+//                                    gen-tm) reads for key-line sniffs; a non-`:`
+//                                    value is recognized structurally (issue
+//                                    #44 (C)). Default ':'.
 //
-// All three default OFF — a grammar declaring none (YAML) tokenizes
-// byte-identically, which the yaml gates already enforce.
+// All default OFF / neutral — a grammar declaring none is unaffected; YAML opts in
+// field-by-field and tokenizes byte-identically, which the yaml gates already enforce.
 import { token, rule, defineGrammar, alt, many, many1, opt, seq, oneOf, noneOf, range, star, plus, never } from '../src/api.ts';
 import type { IndentConfig } from '../src/types.ts';
 import { createLexer } from '../src/gen-lexer.ts';
@@ -153,7 +158,8 @@ const Str = token(seq('"', star(noneOf('"')), '"'), { string: true });
 }
 
 // ─────────────────────────────────────────────────────────────────────────────
-// 3. flowColonSeparator: false — `:name` tokens survive after values in flow
+// 3. flowSeparatorAfterTokens — flow `:` carve-out is EXPLICIT membership, OFF by
+//    default, and DECOUPLED from the `string` flag (issue #44 (A) un-overload).
 // ─────────────────────────────────────────────────────────────────────────────
 {
   const Indent = token(never(), {});
@@ -172,25 +178,108 @@ const Str = token(seq('"', star(noneOf('"')), '"'), { string: true });
     indentToken: 'Indent', dedentToken: 'Dedent', newlineToken: 'Newline',
     flowOpen: ['('], flowClose: [')'],
   };
-  const gYaml = mk(base);                                       // default: YAML behavior
-  const gOff = mk({ ...base, flowColonSeparator: false });
-
-  // Default (YAML): a `:` after a quoted value in flow is forced separator punctuation.
-  check('flowColonSeparator default: `:` after a string is separator punct (YAML behavior)',
-    lexed(gYaml, 'tag("v" :k)').some(t => t.type === '' && t.text === ':'));
-
-  // Disabled: the same `:` starts the BoundName token.
-  check('flowColonSeparator false: `:name` after a string lexes as one token',
-    lexed(gOff, 'tag("v" :k)').some(t => t.type === 'BoundName' && t.text === ':k'));
-
-  // Same carve-out after a flow-CLOSE delimiter, nested so flow depth stays > 0.
-  check('flowColonSeparator false: `:name` after `)` (still in flow) lexes as one token',
-    lexed(gOff, 'tag((aa) :k)').some(t => t.type === 'BoundName' && t.text === ':k'));
-  check('flowColonSeparator default: `:` after `)` splits (YAML behavior preserved)',
-    !lexed(gYaml, 'tag((aa) :k)').some(t => t.type === 'BoundName'));
+  // `Str` carries `string: true` (region scoping). NEUTRAL grammar = no flowSeparatorAfterTokens
+  // declared. ROOT-CAUSE PROOF: under the old derivation `string: true` alone enlisted `Str` into the
+  // carve-out; after the un-overload it does NOT — the `:k` survives as one BoundName token.
+  const gNeutral = mk(base);
+  // Opt IN explicitly: name `Str` (and flow-close is then active too).
+  const gOn = mk({ ...base, flowSeparatorAfterTokens: ['Str'] });
+
+  check('flowSeparatorAfterTokens: a `string:true` token is NOT auto-enlisted — `:name` survives after a string',
+    lexed(gNeutral, 'tag("v" :k)').some(t => t.type === 'BoundName' && t.text === ':k'));
+  check('flowSeparatorAfterTokens: neutral grammar — `:name` survives after a flow-close `)` too',
+    lexed(gNeutral, 'tag((aa) :k)').some(t => t.type === 'BoundName' && t.text === ':k'));
+
+  // Declared: the same `:` after the named token is now forced separator punctuation.
+  check('flowSeparatorAfterTokens: declared → `:` after the named token is separator punct',
+    lexed(gOn, 'tag("v" :k)').some(t => t.type === '' && t.text === ':') &&
+    !lexed(gOn, 'tag("v" :k)').some(t => t.type === 'BoundName'));
+  // Declaring the carve-out also activates it after a flow-CLOSE delimiter.
+  check('flowSeparatorAfterTokens: declared → `:` after flow-close `)` also splits',
+    lexed(gOn, 'tag((aa) :k)').some(t => t.type === '' && t.text === ':') &&
+    !lexed(gOn, 'tag((aa) :k)').some(t => t.type === 'BoundName'));
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 4. foldTokens — plain-scalar continuation fold is EXPLICIT membership, OFF by
+//    default, and DECOUPLED from the `blockPattern` flag (issue #44 (B) un-overload).
+// ─────────────────────────────────────────────────────────────────────────────
+{
+  const Indent = token(never(), {});
+  const Dedent = token(never(), {});
+  const Newline = token(never(), {});
+  // A token that DECLARES a block-context variant via `blockPattern`. Its head pattern is `[a-z]+`, so
+  // a line that STARTS with `-` cannot be lexed as a Scalar. Under the old derivation any blockPattern
+  // token got YAML plain-scalar folding; after the un-overload it does not, unless named in foldTokens.
+  const Scalar = token(plus(lower), { scope: 'scalar', blockPattern: plus(lower) });
+  const Line = rule(() => [[Scalar, many(Newline, Scalar)]]);
+  const Lines = rule(() => [[Line, many(Newline, Line)]]);
+  const Doc = rule(() => [[opt(Lines), opt(Indent), opt(Lines), opt(Dedent)]]);
+
+  const mk = (indent: IndentConfig) => defineGrammar({
+    name: 'tinyfold', tokens: { Indent, Dedent, Newline, Scalar }, rules: { Line, Lines, Doc }, entry: Doc, indent,
+  });
+  const base: IndentConfig = { indentToken: 'Indent', dedentToken: 'Dedent', newlineToken: 'Newline' };
+  const gNeutral = mk(base);                                    // blockPattern present, foldTokens absent
+  const gFold = mk({ ...base, foldTokens: ['Scalar'] });        // opt IN
+
+  // ROOT-CAUSE PROOF: a deeper line `- bbb` (leading `-`, not a Scalar head) after a `blockPattern`
+  // LEAF. With folding OFF (the un-overload — blockPattern alone no longer triggers it) the `-` is an
+  // unlexable char → a hard lex error. So the block-context fold genuinely does NOT fire here.
+  let neutralThrew = false;
+  try { lexed(gNeutral, 'aaa\n  - bbb'); } catch { neutralThrew = true; }
+  check('foldTokens: a `blockPattern` token is NOT auto-folded — an illegal-head deeper line errors',
+    neutralThrew);
+
+  // Declared: the same input folds — the whole deeper line (its leading `-` is now scalar content) is
+  // absorbed as ONE continuation Scalar token. Observed: `Scalar:aaa Indent Scalar:"- bbb" Dedent`.
+  const tFold = lexed(gFold, 'aaa\n  - bbb');
+  check('foldTokens: declared → the illegal-head deeper line folds into one continuation Scalar',
+    tFold.some(t => t.type === 'Scalar' && t.text === '- bbb'));
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+// 5. keyValueSeparator — the lexer (not just gen-tm) reads the separator glyph;
+//    a non-`:` separator is recognized structurally by the parser (issue #44 (C)).
+// ─────────────────────────────────────────────────────────────────────────────
+{
+  const Indent = token(never(), {});
+  const Dedent = token(never(), {});
+  const Newline = token(never(), {});
+  // A key scalar whose block variant ends at the `=` separator, and a plain value scalar. The fold
+  // sniff (`lineHasKeySeparator`) must treat `=` (not `:`) as the structural key separator so a
+  // `k= v` line is a mapping line, not a foldable plain continuation.
+  const Key = token(plus(lower), { scope: 'key', blockPattern: seq(plus(lower), '=') });
+  const Val = token(plus(lower), { scope: 'val', blockPattern: plus(lower) });
+  const Sep = token('=', {});
+  const Entry = rule(() => [[Key, Sep, Val]]);
+  const Lines = rule(() => [[Entry, many(Newline, Entry)]]);
+  const Doc = rule(() => [[opt(Lines), opt(Indent), opt(Lines), opt(Dedent)]]);
+
+  const mk = (indent: IndentConfig) => defineGrammar({
+    name: 'tinykv', tokens: { Indent, Dedent, Newline, Key, Val, Sep }, rules: { Entry, Lines, Doc }, entry: Doc, indent,
+  });
+  // `=` is the separator AND fold tokens are declared, so a deeper `k= v` line must be recognized as a key line, not a fold.
+  const g = mk({ indentToken: 'Indent', dedentToken: 'Dedent', newlineToken: 'Newline', keyValueSeparator: '=', foldTokens: ['Key', 'Val'] });
+
+  // A `key=`-led line after a plain leaf is a mapping line (the key separator), so it must NOT fold —
+  // the lexer's key-separator sniff has to read `=`, not `:`. (If it still hardcoded `:`, the `b= c`
+  // line would be seen as separator-less plain content and wrongly fold into one continuation token.)
+  // Observed: `Val:a  Indent  Key:"b="  Val:c  Dedent` — `b=` lexes via the block variant as a Key.
+  const t = lexed(g, 'a\n  b= c');
+  check('keyValueSeparator: the lexer recognizes `=` as the structural separator (`b=` is a Key line, no fold)',
+    t.some(tk => tk.type === 'Key' && tk.text === 'b=') &&
+    t.some(tk => tk.type === 'Val' && tk.text === 'a') &&          // `a` stayed its own leaf — not folded
+    !t.some(tk => tk.type === 'Val' && tk.text.includes('b')));    // `b=` did not fold into a Val continuation
+  // Sanity: with NO `=`, the same-shape deeper plain line IS a foldable continuation — proving the
+  // first case's non-fold is the `=` separator's doing, not folding being off. The block-context fold
+  // emits the deeper line as ONE continuation token (`Val:"b c"`), so it is not two Val tokens.
+  const t2 = lexed(g, 'a\n  b c');
+  check('keyValueSeparator: a separator-less deeper line still folds into one continuation token',
+    t2.some(tk => tk.type === 'Val' && tk.text === 'b c'));
 }
 
 console.log(fail === 0
-  ? `\n${ok}/${ok} indent-extension checks pass — commentExcept / rawBlock / flowColonSeparator behave as specified`
+  ? `\n${ok}/${ok} indent-extension checks pass — commentExcept / rawBlock / flowSeparatorAfterTokens / foldTokens / keyValueSeparator behave as specified`
   : `\n${fail} FAILED`);
 process.exit(fail === 0 ? 0 : 1);
diff --git a/yaml.ts b/yaml.ts
index 847d281..84fa6c8 100644
--- a/yaml.ts
+++ b/yaml.ts
@@ -628,6 +628,15 @@ const indent: IndentConfig = {
   },
   comment: '#',
   keyValueSeparator: ':',
+  // Flow `:` carve-out: a `:` glued (inside flow) after a quoted scalar / quoted key — or after a
+  // flow-close `]`/`}` — is the `key: value` separator, never a `:`-led plain. The quoted-key tokens
+  // opt IN explicitly (decoupled from the `string` flag, which scopes string regions / derives
+  // auto-close delimiters and no longer enlists a token here).
+  flowSeparatorAfterTokens: ['DQuote', 'SQuote', 'DQuoteKey', 'SQuoteKey'],
+  // Plain-scalar continuation fold participants — the UNQUOTED scalar family. The LAST (Plain) is the
+  // generic catch-all (folded-continuation token type + flow body pattern). Opt IN explicitly
+  // (decoupled from the `blockPattern` flag, which now only selects the block-context match variant).
+  foldTokens: ['Key', 'Num', 'BoolNull', 'Plain'],
   blockScalar: { introducers: ['|', '>'], token: 'BlockScalar', documentMarkers: ['---', '...'], indicatorScope: 'keyword.control.flow.block-scalar' },
   compactIndicators: ['-', '?'],
   // Tag-handle per-document membership (§6.8.2 / §6.9.1): a named handle `!h!` used by a Tag must