diff --git a/packages/layout-engine/contracts/src/direction-context.ts b/packages/layout-engine/contracts/src/direction-context.ts index e6246851aa..8e4ce996ff 100644 --- a/packages/layout-engine/contracts/src/direction-context.ts +++ b/packages/layout-engine/contracts/src/direction-context.ts @@ -136,6 +136,17 @@ export type RunBidiContext = { export type RunScriptContext = { /** w:rPr/w:cs. Forces complex-script formatting regardless of Unicode. */ complexScript: boolean; - /** w:rPr/w:lang/@bidi. Complex-script language metadata (spellcheck). */ - language?: string; + /** + * Per-script language metadata, kept on separate fields per ECMA §17.3.2.20 + * because each maps to a different formatting stack (Latin / CS / East Asian). + * Wave 1b consumes these to gate spellcheck and font-stack selection. + */ + language?: { + /** w:rPr/w:lang/@val. Default (Latin) language tag. */ + default?: string; + /** w:rPr/w:lang/@bidi. Complex-script language tag. */ + complexScript?: string; + /** w:rPr/w:lang/@eastAsia. East Asian language tag. */ + eastAsian?: string; + }; }; diff --git a/packages/layout-engine/contracts/src/index.ts b/packages/layout-engine/contracts/src/index.ts index be0b316ce6..71db9a28cc 100644 --- a/packages/layout-engine/contracts/src/index.ts +++ b/packages/layout-engine/contracts/src/index.ts @@ -16,7 +16,7 @@ export type { RunBidiContext, RunScriptContext, } from './direction-context.js'; -import type { ParagraphDirectionContext } from './direction-context.js'; +import type { ParagraphDirectionContext, RunBidiContext, RunScriptContext } from './direction-context.js'; // Export table contracts export { @@ -315,6 +315,19 @@ export type TextRun = RunMarks & { }; /** Tracked-change metadata from ProseMirror marks. */ trackedChange?: TrackedChangeMeta; + /** + * Run-level bidi signals preserved from the source DOCX (run rtl flag, + * embedding/override directions). Direction-only - script formatting lives + * on `script`. Populated by pm-adapter from raw run properties; not yet + * rendered (Wave 1c consumes embedding/override). + */ + bidi?: RunBidiContext; + /** + * Run-level script context preserved from the source DOCX (complex-script + * flag, per-script language metadata). Wave 1b uses `complexScript` to gate + * the formatting-stack selection (Latin variants vs CS variants). + */ + script?: RunScriptContext; }; export type TabRun = RunMarks & { diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts index f773dfaea1..5f6595cfa9 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.test.ts @@ -117,4 +117,84 @@ describe('applyInlineRunProperties', () => { expect(result.bold).toBe(false); }); + + // Wave 1a preserves these signals; nothing renders them yet. + describe('SD-2781 bidi/script preservation', () => { + it('does not attach bidi or script when no relevant signals are set', () => { + const result = applyInlineRunProperties(baseRun, { bold: true }, undefined, { bold: true }); + expect(result.bidi).toBeUndefined(); + expect(result.script).toBeUndefined(); + }); + + it('preserves run rtl on TextRun.bidi (does not affect script)', () => { + const result = applyInlineRunProperties(baseRun, { rtl: true }, undefined, { rtl: true }); + expect(result.bidi).toEqual({ rtl: true }); + expect(result.script).toBeUndefined(); + }); + + it('preserves explicit rtl=false (a meaningful override of inherited rtl)', () => { + const result = applyInlineRunProperties(baseRun, { rtl: false }, undefined, { rtl: false }); + expect(result.bidi).toEqual({ rtl: false }); + }); + + it('preserves complex-script flag on TextRun.script (does not affect bidi)', () => { + const result = applyInlineRunProperties(baseRun, { cs: true }, undefined, { cs: true }); + expect(result.script).toEqual({ complexScript: true }); + expect(result.bidi).toBeUndefined(); + }); + + it('preserves the three lang tags on separate fields per ECMA §17.3.2.20', () => { + const lang = { val: 'en-US', bidi: 'ar-SA', eastAsia: 'ja-JP' }; + const result = applyInlineRunProperties(baseRun, { lang }, undefined, { lang }); + expect(result.script?.language).toEqual({ + default: 'en-US', + complexScript: 'ar-SA', + eastAsian: 'ja-JP', + }); + }); + + it('partial lang attrs only fill the fields that were set', () => { + const lang = { bidi: 'he-IL' }; + const result = applyInlineRunProperties(baseRun, { lang }, undefined, { lang }); + expect(result.script?.language).toEqual({ complexScript: 'he-IL' }); + }); + + it('keeps rtl and cs on separate axes (axis non-collapse)', () => { + const props = { rtl: true, cs: true }; + const result = applyInlineRunProperties(baseRun, props, undefined, props); + // rtl goes to bidi only; cs goes to script only + expect(result.bidi).toEqual({ rtl: true }); + expect(result.script).toEqual({ complexScript: true }); + // cs must NOT leak into bidi, and rtl must NOT leak into script + expect(result.bidi).not.toHaveProperty('complexScript'); + expect(result.script).not.toHaveProperty('rtl'); + }); + + // Cascade-leak guard: when the cascade-resolved runProperties has rtl/cs/lang + // but the raw inline runProperties does NOT, the metadata must not appear. + // Otherwise every style-inherited run gets false bidi/script signals. + it('does NOT populate bidi/script from cascade-resolved props alone', () => { + const cascadedProps = { rtl: true, cs: true, lang: { bidi: 'ar-SA' } }; + const inlineProps = {}; // No inline rtl/cs/lang on the source run + const result = applyInlineRunProperties(baseRun, cascadedProps, undefined, inlineProps); + expect(result.bidi).toBeUndefined(); + expect(result.script).toBeUndefined(); + }); + + it('does NOT populate bidi/script when caller omits inlineRunProperties', () => { + // Default safety: callers that don't opt in to metadata get nothing. + const result = applyInlineRunProperties(baseRun, { rtl: true, cs: true }); + expect(result.bidi).toBeUndefined(); + expect(result.script).toBeUndefined(); + }); + + it('inline overrides survive even when cascade-resolved props differ', () => { + // User explicitly set rtl=true inline; cascade may also resolve to true. + // Either way, inline is the source of truth for preservation. + const cascaded = { rtl: true, fontSize: 12 }; + const inline = { rtl: true }; + const result = applyInlineRunProperties(baseRun, cascaded, undefined, inline); + expect(result.bidi).toEqual({ rtl: true }); + }); + }); }); diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts index 8f1c4b2d02..ab06ca96ad 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/common.ts @@ -1,5 +1,12 @@ import type { RunProperties, ParagraphProperties } from '@superdoc/style-engine/ooxml'; -import type { FlowBlock, SdtMetadata, TextRun, ParagraphAttrs } from '@superdoc/contracts'; +import type { + FlowBlock, + RunBidiContext, + RunScriptContext, + SdtMetadata, + TextRun, + ParagraphAttrs, +} from '@superdoc/contracts'; import { HyperlinkConfig, NodeHandlerContext, @@ -19,6 +26,7 @@ type VisitNodeFn = ( activeSdt: SdtMetadata | undefined, activeRunProperties: RunProperties | undefined, activeHidden?: boolean, + activeInlineRunProperties?: RunProperties, ) => void; export class HiddenByVanishError extends Error { @@ -46,6 +54,13 @@ export type InlineConverterParams = { hyperlinkConfig: HyperlinkConfig; themeColors: ThemeColorPalette | undefined; runProperties: RunProperties | undefined; + /** + * The raw inline w:rPr from the run wrapper, BEFORE the style cascade. Used by + * preservation-only metadata (TextRun.bidi / TextRun.script in SD-2781) so + * style-inherited values don't surface as if they were direct formatting. + * Undefined for callers outside a run wrapper. + */ + inlineRunProperties: RunProperties | undefined; paragraphProperties: ParagraphProperties | undefined; converterContext: ConverterContext; enableComments: boolean; @@ -73,10 +88,43 @@ export type BlockConverterOptions = { paragraphAttrs: ParagraphAttrs; }; +/** + * Build a RunBidiContext from raw run properties when any direction signal is set. + * Returns undefined when nothing to preserve, so empty contexts don't bloat the + * layout tree. Wave 1c will populate `embedding` (w:dir) and `override` (w:bdo). + */ +const buildBidiContext = (runProperties: RunProperties): RunBidiContext | undefined => { + if (runProperties.rtl == null) return undefined; + return { rtl: runProperties.rtl === true }; +}; + +/** + * Build a RunScriptContext from raw run properties when any script signal is set. + * Per ECMA §17.3.2.20, w:lang carries three independent language tags - default + * (Latin), bidi (complex-script), eastAsia - mapped here to one structured field. + */ +const buildScriptContext = (runProperties: RunProperties): RunScriptContext | undefined => { + const cs = runProperties.cs; + const lang = runProperties.lang; + const hasLang = lang != null && (lang.val != null || lang.bidi != null || lang.eastAsia != null); + if (cs == null && !hasLang) return undefined; + + const ctx: RunScriptContext = { complexScript: cs === true }; + if (hasLang) { + const language: NonNullable = {}; + if (lang.val != null) language.default = lang.val; + if (lang.bidi != null) language.complexScript = lang.bidi; + if (lang.eastAsia != null) language.eastAsian = lang.eastAsia; + ctx.language = language; + } + return ctx; +}; + export const applyInlineRunProperties = ( run: TextRun, runProperties: RunProperties | undefined, converterContext?: ConverterContext, + inlineRunProperties?: RunProperties, ): TextRun => { if (!runProperties) { return run; @@ -90,5 +138,16 @@ export const applyInlineRunProperties = ( (merged as Record)[key] = runAttrs[key]; } } + // SD-2781: preserve run-level bidi/script metadata. Read from `inlineRunProperties` + // (the raw inline w:rPr, before the style cascade) so style-inherited runs don't + // get false metadata - per ECMA the metadata categories track what the source + // document encoded, not what the cascade resolved to. When the caller doesn't + // supply inline properties, no metadata is populated. + if (inlineRunProperties) { + const bidi = buildBidiContext(inlineRunProperties); + if (bidi) merged.bidi = bidi; + const script = buildScriptContext(inlineRunProperties); + if (script) merged.script = script; + } return merged; }; diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts index fe77a18543..2b92b38410 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/generic-token.ts @@ -27,6 +27,7 @@ export function tokenNodeToRun({ themeColors, sdtMetadata, runProperties, + inlineRunProperties, converterContext, }: InlineConverterParams): TextRun | null { const token = TOKEN_INLINE_TYPES.get(node.type); @@ -35,7 +36,7 @@ export function tokenNodeToRun({ } // Tokens carry a placeholder character so measurers reserve width; painters will replace it with the real value. - const run: TextRun = { + let run: TextRun = { text: '0', token, fontFamily: defaultFont, @@ -61,7 +62,10 @@ export function tokenNodeToRun({ const marks = [...effectiveMarks, ...(inheritedMarks ?? [])]; applyMarksToRun(run, marks, hyperlinkConfig, themeColors, undefined, true, storyKey); - applyInlineRunProperties(run, runProperties, converterContext); + // Reassign the return value: applyInlineRunProperties returns a new object + // via spread, so the merged fields (including SD-2781 bidi/script metadata) + // are dropped if we don't capture them here. + run = applyInlineRunProperties(run, runProperties, converterContext, inlineRunProperties); // If marksAsAttrs carried font styling, mark the run so downstream defaults don't overwrite it. if (marksAsAttrs.length > 0) { diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts index 9a27dbb46b..f71399c50b 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/no-break-hyphen.ts @@ -24,6 +24,7 @@ export function noBreakHyphenNodeToRun({ themeColors, enableComments, runProperties, + inlineRunProperties, converterContext, }: InlineConverterParams): TextRun { let run: TextRun = { @@ -52,7 +53,7 @@ export function noBreakHyphenNodeToRun({ run.sdt = sdtMetadata; } - run = applyInlineRunProperties(run, runProperties, converterContext); + run = applyInlineRunProperties(run, runProperties, converterContext, inlineRunProperties); return run; } diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts index 53e9b77bfc..929f211406 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/run.ts @@ -22,5 +22,10 @@ export function runNodeChildrenToRuns({ false, false, ); - node.content?.forEach((child) => visitNode(child, mergedMarks, sdtMetadata, resolvedRunProperties, false)); + // Pass the RAW inline runProperties alongside the resolved cascade. SD-2781's + // bidi/script preservation must read from inline only - cascade-resolved + // values would tag every style-inherited run with metadata it didn't have. + node.content?.forEach((child) => + visitNode(child, mergedMarks, sdtMetadata, resolvedRunProperties, false, runProperties), + ); } diff --git a/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts b/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts index c051b8fe8e..11ae20ed05 100644 --- a/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts +++ b/packages/layout-engine/pm-adapter/src/converters/inline-converters/text-run.ts @@ -37,6 +37,7 @@ export function textNodeToRun({ themeColors, enableComments, runProperties, + inlineRunProperties, converterContext, }: InlineConverterParams): TextRun { let run: TextRun = { @@ -65,7 +66,7 @@ export function textNodeToRun({ if (sdtMetadata) { run.sdt = sdtMetadata; } - run = applyInlineRunProperties(run, runProperties, converterContext); + run = applyInlineRunProperties(run, runProperties, converterContext, inlineRunProperties); return run; } diff --git a/packages/layout-engine/pm-adapter/src/converters/paragraph.ts b/packages/layout-engine/pm-adapter/src/converters/paragraph.ts index 62fcc93d00..83fca1ee5a 100644 --- a/packages/layout-engine/pm-adapter/src/converters/paragraph.ts +++ b/packages/layout-engine/pm-adapter/src/converters/paragraph.ts @@ -744,6 +744,7 @@ export function paragraphToFlowBlocks({ activeSdt?: SdtMetadata, activeRunProperties?: RunProperties, activeHidden = false, + activeInlineRunProperties?: RunProperties, ) => { if (activeHidden && node.type !== 'run') { suppressedByVanish = true; @@ -765,6 +766,7 @@ export function paragraphToFlowBlocks({ themeColors, enableComments, runProperties: activeRunProperties, + inlineRunProperties: activeInlineRunProperties, paragraphProperties: resolvedParagraphProperties, converterContext, visitNode, diff --git a/packages/layout-engine/pm-adapter/src/integration.test.ts b/packages/layout-engine/pm-adapter/src/integration.test.ts index e9bdc14328..a34cf615c2 100644 --- a/packages/layout-engine/pm-adapter/src/integration.test.ts +++ b/packages/layout-engine/pm-adapter/src/integration.test.ts @@ -982,4 +982,97 @@ describe('page break integration tests', () => { expect(exhibitPageIndex).toBe(1); expect(layout.pages[1].fragments.length).toBeGreaterThan(0); }); + + // SD-2781: end-to-end check that runs the unmocked applyInlineRunProperties + // pipeline. The unit tests in common.test.ts mock computeRunAttrs, so they + // can't catch type-shape regressions in the contracts package. This test + // exercises the full PM -> FlowBlock conversion with raw runProperties on + // a real run-wrapper node, mirroring how the importer emits them. + it('preserves run-level rtl, cs, and lang on TextRun.bidi / TextRun.script', () => { + const pmDoc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { + type: 'run', + attrs: { + runProperties: { + rtl: true, + cs: true, + lang: { val: 'en-US', bidi: 'ar-SA', eastAsia: 'ja-JP' }, + }, + }, + content: [{ type: 'text', text: 'mixed-script run' }], + }, + ], + }, + ], + }; + + const { blocks } = toFlowBlocks(pmDoc); + const paragraph = blocks.find((block) => block.kind === 'paragraph'); + expect(paragraph).toBeDefined(); + if (paragraph?.kind !== 'paragraph') return; + + const textRun = paragraph.runs.find((run) => 'text' in run && run.text === 'mixed-script run'); + expect(textRun).toBeDefined(); + expect(textRun?.bidi).toEqual({ rtl: true }); + expect(textRun?.script).toEqual({ + complexScript: true, + language: { default: 'en-US', complexScript: 'ar-SA', eastAsian: 'ja-JP' }, + }); + }); + + it('omits bidi and script on TextRun when no signals are set (no bloat)', () => { + const pmDoc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [{ type: 'text', text: 'plain Latin text' }], + }, + ], + }; + + const { blocks } = toFlowBlocks(pmDoc); + const paragraph = blocks.find((block) => block.kind === 'paragraph'); + if (paragraph?.kind !== 'paragraph') return; + const textRun = paragraph.runs.find((run) => 'text' in run && run.text === 'plain Latin text'); + expect(textRun?.bidi).toBeUndefined(); + expect(textRun?.script).toBeUndefined(); + }); + + // SD-2781 round 2 (codex finding): generic-token.ts:64 calls + // applyInlineRunProperties without reassigning the return value, so token + // runs (page numbers, total page counts) lose run-level bidi/script metadata + // even when wrapped in a run that explicitly sets rtl/cs/lang. + it('preserves bidi/script on page-number token TextRuns inside an rtl run', () => { + const pmDoc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { + type: 'run', + attrs: { runProperties: { rtl: true, cs: true } }, + content: [{ type: 'page-number' }], + }, + ], + }, + ], + }; + + const { blocks } = toFlowBlocks(pmDoc); + const paragraph = blocks.find((block) => block.kind === 'paragraph'); + if (paragraph?.kind !== 'paragraph') return; + const tokenRun = paragraph.runs.find( + (run) => 'token' in run && (run.token === 'pageNumber' || run.token === 'totalPageCount'), + ); + expect(tokenRun, 'token run should be present').toBeDefined(); + expect(tokenRun?.bidi).toEqual({ rtl: true }); + expect(tokenRun?.script).toEqual({ complexScript: true }); + }); });