From 760422c23021aa0783f618f0ef2abe293210fbf5 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 7 May 2026 20:13:59 -0300 Subject: [PATCH 1/2] feat(super-editor,painter): render images inside Word textboxes (SD-2804) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ECMA-376 §20.4.2.38 (CT_TxbxContent) lets a textbox hold rich body-level content — paragraphs whose runs can carry inline w:drawing images. The text-only extractor used to silently skip those drawings, so the textbox rendered empty even though export round-tripped the image untouched. The fix surfaces the inline drawing as a textContent part with kind='image' so the existing shape painter can render it alongside text spans: - TextPart contract gains optional kind/src/width/height/alt fields. - extractTextFromTextBox.handleRun branches on w:drawing, reuses the v3 wp drawing handler (handleImageNode) to resolve rId, then upgrades the path-style src to a data URI from converter.media so the painter can drop it straight into <img src>. - DomPainter's createFallbackTextElement renders image parts as inline <img> elements next to existing text spans. Linked: SD-2745 (header-anchored floating textboxes — positions the box where this content now renders). --- packages/layout-engine/contracts/src/index.ts | 12 ++ .../painters/dom/src/renderer.ts | 12 ++ .../wp/helpers/encode-image-node-helpers.js | 49 ++++++++ .../helpers/encode-image-node-helpers.test.js | 115 ++++++++++++++++++ 4 files changed, 188 insertions(+) diff --git a/packages/layout-engine/contracts/src/index.ts b/packages/layout-engine/contracts/src/index.ts index be0b316ce6..6080e1019a 100644 --- a/packages/layout-engine/contracts/src/index.ts +++ b/packages/layout-engine/contracts/src/index.ts @@ -806,6 +806,18 @@ export type TextPart = { isLineBreak?: boolean; /** Indicates this line break follows an empty paragraph (creates extra spacing). 
*/ isEmptyParagraph?: boolean; + /** + * SD-2804: ECMA-376 §20.4.2.38 lets a textbox hold full body-level + * content, including paragraphs whose runs carry inline w:drawing + * images. When the importer encounters such a drawing it appends a + * part with `kind: 'image'` carrying a resolved data-URI src so the + * shape painter can render an alongside the text. + */ + kind?: 'image'; + src?: string; + width?: number; + height?: number; + alt?: string; }; /** Text content configuration for shapes. */ diff --git a/packages/layout-engine/painters/dom/src/renderer.ts b/packages/layout-engine/painters/dom/src/renderer.ts index 3bd988f6ab..7d007017f3 100644 --- a/packages/layout-engine/painters/dom/src/renderer.ts +++ b/packages/layout-engine/painters/dom/src/renderer.ts @@ -4458,6 +4458,18 @@ export class DomPainter { if (part.isEmptyParagraph) { currentParagraph.style.minHeight = '1em'; } + } else if (part.kind === 'image' && part.src) { + // SD-2804: image part produced by the textbox importer for an + // inline w:drawing inside a textbox run. Render as alongside + // sibling text spans so layout matches Word's inline flow. + const img = this.doc!.createElement('img'); + img.src = part.src; + img.alt = part.alt ?? 
''; + if (typeof part.width === 'number') img.style.width = `${part.width}px`; + if (typeof part.height === 'number') img.style.height = `${part.height}px`; + img.style.display = 'inline-block'; + img.style.verticalAlign = 'middle'; + currentParagraph.appendChild(img); } else { const span = this.doc!.createElement('span'); span.textContent = this.resolveShapeTextPartText(part, context); diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js index 71b03b717f..55f33b6f0c 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js @@ -1008,6 +1008,29 @@ const handleChartDrawing = (params, node, graphicData, size, padding, marginOffs * wrap: string * }|null} Text content with formatting information and line break markers, or null if no text found */ +// SD-2804: turn a path-style src ("word/media/image1.png" or "media/image1.png") +// into a data URI by reading converter.media. Mirrors how layout-engine's +// hydrateRuns resolves body ImageRuns, but inline since the text-parts model +// the shape painter consumes has no downstream hydration step. +function resolveImagePartSrc(src, params, extension) { + if (!src || src.startsWith('data:')) return src; + const media = params?.converter?.media; + if (!media) return src; + const candidates = [src]; + if (src.startsWith('word/')) candidates.push(src.slice(5)); + else candidates.push(`word/${src}`); + for (const candidate of candidates) { + const data = media[candidate]; + if (!data) continue; + if (typeof data === 'string') { + if (data.startsWith('data:')) return data; + const ext = extension || (src.includes('.') ? 
src.slice(src.lastIndexOf('.') + 1) : 'png'); + return `data:image/${ext};base64,${data}`; + } + } + return src; +} + function extractTextFromTextBox(textBoxContent, bodyPr, params = {}) { if (!textBoxContent || !textBoxContent.elements) return null; @@ -1061,6 +1084,32 @@ function extractTextFromTextBox(textBoxContent, bodyPr, params = {}) { } else if (el.name === 'sd:totalPageNumber') { hasText = true; appendFieldPart('NUMPAGES', el, paragraphProperties); + } else if (el.name === 'w:drawing') { + // SD-2804 / ECMA-376 §20.4.2.38: a textbox can hold body-level + // content, including runs with inline w:drawing images. Reuse the + // existing v3 wp drawing handler so the rId → resolution matches + // what body paragraphs use, then upgrade the path-style src to a + // data URI from converter.media (the text-parts model has no + // downstream hydration step like body ImageRuns do). + const inlineOrAnchor = el.elements?.find((child) => child?.name === 'wp:inline' || child?.name === 'wp:anchor'); + if (inlineOrAnchor) { + const isAnchor = inlineOrAnchor.name === 'wp:anchor'; + const imagePm = handleImageNode(inlineOrAnchor, { ...params, nodes: [el] }, isAnchor); + if (imagePm?.attrs?.src) { + hasText = true; + const sizeAttr = imagePm.attrs.size || imagePm.attrs; + const resolvedSrc = resolveImagePartSrc(imagePm.attrs.src, params, imagePm.attrs.extension); + textParts.push({ + text: '', + formatting, + kind: 'image', + src: resolvedSrc, + width: typeof sizeAttr?.width === 'number' ? sizeAttr.width : undefined, + height: typeof sizeAttr?.height === 'number' ? 
sizeAttr.height : undefined, + alt: imagePm.attrs.alt || '', + }); + } + } } }); diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.test.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.test.js index cf17256fc5..d704216d24 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.test.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.test.js @@ -2023,4 +2023,119 @@ describe('getVectorShape', () => { expect(result1.attrs.src).not.toBe(result2.attrs.src); }); }); + + // SD-2804: ECMA-376 §20.4.2.38 — a textbox (CT_TxbxContent) can hold rich + // body-level content, including paragraphs whose runs carry inline images + // via w:drawing > wp:inline > pic:pic. The text-only extractor used to + // silently skip those drawings, leaving the textbox visually empty even + // though export round-tripped the image. The fix surfaces the image as a + // textContent part with kind='image' so the shape painter can render it. 
+ describe('SD-2804: image inside textbox content', () => { + const docxFixture = { + 'word/_rels/header1.xml.rels': { + elements: [ + { + name: 'Relationships', + elements: [ + { + name: 'Relationship', + attributes: { Id: 'rId1', Target: 'media/image1.png' }, + }, + ], + }, + ], + }, + }; + + const makeShape = () => ({ + elements: [ + { + name: 'wps:wsp', + elements: [ + { name: 'wps:cNvSpPr', attributes: { txBox: '1' } }, + { + name: 'wps:spPr', + elements: [ + { name: 'a:prstGeom', attributes: { prst: 'rect' } }, + { name: 'a:xfrm', elements: [{ name: 'a:ext', attributes: { cx: '4745620', cy: '520860' } }] }, + ], + }, + { + name: 'wps:txbx', + elements: [ + { + name: 'w:txbxContent', + elements: [ + { + name: 'w:p', + elements: [ + { + name: 'w:r', + elements: [ + { name: 'w:rPr', elements: [{ name: 'w:noProof' }] }, + { + name: 'w:drawing', + elements: [ + { + name: 'wp:inline', + elements: [ + { name: 'wp:extent', attributes: { cx: '481330', cy: '422910' } }, + { name: 'wp:docPr', attributes: { id: '1', name: 'Picture 2' } }, + { + name: 'a:graphic', + elements: [ + { + name: 'a:graphicData', + attributes: { + uri: 'http://schemas.openxmlformats.org/drawingml/2006/picture', + }, + elements: [ + { + name: 'pic:pic', + elements: [ + { + name: 'pic:blipFill', + elements: [{ name: 'a:blip', attributes: { 'r:embed': 'rId1' } }], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + { name: 'wps:bodyPr', attributes: {} }, + ], + }, + ], + }); + + it('emits an image part in textContent for an inline w:drawing inside the textbox', () => { + const graphicData = makeShape(); + const result = getVectorShape({ + params: { nodes: [{ name: 'w:drawing', elements: [] }], docx: docxFixture, filename: 'header1.xml' }, + node: { name: 'wp:anchor', elements: [] }, + graphicData, + size: { width: 374, height: 41 }, + }); + + expect(result?.type).toBe('vectorShape'); + const parts = result?.attrs?.textContent?.parts || []; + const 
imagePart = parts.find((p) => p.kind === 'image'); + expect(imagePart).toBeTruthy(); + expect(typeof imagePart?.src).toBe('string'); + expect(imagePart?.src.length).toBeGreaterThan(0); + }); + }); }); From 8e1247c6a2bab7d1cf992b14287f1f0bbbdc4c5a Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Fri, 8 May 2026 20:20:42 -0300 Subject: [PATCH 2/2] fix(super-editor,pm-adapter,painter): address PR #3207 review (SD-2804) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Luccas's review on PR #3207: - (C1) Skip hidden textbox images. handleImageNode flags wp:docPr hidden="1" via attrs.hidden, but the new image-part branch only checked attrs.src and emitted visible <img>s for them. Top-level hidden drawings are filtered later in the pipeline; image parts bypass that filtering. Gate the textParts.push on imagePm.attrs.hidden !== true so hidden textbox drawings stay hidden, matching the body-level behaviour. - (C2) Drop the duplicated resolveImagePartSrc helper in the importer (it rejected Uint8Array, breaking Y.js binary media). Store the raw path + extension + rId on the image part. pm-adapter's hydrateImageBlocks gains a vectorShape branch that hydrates textContent.parts alongside ImageRuns, so all media path candidates and the Uint8Array → TextDecoder decoding live in a single place. - (C3) Anchored drawings inside textboxes are out of scope — wrap / position / transform metadata isn't carried into the text-parts model. Restrict the textbox-image branch to wp:inline and document the limit in the code comment so a future fixture can extend it intentionally. - (C4) Align inserted images to the text baseline like body inline images do (vertical-align: bottom). 
ECMA-376 §20.4.2.8 specifies that an inline drawing behaves "like a character glyph of similar size", and the body inline image renderer defaults to vertical-align: bottom (renderer.ts ~L5770, L5847) — the textbox image part used vertical-align: middle, visibly misaligning text next to the image inside a textbox compared to outside it. --- packages/layout-engine/contracts/src/index.ts | 8 ++- .../painters/dom/src/renderer.ts | 7 ++- .../layout-engine/pm-adapter/src/utilities.ts | 33 ++++++++++++ .../wp/helpers/encode-image-node-helpers.js | 51 ++++++------------- 4 files changed, 60 insertions(+), 39 deletions(-) diff --git a/packages/layout-engine/contracts/src/index.ts b/packages/layout-engine/contracts/src/index.ts index 6080e1019a..f22d14296e 100644 --- a/packages/layout-engine/contracts/src/index.ts +++ b/packages/layout-engine/contracts/src/index.ts @@ -810,11 +810,15 @@ export type TextPart = { * SD-2804: ECMA-376 §20.4.2.38 lets a textbox hold full body-level * content, including paragraphs whose runs carry inline w:drawing * images. When the importer encounters such a drawing it appends a - * part with `kind: 'image'` carrying a resolved data-URI src so the - * shape painter can render an alongside the text. + * part with `kind: 'image'` carrying the raw media path; pm-adapter's + * hydrateImageBlocks resolves it to a data URI alongside ImageRuns so + * binary (Y.js) and string (zip) media files share the same path + * candidates and Uint8Array decoding. 
*/ kind?: 'image'; src?: string; + extension?: string; + rId?: string; width?: number; height?: number; alt?: string; diff --git a/packages/layout-engine/painters/dom/src/renderer.ts b/packages/layout-engine/painters/dom/src/renderer.ts index 7d007017f3..407565773a 100644 --- a/packages/layout-engine/painters/dom/src/renderer.ts +++ b/packages/layout-engine/painters/dom/src/renderer.ts @@ -4461,14 +4461,17 @@ export class DomPainter { } else if (part.kind === 'image' && part.src) { // SD-2804: image part produced by the textbox importer for an // inline w:drawing inside a textbox run. Render as alongside - // sibling text spans so layout matches Word's inline flow. + // sibling text spans so layout matches Word's inline flow. Match + // body inline images' baseline default (`vertical-align: bottom`) + // so an image and adjacent text line up the same way inside a + // textbox as outside. const img = this.doc!.createElement('img'); img.src = part.src; img.alt = part.alt ?? ''; if (typeof part.width === 'number') img.style.width = `${part.width}px`; if (typeof part.height === 'number') img.style.height = `${part.height}px`; img.style.display = 'inline-block'; - img.style.verticalAlign = 'middle'; + img.style.verticalAlign = 'bottom'; currentParagraph.appendChild(img); } else { const span = this.doc!.createElement('span'); diff --git a/packages/layout-engine/pm-adapter/src/utilities.ts b/packages/layout-engine/pm-adapter/src/utilities.ts index d09d3245ad..4a3bf636d7 100644 --- a/packages/layout-engine/pm-adapter/src/utilities.ts +++ b/packages/layout-engine/pm-adapter/src/utilities.ts @@ -15,6 +15,8 @@ import type { ShapeGroupDrawing, ShapeGroupImageChild, ShapeGroupTransform, + TextPart, + VectorShapeDrawing, FlowBlock, ImageRun, ParagraphBlock, @@ -1174,6 +1176,37 @@ export function hydrateImageBlocks(blocks: FlowBlock[], mediaFiles?: Record { + if (part?.kind !== 'image' || !part.src || part.src.startsWith('data:')) { + return part; + } + const resolvedSrc = 
resolveImageSrc(part.src, part.rId, undefined, part.extension); + if (resolvedSrc) { + partsChanged = true; + return { ...part, src: resolvedSrc }; + } + return part; + }); + if (partsChanged) { + const vectorShapeBlock = drawingBlock as VectorShapeDrawing; + return { + ...vectorShapeBlock, + textContent: { ...vectorShapeBlock.textContent, parts: hydratedParts }, + }; + } + return blk; + } + if (drawingBlock.drawingKind !== 'shapeGroup') { return blk; } diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js index 55f33b6f0c..1079a5b816 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v3/handlers/wp/helpers/encode-image-node-helpers.js @@ -1008,29 +1008,6 @@ const handleChartDrawing = (params, node, graphicData, size, padding, marginOffs * wrap: string * }|null} Text content with formatting information and line break markers, or null if no text found */ -// SD-2804: turn a path-style src ("word/media/image1.png" or "media/image1.png") -// into a data URI by reading converter.media. Mirrors how layout-engine's -// hydrateRuns resolves body ImageRuns, but inline since the text-parts model -// the shape painter consumes has no downstream hydration step. 
-function resolveImagePartSrc(src, params, extension) { - if (!src || src.startsWith('data:')) return src; - const media = params?.converter?.media; - if (!media) return src; - const candidates = [src]; - if (src.startsWith('word/')) candidates.push(src.slice(5)); - else candidates.push(`word/${src}`); - for (const candidate of candidates) { - const data = media[candidate]; - if (!data) continue; - if (typeof data === 'string') { - if (data.startsWith('data:')) return data; - const ext = extension || (src.includes('.') ? src.slice(src.lastIndexOf('.') + 1) : 'png'); - return `data:image/${ext};base64,${data}`; - } - } - return src; -} - function extractTextFromTextBox(textBoxContent, bodyPr, params = {}) { if (!textBoxContent || !textBoxContent.elements) return null; @@ -1086,24 +1063,28 @@ function extractTextFromTextBox(textBoxContent, bodyPr, params = {}) { appendFieldPart('NUMPAGES', el, paragraphProperties); } else if (el.name === 'w:drawing') { // SD-2804 / ECMA-376 §20.4.2.38: a textbox can hold body-level - // content, including runs with inline w:drawing images. Reuse the - // existing v3 wp drawing handler so the rId → resolution matches - // what body paragraphs use, then upgrade the path-style src to a - // data URI from converter.media (the text-parts model has no - // downstream hydration step like body ImageRuns do). - const inlineOrAnchor = el.elements?.find((child) => child?.name === 'wp:inline' || child?.name === 'wp:anchor'); - if (inlineOrAnchor) { - const isAnchor = inlineOrAnchor.name === 'wp:anchor'; - const imagePm = handleImageNode(inlineOrAnchor, { ...params, nodes: [el] }, isAnchor); - if (imagePm?.attrs?.src) { + // content, including runs with inline w:drawing images. Defer to + // the existing v3 wp drawing handler for rId → src + size resolution + // so this branch behaves identically to body inline images. 
Anchored + // drawings inside textboxes are out of scope (the wrap / position / + // transform metadata isn't carried into the text-parts model); + // confine support to wp:inline. + const inline = el.elements?.find((child) => child?.name === 'wp:inline'); + if (inline) { + const imagePm = handleImageNode(inline, { ...params, nodes: [el] }, false); + // Skip hidden drawings (wp:docPr hidden="1") to match the body-level + // pipeline — handleImageNode flags them via attrs.hidden, and image + // parts bypass the top-level filtering that drops them elsewhere. + if (imagePm?.attrs?.src && imagePm.attrs.hidden !== true) { hasText = true; const sizeAttr = imagePm.attrs.size || imagePm.attrs; - const resolvedSrc = resolveImagePartSrc(imagePm.attrs.src, params, imagePm.attrs.extension); textParts.push({ text: '', formatting, kind: 'image', - src: resolvedSrc, + src: imagePm.attrs.src, + extension: imagePm.attrs.extension, + rId: imagePm.attrs.rId, width: typeof sizeAttr?.width === 'number' ? sizeAttr.width : undefined, height: typeof sizeAttr?.height === 'number' ? sizeAttr.height : undefined, alt: imagePm.attrs.alt || '',