From ac7fe3c48642045c38e3b59dd2b0566cf770b54f Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Wed, 3 Sep 2025 01:01:35 +0000
Subject: [PATCH 1/3] chore(release): 0.16.0 [skip ci]

# [0.16.0](https://github.com/Harbour-Enterprises/SuperDoc/compare/v0.15.18...v0.16.0) (2025-09-03)

### Bug Fixes

* additional fixes to list indent/outdent, split list, toggle list, types and more tests ([02e6cd9](https://github.com/Harbour-Enterprises/SuperDoc/commit/02e6cd971b672adc7a27ee6f4c3e491ea6582927))
* backspaceNextToList, toggleList and tests ([8b33258](https://github.com/Harbour-Enterprises/SuperDoc/commit/8b33258aa9a09cd566191083de2095377f532de5))
* closing dropdown after clicking again ([#835](https://github.com/Harbour-Enterprises/SuperDoc/issues/835)) ([88ff88d](https://github.com/Harbour-Enterprises/SuperDoc/commit/88ff88d06568716d78be4fcdc311cbba0e6ba3fd))
* definition possibly missing name key, add jsdoc ([bb714f1](https://github.com/Harbour-Enterprises/SuperDoc/commit/bb714f14635239301ed6931bb06259b299b11fa8))
* images are missing for the document in edit mode ([#831](https://github.com/Harbour-Enterprises/SuperDoc/issues/831)) ([a9af47e](https://github.com/Harbour-Enterprises/SuperDoc/commit/a9af47ed4def516900b14460218e476374c69a80))
* include package lock on tests folder ([#845](https://github.com/Harbour-Enterprises/SuperDoc/issues/845)) ([1409d02](https://github.com/Harbour-Enterprises/SuperDoc/commit/1409d02ce457db963a5696ec78be30a3f349ffca))
* insertContentAt fails if new line characters (\n) inserted ([dd60d91](https://github.com/Harbour-Enterprises/SuperDoc/commit/dd60d91711e63741e2d6ca2ced02251f2a4e0465))
* install http server ([#846](https://github.com/Harbour-Enterprises/SuperDoc/issues/846)) ([1a6e684](https://github.com/Harbour-Enterprises/SuperDoc/commit/1a6e684f809ac96e00e370bb324f0317ec6917ef))
* **internal:** remove pdfjs from build ([#843](https://github.com/Harbour-Enterprises/SuperDoc/issues/843)) ([021b2c1](https://github.com/Harbour-Enterprises/SuperDoc/commit/021b2c123052215ba8f52ee103034ebaaa72e1e4))
* remove footer line length breaking deployments ([04766cd](https://github.com/Harbour-Enterprises/SuperDoc/commit/04766cdb1f085419730212b70eacf4072ef6eeeb))
* toggle list ([770998a](https://github.com/Harbour-Enterprises/SuperDoc/commit/770998a9e9b5097d1efa031dc12e6bf12920fa8b))
* update condition checks for screenshot updates in CI workflow ([e17fdf0](https://github.com/Harbour-Enterprises/SuperDoc/commit/e17fdf0b939e8caef65f60207611a71343e4cfde))

### Features

* enable dispatching example apps tests ([#844](https://github.com/Harbour-Enterprises/SuperDoc/issues/844)) ([8b2bc73](https://github.com/Harbour-Enterprises/SuperDoc/commit/8b2bc73bb909c2ce93a93e6266f18c17af0b46e2))
* filter out ooxml tags cli to highest priority namespaces ([23b1efa](https://github.com/Harbour-Enterprises/SuperDoc/commit/23b1efabc63f999f1b297ac046e8c178ff345e49))
---
 packages/superdoc/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/superdoc/package.json b/packages/superdoc/package.json
index faa535428..8d0221b72 100644
--- a/packages/superdoc/package.json
+++ b/packages/superdoc/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@harbour-enterprises/superdoc",
   "type": "module",
-  "version": "0.16.0-next.6",
+  "version": "0.16.0",
   "license": "AGPL-3.0",
   "readme": "../../README.md",
   "files": [

From 9bc488d40430b61240d05bacc541cae51ea84ebb Mon Sep 17 00:00:00 2001
From: Nick Bernal <nick@harbourshare.com>
Date: Tue, 2 Sep 2025 21:39:48 -0700
Subject: [PATCH 2/3] fix: imports encoded in utf-16 break DocxZipper

---
 packages/super-editor/src/core/DocxZipper.js  |  48 +++---
 .../super-editor/src/core/DocxZipper.test.js  |  60 ++++++++
 .../super-editor/src/core/encoding-helpers.js |  80 ++++++++++
 .../src/core/encoding-helpers.test.js         | 142 ++++++++++++++++++
 4 files changed, 304 insertions(+), 26 deletions(-)
 create mode 100644 packages/super-editor/src/core/encoding-helpers.js
 create mode 100644 packages/super-editor/src/core/encoding-helpers.test.js

diff --git a/packages/super-editor/src/core/DocxZipper.js b/packages/super-editor/src/core/DocxZipper.js
index 4ba50a94c..e50b0c3c5 100644
--- a/packages/super-editor/src/core/DocxZipper.js
+++ b/packages/super-editor/src/core/DocxZipper.js
@@ -1,6 +1,7 @@
 import xmljs from 'xml-js';
 import JSZip from 'jszip';
 import { getContentTypesFromXml } from './super-converter/helpers.js';
+import { ensureXmlString, isXmlLike } from './encoding-helpers.js';
 
 /**
  * Class to handle unzipping and zipping of docx files
@@ -37,42 +38,37 @@ class DocxZipper {
     const extractedFiles = await this.unzip(file);
     const files = Object.entries(extractedFiles.files);
 
-    const mediaObjects = {};
-    const validTypes = ['xml', 'rels'];
-    for (const file of files) {
-      const [, zipEntry] = file;
-
-      if (validTypes.some((validType) => zipEntry.name.endsWith(validType))) {
-        const content = await zipEntry.async('string');
-        this.files.push({
-          name: zipEntry.name,
-          content,
-        });
+    for (const [, zipEntry] of files) {
+      const name = zipEntry.name;
+
+      if (isXmlLike(name)) {
+        // Read raw bytes and decode (handles UTF-8 & UTF-16)
+        const u8 = await zipEntry.async('uint8array');
+        const content = ensureXmlString(u8);
+        this.files.push({ name, content });
       } else if (
-        (zipEntry.name.startsWith('word/media') && zipEntry.name !== 'word/media/') ||
-        (zipEntry.name.startsWith('media') && zipEntry.name !== 'media/')
+        (name.startsWith('word/media') && name !== 'word/media/') ||
+        (name.startsWith('media') && name !== 'media/')
       ) {
-        // If we are in node, we need to convert the buffer to base64
+        // Media files
         if (isNode) {
           const buffer = await zipEntry.async('nodebuffer');
           const fileBase64 = buffer.toString('base64');
-          this.mediaFiles[zipEntry.name] = fileBase64;
-        }
-
-        // If we are in the browser, we can use the base64 directly
-        else {
+          this.mediaFiles[name] = fileBase64;
+        } else {
           const blob = await zipEntry.async('blob');
-          const extension = this.getFileExtension(zipEntry.name);
+          const extension = this.getFileExtension(name);
           const fileBase64 = await zipEntry.async('base64');
-          this.mediaFiles[zipEntry.name] = `data:image/${extension};base64,${fileBase64}`;
+          this.mediaFiles[name] = `data:image/${extension};base64,${fileBase64}`;
 
-          const file = new File([blob], zipEntry.name, { type: blob.type });
-          const imageUrl = URL.createObjectURL(file);
-          this.media[zipEntry.name] = imageUrl;
+          const fileObj = new File([blob], name, { type: blob.type });
+          const imageUrl = URL.createObjectURL(fileObj);
+          this.media[name] = imageUrl;
         }
-      } else if (zipEntry.name.startsWith('word/fonts') && zipEntry.name !== 'word/fonts/') {
+      } else if (name.startsWith('word/fonts') && name !== 'word/fonts/') {
+        // Font files
         const uint8array = await zipEntry.async('uint8array');
-        this.fonts[zipEntry.name] = uint8array;
+        this.fonts[name] = uint8array;
       }
     }
 
diff --git a/packages/super-editor/src/core/DocxZipper.test.js b/packages/super-editor/src/core/DocxZipper.test.js
index 654e34066..28755f280 100644
--- a/packages/super-editor/src/core/DocxZipper.test.js
+++ b/packages/super-editor/src/core/DocxZipper.test.js
@@ -2,6 +2,7 @@ import path from 'path';
 import fs from 'fs';
 import { describe, it, expect, beforeEach } from 'vitest';
 import DocxZipper from './DocxZipper';
+import JSZip from 'jszip';
 
 async function readFileAsBuffer(filePath) {
   const resolvedPath = path.resolve(__dirname, filePath);
@@ -48,3 +49,62 @@ describe('DocxZipper - file extraction', () => {
     expect(documentXml).toBeTruthy();
   });
 });
+
+// Encode `str` as UTF-16LE and prepend the FF FE byte order mark
+function utf16leWithBOM(str) {
+  const LE_BOM = [0xff, 0xfe];
+  const encoded = Buffer.from(str, 'utf16le');
+  return Buffer.concat([Buffer.from(LE_BOM), encoded]);
+}
+
+describe('DocxZipper - UTF-16 XML handling', () => {
+  let zipper;
+  beforeEach(() => {
+    zipper = new DocxZipper();
+  });
+
+  it('decodes a UTF-16LE customXml part correctly (was failing before fix)', async () => {
+    const archive = new JSZip();
+
+    // Bare-bones [Content_Types].xml so the archive resembles a docx package
+    const contentTypesXml = `<?xml version="1.0" encoding="UTF-8"?>
+      <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
+        <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
+        <Default Extension="xml" ContentType="application/xml"/>
+        <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
+      </Types>`;
+    archive.file('[Content_Types].xml', contentTypesXml);
+
+    // One ordinary UTF-8 part so the package is not made of UTF-16 entries only
+    const mainDocumentXml = `<?xml version="1.0" encoding="UTF-8"?>
+      <w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
+        <w:body><w:p><w:r><w:t>Hello</w:t></w:r></w:p></w:body>
+      </w:document>`;
+    archive.file('word/document.xml', mainDocumentXml);
+
+    // The UTF-16LE customXml part this test is about
+    const customXmlSource = `<?xml version="1.0" encoding="utf-16"?>
+<properties xmlns="http://www.imanage.com/work/xmlschema">
+  <documentid>TELEKOM!4176814.1</documentid>
+  <senderid>A675398</senderid>
+  <senderemail>GUDRUN.JORDAN@TELEKOM.DE</senderemail>
+  <lastmodified>2023-07-06T15:09:00.0000000+02:00</lastmodified>
+  <database>TELEKOM</database>
+</properties>`;
+    archive.file('customXml/item2.xml', utf16leWithBOM(customXmlSource));
+
+    // Serialise the archive to a Node buffer and hand it to the zipper
+    const zipBuffer = await archive.generateAsync({ type: 'nodebuffer' });
+    const extracted = await zipper.getDocxData(zipBuffer /* isNode not needed for XML */);
+
+    // Look the customXml part up by its entry name
+    const customPart = extracted.find((entry) => entry.name === 'customXml/item2.xml');
+    expect(customPart).toBeTruthy();
+
+    const { content } = customPart; // expected to be a clean JS string
+    expect(content).toContain('<?xml'); // prolog present
+    expect(content).toContain('<properties'); // real tag (no NULs interleaved)
+    expect(content).not.toMatch(/\u0000/); // no embedded NULs
+    expect(content.toLowerCase()).toContain('encoding="utf-16"');
+  });
+});
diff --git a/packages/super-editor/src/core/encoding-helpers.js b/packages/super-editor/src/core/encoding-helpers.js
new file mode 100644
index 000000000..f04eeecad
--- /dev/null
+++ b/packages/super-editor/src/core/encoding-helpers.js
@@ -0,0 +1,80 @@
+/**
+ * Check whether a name ends with .xml or .rels (case-insensitive)
+ * @param {string} name
+ * @returns {boolean} True if the name has a .xml or .rels extension
+ */
+export const isXmlLike = (name) => /\.xml$|\.rels$/i.test(name);
+
+/**
+ * Render the first `n` bytes as space-separated, two-digit lowercase hex (debug aid)
+ * @param {Uint8Array|ArrayBuffer} bytes
+ * @param {number} n
+ * @returns {string} Hex dump
+ */
+export function hex(bytes, n = 32) {
+  const view = bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);
+  const head = view.slice(0, n);
+  const toPair = (byte) => byte.toString(16).padStart(2, '0');
+  return Array.from(head, toPair).join(' ');
+}
+
+/**
+ * Try to detect encoding by BOM / null density
+ * @param {Uint8Array} u8
+ * @returns {string} Detected encoding: 'utf-16le', 'utf-16be' or 'utf-8'
+ */
+export function sniffEncoding(u8) {
+  if (u8.length >= 2) {
+    if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le';
+    if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be';
+  }
+  // Heuristic: lots of NULs near the start → likely BOM-less UTF-16.
+  // ASCII-range characters put the NUL in the odd byte for LE and in the even byte for BE,
+  // so whichever parity holds more NULs tells the byte order (a tie keeps the LE default).
+  const nulAt = [0, 0]; // NUL counts at [even, odd] offsets
+  for (let i = 0; i < Math.min(64, u8.length); i++) if (u8[i] === 0) nulAt[i % 2]++;
+  if (nulAt[0] + nulAt[1] > 16) return nulAt[0] > nulAt[1] ? 'utf-16be' : 'utf-16le';
+  return 'utf-8';
+}
+
+/**
+ * Drop a leading U+FEFF (byte order mark) from an already-decoded JS string
+ * @param {string} str
+ * @returns {string} The string without its leading BOM; falsy input is returned as-is
+ */
+export function stripBOM(str) {
+  return str && str.startsWith('\uFEFF') ? str.slice(1) : str;
+}
+
+/**
+ * Decode XML/RELS content to a clean JS string (no leading BOM).
+ * Strings only lose a leading BOM; bytes are decoded with the encoding reported by sniffEncoding.
+ * @param {string|Uint8Array|ArrayBuffer} content - Buffer, DataView and other typed arrays also work
+ * @returns {string} Clean XML string
+ * @throws {Error} "Unsupported content type for XML" for any other input
+ */
+export function ensureXmlString(content) {
+  if (typeof content === 'string') return stripBOM(content);
+
+  // Views are re-wrapped over the same underlying bytes rather than copied
+  const viewOf = (v) => new Uint8Array(v.buffer, v.byteOffset, v.byteLength);
+  const isNodeBuffer = (v) => typeof Buffer !== 'undefined' && Buffer.isBuffer && Buffer.isBuffer(v);
+  const isArrayBuffer = (v) => v.constructor && (v instanceof ArrayBuffer || v.constructor.name === 'ArrayBuffer');
+
+  let bytes = null;
+  if (content && typeof content === 'object') {
+    if (content instanceof Uint8Array) {
+      bytes = content;
+    } else if (isNodeBuffer(content)) {
+      bytes = viewOf(content);
+    } else if (ArrayBuffer.isView && ArrayBuffer.isView(content)) {
+      bytes = viewOf(content);
+    } else if (isArrayBuffer(content)) {
+      bytes = new Uint8Array(content);
+    }
+  }
+  if (!bytes) throw new Error('Unsupported content type for XML');
+
+  const encoding = sniffEncoding(bytes);
+  return stripBOM(new TextDecoder(encoding).decode(bytes));
+}
diff --git a/packages/super-editor/src/core/encoding-helpers.test.js b/packages/super-editor/src/core/encoding-helpers.test.js
new file mode 100644
index 000000000..7a0a154bf
--- /dev/null
+++ b/packages/super-editor/src/core/encoding-helpers.test.js
@@ -0,0 +1,142 @@
+import { describe, it, expect } from 'vitest';
+import { isXmlLike, hex, sniffEncoding, stripBOM, ensureXmlString } from './encoding-helpers.js';
+
+function utf16leWithBOM(str) {
+  const LE_BOM = [0xff, 0xfe];
+  const encoded = Buffer.from(str, 'utf16le');
+  return Buffer.concat([Buffer.from(LE_BOM), encoded]);
+}
+
+function utf16beWithBOM(str) {
+  const littleEndian = Buffer.from(str, 'utf16le');
+  const bigEndian = Buffer.alloc(littleEndian.length);
+  // Swap the two bytes of every code unit: index ^ 1 is the other byte of the same pair
+  littleEndian.forEach((byte, index) => {
+    bigEndian[index ^ 1] = byte;
+  });
+  const BE_BOM = Buffer.from([0xfe, 0xff]);
+  return Buffer.concat([BE_BOM, bigEndian]);
+}
+
+// Encodes `str` as UTF-16LE WITHOUT a BOM (to trigger the NUL-heuristic)
+function noBOMUtf16leBytes(str) {
+  return Buffer.from(str, 'utf16le');
+}
+
+describe('isXmlLike', () => {
+  it('matches .xml and .rels', () => {
+    // Both extensions, including a nested .xml.rels name
+    const accepted = ['word/document.xml', 'word/_rels/document.xml.rels', 'docProps/core.xml'];
+    accepted.forEach((name) => expect(isXmlLike(name)).toBe(true));
+  });
+  it('rejects non-xml', () => {
+    // Other extensions, and .xml that is not at the very end of the name
+    const rejected = ['word/media/image1.png', 'customXml/item1.xml.bin', 'word/fonts/font1.odttf'];
+    rejected.forEach((name) => expect(isXmlLike(name)).toBe(false));
+  });
+});
+
+describe('hex', () => {
+  it('renders hex dump of first N bytes', () => {
+    const sample = new Uint8Array([0xff, 0xfe, 0x3c, 0x00, 0x3f, 0x00]);
+    expect([hex(sample, 6), hex(sample, 4)]).toEqual(['ff fe 3c 00 3f 00', 'ff fe 3c 00']); // n < length truncates
+  });
+});
+
+describe('sniffEncoding', () => {
+  it('detects UTF-16LE by BOM', () => {
+    const detected = sniffEncoding(utf16leWithBOM('<?xml version="1.0"?>'));
+    expect(detected).toBe('utf-16le');
+  });
+  it('detects UTF-16BE by BOM', () => {
+    const detected = sniffEncoding(utf16beWithBOM('<?xml version="1.0"?>'));
+    expect(detected).toBe('utf-16be');
+  });
+  it('defaults to utf-8 for plain ASCII/UTF-8', () => {
+    const detected = sniffEncoding(new TextEncoder().encode('<?xml version="1.0"?><a/>'));
+    expect(detected).toBe('utf-8');
+  });
+  it('heuristically detects UTF-16 (no BOM) via NUL density', () => {
+    // BOM-less little-endian input: every other byte of this ASCII text is NUL
+    const detected = sniffEncoding(noBOMUtf16leBytes('<?xml version="1.0"?><root/>'));
+    expect(detected).toBe('utf-16le');
+  });
+});
+
+describe('stripBOM', () => {
+  it('removes U+FEFF if present', () => {
+    const withBom = '\uFEFF<?xml?><r/>';
+    expect(stripBOM(withBom)).toBe('<?xml?><r/>');
+  });
+  it('no-ops when no BOM present', () => {
+    const plain = '<?xml?><r/>';
+    expect(stripBOM(plain)).toBe(plain);
+  });
+});
+
+describe('ensureXmlString', () => {
+  it('returns same string when given a plain XML string', () => {
+    const s = '<?xml version="1.0"?><r/>';
+    expect(ensureXmlString(s)).toBe(s);
+  });
+
+  it('strips leading BOM from a decoded string', () => {
+    const s = '\uFEFF<?xml version="1.0"?><r/>';
+    expect(ensureXmlString(s)).toBe('<?xml version="1.0"?><r/>');
+  });
+
+  it('decodes UTF-8 bytes', () => {
+    const u8 = new TextEncoder().encode('<?xml version="1.0"?><root>héllo</root>');
+    const out = ensureXmlString(u8);
+    expect(out).toContain('<?xml');
+    expect(out).toContain('héllo');
+  });
+
+  it('decodes UTF-16LE with BOM bytes', () => {
+    const u8 = utf16leWithBOM('<?xml version="1.0" encoding="utf-16"?><props><k>v</k></props>');
+    const out = ensureXmlString(u8);
+    expect(out.toLowerCase()).toContain('encoding="utf-16"');
+    expect(out).toContain('<props>');
+    expect(out).not.toMatch(/\u0000/);
+  });
+
+  it('decodes UTF-16BE with BOM bytes', () => {
+    const u8 = utf16beWithBOM('<?xml version="1.0" encoding="utf-16"?><props><k>v</k></props>');
+    const out = ensureXmlString(u8);
+    expect(out.toLowerCase()).toContain('encoding="utf-16"');
+    expect(out).toContain('<props>');
+    expect(out).not.toMatch(/\u0000/);
+  });
+
+  it('decodes UTF-16 (no BOM) via heuristic', () => {
+    const u8 = noBOMUtf16leBytes('<?xml version="1.0"?><root>NOBOM</root>');
+    const out = ensureXmlString(u8);
+    expect(out).toContain('<root>');
+    expect(out).toContain('NOBOM');
+    expect(out).not.toMatch(/\u0000/);
+  });
+
+  it('accepts ArrayBuffer input', () => {
+    const u8 = new TextEncoder().encode('<?xml version="1.0"?><r/>');
+    const out = ensureXmlString(u8.buffer);
+    expect(out).toContain('<r/>');
+  });
+
+  it('throws on unsupported content types', () => {
+    expect(() => ensureXmlString(12345)).toThrow(/Unsupported content type/);
+  });
+
+  it('accepts DataView input', () => {
+    // A DataView is neither a Uint8Array nor a Buffer, so it takes the generic ArrayBufferView path
+    const { buffer } = new TextEncoder().encode('<?xml version="1.0"?><root/>');
+    expect(ensureXmlString(new DataView(buffer))).toContain('<root/>');
+  });
+});
+
+describe('ensureXmlString cross-env', () => {
+  it('decodes from Node Buffer (utf-8)', () => {
+    const nodeBuffer = Buffer.from('<?xml version="1.0"?><root/>', 'utf8');
+    const decoded = ensureXmlString(nodeBuffer);
+    expect(decoded).toContain('<root/>');
+  });
+});

From 6d09115f2bea86dc11d84a7e637d7ef897119116 Mon Sep 17 00:00:00 2001
From: Nick Bernal <nick@harbourshare.com>
Date: Tue, 2 Sep 2025 21:50:17 -0700
Subject: [PATCH 3/3] fix: imports encoded in utf-16 break DocxZipper