Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/export/packer/next-compiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import xml from "xml";

import { File } from "@file/file";
import { obfuscate } from "@file/fonts/obfuscate-ttf-to-odttf";
import { encodeUtf8 } from "@util/convenience-functions";

import { Formatter } from "../formatter";
import { ImageReplacer } from "./image-replacer";
Expand Down Expand Up @@ -130,15 +131,15 @@ export class Compiler {
for (const [, obj] of map) {
if (Array.isArray(obj)) {
for (const subFile of obj as readonly IXmlifyedFile[]) {
zip.file(subFile.path, subFile.data);
zip.file(subFile.path, encodeUtf8(subFile.data));
}
} else {
zip.file((obj as IXmlifyedFile).path, (obj as IXmlifyedFile).data);
zip.file((obj as IXmlifyedFile).path, encodeUtf8((obj as IXmlifyedFile).data));
}
}

for (const subFile of overrides) {
zip.file(subFile.path, subFile.data);
zip.file(subFile.path, encodeUtf8(subFile.data));
}

for (const data of file.Media.Array) {
Expand Down
4 changes: 2 additions & 2 deletions src/patcher/from-docx.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { IMediaData, Media } from "@file/media";
import { ConcreteHyperlink, ExternalHyperlink, ParagraphChild } from "@file/paragraph";
import { TargetModeType } from "@file/relationships/relationship/relationship";
import { IContext } from "@file/xml-components";
import { uniqueId } from "@util/convenience-functions";
import { encodeUtf8, uniqueId } from "@util/convenience-functions";
import { OutputByType, OutputType } from "@util/output-type";

import { appendContentType } from "./content-types-manager";
Expand Down Expand Up @@ -382,7 +382,7 @@ export const patchDocument = async <T extends PatchDocumentOutputType = PatchDoc
for (const [key, value] of map) {
const output = toXml(value);

zip.file(key, output);
zip.file(key, encodeUtf8(output));
}

for (const [key, value] of binaryContentMap) {
Expand Down
45 changes: 45 additions & 0 deletions src/util/convenience-functions.spec.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import { describe, expect, it } from "vitest";

// cspell:words CESU

import {
abstractNumUniqueNumericIdGen,
bookmarkUniqueNumericIdGen,
concreteNumUniqueNumericIdGen,
convertInchesToTwip,
convertMillimetersToTwip,
docPropertiesUniqueNumericIdGen,
encodeUtf8,
hashedId,
uniqueId,
uniqueNumericIdCreator,
Expand Down Expand Up @@ -98,4 +101,46 @@ describe("Utility", () => {
expect(uniqueUuid()).to.not.be.empty;
});
});

describe("#encodeUtf8", () => {
    // Each case feeds a string through encodeUtf8 and inspects the raw bytes it yields.

    it("should encode ASCII strings correctly", () => {
        // One byte per character: h, e, l, l, o.
        const expectedBytes = [0x68, 0x65, 0x6c, 0x6c, 0x6f];
        const bytes = encodeUtf8("hello");

        expect(bytes.length).to.equal(5);
        expect(Array.from(bytes)).to.deep.equal(expectedBytes);
    });

    it("should encode multi-byte characters correctly", () => {
        // U+00E9 ("é") occupies two bytes in UTF-8: C3 A9.
        const bytes = Array.from(encodeUtf8("é"));

        expect(bytes).to.deep.equal([0xc3, 0xa9]);
    });

    it("should encode emoji (surrogate pairs) correctly as 4-byte UTF-8", () => {
        // U+1F600 (😀) must come out as F0 9F 98 80.
        // A CESU-8 style result would instead be six bytes (ED xx xx ED xx xx).
        const bytes = encodeUtf8(String.fromCodePoint(0x1f600));

        expect(bytes.length).to.equal(4);
        expect(Array.from(bytes)).to.deep.equal([0xf0, 0x9f, 0x98, 0x80]);
    });

    it("should encode Material Design Icons (high code points) correctly", () => {
        // U+F0219 (a Material Design Icon) must come out as F3 B0 88 99.
        // Four bytes means proper UTF-8; six bytes would indicate CESU-8.
        const bytes = encodeUtf8(String.fromCodePoint(0xf0219));

        expect(bytes.length).to.equal(4);
        expect(Array.from(bytes)).to.deep.equal([0xf3, 0xb0, 0x88, 0x99]);
    });

    it("should encode mixed content with astral characters correctly", () => {
        // Byte budget: "Hello " (6) + 😀 (4) + " World" (6) = 16.
        const bytes = encodeUtf8("Hello 😀 World");

        expect(bytes.length).to.equal(16);
    });
});
});
16 changes: 16 additions & 0 deletions src/util/convenience-functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,19 @@ const generateUuidPart = (count: number): string => customAlphabet("1234567890ab
*/
export const uniqueUuid = (): string =>
`${generateUuidPart(8)}-${generateUuidPart(4)}-${generateUuidPart(4)}-${generateUuidPart(4)}-${generateUuidPart(12)}`;

/**
 * Convert a string into its UTF-8 byte representation.
 *
 * XML content is encoded up front with this helper before it is handed to
 * JSZip. Doing so sidesteps a JSZip bug in which string chunking can split
 * a UTF-16 surrogate pair for characters above U+FFFF (emoji, for example).
 *
 * The encoder output is copied through `new Uint8Array()` so the returned
 * array is built with this module's own Uint8Array constructor. In test
 * environments (jsdom) TextEncoder returns a Uint8Array from a different
 * realm, and that instance fails JSZip's instanceof checks.
 *
 * @param str - The text to encode.
 * @returns A new Uint8Array holding the UTF-8 bytes of `str`.
 * @see https://github.com/Stuk/jszip/pull/963
 */
export const encodeUtf8 = (str: string): Uint8Array => {
    const encoder = new TextEncoder();
    const rawBytes = encoder.encode(str);

    // Copy rather than return `rawBytes` directly; see the realm note above.
    return new Uint8Array(rawBytes);
};
Loading