Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,15 @@ RUN dnf install -y \
texlive-collection-latexrecommended.noarch \
texlive-iftex.noarch \
texlive-braket.noarch \
texlive-cancel.noarch
texlive-cancel.noarch \
texlive-xecjk.noarch \
texlive-ctex.noarch

# Install Noto Sans fonts for Unicode rendering (Latin/Greek/Cyrillic + CJK)
RUN dnf install -y \
google-noto-sans-fonts \
google-noto-sans-cjk-ttc-fonts \
&& fc-cache -fv

# Copy the LaTeX template
COPY ./src/template.latex template.latex
Expand Down
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,41 @@ body:
```


## Testing

### Dependencies

In addition to the runtime dependencies above, running the full test suite locally requires:

- [Pandoc](https://pandoc.org/installing.html) — for integration tests that verify markdown → LaTeX conversion
- A TeX Live distribution with **xelatex** — for compile tests that produce real PDFs
- [poppler-utils](https://poppler.freedesktop.org/) (`pdftotext`) — for content verification of compiled PDFs

On macOS these can be installed via Homebrew:
```bash
brew install pandoc mactex poppler
brew install --cask font-noto-sans font-noto-sans-cjk
```

### Running tests

```bash
yarn test # type-check + all tests
yarn test:unit # all Vitest suites (unit, integration, end-to-end); no type-check
yarn test:types # TypeScript type-check only
```

### Test structure

| File | Type | What it tests |
|---|---|---|
| `src/utils.test.ts` | Unit | `fixInlineLatex`, `errorRefiner`, `deleteFile` — pure function logic |
| `index.test.ts` | Unit | Zod schema validation and Lambda handler routing (Pandoc mocked) |
| `src/pandoc.test.ts` | Integration | Real Pandoc: markdown → LaTeX fragment output, math, `implicit_figures`, Unicode |
| `src/compile.test.ts` | End-to-end | Full pipeline: Pandoc + `template.latex` + xelatex → PDF; content verified with `pdftotext` |

The compile tests take ~5–15 seconds each as they invoke xelatex.

## More information

https://github.com/lambda-feedback/technical-documentation/blob/main/docs/pdf_generator/index.md
132 changes: 132 additions & 0 deletions index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import * as fs from "fs";

// Partially mock Node's "fs" module: every real export is kept, but `rm` and
// `createReadStream` are replaced with bare `vi.fn()` stubs. Their behavior is
// configured per test in the "handler" suite's `beforeEach`.
vi.mock("fs", async (importOriginal) => {
const actual = await importOriginal<typeof import("fs")>();
return { ...actual, rm: vi.fn(), createReadStream: vi.fn() };
});

// Replace the "pdc-ts" module: `PdcTs` becomes a mock whose default
// implementation returns an object with an `Execute` stub that resolves to "".
// Individual tests override this with `mockImplementationOnce` when they need
// to inspect the call arguments or simulate a failure.
vi.mock("pdc-ts", () => ({
PdcTs: vi.fn().mockImplementation(() => ({
Execute: vi.fn().mockResolvedValue(""),
})),
}));

// Replace the AWS S3 SDK module: `S3Client` yields an object whose `send`
// resolves to an empty object, and `PutObjectCommand` simply returns the
// params it was given.
vi.mock("@aws-sdk/client-s3", () => ({
S3Client: vi.fn().mockImplementation(() => ({
send: vi.fn().mockResolvedValue({}),
})),
PutObjectCommand: vi.fn().mockImplementation((params: unknown) => params),
}));

import { schema, handler } from "./index";
import { PdcTs } from "pdc-ts";

describe("schema", () => {
  // One row per test: [title, payload passed to `schema.safeParse`, expected `success`].
  const cases: ReadonlyArray<readonly [string, unknown, boolean]> = [
    [
      "validates a minimal valid PDF request",
      [{ userId: "u1", fileName: "doc", typeOfFile: "PDF", markdown: "# Hello" }],
      true,
    ],
    [
      "validates a TEX request with implicitFigures",
      [{ userId: "u1", fileName: "doc", typeOfFile: "TEX", markdown: "text", implicitFigures: true }],
      true,
    ],
    [
      "rejects request missing userId",
      [{ fileName: "doc", typeOfFile: "PDF", markdown: "text" }],
      false,
    ],
    [
      "rejects invalid typeOfFile value",
      [{ userId: "u1", fileName: "doc", typeOfFile: "DOCX", markdown: "text" }],
      false,
    ],
    [
      "rejects request missing markdown",
      [{ userId: "u1", fileName: "doc", typeOfFile: "PDF" }],
      false,
    ],
    [
      "rejects a non-array input",
      { userId: "u1", fileName: "doc", typeOfFile: "PDF", markdown: "text" },
      false,
    ],
    [
      "validates a request with a variables map",
      [
        {
          userId: "u1",
          fileName: "doc",
          typeOfFile: "PDF",
          markdown: "text",
          variables: { lang: "ko", CJKmainfont: "Noto Sans CJK KR" },
        },
      ],
      true,
    ],
    [
      "rejects variables with non-string values",
      [{ userId: "u1", fileName: "doc", typeOfFile: "PDF", markdown: "text", variables: { lang: 42 } }],
      false,
    ],
  ];

  // Register the tests in table order so the suite reports them exactly as before.
  for (const [title, payload, shouldParse] of cases) {
    it(title, () => {
      const outcome = schema.safeParse(payload);
      expect(outcome.success).toBe(shouldParse);
    });
  }
});

// Tests for the Lambda `handler`: Pandoc (`pdc-ts`), S3 and the relevant `fs`
// functions are mocked, so only the handler's own validation and routing run.
describe("handler", () => {
  beforeEach(() => {
    // Stub instead of assigning to process.env so the original value is
    // restored by `vi.unstubAllEnvs()` in afterEach and cannot leak out of this suite.
    vi.stubEnv("PUBLIC_S3_BUCKET", "test-bucket");
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    vi.mocked(fs.createReadStream as any).mockReturnValue({} as any);
    // `fs.rm` is callback-based: report success by invoking the trailing
    // callback (when one is supplied) with a null error.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    vi.mocked(fs.rm as any).mockImplementation((...args: any[]) => {
      const cb = args[args.length - 1];
      if (typeof cb === "function") cb(null);
    });
  });

  afterEach(() => {
    vi.clearAllMocks();
    vi.unstubAllEnvs();
  });

  it("returns 400 when event is null", async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result = await handler(null as any);
    expect(result.statusCode).toBe(400);
  });

  it("returns 400 when payload does not match schema", async () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result = await handler({ invalid: true } as any);
    expect(result.statusCode).toBe(400);
  });

  it("returns 200 with a URL for a valid PDF request", async () => {
    const event = [{ userId: "user1", fileName: "test-doc", typeOfFile: "PDF", markdown: "# Hello" }];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result = await handler(event as any);
    expect(result.statusCode).toBe(200);
    const body = JSON.parse(result.body) as { url: string };
    expect(body.url).toContain("test-doc.pdf");
    expect(body.url).toContain("test-bucket");
  });

  it("returns 200 for a valid TEX request", async () => {
    const event = [{ userId: "user1", fileName: "test-doc", typeOfFile: "TEX", markdown: "# Hello" }];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result = await handler(event as any);
    expect(result.statusCode).toBe(200);
  });

  it("passes variables as --variable flags to Pandoc", async () => {
    // Capture the Execute call of the PdcTs instance the handler creates.
    const executeMock = vi.fn().mockResolvedValue("");
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    vi.mocked(PdcTs).mockImplementationOnce(() => ({ Execute: executeMock }) as any);

    const event = [
      {
        userId: "user1",
        fileName: "test-doc",
        typeOfFile: "TEX",
        markdown: "# Hello",
        variables: { lang: "ko", CJKmainfont: "Noto Sans CJK KR" },
      },
    ];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await handler(event as any);

    // Fail with a clear message if Pandoc was never invoked, rather than on
    // the empty-array fallback below.
    expect(executeMock).toHaveBeenCalled();
    const calledArgs: string[] = executeMock.mock.calls[0]?.[0]?.pandocArgs ?? [];
    expect(calledArgs).toContain("--variable=lang:ko");
    expect(calledArgs).toContain("--variable=CJKmainfont:Noto Sans CJK KR");
  });

  it("returns 500 when Pandoc execution fails", async () => {
    // First Execute call rejects; a second call, if the handler makes one,
    // resolves with three lines of text.
    vi.mocked(PdcTs).mockImplementationOnce(() => ({
      Execute: vi.fn()
        .mockRejectedValueOnce(new Error("Pandoc error l.1 bad token"))
        .mockResolvedValueOnce("line1\nline2\nline3"),
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
    }) as any);

    const event = [{ userId: "user1", fileName: "test-doc", typeOfFile: "PDF", markdown: "# Hello" }];
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result = await handler(event as any);
    expect(result.statusCode).toBe(500);
  });
});
7 changes: 5 additions & 2 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export const schema = z.array(
typeOfFile: TypeOfFileSchema,
markdown: z.string(),
implicitFigures: z.boolean().optional(),
variables: z.record(z.string(), z.string()).optional(),
})
);

Expand Down Expand Up @@ -147,13 +148,15 @@ export const handler = async function (
for (let eachRequestData of requestData) {
const markdown = eachRequestData.markdown;
const implicitFigures = eachRequestData.implicitFigures;
const variableArgs = Object.entries(eachRequestData.variables ?? {})
.map(([k, v]) => `--variable=${k}:${v}`);

switch (eachRequestData.typeOfFile) {
case "PDF":
const filenamePDF = `${eachRequestData.fileName}.pdf`;
const localPathPDF = `/tmp/${filenamePDF}`;
const generatePDFResult = await generateFile(
["--pdf-engine=xelatex", `--template=./template.latex`],
["--pdf-engine=xelatex", `--template=./template.latex`, ...variableArgs],
localPathPDF,
markdown,
implicitFigures
Expand All @@ -170,7 +173,7 @@ export const handler = async function (
const filenameTEX = `${eachRequestData.fileName}.tex`;
const localPathTEX = `/tmp/${filenameTEX}`;
await generateFile(
[`--template=./template.latex`],
[`--template=./template.latex`, ...variableArgs],
localPathTEX,
markdown,
implicitFigures
Expand Down
7 changes: 5 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"main": "index.js",
"scripts": {
"test:types": "tsc",
"test": "yarn test:types",
"test:unit": "vitest run",
"test": "yarn test:types && yarn test:unit",
"build": "esbuild index.ts --bundle --minify --sourcemap --platform=node --target=es2020 --outfile=dist/index.js"
},
"keywords": [],
Expand All @@ -19,6 +20,8 @@
"devDependencies": {
"@types/aws-lambda": "^8.10.137",
"@types/node": "^20.12.2",
"esbuild": "^0.20.2"
"esbuild": "^0.20.2",
"typescript": "^6.0.3",
"vitest": "^2"
}
}
87 changes: 87 additions & 0 deletions src/compile.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { describe, it, expect, afterEach } from "vitest";
import * as fs from "fs";
import * as path from "path";
import { execSync } from "child_process";
import { PdcTs } from "pdc-ts";

// End-to-end compile tests — require xelatex (TeX Live) and Pandoc on PATH.
// These run the full production pipeline: markdown → Pandoc + template.latex → xelatex → PDF.
// They are intentionally slow (~5-15s per compile).

// Absolute path so Pandoc can locate the template regardless of its working directory.
// Resolved relative to this test file's directory.
const TEMPLATE = path.resolve(__dirname, "template.latex");

// Paths of PDFs requested by the current test; `compileToPdf` appends to this
// list and the `afterEach` hook drains it and deletes the files.
const pendingPdfs: string[] = [];

/**
 * Compiles `markdown` with Pandoc (xelatex engine, project template) into
 * `/tmp/compile-test-<id>.pdf`.
 *
 * The output path is queued in `pendingPdfs` before Pandoc runs, so the
 * cleanup hook sees it even if compilation throws.
 *
 * @param markdown - Markdown source to compile.
 * @param id - Suffix used to build the output file name.
 * @returns The path of the generated PDF.
 */
const compileToPdf = async (markdown: string, id: string) => {
  const outputPath = `/tmp/compile-test-${id}.pdf`;
  pendingPdfs.push(outputPath);

  const pandoc = new PdcTs();
  await pandoc.Execute({
    from: "markdown",
    to: "latex",
    pandocArgs: ["--pdf-engine=xelatex", `--template=${TEMPLATE}`],
    spawnOpts: { argv0: "+RTS -M512M -RTS" },
    outputToFile: true,
    sourceText: markdown,
    destFilePath: outputPath,
  });

  return outputPath;
};

/**
 * Extracts the text of a PDF by running `pdftotext` and reading its stdout.
 *
 * @param pdfPath - Path of the PDF file to read.
 * @returns The text `pdftotext` printed for the file.
 */
const extractText = (pdfPath: string) => {
  const stdout = execSync(`pdftotext "${pdfPath}" -`);
  return stdout.toString();
};

// Delete every PDF queued by the test that just finished. The queue is
// emptied first, and deletion errors are deliberately ignored (best-effort cleanup).
afterEach(() => {
  const toDelete = pendingPdfs.splice(0);
  toDelete.forEach((pdfPath) => {
    try {
      fs.rmSync(pdfPath, { force: true });
    } catch {
      /* ignore */
    }
  });
});

describe("PDF compile (end-to-end pipeline)", () => {
  // xelatex runs are slow, so every test gets a generous timeout. Options are
  // passed as the SECOND argument to `it`: the trailing-options-object form is
  // deprecated in Vitest.
  const compileOptions = { timeout: 60_000 };

  it("renders heading and paragraph text correctly", compileOptions, async () => {
    const pdf = await compileToPdf("# Hello\n\nThis is a test document.", "basic");
    const text = extractText(pdf);
    expect(text).toContain("Hello");
    expect(text).toContain("This is a test document");
  });

  it("renders inline and display math without compilation errors", compileOptions, async () => {
    const pdf = await compileToPdf(
      "The value is $x^2 + 1$.\n\n$$\\int_0^1 x\\, dx = \\frac{1}{2}$$",
      "math"
    );
    // pdftotext cannot reliably extract math glyph sequences, so we verify
    // the surrounding prose appears and the file is non-trivially sized
    const text = extractText(pdf);
    expect(text).toContain("The value is");
    expect(fs.statSync(pdf).size).toBeGreaterThan(5000);
  });

  it("renders Unicode Greek letters in prose correctly", compileOptions, async () => {
    const pdf = await compileToPdf(
      "Unicode Greek: α, β, γ, Δ, Σ.\n\nDiscriminant $\\Delta = b^2 - 4ac$ where $\\alpha, \\beta \\in \\mathbb{R}$.\n\n$$\\int_{-\\infty}^{\\infty} e^{-x^2}\\, dx = \\sqrt{\\pi}$$",
      "unicode"
    );
    const text = extractText(pdf);
    // Greek letters used as prose text should survive rendering
    expect(text).toContain("α");
    expect(text).toContain("β");
    expect(text).toContain("Δ");
    expect(text).toContain("Σ");
  });
});
Loading
Loading