Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ npx docusaurus-pdf from-build-config
- Optional: If you have a `baseUrl` configured in your `docusaurus.config.js` then pass this value as `baseUrl`.
- Note: There is an optional parameter to set a custom filename. You can see further details using `npx docusaurus-pdf from-build --help`.

### Table of Contents
A table of contents will be generated wherever you place a `<toc>` tag. The items in the table of contents will be indented according to their header size.
The header will have class `toc-header`, the ul element will have class `toc-list`, and the individual li elements will have class `toc-item`.

## Docker usage
All dependencies needed to create a PDF from your docusaurus site are bundled in our Dockerfile.

Expand Down
75 changes: 38 additions & 37 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
"chalk": "^3.0.0",
"commander": "^4.1.1",
"express": "^4.17.1",
"pdf-lib": "^1.7.0",
"puppeteer": "^2.1.1"
},
"files": [
Expand Down
103 changes: 71 additions & 32 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,11 @@ import chalk = require("chalk");
import puppeteer = require("puppeteer");
import express = require("express");
import { AddressInfo } from "net";
import { PDFDocument } from "pdf-lib";

import * as fs from "fs";
import * as path from "path";

const generatedPdfBuffers: Array<Buffer> = [];

/**
 * Combines several PDF documents into one.
 *
 * Each buffer is loaded as its own document, and its pages (as listed by
 * `getPageIndices()`) are copied into a newly created document. Buffers are
 * processed one at a time, in the order given.
 *
 * @param pdfBuffers - The PDF documents to combine, in the desired output order.
 * @returns Whatever `save()` on the combined document resolves to.
 */
async function mergePdfBuffers(pdfBuffers: Array<Buffer>) {
  const combined = await PDFDocument.create();

  for (const sourceBuffer of pdfBuffers) {
    const source = await PDFDocument.load(sourceBuffer);
    const indices = source.getPageIndices();
    // Pages have to be copied into the target document before they can be added to it.
    const copiedPages = await combined.copyPages(source, indices);
    for (const copied of copiedPages) {
      combined.addPage(copied);
    }
  }

  return combined.save();
}
const htmlList: Array<string> = [];

const getURL = (origin: string, filePath: string) => {
return origin + "/" + filePath.substring(filePath.startsWith("/") ? 1 : 0);
Expand Down Expand Up @@ -112,10 +98,12 @@ export async function generatePdf(
let scriptPath = "";

let nextPageUrl = initialDocsUrl;
const headers: Array<{ header: string; level: number; id: string }> = [];
let tocLocation = null;

while (nextPageUrl) {
console.log();
console.log(chalk.cyan(`Generating PDF of ${nextPageUrl}`));
console.log(chalk.cyan(`Retrieving html from ${nextPageUrl}`));
console.log();

await page
Expand All @@ -126,8 +114,8 @@ export async function generatePdf(
throw new Error(
`Page could not be loaded! Did not get any HTML for ${nextPageUrl}`
);
stylePath = getStylesheetPathFromHTML(html, origin);
scriptPath = getScriptPathFromHTML(html, origin);
if (!stylePath) stylePath = getStylesheetPathFromHTML(html, origin);
if (!scriptPath) scriptPath = getScriptPathFromHTML(html, origin);
});

try {
Expand All @@ -141,28 +129,79 @@ export async function generatePdf(
nextPageUrl = "";
}

const html = await page.$eval("article", (element) => {
let html = await page.$eval("article", (element) => {
return element.outerHTML;
});

await page.setContent(html);
await page.addStyleTag({ url: stylePath });
await page.addScriptTag({ url: scriptPath });
const pdfBuffer = await page.pdf({
path: "",
format: "A4",
printBackground: true,
margin: { top: 25, right: 35, left: 35, bottom: 25 },
});
const tocMatch = html.match(/(<toc><\/toc>)|(<toc\/>)/);

if (tocMatch && tocLocation == null) {
htmlList.push(html.slice(0, tocMatch.index));
tocLocation = htmlList.length;
html = html.slice(tocMatch.index);
}

generatedPdfBuffers.push(pdfBuffer);
if (tocLocation !== null) {
// parse headers in html for table of contents
html = html.replace(/<h[1-6](.+?)<\/h[1-6]( )*>/g, (str) => {
// docusaurus inserts #s into headers for direct links to the header
const headerText = str
.replace(/<a[^>]*>#<\/a( )*>/g, "")
.replace(/<[^>]*>/g, "")
.trim();
const headerId = `${Math.random().toString(36).substr(2, 5)}-${
headers.length
}`;
headers.push({
header: headerText,
level: Number(str[str.indexOf("h") + 1]),
id: headerId,
});

const text = str.replace(/<h[1-6].*?>/g, (header) => {
if (header.match(/id( )*=( )*"/g)) {
return header.replace(/id( )*=( )*"/g, `id="${headerId} `);
} else {
return (
header.substring(0, header.length - 1) + ` id="${headerId}">`
);
}
});
return text;
});
}

htmlList.push(html);
console.log(chalk.green("Success"));
}
await browser.close();

const mergedPdfBuffer = await mergePdfBuffers(generatedPdfBuffers);
fs.writeFileSync(`${filename}`, mergedPdfBuffer);
if (tocLocation !== null) {
const toc = headers
.map(
(header) =>
`<li class="toc-item" style="margin-left:${
(header.level - 1) * 20
}px"><a href="#${header.id}">${header.header}</a></li>`
)
.join("\n");
htmlList.splice(
tocLocation,
0,
`<h2 class="toc-header">Table of contents:</h2><ul class="toc-list">${toc}</ul>`
);
}

await page.setContent(htmlList.join("\n"));
await page.addStyleTag({ url: stylePath });
await page.addScriptTag({ url: scriptPath });
await page.pdf({
path: filename,
format: "A4",
printBackground: true,
margin: { top: 25, right: 35, left: 35, bottom: 25 },
});

await browser.close();
}

interface LoadedConfig {
Expand Down