diff --git a/src/archivist/extract/index.js b/src/archivist/extract/index.js index 30d2f999b..dc62e5fe6 100644 --- a/src/archivist/extract/index.js +++ b/src/archivist/extract/index.js @@ -35,6 +35,9 @@ export default async function extract(sourceDocument) { if (sourceDocument.mimeType == mime.getType('pdf')) { return await extractFromPDF(sourceDocument); } + if (sourceDocument.mimeType == mime.getType('txt') || sourceDocument.mimeType == mime.getType('md')) { + return await extractFromMd(sourceDocument); + } return await extractFromHTML(sourceDocument); } catch (error) { @@ -126,6 +129,14 @@ export async function extractFromPDF({ location, content: pdfBuffer }) { return markdownContent; } +export async function extractFromMd({ location, content: markdownContent }) { + if (!markdownContent) { + throw new Error(`The markdown file at '${location}' contains no text`); + } + + return markdownContent; +} + function selectRange(webPageDOM, rangeSelector) { const { startBefore, startAfter, endBefore, endAfter } = rangeSelector;