diff --git a/build.boot b/build.boot
index 87c569fb..ca0f5820 100644
--- a/build.boot
+++ b/build.boot
@@ -11,7 +11,8 @@
                   [gravatar "1.1.1" :scope "test"]
                   [clj-time "0.12.0" :scope "test"]
                   [mvxcvi/puget "1.0.0" :scope "test"]
-                  [com.novemberain/pantomime "2.8.0" :scope "test"]])
+                  [com.novemberain/pantomime "2.8.0" :scope "test"]
+                  [org.asciidoctor/asciidoctorj "1.5.4.1" :scope "test"]])
 
 (require '[adzerk.bootlaces :refer :all])
 
diff --git a/src/io/perun.clj b/src/io/perun.clj
index d57c53ca..f76e2599 100644
--- a/src/io/perun.clj
+++ b/src/io/perun.clj
@@ -103,35 +103,56 @@
         (perun/set-meta fileset updated-files)
         (commit fileset tmp))))
 
-(def ^:private markdown-deps
-  '[[org.pegdown/pegdown "1.6.0"]
-    [circleci/clj-yaml "0.5.5"]])
-
-(deftask markdown
-  "Parse markdown files
-
-   This task will look for files ending with `md` or `markdown`
+(defmulti content-parser
+  "Extension point for the `content` task
+   Takes the language to be parsed and a map of options.
+   Returns a map with the following keys:
+    - `:file-exts` A vector of file extensions, on which the parser will operate
+    - `:parse-form` A fn that takes a list of files to parse, and returns a form that will be evaled to perform the parsing
+    - `:pod` (optional) A pod that contains dependencies that the parser needs"
+  (fn [language _] language))
+
+(defmethod content-parser :default
+  [language _]
+  ; Unknown languages are reported and yield no parser; the `content` task skips them
+  (perun/report-info "content" "Unknown content language %s" language)
+  nil)
+
+(def ^:private +content-defaults+
+  {:languages [:markdown]})
+
+(deftask content
+  "Parse content files
+
+   This task will look for file types that perun can parse
    and add a `:content` key to their metadata containing the
-   HTML resulting from processing markdown file's content"
-  [o options OPTS edn "options to be passed to the markdown parser"]
-  (let [pod       (create-pod markdown-deps)
+   HTML resulting from processing the file's content"
+  [l languages     LANGUAGES [kw] "languages to parse (default: `[:markdown]`)"
+   p parse-options PARSEOPTS edn  "options to pass to underlying parsers (ex: {:markdown {:smarts true}})"]
+  (let [options   (merge +content-defaults+ *opts*)
+        ; `keep` drops the nil returned for unknown languages, which would otherwise
+        ; fail later when its missing `:parse-form` is invoked
+        parsers   (doall (keep #(content-parser % (% parse-options)) (:languages options)))
         prev-meta (atom {})
         prev-fs   (atom nil)]
     (boot/with-pre-wrap fileset
-      (let [md-files (->> fileset
-                          (boot/fileset-diff @prev-fs)
-                          boot/user-files
-                          (boot/by-ext ["md" "markdown"])
-                          add-filedata)
-            ; process all removed markdown files
+      (let [updated-files (mapcat
+                           (fn [{:keys [file-exts parse-form pod]}]
+                             (let [files (->> fileset
+                                              (boot/fileset-diff @prev-fs)
+                                              boot/user-files
+                                              (boot/by-ext file-exts)
+                                              add-filedata)]
+                               (if pod
+                                 (pod/with-call-in @pod ~(parse-form files))
+                                 (eval (parse-form files)))))
+                           parsers)
             removed? (->> fileset
                           (boot/fileset-removed @prev-fs)
                           boot/user-files
-                          (boot/by-ext ["md" "markdown"])
+                          (boot/by-ext (mapcat :file-exts parsers))
                           (map #(boot/tmp-path %))
                           set)
-            updated-files (pod/with-call-in @pod
-                            (io.perun.markdown/parse-markdown ~md-files ~options))
             initial-metadata (perun/merge-meta* (perun/get-meta fileset) @prev-meta)
             ; Pure merge instead of `merge-with merge` (meta-meta).
             ; This is because updated metadata should replace previous metadata to
@@ -142,6 +163,26 @@
         (reset! prev-meta final-metadata)
         (perun/set-meta fileset final-metadata)))))
 
+(def ^:private markdown-deps
+  '[[org.pegdown/pegdown "1.6.0"]
+    [circleci/clj-yaml "0.5.5"]])
+
+(defmethod content-parser :markdown
+  [_ options]
+  {:file-exts ["md" "markdown"]
+   :parse-form (fn [files] `(io.perun.content.markdown/parse-markdown ~files ~options))
+   :pod (create-pod markdown-deps)})
+
+(def ^:private asciidoc-deps
+  '[[org.asciidoctor/asciidoctorj "1.5.4.1"]
+    [circleci/clj-yaml "0.5.5"]])
+
+(defmethod content-parser :asciidoc
+  [_ options]
+  {:file-exts ["adoc" "asciidoc"]
+   :parse-form (fn [files] `(io.perun.content.asciidoc/parse-asciidoc ~files ~options))
+   :pod (create-pod asciidoc-deps)})
+
 (deftask global-metadata
   "Read global metadata from `perun.base.edn` or configured file.
 
diff --git a/src/io/perun/content.clj b/src/io/perun/content.clj
new file mode 100644
index 00000000..07d79055
--- /dev/null
+++ b/src/io/perun/content.clj
@@ -0,0 +1,60 @@
+(ns io.perun.content
+  (:require [clojure.string :as str]
+            [clj-yaml.core :as yaml]
+            [clojure.walk :as walk])
+  (:import [flatland.ordered.map OrderedMap]
+           [flatland.ordered.set OrderedSet]))
+
+(def ^:dynamic *yaml-head* #"---\r?\n")
+
+(defn substr-between
+  "Find string that is nested in between two strings. Return first match.
+  Copied from https://github.com/funcool/cuerdas"
+  [s prefix suffix]
+  (cond
+    (nil? s) nil
+    (nil? prefix) nil
+    (nil? suffix) nil
+    :else
+    (some-> s
+            (str/split prefix)
+            second
+            (str/split suffix)
+            first)))
+
+(defn normal-colls
+  "Clj-yaml keeps order of map properties by using ordered maps. These are inconvenient
+  for us as the ordered library is not necessarily available in other pods."
+  [x]
+  (walk/postwalk
+    (fn [y]
+      (cond
+        (instance? OrderedMap y) (into {} y)
+        (instance? OrderedSet y) (into #{} y)
+        :else y))
+    x))
+
+(defn parse-file-metadata
+  "Extract the YAML front matter delimited by `*yaml-head*` from `file-content`
+  and return it as a map with `:original true` added. When there is no front
+  matter, or it does not parse to a map, return just `{:original true}`."
+  [file-content]
+  (let [parsed-yaml (some-> (substr-between file-content *yaml-head* *yaml-head*)
+                            yaml/parse-string
+                            normal-colls)]
+    ; we use `original` file flag to distinguish between generated files
+    ; (e.g. those created by plugins)
+    (if (map? parsed-yaml)
+      ; a scalar or sequence cannot carry metadata keys, so only maps are kept
+      (assoc parsed-yaml :original true)
+      {:original true})))
+
+(defn remove-metadata
+  "Return `content` without its leading YAML front matter. The text is split on
+  `*yaml-head*` into at most three parts; when all three exist the third is the
+  body, otherwise `content` is returned unchanged."
+  [content]
+  (let [splitted (str/split content *yaml-head* 3)]
+    (if (> (count splitted) 2)
+      (first (drop 2 splitted))
+      content)))
diff --git a/src/io/perun/content/asciidoc.clj b/src/io/perun/content/asciidoc.clj
new file mode 100644
index 00000000..7880d5a8
--- /dev/null
+++ b/src/io/perun/content/asciidoc.clj
@@ -0,0 +1,67 @@
+(ns io.perun.content.asciidoc
+  (:require [io.perun.core :as perun]
+            [io.perun.content :as content]
+            [clojure.java.io :as io]
+            [clojure.walk :as walk])
+  (:import [org.asciidoctor Asciidoctor$Factory]))
+
+;; Copied from https://github.com/ruedigergad/clj-assorted-utils/blob/master/src/clj_assorted_utils/util.clj
+(defn convert-from-clojure-to-java
+  "Converts the given Clojure specific data structure (list, map, set, vector) into the equivalent \"pure\" Java data structure.
+   The mapping is as follows: list and vector -> ArrayList, map -> HashMap, set -> HashSet.
+   Nested data structures will be converted recursively."
+  [input]
+  (cond
+    (or
+      (list? input)
+      (vector? input)) (let [out (java.util.ArrayList.)]
+                         (doseq [in-element input]
+                           (if (coll? in-element)
+                             (.add out (convert-from-clojure-to-java in-element))
+                             (.add out in-element)))
+                         out)
+    (map? input) (let [out (java.util.HashMap.)]
+                   (doseq [in-element input]
+                     (if (coll? (val in-element))
+                       (.put out (key in-element) (convert-from-clojure-to-java (val in-element)))
+                       (.put out (key in-element) (val in-element))))
+                   out)
+    (set? input) (let [out (java.util.HashSet.)]
+                   (doseq [in-element input]
+                     (if (coll? in-element)
+                       (.add out (convert-from-clojure-to-java in-element))
+                       (.add out in-element)))
+                   out)))
+
+;; A single Asciidoctor instance is created on first use and shared by all files,
+;; instead of building a new one for every file
+(def ^:private processor
+  (delay (Asciidoctor$Factory/create)))
+
+(defn asciidoc-to-html
+  "Render the AsciiDoc in `file-content` (minus YAML front matter) to HTML.
+   `options` is a map of Asciidoctor options; keyword keys are turned into
+   strings because the Java API takes a `Map<String, Object>`."
+  [file-content options]
+  (let [asciidoc-content (content/remove-metadata file-content)
+        java-options     (or (convert-from-clojure-to-java (walk/stringify-keys options))
+                             (java.util.HashMap.))]
+    (.convert @processor asciidoc-content java-options)))
+
+(defn process-file
+  "Read the file at `(:full-path file)` and return `file` merged over its parsed
+   front matter and a `:content` key holding the rendered HTML."
+  [file options]
+  (perun/report-debug "content" "processing asciidoc" (:filename file))
+  (let [file-content (-> file :full-path io/file slurp)
+        adoc-metadata (content/parse-file-metadata file-content)
+        html (asciidoc-to-html file-content options)]
+    (merge adoc-metadata {:content html} file)))
+
+(defn parse-asciidoc
+  "Process every file in `asciidoc-files` eagerly and return the resulting
+   sequence of metadata maps."
+  [asciidoc-files options]
+  (let [updated-files (doall (map #(process-file % options) asciidoc-files))]
+    (perun/report-info "content" "parsed %s asciidoc files" (count asciidoc-files))
+    updated-files))
diff --git a/src/io/perun/markdown.clj b/src/io/perun/content/markdown.clj
similarity index 52%
rename from src/io/perun/markdown.clj
rename to src/io/perun/content/markdown.clj
index 0af1fc4c..ba296c4d 100644
--- a/src/io/perun/markdown.clj
+++ b/src/io/perun/content/markdown.clj
@@ -1,12 +1,8 @@
-(ns io.perun.markdown
-  (:require [io.perun.core   :as perun]
-            [clojure.java.io :as io]
-            [clojure.string  :as str]
-            [clj-yaml.core   :as yaml]
-            [clojure.walk    :as walk])
-  (:import [org.pegdown PegDownProcessor Extensions]
-           [flatland.ordered.map OrderedMap]
-           [flatland.ordered.set OrderedSet]))
+(ns io.perun.content.markdown
+  (:require [io.perun.core :as perun]
+            [io.perun.content :as content]
+            [clojure.java.io :as io])
+  (:import [org.pegdown PegDownProcessor Extensions]))
 
 ;; Extension handling has been copied from endophile.core
 ;; See https://github.com/sirthias/pegdown/blob/master/src/main/java/org/pegdown/Extensions.java
@@ -35,8 +31,6 @@
    :all-optionals Extensions/ALL_OPTIONALS
    :all-with-optionals Extensions/ALL_WITH_OPTIONALS})
 
-(def ^:dynamic *yaml-head* #"---\r?\n")
-
 (defn extensions-map->int [opts]
   (->> opts
        (merge {:autolinks true
@@ -49,63 +43,21 @@
        (apply bit-or 0)
        int))
 
-(defn substr-between
-  "Find string that is nested in between two strings. Return first match.
-  Copied from https://github.com/funcool/cuerdas"
-  [s prefix suffix]
-  (cond
-    (nil? s) nil
-    (nil? prefix) nil
-    (nil? suffix) nil
-    :else
-    (some-> s
-            (str/split prefix)
-            second
-            (str/split suffix)
-            first)))
-
-(defn normal-colls
-  "Clj-yaml keeps order of map properties by using ordered maps. These are inconvenient
-  for us as the ordered library is not necessarily available in other pods."
-  [x]
-  (walk/postwalk
-    (fn [y]
-      (cond
-        (instance? OrderedMap y) (into {} y)
-        (instance? OrderedSet y) (into #{} y)
-        :else y))
-    x))
-
-(defn parse-file-metadata [file-content]
-  (if-let [metadata-str (substr-between file-content *yaml-head* *yaml-head*)]
-    (if-let [parsed-yaml (normal-colls (yaml/parse-string metadata-str))]
-      ; we use `original` file flag to distinguish between generated files
-      ; (e.x. created those by plugins)
-      (assoc parsed-yaml :original true)
-      {:original true})
-    {:original true}))
-
-(defn remove-metadata [content]
-  (let [splitted (str/split content *yaml-head* 3)]
-    (if (> (count splitted) 2)
-      (first (drop 2 splitted))
-      content)))
-
 (defn markdown-to-html [file-content options]
   (let [processor (PegDownProcessor. (extensions-map->int (:extensions options)))]
     (->> file-content
-         remove-metadata
+         content/remove-metadata
          char-array
          (.markdownToHtml processor))))
 
 (defn process-file [file options]
-  (perun/report-debug "markdown" "processing markdown" (:filename file))
+  (perun/report-debug "content" "processing markdown" (:filename file))
   (let [file-content (-> file :full-path io/file slurp)
-        md-metadata (parse-file-metadata file-content)
+        md-metadata (content/parse-file-metadata file-content)
         html (markdown-to-html file-content options)]
     (merge md-metadata {:content html} file)))
 
 (defn parse-markdown [markdown-files options]
   (let [updated-files (doall (map #(process-file % options) markdown-files))]
-    (perun/report-info "markdown" "parsed %s markdown files" (count markdown-files))
+    (perun/report-info "content" "parsed %s markdown files" (count markdown-files))
     updated-files))