Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ $$(SPHINXDIR)/$(1)/$(1).rst : $$(SRCDIR)/$(1)/$(1).tex pandocCustomization/lates
--lua-filter=$$(PANDCUST)/relink-ivoa-citations.lua \
--lua-filter=$$(PANDCUST)/autolink-docnames.lua \
--lua-filter=$$(PANDCUST)/fix_internal_refs.lua \
--lua-filter=$$(PANDCUST)/sanitize-raw-inline.lua \
--lua-filter=$$(PANDCUST)/fix-figure-media-links.lua \
--lua-filter=$$(PANDCUST)/drop-empty-inline-shells.lua \
--lua-filter=$$(PANDCUST)/drop-conformance-section.lua \
--lua-filter=$$(PANDCUST)/number-sections.lua --template=$$(PANDCUST)/default.rst\
> $$(ROOTDIR)/$$@
make -C $$(dir $$<) -f $$(ROOTDIR)/util.mk -f Makefile copyRequiredFiles TODIR=$$(ROOTDIR)/$$(dir $$@)
Expand Down
55 changes: 55 additions & 0 deletions pandocCustomization/drop-conformance-section.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
-- drop-conformance-section.lua
-- Remove the "Conformance-related definitions" boilerplate section that
-- appears in every IVOA standard. This section is standard ivoatex
-- scaffolding (RFC 2119 keyword definitions) and is not useful as rendered
-- per-document prose on the multi-document site.
--
-- The section header is identified by a case-insensitive substring match:
-- both "conformance" and "definition" must occur somewhere in the header text.
-- All blocks that belong to the matched section (from its header up to, but
-- not including, the next header at the same or higher level) are dropped.
--
-- Placement in the filter chain:
-- Run before number-sections.lua so the header text has not yet been
-- prefixed with a section number.

-- Decide whether a Header element introduces the conformance boilerplate.
-- The header's inline content is flattened to plain text and lower-cased;
-- the header qualifies when that text contains both "conformance" and
-- "definition". The result is truthy (the start index of "definition") on a
-- match and nil otherwise.
local function is_conformance_header(header)
    local lowered = string.lower(pandoc.utils.stringify(header.content))
    if not lowered:find("conformance") then
        return nil
    end
    -- Parentheses keep only the first result of find (the start index).
    return (lowered:find("definition"))
end

-- Document-level filter: drop every conformance section from the top-level
-- block list.
--
-- A section starts at a Header accepted by is_conformance_header and extends
-- up to (but not including) the next Header whose level is numerically <= the
-- starting header's level. Deeper headers inside the section are dropped
-- along with everything else in it.
--
-- The header that closes a dropped section is itself checked, so two
-- conformance sections in a row are both removed.
--
-- Only doc.blocks is scanned; headers nested inside other block containers
-- are not visited by this loop.
--
-- Returns the same doc with doc.blocks replaced by the filtered list.
function Pandoc(doc)
    local kept = {}
    -- Level of the conformance header currently being dropped; nil while
    -- no section is being skipped.
    local skip_level = nil

    for _, block in ipairs(doc.blocks) do
        if block.tag == "Header" then
            -- A sibling or ancestor header closes the skipped region.
            if skip_level and block.level <= skip_level then
                skip_level = nil
            end
            if not skip_level then
                if is_conformance_header(block) then
                    -- Start (or restart) skipping at this header's level.
                    skip_level = block.level
                else
                    kept[#kept + 1] = block
                end
            end
            -- Otherwise this is a sub-section header inside the skipped
            -- region: keep skipping.
        elseif not skip_level then
            kept[#kept + 1] = block
        end
    end

    doc.blocks = pandoc.Blocks(kept)
    return doc
end

-- Filter list returned to pandoc: a single filter whose only handler is the
-- document-level Pandoc function.
return {
{ Pandoc = Pandoc }
}
56 changes: 56 additions & 0 deletions pandocCustomization/drop-empty-inline-shells.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- drop-empty-inline-shells.lua
-- Remove inline container elements that carry no visible content.
--
-- Empty containers arise from LaTeX constructs such as \emph{}, \textbf{},
-- or empty groupings left over from macro expansion. They produce stray
-- whitespace or orphan punctuation in the rendered HTML output.
--
-- An element is treated as empty when its content list contains only Space,
-- SoftBreak, LineBreak, or whitespace-only Str nodes.
--
-- Handled element types:
-- Span, Emph, Strong, Strikeout, Superscript, Subscript, SmallCaps
--
-- Placement in the filter chain:
-- Run after fix_internal_refs.lua and before number-sections.lua.

-- Report whether a list of inlines carries no visible content.
--
-- Returns true when every element is a Space, SoftBreak or LineBreak, or a
-- Str whose text has no non-whitespace character; returns false as soon as
-- any other element is found. An empty list yields true.
--
-- Limitation: Lua's %s / %S classes cover ASCII whitespace only (space, tab,
-- newline, etc.). Unicode whitespace such as U+00A0 (non-breaking space)
-- counts as visible, so a container holding only a non-breaking space is
-- kept. That is acceptable for current IVOA standards content; add a UTF-8
-- aware check here if Unicode-only whitespace containers become a problem.
local function is_whitespace_only(inlines)
    for _, node in ipairs(inlines) do
        local kind = node.tag
        if kind == "Str" then
            -- Any non-whitespace character makes the string visible.
            if node.text:find("%S") then
                return false
            end
        elseif not (kind == "Space" or kind == "SoftBreak" or kind == "LineBreak") then
            -- Anything other than a whitespace node counts as content.
            return false
        end
    end
    return true
end

-- Shared handler for inline containers: return an empty list (which removes
-- the element) when the element's content is empty or whitespace-only.
-- Returns nothing, leaving the element untouched, when the element has no
-- content field or holds visible content.
local function drop_if_empty(el)
    local children = el.content
    if children and (#children == 0 or is_whitespace_only(children)) then
        return {}
    end
end

-- Filter list returned to pandoc: one filter that routes every handled
-- inline container type through drop_if_empty.
return {
{
Span = drop_if_empty,
Emph = drop_if_empty,
Strong = drop_if_empty,
Strikeout = drop_if_empty,
Superscript = drop_if_empty,
Subscript = drop_if_empty,
SmallCaps = drop_if_empty,
}
}
50 changes: 50 additions & 0 deletions pandocCustomization/fix-figure-media-links.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
-- fix-figure-media-links.lua
-- Replace Image elements whose source file is a PDF with an RST hyperlink,
-- since browsers cannot render PDFs as inline images.
--
-- Policy: PDF image src -> RST anonymous reference link to the PDF file.
-- The image alt-text (or the bare filename when alt is empty) is used as the
-- visible link label.
--
-- Handles both:
-- * Inline images inside Para elements
-- * Block images inside Figure elements (the Image node is reached in both
-- cases because Pandoc walks all Image elements regardless of context)
--
-- Placement in the filter chain:
-- Run after fix_internal_refs.lua and before number-sections.lua.

-- True when src ends in ".pdf", compared case-insensitively; false otherwise.
local function is_pdf(src)
    local extension = src:sub(-4)
    return extension:lower() == ".pdf"
end

-- Return the last path component, i.e. the trailing run of characters that
-- contains neither "/" nor "\". When there is no such run (empty string or a
-- path ending in a separator) the path is returned unchanged.
local function basename(path)
    local tail = string.match(path, "[^/\\]+$")
    if tail == nil then
        return path
    end
    return tail
end

-- Escape text for use as the label of an RST `label <url>`__ reference.
-- Backslashes and backticks are each prefixed with a backslash: a backtick
-- would otherwise end the reference early, and a bare backslash would be
-- consumed by RST as an escape character and vanish from the rendered label.
-- The gsub call is parenthesised so that exactly one value (the escaped
-- string) is returned, not gsub's second result (the substitution count).
local function escape_rst_label(s)
    return (s:gsub("[\\`]", "\\%0"))
end

-- Escape characters in a URL that would break RST angle-bracket delimiters:
-- every ">" is percent-encoded as "%3E".
-- The gsub call is parenthesised so that exactly one value (the escaped
-- string) is returned, not gsub's second result (the substitution count).
local function escape_rst_url(s)
    return (s:gsub(">", "%%3E"))
end

-- Image handler: swap an image whose source is a PDF for an RST anonymous
-- hyperlink pointing at that PDF. Images with any other source are left
-- untouched (nothing is returned).
--
-- The link label is the stringified image caption, or the bare file name of
-- the source when the caption is empty.
function Image(el)
    local target = el.src
    if is_pdf(target) then
        local label = pandoc.utils.stringify(el.caption)
        if label == "" then
            label = basename(target)
        end

        -- Assemble `label <target>`__ from the escaped pieces.
        local safe_label = escape_rst_label(label)
        local safe_target = escape_rst_url(target)
        local markup = "`" .. safe_label .. " <" .. safe_target .. ">`__"
        return pandoc.RawInline("rst", markup)
    end
end

-- Filter list returned to pandoc: a single filter whose only handler is the
-- Image function.
return {
{ Image = Image }
}
10 changes: 2 additions & 8 deletions pandocCustomization/relink-ivoa-citations.lua
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,8 @@ function Cite(c)
outstring = ":cite:`" .. doc_to_bib[std_docname] .. "`"
end
elseif bibkey and bibmap[bibkey] then
-- TODO - is this the best way to present?
if(v.text:find("citep")) then
local citetext = "ref"
outstring = ":doc:`".. citetext .." <../" .. bibmap[bibkey] .. "/" .. bibmap[bibkey] ..">`"
else
local citetext = bibmap[bibkey]
outstring = ":doc:`".. citetext .." <../" .. bibmap[bibkey] .. "/" .. bibmap[bibkey] ..">`"
end
local citetext = bibmap[bibkey]
outstring = ":doc:`".. citetext .." <../" .. bibmap[bibkey] .. "/" .. bibmap[bibkey] ..">`"
else

outstring = v.text:gsub("\\cite([pt]){([^}]+)}",":cite:%1:`%2`")
Expand Down
109 changes: 109 additions & 0 deletions pandocCustomization/sanitize-raw-inline.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
-- sanitize-raw-inline.lua
-- Strip or convert residual LaTeX control sequences that survive pandoc's
-- latex+raw_tex pass as RawInline("latex", ...) nodes.
--
-- Placement in the filter chain:
-- Run AFTER fix_internal_refs.lua so that \ref{} and \label{} have already
-- been converted to RST and will not be seen here.
--
-- Policy:
-- * Whitespace/layout commands -> removed (return {})
-- * Known text-expansion macros -> literal text
-- * \index{...}, \phantom{...} -> removed
-- * \hspace{...}, \vspace{...} -> removed
-- * \url{...} -> RST anonymous hyperlink
-- * \href{url}{text} -> RST anonymous hyperlink
-- * Anything else -> left unchanged (preserve unknown macros)

-- Commands that produce no visible output and should be silently dropped.
local DROP_COMMANDS = {
    ["\\noindent"]    = true,
    ["\\par"]         = true,
    ["\\clearpage"]   = true,
    ["\\newpage"]     = true,
    ["\\linebreak"]   = true,
    ["\\hfill"]       = true,
    ["\\vfill"]       = true,
    ["\\newline"]     = true,
    ["\\centering"]   = true,
    ["\\raggedright"] = true,
    ["\\raggedleft"]  = true,
    ["\\null"]        = true,
    ["\\relax"]       = true,
    ["\\-"]           = true,
}

-- Commands that expand to a fixed literal string.
local EXPAND_COMMANDS = {
    ["\\TeX"]           = "TeX",
    ["\\LaTeX"]         = "LaTeX",
    ["\\BibTeX"]        = "BibTeX",
    ["\\textbackslash"] = "\\",
    ["\\ldots"]         = "…",             -- U+2026 HORIZONTAL ELLIPSIS
    ["\\dots"]          = "…",             -- U+2026 HORIZONTAL ELLIPSIS
    ["\\lq"]            = "\xe2\x80\x98",  -- U+2018 LEFT SINGLE QUOTATION MARK
    ["\\rq"]            = "\xe2\x80\x99",  -- U+2019 RIGHT SINGLE QUOTATION MARK
}

-- Remove from the text following a control word the part that produces no
-- output: leading whitespace (TeX swallows it after a control word) and any
-- empty {} groups that follow directly. Everything after that is returned
-- unchanged, so whitespace that comes after a {} group is preserved.
local function strip_swallowed_prefix(suffix)
    local rest = suffix:gsub("^%s+", "")
    local removed
    repeat
        rest, removed = rest:gsub("^%{%}", "")
    until removed == 0
    return rest
end

-- RawInline handler for residual LaTeX.
--
-- Non-"latex" raw inlines are left untouched. For "latex" ones the text is
-- checked against the rules below, in order:
--   1. bare layout command listed in DROP_COMMANDS     -> removed ({})
--   2. macro listed in EXPAND_COMMANDS                 -> Str with its
--      literal text, followed by whatever visible text trailed the macro
--   3. \index{...}, \phantom{...}                      -> removed
--   4. \hspace{...}, \vspace{...} (optionally starred) -> removed
--   5. \url{u}                                         -> RST `u <u>`__
--   6. \href{u}{t}                                     -> RST `t <u>`__
--      (an empty t falls back to u)
-- Anything else returns nothing, which leaves the element unchanged.
function RawInline(el)
    if el.format ~= "latex" then return end
    local text = el.text

    -- 1. Drop pure layout/whitespace macros (bare command, optional trailing *)
    local cmd = text:match("^(\\%a+)%*?%s*$") or text:match("^(\\%-)$")
    if cmd and DROP_COMMANDS[cmd] then
        return {}
    end

    -- 2. Text-expansion macros (bare, possibly followed by {} or whitespace).
    --    The third pattern intentionally omits an end-anchor so that a macro
    --    immediately followed by punctuation (e.g. "\LaTeX.") is still
    --    matched; the suffix is appended to the expansion below.
    local bare = text:match("^(\\%a+)%s*%{%}$")
        or text:match("^(\\%a+)%s*$")
        or text:match("^(\\%a+)[%s%p]")
    local expansion = bare and EXPAND_COMMANDS[bare]
    if expansion then
        -- Only the swallowed whitespace / empty groups are discarded; any
        -- visible text after them (e.g. the "ing" in "\LaTeX{}ing") is kept.
        local suffix = strip_swallowed_prefix(text:sub(#bare + 1))
        return pandoc.Str(expansion .. suffix)
    end

    -- 3. \index{...} and \phantom{...} – drop silently (no visible output)
    if text:match("^\\index%s*%{") or text:match("^\\phantom%s*%{") then
        return {}
    end

    -- 4. \hspace{...} / \vspace{...} (with or without *)
    if text:match("^\\[hv]space%*?%s*%{") then
        return {}
    end

    -- 5. \url{...} -> RST anonymous hyperlink
    --    Note: [^}]+ does not handle braces inside the URL (encoded as
    --    %7B/%7D or unencoded). IVOA standard sources use plain URLs without
    --    braces, so this is an accepted limitation.
    local url = text:match("^\\url%s*%{([^}]+)%}$")
    if url then
        return pandoc.RawInline("rst", "`" .. url .. " <" .. url .. ">`__")
    end

    -- 6. \href{url}{text} -> RST anonymous hyperlink
    --    Note: [^}]+ / [^}]* do not handle braces in either the URL or the
    --    link text. IVOA standard sources do not use braces in \href
    --    arguments, so this is an accepted limitation.
    local href_url, href_text = text:match("^\\href%s*%{([^}]+)%}%s*%{([^}]*)%}$")
    if href_url then
        if href_text == "" then href_text = href_url end
        return pandoc.RawInline("rst", "`" .. href_text .. " <" .. href_url .. ">`__")
    end

    -- Everything else: leave unchanged so unknown macros are preserved
end

-- Filter list returned to pandoc: a single filter whose only handler is the
-- RawInline function.
return {
{ RawInline = RawInline }
}
Loading