Skip to content

Commit 7a7689f

Browse files
committed
feat(Djot): Support Lua filters for altering the AST before processing
Exposed as a "filter" option on the Djot inputter.
1 parent c55beb8 commit 7a7689f

File tree

3 files changed

+222
-4
lines changed

3 files changed

+222
-4
lines changed

examples/custom-filter.lua

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
--- An example Lua filter for Djot.
2+
--
3+
-- This filter does two things in sequence:
4+
--
5+
-- - On both spans and strings, it re-maps some classes to custom styles.
6+
-- - On strings only, it transforms numbers with the class "siecle" into roman numerals with a superscript "e", as per French typographic conventions.
7+
--
8+
-- @license MIT
9+
-- @copyright (c) 2025 Omikhleia / Didier Willis
10+
11+
-- luacheck: globals djot
12+
13+
--- Mapping from a Djot class name to the custom style it implies.
local CLASS2STYLE = {
  software = "Software",
  hardware = "Hardware",
}

--- Re-map known classes of a node to a "custom-style" attribute.
--
-- Classes listed in CLASS2STYLE are removed from the node's "class" attribute
-- and the corresponding style is stored in its "custom-style" attribute
-- (overriding any existing one, with a warning).
-- Classes that are not listed are kept.
-- The node is modified in place; nothing is returned.
--
-- @tparam table e AST node (only its `attr` field is read and written)
local function classToStyle (e)
  if not (e.attr and e.attr['class']) then
    return -- No class attribute: nothing to do
  end
  local classes = pl.Set(pl.stringx.split(e.attr['class']))
  local styles = {}
  for class, style in pairs(CLASS2STYLE) do
    if classes[class] then
      styles[#styles+1] = style
      classes[class] = nil
    end
  end
  if #styles == 0 then
    return -- None of the classes implies a style
  end
  -- The traversal order of pairs() is unspecified: sort the candidates so
  -- that "the first one" is the same on every run.
  table.sort(styles)
  if #styles > 1 then
    SU.warn("Multiple styles implied by classes, using the first one '" .. styles[1] .. "'")
  end
  if e.attr['custom-style'] then
    SU.warn("Ignoring custom-style '" .. e.attr['custom-style'] .. "' because class implies style '" .. styles[1] .. "'")
  end
  e.attr['custom-style'] = styles[1]
  -- Unused classes are kept (sorted, for a reproducible attribute value)
  local remaining = pl.Set.values(classes)
  table.sort(remaining)
  e.attr.class = table.concat(remaining, " ")
end
40+
41+
--- Convert a number to lowercase roman numerals.
--
-- Uses the usual additive/subtractive notation (e.g. 4 is "iv", 900 is "cm").
-- Values below 1 yield an empty string, as do NaN and infinities.
-- There is no upper bound: thousands are expressed as repeated "m".
--
-- @tparam number num Number to convert
-- @treturn string Roman numeral (possibly empty)
local function numberToRoman (num)
  -- Non-finite values cannot be represented; repeated subtraction
  -- on +inf would never terminate.
  if num ~= num or num == math.huge or num == -math.huge then
    return ""
  end
  local val = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 }
  local syms = { "m", "cm", "d", "cd", "c", "xc", "l", "xl", "x", "ix", "v", "iv", "i" }
  local parts = {}
  for i = 1, #val do
    if num >= val[i] then
      -- How many times this symbol fits, computed at once rather than by
      -- repeated subtraction and string concatenation.
      local count = math.floor(num / val[i])
      num = num - count * val[i]
      parts[#parts + 1] = string.rep(syms[i], count)
    end
  end
  return table.concat(parts)
end
53+
54+
-- The list of filters, applied in sequence.
return {
  -- A first filter that maps classes to custom styles on spans and strings.
  -- Ex. [Pandoc]{.software} --> equivalent to [Pandoc]{custom-style="Software"}
  {
    span = function(e)
      classToStyle(e)
    end,
    str = function(e)
      classToStyle(e)
    end,
  },
  -- A second filter that transforms numbers with the class "siecle" into small caps roman
  -- numerals with a superscript "e", as per French typographic conventions.
  -- Ex. 21{.siecle} --> equivalent to xxi{.smallcaps}^e^
  -- It is somewhat specific to French, and we should rather delegate to a SILE command,
  -- but it's a good example of AST manipulation.
  {
    str = function(e)
      if not (e.attr and e.attr['class']) then
        return -- Nothing to do
      end
      -- Only plain decimal integers are accepted: tonumber() alone would also
      -- let through signs, fractions, exponents and hexadecimal notation,
      -- whose text would then be lost or mangled by the conversion.
      local text = e.text
      local num = type(text) == "string" and text:match("^%d+$") and tonumber(text)
      -- Standard roman notation covers 1 to 3999; anything else is left untouched.
      if not num or num < 1 or num > 3999 then
        return -- Nothing to do
      end
      local classes = pl.Set(pl.stringx.split(e.attr['class']))
      if not classes["siecle"] then
        return -- Nothing to do
      end
      -- Unused classes are kept
      classes["siecle"] = nil
      e.attr['class'] = table.concat(pl.Set.values(classes), " ")

      -- Create the roman numeral
      local century = djot.ast.new_node("str")
      century.text = numberToRoman(num)
      century.attr = djot.ast.new_attributes({ class = "smallcaps" })
      -- Create the superscript "e" ("er" for the first century)
      local exp = djot.ast.new_node("str")
      exp.text = num == 1 and "er" or "e"
      -- Create the superscript node
      local super = djot.ast.new_node("superscript")
      super.children = { exp }
      -- Transform the original str node into a span
      e.tag = "span"
      e.text = nil
      e.children = { century, super }
    end,
  }
}

inputters/djot.lua

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
-- Using the djot Lua library for parsing.
44
-- Reusing the common commands initially made for the "markdown" inputter/package.
55
--
6-
-- @copyright License: MIT (c) 2023-2024 Omikhleia, Didier Willis
6+
-- @copyright License: MIT (c) 2023-2025 Omikhleia, Didier Willis
77
-- @module inputters.djot
88
--
99
local utils = require("packages.markdown.utils")
@@ -775,6 +775,25 @@ function inputter.appropriate (round, filename, _)
775775
return false
776776
end
777777

778+
--- Load a Djot Lua filter from a file.
--
-- The file is located with SILE.resolveFile (a ".lua" extension is appended
-- to the name when missing) and run through utils.sandboxedLoadfile.
-- Errors are reported with SU.error when the file cannot be found, cannot
-- be loaded, or does not return a table.
--
-- @tparam string name Filter name, with or without the ".lua" extension
-- @tparam[opt] table env Additional entries for the sandboxed environment
-- @treturn table List of filters
function inputter:_loadFilter (name, env)
  -- Resolve the Lua file name in the same way as SILE for documents in the user tree.
  local qname = name:match("%.lua$") and name or (name .. ".lua")
  local filename = SILE.resolveFile(qname)
  if filename then
    local filter, err = utils.sandboxedLoadfile(filename, env)
    if not filter then
      -- The loader may come back without a message (e.g. the file returned
      -- nothing): never concatenate a nil value here.
      SU.error("Failure loading filter '".. name .. "': " .. tostring(err or "the file did not return a filter"))
    end
    if type(filter) ~= "table" then
      -- The length operator below would otherwise fail with an obscure message.
      SU.error("Failure loading filter '".. name .. "': expected a table, got a " .. type(filter))
    end
    if #filter == 0 then
      -- Just a single filter.
      -- (The Djot filter logic expects a list of filters applied in sequence.)
      filter = { filter }
    end
    return filter
  end
  SU.error("Cannot find filter '" .. name .. "'")
end
796+
778797
function inputter:parse (doc)
779798
local djot = require("djot")
780799
local djast = djot.parse(doc, true, function (warning)
@@ -791,6 +810,28 @@ function inputter:parse (doc)
791810
local snippet = luautf8.sub(doc, sp + 1, ep)
792811
SU.warn(warning.message .. " near [[…" .. snippet .. "]]")
793812
end)
813+
814+
local fname = self.options.filter
815+
if fname then
816+
if type(fname) ~= "string" then
817+
SU.error("The 'filter' option to the Djot inputter must be a string")
818+
end
819+
local filter = self:_loadFilter(fname, {
820+
-- Allow Djot AST manipulations in the filter environment.
821+
djot = {
822+
ast = {
823+
insert_attribute = djot.ast.insert_attribute,
824+
copy_attributes = djot.ast.copy_attributes,
825+
new_attributes = djot.ast.new_attributes,
826+
new_node = djot.ast.new_node,
827+
add_child = djot.ast.add_child,
828+
has_children = djot.ast.has_children,
829+
}
830+
}
831+
})
832+
djot.filter.apply_filter(djast, filter)
833+
end
834+
794835
local renderer = Renderer(self.options, doc)
795836
local tree = renderer:render(djast)
796837

packages/markdown/utils.lua

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,25 @@
11
--- A few utilities for the markdown / pandocast inputters
22
--
3-
-- @copyright License: MIT (c) 2022 Omikhleia
3+
-- @copyright License: MIT (c) 2022-2025 Omikhleia
44
-- @module packages.markdown.utils
55
--
66
local createCommand = SU.ast.createCommand
77
local createStructuredCommand = SU.ast.createStructuredCommand
88

99
--- Extract the extension from a file name.
--
-- Assumes a POSIX-compliant name (with a slash as path separators).
-- The extension is whatever follows the last dot of the base name; if the
-- base name contains no dot, the whole base name is returned.
--
-- @tparam string fname File name
-- @treturn string|nil File extension (nil if the name has no base name,
--   e.g. it is empty or ends with a slash, or if the base name ends with a dot)
local function getFileExtension (fname)
  local basename = fname:match("[^/]+$")
  if not basename then
    -- Empty name or trailing slash: there is nothing to extract from.
    return nil
  end
  return basename:match("[^.]+$")
end
1618

1719
--- Non-breakable space extraction from a string.
20+
--
1821
-- It replaces them with an appropriate non-breakable inter-word space command.
22+
--
1923
-- @tparam string str Input string
2024
-- @treturn string|table Filtered string or SILE AST table
2125
local function nbspFilter (str)
@@ -33,6 +37,7 @@ local function nbspFilter (str)
3337
end
3438

3539
--- Check if a given class is present in the options.
40+
--
3641
-- @tparam table options Command options
3742
-- @tparam string classname Pseudo-class specifier
3843
-- @treturn boolean
@@ -45,6 +50,7 @@ local function hasClass (options, classname)
4550
end
4651

4752
--- Find the first raw handler suitable for the given pseudo-class attributes.
53+
--
4854
-- @tparam table options Command options
4955
-- @treturn function|nil Handler function (if found)
5056
local function hasRawHandler (options)
@@ -58,6 +64,7 @@ local function hasRawHandler (options)
5864
end
5965

6066
--- Find the first embedder suitable for the given pseudo-class attributes.
67+
--
6168
-- @tparam table options Command options with class attribute (nil or list of comma-separated classes)
6269
-- @treturn string|nil Embedder name (if found)
6370
-- @treturn function|nil Embedder handler function (if applicable)
@@ -95,8 +102,10 @@ local metrics = require("fontmetrics")
95102
local bsratiocache = {}
96103

97104
--- Compute the baseline ratio for the current font.
98-
--- This is a ratio of the descender to the theoretical height of the font.
99-
---@treturn number Descender ratio
105+
--
106+
-- This is a ratio of the descender to the theoretical height of the font.
107+
--
108+
-- @treturn number Descender ratio
100109
local function computeBaselineRatio ()
101110
local fontoptions = SILE.font.loadDefaults({})
102111
local bsratio = bsratiocache[SILE.font._key(fontoptions)]
@@ -110,8 +119,10 @@ local function computeBaselineRatio ()
110119
end
111120

112121
--- Naive citation reference parser.
122+
--
113123
-- We only support a very simple syntax for now: `@key[, ]+[locator]`,
114124
-- where the unique locator consists of a name and a value separated by spaces.
125+
--
115126
-- @tparam string str Citation string
116127
-- @tparam[opt] table pos Position in the source (for error reporting)
117128
-- @treturn table AST for the citation command
@@ -143,6 +154,72 @@ local function naiveCitations (str, pos)
143154
return createStructuredCommand("cites", {}, refs, pos)
144155
end
145156

157+
--- A sandboxed loadfile implementation.
--
-- Load and run a Lua file in a restricted environment.
-- Only text chunks are accepted (no precompiled bytecode).
--
-- @tparam string filename File name
-- @tparam[opt] table env Additional environment entries (they take precedence
--   over the default ones)
-- @treturn unknown|nil Value returned by the chunk (nil on failure)
-- @treturn string|nil Error message
local function sandboxedLoadfile(filename, env)
  -- The standard library tables are handed over as shallow copies, so that
  -- assignments made by the chunk (e.g. string.rep = ...) do not leak into
  -- the host's own tables.
  local copy = pl.tablex.copy
  local envbase = {
    -- Handy for debugging: print, SU logging functions, pl.pretty.dump.
    -- Handy for table and string manipulations: table, pl.tablex, string, pl.stringx, pl.List, pl.Map, pl.Set.
    print = print,
    SU = {
      debug = SU.debug,
      error = SU.error,
      warn = SU.warn,
    },
    pl = {
      pretty = {
        dump = function (data) pl.pretty.dump(data) end -- To avoid the second unsafe argument
      },
      tablex = pl.tablex,
      stringx = pl.stringx,
      List = pl.List,
      Map = pl.Map,
      Set = pl.Set,
    },
    table = copy(table),
    string = copy(string),
    -- And a few basic safe functions...
    math = copy(math),
    ipairs = ipairs,
    pairs = pairs,
    type = type,
    tostring = tostring,
    tonumber = tonumber,
    next = next,
    error = error,
    pcall = pcall,
  }
  env = pl.tablex.union(envbase, env or {}, true)
  local f, err
  -- Load in a sandboxed environment:
  -- Strategies differ between Lua 5.1 and later versions.
  if _VERSION == "Lua 5.1" then
    -- loadfile has no "mode" parameter here and would accept a precompiled
    -- chunk: check the content ourselves and refuse binary chunks, which
    -- start with an ESC character (possibly after a leading "#" line).
    local fh, ferr = io.open(filename, "rb")
    if not fh then
      return nil, ferr
    end
    local content = fh:read("*a") or ""
    fh:close()
    if content:sub(1, 1) == "\27" or content:match("^#[^\n]*\n\27") then
      return nil, "attempt to load a binary chunk from '" .. filename .. "'"
    end
    f, err = loadfile(filename)
    if not f then
      return nil, err
    end
    -- luacheck: push globals setfenv
    setfenv(f, env)
    -- luacheck: pop
  else
    -- Mode "t": text chunks only.
    f, err = loadfile(filename, "t", env)
    if not f then
      return nil, err
    end
  end
  -- Run the chunk in protected mode
  local ok, res = pcall(f)
  if not ok then
    -- The error value is not necessarily a string.
    return nil, tostring(res)
  end
  if res == nil then
    -- Always give callers a message to report along with the nil result.
    return nil, "file '" .. filename .. "' did not return a value"
  end
  return res
end
222+
146223
--- @export
147224
return {
148225
getFileExtension = getFileExtension,
@@ -152,4 +229,5 @@ return {
152229
hasEmbedHandler = hasEmbedHandler,
153230
computeBaselineRatio = computeBaselineRatio,
154231
naiveCitations = naiveCitations,
232+
sandboxedLoadfile = sandboxedLoadfile,
155233
}

0 commit comments

Comments
 (0)