2021-04-30 19:52:32 +02:00
|
|
|
-- Node property table obtained from the `node` library. to_unicode indexes it
-- by node and reads the `glyph_info`, `saved_mathml_table` and
-- `saved_mathml_core` fields of the per-node entries.
local properties = node.get_properties_table()
|
|
|
|
|
|
|
|
--- Flatten a node list into an `mtext` element table.
--
-- Walks the list starting at `head` and collects one entry per relevant node:
--   * a node whose property entry has `glyph_info` contributes that value,
--   * a glyph contributes its Unicode mapping from the font's character table
--     (falling back to the glyph's own code, or U+FFFD if that is out of range),
--   * a math node with saved MathML contributes that MathML table and the
--     traversal skips to the matching end-of-math node,
--   * sufficiently wide glue contributes a no-break space,
--   * an hlist is flattened recursively,
--   * a vlist or a rule with non-zero width contributes U+FFFD.
--
-- @param head first node of the list to convert
-- @param tail optional node at which to stop (exclusive); nil walks to the end
-- @return table with `[0] = 'mtext'` and the collected entries at 1..n
local function to_unicode(head, tail)
  -- Resolve the node type ids once per call instead of once per visited node.
  local math_id, glue_id = node.id'math', node.id'glue'
  local hlist_id, vlist_id, rule_id = node.id'hlist', node.id'vlist', node.id'rule'

  local result, i = {[0] = 'mtext'}, 0
  -- Cache of the character table for the most recently seen font id.
  local characters, last_fid
  -- The iterator is driven by hand so that `n` can be advanced manually
  -- (see the math branch below).
  local iter, state, n = node.traverse(head)
  while true do
    local id
    n, id = iter(state, n)
    if not n or n == tail then break end
    local props = properties[n]
    if props and props.glyph_info then
      i = i+1
      -- Was a bare `glyph_info` (an undefined global, i.e. nil), which dropped
      -- the entry and left a hole in `result`.
      result[i] = props.glyph_info
    else
      local char, fid = node.is_glyph(n)
      if char then
        if fid ~= last_fid then
          local fontdir = font.getfont(fid)
          -- Guard against font ids for which no font table is available.
          characters, last_fid = fontdir and fontdir.characters, fid
        end
        local char_data = characters and characters[char]
        local uni = char_data and char_data.unicode
        i = i+1
        if uni then
          if type(uni) == 'number' then
            result[i] = utf.char(uni)
          else
            -- Non-number mapping: treated as a list of code points.
            result[i] = utf.char(table.unpack(uni))
          end
        else
          if char < 0x110000 then
            result[i] = utf.char(char)
          else
            result[i] = '\u{FFFD}'
          end
        end
      elseif math_id == id then
        if props then
          local mml = props.saved_mathml_table or props.saved_mathml_core
          if mml then
            i = i+1
            result[i] = mml
            -- Skip the content of the formula; it is represented by `mml`.
            n = node.end_of_math(n)
            -- A nil control value would restart the traversal at `head`,
            -- so stop instead if no end-of-math node exists.
            if not n then break end
          end
        end
      -- elseif node.id'whatsit' == id then
        -- TODO(?)
      elseif glue_id == id then
        if n.width > 1000 then -- FIXME: Coordinate constant with tagpdf
          i = i+1
          result[i] = '\u{00A0}' -- non breaking space... There is no real reason why it has to be non breaking, except that MathML often ignore other spaces
        end
      elseif hlist_id == id then
        local nested = to_unicode(n.head)
        table.move(nested, 1, #nested, i+1, result)
        i = i+#nested
      elseif vlist_id == id then
        i = i+1
        result[i] = '\u{FFFD}'
      elseif rule_id == id then
        if n.width ~= 0 then
          i = i+1
          result[i] = '\u{FFFD}'
        end
      end -- CHECK: Everything else can probably be ignored, otherwise shout at me
    end
  end
  return result
end
|
|
|
|
|
|
|
|
-- The value of this chunk is the to_unicode function itself.
return to_unicode
|