From bba310e51287a613bda4ae2dbe720bb2d0646eb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?=
Date: Fri, 30 Apr 2021 19:52:32 +0200
Subject: [PATCH] Better tags

---
 luamml-amsmath.lua | 16 ++--------
 luamml-lr.lua      | 75 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 14 deletions(-)
 create mode 100644 luamml-lr.lua

diff --git a/luamml-amsmath.lua b/luamml-amsmath.lua
index 340ede4..b8ac0e3 100644
--- a/luamml-amsmath.lua
+++ b/luamml-amsmath.lua
@@ -4,6 +4,7 @@ local save_result = require'luamml-tex'.save_result
 local store_column = require'luamml-table'.store_column
 local store_tag = require'luamml-table'.store_tag
 local get_table = require'luamml-table'.get_table
+local to_text = require'luamml-lr'
 
 local properties = node.get_properties_table()
 
@@ -42,20 +43,7 @@ token.set_lua('__luamml_amsmath_save_tag:', funcid, 'protected')
 lua.get_functions_table()[funcid] = function()
   local nest = tex.nest.top
   local chars = {}
-  for n, id, sub in node.traverse(nest.head.next) do
-    if id == node.id'glyph' then
-      if sub >= 0x100 then
-        texio.write_nl'WARNING: Already shaped glyph detected in tag. This might lead to wrong output.'
-      end
-      chars[#chars+1] = n.char
-    elseif id == node.id'glue' then
-      chars[#chars+1] = 0x20
-    elseif id == node.id'kern' then
-    else
-      texio.write_nl'WARNING: Unsupported node in tag dropped'
-    end
-  end
-  last_tag = utf8.char(table.unpack(chars))
+  last_tag = to_text(nest.head)
 end
 
 funcid = luatexbase.new_luafunction'__luamml_amsmath_set_tag:'
diff --git a/luamml-lr.lua b/luamml-lr.lua
new file mode 100644
index 0000000..d4921d1
--- /dev/null
+++ b/luamml-lr.lua
@@ -0,0 +1,75 @@
+local properties = node.get_properties_table()
+
+-- Convert a node list into a UTF-8 string approximating its text content.
+-- head: first node of the list to traverse.
+-- tail: optional node at which traversal stops (exclusive); when nil the
+--   list is walked to its end.
+-- Returns the concatenation of all collected pieces. Boxes and rules of
+-- nonzero width are represented by U+FFFD (replacement character).
+local function to_unicode(head, tail)
+  local result, i = {}, 0
+  local characters, last_fid
+  local iter, state, n = node.traverse(head)
+  while true do
+    local id
+    n, id = iter(state, n)
+    if not n or n == tail then break end
+    local props = properties[n]
+    if props and props.glyph_info then
+      -- Use the text recorded for this node in its properties. (The value
+      -- is handed to table.concat, so it is assumed to be a string.)
+      i = i+1
+      result[i] = props.glyph_info
+    else
+      local char, fid = node.is_glyph(n)
+      if char then
+        if fid ~= last_fid then
+          -- Only look up the font again when it differs from the last glyph's.
+          local fontdir = font.getfont(fid)
+          characters, last_fid = fontdir and fontdir.characters, fid
+        end
+        local entry = characters and characters[char]
+        local uni = entry and entry.unicode
+        i = i+1
+        if uni then
+          -- The font's unicode entry is either one code point or a list.
+          if type(uni) == 'number' then
+            result[i] = utf8.char(uni)
+          else
+            result[i] = utf8.char(table.unpack(uni))
+          end
+        else
+          -- No mapping: fall back to the slot itself if it is a code point.
+          if char < 0x110000 then
+            result[i] = utf8.char(char)
+          else
+            result[i] = '\u{FFFD}'
+          end
+        end
+      -- elseif node.id'math' == id then
+      --   n = node.end_of_math(n) -- Not sure yet
+      -- elseif node.id'whatsit' == id then
+      --   TODO(?)
+      elseif node.id'glue' == id then
+        if n.width > 1000 then -- FIXME: Coordinate constant with tagpdf
+          i = i+1
+          result[i] = ' '
+        end
+      elseif node.id'hlist' == id then
+        i = i+1
+        result[i] = '\u{FFFD}'
+      elseif node.id'vlist' == id then
+        i = i+1
+        result[i] = '\u{FFFD}'
+      elseif node.id'rule' == id then
+        if n.width ~= 0 then
+          i = i+1
+          result[i] = '\u{FFFD}'
+        end
+      end -- CHECK: Everything else can probably be ignored, otherwise shout at me
+    end
+  end
+  return table.concat(result)
+end
+
+return to_unicode