From d318dd3e2bc9fcd8cb229e07ba0b23758e0255b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?=
Date: Sun, 18 Apr 2021 15:19:52 +0200
Subject: [PATCH] Initial

---
 mlist_to_mml.lua | 196 +++++++++++++++++++++++++++++++++++++++++++++++
 test_tex.lua     |  13 ++++
 test_tex.tex     |  14 ++++
 test_xml.lua     |  16 +++++
 write_xml.lua    |  53 +++++++++++++++
 5 files changed, 292 insertions(+)
 create mode 100644 mlist_to_mml.lua
 create mode 100644 test_tex.lua
 create mode 100644 test_tex.tex
 create mode 100644 test_xml.lua
 create mode 100644 write_xml.lua

diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua
new file mode 100644
index 0000000..0ab19dd
--- /dev/null
+++ b/mlist_to_mml.lua
@@ -0,0 +1,196 @@
+-- Convert a LuaTeX math list (mlist) into a nested Lua table describing a
+-- MathML tree.  Index 0 of every element table holds the element name,
+-- string keys are attributes and the array part holds the children.
+local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice'
+local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence'
+
+local math_char_t, sub_box_t, sub_mlist_t = node.id'math_char', node.id'sub_box', node.id'sub_mlist'
+
+local noad_sub = node.subtypes'noad'
+local radical_sub = node.subtypes'radical'
+local fence_sub = node.subtypes'fence'
+
+-- Forward declaration: kernel_to_table and nodes_to_table are mutually recursive.
+local nodes_to_table
+
+-- Convert a delimiter node into an <mo> element (nil when there is no delimiter).
+-- We ignore large_... since they aren't used for modern fonts
+local function delim_to_table(delim)
+  if not delim then return end
+  local fam = delim.small_fam
+  return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil }
+end
+
+-- Convert a kernel (math_char, sub_box or sub_mlist) into an element table.
+-- Returns nil when the kernel is absent.
+local function kernel_to_table(kernel)
+  if not kernel then return end
+  local id = kernel.id
+  if id == math_char_t then
+    local char = kernel.char
+    -- ASCII digits '0' (0x30) through '9' (0x39) inclusive are numbers.
+    local elem = char >= 0x30 and char <= 0x39 and 'mn' or 'mi'
+    local fam = kernel.fam
+    return {[0] = elem, utf8.char(char), ['tex:family'] = fam ~= 0 and fam or nil }
+  elseif id == sub_box_t then
+    return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}}
+  elseif id == sub_mlist_t then
+    return nodes_to_table(kernel.list)
+  else
+    error'confusion'
+  end
+end
+
+-- Wrap element t in msub/msup/msubsup according to the scripts attached to n.
+local function do_sub_sup(t, n)
+  -- These must be locals: as globals they would leak and be clobbered by the
+  -- recursive kernel_to_table calls (e.g. the subscript of x_a^{b_c}).
+  local sub = kernel_to_table(n.sub)
+  local sup = kernel_to_table(n.sup)
+  if sub then
+    if sup then
+      return {[0] = 'msubsup', t, sub, sup}
+    else
+      return {[0] = 'msub', t, sub}
+    end
+  elseif sup then
+    return {[0] = 'msup', t, sup}
+  else
+    return t
+  end
+end
+
+-- Convert a simple noad; `sub` is its numeric subtype, looked up in noad_sub.
+local function noad_to_table(noad, sub)
+  local class = noad_sub[sub]
+  -- kernel_to_table returns nil for a missing nucleus; use an empty mrow so
+  -- the code below and msub/msup always have a base element to work with.
+  local nucleus = kernel_to_table(noad.nucleus) or {[0] = 'mrow'}
+  if class == 'ord' then
+  -- elseif class == 'opdisplaylimits' then
+  -- elseif class == 'oplimits' then
+  -- elseif class == 'opnolimits' then
+  elseif class == 'bin' or class == 'rel' or class == 'open'
+      or class == 'close' or class == 'punct' or class == 'inner' then
+    if nucleus[0] == 'mrow' then
+      -- TODO
+    else
+      nucleus[0] = 'mo'
+    end
+    nucleus['tex:class'] = class
+  -- elseif class == 'under' then
+  -- elseif class == 'over' then
+  -- elseif class == 'vcenter' then
+  else
+    -- error[[confusion]]
+    nucleus['tex:TODO'] = class
+  end
+  return do_sub_sup(nucleus, noad)
+end
+
+-- Convert a radical noad (roots and the under/over delimiter variants).
+local function radical_to_table(radical, sub)
+  local kind = radical_sub[sub]
+  local nucleus = kernel_to_table(radical.nucleus)
+  local left = delim_to_table(radical.left)
+  local elem
+  if kind == 'radical' or kind == 'uradical' then
+    -- FIXME: Check that this is really a square root
+    elem = {[0] = 'msqrt', nucleus}
+  elseif kind == 'uroot' then
+    -- FIXME: Check that this is really a root
+    -- The degree is a kernel, not a delimiter, and MathML expresses a root
+    -- with an explicit index as <mroot> base index </mroot>.
+    elem = {[0] = 'mroot', nucleus, kernel_to_table(radical.degree)}
+  elseif kind == 'uunderdelimiter' then
+    elem = {[0] = 'munder', left, nucleus}
+  elseif kind == 'uoverdelimiter' then
+    elem = {[0] = 'mover', left, nucleus}
+  elseif kind == 'udelimiterunder' then
+    elem = {[0] = 'munder', nucleus, left}
+  elseif kind == 'udelimiterover' then
+    elem = {[0] = 'mover', nucleus, left}
+  else
+    error[[confusion]]
+  end
+  return do_sub_sup(elem, radical)
+end
+
+-- Convert a fraction noad into <mfrac>, wrapped in an <mrow> together with
+-- its delimiters when it has any.
+local function fraction_to_table(fraction, sub)
+  local num = kernel_to_table(fraction.num)
+  local denom = kernel_to_table(fraction.denom)
+  local left = delim_to_table(fraction.left)
+  -- local middle = delim_to_table(fraction.middle)
+  local right = delim_to_table(fraction.right)
+  local mfrac = {[0] = 'mfrac',
+    linethickness = fraction.width and fraction.width == 0 and 0 or nil,
+    bevelled = fraction.middle and "true" or nil,
+    num,
+    denom,
+  }
+  if left then
+    return {[0] = 'mrow',
+      left,
+      mfrac,
+      right, -- might be nil
+    }
+  elseif right then
+    return {[0] = 'mrow',
+      mfrac,
+      right,
+    }
+  else
+    return mfrac
+  end
+end
+
+-- Not implemented yet (always raises); nodes_to_table does not call it so far.
+local function fence_to_table(fence, sub)
+  error[[TODO]]
+  return {
+    kind = fence_sub[sub],
+  }
+end
+
+-- Convert the node list starting at head into an <mrow> element table.
+function nodes_to_table(head)
+  local t = {[0] = "mrow"}
+  for n, id, sub in node.traverse(head) do
+    if id == noad_t then
+      t[#t+1] = noad_to_table(n, sub)
+    elseif id == accent_t then
+      print(n)
+      t[#t+1] = {[0] = 'TODO', accent = n}
+    elseif id == style_t then
+      print(n)
+      t[#t+1] = {[0] = 'TODO', style = n}
+    elseif id == choice_t then
+      print(n)
+      t[#t+1] = {[0] = 'TODO', choice = n}
+    elseif id == radical_t then
+      t[#t+1] = radical_to_table(n, sub)
+    elseif id == fraction_t then
+      t[#t+1] = fraction_to_table(n, sub)
+    elseif id == fence_t then
+      print(n)
+      t[#t+1] = {[0] = 'TODO', fence = n}
+    else
+      print(n)
+      -- Wrap unknown nodes like the other unhandled kinds; a bare node is
+      -- not an element table and cannot be serialized.
+      t[#t+1] = {[0] = 'TODO', node = n}
+    end
+  end
+  return t
+end
+
+-- Module entry point: convert a complete mlist into the <math> root element.
+return function(head)
+  local result = nodes_to_table(head)
+  result[0] = 'math'
+  result.xmlns = 'http://www.w3.org/1998/Math/MathML'
+  result['xmlns:tex'] = 'http://typesetting.eu/2021/LuaMathML'
+  return result
+end
diff --git a/test_tex.lua b/test_tex.lua
new file mode 100644
index 0000000..a1b56c0
--- /dev/null
+++ b/test_tex.lua
@@ -0,0 +1,13 @@
+local inspect = require'inspect'
+local function show(t) return print(inspect(t)) end
+
+local mlist_to_table = require'mlist_to_mml'
+local write_xml = require'write_xml'
+
+luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist)
+  print'\n\n'
+  local xml = mlist_to_table(mlist)
+  show(write_xml(xml))
+  print'\n'
+  return true
+end, 'dump_list')
diff --git a/test_tex.tex b/test_tex.tex
new file mode 100644
index 0000000..16c5ae9
--- /dev/null
+++ b/test_tex.tex
@@ -0,0 +1,14 @@
+\documentclass{article}
+\usepackage{unicode-math}
+\directlua{require'test_tex'}
+\begin{document}
+\[
+  ax^2+b+c=0
+\]
+\[
+  x = \frac{-b \pm \sqrt{b^2-4ac}}{2a}.
+\]
+\[
+  x = \longdivision{5}
+\]
+\end{document}
diff --git a/test_xml.lua b/test_xml.lua
new file mode 100644
index 0000000..f513cb1
--- /dev/null
+++ b/test_xml.lua
@@ -0,0 +1,16 @@
+local write_xml = require'write_xml'
+
+print(write_xml{[0] = "math", xmlns = "http://www.w3.org/1998/Math/MathML",
+  {[0] = "mi", "a"},
+  {[0] = "msup",
+    {[0] = "mi", "x"},
+    {[0] = "mn", "2"},
+  },
+  {[0] = "mo", "+"},
+  {[0] = "mi", "b"},
+  {[0] = "mi", "x"},
+  {[0] = "mo", "+"},
+  {[0] = "mi", "c"},
+  {[0] = "mo", "="},
+  {[0] = "mn", "0"},
+})
diff --git a/write_xml.lua b/write_xml.lua
new file mode 100644
index 0000000..aa842b3
--- /dev/null
+++ b/write_xml.lua
@@ -0,0 +1,53 @@
+-- Serialize an element table (element name at index 0, attributes under string
+-- keys, children in the array part) into an XML string.
+
+-- FIXME: Not sure yet if this will be needed
+local function escape_name(name)
+  return name
+end
+
+-- Characters which must not appear literally in text or attribute values.
+local escapes = {
+  ['"'] = "&quot;",
+  ['<'] = "&lt;",
+  ['>'] = "&gt;",
+  ['&'] = "&amp;",
+}
+local function escape_text(text)
+  -- Attribute values may be numbers or nodes, so convert to a string first.
+  -- The character class matches each special character individually, and the
+  -- parentheses drop the substitution count returned by gsub.
+  return (tostring(text):gsub('["<>&]', escapes))
+end
+
+local function write_elem(tree)
+  if not tree[0] then print('ERR', require'inspect'(tree)) end
+  local escaped_name = escape_name(assert(tree[0]))
+  -- Sort the attribute names: the traversal order of next is unspecified, so
+  -- emitting attributes in that order would not give a stable output.
+  local attrs = {}
+  for attr in next, tree do if type(attr) == 'string' then
+    attrs[#attrs+1] = attr
+  end end
+  table.sort(attrs)
+  local out = {"<", escaped_name}
+  for _, attr in ipairs(attrs) do
+    out[#out+1] = ' ' .. escape_name(attr) .. '="' .. escape_text(tree[attr]) .. '"'
+  end
+  if not tree[1] then
+    out[#out+1] = '/>'
+    return table.concat(out)
+  end
+  out[#out+1] = '>'
+  for _, elem in ipairs(tree) do
+    if type(elem) == 'string' then
+      out[#out+1] = escape_text(elem)
+    else
+      out[#out+1] = write_elem(elem)
+    end
+  end
+  out[#out+1] = '</' .. escaped_name .. '>'
+  return table.concat(out)
+end
+
+return write_elem