From d318dd3e2bc9fcd8cb229e07ba0b23758e0255b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Sun, 18 Apr 2021 15:19:52 +0200 Subject: [PATCH 01/10] Initial --- mlist_to_mml.lua | 172 +++++++++++++++++++++++++++++++++++++++++++++++ test_tex.lua | 13 ++++ test_tex.tex | 14 ++++ test_xml.lua | 16 +++++ write_xml.lua | 38 +++++++++++ 5 files changed, 253 insertions(+) create mode 100644 mlist_to_mml.lua create mode 100644 test_tex.lua create mode 100644 test_tex.tex create mode 100644 test_xml.lua create mode 100644 write_xml.lua diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua new file mode 100644 index 0000000..0ab19dd --- /dev/null +++ b/mlist_to_mml.lua @@ -0,0 +1,172 @@ +local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice' +local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence' + +local math_char_t, sub_box_t, sub_mlist_t = node.id'math_char', node.id'sub_box', node.id'sub_mlist' + +local noad_sub = node.subtypes'noad' +local radical_sub = node.subtypes'radical' +local fence_sub = node.subtypes'fence' + +local nodes_to_table + +-- We ignore large_... 
since they aren't used for modern fonts +local function delim_to_table(delim) + if not delim then return end + local fam = delim.small_fam + return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil } +end + +local function kernel_to_table(kernel) + if not kernel then return end + local id = kernel.id + if id == math_char_t then + local char = kernel.char + local elem = char >= 0x30 and char < 0x39 and 'mn' or 'mi' + local fam = kernel.fam + return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil } + elseif id == sub_box_t then + return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} + elseif id == sub_mlist_t then + return nodes_to_table(kernel.list) + else + error'confusion' + end +end + +local function do_sub_sup(t, n) + sub = kernel_to_table(n.sub) + sup = kernel_to_table(n.sup) + if sub then + if sup then + return {[0] = 'msubsup', t, sub, sup} + else + return {[0] = 'msub', t, sub} + end + elseif sup then + return {[0] = 'msup', t, sup} + else + return t + end +end + +local function noad_to_table(noad, sub) + local class = noad_sub[sub] + local nucleus = kernel_to_table(noad.nucleus) + if class == 'ord' then + -- elseif class == 'opdisplaylimits' then + -- elseif class == 'oplimits' then + -- elseif class == 'opnolimits' then + elseif class == 'bin' or class == 'rel' or class == 'open' + or class == 'close' or class == 'punct' or class == 'inner' then + if nucleus[0] == 'mrow' then + -- TODO + else + nucleus[0] = 'mo' + end + nucleus['tex:class'] = class + -- elseif class == 'under' then + -- elseif class == 'over' then + -- elseif class == 'vcenter' then + else + -- error[[confusion]] + nucleus['tex:TODO'] = class + end + return do_sub_sup(nucleus, noad) +end + +local function radical_to_table(radical, sub) + local kind = radical_sub[sub] + local nucleus = kernel_to_table(radical.nucleus) + local left = delim_to_table(radical.left) + local elem + if kind == 'radical' or kind == 'uradical' 
then + -- FIXME: Check that this is really a square root + elem = {[0] = 'msqrt', nucleus} + elseif kind == 'uroot' then + -- FIXME: Check that this is really a root + elem = {[0] = 'msqrt', nucleus, delim_to_table(radical.degree)} + elseif kind == 'uunderdelimiter' then + elem = {[0] = 'munder', left, nucleus} + elseif kind == 'uoverdelimiter' then + elem = {[0] = 'mover', left, nucleus} + elseif kind == 'udelimiterunder' then + elem = {[0] = 'munder', nucleus, left} + elseif kind == 'udelimiterover' then + elem = {[0] = 'mover', nucleus, left} + else + error[[confusion]] + end + return do_sub_sup(elem, radical) +end + +local function fraction_to_table(fraction, sub) + local num = kernel_to_table(fraction.num) + local denom = kernel_to_table(fraction.denom) + local left = delim_to_table(fraction.left) + -- local middle = delim_to_table(fraction.middle) + local right = delim_to_table(fraction.right) + local mfrac = {[0] = 'mfrac', + linethickness = fraction.width and fraction.width == 0 and 0 or nil, + bevelled = fraction.middle and "true" or nil, + num, + denom, + } + if left then + return {[0] = 'mrow', + left, + mfrac, + right, -- might be nil + } + elseif right then + return {[0] = 'mrow', + mfrac, + right, + } + else + return mfrac + end +end + +local function fence_to_table(fraction, sub) + error[[TODO]] + return { + kind = fence_sub[sub], + } +end + +function nodes_to_table(head) + local t = {[0] = "mrow"} + for n, id, sub in node.traverse(head) do + if id == noad_t then + t[#t+1] = noad_to_table(n, sub) + elseif id == accent_t then + print(n) + t[#t+1] = {[0] = 'TODO', accent = n} + elseif id == style_t then + print(n) + t[#t+1] = {[0] = 'TODO', style = n} + elseif id == choice_t then + print(n) + t[#t+1] = {[0] = 'TODO', choice = n} + elseif id == radical_t then + t[#t+1] = radical_to_table(n, sub) + elseif id == fraction_t then + t[#t+1] = fraction_to_table(n, sub) + elseif id == fence_t then + print(n) + t[#t+1] = {[0] = 'TODO', fence = n} + else + 
print(n) + t[#t+1] = n + end + end + return t +end + +return function(head) + local result = nodes_to_table(head) + result[0] = 'math' + result.xmlns = 'http://www.w3.org/1998/Math/MathML' + result['xmlns:tex'] = 'http://typesetting.eu/2021/LuaMathML' + return result +end diff --git a/test_tex.lua b/test_tex.lua new file mode 100644 index 0000000..a1b56c0 --- /dev/null +++ b/test_tex.lua @@ -0,0 +1,13 @@ +local inspect = require'inspect' +local function show(t) return print(inspect(t)) end + +local mlist_to_table = require'mlist_to_mml' +local write_xml = require'write_xml' + +luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist) + print'\n\n' + local xml = mlist_to_table(mlist) + show(write_xml(xml)) + print'\n' + return true +end, 'dump_list') diff --git a/test_tex.tex b/test_tex.tex new file mode 100644 index 0000000..16c5ae9 --- /dev/null +++ b/test_tex.tex @@ -0,0 +1,14 @@ +\documentclass{article} +\usepackage{unicode-math} +\directlua{require'test_tex'} +\begin{document} +\[ + ax^2+b+c=0 +\] +\[ + x = \frac{-b \pm \sqrt{b^2-4ac}}{2a}. 
+\] +\[ + x = \longdivision{5} +\] +\end{document} diff --git a/test_xml.lua b/test_xml.lua new file mode 100644 index 0000000..f513cb1 --- /dev/null +++ b/test_xml.lua @@ -0,0 +1,16 @@ +local write_xml = require'write_xml' + +print(write_xml{[0] = "math", xmlns = "http://www.w3.org/1998/Math/MathML", + {[0] = "mi", "a"}, + {[0] = "msup", + {[0] = "mi", "x"}, + {[0] = "mn", "2"}, + }, + {[0] = "mo", "+"}, + {[0] = "mi", "b"}, + {[0] = "mi", "x"}, + {[0] = "mo", "+"}, + {[0] = "mi", "c"}, + {[0] = "mo", "="}, + {[0] = "mn", "0"}, + }) diff --git a/write_xml.lua b/write_xml.lua new file mode 100644 index 0000000..aa842b3 --- /dev/null +++ b/write_xml.lua @@ -0,0 +1,38 @@ +-- FIXME: Not sure yet if this will be needed +local function escape_name(name) + return name +end + +-- FIXME: Not sure yet if this will be needed +local escapes = { + ['"'] = """, + ['<'] = "<", + ['>'] = ">", + ['&'] = "&", +} +local function escape_text(text) + return text:gsub('("<>&)', escapes) +end + +local function write_elem(tree) + if not tree[0] then print('ERR', require'inspect'(tree)) end + local escaped_name = escape_name(assert(tree[0])) + local out = "<" .. escaped_name + for attr, val in next, tree do if type(attr) == 'string' then + out = out .. ' ' .. escape_name(attr) .. '="' .. escape_text(val) .. '"' + end end + if not tree[1] then + return out .. '/>' + end + out = out .. '>' + for _, elem in ipairs(tree) do + if type(elem) == 'string' then + out = out .. escape_text(elem) + else + out = out .. write_elem(elem) + end + end + return out .. 
'' +end + +return write_elem From 098811063c4255a53750fa7ec6d595584e4ec590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Sun, 18 Apr 2021 15:19:52 +0200 Subject: [PATCH 02/10] Initial --- mlist_to_mml.lua | 172 +++++++++++++++++++++++++++++++++++++++++++++++ test_tex.lua | 13 ++++ test_tex.tex | 14 ++++ test_xml.lua | 16 +++++ write_xml.lua | 38 +++++++++++ 5 files changed, 253 insertions(+) create mode 100644 mlist_to_mml.lua create mode 100644 test_tex.lua create mode 100644 test_tex.tex create mode 100644 test_xml.lua create mode 100644 write_xml.lua diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua new file mode 100644 index 0000000..0ab19dd --- /dev/null +++ b/mlist_to_mml.lua @@ -0,0 +1,172 @@ +local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice' +local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence' + +local math_char_t, sub_box_t, sub_mlist_t = node.id'math_char', node.id'sub_box', node.id'sub_mlist' + +local noad_sub = node.subtypes'noad' +local radical_sub = node.subtypes'radical' +local fence_sub = node.subtypes'fence' + +local nodes_to_table + +-- We ignore large_... 
since they aren't used for modern fonts +local function delim_to_table(delim) + if not delim then return end + local fam = delim.small_fam + return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil } +end + +local function kernel_to_table(kernel) + if not kernel then return end + local id = kernel.id + if id == math_char_t then + local char = kernel.char + local elem = char >= 0x30 and char < 0x39 and 'mn' or 'mi' + local fam = kernel.fam + return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil } + elseif id == sub_box_t then + return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} + elseif id == sub_mlist_t then + return nodes_to_table(kernel.list) + else + error'confusion' + end +end + +local function do_sub_sup(t, n) + sub = kernel_to_table(n.sub) + sup = kernel_to_table(n.sup) + if sub then + if sup then + return {[0] = 'msubsup', t, sub, sup} + else + return {[0] = 'msub', t, sub} + end + elseif sup then + return {[0] = 'msup', t, sup} + else + return t + end +end + +local function noad_to_table(noad, sub) + local class = noad_sub[sub] + local nucleus = kernel_to_table(noad.nucleus) + if class == 'ord' then + -- elseif class == 'opdisplaylimits' then + -- elseif class == 'oplimits' then + -- elseif class == 'opnolimits' then + elseif class == 'bin' or class == 'rel' or class == 'open' + or class == 'close' or class == 'punct' or class == 'inner' then + if nucleus[0] == 'mrow' then + -- TODO + else + nucleus[0] = 'mo' + end + nucleus['tex:class'] = class + -- elseif class == 'under' then + -- elseif class == 'over' then + -- elseif class == 'vcenter' then + else + -- error[[confusion]] + nucleus['tex:TODO'] = class + end + return do_sub_sup(nucleus, noad) +end + +local function radical_to_table(radical, sub) + local kind = radical_sub[sub] + local nucleus = kernel_to_table(radical.nucleus) + local left = delim_to_table(radical.left) + local elem + if kind == 'radical' or kind == 'uradical' 
then + -- FIXME: Check that this is really a square root + elem = {[0] = 'msqrt', nucleus} + elseif kind == 'uroot' then + -- FIXME: Check that this is really a root + elem = {[0] = 'msqrt', nucleus, delim_to_table(radical.degree)} + elseif kind == 'uunderdelimiter' then + elem = {[0] = 'munder', left, nucleus} + elseif kind == 'uoverdelimiter' then + elem = {[0] = 'mover', left, nucleus} + elseif kind == 'udelimiterunder' then + elem = {[0] = 'munder', nucleus, left} + elseif kind == 'udelimiterover' then + elem = {[0] = 'mover', nucleus, left} + else + error[[confusion]] + end + return do_sub_sup(elem, radical) +end + +local function fraction_to_table(fraction, sub) + local num = kernel_to_table(fraction.num) + local denom = kernel_to_table(fraction.denom) + local left = delim_to_table(fraction.left) + -- local middle = delim_to_table(fraction.middle) + local right = delim_to_table(fraction.right) + local mfrac = {[0] = 'mfrac', + linethickness = fraction.width and fraction.width == 0 and 0 or nil, + bevelled = fraction.middle and "true" or nil, + num, + denom, + } + if left then + return {[0] = 'mrow', + left, + mfrac, + right, -- might be nil + } + elseif right then + return {[0] = 'mrow', + mfrac, + right, + } + else + return mfrac + end +end + +local function fence_to_table(fraction, sub) + error[[TODO]] + return { + kind = fence_sub[sub], + } +end + +function nodes_to_table(head) + local t = {[0] = "mrow"} + for n, id, sub in node.traverse(head) do + if id == noad_t then + t[#t+1] = noad_to_table(n, sub) + elseif id == accent_t then + print(n) + t[#t+1] = {[0] = 'TODO', accent = n} + elseif id == style_t then + print(n) + t[#t+1] = {[0] = 'TODO', style = n} + elseif id == choice_t then + print(n) + t[#t+1] = {[0] = 'TODO', choice = n} + elseif id == radical_t then + t[#t+1] = radical_to_table(n, sub) + elseif id == fraction_t then + t[#t+1] = fraction_to_table(n, sub) + elseif id == fence_t then + print(n) + t[#t+1] = {[0] = 'TODO', fence = n} + else + 
print(n) + t[#t+1] = n + end + end + return t +end + +return function(head) + local result = nodes_to_table(head) + result[0] = 'math' + result.xmlns = 'http://www.w3.org/1998/Math/MathML' + result['xmlns:tex'] = 'http://typesetting.eu/2021/LuaMathML' + return result +end diff --git a/test_tex.lua b/test_tex.lua new file mode 100644 index 0000000..a1b56c0 --- /dev/null +++ b/test_tex.lua @@ -0,0 +1,13 @@ +local inspect = require'inspect' +local function show(t) return print(inspect(t)) end + +local mlist_to_table = require'mlist_to_mml' +local write_xml = require'write_xml' + +luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist) + print'\n\n' + local xml = mlist_to_table(mlist) + show(write_xml(xml)) + print'\n' + return true +end, 'dump_list') diff --git a/test_tex.tex b/test_tex.tex new file mode 100644 index 0000000..16c5ae9 --- /dev/null +++ b/test_tex.tex @@ -0,0 +1,14 @@ +\documentclass{article} +\usepackage{unicode-math} +\directlua{require'test_tex'} +\begin{document} +\[ + ax^2+b+c=0 +\] +\[ + x = \frac{-b \pm \sqrt{b^2-4ac}}{2a}. 
+\] +\[ + x = \longdivision{5} +\] +\end{document} diff --git a/test_xml.lua b/test_xml.lua new file mode 100644 index 0000000..f513cb1 --- /dev/null +++ b/test_xml.lua @@ -0,0 +1,16 @@ +local write_xml = require'write_xml' + +print(write_xml{[0] = "math", xmlns = "http://www.w3.org/1998/Math/MathML", + {[0] = "mi", "a"}, + {[0] = "msup", + {[0] = "mi", "x"}, + {[0] = "mn", "2"}, + }, + {[0] = "mo", "+"}, + {[0] = "mi", "b"}, + {[0] = "mi", "x"}, + {[0] = "mo", "+"}, + {[0] = "mi", "c"}, + {[0] = "mo", "="}, + {[0] = "mn", "0"}, + }) diff --git a/write_xml.lua b/write_xml.lua new file mode 100644 index 0000000..aa842b3 --- /dev/null +++ b/write_xml.lua @@ -0,0 +1,38 @@ +-- FIXME: Not sure yet if this will be needed +local function escape_name(name) + return name +end + +-- FIXME: Not sure yet if this will be needed +local escapes = { + ['"'] = """, + ['<'] = "<", + ['>'] = ">", + ['&'] = "&", +} +local function escape_text(text) + return text:gsub('("<>&)', escapes) +end + +local function write_elem(tree) + if not tree[0] then print('ERR', require'inspect'(tree)) end + local escaped_name = escape_name(assert(tree[0])) + local out = "<" .. escaped_name + for attr, val in next, tree do if type(attr) == 'string' then + out = out .. ' ' .. escape_name(attr) .. '="' .. escape_text(val) .. '"' + end end + if not tree[1] then + return out .. '/>' + end + out = out .. '>' + for _, elem in ipairs(tree) do + if type(elem) == 'string' then + out = out .. escape_text(elem) + else + out = out .. write_elem(elem) + end + end + return out .. 
'' +end + +return write_elem From 057aec4e6f85de51ad6772bdcf6f4107731c1318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 13:30:54 +0200 Subject: [PATCH 03/10] Handle many TODOs --- mlist_to_mml.lua | 157 +++++++++++++++++++++++++++++++++-------------- test_tex.lua | 7 ++- test_tex.tex | 2 +- write_xml.lua | 12 +++- 4 files changed, 125 insertions(+), 53 deletions(-) diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index 0ab19dd..3eed3b8 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -9,6 +9,9 @@ local fence_sub = node.subtypes'fence' local nodes_to_table +local function sub_style(s) return s//4*2+5 end +local function sup_style(s) return s//4*2+4+s%2 end + -- We ignore large_... since they aren't used for modern fonts local function delim_to_table(delim) if not delim then return end @@ -16,7 +19,7 @@ local function delim_to_table(delim) return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil } end -local function kernel_to_table(kernel) +local function kernel_to_table(kernel, cur_style) if not kernel then return end local id = kernel.id if id == math_char_t then @@ -27,15 +30,15 @@ local function kernel_to_table(kernel) elseif id == sub_box_t then return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} elseif id == sub_mlist_t then - return nodes_to_table(kernel.list) + return nodes_to_table(kernel.list, cur_style) else error'confusion' end end -local function do_sub_sup(t, n) - sub = kernel_to_table(n.sub) - sup = kernel_to_table(n.sup) +local function do_sub_sup(t, n, cur_style) + local sub = kernel_to_table(n.sub, sub_style(cur_style)) + local sup = kernel_to_table(n.sup, sup_style(cur_style)) if sub then if sup then return {[0] = 'msubsup', t, sub, sup} @@ -49,14 +52,11 @@ local function do_sub_sup(t, n) end end -local function noad_to_table(noad, sub) +local function noad_to_table(noad, sub, cur_style) local class = noad_sub[sub] - local nucleus = 
kernel_to_table(noad.nucleus) + local nucleus = kernel_to_table(noad.nucleus, class == 'over' and cur_style//2*2+1 or cur_style) if class == 'ord' then - -- elseif class == 'opdisplaylimits' then - -- elseif class == 'oplimits' then - -- elseif class == 'opnolimits' then - elseif class == 'bin' or class == 'rel' or class == 'open' + elseif class == 'opdisplaylimits' or class == 'oplimits' or class == 'opnolimits' or class == 'bin' or class == 'rel' or class == 'open' or class == 'close' or class == 'punct' or class == 'inner' then if nucleus[0] == 'mrow' then -- TODO @@ -64,19 +64,73 @@ local function noad_to_table(noad, sub) nucleus[0] = 'mo' end nucleus['tex:class'] = class - -- elseif class == 'under' then - -- elseif class == 'over' then - -- elseif class == 'vcenter' then - else - -- error[[confusion]] + + if (noad.sup or noad.sub) and (class == 'opdisplaylimits' or class == 'oplimits') then + nucleus.movablelimits = class == 'opdisplaylimits' + local sub = kernel_to_table(noad.sub, sub_style(cur_style)) + local sup = kernel_to_table(noad.sup, sup_style(cur_style)) + return {[0] = sup and (sub and 'munderover' or 'mover') or 'munder', + nucleus, + sub or sup, + sub and sup, + } + end + elseif class == 'under' then + return {[0] = 'munder', + nucleus, + {[0] = 'mo', '_',}, + } + elseif class == 'over' then + return {[0] = 'mover', + nucleus, + {[0] = 'mo', '\u{203E}',}, + } + elseif class == 'vcenter' then nucleus['tex:TODO'] = class + else + error[[confusion]] end - return do_sub_sup(nucleus, noad) + return do_sub_sup(nucleus, noad, cur_style) end -local function radical_to_table(radical, sub) +local function accent_to_table(accent, sub, cur_style) + local nucleus = kernel_to_table(accent.nucleus, cur_style//2*2+1) + local top_acc = kernel_to_table(accent.accent, cur_style) + local bot_acc = kernel_to_table(accent.bot_accent, cur_style) + if top_acc then + top_acc[0] = 'mo' + if sub & 1 == 1 then + top_acc.stretchy = 'false' + end + end + if bot_acc then + 
bot_acc[0] = 'mo' + if sub & 2 == 2 then + bot_acc.stretchy = 'false' + end + end + return {[0] = top_acc and (bot_acc and 'munderover' or 'mover') or 'munder', + nucleus, + bot_acc or top_acc, + bot_acc and top_acc, + } +end + +local style_table = { + display = {displaystyle = "true", scriptlevel = "0"}, + text = {displaystyle = "false", scriptlevel = "0"}, + script = {displaystyle = "false", scriptlevel = "1"}, + scriptscript = {displaystyle = "false", scriptlevel = "2"}, +} + +style_table.crampeddisplay, style_table.crampedtext, +style_table.crampedscript, style_table.crampedscriptscript = + style_table.display, style_table.text, + style_table.script, style_table.scriptscript + +local function radical_to_table(radical, sub, cur_style) local kind = radical_sub[sub] - local nucleus = kernel_to_table(radical.nucleus) + local nucleus = kernel_to_table(radical.nucleus, cur_style//2*2+1) local left = delim_to_table(radical.left) local elem if kind == 'radical' or kind == 'uradical' then @@ -96,14 +150,13 @@ local function radical_to_table(radical, sub) else error[[confusion]] end - return do_sub_sup(elem, radical) + return do_sub_sup(elem, radical, cur_style) end -local function fraction_to_table(fraction, sub) - local num = kernel_to_table(fraction.num) - local denom = kernel_to_table(fraction.denom) +local function fraction_to_table(fraction, sub, cur_style) + local num = kernel_to_table(fraction.num, sup_style(cur_style)) + local denom = kernel_to_table(fraction.denom, sub_style(cur_style)) local left = delim_to_table(fraction.left) - -- local middle = delim_to_table(fraction.middle) local right = delim_to_table(fraction.right) local mfrac = {[0] = 'mfrac', linethickness = fraction.width and fraction.width == 0 and 0 or nil, @@ -127,46 +180,58 @@ local function fraction_to_table(fraction, sub) end end -local function fence_to_table(fraction, sub) - error[[TODO]] - return { - kind = fence_sub[sub], - } +local function fence_to_table(fence, sub, cur_style) + local 
delim = delim_to_table(fence.delimiter) + delim.stretchy = 'true' + delim.fence = 'true' + return delim end -function nodes_to_table(head) +function nodes_to_table(head, cur_style) local t = {[0] = "mrow"} + local result = t for n, id, sub in node.traverse(head) do if id == noad_t then - t[#t+1] = noad_to_table(n, sub) + t[#t+1] = noad_to_table(n, sub, cur_style) elseif id == accent_t then - print(n) - t[#t+1] = {[0] = 'TODO', accent = n} + t[#t+1] = accent_to_table(n, sub, cur_style) elseif id == style_t then - print(n) - t[#t+1] = {[0] = 'TODO', style = n} + if #t ~= 0 then + local new_t = {[0] = 'mstyle'} + t[#t+1] = new_t + t = new_t + end + if sub < 2 then + t.displaystyle, t.scriptlevel = true, 0 + else + t.displaystyle, t.scriptlevel = false, sub//2 - 1 + end + cur_style = sub elseif id == choice_t then - print(n) - t[#t+1] = {[0] = 'TODO', choice = n} + local size = cur_style//2 + t[#t+1] = nodes_to_table(n[size == 0 and 'display' or size == 1 and 'text' + or size == 2 and 'script' + or size == 3 and 'scriptscript' or assert(false)], 2*size) elseif id == radical_t then - t[#t+1] = radical_to_table(n, sub) + t[#t+1] = radical_to_table(n, sub, cur_style) elseif id == fraction_t then - t[#t+1] = fraction_to_table(n, sub) + t[#t+1] = fraction_to_table(n, sub, cur_style) elseif id == fence_t then - print(n) - t[#t+1] = {[0] = 'TODO', fence = n} + t[#t+1] = fence_to_table(n, sub, cur_style) else - print(n) - t[#t+1] = n + t[#t+1] = {[0] = 'tex:TODO', other = n} end end - return t + return result end -return function(head) - local result = nodes_to_table(head) +return function(head, style) + local result = nodes_to_table(head, style or 0) result[0] = 'math' result.xmlns = 'http://www.w3.org/1998/Math/MathML' result['xmlns:tex'] = 'http://typesetting.eu/2021/LuaMathML' + if style == 2 then + result.display = 'block' + end return result end diff --git a/test_tex.lua b/test_tex.lua index a1b56c0..50b1b1a 100644 --- a/test_tex.lua +++ b/test_tex.lua @@ -4,10 +4,11 @@ 
local function show(t) return print(inspect(t)) end local mlist_to_table = require'mlist_to_mml' local write_xml = require'write_xml' -luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist) +luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist, style) print'\n\n' - local xml = mlist_to_table(mlist) - show(write_xml(xml)) + local xml = mlist_to_table(mlist, style == 'display' and 2 or 0) + print(write_xml(xml)) + -- print(write_xml(xml, '\n')) print'\n' return true end, 'dump_list') diff --git a/test_tex.tex b/test_tex.tex index 16c5ae9..37c78bc 100644 --- a/test_tex.tex +++ b/test_tex.tex @@ -9,6 +9,6 @@ x = \frac{-b \pm \sqrt{b^2-4ac}}{2a}. \] \[ - x = \longdivision{5} + \sum_a\underline c\dot bc' \] \end{document} diff --git a/write_xml.lua b/write_xml.lua index aa842b3..f67db4e 100644 --- a/write_xml.lua +++ b/write_xml.lua @@ -11,13 +11,14 @@ local escapes = { ['&'] = "&", } local function escape_text(text) - return text:gsub('("<>&)', escapes) + return string.gsub(tostring(text), '("<>&)', escapes) end -local function write_elem(tree) +local function write_elem(tree, indent) if not tree[0] then print('ERR', require'inspect'(tree)) end local escaped_name = escape_name(assert(tree[0])) local out = "<" .. escaped_name + if indent then out = indent .. out end for attr, val in next, tree do if type(attr) == 'string' then out = out .. ' ' .. escape_name(attr) .. '="' .. escape_text(val) .. '"' end end @@ -25,13 +26,18 @@ local function write_elem(tree) return out .. '/>' end out = out .. '>' + local inner_indent = indent and indent .. ' ' for _, elem in ipairs(tree) do if type(elem) == 'string' then + if inner_indent then + out = out .. inner_indent + end out = out .. escape_text(elem) else - out = out .. write_elem(elem) + out = out .. write_elem(elem, inner_indent) end end + if indent then out = out .. indent end return out .. 
'' end From 165c6bd9ad2af28a25e285cddd7bf63d02a29031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 13:30:54 +0200 Subject: [PATCH 04/10] Handle many TODOs --- mlist_to_mml.lua | 157 +++++++++++++++++++++++++++++++++-------------- test_tex.lua | 7 ++- test_tex.tex | 2 +- write_xml.lua | 12 +++- 4 files changed, 125 insertions(+), 53 deletions(-) diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index 0ab19dd..3eed3b8 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -9,6 +9,9 @@ local fence_sub = node.subtypes'fence' local nodes_to_table +local function sub_style(s) return s//4*2+5 end +local function sup_style(s) return s//4*2+4+s%2 end + -- We ignore large_... since they aren't used for modern fonts local function delim_to_table(delim) if not delim then return end @@ -16,7 +19,7 @@ local function delim_to_table(delim) return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil } end -local function kernel_to_table(kernel) +local function kernel_to_table(kernel, cur_style) if not kernel then return end local id = kernel.id if id == math_char_t then @@ -27,15 +30,15 @@ local function kernel_to_table(kernel) elseif id == sub_box_t then return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} elseif id == sub_mlist_t then - return nodes_to_table(kernel.list) + return nodes_to_table(kernel.list, cur_style) else error'confusion' end end -local function do_sub_sup(t, n) - sub = kernel_to_table(n.sub) - sup = kernel_to_table(n.sup) +local function do_sub_sup(t, n, cur_style) + local sub = kernel_to_table(n.sub, sub_style(cur_style)) + local sup = kernel_to_table(n.sup, sup_style(cur_style)) if sub then if sup then return {[0] = 'msubsup', t, sub, sup} @@ -49,14 +52,11 @@ local function do_sub_sup(t, n) end end -local function noad_to_table(noad, sub) +local function noad_to_table(noad, sub, cur_style) local class = noad_sub[sub] - local nucleus = kernel_to_table(noad.nucleus) + 
local nucleus = kernel_to_table(noad.nucleus, class == 'over' and cur_style//2*2+1 or cur_style) if class == 'ord' then - -- elseif class == 'opdisplaylimits' then - -- elseif class == 'oplimits' then - -- elseif class == 'opnolimits' then - elseif class == 'bin' or class == 'rel' or class == 'open' + elseif class == 'opdisplaylimits' or class == 'oplimits' or class == 'opnolimits' or class == 'bin' or class == 'rel' or class == 'open' or class == 'close' or class == 'punct' or class == 'inner' then if nucleus[0] == 'mrow' then -- TODO @@ -64,19 +64,73 @@ local function noad_to_table(noad, sub) nucleus[0] = 'mo' end nucleus['tex:class'] = class - -- elseif class == 'under' then - -- elseif class == 'over' then - -- elseif class == 'vcenter' then - else - -- error[[confusion]] + + if (noad.sup or noad.sub) and (class == 'opdisplaylimits' or class == 'oplimits') then + nucleus.movablelimits = class == 'opdisplaylimits' + local sub = kernel_to_table(noad.sub, sub_style(cur_style)) + local sup = kernel_to_table(noad.sup, sup_style(cur_style)) + return {[0] = sup and (sub and 'munderover' or 'mover') or 'munder', + nucleus, + sub or sup, + sub and sup, + } + end + elseif class == 'under' then + return {[0] = 'munder', + nucleus, + {[0] = 'mo', '_',}, + } + elseif class == 'over' then + return {[0] = 'mover', + nucleus, + {[0] = 'mo', '\u{203E}',}, + } + elseif class == 'vcenter' then nucleus['tex:TODO'] = class + else + error[[confusion]] end - return do_sub_sup(nucleus, noad) + return do_sub_sup(nucleus, noad, cur_style) end -local function radical_to_table(radical, sub) +local function accent_to_table(accent, sub, cur_style) + local nucleus = kernel_to_table(accent.nucleus, cur_style//2*2+1) + local top_acc = kernel_to_table(accent.accent, cur_style) + local bot_acc = kernel_to_table(accent.bot_accent, cur_style) + if top_acc then + top_acc[0] = 'mo' + if sub & 1 == 1 then + top_acc.stretchy = 'false' + end + end + if bot_acc then + bot_acc[0] = 'mo' + if sub & 2 == 2 
then + bot_acc.stretchy = 'false' + end + end + return {[0] = top_acc and (bot_acc and 'munderover' or 'mover') or 'munder', + nucleus, + bot_acc or top_acc, + bot_acc and top_acc, + } +end + +local style_table = { + display = {displaystyle = "true", scriptlevel = "0"}, + text = {displaystyle = "false", scriptlevel = "0"}, + script = {displaystyle = "false", scriptlevel = "1"}, + scriptscript = {displaystyle = "false", scriptlevel = "2"}, +} + +style_table.crampeddisplay, style_table.crampedtext, +style_table.crampedscript, style_table.crampedscriptscript = + style_table.display, style_table.text, + style_table.script, style_table.scriptscript + +local function radical_to_table(radical, sub, cur_style) local kind = radical_sub[sub] - local nucleus = kernel_to_table(radical.nucleus) + local nucleus = kernel_to_table(radical.nucleus, cur_style//2*2+1) local left = delim_to_table(radical.left) local elem if kind == 'radical' or kind == 'uradical' then @@ -96,14 +150,13 @@ local function radical_to_table(radical, sub) else error[[confusion]] end - return do_sub_sup(elem, radical) + return do_sub_sup(elem, radical, cur_style) end -local function fraction_to_table(fraction, sub) - local num = kernel_to_table(fraction.num) - local denom = kernel_to_table(fraction.denom) +local function fraction_to_table(fraction, sub, cur_style) + local num = kernel_to_table(fraction.num, sup_style(cur_style)) + local denom = kernel_to_table(fraction.denom, sub_style(cur_style)) local left = delim_to_table(fraction.left) - -- local middle = delim_to_table(fraction.middle) local right = delim_to_table(fraction.right) local mfrac = {[0] = 'mfrac', linethickness = fraction.width and fraction.width == 0 and 0 or nil, @@ -127,46 +180,58 @@ local function fraction_to_table(fraction, sub) end end -local function fence_to_table(fraction, sub) - error[[TODO]] - return { - kind = fence_sub[sub], - } +local function fence_to_table(fence, sub, cur_style) + local delim = 
delim_to_table(fence.delimiter) + delim.stretchy = 'true' + delim.fence = 'true' + return delim end -function nodes_to_table(head) +function nodes_to_table(head, cur_style) local t = {[0] = "mrow"} + local result = t for n, id, sub in node.traverse(head) do if id == noad_t then - t[#t+1] = noad_to_table(n, sub) + t[#t+1] = noad_to_table(n, sub, cur_style) elseif id == accent_t then - print(n) - t[#t+1] = {[0] = 'TODO', accent = n} + t[#t+1] = accent_to_table(n, sub, cur_style) elseif id == style_t then - print(n) - t[#t+1] = {[0] = 'TODO', style = n} + if #t ~= 0 then + local new_t = {[0] = 'mstyle'} + t[#t+1] = new_t + t = new_t + end + if sub < 2 then + t.displaystyle, t.scriptlevel = true, 0 + else + t.displaystyle, t.scriptlevel = false, sub//2 - 1 + end + cur_style = sub elseif id == choice_t then - print(n) - t[#t+1] = {[0] = 'TODO', choice = n} + local size = cur_style//2 + t[#t+1] = nodes_to_table(n[size == 0 and 'display' or size == 1 and 'text' + or size == 2 and 'script' + or size == 3 and 'scriptscript' or assert(false)], 2*size) elseif id == radical_t then - t[#t+1] = radical_to_table(n, sub) + t[#t+1] = radical_to_table(n, sub, cur_style) elseif id == fraction_t then - t[#t+1] = fraction_to_table(n, sub) + t[#t+1] = fraction_to_table(n, sub, cur_style) elseif id == fence_t then - print(n) - t[#t+1] = {[0] = 'TODO', fence = n} + t[#t+1] = fence_to_table(n, sub, cur_style) else - print(n) - t[#t+1] = n + t[#t+1] = {[0] = 'tex:TODO', other = n} end end - return t + return result end -return function(head) - local result = nodes_to_table(head) +return function(head, style) + local result = nodes_to_table(head, style or 0) result[0] = 'math' result.xmlns = 'http://www.w3.org/1998/Math/MathML' result['xmlns:tex'] = 'http://typesetting.eu/2021/LuaMathML' + if style == 2 then + result.display = 'block' + end return result end diff --git a/test_tex.lua b/test_tex.lua index a1b56c0..50b1b1a 100644 --- a/test_tex.lua +++ b/test_tex.lua @@ -4,10 +4,11 @@ local 
function show(t) return print(inspect(t)) end local mlist_to_table = require'mlist_to_mml' local write_xml = require'write_xml' -luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist) +luatexbase.add_to_callback('pre_mlist_to_hlist_filter', function(mlist, style) print'\n\n' - local xml = mlist_to_table(mlist) - show(write_xml(xml)) + local xml = mlist_to_table(mlist, style == 'display' and 2 or 0) + print(write_xml(xml)) + -- print(write_xml(xml, '\n')) print'\n' return true end, 'dump_list') diff --git a/test_tex.tex b/test_tex.tex index 16c5ae9..37c78bc 100644 --- a/test_tex.tex +++ b/test_tex.tex @@ -9,6 +9,6 @@ x = \frac{-b \pm \sqrt{b^2-4ac}}{2a}. \] \[ - x = \longdivision{5} + \sum_a\underline c\dot bc' \] \end{document} diff --git a/write_xml.lua b/write_xml.lua index aa842b3..f67db4e 100644 --- a/write_xml.lua +++ b/write_xml.lua @@ -11,13 +11,14 @@ local escapes = { ['&'] = "&", } local function escape_text(text) - return text:gsub('("<>&)', escapes) + return string.gsub(tostring(text), '("<>&)', escapes) end -local function write_elem(tree) +local function write_elem(tree, indent) if not tree[0] then print('ERR', require'inspect'(tree)) end local escaped_name = escape_name(assert(tree[0])) local out = "<" .. escaped_name + if indent then out = indent .. out end for attr, val in next, tree do if type(attr) == 'string' then out = out .. ' ' .. escape_name(attr) .. '="' .. escape_text(val) .. '"' end end @@ -25,13 +26,18 @@ local function write_elem(tree) return out .. '/>' end out = out .. '>' + local inner_indent = indent and indent .. ' ' for _, elem in ipairs(tree) do if type(elem) == 'string' then + if inner_indent then + out = out .. inner_indent + end out = out .. escape_text(elem) else - out = out .. write_elem(elem) + out = out .. write_elem(elem, inner_indent) end end + if indent then out = out .. indent end return out .. 
'' end From 2a96d1c7d6a43d35ed47e260d646d728b955ae51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 17:07:29 +0200 Subject: [PATCH 05/10] Normalize some names of combining characters The list remaps according to the table in MathML-Core, always selecting the variant which has an XML entity name if such a character exists. --- mlist_to_mml.lua | 4 ++++ remap_comb.lua | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 remap_comb.lua diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index 3eed3b8..e972480 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -1,3 +1,5 @@ +local remap_comb = require'remap_comb' + local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice' local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence' @@ -102,12 +104,14 @@ local function accent_to_table(accent, sub, cur_style) if sub & 1 == 1 then top_acc.stretchy = 'false' end + top_acc[1] = remap_comb[top_acc[1]] or top_acc[1] end if bot_acc then bot_acc[0] = 'mo' if sub & 2 == 2 then bot_acc.stretchy = 'false' end + bot_acc[1] = remap_comb[bot_acc[1]] or bot_acc[1] end return {[0] = top_acc and (bot_acc and 'munderover' or 'mover') or 'munder', nucleus, diff --git a/remap_comb.lua b/remap_comb.lua new file mode 100644 index 0000000..2083a3f --- /dev/null +++ b/remap_comb.lua @@ -0,0 +1,27 @@ +return { + ["\u{0332}"] = "\u{2212}", + ["\u{0330}"] = "\u{02DC}", + ["\u{0328}"] = "\u{02DB}", + ["\u{20EF}"] = "\u{2192}", + ["\u{032C}"] = "\u{02C7}", + ["\u{032E}"] = "\u{02D8}", + ["\u{0306}"] = "\u{02D8}", + ["\u{030B}"] = "\u{02DD}", + ["\u{0302}"] = "\u{02C6}", + ["\u{0324}"] = "\u{00A8}", + ["\u{0317}"] = "\u{00B4}", + ["\u{031F}"] = "\u{002B}", + ["\u{0307}"] = "\u{002E}", + ["\u{0305}"] = "\u{2212}", + ["\u{0303}"] = "\u{02DC}", + ["\u{0316}"] = "\u{0060}", + ["\u{0301}"] = "\u{00B4}", + ["\u{030C}"] = "\u{02C7}", +
["\u{0327}"] = "\u{00B8}", + ["\u{0308}"] = "\u{00A8}", + ["\u{0300}"] = "\u{0060}", + ["\u{0323}"] = "\u{002E}", + ["\u{0304}"] = "\u{00AF}", + ["\u{032D}"] = "\u{005E}", + ["\u{20D7}"] = "\u{2192}", +} From 7ca21203ac20f82aa5bd2a3c53c40d95a60d5365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 17:07:29 +0200 Subject: [PATCH 06/10] Normalize some names of combining characters The list remaps according to the table in MathML-Core, always selecting the variant which has an XML entity name if such a character exists. --- mlist_to_mml.lua | 4 ++++ remap_comb.lua | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 remap_comb.lua diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index 3eed3b8..e972480 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -1,3 +1,5 @@ +local remap_comb = require'remap_comb' + local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice' local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence' @@ -102,12 +104,14 @@ local function accent_to_table(accent, sub, cur_style) if sub & 1 == 1 then top_acc.stretchy = 'false' end + top_acc[1] = remap_comb[top_acc[1]] or top_acc[1] end if bot_acc then bot_acc[0] = 'mo' if sub & 2 == 2 then bot_acc.stretchy = 'false' end + bot_acc[1] = remap_comb[bot_acc[1]] or bot_acc[1] end return {[0] = top_acc and (bot_acc and 'munderover' or 'mover') or 'munder', nucleus, diff --git a/remap_comb.lua b/remap_comb.lua new file mode 100644 index 0000000..2083a3f --- /dev/null +++ b/remap_comb.lua @@ -0,0 +1,27 @@ +return { + ["\u{0332}"] = "\u{2212}", + ["\u{0330}"] = "\u{02DC}", + ["\u{0328}"] = "\u{02DB}", + ["\u{20EF}"] = "\u{2192}", + ["\u{032C}"] = "\u{02C7}", + ["\u{032E}"] = "\u{02D8}", + ["\u{0306}"] = "\u{02D8}", + ["\u{030B}"] = "\u{02DD}", + ["\u{0302}"] = "\u{02C6}", + ["\u{0324}"] = "\u{00A8}", + ["\u{0317}"] = "\u{00B4}", + ["\u{031F}"] =
"\u{002B}", + ["\u{0307}"] = "\u{002E}", + ["\u{0305}"] = "\u{2212}", + ["\u{0303}"] = "\u{02DC}", + ["\u{0316}"] = "\u{0060}", + ["\u{0301}"] = "\u{00B4}", + ["\u{030C}"] = "\u{02C7}", + ["\u{0327}"] = "\u{00B8}", + ["\u{0308}"] = "\u{00A8}", + ["\u{0300}"] = "\u{0060}", + ["\u{0323}"] = "\u{002E}", + ["\u{0304}"] = "\u{00AF}", + ["\u{032D}"] = "\u{005E}", + ["\u{20D7}"] = "\u{2192}", +} From 73c6f78976d14f5eae731eb5d38bdc617b04d805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 17:29:08 +0200 Subject: [PATCH 07/10] Set mathvariant to avoid automatic italic --- mlist_to_mml.lua | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index e972480..eb2af47 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -28,7 +28,7 @@ local function kernel_to_table(kernel, cur_style) local char = kernel.char local elem = char >= 0x30 and char < 0x39 and 'mn' or 'mi' local fam = kernel.fam - return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil } + return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil, mathvariant = char < 0x10000 and 'normal' or nil } elseif id == sub_box_t then return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} elseif id == sub_mlist_t then @@ -64,6 +64,7 @@ local function noad_to_table(noad, sub, cur_style) -- TODO else nucleus[0] = 'mo' + if nucleus.mathvariant == 'normal' then nucleus.mathvariant = nil end end nucleus['tex:class'] = class @@ -101,6 +102,7 @@ local function accent_to_table(accent, sub, cur_style) local bot_acc = kernel_to_table(accent.bot_accent, cur_style) if top_acc then top_acc[0] = 'mo' + if top_acc.mathvariant == 'normal' then top_acc.mathvariant = nil end if sub & 1 == 1 then top_acc.stretchy = 'false' end @@ -108,6 +110,7 @@ local function accent_to_table(accent, sub, cur_style) end if bot_acc then bot_acc[0] = 'mo' + if bot_acc.mathvariant == 'normal' then 
bot_acc.mathvariant = nil end if sub & 2 == 2 then bot_acc.stretchy = 'false' end From 3cb96009645091d19478b426dbb4d69fb08429cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 17:29:08 +0200 Subject: [PATCH 08/10] Set mathvariant to avoid automatic italic --- mlist_to_mml.lua | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index e972480..eb2af47 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -28,7 +28,7 @@ local function kernel_to_table(kernel, cur_style) local char = kernel.char local elem = char >= 0x30 and char < 0x39 and 'mn' or 'mi' local fam = kernel.fam - return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil } + return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil, mathvariant = char < 0x10000 and 'normal' or nil } elseif id == sub_box_t then return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} elseif id == sub_mlist_t then @@ -64,6 +64,7 @@ local function noad_to_table(noad, sub, cur_style) -- TODO else nucleus[0] = 'mo' + if nucleus.mathvariant == 'normal' then nucleus.mathvariant = nil end end nucleus['tex:class'] = class @@ -101,6 +102,7 @@ local function accent_to_table(accent, sub, cur_style) local bot_acc = kernel_to_table(accent.bot_accent, cur_style) if top_acc then top_acc[0] = 'mo' + if top_acc.mathvariant == 'normal' then top_acc.mathvariant = nil end if sub & 1 == 1 then top_acc.stretchy = 'false' end @@ -108,6 +110,7 @@ local function accent_to_table(accent, sub, cur_style) end if bot_acc then bot_acc[0] = 'mo' + if bot_acc.mathvariant == 'normal' then bot_acc.mathvariant = nil end if sub & 2 == 2 then bot_acc.stretchy = 'false' end From dda6432192ef12e65e8dbe4ea4c65a7ae5880c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 19:56:03 +0200 Subject: [PATCH 09/10] Better stretchy handling and overwrite support 
--- mlist_to_mml.lua | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua index eb2af47..c584e80 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -1,4 +1,7 @@ local remap_comb = require'remap_comb' +local stretchy = require'stretchy' + +local properties = node.get_properties_table() local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice' local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence' @@ -17,18 +20,39 @@ local function sup_style(s) return s//4*2+4+s%2 end -- We ignore large_... since they aren't used for modern fonts local function delim_to_table(delim) if not delim then return end + local props = properties[delim] props = props and props.mathml_table + if props then return props end local fam = delim.small_fam - return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil } + return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil, stretchy = not stretchy[delim.small_char] or nil } +end + +-- Like kernel_to_table but always a math_char_t. 
Also creating a mo and potentially remapping to handle combining chars +local function acc_to_table(acc, cur_style, stretch) + if not acc then return end + local props = properties[acc] props = props and props.mathml_table + if props then return props end + if acc.id ~= math_char_t then + error'confusion' + end + local char = utf8.char(acc.char) + char = remap_comb[char] or char + local fam = acc.fam + if stretch ~= not stretchy[char] then -- Handle nil gracefully in stretchy + stretch = nil + end + return {[0] = 'mo', char, ['tex:family'] = fam ~= 0 and fam or nil, stretchy = stretch} end local function kernel_to_table(kernel, cur_style) if not kernel then return end + local props = properties[kernel] props = props and props.mathml_table + if props then return props end local id = kernel.id if id == math_char_t then local char = kernel.char local elem = char >= 0x30 and char < 0x39 and 'mn' or 'mi' local fam = kernel.fam - return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil, mathvariant = char < 0x10000 and 'normal' or nil } + return {[0] = elem, utf8.char(char), ['tex:family'] = fam ~= 0 and fam or nil, mathvariant = char < 0x10000 and 'normal' or nil } elseif id == sub_box_t then return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} elseif id == sub_mlist_t then @@ -64,6 +88,7 @@ local function noad_to_table(noad, sub, cur_style) -- TODO else nucleus[0] = 'mo' + if stretchy[nucleus[1]] then nucleus.stretchy = false end if nucleus.mathvariant == 'normal' then nucleus.mathvariant = nil end end nucleus['tex:class'] = class @@ -98,24 +123,8 @@ end local function accent_to_table(accent, sub, cur_style) local nucleus = kernel_to_table(accent.nucleus, cur_style//2*2+1) - local top_acc = kernel_to_table(accent.accent, cur_style) - local bot_acc = kernel_to_table(accent.bot_accent, cur_style) - if top_acc then - top_acc[0] = 'mo' - if top_acc.mathvariant == 'normal' then top_acc.mathvariant = nil end - if sub & 1 == 1 then - 
top_acc.stretchy = 'false' - end - top_acc[1] = remap_comb[top_acc[1]] or top_acc[1] - end - if bot_acc then - bot_acc[0] = 'mo' - if bot_acc.mathvariant == 'normal' then bot_acc.mathvariant = nil end - if sub & 2 == 2 then - bot_acc.stretchy = 'false' - end - bot_acc[1] = remap_comb[bot_acc[1]] or bot_acc[1] - end + local top_acc = acc_to_table(accent.accent, cur_style, sub & 1 == 1) + local bot_acc = acc_to_table(accent.bot_accent, cur_style, sub & 2 == 2) return {[0] = top_acc and (bot_acc and 'munderover' or 'mover') or 'munder', nucleus, bot_acc or top_acc, @@ -145,7 +154,7 @@ local function radical_to_table(radical, sub, cur_style) elem = {[0] = 'msqrt', nucleus} elseif kind == 'uroot' then -- FIXME: Check that this is really a root - elem = {[0] = 'msqrt', nucleus, delim_to_table(radical.degree)} + elem = {[0] = 'msqrt', nucleus, kernel_to_table(radical.degree)} elseif kind == 'uunderdelimiter' then elem = {[0] = 'munder', left, nucleus} elseif kind == 'uoverdelimiter' then @@ -189,7 +198,6 @@ end local function fence_to_table(fence, sub, cur_style) local delim = delim_to_table(fence.delimiter) - delim.stretchy = 'true' delim.fence = 'true' return delim end @@ -198,7 +206,10 @@ function nodes_to_table(head, cur_style) local t = {[0] = "mrow"} local result = t for n, id, sub in node.traverse(head) do - if id == noad_t then + local props = properties[n] props = props and props.mathml_table + if props then + t[#t+1] = props + elseif id == noad_t then t[#t+1] = noad_to_table(n, sub, cur_style) elseif id == accent_t then t[#t+1] = accent_to_table(n, sub, cur_style) From b452293590358964e6b3cb838676d2647d01eb7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20Fabian=20Kr=C3=BCger?= Date: Mon, 19 Apr 2021 19:56:03 +0200 Subject: [PATCH 10/10] Better stretchy handling and overwrite support --- mlist_to_mml.lua | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/mlist_to_mml.lua b/mlist_to_mml.lua 
index eb2af47..c584e80 100644 --- a/mlist_to_mml.lua +++ b/mlist_to_mml.lua @@ -1,4 +1,7 @@ local remap_comb = require'remap_comb' +local stretchy = require'stretchy' + +local properties = node.get_properties_table() local noad_t, accent_t, style_t, choice_t = node.id'noad', node.id'accent', node.id'style', node.id'choice' local radical_t, fraction_t, fence_t = node.id'radical', node.id'fraction', node.id'fence' @@ -17,18 +20,39 @@ local function sup_style(s) return s//4*2+4+s%2 end -- We ignore large_... since they aren't used for modern fonts local function delim_to_table(delim) if not delim then return end + local props = properties[delim] props = props and props.mathml_table + if props then return props end local fam = delim.small_fam - return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil } + return {[0] = 'mo', utf8.char(delim.small_char), ['tex:family'] = fam ~= 0 and fam or nil, stretchy = not stretchy[delim.small_char] or nil } +end + +-- Like kernel_to_table but always a math_char_t. 
Also creating a mo and potentially remapping to handle combining chars +local function acc_to_table(acc, cur_style, stretch) + if not acc then return end + local props = properties[acc] props = props and props.mathml_table + if props then return props end + if acc.id ~= math_char_t then + error'confusion' + end + local char = utf8.char(acc.char) + char = remap_comb[char] or char + local fam = acc.fam + if stretch ~= not stretchy[char] then -- Handle nil gracefully in stretchy + stretch = nil + end + return {[0] = 'mo', char, ['tex:family'] = fam ~= 0 and fam or nil, stretchy = stretch} end local function kernel_to_table(kernel, cur_style) if not kernel then return end + local props = properties[kernel] props = props and props.mathml_table + if props then return props end local id = kernel.id if id == math_char_t then local char = kernel.char local elem = char >= 0x30 and char < 0x39 and 'mn' or 'mi' local fam = kernel.fam - return {[0] = elem, utf8.char(kernel.char), ['tex:family'] = fam ~= 0 and fam or nil, mathvariant = char < 0x10000 and 'normal' or nil } + return {[0] = elem, utf8.char(char), ['tex:family'] = fam ~= 0 and fam or nil, mathvariant = char < 0x10000 and 'normal' or nil } elseif id == sub_box_t then return {[0] = 'mi', {[0] = 'mglyph', ['tex:box'] = kernel.list}} elseif id == sub_mlist_t then @@ -64,6 +88,7 @@ local function noad_to_table(noad, sub, cur_style) -- TODO else nucleus[0] = 'mo' + if stretchy[nucleus[1]] then nucleus.stretchy = false end if nucleus.mathvariant == 'normal' then nucleus.mathvariant = nil end end nucleus['tex:class'] = class @@ -98,24 +123,8 @@ end local function accent_to_table(accent, sub, cur_style) local nucleus = kernel_to_table(accent.nucleus, cur_style//2*2+1) - local top_acc = kernel_to_table(accent.accent, cur_style) - local bot_acc = kernel_to_table(accent.bot_accent, cur_style) - if top_acc then - top_acc[0] = 'mo' - if top_acc.mathvariant == 'normal' then top_acc.mathvariant = nil end - if sub & 1 == 1 then - 
top_acc.stretchy = 'false' - end - top_acc[1] = remap_comb[top_acc[1]] or top_acc[1] - end - if bot_acc then - bot_acc[0] = 'mo' - if bot_acc.mathvariant == 'normal' then bot_acc.mathvariant = nil end - if sub & 2 == 2 then - bot_acc.stretchy = 'false' - end - bot_acc[1] = remap_comb[bot_acc[1]] or bot_acc[1] - end + local top_acc = acc_to_table(accent.accent, cur_style, sub & 1 == 1) + local bot_acc = acc_to_table(accent.bot_accent, cur_style, sub & 2 == 2) return {[0] = top_acc and (bot_acc and 'munderover' or 'mover') or 'munder', nucleus, bot_acc or top_acc, @@ -145,7 +154,7 @@ local function radical_to_table(radical, sub, cur_style) elem = {[0] = 'msqrt', nucleus} elseif kind == 'uroot' then -- FIXME: Check that this is really a root - elem = {[0] = 'msqrt', nucleus, delim_to_table(radical.degree)} + elem = {[0] = 'msqrt', nucleus, kernel_to_table(radical.degree)} elseif kind == 'uunderdelimiter' then elem = {[0] = 'munder', left, nucleus} elseif kind == 'uoverdelimiter' then @@ -189,7 +198,6 @@ end local function fence_to_table(fence, sub, cur_style) local delim = delim_to_table(fence.delimiter) - delim.stretchy = 'true' delim.fence = 'true' return delim end @@ -198,7 +206,10 @@ function nodes_to_table(head, cur_style) local t = {[0] = "mrow"} local result = t for n, id, sub in node.traverse(head) do - if id == noad_t then + local props = properties[n] props = props and props.mathml_table + if props then + t[#t+1] = props + elseif id == noad_t then t[#t+1] = noad_to_table(n, sub, cur_style) elseif id == accent_t then t[#t+1] = accent_to_table(n, sub, cur_style)