diff --git a/README.md b/README.md
index d139829..afdf86e 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,14 @@
-# Automated LuaLaTeX math to MathML conversion
-**Highly experimental! At this point all interfaces may change without prior warning and many features aren't implemented yet. It is not ready for anything beyond simple experiments.**
-
+# LuaMML: Automated LuaLaTeX math to MathML conversion
This is an attempt to implement automatic conversion of LuaLaTeX inline and display math expressions into MathML code to aid with tagging.
It works best with `unicode-math`, but it can also be used with traditional math fonts if mappings to Unicode are provided.
## Installation
Run `l3build install` to install `luamml` into your local `texmf` tree.
-## Demo
-Run `lualatex test_tex` to see all equations from [our example file](./test_tex.tex) converted into MathML.
+## Usage
+Add `\usepackage[tracing]{luamml-demo}` to print MathML to the terminal or `\usepackage[files]{luamml-demo}` to generate separate files with MathML output.
+Alternatively, it can be used with latex-lab to integrate automatically with the tagging infrastructure.
-To test it on your own files, add `\usepackage[tracing]{luamml-demo}` (to print MathML to the terminal) or `\usepackage[files]{luamml-demo}` to generate separate files with MathML output.
-Also see a [`tagpdf` experiment using this to tag PDF formulas](https://github.com/u-fischer/tagpdf/blob/develop/experiments/exp-mathml-lua.tex).
+
-If you are very brave you can also try running `pdflatex test_pdf` and afterwards run `./pdfmml.lua test_pdf.lua` to get pdflatex formulas converted.
+
diff --git a/build.lua b/build.lua
index 1485bf5..553d44d 100644
--- a/build.lua
+++ b/build.lua
@@ -3,6 +3,8 @@ module = "luamml"
tdsroot = "lualatex"
installfiles = { "luamml-*.lua", "*.sty" }
sourcefiles = { "luamml-*.lua", "*.sty", "*.dtx" }
+typesetsuppfiles = { "*.tex" }
+typesetsourcefiles = { "*.tex" }
stdengine = "luatex"
unpackfiles = { "*.dtx" }
typesetexe = "lualatex"
diff --git a/luamml-demo.sty b/luamml-demo.sty
index 5e2d8df..64e4e3f 100644
--- a/luamml-demo.sty
+++ b/luamml-demo.sty
@@ -1,5 +1,5 @@
\NeedsTeXFormat{LaTeX2e}
-\ProvidesExplPackage{luamml-demo}{2021-06-16}{v0.0.1}{Reasonable default definitions for luamml}
+\ProvidesExplPackage{luamml-demo}{2024-08-14}{v0.1.0}{Reasonable default definitions for luamml}
\sys_if_engine_luatex:F {
\msg_new:nnn {luamml-demo} {pdftex-option-ignored} {Option~`#1'~is~being~ignored~in~pdfTeX~mode.}
diff --git a/luamml-patches-amsmath.sty b/luamml-patches-amsmath.sty
index f3bfc38..c0df5ed 100644
--- a/luamml-patches-amsmath.sty
+++ b/luamml-patches-amsmath.sty
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {luamml-patches-amsmath} {2021-04-23} {0.0.1-alpha}
+\ProvidesExplPackage {luamml-patches-amsmath} {2024-08-14} {0.1.0}
{Feel free to add a description here}
\lua_now:n { require'luamml-amsmath' }
diff --git a/luamml-patches-amstext.sty b/luamml-patches-amstext.sty
index 2850257..2217600 100644
--- a/luamml-patches-amstext.sty
+++ b/luamml-patches-amstext.sty
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {luamml-patches-amstext} {2021-04-23} {0.0.1-alpha}
+\ProvidesExplPackage {luamml-patches-amstext} {2024-08-14} {0.1.0}
{Feel free to add a description here}
\int_new:N \g__luamml_amsmath_text_struct_int
diff --git a/luamml-patches-array.sty b/luamml-patches-array.sty
index 7635323..ee40b2c 100644
--- a/luamml-patches-array.sty
+++ b/luamml-patches-array.sty
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {luamml-patches-array} {2021-04-23} {0.0.1-alpha}
+\ProvidesExplPackage {luamml-patches-array} {2024-08-14} {0.1.0}
{Feel free to add a description here}
\lua_now:n { require'luamml-array' }
diff --git a/luamml-patches-kernel.sty b/luamml-patches-kernel.sty
index 49dc215..fa3aaae 100644
--- a/luamml-patches-kernel.sty
+++ b/luamml-patches-kernel.sty
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {luamml-patches-kernel} {2021-05-30} {0.0.1-alpha}
+\ProvidesExplPackage {luamml-patches-kernel} {2024-08-14} {0.1.0}
{Feel free to add a description here}
\cs_new:Npn \__luamml_kernel_phantom:nnn #1#2#3 {
diff --git a/luamml-patches-lab-math.sty b/luamml-patches-lab-math.sty
index 166a7d0..e2d12c3 100644
--- a/luamml-patches-lab-math.sty
+++ b/luamml-patches-lab-math.sty
@@ -1,4 +1,4 @@
-\ProvidesExplPackage {luamml-patches-lab-math} {2021-05-30} {0.0.1-alpha}
+\ProvidesExplPackage {luamml-patches-lab-math} {2024-08-14} {0.1.0}
{Feel free to add a description here}
\AddToHook{begindocument} {
diff --git a/luamml-pdf-demo.sty b/luamml-pdf-demo.sty
index 0cd981d..fe63814 100644
--- a/luamml-pdf-demo.sty
+++ b/luamml-pdf-demo.sty
@@ -1,5 +1,5 @@
\NeedsTeXFormat{LaTeX2e}
-\ProvidesExplPackage{luamml-pdf-demo}{2021-06-16}{v0.0.1}{Reasonable default definitions for luamml-pdf}
+\ProvidesExplPackage{luamml-pdf-demo}{2024-08-14}{v0.1.0}{Reasonable default definitions for luamml-pdf}
\RequirePackage{luamml-pdf}% Loading luamml-pdf is pretty much the point
% \RequirePackage{amsmath,array}% May come back if the patches get ported
diff --git a/luamml.dtx b/luamml.dtx
index d2134b2..0af076c 100644
--- a/luamml.dtx
+++ b/luamml.dtx
@@ -1,6 +1,6 @@
% \iffalse meta-comment
%
-%% Copyright (C) 2020-2021 by Marcel Krueger
+%% Copyright (C) 2020-2024 by Marcel Krueger
%%
%% This file may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either
@@ -35,6 +35,7 @@
\documentclass{l3doc}
\usepackage{luamml}
\usepackage{csquotes,luacolor}
+\MakeShortVerb{\|}
\RecordChanges
\begin{document}
\tracingmathml2
@@ -48,13 +49,60 @@
%
% \fi
%
-% \title{The \pkg{luamml} package}
-%
+% \GetFileInfo{luamml.dtx}
+% \title{The \pkg{luamml} package%
+% \thanks{This document corresponds to \pkg{luamml}~\fileversion, dated~\filedate.}%
+% }
% \author{Marcel Krüger}
%
% \maketitle
%
% \begin{documentation}
+% \section{Use case}
+% When generating output for the web or tagged output, mathematical content should often be represented as MathML.
+% This uses Lua\TeX~callbacks to automatically attempt to convert Lua\TeX~math mode output into MathML.
+%
+% \section{Usage}
+% The \pkg{luamml} package is designed to be used in automated ways by other packages and usually should not be invoked directly by the end user.
+% For experiments, \texttt{luamml-demo} is included, which provides easier-to-use interfaces.
+%
+% Add in your preamble
+% \begin{verbatim}
+% \usepackage[files]{luamml-demo}
+% \end{verbatim}
+% This will trigger the output of individual files for each block of math output containing corresponding MathML.
+%
+% Alternatively
+% \begin{verbatim}
+% \usepackage[l3build]{luamml-demo}
+% \end{verbatim}
+% will generate a single file with a concatenation of all MathML blocks.
+%
+% For automated use, the \pkg{luamml} package can be included directly, followed by enclosing blocks which should generate files with \cs{luamml_begin_single_file:} and \cs{luamml_end_single_file:}.
+% The filename can be set with \cs{luamml_set_filename:n}.
+%
+% \section{Improving MathML conversion}
+% When using constructs which do not automatically get converted in acceptable form, conversion hints can be provided with \cs{luamml_annotate:en}.
+% This allows providing a replacement MathML structure in Lua table form, for example
+% \begin{verbatim}
+% \luamml_annotate:en {
+% nucleus = true,
+% core = {[0] = 'mi', 'TeX'},
+% }{
+% \hbox{\TeX}
+% }
+% \end{verbatim}
+% produces a |<mi>TeX</mi>| element in the output instead of trying to import \TeX~as a mathematical expression.
+% The table structure is explained in an appendix.
+%
+% \section{Features \& Limitations}
+% Currently all mathematical expressions which purely contain Unicode encoded math mode material without embedded non-math should get converted successfully.
+% Usage with non-Unicode math (\TeX's 8-bit math fonts) is highly experimental and undocumented.
+% Any attempt to build complicated structures by embedding arbitrary \TeX\ code in the middle of math mode needs to have a MathML replacement specified.
+% We aim to automate more cases in the future.
+%
+% \appendix
+% \input{luamml-algorithm}
% \end{documentation}
%
% \begin{implementation}
@@ -66,11 +114,11 @@
% \begin{macrocode}
%<@@=luamml>
%<*luatex>
-\ProvidesExplPackage {luamml} {2021-04-23} {0.0.1-alpha}
+\ProvidesExplPackage {luamml} {2024-08-14} {0.1.0}
{Automatically generate presentational MathML from LuaTeX math expressions}
%</luatex>
%<*pdftex>
-\ProvidesExplPackage {luamml-pdf} {2021-05-31} {0.0.1-alpha}
+\ProvidesExplPackage {luamml-pdf} {2024-08-14} {0.1.0}
{MathML generation for L̶u̶a̶pdfLaTeX}
%</pdftex>
% \end{macrocode}
diff --git a/algorithm.tex b/support/luamml-algorithm.tex
similarity index 60%
rename from algorithm.tex
rename to support/luamml-algorithm.tex
index eb71f74..507a0bd 100644
--- a/algorithm.tex
+++ b/support/luamml-algorithm.tex
@@ -1,10 +1,7 @@
-\documentclass{article}
-\begin{document}
-\title{From math lists to MathML}
-\subtitle{The algorithm in luamml}
-\author{Marcel}
-\maketitle
-\section{General concepts}
+\newcommand\Luamml{\pkg{Luamml}}
+\newcommand\luamml{\pkg{luamml}}
+\newcommand\xmltag[1]{\texttt{<#1>}}
+\section{\Luamml's representation of XML and MathML}
In the following I assume basic familiarity with both Lua\TeX's representation of math noads and MathML.
\subsection{Representation of XML elements}
@@ -35,32 +32,32 @@ MathML knows the concept of \enquote{embellished operators}:
\begin{blockquote}
The precise definition of an \enquote{embellished operator} is:
\begin{itemize}
- \item an \tag{mo} element;
- \item or one of the elements \tag{msub}, \tag{msup}, \tag{msubsup}, \tag{munder}, \tag{mover}, \tag{munderover}, \tag{mmultiscripts}, \tag{mfrac}, or \tag{semantics} (§ 5.1 Annotation Framework), whose first argument exists and is an embellished operator;
- \item or one of the elements \tag{mstyle}, \tag{mphantom}, or \tag{mpadded}, such that an mrow containing the same arguments would be an embellished operator;
- \item or an \tag{maction} element whose selected sub-expression exists and is an embellished operator;
- \item or an \tag{mrow} whose arguments consist (in any order) of one embellished operator and zero or more space-like elements.
+ \item an \xmltag{mo} element;
+ \item or one of the elements \xmltag{msub}, \xmltag{msup}, \xmltag{msubsup}, \xmltag{munder}, \xmltag{mover}, \xmltag{munderover}, \xmltag{mmultiscripts}, \xmltag{mfrac}, or \xmltag{semantics} (§ 5.1 Annotation Framework), whose first argument exists and is an embellished operator;
+ \item or one of the elements \xmltag{mstyle}, \xmltag{mphantom}, or \xmltag{mpadded}, such that an mrow containing the same arguments would be an embellished operator;
+ \item or an \xmltag{maction} element whose selected sub-expression exists and is an embellished operator;
+ \item or an \xmltag{mrow} whose arguments consist (in any order) of one embellished operator and zero or more space-like elements.
\end{itemize}
\end{blockquote}
-For every embellished operator, MathML calls the \tag{mo} element defining the embellished operator the \enquote{core} of the embellished operator.
+For every embellished operator, MathML calls the \xmltag{mo} element defining the embellished operator the \enquote{core} of the embellished operator.
\Luamml\ makes this slightly more general: Every expression is represented by a pair of two elements: The expression and it's core.
-The core is always a \tag{mo}, \tag{mi}, or \tag{mn}, \texttt{nil} or s special marker for space like elements.
+The core is always an \xmltag{mo}, \xmltag{mi}, or \xmltag{mn} element, \texttt{nil}, or a special marker for space-like elements.
-If and only if the element is a embellished operator the core is a \tag{mo} element representing the core of the embellished operator.
-The core is a \tag{mi} or a \tag{mn} element if and only if the element would be an embellished operator with this core if this element where a \tag{mo} element.
+If and only if the element is an embellished operator, the core is an \xmltag{mo} element representing the core of the embellished operator.
+The core is an \xmltag{mi} or an \xmltag{mn} element if and only if the element would be an embellished operator with this core if this element were an \xmltag{mo} element.
The core is the special space like marker for space like elements. Otherwise the core is \texttt{nil}.
-\section{Translation of math noads}
+\subsection{Translation of math noads}
A math lists can contain the following node types: noad, fence, fraction, radical, accent, style, choice, ins, mark, adjust, boundary, whatsit, penalty, disc, glue, and kern. The \enquote{noads}
-\subsection{Translation of kernel noads}
+\subsubsection{Translation of kernel noads}
The math noads of this list contain nested kernel noads. So in the first step, we look into how kernel nodes are translated to math nodes.
-\subsubsection{\texttt{math_char} kernel noads}
+\paragraph{\texttt{math_char} kernel noads}
First the family and character value in the \texttt{math_char} are used to lookup the Unicode character value of this \texttt{math_char}.
-(For \textt{unicode-math}, this is usually just the character value. Legacy maths has to be remapped based on the family.)
-Then there are two cases: The digits \texttt{0} to \texttt{9} are mapped to \tag{mn} elements, everything else becomes a \tag{mi} element with \texttt{mathvariant} set to \texttt{normal}.
+(For \texttt{unicode-math}, this is usually just the character value. Legacy maths has to be remapped based on the family.)
+Then there are two cases: The digits \texttt{0} to \texttt{9} are mapped to \xmltag{mn} elements, everything else becomes an \xmltag{mi} element with \texttt{mathvariant} set to \texttt{normal}.
(The \texttt{mathvariant} value might get suppressed if the character defaults to mathvariant \texttt{normal}.)
In either case, the \texttt{tex:family} attribute is set to the family number if it's not \texttt{0}.
@@ -73,13 +70,13 @@ The core is always set to the expression itself. E.g.\ the \texttt{math_char} ke
}
\end{verbatim}
-\subsection{\texttt{sub_box} kernel noads}
+\subsubsection{\texttt{sub_box} kernel noads}
I am open to suggestions how to convert them properly.
-\subsection{\texttt{sub_mlist} kernel noads}
-The inner list is converted as a \tag{mrow} element, with the core being the core of the \tag{mrow} element. See the rules for this later.
+\subsubsection{\texttt{sub_mlist} kernel noads}
+The inner list is converted as an \xmltag{mrow} element, with the core being the core of the \xmltag{mrow} element. See the rules for this later.
-\subsection{\texttt{delim} kernel noads}
+\subsubsection{\texttt{delim} kernel noads}
If the \texttt{small_char} is zero, these get converted as space like elements of the form
\begin{verbatim}
{[0] = 'mspace',
@@ -89,15 +86,13 @@ If the \texttt{small_char} is zero, these get converted as space like elements o
where 1.196 is replaced by the current value of \verb+\nulldelimiterspace+ converted to \texttt{bp}.
Otherwise the same rules as for \texttt{math_char} apply,
-except that instead of \texttt{mi} or \tag{mn} elements,
+except that instead of \texttt{mi} or \xmltag{mn} elements,
\texttt{mo} elements are generated,
\texttt{mathvariant} is never set,
\texttt{stretchy} is set to \texttt{true} if the operator is not on the list of default stretchy operators in the MathML specification
nd \texttt{lspace} and \texttt{rspace} attributes are set to zero.
-\subsection{\texttt{acc} kernel noads}
+\subsubsection{\texttt{acc} kernel noads}
Depending on the surrounding element containing the \texttt{acc} kernel noad, it is either stretchy or not.
If it's stretchy, the same rules as for \texttt{delim} apply, except that \texttt{lspace} and \texttt{rspace} are not set.
-Otherwise the \textt{stretchy} attribute is set to false if the operator is on the list of default stretchy operators.
-
-\end{document}
+Otherwise the \texttt{stretchy} attribute is set to false if the operator is on the list of default stretchy operators.
diff --git a/testfiles-lua/cases.mlr b/testfiles-lua/cases.mlr
index 286d847..ab7b075 100644
--- a/testfiles-lua/cases.mlr
+++ b/testfiles-lua/cases.mlr
@@ -1,10 +1,10 @@
-