% \iffalse meta-comment
%
%% Copyright (C) 2020-2024 by Marcel Krueger
%%
%% This file may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either
%% version 1.3c of this license or (at your option) any later
%% version. The latest version of this license is in:
%%
%% http://www.latex-project.org/lppl.txt
%%
%% and version 1.3 or later is part of all distributions of
%% LaTeX version 2005/12/01 or later.
%
%<*batch>
%<*gobble>
\ifx\jobname\relax\let\documentclass\undefined\fi
\ifx\documentclass\undefined
\csname fi\endcsname
%</gobble>
% Unpack both package variants from this file: the LuaTeX and the pdfTeX
% flavour share the `package' code and differ only in the engine guard.
\input docstrip.tex
\keepsilent
\generate{
  \file{luamml.sty}{\from{luamml.dtx}{package,luatex}}
  \file{luamml-pdf.sty}{\from{luamml.dtx}{package,pdftex}}
}
\endbatchfile
%</batch>
%<*gobble>
\fi
\expandafter\ifx\csname @currname\endcsname\empty
\csname fi\endcsname
%</gobble>
%<*driver>
% Documentation driver: typesets this file with l3doc.
\documentclass{l3doc}
\usepackage{luamml}
\usepackage{csquotes,luacolor}
% Allow |...| as short verbatim in the documentation text.
\MakeShortVerb{\|}
\RecordChanges
\begin{document}
% Switch on the \tracingmathml debugging helper for the examples below.
\tracingmathml2
\DocInput{luamml.dtx}
\PrintIndex
\PrintChanges
\end{document}
%</driver>
%<*gobble>
\fi
%</gobble>
% \fi
%
% \GetFileInfo{luamml.dtx}
% \title{The \pkg{luamml} package%
% \thanks{This document corresponds to \pkg{luamml}~\fileversion, dated~\filedate.}%
% }
% \author{Marcel Krüger}
%
% \maketitle
%
% \begin{documentation}
% \section{Use case}
% When generating output for the web or tagged output, mathematical content should often be represented as MathML.
% This package uses Lua\TeX~callbacks to automatically attempt to convert Lua\TeX~math mode output into MathML.
%
% \section{Usage}
% The \pkg{luamml} package is designed to be used in automated ways by other packages and usually should not be invoked directly by the end user.
% For experiments, \texttt{luamml-demo} is included which provides easier to use interfaces.
%
% Add in your preamble
% \begin{verbatim}
% \usepackage[files]{luamml-demo}
% \end{verbatim}
% This will trigger the output of individual files for each block of math output containing corresponding MathML.
%
% Alternatively
% \begin{verbatim}
% \usepackage[l3build]{luamml-demo}
% \end{verbatim}
% will generate a single file with a concatenation of all MathML blocks.
%
% For automated use, the \pkg{luamml} package can be included directly, followed by enclosing blocks which should generate files with \cmd{luamml_begin_single_file:} and \cmd{luamml_end_single_file:}.
% The filename can be set with \cmd{luamml_set_filename:n}.
%
% \section{Improving MathML conversion}
% When using constructs which do not automatically get converted in acceptable form, conversion hints can be provided with \cmd{luamml_annotate:en}.
% This allows to provide a replacement MathML structure in Lua table form, for example
% \begin{verbatim}
% \luamml_annotate:en {
% nucleus = true,
% core = {[0] = 'mi', 'TeX'},
% }{
% \hbox{\TeX}
% }
% \end{verbatim}
% produces a |<mi>TeX</mi>| element in the output instead of trying to import \TeX~as a mathematical expression.
% The table structure is explained in an appendix.
%
% \section{Features \& Limitations}
% Currently all mathematical expressions which purely contain Unicode encoded math mode material without embedded non-math should get converted successfully.
% Usage with non-Unicode math (\TeX's 8-bit math fonts) is highly experimental and undocumented.
% Any attempt to build complicated structures by embedding arbitrary \TeX\ code in the middle of math mode needs to have a MathML replacement specified.
% We try to automate more cases in the future.
%
% \appendix
% \input{luamml-algorithm}
2021-06-22 17:00:14 +02:00
% \end{documentation}
%
% \begin{implementation}
% \section{Package Implementation}
% \subsection{Package declaration}
% \iffalse
%<*package>
% \fi
% \begin{macrocode}
%<@@=luamml>
%<*luatex>
\ProvidesExplPackage {luamml} {2024-08-14} {0.1.0}
  {Automatically generate presentational MathML from LuaTeX math expressions}
%</luatex>
%<*pdftex>
\ProvidesExplPackage {luamml-pdf} {2024-08-14} {0.1.0}
  {MathML generation for L̶u̶a̶pdfLaTeX}
%</pdftex>
% \end{macrocode}
%
% \subsection{Initialization}
% These variables have to appear before the Lua module is loaded and will be used to
% communicate information to the callback.
%
% Here \cs{tracingmathml} does not use an expl3 name since it is not intended for
% programming use but only as a debugging helper for the user.
% The other variables are internal, but we provide public interfaces for setting
% them later.
% \begin{macrocode}
\int_new:N \l__luamml_flag_int   % conversion mode, set by the \luamml_flag_...: functions
\int_new:N \l__luamml_pretty_int % initialised to 1 below
%<luatex>\tl_new:N \l__luamml_filename_tl % set by \luamml_set_filename:n
\tl_new:N \l__luamml_root_tl     % name of the root element
\tl_set:Nn \l__luamml_root_tl { mrow }
\tl_new:N \l__luamml_label_tl    % label stored by the \luamml_flag_save:... functions
%<pdftex>\int_new:N \g__luamml_formula_id_int % running number used by \luamml_pdf_write:
%<luatex>\int_new:N \tracingmathml
\int_set:Nn \l__luamml_pretty_int { 1 }
% \end{macrocode}
%
% Now we can load the Lua module which defines the callback.
% Of course until pdf\TeX{} starts implementing \cs{directlua} this is only
% done in Lua\TeX.
% \begin{macrocode}
%<luatex>\lua_now:n { require'luamml-tex' }
% \end{macrocode}
%
% \subsection{Hook}
% We also call a hook with arguments at the end of every MathML conversion with the result.
% Currently only implemented in Lua\TeX{} since it immediately provides the output.
% \begin{macrocode}
%<*luatex>
\hook_new_with_args:nn { luamml / converted } { 1 } % hook taking one argument
\cs_new_protected:Npn \__luamml_output_hook:n {
  % The argument is not grabbed here; it is picked up by \hook_use:nnw.
  \hook_use:nnw { luamml / converted } { 1 }
}
% Presumably provided by the Lua module loaded above; it is handed the
% wrapper that runs the hook.
\__luamml_register_output_hook:N \__luamml_output_hook:n
%</luatex>
% \end{macrocode}
%
% \subsection{Flags}
% The most important interface is for setting the flag which controls how the
% formulas should be converted.
%
% \begin{macro}{\luamml_flag_process:}
% Consider the current formula to be a complete, free-standing mathematical
% expression which should be converted to MathML. Additionally, the formula
% is also saved in the \texttt{start\_math} node as with
% \cs{luamml_flag_save:}.
% \begin{macrocode}
\cs_new_protected:Npn \luamml_flag_process: {
  \tl_clear:N \l__luamml_label_tl          % complete formulas carry no label
  \int_set:Nn \l__luamml_flag_int { 3 }    % 1 (save, as in \luamml_flag_save:n) plus 2
}
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\__luamml_maybe_structelem:}
% An internal helper which can be added to a tag to preserve the external state
% of the structelem flag.
% It expands to the value of the bit with value~8 in the current flag,
% followed by a trailing |+|, so it is meant to be placed directly in front of
% a number inside an integer expression.
% \begin{macrocode}
\cs_new:Npn \__luamml_maybe_structelem: {
  (
    8 * \int_mod:nn {
      \int_div_truncate:nn { \l__luamml_flag_int } {8}
    } {2}
  ) +
}
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\__luamml_style_to_num:N}
% Translate a math style command into the amount it contributes to the flag.
% In Lua\TeX{} the style token is used directly as a number and multiplied by
% 32. In pdf\TeX{} the meaning of the token is compared against the four
% style commands, giving 0, 32, 64 or 96. Any other token leaves the undefined
% \cs{Invalid_mathstyle} in the input, which triggers an error.
% \begin{macrocode}
\cs_new:Npn \__luamml_style_to_num:N #1 {
%<luatex>  32 * #1
%<*pdftex>
  \token_case_meaning:NnF #1 {
    \displaystyle {0}
    \textstyle {32}
    \scriptstyle {64}
    \scriptscriptstyle {96}
  } {
    \Invalid_mathstyle
  }
%</pdftex>
}
% \end{macrocode}
% \end{macro}
%
%
% \begin{macro}{\luamml_flag_save:n,
%               \luamml_flag_save:nN,
%               \luamml_flag_save:nn,
%               \luamml_flag_save:nNn}
% Convert the current formula but only save its representation in the math
% node without emitting it as a complete formula. This is useful when the
% expression forms part of a bigger formula and will be integrated into its
% MathML tables later by special code.
% It accepts up to three parameters: a label, optionally one math style command
% (\cs{displaystyle}, \cs{textstyle}, etc.) which is the implicit math style
% (so the style which the surrounding code expects this style to have) and
% optionally a name for the root element (defaults to \texttt{mrow}).
% If the root element name is \texttt{mrow}, it will get suppressed in some
% cases.
%
% All variants keep the current state of the structelem flag by prefixing the
% new flag value with \cs{__luamml_maybe_structelem:}.
% \begin{macrocode}
\cs_new_protected:Npn \luamml_flag_save:n #1 {
  \tl_set:Nn \l__luamml_label_tl {#1}
  \int_set:Nn \l__luamml_flag_int { \__luamml_maybe_structelem: 1 }
}
\cs_new_protected:Npn \luamml_flag_save:nN #1#2 {
  \tl_set:Nn \l__luamml_label_tl {#1}
  \int_set:Nn \l__luamml_flag_int
    { \__luamml_maybe_structelem: 17 + \__luamml_style_to_num:N #2 }
}
\cs_new_protected:Npn \luamml_flag_save:nn #1#2 {
  \tl_set:Nn \l__luamml_label_tl {#1}
  \int_set:Nn \l__luamml_flag_int { \__luamml_maybe_structelem: 5 }
  \tl_set:Nn \l__luamml_root_tl {#2}
}
\cs_new_protected:Npn \luamml_flag_save:nNn #1#2#3 {
  \tl_set:Nn \l__luamml_label_tl {#1}
  \int_set:Nn \l__luamml_flag_int
    { \__luamml_maybe_structelem: 21 + \__luamml_style_to_num:N #2 }
  \tl_set:Nn \l__luamml_root_tl {#3}
}
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\luamml_flag_ignore:}
% Completely ignore the math mode material.
% \begin{macrocode}
\cs_new_protected:Npn \luamml_flag_ignore: {
  \int_zero:N \l__luamml_flag_int % flag value 0: ignore the formula
}
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\luamml_flag_structelem:}
% Like \cs{luamml_flag_process:}, but additionally add PDF structure
% elements. This only works in Lua\TeX\ and requires that the \pkg{tagpdf} package
% has been loaded \emph{before} \texttt{luamml}.
% \begin{macrocode}
%<*luatex>
\cs_new_protected:Npn \luamml_flag_structelem: {
  \tl_clear:N \l__luamml_label_tl
  % 11 = 3 (as set by \luamml_flag_process:) + 8 (the bit which
  % \__luamml_maybe_structelem: preserves)
  \int_set:Nn \l__luamml_flag_int { 11 }
}
%</luatex>
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\luamml_set_filename:n}
% Sets the filename to which the generated MathML gets written.
% Previous content from the file will get overwritten. This includes results
% written by a previous formula. Therefore this has to be called separately
% for every formula or it must expand to different values to be useful.
% The value is fully expanded when the file is written.
%
% Only complete formulas get written into files (so formulas where
% \cs{luamml_flag_process:} or \cs{luamml_flag_structelem:} are in effect).
%
% Only implemented in Lua\TeX, in pdf\TeX\ the arguments for \texttt{pdfmml}
% determine the output location.
% \begin{macrocode}
%<*luatex>
\cs_new_protected:Npn \luamml_set_filename:n #1 {
  \tl_set:Nn \l__luamml_filename_tl {#1} % stored unexpanded
}
%</luatex>
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\luamml_begin_single_file:, \luamml_end_single_file:}
% Everything between these two commands gets written into the same XML file.
% The filename is expanded when \cs{luamml_begin_single_file:} gets executed.
%
% (Implemented in Lua)
% \end{macro}
%
% By default, the flag is set to assume complete formulas.
% \begin{macrocode}
\luamml_flag_process:
% \end{macrocode}
%
% \subsection{Annotations}
% These are implemented very differently depending on the engine, but the interface
% should be the same.
% \subsubsection{Lua\TeX}
% \begin{macrocode}
%<*luatex>
% \end{macrocode}
% \begin{macro}{\luamml_annotate:nen, \luamml_annotate:en}
% A simple annotation scheme: The first argument is the number of top level
% noads to be annotated, the second parameter the annotation and the third
% parameter the actual list of math tokens. The first argument can be omitted to
% let Lua\TeX{} determine the number itself.
%
% Passing the first parameter explicitly is useful for any annotations which
% should be compatible with future pdf\TeX{} versions of this functionality.
%
% Both variants bracket the math tokens between
% \cs{__luamml_annotate_begin:} and one of the \cs{__luamml_annotate_end:e}
% functions, which receive the annotation (and, if given, the count).
% \begin{macrocode}
\cs_new_protected:Npn \luamml_annotate:nen #1#2#3 {
  \__luamml_annotate_begin:
    #3
  \__luamml_annotate_end:we \tex_numexpr:D #1 \scan_stop: {#2}
}
\cs_new_protected:Npn \luamml_annotate:en #1#2 {
  \__luamml_annotate_begin:
    #2
  \__luamml_annotate_end:e {#1}
}
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
%</luatex>
% \end{macrocode}
% \subsubsection{pdf\TeX}
% \begin{macrocode}
%<*pdftex>
% \end{macrocode}
% \begin{macro}{\__luamml_pdf_showlists:}
% Here and in many other locations the \pdfTeX{} implementation is based on \cs{showlists},
% so we define an internal wrapper which sets all relevant parameters.
% \begin{macrocode}
\cs_if_exist:NTF \showstream { % \showstream is available: use a dedicated output file
\iow_new:N \l__luamml_pdf_stream
\iow_open:Nn \l__luamml_pdf_stream { \jobname .tml } % the stream is <jobname>.tml
\cs_new_protected:Npn \__luamml_pdf_showlists: {
\group_begin: % keep the parameter changes below local
\int_set:Nn \tex_showboxdepth:D { \c_max_int } % largest possible depth and breadth
\int_set:Nn \tex_showboxbreadth:D { \c_max_int }
\showstream = \l__luamml_pdf_stream % presumably sends the \showlists output to the stream
\tex_showlists:D
\group_end:
}
} { % no \showstream: the "stream" is the log file
\cs_set_eq:NN \l__luamml_pdf_stream \c_log_iow
\cs_set_eq:NN \__luamml_pdf_set_showstream: \scan_stop:
\cs_new_protected:Npn \__luamml_pdf_showlists: {
\group_begin:
\int_set:Nn \l_tmpa_int { \tex_interactionmode:D } % remember the current interaction mode
\int_set:Nn \tex_interactionmode:D { 0 } % use mode 0 while \showlists runs
\int_set:Nn \tex_showboxdepth:D { \c_max_int }
\int_set:Nn \tex_showboxbreadth:D { \c_max_int }
\tex_showlists:D
\int_set:Nn \tex_interactionmode:D { \l_tmpa_int } % put the saved mode back explicitly
\group_end:
}
}
% \end{macrocode}
% \end{macro}
%
%
% \begin{macro}{\luamml_annotate:nen, \luamml_annotate:en}
% Now we can define the annotation commands for pdf\TeX.
% Both variants increment a global annotation id, write a
% \texttt{LUAMML\_MARK\_REF} record carrying that id at shipout time and
% immediately write a \texttt{LUAMML\_MARK} record containing the id, the
% count and the annotation.
% When the count is given explicitly, the math tokens are then inserted
% unchanged.
% Otherwise the count field refers to \texttt{data.count[\meta{id}]} and the
% math tokens are surrounded by \texttt{LUAMML\_COUNT} and
% \texttt{LUAMML\_COUNT\_END} records, each followed by a
% \cs{__luamml_pdf_showlists:} dump.
% \begin{macrocode}
\cs_generate_variant:Nn \tl_to_str:n { e }
\int_new:N \g__luamml_annotation_id_int
\cs_new_protected:Npn \luamml_annotate:nen #1#2#3 {
  \int_gincr:N \g__luamml_annotation_id_int
  \iow_shipout_x:Nx \l__luamml_pdf_stream {
    LUAMML_MARK_REF:
    \int_use:N \g__luamml_annotation_id_int
    :
  }
  \iow_now:Nx \l__luamml_pdf_stream {
    LUAMML_MARK:
    \int_use:N \g__luamml_annotation_id_int
    :
    count = \int_eval:n {#1},
    #2
    \iow_newline:
    LUAMML_MARK_END
  }
  #3
}
\cs_new_protected:Npn \luamml_annotate:en #1#2 {
  \int_gincr:N \g__luamml_annotation_id_int
  \iow_shipout_x:Nx \l__luamml_pdf_stream {
    LUAMML_MARK_REF:
    \int_use:N \g__luamml_annotation_id_int
    :
  }
  \iow_now:Nx \l__luamml_pdf_stream {
    LUAMML_MARK:
    \int_use:N \g__luamml_annotation_id_int
    :
    count = data.count[\int_use:N \g__luamml_annotation_id_int],
    #1
    \iow_newline:
    LUAMML_MARK_END
  }
  % \use:x fixes the current id in both COUNT records before the math
  % tokens (protected from expansion by \exp_not:n) are executed.
  \use:x {
    \iow_now:Nn \l__luamml_pdf_stream {
      LUAMML_COUNT:
      \int_use:N \g__luamml_annotation_id_int
    }
    \__luamml_pdf_showlists:
    \exp_not:n {#2}
    \iow_now:Nn \l__luamml_pdf_stream {
      LUAMML_COUNT_END:
      \int_use:N \g__luamml_annotation_id_int
    }
    \__luamml_pdf_showlists:
  }
}
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
%</pdftex>
% \end{macrocode}
%
% \subsection{Trigger for specific formula}
% This only applies for pdf\TeX\ since in Lua\TeX\ everything is controlled by the callback,
% but for compatibility the function is defined anyway.
%
% \begin{macro}{\luamml_pdf_write:}
% We could accept parameters for the flag and tag here, but for compatibility
% with Lua\TeX{} they are passed in macros instead.
% In pdf\TeX{} this increments the formula id, writes a
% \texttt{LUAMML\_FORMULA\_BEGIN} record containing the id, the current flag,
% the root element name and the label, dumps the current lists with
% \cs{__luamml_pdf_showlists:} and closes with \texttt{LUAMML\_FORMULA\_END}.
% In Lua\TeX{} it is defined equal to \cs{scan_stop:}.
% \begin{macrocode}
%<*pdftex>
\cs_new_protected:Npn \luamml_pdf_write: {
  \int_gincr:N \g__luamml_formula_id_int
  \iow_now:Nx \l__luamml_pdf_stream {
    LUAMML_FORMULA_BEGIN:
    \int_use:N \g__luamml_formula_id_int
    :
    \int_use:N \l__luamml_flag_int
    :
    \l__luamml_root_tl
    :
    \l__luamml_label_tl
  }
  \__luamml_pdf_showlists:
  \iow_now:Nx \l__luamml_pdf_stream {
    LUAMML_FORMULA_END
  }
}
%</pdftex>
%<luatex>\cs_new_eq:NN \luamml_pdf_write: \scan_stop:
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
% \end{macrocode}
%
% \subsection{Further helpers}
%
% \begin{macro}{\RegisterFamilyMapping}
% The Lua version of this is defined in the Lua module.
% The pdf\TeX{} version only writes a \texttt{REGISTER\_MAPPING} instruction
% to the output stream. The first argument must be an integer variable whose
% value is written with \cs{int_use:N}, the second argument is written after
% expansion.
% \begin{macrocode}
%<*pdftex>
\NewDocumentCommand \RegisterFamilyMapping {m m} {
  \iow_now:Nx \l__luamml_pdf_stream {
    LUAMML_INSTRUCTION:REGISTER_MAPPING: \int_use:N #1 : #2
  }
}
%</pdftex>
% \end{macrocode}
% \end{macro}
%
%
2021-06-22 17:00:14 +02:00
% \subsection{Patching}
% For some packages, we ship with patches to make them more compatible and to
% demonstrate how other code can be patched to work with \texttt{luamml}.
%
% These are either loaded directly if the packages are loaded or delayed using
% \LaTeX's hook system otherwise.
% \begin{macro}{\__luamml_patch_package:nn, \__luamml_patch_package:n}
% For this, we use two helpers: First a wrapper which runs arbitrary code either
% now (if the package is already loaded) or as soon as the package loads, second
% an application of the first one to load packages following \texttt{luamml}'s
% naming scheme for these patch packages.
% The first argument is always the package name. For the two argument form
% the second argument is the code to run, the one argument form loads
% \texttt{luamml-patches-\meta{package}}.
% \begin{macrocode}
\cs_new_protected:Npn \__luamml_patch_package:nn #1 #2 {
  \@ifpackageloaded {#1} {#2} {
    % Not loaded yet: defer the code to the hook run after the package.
    \hook_gput_code:nnn {package/#1/after} {luamml} {#2}
  }
}
\cs_new_protected:Npn \__luamml_patch_package:n #1 {
  \__luamml_patch_package:nn {#1} {
    \RequirePackage { luamml-patches-#1 }
  }
}
% \end{macrocode}
% \end{macro}
%
% We currently provide minimal patching for the kernel, \pkg{amstext}, \pkg{amsmath} and \pkg{array}.
% Currently only the kernel code supports pdf\TeX, but it's planned to extend this.
% \begin{macrocode}
\RequirePackage { luamml-patches-kernel }
%<*luatex>
\__luamml_patch_package:n {amstext}
\__luamml_patch_package:n {amsmath}
\__luamml_patch_package:n {array}
%</luatex>
% \end{macrocode}
% \iffalse
%</package>
% \fi
% \end{implementation}
% \Finale