# :Id: $Id: latex2mathml.py 9338 2023-04-08 21:08:47Z milde $
# :Copyright: © 2005 Jens Jørgen Mortensen [1]_
# © 2010, 2021 Günter Milde.
#
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
#
# .. [1] the original `rst2mathml.py` in `sandbox/jensj/latex_math`
"""Convert LaTeX maths code into presentational MathML.

This module is provisional:
the API is not settled and may change with any minor Docutils version.
"""
# Usage:
#
# >>> from latex2mathml import *
import re
import unicodedata
from docutils.utils.math import tex2unichar, toplevel_code
# Character data
# --------------
# LaTeX math macro to Unicode mappings.
# Character categories.
# identifiers -> <mi>
# Keys are LaTeX macro names (without the leading backslash), values are
# the corresponding Unicode characters.
# NOTE: `letters` is the very same object as `tex2unichar.mathalpha`
# (no copy is made), so the 'hbar' entry below is added to that mapping, too.
letters = tex2unichar.mathalpha
letters['hbar'] = '\u210F'  # compatibility mapping to ℏ (\hslash).
# (ħ LATIN SMALL LETTER H WITH STROKE is upright)

# special case: Capital Greek letters: (upright in TeX style)
# Kept in a table of their own (macro name -> Unicode character).
greek_capitals = {
    'Phi': '\u03a6', 'Xi': '\u039e', 'Sigma': '\u03a3',
    'Psi': '\u03a8', 'Delta': '\u0394', 'Theta': '\u0398',
    'Upsilon': '\u03d2', 'Pi': '\u03a0', 'Omega': '\u03a9',
    'Gamma': '\u0393', 'Lambda': '\u039b'}
# Function names: LaTeX macro name -> text to display.
functions = {
    # names typeset as two words; the parts are joined by
    # U+202F NARROW NO-BREAK SPACE
    'liminf': 'lim\u202finf',
    'limsup': 'lim\u202fsup',
    'injlim': 'inj\u202flim',
    'projlim': 'proj\u202flim',
    # embellished function names (see handle_cmd() below):
    # all of them display the plain text "lim"
    'varlimsup': 'lim',
    'varliminf': 'lim',
    'varprojlim': 'lim',
    'varinjlim': 'lim',
    # custom function name: the text is supplied by the macro argument,
    # so there is no fixed display text
    'operatorname': None,
}
# Functions whose display text equals the macro name.
functions.update({
    macro: macro
    for macro in (
        'arccos', 'arcsin', 'arctan', 'arg', 'cos',
        'cosh', 'cot', 'coth', 'csc', 'deg',
        'det', 'dim', 'exp', 'gcd', 'hom',
        'ker', 'lg', 'ln', 'log', 'Pr',
        'sec', 'sin', 'sinh', 'tan', 'tanh',
    )
})
# Functions with limits ('lim', 'sup', 'inf', 'max', 'min') are not listed
# here: they use <mo> to allow the "movablelimits" attribute
# (see `operators` and `movablelimits` below).
# modulo operator/arithmetic
# Each value is a 4-tuple of three flags and a padding width (CSS-like
# length string).  Judging from the examples in the trailing comments,
# "named" selects whether the word "mod" is printed and "parentheses"
# whether the expression is wrapped in (); "binary" presumably marks use as
# a binary operator -- TODO confirm against the code consuming this table.
modulo_functions = {
    # cmdname: (binary, named, parentheses, padding)
    'bmod': (True, True, False, '0.278em'),  # a mod n
    'pmod': (False, True, True, '0.444em'),  # a (mod n)
    'mod': (False, True, False, '0.667em'),  # a mod n
    'pod': (False, False, True, '0.444em'),  # a (n)
}
# math font selection -> <mstyle> or <mi>
# Maps LaTeX font-switching macro names to values of the MathML
# "mathvariant" attribute.  The trailing comment names the LaTeX package
# that provides the macro (where one is given).
# Note that both 'mathcal' and 'mathscr' map to 'script'.
math_alphabets = {
    # 'cmdname': 'mathvariant value'  # package
    'boldsymbol': 'bold',
    'mathbf': 'bold',
    'mathit': 'italic',
    'mathtt': 'monospace',
    'mathrm': 'normal',
    'mathsf': 'sans-serif',
    'mathcal': 'script',
    'mathbfit': 'bold-italic',  # isomath
    'mathbb': 'double-struck',  # amssymb
    'mathfrak': 'fraktur',  # amssymb
    'mathsfit': 'sans-serif-italic',  # isomath
    'mathsfbfit': 'sans-serif-bold-italic',  # isomath
    'mathscr': 'script',  # mathrsfs
    # unsupported: bold-fraktur
    #              bold-script
    #              bold-sans-serif
}
# operator, fence, or separator -> <mo>

# Delimiters that may be stretched: LaTeX macro name -> Unicode character.
stretchables = {
    # extensible delimiters allowed in left/right cmds
    'backslash': '\\',
    'uparrow': '\u2191',  # ↑ UPWARDS ARROW
    'downarrow': '\u2193',  # ↓ DOWNWARDS ARROW
    'updownarrow': '\u2195',  # ↕ UP DOWN ARROW
    'Uparrow': '\u21d1',  # ⇑ UPWARDS DOUBLE ARROW
    'Downarrow': '\u21d3',  # ⇓ DOWNWARDS DOUBLE ARROW
    'Updownarrow': '\u21d5',  # ⇕ UP DOWN DOUBLE ARROW
    'lmoustache': '\u23b0',  # ⎰ … CURLY BRACKET SECTION
    'rmoustache': '\u23b1',  # ⎱ … LEFT CURLY BRACKET SECTION
    'arrowvert': '\u23d0',  # ⏐ VERTICAL LINE EXTENSION
    'bracevert': '\u23aa',  # ⎪ CURLY BRACKET EXTENSION
    'lvert': '|',  # left |
    'lVert': '\u2016',  # left ‖
    'rvert': '|',  # right |
    'rVert': '\u2016',  # right ‖
    'Arrowvert': '\u2016',  # ‖
}
# Merge the fence/open/close tables from `tex2unichar`.  They are applied
# after the literal entries, so on a duplicate key the later value wins.
stretchables.update(tex2unichar.mathfence)
stretchables.update(tex2unichar.mathopen)  # Braces
stretchables.update(tex2unichar.mathclose)  # Braces
# >>> print(' '.join(sorted(set(stretchables.values()))))
# [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈
# Operators: LaTeX macro name -> Unicode character(s) or function name.
operators = {
    # negated symbols without pre-composed Unicode character
    # (base character followed by U+0338 COMBINING LONG SOLIDUS OVERLAY)
    'nleqq': '\u2266\u0338',  # ≦̸
    'ngeqq': '\u2267\u0338',  # ≧̸
    'nleqslant': '\u2a7d\u0338',  # ⩽̸
    'ngeqslant': '\u2a7e\u0338',  # ⩾̸
    'ngtrless': '\u2277\u0338',  # txfonts
    'nlessgtr': '\u2276\u0338',  # txfonts
    'nsubseteqq': '\u2AC5\u0338',  # ⫅̸
    'nsupseteqq': '\u2AC6\u0338',  # ⫆̸
    # compatibility definitions:
    'centerdot': '\u2B1D',  # BLACK VERY SMALL SQUARE | mathbin
    'varnothing': '\u2300',  # ⌀ DIAMETER SIGN | empty set
    'varpropto': '\u221d',  # ∝ PROPORTIONAL TO | sans serif
    'triangle': '\u25B3',  # WHITE UP-POINTING TRIANGLE | mathord
    'triangledown': '\u25BD',  # WHITE DOWN-POINTING TRIANGLE | mathord
    # alias commands:
    'dotsb': '\u22ef',  # ⋯ with binary operators/relations
    'dotsc': '\u2026',  # … with commas
    'dotsi': '\u22ef',  # ⋯ with integrals
    'dotsm': '\u22ef',  # ⋯ multiplication dots
    'dotso': '\u2026',  # … other dots
    # functions with movable limits (requires <mo>)
    'lim': 'lim',
    'sup': 'sup',
    'inf': 'inf',
    'max': 'max',
    'min': 'min',
}
# Merge the symbol tables from `tex2unichar` and the stretchable delimiters.
# Updates are applied in order, so on a duplicate key the last table wins
# (`stretchables` has the final say).
operators.update(tex2unichar.mathbin)  # Binary symbols
operators.update(tex2unichar.mathrel)  # Relation symbols, arrow symbols
operators.update(tex2unichar.mathord)  # Miscellaneous symbols
operators.update(tex2unichar.mathpunct)  # Punctuation
operators.update(tex2unichar.mathop)  # Variable-sized symbols
operators.update(stretchables)
# special cases
# Operators that need extra styling in addition to the character itself.
# The first comment inside each table names the attribute intended for
# its members.
thick_operators = {
    # style='font-weight: bold;'
    'thicksim': '\u223C',  # ∼
    'thickapprox': '\u2248',  # ≈
}
small_operators = {
    # mathsize='75%'
    'shortmid': '\u2223',  # ∣
    'shortparallel': '\u2225',  # ∥
    'nshortmid': '\u2224',  # ∤
    'nshortparallel': '\u2226',  # ∦
    'smallfrown': '\u2322',  # ⌢ FROWN
    'smallsmile': '\u2323',  # ⌣ SMILE
    'smallint': '\u222b',  # ∫ INTEGRAL
}
# Operators and functions with limits above/below in display formulas
# and in index position inline (movablelimits=True)
# The entries are LaTeX macro names (without backslash).
movablelimits = ('bigcap', 'bigcup', 'bigodot', 'bigoplus', 'bigotimes',
                 'bigsqcup', 'biguplus', 'bigvee', 'bigwedge',
                 'coprod', 'intop', 'ointop', 'prod', 'sum',
                 'lim', 'max', 'min', 'sup', 'inf')
# Depending on settings, integrals may also be in this category.
# (e.g. if "amsmath" is loaded with option "intlimits", see
# http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf)
# NOTE: `movablelimits` is a tuple and has no `extend()` method; the
# disabled snippet below would have to use ``movablelimits += (...)``
# (or the tuple be turned into a list) before it could be enabled.
# movablelimits.extend(('fint', 'iiiint', 'iiint', 'iint', 'int', 'oiint',
#                       'oint', 'ointctrclockwise', 'sqint',
#                       'varointclockwise',))
# horizontal space -> <mspace>

# Maps LaTeX spacing macro names (named and single-character forms) to a
# width in em.  The trailing comments give the TeX size in math units
# (18 mu = 1 em, cf. the 'quad' entry).
spaces = {'qquad': '2em',  # two \quad
          'quad': '1em',  # 18 mu
          'thickspace': '0.2778em',  # 5mu = 5/18em
          ';': '0.2778em',  # 5mu thickspace
          ' ': '0.25em',  # inter word space
          'medspace': '0.2222em',  # 4mu = 2/9em
          ':': '0.2222em',  # 4mu medspace
          'thinspace': '0.1667em',  # 3mu = 1/6em
          ',': '0.1667em',  # 3mu thinspace
          'negthinspace': '-0.1667em',  # -3mu = -1/6em
          '!': '-0.1667em',  # negthinspace
          'negmedspace': '-0.2222em',  # -4mu = -2/9em
          'negthickspace': '-0.2778em',  # -5mu = -5/18em
          }
# accents
# (presumably rendered with <mover>; the element name is missing from the
# original comment -- TODO confirm)
# Each value is a pair: the spacing (stand-alone) form of the accent and
# the corresponding combining character.
accents = {
    # TeX: (spacing, combining)
    'acute': ('´', '\u0301'),
    'bar': ('ˉ', '\u0304'),
    'breve': ('˘', '\u0306'),
    'check': ('ˇ', '\u030C'),
    'dot': ('˙', '\u0307'),
    'ddot': ('¨', '\u0308'),
    'dddot': ('⋯', '\u20DB'),
    'grave': ('`', '\u0300'),
    'hat': ('ˆ', '\u0302'),
    'mathring': ('˚', '\u030A'),
    'tilde': ('˜', '\u0303'),  # tilde ~ or small tilde ˜?
    'vec': ('→', '\u20d7'),  # → too heavy, accents="false"
    # TODO: ddddot
}

# limits etc. -> <mover> or <munder>
# Each value is a pair: the character placed over (or under) the base and
# an offset correction given in em.
over = {
    # TeX: (char, offset-correction/em)
    'overbrace': ('\u23DE', -0.2),  # DejaVu Math -0.6
    'overleftarrow': ('\u2190', -0.2),
    'overleftrightarrow': ('\u2194', -0.2),
    'overline': ('_', -0.2),  # \u2012 does not stretch
    'overrightarrow': ('\u2192', -0.2),
    'widehat': ('^', -0.5),
    'widetilde': ('~', -0.3),
}
# Same layout as `over`: (char, offset-correction/em).
under = {'underbrace': ('\u23DF', 0.1),  # DejaVu Math -0.7
         'underleftarrow': ('\u2190', -0.2),
         'underleftrightarrow': ('\u2194', -0.2),
         'underline': ('_', -0.8),
         'underrightarrow': ('\u2192', -0.2),
         }
# Character translations
# ----------------------

# characters with preferred alternative in mathematical use
# cf. https://www.w3.org/TR/MathML3/chapter7.html#chars.anomalous
# Maps the character as typed to the character to use instead.
anomalous_chars = {'-': '\u2212',  # HYPHEN-MINUS -> MINUS SIGN
                   ':': '\u2236',  # COLON -> RATIO
                   '~': '\u00a0',  # NO-BREAK SPACE
                   }

# blackboard bold (Greek characters not working with "mathvariant")
# (Firefox 78)
# Maps a Greek letter to its pre-composed double-struck character.
mathbb = {'Γ': '\u213E',  # ℾ
          'Π': '\u213F',  # ℿ
          'Σ': '\u2140',  # ⅀
          'γ': '\u213D',  # ℽ
          'π': '\u213C',  # ℼ
          }
# Matrix environments
# Maps the environment name to a pair (left fence, right fence);
# an empty string means "no fence on this side".
matrices = {
    # name: fences
    'matrix': ('', ''),
    'smallmatrix': ('', ''),  # smaller, see begin_environment()!
    'pmatrix': ('(', ')'),
    'bmatrix': ('[', ']'),
    'Bmatrix': ('{', '}'),
    'vmatrix': ('|', '|'),
    'Vmatrix': ('\u2016', '\u2016'),  # ‖
    'cases': ('{', ''),  # opening brace only
}
# Layout style macros: macro name -> attribute values
# ("displaystyle" flag and "scriptlevel") selected by that macro.
layout_styles = {
    'displaystyle': {'displaystyle': True, 'scriptlevel': 0},
    'textstyle': {'displaystyle': False, 'scriptlevel': 0},
    'scriptstyle': {'displaystyle': False, 'scriptlevel': 1},
    'scriptscriptstyle': {'displaystyle': False, 'scriptlevel': 2},
}
# See also https://www.w3.org/TR/MathML3/chapter3.html#presm.scriptlevel

# Fraction-like macros: macro name -> (style attributes, fraction attributes).
# The binomial variants are fractions with a line thickness of 0.
fractions = {
    # name: style_attrs, frac_attrs
    'frac': ({}, {}),
    # display style plus a class value (in LaTeX with padding);
    # a separate dict, not the one stored in `layout_styles`
    'cfrac': ({**layout_styles['displaystyle'], 'CLASS': 'cfrac'}, {}),
    'dfrac': (layout_styles['displaystyle'], {}),
    'tfrac': (layout_styles['textstyle'], {}),
    'binom': ({}, {'linethickness': 0}),
    'dbinom': (layout_styles['displaystyle'], {'linethickness': 0}),
    'tbinom': (layout_styles['textstyle'], {'linethickness': 0}),
}
# Sizes for explicitly sized delimiters.  The values of `bigdelimiters`
# (0 ... 4) are valid indices into `delimiter_sizes`; index 0 is the empty
# string and is shared by 'left' and 'right'.
delimiter_sizes = ['', '1.2em', '1.623em', '2.047em', '2.470em']
bigdelimiters = {'left': 0,
                 'right': 0,
                 'bigl': 1,
                 'bigr': 1,
                 'Bigl': 2,
                 'Bigr': 2,
                 'biggl': 3,
                 'biggr': 3,
                 'Biggl': 4,
                 'Biggr': 4,
                 }
# MathML element classes
# ----------------------
class math:
    """Base class for MathML elements and root of MathML trees.

    A node keeps its child nodes in the list `children` and its XML
    attributes in the dict `attributes`.  Attributes are also available
    via dictionary syntax (``node['display']``) and `get()`.
    The XML tag name is the name of the node's class.
    """

    nchildren = None
    """Expected number of children or None"""
    # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
    parent = None
    """Parent node in MathML DOM tree."""
    _level = 0  # indentation level (static class variable)
    xml_entities = {
        # for invalid and invisible characters
        # (keys are code points, the format expected by `str.translate()`)
        ord('<'): '&lt;',
        ord('>'): '&gt;',
        ord('&'): '&amp;',
        0x2061: '&ApplyFunction;',  # U+2061 FUNCTION APPLICATION (invisible)
    }
    _boolstrings = {True: 'true', False: 'false'}
    """String representation of boolean MathML attribute values."""

    html_tagname = 'span'
    """Tag name for HTML representation."""

    def __init__(self, *children, **attributes):
        """Set up node with `children` and `attributes`.

        Attributes are downcased: Use CLASS to set "class" value.

        >>> math(mn(3), CLASS='test')
        math(mn(3), class='test')
        >>> math(CLASS='test').toprettyxml()
        '<math class="test">\\n</math>'

        Raise `SyntaxError` if more children are passed than the node
        can hold (see `append()`).
        """
        # Use .lower() to allow argument `CLASS` for attribute `class`
        # (Python keyword). MathML uses only lowercase attributes.
        #
        # The attributes are stored *before* the children are added:
        # `append()` formats `self` into its error message and `__repr__()`
        # reads `self.attributes`.
        self.attributes = {key.lower(): value
                           for key, value in attributes.items()}
        self.children = []
        self.extend(children)

    def __repr__(self):
        """Return a constructor-like representation of the node.

        Lists the children, the `data` attribute (if the instance has
        one), ``switch=True`` for `MathSchema` instances with a true
        `switch`, and all attributes that are not None.
        """
        content = [repr(item) for item in getattr(self, 'children', [])]
        if hasattr(self, 'data'):
            content.append(repr(self.data))
        if isinstance(self, MathSchema) and self.switch:
            content.append('switch=True')
        content += ["%s=%r" % (k, v) for k, v in self.attributes.items()
                    if v is not None]
        return self.__class__.__name__ + '(%s)' % ', '.join(content)

    def __len__(self):
        """Return the number of children."""
        return len(self.children)

    # emulate dictionary-like access to attributes
    # see `docutils.nodes.Element` for dict/list interface
    def __getitem__(self, key):
        """Return the value of attribute `key`; raise KeyError if unset."""
        return self.attributes[key]

    def __setitem__(self, key, item):
        """Set attribute `key` to `item`."""
        self.attributes[key] = item

    def get(self, *args, **kwargs):
        """Return an attribute value; same interface as `dict.get()`."""
        return self.attributes.get(*args, **kwargs)

    def full(self):
        """Return True if the node already has `nchildren` children.

        Nodes with ``nchildren = None`` are never full.
        """
        return (self.nchildren is not None
                and len(self) >= self.nchildren)

    def append(self, child):
        """Append child and return self or first non-full parent.

        If self is full after appending, go up the tree and return the
        first non-full node or `None`.

        Raise `SyntaxError` if self is already full before appending.
        """
        if self.full():
            raise SyntaxError('Node %s already full!' % self)
        self.children.append(child)
        child.parent = self
        if self.full():
            return self.close()
        return self

    def extend(self, children):
        """Append every item of `children` (see `append()`); return self."""
        for child in children:
            self.append(child)
        return self

    def close(self):
        """Close element and return first non-full parent or None."""
        parent = self.parent
        while parent is not None and parent.full():
            parent = parent.parent
        return parent

    def toprettyxml(self):
        """Return XML representation of self as string."""
        return ''.join(self._xml())

    def _xml(self, level=0):
        """Return the XML representation as a list of string fragments.

        `level` is the nesting depth used for indentation.
        """
        return ([self.xml_starttag()]
                + self._xml_body(level)
                + ['</%s>' % self.__class__.__name__])

    def xml_starttag(self):
        """Return the start tag with all attributes that are not None.

        Boolean values are written as "true"/"false" (see `_boolstrings`);
        all other values are converted with `str()` and written unchanged.
        """
        parts = [self.__class__.__name__]
        for name, value in self.attributes.items():
            if value is None:
                continue
            # Test the type instead of looking up `value` in `_boolstrings`:
            # the integers 0 and 1 compare equal to False and True
            # but must be written as numbers.
            if isinstance(value, bool):
                value = self._boolstrings[value]
            parts.append('%s="%s"' % (name, value))
        return '<%s>' % ' '.join(parts)

    def _xml_body(self, level=0):
        """Return the XML fragments for the children of self.

        Every child starts on a new line, indented one step deeper than
        `level`.  The list ends with a newline and the indentation for
        the closing tag of self.
        """
        xml = []
        for child in self.children:
            xml.extend(['\n', ' ' * (level+1)])
            xml.extend(child._xml(level+1))
        xml.extend(['\n', ' ' * level])
        return xml

    def is_block(self):
        """Return true, if `self` or a parent has ``display='block'``."""
        try:
            return self['display'] == 'block'
        except KeyError:
            # No "display" attribute here: ask the parent.  The root has
            # ``parent = None`` which raises AttributeError.
            try:
                return self.parent.is_block()
            except AttributeError:
                return False
# >>> n2 = math(mn(2))
# >>> n2
# math(mn(2))
# >>> n2.toprettyxml()
# ''
# >>> len(n2)
# 1
# >>> eq3 = math(id='eq3', display='block')
# >>> eq3
# math(id='eq3', display='block')
# >>> eq3.toprettyxml()
# ''
# >>> len(eq3)
# 0
# >>> math(CLASS='bold').xml_starttag()
# '