module MathML2AsciiMath

Constants

VERSION

Public Class Methods

encodechars(xml) click to toggle source
# File lib/mathml2asciimath/m2a.rb, line 17
# Rewrites the Unicode mathematical characters in a string as their
# AsciiMath spellings (for example "\u03b1" becomes "alpha" and "\u2264"
# becomes "<=").
#
# The rewrites are applied one after another, so order matters:
# * the ASCII characters "~", "/" and "\" are escaped first, so that tokens
#   emitted by later rewrites ("~=", "~~", "O/", "/_", "\ ", ...) are not
#   rewritten a second time;
# * runs of no-break spaces are matched longest-first.
#
# When a character could map to more than one AsciiMath token, only the
# first applicable rewrite is listed, since any later one could never match.
#
# @param xml [String] text to translate
# @return [String] a new string with the substitutions applied
def self.encodechars(xml)
  xml.gsub(/~/, "tilde") # must precede every rewrite that emits "~"
    .gsub(/\//, "//")
    .gsub(/\\/) { "\\\\" } # block form: a "\\\\" replacement string would collapse to one backslash
    .gsub(/\u03b1/, "alpha")
    .gsub(/\u03b2/, "beta")
    .gsub(/\u03b3/, "gamma")
    .gsub(/\u0393/, "Gamma")
    .gsub(/\u03b4/, "delta")
    .gsub(/\u0394/, "Delta")
    .gsub(/\u2206/, "Delta")
    .gsub(/\u03b5/, "epsilon")
    .gsub(/\u025b/, "varepsilon")
    .gsub(/\u03b6/, "zeta")
    .gsub(/\u03b7/, "eta")
    .gsub(/\u03b8/, "theta")
    .gsub(/\u0398/, "Theta")
    .gsub(/\u03d1/, "vartheta")
    .gsub(/\u03b9/, "iota")
    .gsub(/\u03ba/, "kappa")
    .gsub(/\u03bb/, "lambda")
    .gsub(/\u039b/, "Lambda")
    .gsub(/\u03bc/, "mu")
    .gsub(/\u03bd/, "nu")
    .gsub(/\u03be/, "xi")
    .gsub(/\u039e/, "Xi")
    .gsub(/\u03c0/, "pi")
    .gsub(/\u03a0/, "Pi")
    .gsub(/\u03c1/, "rho")
    .gsub(/\u03c2/, "sigma") # final sigma; closest available token
    .gsub(/\u03c3/, "sigma")
    .gsub(/\u03a3/, "Sigma")
    .gsub(/\u03c4/, "tau")
    .gsub(/\u03c5/, "upsilon")
    .gsub(/\u03c6/, "phi")
    .gsub(/\u03a6/, "Phi")
    .gsub(/\u03d5/, "varphi")
    .gsub(/\u03c7/, "chi")
    .gsub(/\u03c8/, "psi")
    .gsub(/\u03a8/, "Psi")
    .gsub(/\u03c9/, "omega")
    .gsub(/\u03a9/, "Omega")
    .gsub(/\u22c5/, "*")
    .gsub(/\u2219/, "*")
    .gsub(/\u00b7/, "*")
    .gsub(/\u2217/, "**")
    .gsub(/\u22c6/, "***")
    .gsub(/\u00d7/, "xx")
    .gsub(/\u22c9/, "|><")
    .gsub(/\u22ca/, "><|")
    .gsub(/\u22c8/, "|><|")
    .gsub(/\u00f7/, "-:")
    .gsub(/\u2218/, "@")
    .gsub(/\u2295/, "o+")
    .gsub(/\u2a01/, "o+")
    .gsub(/\u2297/, "ox")
    .gsub(/\u2299/, "o.")
    .gsub(/\u2211/, "sum")
    .gsub(/\u220f/, "prod")
    .gsub(/\u2227/, "^^")
    .gsub(/\u22c0/, "^^^")
    .gsub(/\u2228/, "vv")
    .gsub(/\u22c1/, "vvv")
    .gsub(/\u2229/, "nn")
    .gsub(/\u22c2/, "nnn")
    .gsub(/\u222a/, "uu")
    .gsub(/\u22c3/, "uuu")
    .gsub(/\u2260/, "!=")
    .gsub(/\u2264/, "<=")
    .gsub(/\u2265/, ">=")
    .gsub(/\u227a/, "-<")
    .gsub(/\u227b/, ">-")
    .gsub(/\u2aaf/, "-<=")
    .gsub(/\u2ab0/, ">-=")
    .gsub(/\u2208/, "in")
    .gsub(/\u2209/, "!in")
    .gsub(/\u2282/, "sub")
    .gsub(/\u2283/, "sup")
    .gsub(/\u2286/, "sube")
    .gsub(/\u2287/, "supe")
    .gsub(/\u2261/, "-=")
    .gsub(/\u2245/, "~=")
    .gsub(/\u2248/, "~~")
    .gsub(/\u221d/, "prop")
    .gsub(/\u00ac/, "not")
    .gsub(/\u21d2/, "=>")
    .gsub(/\u21d4/, "<=>")
    .gsub(/\u2200/, "AA")
    .gsub(/\u2203/, "EE")
    .gsub(/\u22a5/, "_|_")
    .gsub(/\u22a4/, "TT")
    .gsub(/\u22a2/, "|--")
    .gsub(/\u22a8/, "|==")
    .gsub(/\u2329/, "(:")
    .gsub(/\u232a/, ":)")
    .gsub(/\u27e8/, "<<")
    .gsub(/\u27e9/, ">>")
    .gsub(/\u222b/, "int")
    .gsub(/\u222e/, "oint")
    .gsub(/\u2202/, "del")
    .gsub(/\u2207/, "grad")
    .gsub(/\u00b1/, "+-")
    .gsub(/\u2205/, "O/")
    .gsub(/\u221e/, "oo")
    .gsub(/\u2135/, "aleph")
    .gsub(/\u2234/, ":.")
    .gsub(/\u2235/, ":'")
    .gsub(/\u2220/, "/_")
    .gsub(/\u25b3/, "/_\\")
    .gsub(/\u2032/, "'")
    .gsub(/\u00a0\u00a0\u00a0\u00a0/, "qquad")
    .gsub(/\u00a0\u00a0/, "quad")
    .gsub(/\u00a0/, "\\ ")
    .gsub(/\u2322/, "frown")
    .gsub(/\u22ef/, "cdots")
    .gsub(/\u22ee/, "vdots")
    .gsub(/\u22f1/, "ddots")
    .gsub(/\u22c4/, "diamond")
    .gsub(/\u25a1/, "square")
    .gsub(/\u230a/, "|__")
    .gsub(/\u230b/, "__|")
    .gsub(/\u2308/, "|~")
    .gsub(/\u2309/, "~|")
    .gsub(/\u2102/, "CC")
    .gsub(/\u2115/, "NN")
    .gsub(/\u211a/, "QQ")
    .gsub(/\u211d/, "RR")
    .gsub(/\u2124/, "ZZ")
    .gsub(/\u2191/, "uarr")
    .gsub(/\u2193/, "darr")
    .gsub(/\u2190/, "larr")
    .gsub(/\u2194/, "harr")
    .gsub(/\u21d0/, "lArr")
    .gsub(/\u2192/, "->")
    .gsub(/\u21a3/, ">->")
    .gsub(/\u21a0/, "->>")
    .gsub(/\u2916/, ">->>")
    .gsub(/\u21a6/, "|->")
    .gsub(/\u2026/, "...")
    .gsub(/\u2212/, "-")
    .gsub(/\u2061/, "") # function application
    .gsub(/\u2751/, "square")
    .gsub(/[\u2028\u2029]/, " ") # line and paragraph separators become a plain space
end
join_parsed_children(children, delimiter = " ") click to toggle source
# File lib/mathml2asciimath/m2a.rb, line 168
# Renders every node in +children+ with parse, trims surrounding whitespace
# from each result, and concatenates the results.
#
# @param children [Enumerable] nodes accepted by parse
# @param delimiter [String] text placed between consecutive results
# @return [String]
def self.join_parsed_children(children, delimiter = " ")
  rendered = children.map { |child| parse(child).strip }
  rendered.join(delimiter)
end
m2a(xml) click to toggle source
# File lib/mathml2asciimath/m2a.rb, line 6
# Converts a MathML document to AsciiMath.
#
# The document is parsed with Nokogiri, its root element is rendered with
# parse, and the result is tidied: every blank character becomes a plain
# space, the text is Unicode-normalized, and runs of spaces are collapsed.
#
# @param xml [String] MathML source
# @return [String] the AsciiMath rendering
def self.m2a(xml)
  # &:noblanks skips non-significant whitespaces in MathML
  document = Nokogiri::XML.parse(xml, &:noblanks)
  rendered = parse(document.root)

  # Blank-only content such as <mtext>&#x2009;</mtext> ends up as a single
  # ordinary space
  rendered.gsub(/[[:blank:]]/, " ").unicode_normalize.squeeze(" ")
end
parse(node) click to toggle source
# File lib/mathml2asciimath/m2a.rb, line 174
# Recursively renders a MathML node as AsciiMath.
#
# Text nodes are entity-decoded and passed through encodechars. Element
# nodes are dispatched on their name with any namespace prefix removed.
# An element with no dedicated branch is returned as its own XML
# serialization wrapped in a <math> element.
#
# @param node [Object] presumably a Nokogiri::XML::Node (m2a passes the root
#   of a Nokogiri document); it must respond to text?, text, name, parent,
#   elements, children, [] and to_xml
# @return [String]
def self.parse(node)
  return encodechars(HTMLEntities.new.decode(node.text)) if node.text?

  case node.name.sub(/^[^:]*:/, "")
  when "math", "semantics"
    join_parsed_children(node.elements)

  when "annotation"
    ""

  when "mrow"
    row = join_parsed_children(node.elements)
    # A row used as an operand of one of these constructs is parenthesized
    # so that it stays grouped in the output.
    parent_name = node.parent.name.sub(/^[^:]*:/, "")
    %w[mfrac msub munder munderover].include?(parent_name) ? "(#{row})" : row

  when "mfenced"
    sym_open = node["open"] || "("
    sym_close = node["close"] || ")"

    separator = "," # TODO currently ignore the supplied separators
    "#{sym_open}#{join_parsed_children(node.elements, separator)}#{sym_close}"

  when "msqrt"
    "sqrt(#{join_parsed_children(node.elements)})"

  when "mfrac"
    "(#{parse(node.elements[0])})/(#{parse(node.elements[1])})"

  when "msup"
    "#{script_base(node)}^#{script_operand(node.elements[1])}"

  when "msub"
    "#{script_base(node)}_#{script_operand(node.elements[1])}"

  when "munderover", "msubsup"
    "#{script_base(node)}_#{script_operand(node.elements[1])}" \
      "^#{script_operand(node.elements[2])}"

  when "munder"
    under = parse(node.elements[1]).strip
    accent = case under
             when "\u0332" then "ul"
             when "\u23df" then "ubrace"
             end

    if accent
      "#{accent} #{parse(node.elements[0])}"
    else
      "underset(#{under})(#{parse(node.elements[0])})"
    end

  when "mover"
    over = parse(node.elements[1]).strip
    # The text has already been through encodechars, so a right arrow
    # (\u2192) arrives here as "->".
    accent = case over
             when "\u005e" then "hat"
             when "\u00af" then "bar"
             when "->" then "vec"
             when "." then "dot"
             when ".." then "ddot"
             when "\u23de" then "obrace"
             end

    if accent
      "#{accent} #{parse(node.elements[0])}"
    else
      "overset(#{over})(#{parse(node.elements[0])})"
    end

  when "mtable", "mtr"
    "[#{join_parsed_children(node.elements, ',')}]"

  when "mtd"
    join_parsed_children(node.elements, ",")

  when "mn", "mtext"
    join_parsed_children(node.children, "")

  when "mi"
    # FIXME: What does this comment have to do with Word?
    # mi is not meant to have space around it,
    # but Word is conflating operators and operands
    join_parsed_children(node.children)

    # FIXME: Why do we need to add extra spaces?
    # out = " #{out} " if /[^a-zA-Z0-9',]|[a-z][a-z]/.match out

  when "mo"
    operator = join_parsed_children(node.children)
    # Only an explicit fence="true" suppresses the surrounding spaces; the
    # attribute value is a string, so fence="false" must not count as set.
    node["fence"] == "true" ? operator : " #{operator} "

  when "mstyle"
    join_parsed_children(node.children)

  else
    "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">#{node.to_xml}</math>"

  end
end

# Renders the first child element of a script construct (msup, msub,
# msubsup, munderover) and removes a space at the end of the text so the
# "^" or "_" attaches directly to it.
def self.script_base(node)
  parse(node.elements[0]).gsub(/ $/, "")
end

# Renders a sub/superscript node, wrapping the result in parentheses unless
# it is exactly one character long.
def self.script_operand(script_node)
  rendered = parse(script_node)
  rendered.length == 1 ? rendered : "(#{rendered})"
end