module Unicode(Unicode(..), html2unicode,
allSymbols) where
import List (elemIndex)
-- There's no real support for full Unicode here, just for
-- named symbols from HTML 3.2 (and some symbols from HTML 4.0)
-- The HTML spelling of every symbol in 'unicodeSyms', in table order
-- (runs in the order they are listed, symbols in order within each run).
allSymbols :: [String]
allSymbols = concatMap (map fst . snd) unicodeSyms
-- LaTeX symbols incomplete; will be filled when the LaTeX backend arrives.
-- Table of the known symbols, grouped into runs of consecutive code points.
--
-- Each element is (code point of the first symbol of the run, symbols of
-- the run); the symbol at list offset k has code point (first + k), so the
-- order inside a run is significant and a gap in the code points requires
-- a new run.
--
-- Each symbol is (HTML spelling, LaTeX replacement). The HTML spelling is
-- the full entity reference including the leading '&' and the trailing
-- ';'. 'undef' marks a LaTeX replacement that has not been written yet;
-- forcing it raises an error.
--
-- NOTE(review): standard LaTeX does not appear to define \Alpha, \Beta,
-- \Epsilon, \Zeta, \Eta, \Iota, \Kappa, \Mu, \Nu, \Omicron, \Rho, \Tau,
-- \Chi or \omicron -- confirm what the LaTeX backend is expected to load.
unicodeSyms :: [(Integer, [(String, String)])]
unicodeSyms =
    [ (34,  [ ("&quot;", "\""),                      -- 34
              ("#", undef),                          -- 35
              ("$", "\\$")                           -- 36
            ]
      ),
      (60,  [("&lt;", "\\ensuremath{<}")]),
      (62,  [("&gt;", "\\ensuremath{>}")]),
      (160, [ ("&nbsp;", "\\ "),                     -- 160
              ("&iexcl;", undef),
              ("&cent;", undef),
              ("&pound;", undef),
              ("&curren;", undef),
              ("&yen;", undef),
              ("&brvbar;", undef),
              ("&sect;", undef),
              ("&uml;", undef),
              ("&copy;", undef),
              ("&ordf;", undef),
              ("&laquo;", undef),
              ("&not;", undef),
              ("&shy;", undef),                      -- 173
              ("&reg;", undef),
              ("&macr;", undef),
              ("&deg;", undef),
              ("&plusmn;", undef),
              ("&sup2;", undef),
              ("&sup3;", undef),
              ("&acute;", undef),
              ("&micro;", undef),
              ("&para;", undef),
              ("&middot;", undef),
              ("&cedil;", undef),
              ("&sup1;", undef),
              ("&ordm;", undef),
              ("&raquo;", undef),
              ("&frac14;", undef),
              ("&frac12;", undef),
              ("&frac34;", undef),
              ("&iquest;", undef),                   -- 191
              ("&Agrave;", "\\`A"),                  -- 192
              ("&Aacute;", "\\'A"),
              ("&Acirc;", undef),
              ("&Atilde;", undef),
              ("&Auml;", "\\\"A"),
              ("&Aring;", undef),
              ("&AElig;", undef),
              ("&Ccedil;", undef),
              ("&Egrave;", "\\`E"),
              ("&Eacute;", "\\'E"),
              ("&Ecirc;", undef),
              ("&Euml;", "\\\"E"),
              ("&Igrave;", "\\`I"),
              ("&Iacute;", "\\'I"),
              ("&Icirc;", undef),
              ("&Iuml;", "\\\"I"),
              ("&ETH;", undef),
              ("&Ntilde;", undef),
              ("&Ograve;", "\\`O"),
              ("&Oacute;", "\\'O"),
              ("&Ocirc;", undef),
              ("&Otilde;", undef),
              ("&Ouml;", "\\\"O"),
              ("&times;", "\\ensuremath{\\times}"),  -- 215
              ("&Oslash;", undef),
              ("&Ugrave;", "\\`U"),
              ("&Uacute;", "\\'U"),
              ("&Ucirc;", undef),
              ("&Uuml;", "\\\"U"),
              ("&Yacute;", "\\'Y"),
              ("&THORN;", undef),
              ("&szlig;", "\\ss"),                   -- 223
              ("&agrave;", "\\`a"),                  -- 224
              ("&aacute;", "\\'a"),
              ("&acirc;", undef),
              ("&atilde;", undef),
              ("&auml;", "\\\"a"),
              ("&aring;", undef),
              ("&aelig;", undef),
              ("&ccedil;", undef),
              ("&egrave;", "\\`e"),
              ("&eacute;", "\\'e"),
              ("&ecirc;", undef),
              ("&euml;", "\\\"e"),
              ("&igrave;", "\\`i"),
              ("&iacute;", "\\'i"),
              ("&icirc;", undef),
              ("&iuml;", "\\\"i"),
              ("&eth;", undef),
              ("&ntilde;", undef),
              ("&ograve;", "\\`o"),
              ("&oacute;", "\\'o"),
              ("&ocirc;", undef),
              ("&otilde;", undef),
              ("&ouml;", "\\\"o"),
              ("&divide;", undef),                   -- 247
              ("&oslash;", undef),
              ("&ugrave;", "\\`u"),
              ("&uacute;", "\\'u"),
              ("&ucirc;", undef),
              ("&uuml;", "\\\"u"),
              ("&yacute;", "\\'y"),
              ("&thorn;", undef),
              ("&yuml;", "\\\"y")                    -- 255
            ]
      ),
      (913, [ ("&Alpha;", "\\ensuremath{\\Alpha}"),  -- 913
              ("&Beta;", "\\ensuremath{\\Beta}"),
              ("&Gamma;", "\\ensuremath{\\Gamma}"),
              ("&Delta;", "\\ensuremath{\\Delta}"),
              ("&Epsilon;", "\\ensuremath{\\Epsilon}"),
              ("&Zeta;", "\\ensuremath{\\Zeta}"),
              ("&Eta;", "\\ensuremath{\\Eta}"),
              ("&Theta;", "\\ensuremath{\\Theta}"),
              ("&Iota;", "\\ensuremath{\\Iota}"),
              ("&Kappa;", "\\ensuremath{\\Kappa}"),
              ("&Lambda;", "\\ensuremath{\\Lambda}"),
              ("&Mu;", "\\ensuremath{\\Mu}"),
              ("&Nu;", "\\ensuremath{\\Nu}"),
              ("&Xi;", "\\ensuremath{\\Xi}"),
              ("&Omicron;", "\\ensuremath{\\Omicron}"),
              ("&Pi;", "\\ensuremath{\\Pi}"),
              ("&Rho;", "\\ensuremath{\\Rho}")       -- 929
            ]
      ),
      -- Code point 929 is Rho and 931 is Sigma; 930 is skipped, so the
      -- capital letters continue in a separate run starting at 931.
      (931, [ ("&Sigma;", "\\ensuremath{\\Sigma}"),  -- 931
              ("&Tau;", "\\ensuremath{\\Tau}"),
              ("&Upsilon;", "\\ensuremath{\\Upsilon}"),
              ("&Phi;", "\\ensuremath{\\Phi}"),
              ("&Chi;", "\\ensuremath{\\Chi}"),
              ("&Psi;", "\\ensuremath{\\Psi}"),
              ("&Omega;", "\\ensuremath{\\Omega}")   -- 937
            ]
      ),
      (945, [ ("&alpha;", "\\ensuremath{\\alpha}"),  -- 945
              ("&beta;", "\\ensuremath{\\beta}"),
              ("&gamma;", "\\ensuremath{\\gamma}"),
              ("&delta;", "\\ensuremath{\\delta}"),
              ("&epsilon;", "\\ensuremath{\\epsilon}"),
              ("&zeta;", "\\ensuremath{\\zeta}"),
              ("&eta;", "\\ensuremath{\\eta}"),
              ("&theta;", "\\ensuremath{\\theta}"),
              ("&iota;", "\\ensuremath{\\iota}"),
              ("&kappa;", "\\ensuremath{\\kappa}"),
              ("&lambda;", "\\ensuremath{\\lambda}"),
              ("&mu;", "\\ensuremath{\\mu}"),
              ("&nu;", "\\ensuremath{\\nu}"),
              ("&xi;", "\\ensuremath{\\xi}"),
              ("&omicron;", "\\ensuremath{\\omicron}"),
              ("&pi;", "\\ensuremath{\\pi}"),
              ("&rho;", "\\ensuremath{\\rho}"),
              ("&sigmaf;", undef),                   -- 962
              ("&sigma;", "\\ensuremath{\\sigma}"),
              ("&tau;", "\\ensuremath{\\tau}"),
              ("&upsilon;", "\\ensuremath{\\upsilon}"),
              ("&phi;", "\\ensuremath{\\varphi}"),
              ("&chi;", "\\ensuremath{\\chi}"),
              ("&psi;", "\\ensuremath{\\psi}"),
              ("&omega;", "\\ensuremath{\\omega}")   -- 969
            ]
      ),
      (977, [ ("&thetasym;", undef),                 -- 977
              ("&upsih;", undef)                     -- 978
            ]
      ),
      -- &piv; is code point 982, not 979, so it cannot share the run above.
      (982, [("&piv;", undef)])
    ]
-- Placeholder for a value that has not been written yet (used in
-- 'unicodeSyms' for missing LaTeX replacements). It is harmless as long as
-- it is never evaluated; forcing it aborts with the message below.
undef :: a
undef = error message
  where
    message = "some symbol is missing for LaTeX\n (please report this as a bug)"
-- One symbol as returned by 'html2unicode'.
data Unicode = Unicode
    { unicodeValue :: Integer  -- numeric code of the symbol (run start + offset in the table)
    , unicodeHtml  :: String   -- HTML spelling, exactly as stored in the table
    , unicodeLatex :: String   -- LaTeX replacement; may be 'undef', which errors when forced
    }
    deriving (Eq, Show)
-- Look up a symbol by its HTML spelling.
--
-- The runs of 'unicodeSyms' are searched in table order and the first run
-- containing the spelling wins. The result carries the run's starting code
-- plus the position of the match inside the run, together with the stored
-- HTML and LaTeX strings. Returns Nothing when no run contains the spelling.
--
-- The LaTeX field is passed through unevaluated, so a symbol whose LaTeX
-- replacement is 'undef' can still be looked up; only forcing
-- 'unicodeLatex' on it raises the error.
html2unicode :: String -> Maybe Unicode
html2unicode str = foldr pick Nothing unicodeSyms
  where
    -- Try one run; fall through to the remaining runs when it has no match.
    pick (base, entries) otherwise' =
        case elemIndex str (map fst entries) of
          Just pos -> Just (build base pos (entries !! pos))
          Nothing  -> otherwise'

    -- Assemble the result for the entry found at position 'pos' of its run.
    build base pos (html, latex) =
        Unicode
          { unicodeValue = base + fromIntegral pos
          , unicodeHtml  = html
          , unicodeLatex = latex
          }