-- | Lookup table from HTML character entity references to Unicode code
-- points and (where known) LaTeX replacement text.
--
-- There's no real support for full Unicode here, just for named symbols
-- from HTML 3.2 (and some symbols from HTML 4.0).
module Unicode(Unicode(..), html2unicode, allSymbols) where

import Data.List (elemIndex)

-- | Every HTML entity reference known to this module, in table order.
allSymbols :: [String]
allSymbols = [htmlSym | (_, xs) <- unicodeSyms, (htmlSym, _) <- xs]

-- | The symbol table.  Each element is a run of consecutive code points:
-- the 'Integer' is the code point of the first entry of the run and the
-- i-th entry (counting from 0) has code point @start + i@.  Each entry
-- pairs the HTML entity reference with its LaTeX replacement.
--
-- LaTeX symbols incomplete; will be filled when the LaTeX backend arrives.
-- Entries without a replacement use 'undef', which raises an error when
-- the LaTeX text is forced.
--
-- NOTE(review): some macros used below (e.g. @\\Alpha@, @\\Omicron@,
-- @\\omicron@) are presumably not defined by plain LaTeX -- confirm that
-- the backend's preamble provides them.
unicodeSyms :: [(Integer, [(String, String)])]
unicodeSyms =
  [ (34,  [ ("&quot;", "\"")
          , ("&#35;",  undef)
          , ("&#36;",  "\\$")
          ])
  , (60,  [ ("&lt;", "\\ensuremath{<}") ])
  , (62,  [ ("&gt;", "\\ensuremath{>}") ])
  , (160, [ ("&nbsp;",   "\\ ")                      -- 160
          , ("&iexcl;",  undef)
          , ("&cent;",   undef)
          , ("&pound;",  undef)
          , ("&curren;", undef)
          , ("&yen;",    undef)
          , ("&brvbar;", undef)
          , ("&sect;",   undef)
          , ("&uml;",    undef)
          , ("&copy;",   undef)
          , ("&ordf;",   undef)
          , ("&laquo;",  undef)
          , ("&not;",    undef)
          , ("&shy;",    undef)
          , ("&reg;",    undef)
          , ("&macr;",   undef)
          , ("&deg;",    undef)                      -- 176
          , ("&plusmn;", undef)
          , ("&sup2;",   undef)
          , ("&sup3;",   undef)
          , ("&acute;",  undef)
          , ("&micro;",  undef)
          , ("&para;",   undef)
          , ("&middot;", undef)
          , ("&cedil;",  undef)
          , ("&sup1;",   undef)
          , ("&ordm;",   undef)
          , ("&raquo;",  undef)
          , ("&frac14;", undef)
          , ("&frac12;", undef)
          , ("&frac34;", undef)
          , ("&iquest;", undef)
          , ("&Agrave;", "\\`A")                     -- 192
          , ("&Aacute;", "\\'A")
          , ("&Acirc;",  undef)
          , ("&Atilde;", undef)
          , ("&Auml;",   "\\\"A")
          , ("&Aring;",  undef)
          , ("&AElig;",  undef)
          , ("&Ccedil;", undef)
          , ("&Egrave;", "\\`E")
          , ("&Eacute;", "\\'E")
          , ("&Ecirc;",  undef)
          , ("&Euml;",   "\\\"E")
          , ("&Igrave;", "\\`I")
          , ("&Iacute;", "\\'I")
          , ("&Icirc;",  undef)
          , ("&Iuml;",   "\\\"I")
          , ("&ETH;",    undef)                      -- 208
          , ("&Ntilde;", undef)
          , ("&Ograve;", "\\`O")
          , ("&Oacute;", "\\'O")
          , ("&Ocirc;",  undef)
          , ("&Otilde;", undef)
          , ("&Ouml;",   "\\\"O")
          , ("&times;",  "\\ensuremath{\\times}")
          , ("&Oslash;", undef)
          , ("&Ugrave;", "\\`U")
          , ("&Uacute;", "\\'U")
          , ("&Ucirc;",  undef)
          , ("&Uuml;",   "\\\"U")
          , ("&Yacute;", "\\'Y")
          , ("&THORN;",  undef)
          , ("&szlig;",  "\\ss")
          , ("&agrave;", "\\`a")                     -- 224
          , ("&aacute;", "\\'a")
          , ("&acirc;",  undef)
          , ("&atilde;", undef)
          , ("&auml;",   "\\\"a")
          , ("&aring;",  undef)
          , ("&aelig;",  undef)
          , ("&ccedil;", undef)
          , ("&egrave;", "\\`e")
          , ("&eacute;", "\\'e")
          , ("&ecirc;",  undef)
          , ("&euml;",   "\\\"e")
          , ("&igrave;", "\\`i")
          , ("&iacute;", "\\'i")
          , ("&icirc;",  undef)
          , ("&iuml;",   "\\\"i")
          , ("&eth;",    undef)                      -- 240
          , ("&ntilde;", undef)
          , ("&ograve;", "\\`o")
          , ("&oacute;", "\\'o")
          , ("&ocirc;",  undef)
          , ("&otilde;", undef)
          , ("&ouml;",   "\\\"o")
          , ("&divide;", undef)
          , ("&oslash;", undef)
          , ("&ugrave;", "\\`u")
          , ("&uacute;", "\\'u")
          , ("&ucirc;",  undef)
          , ("&uuml;",   "\\\"u")
          , ("&yacute;", "\\'y")
          , ("&thorn;",  undef)
          , ("&yuml;",   "\\\"y")                    -- 255
          ])
  , (913, [ ("&Alpha;",   "\\ensuremath{\\Alpha}")
          , ("&Beta;",    "\\ensuremath{\\Beta}")
          , ("&Gamma;",   "\\ensuremath{\\Gamma}")
          , ("&Delta;",   "\\ensuremath{\\Delta}")
          , ("&Epsilon;", "\\ensuremath{\\Epsilon}")
          , ("&Zeta;",    "\\ensuremath{\\Zeta}")
          , ("&Eta;",     "\\ensuremath{\\Eta}")
          , ("&Theta;",   "\\ensuremath{\\Theta}")
          , ("&Iota;",    "\\ensuremath{\\Iota}")
          , ("&Kappa;",   "\\ensuremath{\\Kappa}")
          , ("&Lambda;",  "\\ensuremath{\\Lambda}")
          , ("&Mu;",      "\\ensuremath{\\Mu}")
          , ("&Nu;",      "\\ensuremath{\\Nu}")
          , ("&Xi;",      "\\ensuremath{\\Xi}")
          , ("&Omicron;", "\\ensuremath{\\Omicron}")
          , ("&Pi;",      "\\ensuremath{\\Pi}")
          , ("&Rho;",     "\\ensuremath{\\Rho}")
          ])
    -- &Rho; is code point 929 and &Sigma; is 931; 930 has no entity,
    -- so a new run has to start here.
  , (931, [ ("&Sigma;",   "\\ensuremath{\\Sigma}")
          , ("&Tau;",     "\\ensuremath{\\Tau}")
          , ("&Upsilon;", "\\ensuremath{\\Upsilon}")
          , ("&Phi;",     "\\ensuremath{\\Phi}")
          , ("&Chi;",     "\\ensuremath{\\Chi}")
          , ("&Psi;",     "\\ensuremath{\\Psi}")
          , ("&Omega;",   "\\ensuremath{\\Omega}")
          ])
  , (945, [ ("&alpha;",   "\\ensuremath{\\alpha}")
          , ("&beta;",    "\\ensuremath{\\beta}")
          , ("&gamma;",   "\\ensuremath{\\gamma}")
          , ("&delta;",   "\\ensuremath{\\delta}")
          , ("&epsilon;", "\\ensuremath{\\epsilon}")
          , ("&zeta;",    "\\ensuremath{\\zeta}")
          , ("&eta;",     "\\ensuremath{\\eta}")
          , ("&theta;",   "\\ensuremath{\\theta}")
          , ("&iota;",    "\\ensuremath{\\iota}")
          , ("&kappa;",   "\\ensuremath{\\kappa}")
          , ("&lambda;",  "\\ensuremath{\\lambda}")
          , ("&mu;",      "\\ensuremath{\\mu}")
          , ("&nu;",      "\\ensuremath{\\nu}")
          , ("&xi;",      "\\ensuremath{\\xi}")
          , ("&omicron;", "\\ensuremath{\\omicron}")
          , ("&pi;",      "\\ensuremath{\\pi}")
          , ("&rho;",     "\\ensuremath{\\rho}")
          , ("&sigmaf;",  undef)                     -- 962
          , ("&sigma;",   "\\ensuremath{\\sigma}")
          , ("&tau;",     "\\ensuremath{\\tau}")
          , ("&upsilon;", "\\ensuremath{\\upsilon}")
          , ("&phi;",     "\\ensuremath{\\varphi}")
          , ("&chi;",     "\\ensuremath{\\chi}")
          , ("&psi;",     "\\ensuremath{\\psi}")
          , ("&omega;",   "\\ensuremath{\\omega}")
          ])
  , (977, [ ("&thetasym;", undef)
          , ("&upsih;",    undef)
          ])
    -- &piv; is code point 982, not 979, so it cannot share the 977 run.
  , (982, [ ("&piv;", undef) ])
  ]

-- | Placeholder for LaTeX replacements that have not been written yet.
-- Forcing it aborts with an error, so it must only be stored lazily.
undef :: String
undef = error "some symbol is missing for LaTeX\n (please report this as a bug)"

-- | One symbol: its code point, its HTML entity reference and its LaTeX
-- replacement.
--
-- The fields are deliberately lazy: 'unicodeLatex' may be 'undef', and
-- forcing it (directly, or through the derived 'Show' and 'Eq'
-- instances) raises the error carried by 'undef'.
data Unicode = Unicode { unicodeValue :: Integer,
                         unicodeHtml  :: String,
                         unicodeLatex :: String
                       }
               deriving (Eq, Show)

-- | Look up an HTML entity reference (e.g. @"&alpha;"@) in the symbol
-- table.  Returns 'Nothing' when the string is not a known symbol;
-- otherwise the first matching entry, with its code point computed as
-- the start of its run plus the entry's position inside the run.
html2unicode :: String -> Maybe Unicode
html2unicode str = search unicodeSyms
  where
    search [] = Nothing
    search ((start, entries) : rest) =
      case elemIndex str (map fst entries) of
        Nothing  -> search rest
        Just off -> Just (symbolAt start off entries)

    -- 'off' always comes from 'elemIndex' on the same list, so the
    -- index is in range and '!!' cannot fail here.
    symbolAt start off entries =
      let (html, latex) = entries !! off
      in  Unicode { unicodeValue = start + fromIntegral off,
                    unicodeHtml  = html,
                    unicodeLatex = latex
                  }