diff -ur alex/src/DFA.hs alex.minimize/src/DFA.hs --- alex/src/DFA.hs Mon Sep 29 20:56:05 1997 +++ alex.minimize/src/DFA.hs Sat Nov 10 16:29:49 2001 @@ -10,7 +10,7 @@ Chris Dornan, Aug-95, 10-Jul-96, 29-Aug-96, 29-Sep-97 ------------------------------------------------------------------------------} -module DFA(scanner2dfa) where +module DFA(scanner2dfa, minimize_dfa) where import Array import Char @@ -236,3 +236,129 @@ t = length(takeWhile id [arr!chr c==df| c<-[255,254..0]]) arr = listArray (chr 0,chr 255) (take 256 (repeat (-1))) // as + + + +--------------------------------- +-- DFA minimization functions (by Armin Groesslinger, April 2001) + +-- Minimize a DFA. +minimize_dfa :: DFA a -> DFA a +minimize_dfa dfa = newDfa + where + -- Construct the minimized DFA (which has (length finalPs) states) + -- by converting the partitions into the new states. + newDfa = array (0,length finalPs-1) (map newSt finalPs) + + -- Minimize the DFA and remove the error state (-1) from the + -- resulting list of partitions. + finalPs = filter (/=[-1]) (zeroToFront $ iterateSplit dfa $ + initialParts dfa) + + -- Make a new state from the old state(s) in the partition by + -- converting the state numbers and the accept list. + newSt (snum:_) = (sMap!snum, St u (map newAcc accs) (sMap ! def) nextNew) + where + St u accs def nextOld = dfa!snum + nextNew = fmap (\s -> sMap!s) nextOld + + -- Convert the accept structure. + newAcc (Acc pri name a sc lead trail) = Acc pri name a sc lead trail' + where + trail' = case trail of + Nothing -> Nothing + Just s -> Just (sMap!s) + + -- `sMap' maps the states of the original DFA to the + -- new states (= partition numbers). + sMap = array (-1,maxOldS) ((-1,-1) : maps) + maps = [(old,new) | (os,new) <- zip finalPs [0..], old <- os] + (_,maxOldS) = bounds dfa + + +-- Calculate the initial partitioning of the DFA's states. +-- Add a partition for the error state (-1). +initialParts :: DFA a -> [[SNum]] +initialParts dfa = [-1] : map (map fst) ps + where + ps = partitionBy (\(_,st1) (_,st2) -> hasEquivAcc st1 st2) (assocs dfa) + + +-- Check if the given states have equivalent accept lists. +hasEquivAcc :: State a -> State a -> Bool +hasEquivAcc (St u1 accs1 _ _) (St u2 accs2 _ _) = + u1 == u2 && length accs1 == length accs2 && + and (zipWith accEquiv accs1 accs2) + + +-- Check the equivalence of the given accept structures. +accEquiv :: Accept a -> Accept a -> Bool +accEquiv (Acc pri1 name1 _ st1 lead1 trail1) + (Acc pri2 name2 _ st2 lead2 trail2) = + pri1 == pri2 && name1 == name2 && trail1 == trail2 && steq && peq + where + steq = msort (<=) st1 == msort (<=) st2 + peq = case (lead1, lead2) of + (Nothing, Nothing) -> True + (Just f1, Just f2) -> and [f1 c == f2 c | c <- dfa_alphabet] + (Just f1, Nothing) -> and [f1 c | c <- dfa_alphabet] + (Nothing, Just f2) -> and [f2 c | c <- dfa_alphabet] + + +-- Move the partition containing the (old) start state 0 to the +-- front of the list of partitions. +zeroToFront :: [[SNum]] -> [[SNum]] +zeroToFront [] = [] +zeroToFront ps@(x:xs) + | 0 `elem` x = ps + | otherwise = zeroToFront xs ++ [x] + + +-- Iterate the splitting of partitions until we reach a fix point. +iterateSplit :: DFA a -> [[SNum]] -> [[SNum]] +iterateSplit dfa curPs + | length newPs > length curPs = iterateSplit dfa newPs + | otherwise = newPs + where + newPs = check curPs + + -- Look for a partition which must be split. If one such partition + -- is found that partition is replaced by the "pieces" resulting + -- from splitting it. + check :: [[SNum]] -> [[SNum]] + check [] = [] + check (p:ps) + | length nPs == 1 = p : check ps + | otherwise = ps ++ nPs + where + nPs = partitionBy f p + f x y = and [follow x ch == follow y ch | ch <- dfa_alphabet] + + -- Find the partition where the transition from state `s' under + -- input symbol `ch' points to. + follow :: SNum -> Char -> [SNum] + follow s ch = state2part curPs followSNum + where + St _ _ def next = dfa ! s + followSNum + | inRange (bounds next) ch = next ! ch + | otherwise = def + + +-- Find the partition state `s' lies in. +state2part :: [[SNum]] -> SNum -> [SNum] +state2part [] s = error ("internal error while minimizing:\n" ++ + " state not found: " ++ show s ++ " ?!") +state2part (x:xs) s + | s `elem` x = x + | otherwise = state2part xs s + + +-- Partition a list using the equivalence relation `equiv'. +partitionBy :: (a -> a -> Bool) -> [a] -> [[a]] +partitionBy equiv list = foldl add [] list + where + add [] y = [[y]] + add (x:xs) y + | y `equiv` (head x) = (y:x) : xs + | otherwise = x : add xs y diff -ur alex/src/Main.hs alex.minimize/src/Main.hs --- alex/src/Main.hs Tue Apr 11 19:44:43 2000 +++ alex.minimize/src/Main.hs Sat Dec 29 20:39:42 2001 @@ -17,6 +17,7 @@ module Main where +import Array import Char import System import Sort @@ -37,16 +38,41 @@ -- `main' decodes the command line arguments and calls `alex'. +data Flags = Flags + { + -- Minimize the resulting DFA + flagMinimize :: Bool, + + -- Output + -- read "[]" + -- instead of + -- [] + -- + -- This doesn't seem to make much sense, but GHC compiles long + -- lists *very* slowly (and uses a lot of memory to do so) + -- currently (up to version 5.00, at least), so + -- read "[]" + -- is much better for compile time performance. + -- (Armin Groesslinger, July 2001) + flagReadHack :: Bool + } + main:: IO () -main = getArgs >>= \args -> - check_flags args >> - case args of - [fn] -> check_lit fn >>= \(lit,fn') -> alex lit fn fn' - [fn,fn'] -> check_lit fn >>= \(lit,_) -> alex lit fn fn' +main = getArgs >>= \args -> + check_flags defaultFlags args >>= \(flags, args2) -> + case args2 of + [fn] -> check_lit fn >>= \(lit,fn') -> alex lit flags fn fn' + [fn,fn'] -> check_lit fn >>= \(lit,_) -> alex lit flags fn fn' _ -> usage where - check_flags (('-':_):_) = usage - check_flags _ = return () + defaultFlags = Flags { flagMinimize=True, flagReadHack=True } + + check_flags flags ("--dont-minimize":xs) = + check_flags (flags { flagMinimize = False }) xs + check_flags flags ("--no-read-hack":xs) = + check_flags (flags { flagReadHack = False }) xs + check_flags _ (('-':opt):xs) = usage + check_flags flags xs = return (flags, xs) check_lit fn = case reverse fn of @@ -57,9 +83,11 @@ usage = getProgName >>= \prog -> error (vrn_ln ++ usg_ln prog) - vrn_ln = "Alex 1.1 by Chris Dornan\n\n" + vrn_ln = "Alex 1.1 by Chris Dornan\n\ + \with DFA minimization by Armin Groesslinger (April 2001)\n\n" - usg_ln prog = "usage: " ++ prog ++ " src_fn [tgt_fn]\n" + usg_ln prog = "usage: " ++ prog ++ + " [--dont-minimize] [--no-read-hack] src_fn [tgt_fn]\n" ext rbs = reverse ('s':'h':'.':rbs) @@ -82,19 +110,41 @@ -- a DFA, dump it and encode it on the output; if required, `encode_assocs' is -- used to dump the association list of tokens and action functions. -alex:: Bool -> String -> String -> IO () -alex lit src tgt = +alex:: Bool -> Flags -> String -> String -> IO () +alex lit flags src tgt = readFile src >>= \prg -> case parse_lx(pp prg) of Right lp -> - writeFile tgt (hdr ind (sc_hdr (foldr fmt "" (zip [0..] l)))) + writeFile tgt (hdr ind (sc_hdr output)) >> + putStrLn message where (LP hdr l,sc_hdr) = encode_start_codes ind lp + (output, (st1, st2)) = foldr fmt ("",(0,0)) (zip [0..] l) + message + | flagMinimize flags = + "DFA minimization reduced the number of states from " + ++ show st1 ++ " to " ++ show st2 + | otherwise = "DFA has " ++ show st1 ++ " states" Left err -> error (src ++ ": " ++ err) where - fmt (n,(fn,Nothing,scr)) tl = encode_dfa ind n fn scr tl - fmt (n,(fn,Just aln,scr)) tl = - encode_assocs ind aln scr (encode_dfa ind n fn scr tl) + fmt (n,(fn,mbAln,scr)) (tl,(st1,st2)) = + case mbAln of + Nothing -> (enc_dfa, states') + Just aln -> (encode_assocs ind aln scr enc_dfa, states') + where + enc_dfa = encode_dfa flags ind n fn dfa2 tl + (dfa1, dfa2) = scr2dfas scr + states' = (st1 + dfa_states dfa1, st2 + dfa_states dfa2) + + scr2dfas :: Scanner -> (DFA (), DFA ()) + scr2dfas scr + | flagMinimize flags = (dfa, minimize_dfa dfa) + | otherwise = (dfa, dfa) + where + dfa = scanner2dfa scr + + dfa_states :: DFA () -> Int + dfa_states dfa = rangeSize (bounds dfa) pp = if lit then literate else id @@ -154,8 +204,8 @@ -- definitions. See the DFA section of the Scan module for details of the -- `DFA' and `DFADump' types. -encode_dfa:: String -> Int -> String -> Scanner -> ShowS -encode_dfa ind n func_nm scr tl = +encode_dfa:: Flags -> String -> Int -> String -> DFA () -> ShowS +encode_dfa flags ind n func_nm dfa tl = "\n" ++ decl ++ defn ++ sub_defns ++ "\n" ++ tl where decl = ind ++ func_nm ++ " :: " ++ ls_tp ++ "\n" @@ -170,9 +220,29 @@ pr (i,e) t = ind ++ x_nm i ++ " :: " ++ nd_tp ++ "\n" ++ - ind ++ x_nm i ++ " = " ++ show e ++ "\n" ++ t + ind ++ x_nm i ++ " = " ++ showEhack e ++ "\n" ++ t + + -- Instead of "\"" ++ makeStr (show e) ++ "\"" + -- we should use show (show e), + -- but this doesn't work with nhc98 since + -- read "'\\NUL'" :: Char + -- fails, only + -- read "'\NUL'" :: Char + -- works. + -- => we don't duplicate backslashes which are + -- followed by an upper case letter. + -- + showEhack e + | flagReadHack flags = "read \"" ++ makeStr (show e) ++ "\"" + | otherwise = show e + makeStr "" = "" + makeStr ('\"':xs) = '\\' : '\"' : makeStr xs + makeStr ('\\':x:xs) + | isUpper x = '\\' : x : makeStr xs + | otherwise = '\\' : '\\' : makeStr (x:xs) + makeStr (x:xs) = x : makeStr xs - dump = dump_dfa(scanner2dfa scr) + dump = dump_dfa dfa ls_tp = "[" ++ nd_tp ++ "]" nd_tp = "(Bool,\ diff -ur alex/src/makefile alex.minimize/src/makefile --- alex/src/makefile Tue Apr 11 19:47:00 2000 +++ alex.minimize/src/makefile Wed Apr 18 00:08:41 2001 @@ -1,7 +1,7 @@ # This makefile assumes that ghc 4.x is installed. To install Alex, put the # Alex executable on the path and make Scan.hs available. -HC = ghc -H10M +HC = ghc -H10M -O CC = gcc OBJS = Sort.o \