(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier and Yann Régis-Gianas, INRIA Rocquencourt *) (* *) (* Copyright 2005 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the *) (* change described in file LICENSE. *) (* *) (**************************************************************************) open Grammar module Run (X : sig (* A restricted set of tokens of interest. *) val tokens: TerminalSet.t (* A state of the (merged) LR(1) automaton that we're trying to simulate. *) val goal: Lr1.node end) = struct (* First, let's restrict our interest to the nodes of the merged LR(1) automaton that can reach the goal node. Some experiments show that this can involve one tenth to one half of all nodes. This optimization seems minor, but is easy to implement. *) let relevant = Lr1.reverse_dfs X.goal (* Second, all of the states that we shall consider are restricted to the set of tokens of interest. This is an important idea: by abstracting away some information, we make the construction much faster. *) let restrict = Lr0.restrict X.tokens (* Constructing the automaton. The automaton is represented as a graph. States are never merged -- this is a canonical LR(1) construction! As we go, we record the correspondence between nodes in this automaton and nodes in the merged LR(1) automaton. This allows us to tell when we have reached the desired place. This also allows us not to follow transitions that have already been eliminated, in the merged automaton, via resolution of shift/reduce conflicts. Whenever we follow a transition in the canonical LR(1) automaton, we check that the corresponding transition is legal in the merged LR(1) automaton. The automaton is explored breadth-first and shortest paths from every node to one of the start nodes are recorded. *) type node = { state: Lr0.lr1state; ancestor: (Symbol.t * node) option; shadow: Lr1.node; } (* A queue of pending nodes, whose successors should be explored. *) let queue : node Queue.t = Queue.create() (* Mapping of LR(0) state numbers to lists of nodes. *) let map : node list array = Array.create Lr0.n [] (* Exploring a state. This creates a new node, if necessary, and enqueues it for further exploration. *) exception Goal of node * Terminal.t let explore ancestor shadow (state : Lr0.lr1state) : unit = (* Find all existing nodes that share the same LR(0) core. *) let k = Lr0.core state in assert (k < Lr0.n); let similar = map.(k) in (* Check whether one of these nodes coincides with the candidate new node. If so, stop. This check requires comparing not only the states of the partial, canonical automaton, but also their shadows in the full, merged automaton. This is because a single state of the canonical automaton may be reached along several different paths, leading to distinct shadows in the merged automaton, and we must explore all of these paths in order to ensure that we eventually find a goal node. *) if not (List.exists (fun node -> Lr0.equal state node.state && shadow == node.shadow ) similar) then begin (* Otherwise, create a new node. *) let node = { state = state; ancestor = ancestor; shadow = shadow; } in map.(k) <- node :: similar; Queue.add node queue; (* Check whether this is a goal node. A node [N] is a goal node if (i) [N] has a conflict involving one of the tokens of interest and (ii) [N] corresponds to the goal node, that is, the path that leads to [N] in the canonical LR(1) automaton leads to the goal node in the merged LR(1) automaton. Note that these conditions do not uniquely define [N]. *) if shadow == X.goal then let can_reduce = ref TerminalSet.empty in let reductions1 : Production.index list TerminalMap.t = Lr1.reductions shadow in List.iter (fun (toks, prod) -> TerminalSet.iter (fun tok -> (* We are looking at a [(tok, prod)] pair -- a reduction in the canonical automaton state. *) (* Check that this reduction, which exists in the canonical automaton state, also exists in the merged automaton -- that is, it wasn't suppressed by conflict resolution. *) if List.mem prod (TerminalMap.lookup tok reductions1) then try let (_ : Lr1.node) = SymbolMap.find (Symbol.T tok) (Lr1.transitions shadow) in (* Shift/reduce conflict. *) raise (Goal (node, tok)) with Not_found -> let toks = !can_reduce in (* We rely on the property that [TerminalSet.add tok toks] preserves physical equality when [tok] is a member of [toks]. *) let toks' = TerminalSet.add tok toks in if toks == toks' then (* Reduce/reduce conflict. *) raise (Goal (node, tok)) else (* No conflict so far. *) can_reduce := toks' ) toks ) (Lr0.reductions state) end (* Populate the queue with the start nodes. Until we find a goal node, take a node out the queue, construct the nodes that correspond to its successors, and enqueue them. *) let goal, token = try Array.iteri (fun (i : int) (k : Lr0.node) -> let shadow = Lr1.entry.(i) in if relevant shadow then explore None shadow (restrict (Lr0.start k)) ) Lr0.entry; Misc.qiter (fun node -> SymbolMap.iter (fun symbol state -> try let shadow = SymbolMap.find symbol (Lr1.transitions node.shadow) in if relevant shadow then explore (Some (symbol, node)) shadow (restrict state) with Not_found -> (* No shadow. This can happen if a shift/reduce conflict was resolved in favor in reduce. Ignore that transition. *) () ) (Lr0.transitions node.state) ) queue; (* We didn't find a goal node. This shouldn't happen! If the goal node in the merged LR(1) automaton has a conflict, then there should exist a node with a conflict in the canonical automaton as well. Otherwise, Pager's construction is incorrect. *) begin Printf.fprintf stderr "** Internal failure (Pager's theorem).\n"; Printf.fprintf stderr "** Tokens of interest: %s\n" (TerminalSet.print X.tokens); Printf.fprintf stderr "** Goal state: %d\n" (Lr1.number X.goal); Printf.fprintf stderr "** Please send your grammar to Menhir's developers.\n%!"; exit 1 end with Goal (node, tok) -> node, tok (* Query the goal node that was found about the shortest path from it to one of the entry nodes. *) let source, path = let rec follow path node = match node.ancestor with | None -> Lr1.start2item node.shadow, Array.of_list path | Some (symbol, node) -> follow (symbol :: path) node in follow [] goal let goal = Lr0.export goal.state end