(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier and Yann Régis-Gianas, INRIA Rocquencourt *) (* *) (* Copyright 2005 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the *) (* change described in file LICENSE. *) (* *) (**************************************************************************) open Grammar (* -------------------------------------------------------------------------- *) (* Our output channel. *) let out = lazy (open_out (Settings.base ^ ".conflicts")) (* -------------------------------------------------------------------------- *) (* Explaining shift actions. *) (* The existence of a shift action stems from the existence of a shift item in the LR(0) core that underlies the LR(1) state of interest. That is, lookahead sets are not relevant. The existence of a shift item in the LR(0) core is explained by finding a path from a start item to the shift item in the LR(0) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. There may also be several shift items in the conflict state: an arbitrary one is chosen. I believe it would not be interesting to display traces for several shift items: they would be identical except in their last line (where the desired shift item actually appears). *) (* Symbolic execution of the nondeterministic LR(0) automaton. *) (* Configurations are pairs of an LR(0) item and an offset into the input string, which indicates how much has been read so far. *) type configuration0 = Item.t * int (* This function builds a derivation out of a (nonempty, reversed) sequence of configurations. The derivation is constructed from bottom to top, that is, beginning at the last configuration and moving back towards to the start configuration. *) let rec follow derivation offset' = function | [] -> assert (offset' = 0); derivation | (item, offset) :: configs -> let _, _, rhs, pos, _ = Item.def item in let derivation = if offset = offset' then (* This is an epsilon transition. Put a new root node on top of the existing derivation. *) Derivation.build pos rhs derivation None else (* This was a shift transition. Tack symbol in front of the forest. *) Derivation.prepend rhs.(pos) derivation in follow derivation offset configs (* Symbolic execution begins with a start item (corresponding to one of the automaton's entry nodes), a fixed string of input symbols, to be fully consumed, and a goal item. The objective is to find a path through the automaton that leads from the start configuration [(stop, 0)] to the goal configuration [(stop, n)], where [n] is the length of the input string. The automaton is explored via breadth-first search. A hash table is used to record which configurations have been visited and to build a spanning tree of shortest paths. *) exception Done let explain_shift_item (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration0, configuration0 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration0 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in enqueue None (start, 0); try Misc.qiter (function (item, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, _, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. *) let configs = Misc.materialize table (stop, n) in let _, _, rhs, pos, _ = Item.def stop in let derivation = Derivation.tail pos rhs in let derivation = follow derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Explaining reduce actions. *) (* The existence of a reduce action stems from the existence of a reduce item, whose lookahead set contains the token of interest, in the state of interest. Here, lookahead sets are relevant only insofar as they contain or do not contain the token of interest -- in other words, lookahead sets can be abstracted by Boolean values. The existence of the reduce item is explained by finding a path from a start item to the reduce item in the LR(1) nondeterministic automaton, such that the symbols read along this path form the (previously fixed) symbol string that leads to the conflict state in the LR(1) automaton. There may be several such paths: a shortest one is chosen. *) (* Symbolic execution of the nondeterministic LR(1) automaton. *) (* Configurations are pairs of an LR(1) item and an offset into the input string, which indicates how much has been read so far. An LR(1) item is itself represented as the combination of an LR(0) item and a Boolean flag, telling whether the token of interest appears or does not appear in the lookahead set. *) type configuration1 = Item.t * bool * int (* This function builds a derivation out of a sequence of configurations. The end of the sequence is dealt with specially -- we want to explain how the lookahead symbol appears and is inherited. Once that is done, the rest (that is, the beginning) of the derivation is dealt with as above. *) let config1toconfig0 (item, _, offset) = (item, offset) let rec follow1 tok derivation offset' = function | [] -> assert (Terminal.equal tok Terminal.sharp); (* One could emit a comment saying that the lookahead token is initially [#]. That comment would have to be displayed above the derivation, though, and there is no support for that at the moment, so let's skip it. *) derivation | (item, _, offset) :: configs -> let _, _, rhs, pos, length = Item.def item in if offset = offset' then (* This is an epsilon transition. Attack a new line and add a comment that explains why the lookahead symbol is produced or inherited. *) let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in if TerminalSet.mem tok first then (* The lookahead symbol is produced (and perhaps also inherited, but let's ignore that). *) let e = Analysis.explain_first_rhs tok rhs (pos + 1) in let comment = "lookahead token appears" ^ (if e = "" then "" else " because " ^ e) in let derivation = Derivation.build pos rhs derivation (Some comment) in (* Print the rest of the derivation without paying attention to the lookahead symbols. *) follow derivation offset (List.map config1toconfig0 configs) else begin (* The lookahead symbol is not produced, so it is definitely inherited. *) assert nullable; let comment = "lookahead token is inherited" ^ (if pos + 1 < length then Printf.sprintf " because %scan vanish" (Symbol.printao (pos + 1) rhs) else "") in let derivation = Derivation.build pos rhs derivation (Some comment) in follow1 tok derivation offset configs end else (* This is a shift transition. Tack symbol in front of forest. *) let derivation = Derivation.prepend rhs.(pos) derivation in follow1 tok derivation offset configs (* Symbolic execution is performed in the same manner as above. *) let explain_reduce_item (tok : Terminal.t) (start : Item.t) (input : Symbol.t array) (stop : Item.t) : Derivation.t = let n = Array.length input in let table : (configuration1, configuration1 option) Hashtbl.t = Hashtbl.create 1023 in let queue : configuration1 Queue.t = Queue.create() in let enqueue ancestor config = try let _ = Hashtbl.find table config in () with Not_found -> Hashtbl.add table config ancestor; Queue.add config queue in (* If the lookahead token is #, then it initially appear in the lookahead set, otherwise it doesn't. *) enqueue None (start, Terminal.equal tok Terminal.sharp, 0); try Misc.qiter (function (item, lookahead, offset) as config -> (* If the item we're looking at is the goal item and if we have read all of the input symbols, stop. *) if (Item.equal item stop) && lookahead && (offset = n) then raise Done; (* Otherwise, explore the transitions out of this item. *) let prod, nt, rhs, pos, length = Item.def item in (* Shift transition, followed only if the symbol matches the symbol found in the input string. *) if (pos < length) && (offset < n) && (Symbol.equal rhs.(pos) input.(offset)) then begin let config' = (Item.import (prod, pos+1), lookahead, offset+1) in enqueue (Some config) config' end; (* Epsilon transitions. *) if pos < length then match rhs.(pos) with | Symbol.N nt -> let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in let first : bool = TerminalSet.mem tok first in let lookahead' = if nullable then first || lookahead else first in Production.iternt nt (fun prod -> let config' = (Item.import (prod, 0), lookahead', offset) in enqueue (Some config) config' ) | Symbol.T _ -> () ) queue; assert false with Done -> (* We have found a (shortest) path from the start configuration to the goal configuration. Turn it into an explicit derivation. *) let configs = Misc.materialize table (stop, true, n) in let derivation = Derivation.empty in let derivation = follow1 tok derivation n configs in derivation (* -------------------------------------------------------------------------- *) (* Putting it all together. *) let () = if Settings.explain then begin Lr1.conflicts (fun toks node -> (* Construct a partial LR(1) automaton, looking for a conflict in a state that corresponds to this node. Because Pager's algorithm can merge two states as soon as one of them has a conflict, we can't be too specific about the conflict that we expect to find in the canonical automaton. So, we must supply a set of conflict tokens and accept any kind of conflict that involves one of them. *) (* TEMPORARY with the new compatibility criterion, we can be sure that every conflict token is indeed involved in a conflict. Exploit that? Avoid focusing on a single token? *) let module P = Lr1partial.Run (struct let tokens = toks let goal = node end) in let closure = Lr0.closure P.goal in (* Determine what kind of conflict was found. *) let shift, reduce = Item.Map.fold (fun item toks (shift, reduce) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when Terminal.equal tok P.token -> shift + 1, reduce | Item.Reduce prod when TerminalSet.mem P.token toks -> shift, reduce + 1 | _ -> shift, reduce ) closure (0, 0) in let kind = if (shift > 0) && (reduce > 1) then "shift/reduce/reduce" else if (shift > 0) then "shift/reduce" else "reduce/reduce" in (* Explain how the conflict state is reached. *) let out = Lazy.force out in Printf.fprintf out "\n\ ** Conflict (%s) in state %d.\n\ ** Token%s involved: %s\n%s\ ** This state is reached from %s after reading:\n\n%s\n" kind (Lr1.number node) (if TerminalSet.cardinal toks > 1 then "s" else "") (TerminalSet.print toks) (if TerminalSet.cardinal toks > 1 then Printf.sprintf "** The following explanations concentrate on token %s.\n" (Terminal.print P.token) else "") (Nonterminal.print false (Item.startnt P.source)) (Symbol.printa P.path); (* Examine the items in that state, focusing on one particular token. Out of the shift items, we explain just one -- this seems enough. We explain each of the reduce items. *) (* First, build a mapping of items to derivations. *) let (_ : bool), derivations = Item.Map.fold (fun item toks (still_looking_for_shift_item, derivations) -> match Item.classify item with | Item.Shift (Symbol.T tok, _) when still_looking_for_shift_item && (Terminal.equal tok P.token) -> false, let derivation = explain_shift_item P.source P.path item in Item.Map.add item derivation derivations | Item.Reduce prod when TerminalSet.mem P.token toks -> still_looking_for_shift_item, let derivation = explain_reduce_item P.token P.source P.path item in Item.Map.add item derivation derivations | _ -> still_looking_for_shift_item, derivations ) closure (true, Item.Map.empty) in (* Factor out the common context among all derivations, so as to avoid repeating it. This helps prevent derivation trees from drifting too far away towards the right. It also helps produce sub-derivations that are quite compact. *) let context, derivations = Derivation.factor derivations in (* Display the common context. *) Printf.fprintf out "\n** The derivations that appear below have the following common factor:\ \n** (The question mark symbol (?) represents the spot where the derivations begin to differ.)\n\n"; Derivation.printc out context; (* Then, display the sub-derivations. *) Item.Map.iter (fun item derivation -> Printf.fprintf out "\n** In state %d, looking ahead at %s, " (Lr1.number node) (Terminal.print P.token); begin match Item.classify item with | Item.Shift _ -> Printf.fprintf out "shifting is permitted\n** because of the following sub-derivation:\n\n" | Item.Reduce prod -> Printf.fprintf out "reducing production\n** %s\n** is permitted because of the following sub-derivation:\n\n" (Production.print prod) end; Derivation.print out derivation ) derivations; flush out ); Time.tick "Explaining conflicts" end (* ------------------------------------------------------------------------ *) (* Resolve the conflicts that remain in the automaton. *) let () = Lr1.default_conflict_resolution(); Time.tick "Resolving remaining conflicts"