(*
 * ocamlweb - A WEB-like tool for ocaml
 * Copyright (C) 1999-2001 Jean-Christophe FILLIÂTRE and Claude MARCHÉ
 * 
 * This software is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License version 2, as published by the Free Software Foundation.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * 
 * See the GNU Library General Public License version 2 for more details
 * (enclosed in the file LGPL).
 *)

(*i $Id: yacc_lexer.mll,v 1.5 2002/02/01 09:40:05 marche Exp $ i*)

{

  open Lex_syntax
  open Yacc_syntax
  open Yacc_parser

(* Auxiliaries for the lexical analyzer *)

let brace_depth = ref 0
and comment_depth = ref 0
and mark_count = ref 0

exception Lexical_error of string * int * int

let handle_lexical_error fn lexbuf =
  let line = !current_line_num
  and column = Lexing.lexeme_start lexbuf - !current_line_start_pos in
  try
    fn lexbuf
  with Lexical_error(msg, _, _) ->
    raise(Lexical_error(msg, line, column))

(*s yacc keywords *)

let keyword_table = Hashtbl.create 17
let _ =
  List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok)
    [ "token", Ttoken;
      "start", Tstart; 
      "type", Ttype;
      "left", Tleft;
      "right", Tright;
      "nonassoc", Tnonassoc;
      "prec", Tprec ]

let keyword_token lexbuf =
  try
    Hashtbl.find keyword_table (Lexing.lexeme lexbuf)
  with
      Not_found ->
	raise(Lexical_error
             ("unknown keyword " ^ String.escaped(Lexing.lexeme lexbuf),
              !current_line_num, Lexing.lexeme_start lexbuf - !current_line_start_pos)) 

let cur_loc lexbuf = 
  { start_pos = Lexing.lexeme_start lexbuf; 
    end_pos = Lexing.lexeme_end lexbuf; 
    start_line = !current_line_num; 
    start_col = Lexing.lexeme_start lexbuf - !current_line_start_pos } 

let reset_lexer f lexbuf =
  current_file_name := f;
  mark_count := 0;
  current_line_num := 1;
  current_line_start_pos := 0;
  current_lexbuf := lexbuf
    
  
}

(*s main rule for tokens in yacc files *)

rule main = parse

  | [' ' '\013' '\009' '\012' ] + 
    { main lexbuf }

(*

   Although few grammar files include commas anywhere commas are
skipped in yacc.  The original yacc code is used for ocaml. See
\verb|yacc/reader.c:nextc(),read_grammar()| from the ocaml 3.04 distribution.

   We issue a warning for non conformity to ocamlyacc documentation.

*)

  | ','
    { issue_warning 
	"use of commas in mly files is allowed but not conform to ocamlyacc documentation";
	main lexbuf }

  | '\010'
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      main lexbuf }
  | "/*" 
    { handle_lexical_error yacc_comment lexbuf;
      main lexbuf }
  | ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '\'' '_' '0'-'9'] *
    { match Lexing.lexeme lexbuf with
	  "error" -> Terror
	| s -> 	  let l = cur_loc lexbuf in
	  (*i
	  Printf.eprintf "ident '%s' occurs at (%d,%d)\n" 
	    s l.start_pos l.end_pos;
	  i*)
	  Tident (s,l) }
  | '{' 
    { let n1 = Lexing.lexeme_end lexbuf
      and l1 = !current_line_num
      and s1 = !current_line_start_pos in
      brace_depth := 1;
      let n2 = handle_lexical_error action lexbuf in
      Taction({start_pos = n1; end_pos = n2;
               start_line = l1; start_col = n1 - s1}) }
  | '|' 
      { Tor }
  | ';' 
      { Tsemicolon }
  | ':' 
      { Tcolon }
  | '%'  
      { yacc_keyword lexbuf }
  | '<'
      { let n1 = Lexing.lexeme_end lexbuf
	and l1 = !current_line_num
	and s1 = !current_line_start_pos in
	let n2 = handle_lexical_error typedecl lexbuf in
	Ttypedecl({start_pos = n1; end_pos = n2;
		   start_line = l1; start_col = n1 - s1}) }
  | eof 
      { EOF }
  | _
    { raise(Lexical_error
             ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
              !current_line_num, Lexing.lexeme_start lexbuf - !current_line_start_pos)) }

and yacc_keyword = parse
  | '%' 
      { incr mark_count;
	if !mark_count = 1 then Tmark else 
	  let n1 = Lexing.lexeme_end lexbuf
	  and l1 = !current_line_num
	  and s1 = !current_line_start_pos in
	  brace_depth := 0;
	  let n2 = handle_lexical_error action lexbuf in
	  Taction({start_pos = n1; end_pos = n2;
		   start_line = l1; start_col = n1 - s1}) }
  | '{'  
      { let n1 = Lexing.lexeme_end lexbuf
	and l1 = !current_line_num
	and s1 = !current_line_start_pos in
	brace_depth := 1;
	let n2 = handle_lexical_error action lexbuf in
	Taction({start_pos = n1; end_pos = n2;
		 start_line = l1; start_col = n1 - s1}) }
  | ['a'-'z']+
      { keyword_token lexbuf }
  | _ 
      { raise(Lexical_error
             ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
              !current_line_num, Lexing.lexeme_start lexbuf - !current_line_start_pos)) }


(*s recognizes a CAML action *)
 
and action = parse
  | '{' 
    { incr brace_depth;
      action lexbuf }
  | '}' 
    { decr brace_depth;
      if !brace_depth = 0 then Lexing.lexeme_start lexbuf else action lexbuf }
  | "%}" 
    { decr brace_depth;
      if !brace_depth = 0 then Lexing.lexeme_start lexbuf else 
	raise(Lexical_error
		("ill-balanced brace ",
		 !current_line_num, Lexing.lexeme_start lexbuf - !current_line_start_pos)) }
  | '"' 
    { string lexbuf;
      action lexbuf }
  | "'" [^ '\\'] "'" 
    { action lexbuf }
  | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" 
    { action lexbuf }
  | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" 
    { action lexbuf }
  | "(*" 
    { comment_depth := 1;
      comment lexbuf;
      action lexbuf }
  | eof 
    { if !brace_depth = 0 then Lexing.lexeme_start lexbuf else
	raise (Lexical_error("unterminated action", 0, 0)) }
  | '\010'
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      action lexbuf }
  | _ 
    { action lexbuf }

(*s recognizes a CAML type between $<$ and $>$ *)
      
and typedecl = parse
  | '>' 
    { Lexing.lexeme_start lexbuf }
  | eof 
    { raise (Lexical_error("unterminated type declaration", 0, 0)) }
  | '\010'
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      typedecl lexbuf }
  | "->" 
    { typedecl lexbuf }
  | _ 
    { typedecl lexbuf }
      
and string = parse
    '"' 
    { () }
  | '\\' [' ' '\013' '\009' '\012'] * '\010' [' ' '\013' '\009' '\012'] *
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      string lexbuf }
  | '\\' ['\\' '"' 'n' 't' 'b' 'r'] 
    { string lexbuf }
  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] 
    { string lexbuf }
  | eof 
    { raise(Lexical_error("unterminated string", 0, 0)) }
  | '\010'
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      string lexbuf }
  | _ 
    { string lexbuf }

and comment = parse
    "(*" 
    { incr comment_depth; comment lexbuf }
  | "*)" 
    { decr comment_depth;
      if !comment_depth = 0 then () else comment lexbuf }
  | '"' 
    { string lexbuf;
      comment lexbuf }
  | "''"
      { comment lexbuf }
  | "'" [^ '\\' '\''] "'"
      { comment lexbuf }
  | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
      { comment lexbuf }
  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
      { comment lexbuf }
  | eof 
    { raise(Lexical_error("unterminated comment", 0, 0)) }
  | '\010'
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      comment lexbuf }
  | _ 
    { comment lexbuf }

and yacc_comment = parse
  | "*/" 
    { () }
  | eof 
    { raise(Lexical_error("unterminated yacc comment", 0, 0)) }
  | '\010'
    { current_line_start_pos := Lexing.lexeme_end lexbuf;
      incr current_line_num;
      yacc_comment lexbuf }
  | _ 
    { yacc_comment lexbuf }