(* * ocamlweb - A WEB-like tool for ocaml * Copyright (C) 1999-2001 Jean-Christophe FILLIÂTRE and Claude MARCHÉ * * This software is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License version 2, as published by the Free Software Foundation. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Library General Public License version 2 for more details * (enclosed in the file LGPL). *) (*i $Id: doclexer.mll,v 1.23 2003/07/07 08:18:11 filliatr Exp $ i*) { open Printf open Lexing open Output open Web open Pretty (*s Global variables and functions used by the lexer. *) (* [skip_header] tells whether option \verb|--header| has been selected by the user. *) let skip_header = ref true (* for displaying error message if any, [current_file] records the name of the file currently read, and [comment_or_string_start] records the starting position of the comment or the string currently being read. *) let current_file = ref "" let comment_or_string_start = ref 0 (* [brace_depth] records the current depth of imbrication of braces \verb|{..}|, to know in lex files whether we are in an action or not. [lexyacc_brace_start] records the position of the starting brace of the current action, to display an error message if this brace is unclosed. *) let in_lexyacc_action = ref false let doublepercentcounter = ref 0 let brace_depth = ref 0 let lexyacc_brace_start = ref 0 (* [web_style] records if web sections were used anywhere in any file. *) let web_style = ref false (* global variables for temporarily recording data, for building the [Web.file] structure. *) let parbuf = Buffer.create 8192 let ignoring = ref false let push_char c = if not !ignoring then Buffer.add_char parbuf c let push_first_char lexbuf = if not !ignoring then Buffer.add_char parbuf (lexeme_char lexbuf 0) let push_string s = if not !ignoring then Buffer.add_string parbuf s let subparlist = ref ([] : sub_paragraph list) let push_caml_subpar () = if Buffer.length parbuf > 0 then begin subparlist := (CamlCode (Buffer.contents parbuf)) :: !subparlist; Buffer.clear parbuf end let push_lex_subpar () = if Buffer.length parbuf > 0 then begin subparlist := (LexCode (Buffer.contents parbuf)) :: !subparlist; Buffer.clear parbuf end let push_yacc_subpar () = if Buffer.length parbuf > 0 then begin subparlist := (YaccCode (Buffer.contents parbuf)) :: !subparlist; Buffer.clear parbuf end let parlist = ref ([] : paragraph list) let code_beg = ref 0 let push_code () = assert (List.length !subparlist = 0); if Buffer.length parbuf > 0 then begin parlist := (Code (!code_beg, Buffer.contents parbuf)) :: !parlist; Buffer.clear parbuf end let push_lexyacccode () = assert (Buffer.length parbuf = 0) ; if List.length !subparlist > 0 then begin parlist := (LexYaccCode (!code_beg, List.rev !subparlist)) :: !parlist; subparlist := [] end let big_section = ref false let initial_spaces = ref 0 let push_doc () = if Buffer.length parbuf > 0 then begin let doc = Documentation (!big_section, !initial_spaces, Buffer.contents parbuf) in parlist := doc :: !parlist; big_section := false; Buffer.clear parbuf end let seclist = ref ([] : raw_section list) let section_beg = ref 0 let push_section () = if !parlist <> [] then begin let s = { sec_contents = List.rev !parlist; sec_beg = !section_beg } in seclist := s :: !seclist; parlist := [] end let reset_lexer f = current_file := f; comment_or_string_start := 0; section_beg := 0; code_beg := 0; parlist := []; seclist := []; in_lexyacc_action := false; doublepercentcounter := 0 let backtrack lexbuf = (*i eprintf "backtrack to %d\n" (lexbuf.lex_abs_pos + lexbuf.lex_start_pos); i*) lexbuf.lex_curr_pos <- lexbuf.lex_start_pos } (*s Shortcuts for regular expressions. *) let space = [' ' '\r' '\t'] let space_or_nl = [' ' '\t' '\r' '\n'] let character = "'" ( [^ '\\' '\''] | '\\' ['\\' '\'' 'n' 't' 'b' 'r'] | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] ) "'" let up_to_end_of_comment = [^ '*']* '*' (([^ '*' ')'] [^ '*']* '*') | '*')* ')' (*s Entry point to skip the headers. Returns when headers are skipped. *) rule header = parse | "(*" { comment_or_string_start := lexeme_start lexbuf; skip_comment lexbuf; skip_spaces_until_nl lexbuf; header lexbuf } | "\n" { () } | space+ { header lexbuf } | _ { backtrack lexbuf } | eof { () } (* To skip a comment (used by [header]). *) and skip_comment = parse | "(*" { skip_comment lexbuf; skip_comment lexbuf } | "*)" { () } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocaml comment\n"; exit 1 } | _ { skip_comment lexbuf } (*s Same as [header] but for \textsf{OCamlYacc} *) and yacc_header = parse | "/*" { comment_or_string_start := lexeme_start lexbuf; skip_yacc_comment lexbuf; skip_spaces_until_nl lexbuf; yacc_header lexbuf } | "\n" { () } | space+ { yacc_header lexbuf } | _ { backtrack lexbuf } | eof { () } and skip_yacc_comment = parse | "/*" { skip_yacc_comment lexbuf; skip_yacc_comment lexbuf } | "*/" { () } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocamlyacc comment\n"; exit 1 } | _ { skip_yacc_comment lexbuf } (*s Recognizes a complete caml module body or interface, after header has been skipped. After calling that entry, the whole text read is in [seclist]. *) and caml_implementation = parse | _ { backtrack lexbuf; paragraph lexbuf; caml_implementation lexbuf } | eof { push_section () } (* recognizes a paragraph of caml code or documentation. After calling that entry, the paragraph has been added to [parlist]. *) and paragraph = parse | space* '\n' { paragraph lexbuf } | space* ";;" { paragraph lexbuf } | space* "(*" '*'* "*)" space* '\n' { paragraph lexbuf } | space* "(*" { comment_or_string_start := lexeme_start lexbuf; let s = lexeme lexbuf in initial_spaces := count_spaces (String.sub s 0 (String.length s - 2)); start_of_documentation lexbuf; push_doc () } | space* ("(*c" | _) { code_beg := lexeme_start lexbuf; backtrack lexbuf; caml_subparagraph lexbuf; push_code() } | eof { () } (* recognizes a whole lex description file, after header has been skipped. After calling that entry, the whole text read is in [seclist]. *) and lex_description = parse | _ { backtrack lexbuf; lex_paragraph lexbuf ; lex_description lexbuf } | eof { push_section () } and yacc_description = parse | _ { backtrack lexbuf ; yacc_paragraph lexbuf; yacc_description lexbuf } | eof { push_section () } (* Recognizes a paragraph of a lex description file. After calling that entry, the paragraph has been added to [parlist]. *) and lex_paragraph = parse | space* '\n' { lex_paragraph lexbuf } | space* "(*" '*'* "*)" space* '\n' { lex_paragraph lexbuf } | space* ("(*c" | _) { code_beg := lexeme_start lexbuf; backtrack lexbuf; lex_subparagraphs lexbuf ; push_lexyacccode() } | space* "(*" { comment_or_string_start := lexeme_start lexbuf; start_of_documentation lexbuf; push_doc () } | eof { () } and yacc_paragraph = parse | space* '\n' { yacc_paragraph lexbuf } | space* "/*" '*'* "*/" space* '\n' { if not !in_lexyacc_action then yacc_paragraph lexbuf else begin code_beg := lexeme_start lexbuf; backtrack lexbuf; yacc_subparagraphs lexbuf ; push_lexyacccode() end } | space* "/*" { if not !in_lexyacc_action then begin comment_or_string_start := lexeme_start lexbuf; start_of_yacc_documentation lexbuf; push_doc () end else begin code_beg := lexeme_start lexbuf; backtrack lexbuf; yacc_subparagraphs lexbuf ; push_lexyacccode() end } | space* "(*" '*'* "*)" space* '\n' { if !in_lexyacc_action then yacc_paragraph lexbuf else begin code_beg := lexeme_start lexbuf; backtrack lexbuf; yacc_subparagraphs lexbuf ; push_lexyacccode() end } | space* "(*" { if !in_lexyacc_action then begin comment_or_string_start := lexeme_start lexbuf; start_of_documentation lexbuf; push_doc () end else begin code_beg := lexeme_start lexbuf; backtrack lexbuf; yacc_subparagraphs lexbuf ; push_lexyacccode() end } | space* ("/*c" | "(*c" | _) { code_beg := lexeme_start lexbuf; backtrack lexbuf; yacc_subparagraphs lexbuf ; push_lexyacccode() } | eof { () } (*s At the beginning of the documentation part, just after the \verb|"(*"|. If the first character is ['s'], then a new section is started. After calling that entry, the [parbuf] buffer contains the doc read. *) and start_of_documentation = parse | space_or_nl+ { in_documentation lexbuf } | ('s' | 'S') space_or_nl* { web_style := true; push_section (); section_beg := lexeme_start lexbuf; big_section := (lexeme_char lexbuf 0 == 'S'); in_documentation lexbuf } | 'p' up_to_end_of_comment { let s = lexeme lexbuf in push_in_preamble (String.sub s 1 (String.length s - 3)) } | 'i' { ignore lexbuf } | _ { backtrack lexbuf; in_documentation lexbuf } | eof { in_documentation lexbuf } and start_of_yacc_documentation = parse | space_or_nl+ { in_yacc_documentation lexbuf } | ('s' | 'S') space_or_nl* { web_style := true; push_section (); section_beg := lexeme_start lexbuf; big_section := (lexeme_char lexbuf 0 == 'S'); in_yacc_documentation lexbuf } | 'p' up_to_end_of_comment { let s = lexeme lexbuf in push_in_preamble (String.sub s 1 (String.length s - 3)) } | 'i' { yacc_ignore lexbuf } | _ { backtrack lexbuf; in_yacc_documentation lexbuf } | eof { in_yacc_documentation lexbuf } (*s Inside the documentation part, anywhere after the "(*". After calling that entry, the [parbuf] buffer contains the doc read. *) and in_documentation = parse | "(*" { push_string "(*"; in_documentation lexbuf; push_string "*)"; in_documentation lexbuf } | "*)" { () } | '\n' " * " { push_char '\n'; in_documentation lexbuf } | '"' { push_char '"'; in_string lexbuf; in_documentation lexbuf } | character { push_string (lexeme lexbuf); in_documentation lexbuf } | _ { push_first_char lexbuf; in_documentation lexbuf } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocaml comment\n"; exit 1 } (* yacc comments are NOT nested *) and in_yacc_documentation = parse | "*/" { () } | '\n' " * " { push_char '\n'; in_yacc_documentation lexbuf } | '"' { push_char '"'; in_string lexbuf; in_yacc_documentation lexbuf } | character { push_string (lexeme lexbuf); in_yacc_documentation lexbuf } | _ { push_first_char lexbuf; in_yacc_documentation lexbuf } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocamlyacc comment\n"; exit 1 } (*s Recognizes a subparagraph of caml code. After calling that entry, the [parbuf] buffer contains the code read. *) and caml_subparagraph = parse | space* '\n' space* '\n' { backtrack lexbuf } | ";;" { backtrack lexbuf } | eof { () } | "(*" | "(*c" { comment_or_string_start := lexeme_start lexbuf; push_string "(*"; comment lexbuf; caml_subparagraph lexbuf } | "(*i" { comment_or_string_start := lexeme_start lexbuf; ignore lexbuf; caml_subparagraph lexbuf } | '"' { comment_or_string_start := lexeme_start lexbuf; push_char '"'; in_string lexbuf; caml_subparagraph lexbuf } | '{' { incr brace_depth; push_char '{'; caml_subparagraph lexbuf } | '}' { if !brace_depth = 0 then backtrack lexbuf else begin decr brace_depth; push_char '}'; caml_subparagraph lexbuf end } | "%}" { if !brace_depth = 0 then backtrack lexbuf else begin decr brace_depth; push_string "%}"; caml_subparagraph lexbuf end } | character { push_string (lexeme lexbuf); caml_subparagraph lexbuf } | _ { push_first_char lexbuf; caml_subparagraph lexbuf } (*s Recognizes a sequence of subparagraphs of lex description, including CAML actions. After calling that entry, the subparagraphs read are in [subparlist]. *) and lex_subparagraphs = parse | space* '\n' space* '\n' { backtrack lexbuf } | ";;" { () } | eof { if !in_lexyacc_action then begin eprintf "File \"%s\", character %d\n" !current_file !lexyacc_brace_start; eprintf "Unclosed brace\n" ; exit 1 end } | '}' { if !in_lexyacc_action then begin push_char '}'; in_lexyacc_action := false; lex_subparagraph lexbuf; push_lex_subpar(); lex_subparagraphs lexbuf end else begin eprintf "File \"%s\", character %d\n" !current_file (lexeme_start lexbuf); eprintf "Unexpected closing brace "; exit 1 end } | _ { backtrack lexbuf; if !in_lexyacc_action then begin caml_subparagraph lexbuf; push_caml_subpar() end else begin lex_subparagraph lexbuf; push_lex_subpar() end; lex_subparagraphs lexbuf } and yacc_subparagraphs = parse | space* '\n' space* '\n' { backtrack lexbuf } | "%%" { if !in_lexyacc_action then begin push_string "%%"; caml_subparagraph lexbuf; push_caml_subpar(); yacc_subparagraphs lexbuf end else begin push_yacc_subpar(); push_string "%%"; push_yacc_subpar(); incr doublepercentcounter; if !doublepercentcounter >= 2 then in_lexyacc_action := true end } | ";;" { if !in_lexyacc_action then () else begin push_string ";;"; yacc_subparagraph lexbuf; push_yacc_subpar(); yacc_subparagraphs lexbuf end } | eof { if !in_lexyacc_action && !doublepercentcounter <= 1 then begin eprintf "File \"%s\", character %d\n" !current_file !lexyacc_brace_start; eprintf "Unclosed brace\n" ; exit 1 end } | '}' { if !in_lexyacc_action then begin push_char '}'; in_lexyacc_action := false; yacc_subparagraph lexbuf; push_yacc_subpar(); yacc_subparagraphs lexbuf end else begin eprintf "File \"%s\", character %d\n" !current_file (lexeme_start lexbuf); eprintf "Unexpected closing brace "; exit 1 end } | "%}" { if !in_lexyacc_action then begin push_string "%}"; in_lexyacc_action := false; yacc_subparagraph lexbuf; push_yacc_subpar(); yacc_subparagraphs lexbuf end else begin eprintf "File \"%s\", character %d\n" !current_file (lexeme_start lexbuf); eprintf "Unexpected closing brace "; exit 1 end } | _ { backtrack lexbuf; if !in_lexyacc_action then begin caml_subparagraph lexbuf; push_caml_subpar() end else begin yacc_subparagraph lexbuf; push_yacc_subpar() end; yacc_subparagraphs lexbuf } (*s Recognizes a subparagraph of lex description. After calling that entry, the subparagraph read is in [parbuf]. *) and lex_subparagraph = parse | space* '\n' space* '\n' { backtrack lexbuf } | ";;" { backtrack lexbuf } | eof { () } | "(*" | "(*c" { comment_or_string_start := lexeme_start lexbuf; push_string "(*"; comment lexbuf; lex_subparagraph lexbuf } | space* "(*i" { comment_or_string_start := lexeme_start lexbuf; ignore lexbuf; lex_subparagraph lexbuf } | '"' { comment_or_string_start := lexeme_start lexbuf; push_char '"'; in_string lexbuf; lex_subparagraph lexbuf } | '{' { lexyacc_brace_start := lexeme_start lexbuf; push_char '{'; in_lexyacc_action := true } | character { push_string (lexeme lexbuf); lex_subparagraph lexbuf } | _ { push_first_char lexbuf; lex_subparagraph lexbuf } and yacc_subparagraph = parse | space* '\n' space* '\n' { backtrack lexbuf } | "%%" { backtrack lexbuf } | ";;" { backtrack lexbuf } | eof { () } | "/*" | "/*c" { comment_or_string_start := lexeme_start lexbuf; push_string "/*"; yacc_comment lexbuf; yacc_subparagraph lexbuf } | space* "/*i" { comment_or_string_start := lexeme_start lexbuf; yacc_ignore lexbuf; yacc_subparagraph lexbuf } | '"' { comment_or_string_start := lexeme_start lexbuf; push_char '"'; in_string lexbuf; yacc_subparagraph lexbuf } | "%{" { lexyacc_brace_start := lexeme_start lexbuf; push_string "%{"; in_lexyacc_action := true } | '{' { lexyacc_brace_start := lexeme_start lexbuf; push_char '{'; in_lexyacc_action := true } | '<' { lexyacc_brace_start := lexeme_start lexbuf; push_char '<'; push_yacc_subpar (); yacc_type lexbuf; yacc_subparagraph lexbuf } | character { push_string (lexeme lexbuf); yacc_subparagraph lexbuf } | _ { push_first_char lexbuf; yacc_subparagraph lexbuf } and yacc_type = parse | "->" { push_string "->"; yacc_type lexbuf } | '>' { push_caml_subpar(); push_char '>' } | _ { push_first_char lexbuf; yacc_type lexbuf } | eof { eprintf "File \"%s\", character %d\n" !current_file !lexyacc_brace_start; eprintf "Unclosed '<'"; exit 1 } (*s To skip spaces until a newline. *) and skip_spaces_until_nl = parse | space* '\n'? { () } | eof { () } | _ { backtrack lexbuf } (*s To read a comment inside a piece of code. *) and comment = parse | "(*" | "(*c" { push_string "(*"; comment lexbuf; comment lexbuf } | "*)" { push_string "*)" } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocaml comment\n"; exit 1 } | _ { push_first_char lexbuf; comment lexbuf } and yacc_comment = parse | "*/" { push_string "*/" } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocamlyacc comment\n"; exit 1 } | _ { push_first_char lexbuf; yacc_comment lexbuf } (*s Ignored parts, between "(*i" and "i*)". Note that such comments are not nested. *) and ignore = parse | "i*)" { skip_spaces_until_nl lexbuf } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocamlweb comment\n"; exit 1 } | _ { ignore lexbuf } and yacc_ignore = parse | "i*/" { skip_spaces_until_nl lexbuf } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocamlweb comment\n"; exit 1 } | _ { yacc_ignore lexbuf } (*s Strings in code. *) and in_string = parse | '"' { push_char '"' } | '\\' ['\\' '"' 'n' 't' 'b' 'r'] { push_string (lexeme lexbuf); in_string lexbuf } | eof { eprintf "File \"%s\", character %d\n" !current_file !comment_or_string_start; eprintf "Unterminated ocaml string\n"; exit 1 } | _ { push_first_char lexbuf; in_string lexbuf } { (*s \textbf{Caml files.} *) type caml_file = { caml_filename : string; caml_module : string } let module_name f = String.capitalize (Filename.basename f) let make_caml_file f = { caml_filename = f; caml_module = module_name (Filename.chop_extension f) } type file_type = | File_impl of caml_file | File_intf of caml_file | File_lex of caml_file | File_yacc of caml_file | File_other of string (*s \textbf{Reading Caml files.} *) let raw_read_file header entry f = reset_lexer f; let c = open_in f in let buf = Lexing.from_channel c in if !skip_header then header buf; entry buf; close_in c; List.rev !seclist let read header entry m = { content_file = m.caml_filename; content_name = m.caml_module; content_contents = raw_read_file header entry m.caml_filename; } let read_one_file = function | File_impl m -> Implem (read header caml_implementation m) | File_intf m -> Interf (read header caml_implementation m) | File_lex m -> Lex (read header lex_description m) | File_yacc m -> Yacc (read yacc_header yacc_description m) | File_other f -> Other f }