(* * ocamlweb - A WEB-like tool for ocaml * Copyright (C) 1999-2001 Jean-Christophe FILLIÂTRE and Claude MARCHÉ * * This software is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License version 2, as published by the Free Software Foundation. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Library General Public License version 2 for more details * (enclosed in the file LGPL). *) (*i $Id: pretty.mll,v 1.41 2003/01/31 16:51:46 filliatr Exp $ i*) { open Printf open Output open Lexing (*s Global variables and functions used by the lexer. *) (* [comment_depth] indicates how many opening-braces we saw, so we know how many closing-braces we have to look for, in order to respect caml's-specifications concerning comments imbrication. *) let comment_depth = ref 0 (* Accounts for braket-depth in order to imbricate them. *) let bracket_depth = ref 0 (* Set a reference on the starting character when we see [\verb]. *) let verb_delim = ref (Char.chr 0) (* counts occurences of "%%" in yacc files *) let yaccdoublepercentcounter = ref 0 (* This function returns the first char of a lexbuf. *) let first_char lexbuf = lexeme_char lexbuf 0 (* The [count_spaces] function acccounts for spaces in order to respect alignment (in the \LaTeX{}-outputed file) concerning left margins. *) let count_spaces s = let c = ref 0 in for i = 0 to String.length s - 1 do if s.[i] = '\t' then c := !c + (8 - (!c mod 8)) else incr c done; !c (* This boolean value is true if we enter in math mode, false otherwise. *) let user_math_mode = ref false (* [user_math] function does everything is needed to set the math mode when it is called, particularly it checks/sets the [user_math_mode] value as needed. *) let user_math () = if not !user_math_mode then begin user_math_mode := true; enter_math () end else begin user_math_mode := false; leave_math () end (* Checks if we are in maths mode and prints char c considering the case. *) let check_user_math c = if !user_math_mode then output_char c else output_escaped_char c (* This function sets values in order to reset the lexer, so we could call it on an another file. *) let reset_pretty () = reset_output (); yaccdoublepercentcounter := 0; user_math_mode := false } (*s Shortcuts for regular expressions. *) let space = [' ' '\t'] let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_'] let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222'] (* This is for the identifiers as specified in caml's specifications. *) let identchar = ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9'] let identifier = (lowercase | uppercase) identchar* (* This one helps protecting special caracters. *) let symbolchar = ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~'] let caml_token = "[" | "]" | "[|" | "|]" | "[<" | ">]" | "{" | "}" | "{<" | ">}" | "[]" | "(" | ")" | "or" | "not" | "||" let symbol_token = caml_token | (symbolchar+) let character = "'" ( [^ '\\' '\''] | '\\' ['\\' '\'' 'n' 't' 'b' 'r'] | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] ) "'" let decimal_literal = ['0'-'9']+ let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+ let oct_literal = '0' ['o' 'O'] ['0'-'7']+ let bin_literal = '0' ['b' 'B'] ['0'-'1']+ let float_literal = ['0'-'9']+ ('.' ['0'-'9']*)? (['e' 'E'] ['+' '-']? ['0'-'9']+)? (*s Pretty-printing of code. Main entry points for Caml and Lex and Yacc files, counts for spaces in order to respect alignment. The following function pretty-prints some code and assumes that we are at the beginning of a line. *) rule pr_camlcode = parse | space* { let n = count_spaces (lexeme lexbuf) in indentation n; pr_camlcode_inside lexbuf; pr_camlcode lexbuf } | eof { leave_math () } and pr_lexcode = parse | space* { let n = count_spaces (lexeme lexbuf) in indentation n; pr_lexcode_inside lexbuf; pr_lexcode lexbuf } | eof { leave_math () } and pr_yacccode = parse | space* { let n = count_spaces (lexeme lexbuf) in indentation n; pr_yacccode_inside lexbuf; pr_yacccode lexbuf } | eof { leave_math () } (*s That function pretty-prints the Caml code anywhere else. *) and pr_camlcode_inside = parse | '\n' { end_line () } | space+ { output_char '~'; pr_camlcode_inside lexbuf } | character { output_verbatim (lexeme lexbuf); pr_camlcode_inside lexbuf } | "'" identifier { let id = lexeme lexbuf in output_type_variable (String.sub id 1 (pred (String.length id))); pr_camlcode_inside lexbuf } | "(*r" { output_hfill (); output_bc (); comment_depth := 1; pr_comment lexbuf; pr_camlcode_inside lexbuf } | "(*" { output_bc (); comment_depth := 1; pr_comment lexbuf; pr_camlcode_inside lexbuf } | '"' { output_bs (); pr_code_string lexbuf; pr_camlcode_inside lexbuf } | symbol_token { output_symbol (lexeme lexbuf); pr_camlcode_inside lexbuf } | (identifier '.')* identifier { output_ident (lexeme lexbuf); pr_camlcode_inside lexbuf } | eof { () (*i end_line() supprime par dorland-muller i*) } | decimal_literal | hex_literal | oct_literal | bin_literal { output_integer (lexeme lexbuf); pr_camlcode_inside lexbuf } | float_literal { output_float (lexeme lexbuf); pr_camlcode_inside lexbuf } | _ { output_escaped_char (first_char lexbuf); pr_camlcode_inside lexbuf } (*s That function pretty-prints the Lex code anywhere else. *) and pr_lexcode_inside = parse | '_' { output_string "\\ocwlexwc"; pr_lexcode_inside lexbuf } | '*' { enter_math (); output_string "^\\star{}"; pr_lexcode_inside lexbuf } | '+' { enter_math (); output_string "^{\\scriptscriptstyle +}"; pr_lexcode_inside lexbuf } | '-' { leave_math (); output_string "--"; pr_lexcode_inside lexbuf } | '\n' { end_line () } | space+ { output_char '~'; pr_lexcode_inside lexbuf } | character { output_verbatim (lexeme lexbuf); pr_lexcode_inside lexbuf } | "(*" { output_bc (); comment_depth := 1; pr_comment lexbuf; pr_lexcode_inside lexbuf } | "(*r" { output_hfill (); output_bc (); comment_depth := 1; pr_comment lexbuf; pr_lexcode_inside lexbuf } | '"' { output_bs (); pr_code_string lexbuf; pr_lexcode_inside lexbuf } | identifier { output_lex_ident (lexeme lexbuf); pr_lexcode_inside lexbuf } | eof { () } | _ { output_escaped_char (first_char lexbuf); pr_lexcode_inside lexbuf } (*s That function pretty-prints the Yacc code anywhere else. *) and pr_yacccode_inside = parse | "%%" { incr yaccdoublepercentcounter; output_string (if !yaccdoublepercentcounter = 1 then "\\ocwyaccrules" else "\\ocwyacctrailer"); pr_yacccode_inside lexbuf } | "%{" { output_string "\\ocwyaccopercentbrace"; pr_yacccode_inside lexbuf } | "%}" { output_string "\\ocwyacccpercentbrace"; pr_yacccode_inside lexbuf } | ":" { output_string "\\ocwyacccolon"; pr_yacccode_inside lexbuf } | ";" { output_string "\\ocwyaccendrule"; pr_yacccode_inside lexbuf } | "|" { output_string "\\ocwyaccpipe"; pr_yacccode_inside lexbuf } | '\n' { end_line (); } | space+ { output_char '~'; pr_yacccode_inside lexbuf } | "/*r" { output_hfill (); output_byc (); pr_yacc_comment lexbuf; pr_yacccode_inside lexbuf } | "/*" { output_byc (); pr_yacc_comment lexbuf; pr_yacccode_inside lexbuf } | '%'? identifier { output_yacc_ident (lexeme lexbuf); pr_yacccode_inside lexbuf } | _ { output_escaped_char (first_char lexbuf); pr_yacccode_inside lexbuf } | eof { () } (*s Comments. *) and pr_comment = parse | "(*" { output_bc (); incr comment_depth; pr_comment lexbuf } | "*)" { output_ec (); decr comment_depth; if !comment_depth > 0 then pr_comment lexbuf } (*i utile ????? | '\n' space* '*' ' ' { output_string "\n "; pr_comment lexbuf } i*) | '"' { output_bs (); pr_code_string lexbuf; pr_comment lexbuf; } | '[' { if !user_math_mode then output_char '[' else begin bracket_depth := 1; begin_dcode (); escaped_code lexbuf; end_dcode () end; pr_comment lexbuf } | eof { () } | "\\$" { output_string (lexeme lexbuf); pr_comment lexbuf } | '$' { user_math(); pr_comment lexbuf } | _ { output_char (first_char lexbuf); pr_comment lexbuf } (* The [C_like_comments] are not inbricable *) and pr_yacc_comment = parse | "*/" { output_eyc (); } | '\n' space* '*' ' ' { output_string "\n "; pr_yacc_comment lexbuf } | '[' { if !user_math_mode then output_char '[' else begin bracket_depth := 1; begin_dcode (); escaped_code lexbuf; end_dcode () end; pr_yacc_comment lexbuf } | eof { () } | "\\$" { output_string (lexeme lexbuf); pr_yacc_comment lexbuf } | '$' { user_math(); pr_yacc_comment lexbuf } | _ { output_char (first_char lexbuf); pr_yacc_comment lexbuf } (*s Strings in code. *) and pr_code_string = parse | '"' { output_es () } | '\n' { end_line_string (); pr_code_string lexbuf } | ' ' { output_vspace (); pr_code_string lexbuf } | '\\' ['"' 't' 'b' 'r'] { output_escaped_char '\\'; output_char (lexeme_char lexbuf 1); pr_code_string lexbuf } | '\\' '\n' { output_escaped_char '\\'; end_line_string (); pr_code_string lexbuf } | '\\' '\\' { output_escaped_char '\\'; output_escaped_char '\\'; pr_code_string lexbuf } | eof { () } | '-' { output_ascii_char 45; pr_code_string lexbuf } | _ { output_escaped_char (first_char lexbuf); pr_code_string lexbuf } (*s Escaped code. *) and escaped_code = parse | '[' { output_char '['; incr bracket_depth; escaped_code lexbuf } | ']' { decr bracket_depth; if !bracket_depth > 0 then begin output_char ']'; escaped_code lexbuf end else if not !user_math_mode then leave_math () } | '"' { output_bs (); pr_code_string lexbuf; escaped_code lexbuf } | space+ { output_char '~'; escaped_code lexbuf } | character { output_verbatim (lexeme lexbuf); escaped_code lexbuf } | "'" identifier { let id = lexeme lexbuf in output_type_variable (String.sub id 1 (pred (String.length id))); escaped_code lexbuf } | symbol_token { output_symbol (lexeme lexbuf); escaped_code lexbuf } | identifier { output_ident (lexeme lexbuf); escaped_code lexbuf } | eof { if not !user_math_mode then leave_math () } | decimal_literal | hex_literal | oct_literal | bin_literal { output_integer (lexeme lexbuf); escaped_code lexbuf } | float_literal { output_float (lexeme lexbuf); escaped_code lexbuf } | _ { output_escaped_char (first_char lexbuf); escaped_code lexbuf } (*s Documentation. It is output `as is', except for quotations. *) and pr_doc = parse | '[' { if !user_math_mode then output_char '[' else begin bracket_depth := 1; begin_dcode (); escaped_code lexbuf; end_dcode () end; pr_doc lexbuf } | "\\$" { output_string (lexeme lexbuf); pr_doc lexbuf } | '$' { user_math(); pr_doc lexbuf } | "\\verb" _ { verb_delim := lexeme_char lexbuf 5; output_string (lexeme lexbuf); pr_verb lexbuf; pr_doc lexbuf } | "\\begin{verbatim}" { output_string (lexeme lexbuf); pr_verbatim lexbuf; pr_doc lexbuf } | eof { () } | _ { output_char (first_char lexbuf); pr_doc lexbuf } and pr_doc_title = parse | '[' { if !user_math_mode then output_char '[' else begin bracket_depth := 1; begin_dcode (); escaped_code lexbuf; end_dcode () end; pr_doc_title lexbuf } | '.' { output_string ".}\\quad{}" } | eof { () } | _ { output_char (first_char lexbuf); pr_doc_title lexbuf } and pr_verb = parse | eof { () } | _ { let c = lexeme_char lexbuf 0 in output_char c; if c == !verb_delim then () else pr_verb lexbuf } and pr_verbatim = parse | "\\end{verbatim}" { output_string (lexeme lexbuf) } | eof { () } | _ { output_char (lexeme_char lexbuf 0); pr_verbatim lexbuf } { (*s pretty-printing subparagraphs *) let pretty_print_caml_subpar s = pr_camlcode (Lexing.from_string s) let pretty_print_lex_subpar s = pr_lexcode (Lexing.from_string s) let pretty_print_yacc_subpar s = pr_yacccode (Lexing.from_string s) (*s Then we can introduce two functions [pretty_print_code] and [pretty_print_doc], which pretty-print respectively code and documentation parts. *) let pretty_print_code is_last_paragraph s = pr_camlcode (Lexing.from_string s); end_code_paragraph is_last_paragraph let pretty_print_doc is_first_paragraph (big,n,s) = begin_doc_paragraph is_first_paragraph n; let lb = Lexing.from_string s in if big then begin output_string "\\textbf{"; pr_doc_title lb end; pr_doc lb; end_doc_paragraph () }