(* $Id: unescape.ml,v 1.1.1.1 2003/08/10 20:12:28 yori Exp $ *)
(* Copyright 2003 Yamagata Yoriyuki *)

(* unescape: copies standard input to standard output, replacing the
   escapes backslash-backslash, backslash-u (up to 4 hex digits) and
   backslash-U (up to 8 hex digits) with the characters they denote.
   Input and output are decoded and encoded with the same encoding. *)

open Camomile

(* Raised for input that is not valid escape notation; the payload is the
   message printed by the top-level handler. *)
exception Error of string

(* NOTE(review): presumably initialises the locale from the environment so
   that Get_enc.get_enc can report its encoding -- confirm against Get_enc. *)
let () = Get_enc.setlocale ""

(* Encoding used for both channels: the value of --encoding when given,
   otherwise whatever Get_enc.get_enc returns.  Any anonymous command-line
   argument is rejected. *)
let enc =
  let enc = ref None in
  Arg.parse
    ["--encoding", Arg.String (fun s -> enc := Some s), "Encoding"]
    (fun _ -> raise (Arg.Bad "Too many arguments"))
    "Usage: unescape [--encoding enc] < input > output";
  let s =
    match !enc with
      Some s -> s
    | None -> Get_enc.get_enc () in
  CharEncoding.of_name s

(* Input channel wrapper with push-back: [unget x] queues [x], and [get]
   returns queued items (oldest first) before reading from the wrapped
   channel again.  [close] drops anything queued and closes the wrapped
   channel. *)
class ['a] unget (c : 'a obj_input_channel) =
  let q = Queue.create () in
  object
    method get = try Queue.take q with Queue.Empty -> c#get
    method unget x : unit = Queue.add x q
    method close : unit = Queue.clear q; c#close
  end

(* Decoded standard input (with push-back) and encoded standard output. *)
let src = new unget (new CharEncoding.in_channel enc stdin)
let dst = new CharEncoding.out_channel enc stdout

(* Code points of the ASCII characters the scanner looks for. *)
let char_u = Char.code 'u'
let char_U = Char.code 'U'
let char_0 = Char.code '0'
let char_9 = Char.code '9'
let char_a = Char.code 'a'
let char_f = Char.code 'f'
let char_A = Char.code 'A'
let char_F = Char.code 'F'
let char_bs = Char.code '\\'

(* Number of input characters consumed so far; reported as the location in
   error messages.  It is incremented just before each read. *)
let count = ref 0

(* [hex_digit_value n] is [Some v] when the code point [n] is an ASCII
   hexadecimal digit of value [v] (either letter case), [None] otherwise. *)
let hex_digit_value n =
  if n >= char_0 && n <= char_9 then Some (n - char_0)
  else if n >= char_a && n <= char_f then Some (n - char_a + 10)
  else if n >= char_A && n <= char_F then Some (n - char_A + 10)
  else None

(* [read_escape m c] reads at most [c] hexadecimal digits from [src],
   accumulating them onto [m] (four bits per digit), and writes the
   resulting character to [dst].

   The escape may be shorter than [c] digits: the first non-hex character
   ends it and is pushed back onto [src] for the caller to read again.
   An escape cut short by the end of input is still written before
   End_of_file is re-raised.

   Raises [Error] when reading stops before any digit has been read, and
   End_of_file when the input ends inside the escape. *)
let read_escape m c =
  (* Write the value accumulated so far, refusing an escape that has no
     digits at all (it would otherwise be emitted as U+0000). *)
  let finish value digits =
    if digits = 0 then raise (Error "Broken escape notation")
    else dst#put (uchar_of_int value) in
  let rec loop value remaining digits =
    if remaining = 0 then dst#put (uchar_of_int value)
    else begin
      incr count;
      let next = try Some src#get with End_of_file -> None in
      match next with
        None ->
          (* Input ended inside the escape: keep what was decoded, then let
             the caller see the end of input as usual. *)
          finish value digits;
          raise End_of_file
      | Some u ->
          (match hex_digit_value (int_of_uchar u) with
             Some d ->
               loop ((value lsl 4) lor d) (remaining - 1) (digits + 1)
           | None ->
               finish value digits;
               src#unget u;
               (* The pushed-back character will be counted again when it
                  is re-read, so undo the increment made for it here. *)
               decr count)
    end in
  loop m c 0

(* Main loop: copy characters through, decoding escapes, until the input is
   exhausted.  Exit status is 0 on a clean end of input and 1 on any
   reported error. *)
let () =
  try
    while true do
      incr count;
      let u = src#get in
      if int_of_uchar u = char_bs then begin
        incr count;
        let next = try Some src#get with End_of_file -> None in
        match next with
          None ->
            (* A backslash as the very last character is not an escape;
               report it instead of silently dropping it. *)
            raise (Error "Broken escape notation")
        | Some e ->
            let n = int_of_uchar e in
            if n = char_bs then dst#put (uchar_of_int char_bs)
            else if n = char_u then read_escape 0 4
            else if n = char_U then read_escape 0 8
            else raise (Error "Broken escape notation")
      end else
        dst#put u
    done;
    (* The loop only terminates through an exception. *)
    assert false
  with
    Error s ->
      Printf.eprintf "Error \"%s\" occurs in the location %d\n" s !count;
      exit 1
  | CharEncoding.Malformed_code ->
      Printf.eprintf "Malformed code in the location %d\n" !count;
      exit 1
  | CharEncoding.Out_of_range ->
      Printf.eprintf "Out of range character in the location %d\n" !count;
      exit 1
  | End_of_file ->
      dst#close;
      exit 0