408 lines
8.6 KiB
OCaml
408 lines
8.6 KiB
OCaml
type severity = Style | Warning | Error
|
|
|
|
type parse_error = {
|
|
severity: severity;
|
|
pos: int;
|
|
message: string;
|
|
expected: string list option;
|
|
found: string option;
|
|
other_case: parse_error option;
|
|
}
|
|
|
|
type src_info = {
|
|
source: string;
|
|
path: string;
|
|
}
|
|
|
|
type cursor = { source: string; pos: int }
|
|
|
|
type parse_result = {
|
|
cursor: cursor;
|
|
errors: parse_error list
|
|
}
|
|
|
|
type 'a parsr = cursor -> 'a * parse_result
|
|
|
|
type off2lres = {
|
|
line: int; (** 0-indexed *)
|
|
col: int; (** 0-indexed *)
|
|
lstr: string;
|
|
};;
|
|
|
|
exception Parser_Cant_Recover of parse_result
|
|
exception Parser_No_Progress of cursor
|
|
|
|
let num_occ s f =
|
|
let num = ref 0 in
|
|
s |> Seq.iter begin fun x ->
|
|
if f x then
|
|
num := !num + 1
|
|
end;
|
|
!num
|
|
;;
|
|
|
|
let rec off2l (s:string) (p:int) =
|
|
let e = match String.index_from_opt s p '\n' with
|
|
Some(lb) -> lb
|
|
| None -> String.length s
|
|
in
|
|
match String.rindex_from_opt s p '\n' with
|
|
Some(lb) -> {
|
|
line = num_occ (Seq.take lb (String.to_seq s)) (fun x -> x = '\n');
|
|
col = p - lb;
|
|
lstr = String.sub s (lb + 1) (e - lb - 1) }
|
|
| None -> {
|
|
line = 0;
|
|
col = p;
|
|
lstr = String.sub s 0 e }
|
|
;;
|
|
|
|
let str_before s c =
|
|
match String.index_opt s c with
|
|
Some(x) -> String.sub s 0 (x - 1)
|
|
| None -> s
|
|
;;
|
|
|
|
let limitw len s =
|
|
let s = str_before s '\n' in
|
|
if String.length s > len then
|
|
(String.sub s 0 (len - 3)) ^ "..."
|
|
else s
|
|
;;
|
|
|
|
let rec pp_err (s:src_info) out (e:parse_error) =
|
|
let open Format in
|
|
fprintf out "[%s] @[" begin match e.severity with
|
|
Style -> "style"
|
|
| Warning -> "warn "
|
|
| Error -> "error"
|
|
end;
|
|
fprintf out "%s@]\n" e.message;
|
|
let where = off2l s.source e.pos in
|
|
fprintf out " in @[%s@]\n" s.path;
|
|
fprintf out " at %i:%i\n\n" where.line where.col;
|
|
fprintf out "%3d: %s\n" where.line where.lstr;
|
|
fprintf out " ";
|
|
for _ = 0 to where.col - 2 do
|
|
fprintf out " "
|
|
done;
|
|
fprintf out "^\n";
|
|
begin match e.expected with
|
|
Some([]) -> fprintf out "expected: nothing\nbut "
|
|
| Some(li) -> fprintf out "expected: @[%a@]\nbut "
|
|
(Format.pp_print_list Format.pp_print_text) li
|
|
| None -> fprintf out "unexpected\n"
|
|
end;
|
|
fprintf out "got '@[";
|
|
begin match e.found with
|
|
| Some(x) -> fprintf out "%s" @@ limitw 22 x
|
|
| None -> fprintf out "something else"
|
|
end;
|
|
fprintf out "@]'\n";
|
|
|
|
begin match e.other_case with
|
|
Some(c) -> begin
|
|
fprintf out "\n ^^^\n";
|
|
fprintf out " \\\ === OR tried alternative parser === \\\\\\\n";
|
|
fprintf out " \\/\n\n";
|
|
pp_err s out c
|
|
end
|
|
| None -> () end
|
|
;;
|
|
|
|
let pp_errli s out e =
|
|
let open Format in
|
|
let first = ref true in
|
|
e |> List.iter begin fun e ->
|
|
if not !first then begin
|
|
fprintf out "\n"
|
|
end;
|
|
first := false;
|
|
pp_err s out e
|
|
end
|
|
;;
|
|
|
|
let head c =
|
|
if String.length c.source > 0 then
|
|
Some(String.get c.source 0)
|
|
else
|
|
None;;
|
|
|
|
let create src = { source = src; pos = 0 };;
|
|
|
|
let expected c str =
|
|
{
|
|
severity = Error;
|
|
pos = c.pos;
|
|
message = "";
|
|
expected = Some([str]);
|
|
found = Some(c.source);
|
|
other_case = None;
|
|
};;
|
|
|
|
let seek num c =
|
|
{ source =
|
|
String.sub c.source num ((String.length c.source) - num);
|
|
pos = c.pos + num };;
|
|
|
|
let merge a b =
|
|
{ cursor = b.cursor;
|
|
errors = a.errors @ b.errors };;
|
|
|
|
let ok c = { cursor = c; errors = [] }
|
|
let put_err err res = { res with errors = res.errors @ [err] }
|
|
let err c e = put_err e (ok c)
|
|
;;
|
|
|
|
let single_inline_white c =
|
|
match head c with
|
|
Some(' ') |
|
|
Some('\b') |
|
|
Some('\t') ->
|
|
((), ok (seek 1 c))
|
|
| _ -> raise (Parser_Cant_Recover (err c (expected c "inline whitespace")))
|
|
;;
|
|
|
|
let single_white c =
|
|
match head c with
|
|
Some(' ') |
|
|
Some('\b') |
|
|
Some('\t') |
|
|
Some('\r') |
|
|
Some('\n') ->
|
|
((), ok (seek 1 c))
|
|
| _ -> raise (Parser_Cant_Recover (err c (expected c "whitespace")))
|
|
;;
|
|
|
|
let any c =
|
|
match head c with
|
|
Some(x) -> (x, ok (seek 1 c))
|
|
| None -> raise (Parser_Cant_Recover (err c (expected c "any")))
|
|
;;
|
|
|
|
let rec repeat p c =
|
|
try
|
|
let (v, res) = p c in
|
|
if res.cursor.pos = c.pos then
|
|
raise (Parser_No_Progress c)
|
|
else
|
|
let (v2, res2) = repeat p res.cursor in
|
|
(v :: v2, merge res res2)
|
|
with Parser_Cant_Recover _ ->
|
|
([], ok c)
|
|
;;
|
|
|
|
let many p c =
|
|
let (av, ar) = p c in
|
|
let (bv, br) = repeat p (ar.cursor) in
|
|
(av :: bv, merge ar br)
|
|
;;
|
|
|
|
let rec until endp p c =
|
|
try
|
|
let (_, res) = endp c in
|
|
([], res)
|
|
with Parser_Cant_Recover _ ->
|
|
let (v, res) = p c in
|
|
if res.cursor.pos = c.pos then
|
|
raise (Parser_No_Progress c)
|
|
else
|
|
let (v2, res2) = until endp p res.cursor in
|
|
(v :: v2, merge res res2)
|
|
;;
|
|
|
|
let map fn p c =
|
|
let (v, res) = p c in
|
|
((fn v), res);;
|
|
|
|
let set v p c =
|
|
let (_, res) = p c in
|
|
(v, res);;
|
|
|
|
let opt p c =
|
|
try
|
|
map (fun x -> Some(x)) p c
|
|
with Parser_Cant_Recover _ ->
|
|
(None, ok c)
|
|
;;
|
|
|
|
let inv p c =
|
|
let fail = (try
|
|
let (_, r) = p c in
|
|
Some(r)
|
|
with Parser_Cant_Recover _ ->
|
|
None) in
|
|
|
|
let rec ouch r =
|
|
match r with
|
|
[] -> None
|
|
| head :: tail ->
|
|
let h = (match head.expected with
|
|
Some(x) -> Some( List.map (fun x -> "not " ^ x) x )
|
|
| None -> None
|
|
) in
|
|
let t = ouch tail in
|
|
match h, t with
|
|
Some(x), Some(y) -> Some(x @ y)
|
|
| Some(x), None -> Some(x)
|
|
| None, Some(y) -> Some(y)
|
|
| None, None -> None
|
|
in
|
|
|
|
match fail with
|
|
Some(r) -> raise (Parser_Cant_Recover (err c {
|
|
severity = Error;
|
|
pos = c.pos;
|
|
message = "inverted parser matched";
|
|
expected = ouch r.errors;
|
|
found = Some(c.source);
|
|
other_case = None
|
|
}))
|
|
| None -> ((), ok c)
|
|
;;
|
|
|
|
let also a b c =
|
|
let (av, ar) = a c in
|
|
let (_, br) = b c in
|
|
(av, merge br ar);;
|
|
|
|
exception Recursive_Parser_Used_Before_Created
|
|
let recursive (prod:'a->'c-> 'v * 'r) (c:cursor) =
|
|
let future = ref None in
|
|
let par = prod (fun c -> match !future with
|
|
None -> raise Recursive_Parser_Used_Before_Created
|
|
| Some(p) -> p c) in
|
|
future := Some(par);
|
|
par c
|
|
;;
|
|
|
|
let just str c =
|
|
if String.starts_with ~prefix:str c.source then
|
|
(str, ok (seek (String.length str) c))
|
|
else
|
|
raise (Parser_Cant_Recover (err c (expected c str)))
|
|
;;
|
|
|
|
let ex_end c =
|
|
if (String.length c.source) > 0 then
|
|
raise (Parser_Cant_Recover (err c (expected c "end of file")))
|
|
else
|
|
((), ok c)
|
|
;;
|
|
|
|
let chain a b c =
|
|
let ar = a c in
|
|
let br = b (snd ar).cursor in
|
|
((fst ar, fst br), merge (snd ar) (snd br))
|
|
;;
|
|
|
|
let ignore_then a b c =
|
|
let ar = a c in
|
|
let br = b (snd ar).cursor in
|
|
((fst br), merge (snd ar) (snd br))
|
|
;;
|
|
|
|
let then_ignore a b c =
|
|
let ar = a c in
|
|
let br = b (snd ar).cursor in
|
|
((fst ar), merge (snd ar) (snd br))
|
|
;;
|
|
|
|
let nd_of3 a b c =
|
|
then_ignore (ignore_then a b) c;;
|
|
|
|
let outer_of3 a b c =
|
|
chain (then_ignore a b) c;;
|
|
|
|
let ignore p c =
|
|
let (_, r) = p c in
|
|
((), r);;
|
|
|
|
let either a b c =
|
|
try a c
|
|
with Parser_Cant_Recover ar ->
|
|
try b c
|
|
with Parser_Cant_Recover br ->
|
|
raise (Parser_Cant_Recover {
|
|
cursor = ar.cursor;
|
|
errors = [{ (List.hd ar.errors) with
|
|
other_case = Some(List.hd br.errors) }]
|
|
})
|
|
;;
|
|
|
|
let match_hd label fn c =
|
|
match head c with
|
|
Some(x) when fn x ->
|
|
(x, ok (seek 1 c))
|
|
| _ -> raise (Parser_Cant_Recover (err c (expected c label)))
|
|
;;
|
|
|
|
let hd_oneof label li =
|
|
match_hd label (fun x ->
|
|
Seq.exists (fun t->t=x) li);;
|
|
|
|
let digitch =
|
|
(hd_oneof "digit"
|
|
(String.to_seq "0123456789"));;
|
|
|
|
let digit =
|
|
map (fun x -> Char.code x - Char.code '0')
|
|
digitch;;
|
|
|
|
let lower =
|
|
hd_oneof "lowercase letter"
|
|
(String.to_seq "abcdefghijklmnopqrstuvwxyz");;
|
|
|
|
let upper =
|
|
hd_oneof "uppercase letter"
|
|
(String.to_seq "ABCDEFGHIJKLMNOPQRSTUVWXYZ");;
|
|
|
|
let alpha = either lower upper;;
|
|
let alnum = either alpha digitch;;
|
|
|
|
let sign c =
|
|
match head c with
|
|
Some '+' -> ( 1, ok (seek 1 c))
|
|
| Some '-' -> (-1, ok (seek 1 c))
|
|
| _ -> ( 1, ok (seek 1 c))
|
|
;;
|
|
|
|
let p_uint =
|
|
map (fun digits ->
|
|
List.fold_right (fun x acc -> acc*10 + x) (List.rev digits) 0
|
|
) (many digit)
|
|
;;
|
|
|
|
let p_int =
|
|
chain sign p_uint
|
|
|> map (fun (sign,d) -> sign * d)
|
|
;;
|
|
|
|
let p_uflt =
|
|
let fract =
|
|
map (fun digits ->
|
|
List.fold_right
|
|
(fun x (acc,d) -> (acc +. (Float.of_int x) /. d), d *. 10.0 )
|
|
(List.rev digits)
|
|
(0.0,10.0)
|
|
) (many digit)
|
|
in outer_of3 p_int (just ".") fract
|
|
|> map (fun (a,(b,_)) -> (Float.of_int a) +. b)
|
|
;;
|
|
|
|
let p_flt =
|
|
chain sign p_uflt
|
|
|> map (fun (sign,d) -> Float.of_int sign *. d)
|
|
;;
|
|
|
|
let doparse f (src:src_info) parsr =
|
|
let (v,r) = try
|
|
let (v,r) = parsr (create src.source) in
|
|
(Some(v), r)
|
|
with Parser_Cant_Recover r ->
|
|
(None, r)
|
|
in
|
|
pp_errli src f r.errors;
|
|
v
|
|
;;
|