
open Location
open Util
open Errors

module IM = Data.IntMap
module IH = Data.IntHash
type 'a intmap = 'a IM.t
type 'a inthash = 'a IH.t

type key = int
type namevar = {default : string; prefix : string; key : key; fixed : bool}

type whitereq = Space | ForceSpace | NewLine | Break | Indent | UnIndent

type token = 
	| TReal of string * key 
	| TWhite of whitereq

and element =
	| Choice of key * element list
	| Seq of element * element
	| Token of string * key
	| Unknown of key * namevar
	| White of whitereq
	| Empty

type pretty = element * element

let next_key = ref 0
let new_key () = next_key := !next_key + 1; !next_key
let new_namevar default prefix = 
	{default = default; prefix = prefix; key = new_key (); fixed = false}
let new_fixed_namevar fixval =
	{default = fixval; prefix = ""; key = new_key (); fixed = true} 
	
let justreal = option_map (function TReal (s,k) -> Some (s,k) | _ -> None)	


(* ------------------------------------------------------------
 * Errors
 * ------------------------------------------------------------ *)

let error_body_mismatch span expected got =
	error span ("malformed input file: expected " ^ 
		expected ^ " but given " ^ got) 	
let warn_comment_lost span = warning span "comment will be lost"
let warn_whitespace_lost span = 
	warning span "whitespace will not be preserved"
let warn_tokentwice s = intwarn ("token is used twice - "^s)
let warn_jklnotwin s = intwarn ("jekyll token is untwinned - "^s)


(* ------------------------------------------------------------
 * Base Combinators
 * ------------------------------------------------------------ *)
	
let (<+>) (j1,c1) (j2,c2) = Seq (j1,j2),Seq (c1,c2)
let empty = Empty,Empty
let twin j c = let n = new_key () in Token (j,n),Token(c,n)
let onlyc_block (j,c) = Empty,c
let onlyjkl_block (j,c) = j,Empty
let onlyc_var v = Empty,Unknown (new_key (),v)
let onlyc s = onlyc_block (twin s s) 
let onlyjkl s = onlyjkl_block (twin s s)
let onlyjkl_var v = Unknown (new_key (),v), Empty
let extract_c (j,c) = Empty,c
let extract_jkl (j,c) = j,Empty


let decodejkl_var v s = 
	if !Cmdargs.j2c then twin s s else
	let k = new_key () in Unknown (k,v),Token(s,k)


(* ------------------------------------------------------------
 * Non Determinism
 * ------------------------------------------------------------ *)

let jkl_choice key opts (j,c as p) = if !Cmdargs.j2c then p else
	let els = List.map fst opts in
	Choice (key,els),c


(* ------------------------------------------------------------
 * Untwinned Whitespace Combinators
 * ------------------------------------------------------------ *)

let single x = White x,White x	
let space = single Space
let forcespace = single ForceSpace
let newline = single NewLine
let break = single Break
let indent p = single Indent <+> newline <+> p <+> newline <+> 
				single UnIndent


(* ------------------------------------------------------------
 * Derived Combinators
 * ------------------------------------------------------------ *)

let str s = twin s s
let int i = str (string_of_int i)
let (<++>) x y = x <+> space <+> y
let parens x = str "(" <+> x <+> str ")"
let braceblock x = str "{" <+> indent x <+> str "}"

let rec pprint_list_sep sep f l = match l with
	| [] -> empty
	| [x] -> f x
	| x::xs -> f x <+> sep () <++> pprint_list_sep sep f xs

let pprint_list (f : 'a -> pretty) l = 
	pprint_list_sep (fun () -> str ",") f l
let concat_pretty l = List.fold_left (<+>) empty l
let pprint_seq f l = concat_pretty (List.map f l)

let jkl_twin_l p l = p <+> pprint_seq str l
let jkl_twin_r l p = pprint_seq str l <+> p	


let onlyc_prefix l shared = onlyc_block (pprint_seq str l) <+> shared

	
(* ------------------------------------------------------------
 * DOS vs Unix line endings
 * ------------------------------------------------------------ *)

let dos_endings = ref false
let line_break () = if !dos_endings then "\r\n" else "\n"


(* ------------------------------------------------------------
 * Resolve Non-Determinism
 * ------------------------------------------------------------ *)

type ndenv = {prevtokens : lextoken array; currentpos : int; 
		startpos : int;
		startline : int;
		bindings : bool Data.intmap;
		namebinds : string Data.intmap;
		whitespace : string Data.intmap;
		linestarts : int list;
		choices : int Data.intmap}

let rec find_linestarts oldlines acc = match oldlines with
	| x::xs when List.length x = 0 -> find_linestarts xs acc
	| x::xs -> acc :: find_linestarts xs (acc + List.length x)
	| [] -> []
		
let new_ndenv oldlines = 
	{prevtokens = Array.of_list (List.concat oldlines);
	 startpos = 0;
	 currentpos = 0;
	 startline = 0;
	 bindings = IM.empty;
	 namebinds = IM.empty;
	 choices = IM.empty;
	 whitespace = IM.empty;
	 linestarts = find_linestarts oldlines 0}

let with_lines lines env = {env with
	prevtokens = Array.of_list (List.concat lines);
	currentpos = 0;
	startpos = 0;
	linestarts = find_linestarts lines 0}


let gettoken env = 
		if env.currentpos < Array.length env.prevtokens then
			Array.get env.prevtokens env.currentpos
		else empty_token
let advance env key = {env with 
		currentpos = env.currentpos + 1;
		whitespace = IM.add key (lt_white (gettoken env)) env.whitespace}
let must_match env = lt_what (gettoken env)
let setvar env var b = {env with bindings = IM.add var b env.bindings}
let setpos env line pos =
	{env with currentpos = pos; startpos = pos; startline = line}

let boundname env i = IM.mem i env.namebinds
let getname env i = IM.find i env.namebinds
let setname env i s = {env with namebinds = IM.add i s env.namebinds}
let setchoice env i n = {env with choices = IM.add i n env.choices}
let isbound env i = IM.mem i env.choices
let getchoice env i = 
	if IM.mem i env.choices then IM.find i env.choices
	else (intwarn "nondet choice not resolved"; 0)
let getlength env = env.currentpos - env.startpos
				
let rec select_longest results = match results with
	| [x] -> x 
	| [] -> intfatal "no possible choices"
	| (env,el)::xs ->
		let (env2,el2) = select_longest xs in
		if getlength env >= getlength env2 then env,el
			else env2,el2

		
let rec trymatch env element = match element with
	| Empty -> env,Empty
	| Seq (a,b) -> (match trymatch env a with
		| env,Empty -> trymatch env b 
		| env,x -> env,Seq(x,b))
	| Choice (key,choices) when isbound env key ->
		trymatch env (List.nth choices (getchoice env key))
	| Choice (key,choices) ->
		let results = count_map (try_choice env key) choices in
		select_longest results	
	| Token (s,key) -> 
		if s = must_match env then 
			advance env key,Empty
		else env,element
	| Unknown (k,v) ->
		let s = must_match env in
		if v.fixed then
			let env = setname env v.key v.default in
			trymatch env (Token (v.default,k))
		else if boundname env v.key then 
			trymatch env (Token (getname env v.key,k))
		else if is_prefix v.prefix s || v.default = s then
			advance (setname env v.key s) k,Empty
		else env,element
	| White w -> env,Empty

and try_choice env key n element =
	let env = setchoice env key n in
	trymatch env element 		

let rec skip env element = match element with
	| Empty -> env,Empty
	| Seq (Empty,b) -> skip env b
	| Seq (a,b) -> let env, a = skip env a in env, Seq(a,b)
	| Token (s,_) -> env,Empty
	| Choice (key,choices) when isbound env key ->
		skip env (List.nth choices (getchoice env key))
	| Choice (key,choices) ->
	 	skip (setchoice env key 0) element
	| Unknown (k,v) ->
		let env = setname env v.key v.default in
		env,Empty
	| White w -> env,Empty
	
let rec skip_n n env element = match n with
	| 0 -> env,element
	| n -> let env,element = skip env element in
			skip_n (n-1) env element
	
let rec next_token element = match element with
	| Empty -> "!"
	| Seq (a,b) -> 
		(match next_token a with
			| "!" -> next_token b
			| x -> x)
	| Token (s,_) -> s
	| Choice (_,choices) -> 
			let e = List.hd choices in
			"?" ^ next_token e
	| Unknown (k,v) -> "@" ^ v.default
	| White _ -> "!"
				
let rec use_defaults env element = match skip env element with
	| env,Empty -> env
	| env,element -> use_defaults env element				
				
let rec resolve_nondet env element =
	let lineenvs = count_map (fun line -> setpos env line) env.linestarts in	
	let results = List.map (fun e -> trymatch e element) lineenvs in	
	match select_longest results with
	| env,Empty -> env
	| env2,_ when getlength env2 < 8 ->
		let next = next_token element in
		msg ("skipped unmatchable token: "^next);
		let env,e = skip_n (getlength env2 + 1) env element in
		resolve_nondet env e
	| env,e -> 
		msg ("parsed "^string_of_int (getlength env)^
			 " tokens from line "^string_of_int env.startline);
		resolve_nondet env e


(* ------------------------------------------------------------
 * Flatten a non-det tree to a list of tokens, given the choices made
 * ------------------------------------------------------------ *)

let rec flatten_nondet_acc env element acc = match element with
	| Choice (key,choices) ->
		let e = List.nth choices(getchoice env key) in
		flatten_nondet_acc env e acc
	| Seq (a,b) -> flatten_nondet_acc env a (flatten_nondet_acc env b acc)
	| Token (s,k) -> TReal (s,k) :: acc
	| White w -> TWhite w :: acc
	| Unknown (k,namevar) -> TReal (getname env namevar.key,k) :: acc
	| Empty -> acc
 
let flatten_nondet env element = flatten_nondet_acc env element []
		

(* ------------------------------------------------------------
 * Indentation
 * ------------------------------------------------------------ *)

type whitenev = {
	white : string intmap ref; 
	printed : unit inthash;
	mutable space : bool;
	mutable forcespace : bool;
	mutable newline : bool;
	mutable previndent : string
	}
let getwhite env key = 
	if IM.mem key !(env.white) then IM.find key !(env.white) else " "

type tokenindent = 
	| StartLine of int * int * string
	| SameLine of int

let token_indent env (s,k) = 
	let white = getwhite env k in
	if String.contains white '\n' then
		let linestart = String.rindex white '\n' in
		let pad = String.sub white (linestart + 1)
				(String.length white - linestart - 1) in
		StartLine (
			linestart,
			String.length pad + String.length s,
			strip_comments pad)
	else
		SameLine (String.length white + String.length s)
	
let rec next_indent env tokens = match tokens with
	| TReal (t,k)::rest -> 
		(match token_indent env (t,k) with
			| SameLine i -> next_indent env rest
			| StartLine(_,_,i) -> i)
	| TWhite (Indent | UnIndent)::_ -> "" 
	| _::rest -> next_indent env rest
	| [] -> ""

let column_after_token env oldcolumn t = match token_indent env t with
	| SameLine i -> oldcolumn + i
	| StartLine(_,i,_) -> i

let new_indent env t indent = match token_indent env t with
	| SameLine i -> indent
	| StartLine (_,_,i) -> i


(* ------------------------------------------------------------
 * Compute whitespace for untwinned tokens
 * ------------------------------------------------------------ *)

let create_whitespace env future = 
	if env.newline then line_break () ^ 
		string_longest env.previndent (next_indent env future)
	else if env.space then " "
	else ""
	
let fill_in_whitespace env key future = 
	if not (IM.mem key !(env.white)) then 
		env.white := IM.add key (create_whitespace env future) !(env.white)
	else if IM.find key !(env.white) = "" && env.forcespace then
		env.white := IM.add key " " !(env.white)
	
let compute_token_whitespace tokens env token future = match token with
	| TWhite Space -> env.space <- true
	| TWhite ForceSpace -> env.forcespace <- true
	| TWhite NewLine -> env.newline <- true
	| TWhite Break -> ()
	| TWhite Indent -> env.previndent <- env.previndent ^ "    "
	| TWhite UnIndent -> 
		if is_prefix "    " env.previndent then
			env.previndent <- strip_prefix "    " env.previndent
	| TReal (t,key) ->
		env.previndent <- new_indent env (t,key) env.previndent;
		fill_in_whitespace env key future;
		env.space <- false; env.newline <- false; env.forcespace <- false;
		if IH.mem key env.printed then warn_tokentwice t;
			IH.add key () env.printed

let basicenv ndenv = 
	{white = ref ndenv.whitespace; printed = IH.create ();
	newline = false; space = false; previndent = "";
	forcespace = false}

let compute_whitespace ndenv tokens = 
	let env = basicenv ndenv in
	tailiter (compute_token_whitespace tokens env) tokens;
	!(env.white)


(* ------------------------------------------------------------
 * Decide what tokens and whitespace to print
 * ------------------------------------------------------------ *)

let translate element from tolines fromlines = 
	let env = new_ndenv fromlines in
	msg "matching output against input file";
	let env = if fromlines <> [] && not !Cmdargs.dropsrc then
			resolve_nondet env from
		else env in
	msg "matching output against prev file";
	let env = with_lines tolines env in
	let env = 
		if tolines <> [] then 
			resolve_nondet env element 
		else use_defaults env element in
	msg "flattening";
	let totokens = flatten_nondet env element in
	msg "computing extra whitespace";
	let white = compute_whitespace env totokens in
	justreal totokens,white
	
let translate_default element = translate element Empty [] []

	
(* ------------------------------------------------------------
 * Output
 * ------------------------------------------------------------ *)
	
let token_string white (s,k) = IM.find k white ^ s
			
let token_output out white token = output_string out (token_string white token)	
	
let output_pretty out (j,c) =
	let tokens,white = translate_default j in
	List.iter (token_output out white) tokens
	
let pretty_to_string (j,c) = 
	let tokens,white = translate_default j in
	String.concat "" (List.map (token_string white) tokens)

let format_pretty fmt pretty = 
	Format.pp_print_string fmt (pretty_to_string pretty)

let output_preserving_whitespace out fromlines (j,c) tolines =
	let el_from,el_to = if !Cmdargs.j2c then j,c else c,j in
	let tokens,white = translate el_to el_from tolines fromlines in
	List.iter (token_output out white) tokens

