
open Location
open Util
open Errors

module IM = Data.IntMap
type 'a intmap = 'a Data.intmap

type key = int
type namevar = {default : string; prefix : string; key : key}

type realtoken = {
    	body : string ; mutable white : string;
	mutable twin : realtoken option;
	mutable printed : bool}

type whitereq = Space | NewLine | Break | Indent | UnIndent

type token = 
	| TReal of string * key 
	| TWhite of whitereq

type condition =
	| Once of key		(* only one element can use it *)
	| Set of key		(* set var if used *)
	| Need of key		(* need thing to be set *)
	| Always		(* always allowed *)
type choice = key * condition * element
and element =
	| Choice of key * choice list
	| Seq of element * element
	| Token of string * key
	| Unknown of namevar
	| White of whitereq
	| Empty

type pretty = element * element

let next_key = ref 0
let new_key () = next_key := !next_key + 1; !next_key
	
let justreal = option_map (function TReal (s,k) -> Some (s,k) | _ -> None)	
let justrealp (j,c) = justreal j,justreal c 


(* ------------------------------------------------------------
 * Errors
 * ------------------------------------------------------------ *)

let error_body_mismatch span expected got =
	error span ("malformed input file: expected " ^ 
		expected ^ " but given " ^ got) 	
let warn_comment_lost span = warning span "comment will be lost"
let warn_whitespace_lost span = 
	warning span "whitespace will not be preserved"
let warn_tokentwice s = intwarn ("token is used twice - "^s)
let warn_jklnotwin s = intwarn ("jekyll token is untwinned - "^s)


(* ------------------------------------------------------------
 * Base Combinators
 * ------------------------------------------------------------ *)
	
let (<+>) (j1,c1) (j2,c2) = Seq (j1,j2),Seq (c1,c2)
let empty = Empty,Empty
let twin j c = let n = new_key () in Token (j,n),Token(c,n)
let onlyc_block (j,c) = Empty,c
let onlyjkl_block (j,c) = j,Empty
let onlyc_var r = Empty,Unknown r
let onlyc s = onlyc_block (twin s s) 
let onlyjkl s = onlyjkl_block (twin s s)
let extract_c (j,c) = Empty,c
let extract_jkl (j,c) = j,Empty


(* ------------------------------------------------------------
 * Untwinned Whitespace Combinators
 * ------------------------------------------------------------ *)

let single x = White x,White x	
let space = single Space
let newline = single NewLine
let break = single Break
let indent p = single Indent <+> newline <+> p <+> newline <+> 
				single UnIndent


(* ------------------------------------------------------------
 * Derived Combinators
 * ------------------------------------------------------------ *)

let str s = twin s s
let int i = str (string_of_int i)
let (<++>) x y = x <+> space <+> y
let parens x = str "(" <+> x <+> str ")"
let braceblock x = str "{" <+> indent x <+> str "}"

let rec pprint_list_sep sep f l = match l with
	| [] -> empty
	| [x] -> f x
	| x::xs -> f x <+> sep () <++> pprint_list_sep sep f xs

let pprint_list (f : 'a -> pretty) l = 
	pprint_list_sep (fun () -> str ",") f l
let concat_pretty l = List.fold_left (<+>) empty l
let pprint_seq f l = concat_pretty (List.map f l)

let jkl_twin_l p l = p <+> pprint_seq str l
let jkl_twin_r l p = pprint_seq str l <+> p	
let onlyc_prefix l shared = match l,shared with
	| x::xs, (Token (sj,kj),Token (sc,kc)) ->
		(Token (sj,kj),Token (x,kc)) <+> 
		onlyc_block (pprint_seq str (xs@[sc]))
	| _ -> intfatal "onlyc_prefix must be applied to a single token"
	
	
(* ------------------------------------------------------------
 * DOS vs Unix line endings
 * ------------------------------------------------------------ *)

let dos_endings = ref false
let line_break () = if !dos_endings then "\r\n" else "\n"


(* ------------------------------------------------------------
 * Resolve Non-Determinism
 * ------------------------------------------------------------ *)

type ndenv = {prevtokens : lextoken array; currentpos : int; 
		bindings : bool Data.intmap;
		namebinds : string Data.intmap;
		linestarts : int list;
		choices : int Data.intmap}

let advance env = {env with currentpos = env.currentpos + 1}
let must_match env = lt_what (Array.get env.prevtokens env.currentpos)
let setvar env var b = {env with bindings = IM.add var b env.bindings}
let setpos env pos = {env with currentpos = pos}

let boundname env i = IM.mem i env.namebinds
let getname env i = IM.find i env.namebinds
let setname env i s = {env with namebinds = IM.add i s env.namebinds}
let setchoice env i n = {env with choices = IM.add i n env.choices}
let getchoice env i = IM.find i env.choices

let choice_allowed env (_,choice,_) = match choice with
	| Once var -> not (IM.mem var env.bindings)
	| Set _ | Always -> true
	| Need var -> IM.mem var env.bindings && IM.find var env.bindings
	
let condition_apply env choice = match choice with
	| Once var | Set var -> setvar env var true
	| Need _ | Always -> env
	
let rec select_furthest results = match results with
	| [x] -> x 
	| [] -> intfatal "no possible choices"
	| (env,el)::xs ->
		let (env2,el2) = select_furthest xs in
		if env.currentpos > env2.currentpos then env,el
			else env2,el2
		
let rec trymatch env element = match element with
	| Empty -> env,Empty
	| Seq (a,b) -> (match trymatch env a with
		| env,Empty -> trymatch env b 
		| env,x -> env,Seq(x,b))
	| Choice (key,choices) ->
		let allowed = List.filter (choice_allowed env) choices in
		let results = List.map (try_choice env key) allowed in
		select_furthest results	
	| Token (s,_) -> 
		if s = must_match env then advance env,Empty
		else env,element
	| Unknown v ->
		if boundname env v.key then 
			trymatch env (Token (getname env v.key,v.key))
		else if is_prefix v.prefix (must_match env) then
			advance (setname env v.key (must_match env)),Empty
		else env,element
	| White w -> env,Empty

and try_choice env key (n,condition,element) =
	let env = condition_apply env condition in
	let env = setchoice env key n in
	trymatch env element 		

let rec skip env element = match element with
	| Seq (a,b) -> let env, a = skip env a in env, Seq(a,b)
	| Token (s,_) -> env,Empty
	| Choice (key,choices) ->
		let allowed = List.filter (choice_allowed env) choices in
		let n,cond,e = List.hd allowed in
		let env = setchoice env key n in
		condition_apply env cond,e
	| Unknown v ->
		let env = setname env v.key v.default in
		env,Empty
	| _ -> env,element
		
let rec resolve_nondet env element =
	let lineenvs = List.map (setpos env) env.linestarts in
	let results = List.map (fun e -> trymatch e element) lineenvs in
	let oldpos = env.currentpos in
	match select_furthest results with
	| env,Empty -> env
	| env,e when env.currentpos = oldpos ->
		let env,e = skip env e in
		resolve_nondet env e
	| env,e -> resolve_nondet env e


(* ------------------------------------------------------------
 * Flatten a non-det tree to a list of tokens, given the choices made
 * ------------------------------------------------------------ *)

let rec flatten_nondet_acc env element acc = match element with
	| Choice (key,choices) ->
		let k,_,e = List.nth choices(getchoice env key) in
		flatten_nondet_acc env e acc
	| Seq (a,b) -> flatten_nondet_acc env a (flatten_nondet_acc env b acc)
	| Token (s,k) -> TReal (s,k) :: acc
	| White w -> TWhite w :: acc
	| Unknown namevar -> TReal (getname env namevar.key,namevar.key) :: acc
	| Empty -> acc
 
let flatten_nondet env element = flatten_nondet_acc env element []
		

(* ------------------------------------------------------------
 * Indentation
 * ------------------------------------------------------------ *)

type whitenev = {white : string intmap}
let getwhite env key = IM.find key env.white

type tokenindent = 
	| StartLine of int * int * string
	| SameLine of int

let token_indent env (s,k) = 
	let white = getwhite env k in
	if String.contains white '\n' then
		let linestart = String.rindex white '\n' in
		let pad = String.sub white (linestart + 1)
				(String.length white - linestart - 1) in
		StartLine (
			linestart,
			String.length pad + String.length s,
			strip_comments pad)
	else
		SameLine (String.length white + String.length s)
	
let rec next_indent env tokens = match tokens with
	| TReal (t,k)::rest -> 
		(match token_indent env (t,k) with
			| SameLine i -> next_indent rest
			| StartLine(_,_,i) -> i)
	| TWhite (Indent | UnIndent)::_ -> "" 
	| _::rest -> next_indent env rest
	| [] -> ""

let column_after_token env oldcolumn t = match token_indent env t with
	| SameLine i -> oldcolumn + i
	| StartLine(_,i,_) -> i

let new_indent env t indent = match token_indent env t with
	| SameLine i -> indent
	| StartLine (_,_,i) -> i


(* ------------------------------------------------------------
 * Compute whitespace for untwinned tokens
 * ------------------------------------------------------------ *)

type env = {mutable space : bool; 
		   mutable newline : bool;
		   mutable previndent : string; 
		   preserve_whitespace : bool}

let create_whitespace env future = 
	if env.newline then line_break () ^ 
		string_longest env.previndent (next_indent future)
	else if env.space then " "
	else ""
	
let fill_in_whitespace env token future = match token.twin with
	| Some _ when env.preserve_whitespace -> ()
	| _ -> 	token.white <- create_whitespace env future	
	
let compute_token_whitespace tokens env token future = match token with
	| TWhite Space -> env.space <- true
	| TWhite NewLine -> env.newline <- true
	| TWhite Break -> ()
	| TWhite Indent -> env.previndent <- env.previndent ^ "    "
	| TWhite UnIndent -> 
		if is_prefix "    " env.previndent then
			env.previndent <- strip_prefix "    " env.previndent
	| TReal t ->
		env.previndent <- new_indent t env.previndent;
		fill_in_whitespace env t future;
		env.space <- false; env.newline <- false;
		if t.printed then warn_tokentwice (bodystr t);
			t.printed <- true

let basicenv preserve = 
	{newline = false; space = false; previndent = ""; 
	preserve_whitespace = preserve}

let compute_whitespace preserve tokens = 
	tailiter (compute_token_whitespace tokens (basicenv preserve)) tokens


(* ------------------------------------------------------------
 * Find whitespace from lines of previous version
 * ------------------------------------------------------------ *)

let line_matches new_tokens old_tokens = 
	List.length new_tokens >= List.length old_tokens &&
	List.for_all2 (fun n o -> lt_what o = bodystr n || not (body_known n)) 
		(list_n_head (List.length old_tokens) new_tokens) old_tokens
	
let rec find_first_match new_tokens future = match future with
	| [] -> None
	| x::xs when line_matches new_tokens x -> Some (x,xs)
	| x::xs -> find_first_match new_tokens xs
		
let diffmatch_token old_token new_token =
	fix_body new_token (lt_what old_token); 
	if new_token.twin = None then new_token.white <- (lt_white old_token)
		
let rec diff_match new_tokens old_lines = 
	match find_first_match new_tokens old_lines,new_tokens with
	| _,[] -> ()
	| None,t::ts -> diff_match ts old_lines
	| Some (old_tokens,remaining_lines),_ ->
		let match_tokens,other_tokens = 
			list_divide (List.length old_tokens) new_tokens in
		List.iter2 diffmatch_token old_tokens match_tokens;
		diff_match other_tokens remaining_lines


(* ------------------------------------------------------------
 * Flow whitespace from lexed tokens to output tokens
 * ------------------------------------------------------------ *)

let token_match intoken t_gen = 
	if (lt_what intoken <> bodystr t_gen) && body_known t_gen then 
		error_body_mismatch (lt_span intoken) (bodystr t_gen) (lt_what intoken);
	fix_body t_gen (lt_what intoken);
	match t_gen.twin with
		| Some twin -> 
			t_gen.white <- lt_white intoken;
			twin.white <- lt_white intoken 
		| None -> ()

let match_tokens = List.iter2 token_match

let whitecheck_token intoken t_gen =
	if lt_white intoken <> t_gen.white && !Cmdargs.warn_white then
		if contains_comment (lt_what intoken) then
			warn_comment_lost (lt_span intoken)
		else
			warn_whitespace_lost (lt_span intoken)

let check_whitespace = List.iter2 whitecheck_token
		
let translate lexedtokens fromtokens totokens oldlines =
	let realtokens = justreal fromtokens in
	match_tokens lexedtokens realtokens;
	compute_whitespace true fromtokens;
	check_whitespace lexedtokens realtokens;
	compute_whitespace true totokens;
	diff_match (justreal totokens) oldlines
	

(* ------------------------------------------------------------
 * Output
 * ------------------------------------------------------------ *)
	
let token_string t = t.white ^ bodystr t
			
let token_output out token = output_string out (token_string token)	
	
let output_pretty out (j,c) = 
	compute_whitespace false j;
	List.iter (token_output out) (justreal j)
	
let pretty_to_string (j,c) = 
	compute_whitespace false j;
	String.concat "" (List.map token_string (justreal j))

let format_pretty fmt pretty = 
	Format.pp_print_string fmt (pretty_to_string pretty)

let output_preserving_whitespace out lextokens (j,c) oldlines =
	let fromtokens = if !Cmdargs.jekyll_in then j else c in
	let totokens = if !Cmdargs.jekyll_out then j else c in
	translate lextokens fromtokens totokens oldlines;
	List.iter (token_output out) (justreal totokens)

