(* General lexing utilities. *)

(*
$File: Parsing/LexUtils.sml $
$Date: 1992/04/29 16:35:08 $
$Revision: 1.11 $
$Locker:  $
*)

(*$LexUtils: LEX_BASICS ParseSML_ BASIC_IO FLAGS CRASH LEX_UTILS*)
functor LexUtils(structure LexBasics: LEX_BASICS
		 structure Token: Topdec_TOKENS
		 structure BasicIO: BASIC_IO
		 structure Flags: FLAGS
		 structure Crash: CRASH
		): LEX_UTILS =
  struct
    open LexBasics Token

    datatype LexArgument = LEX_ARGUMENT of {sourceReader: SourceReader,
					    stringChars: string list,
					    commentDepth: int
					   }

    fun sourceReaderOf(LEX_ARGUMENT{sourceReader, ...}) = sourceReader

    type arg = LexArgument

    fun asQualId text =
      let
	fun glue(".", y :: ys) = (* "." :: *) glue(y, ys)
	  | glue(x, "." :: ys) = x :: glue(".", ys)
	  | glue(x, y :: ys) = glue(x ^ y, ys)
	  | glue(".", nil) = Crash.impossible "asQualId.glue"
	  | glue(x, nil) = [x]
      in
	glue("", explode text)
      end

    val asQualId =
      if Flags.DEBUG_LEXING then
	fn text =>
	  let
	    val result = asQualId text
	  in
	    BasicIO.println("asQualId(" ^ text ^ ") = "
			    ^ List.stringSep "[" "]" ", " (fn x => x) result
			   );
	    result
	  end
      else asQualId

    fun isQualStar id =
      case List.rev(asQualId id)
	of "*" :: _ => true
	 | _ => false			(* We can't get nil (or [_]). *)

    fun asDigit text = ord text - ord "0"

    local
      fun accumInt(xs, sign, n) =
	case xs
	  of "~" :: xs' => accumInt(xs', ~1, n)
	   | "." :: _ => (sign * n, xs)
	   | "E" :: _ => (sign * n, xs)
	   | x :: xs' => accumInt(xs', sign, n * 10 + asDigit x)
	   | nil => (sign * n, nil)

      fun accumDec(xs, mul, n) =
	case xs
	  of "E" :: _ => (n, xs)
	   | x :: xs' => accumDec(xs', mul/10.0, n + mul * real(asDigit x))
	   | nil => (n, nil)
    in
      fun asInteger text =
	#1(accumInt(explode text, 1, 0))

      fun asReal text =
	let
	  val (intPart, rest) = accumInt(explode text, 1, 0)
(***
	  val _ = BasicIO.println("[intPart=" ^ Int.string intPart ^ "]")
	  val _ = BasicIO.println("[rest=" ^ implode rest ^ "]")
 ***)

	  val (decPart, rest') =
	    case rest
	      of "." :: xs => accumDec(xs, 0.1, 0.0)
	       | _ => (0.0, rest)

(***
	  val _ = BasicIO.println("[decPart=" ^ Real.string decPart ^ "]")
	  val _ = BasicIO.println("[rest'=" ^ implode rest' ^ "]")
 ***)

	  val (expPart, _) =
	    case rest'
	      of "E" :: xs => accumInt(xs, 1, 0)
	       | _ => (0, nil)

(***
	  val _ = BasicIO.println("[expPart=" ^ Int.string expPart ^ "]")
 ***)

	  fun E(x, y) =
	    exp(ln x + y * ln 10.0)
	    handle _ => Crash.impossible("LexUtils.E("
					 ^ Real.string x ^ ", "
					 ^ Real.string y ^ ")"
					)
	in
	  if intPart = 0 then 0.0
	  else if intPart < 0 then ~(E(real(~intPart) + decPart, real expPart))
	  else E(real intPart + decPart, real expPart)
	end
    end

    fun initArg sourceReader = LEX_ARGUMENT{sourceReader=sourceReader,
					    stringChars=nil,
					    commentDepth=0
					   }

    fun clearString arg = initArg(sourceReaderOf arg)

    fun newComment arg = LEX_ARGUMENT{sourceReader=sourceReaderOf arg,
				      stringChars=nil, commentDepth=1
				     }

    fun addChars text (LEX_ARGUMENT{sourceReader, stringChars, ...}) =
      LEX_ARGUMENT{sourceReader=sourceReader,
		   stringChars=text :: stringChars, commentDepth=0
		  }

    fun addControlChar text arg =
      addChars (chr(ord(String.nth 1 text) - ord "@")) arg

    fun addAsciiChar (pos, text) arg =
      let
	val ascii =
	  case explode text
	    of ["\\", c1, c2, c3] =>
		 asDigit c1 * 100 + asDigit c2 * 10 + asDigit c3
	     | _ =>
		 Crash.impossible "addAsciiChar"
      in
	if ascii > 255 then
	  raise LexBasics.LEXICAL_ERROR(pos, "bad ASCII escape: " ^ text)
	else
	  addChars (chr ascii) arg
      end

    fun asString(LEX_ARGUMENT{stringChars, ...}) = implode(rev stringChars)

   (*keyword detection (better done here than by the lexer). *)

    fun identifier(text, p1, p2) =
      let
	fun keyword tok = (shifting("KEY(" ^ text ^ ")"); tok(p1, p2))
      in
        case text
	  of "abstype"	 => keyword ABSTYPE
	   | "and"	 => keyword AND
	   | "andalso"	 => keyword ANDALSO
	   | "as"	 => keyword AS
  (* alx#1 *)
           | "axiom"     => keyword AXIOM
  (* alx#1 *)
	   | "case"	 => keyword CASE
	   | "do"	 => keyword DO
	   | "datatype"	 => keyword DATATYPE
	   | "else"	 => keyword ELSE
	   | "end"	 => keyword END
	   | "eqtype"	 => keyword EQTYPE
	   | "exception" => keyword EXCEPTION
(* alx#1.1 *)
      | "exists" => keyword EXISTS
(* alx#1.1#end *)
	   | "fn"	 => keyword FN
(* alx#1.1 *)
      | "forall" => keyword FORALL
(* alx#1.1#end *)
	   | "fun"	 => keyword FUN
		| "functor"	 => keyword FUNCTOR
	   | "handle"	 => keyword HANDLE
	   | "if"	 => keyword IF
 (* robmar#3 *)
           | "implies"   => keyword IMPLIES
 (* mikon#last *)
	   | "iff"       => keyword IFF
	   | "in"	 => keyword IN
	   | "include"	 => keyword INCLUDE
	   | "infix"	 => keyword INFIX
	   | "infixr"	 => keyword INFIXR
	   | "let"	 => keyword LET
	   | "local"	 => keyword LOCAL
	   | "nonfix"	 => keyword NONFIX
	   | "of"	 => keyword OF
	   | "op"	 => keyword OP
	   | "open"	 => keyword OPEN
	   | "orelse"	 => keyword ORELSE
(* alx#1.1 *)
      | "proper" => keyword PROPER
(* alx#1.1#end *)
	   | "raise"	 => keyword RAISE
(* alx#1.1 *)
      | "raises" => keyword RAISES
(* alx#1.1#end *)
	   | "rec"	 => keyword REC
	   | "sharing"	 => keyword SHARING
	   | "sig"	 => keyword SIG
	   | "signature" => keyword SIGNATURE
	   | "struct"	 => keyword STRUCT
	   | "structure" => keyword STRUCTURE
(* alx#1.1 *)
      | "terminates" => keyword TERMINATES
(* alx#1.1#end *)
	   | "then"	 => keyword THEN
	   | "type"	 => keyword TYPE
	   | "val"	 => keyword VAL
	   | "with"	 => keyword WITH
	   | "withtype"	 => keyword WITHTYPE
	   | "while"	 => keyword WHILE

	   | ":"	 => keyword COLON
	   | "|"	 => keyword BAR
	   | "="	 => keyword EQUALS
	   | "=>"	 => keyword DARROW
(* alx#1.1 *)
	   | "=="	 => keyword EQUALEQUAL
      | "=/="	 => keyword EQUALSLASHEQUAL
(* alx#1.1#end *)
	   | "->"	 => keyword ARROW
	   | "#"	 => keyword HASH
	   | "*"	 => keyword STAR
					(* Not actually reserved, but ... *)
(* alx#1.1 *)
      | "?" => keyword QUESTIONMARK
(* alx#1.1#end *)

	   | _		 => (shifting("ID(" ^ text ^ ")"); ID(text, p1, p2))
      end

    fun incComment(LEX_ARGUMENT{sourceReader, commentDepth, ...}) =
      LEX_ARGUMENT{sourceReader=sourceReader,
		   stringChars=nil, commentDepth=commentDepth+1
		  }

    fun decComment(LEX_ARGUMENT{sourceReader, commentDepth, ...}) =
      (commentDepth-1,
       LEX_ARGUMENT{sourceReader=sourceReader,
		    stringChars=nil, commentDepth=commentDepth-1
		   }
      )
  end;
