0 | module Text.TOML.Lexer
2 | import Data.Time.Time
5 | import Text.Parse.Manual
6 | import Text.Time.Lexer
7 | import Text.TOML.Types
15 | fromString : String -> TomlToken
22 | isKeyChar : Char -> Bool
23 | isKeyChar '-' = True
24 | isKeyChar '_' = True
25 | isKeyChar c = isAlphaNum c
27 | key : SnocList Char -> AutoTok e String
29 | if isKeyChar x then key (sc :< x) xs else Succ (cast sc) (x::xs)
30 | key sc [] = Succ (cast sc) []
36 | MaxChar,LowerInvalid,UpperInvalid : Nat
38 | LowerInvalid = 0xD800
39 | UpperInvalid = 0xDFFF
42 | tryHex : Nat -> List Char -> Maybe Char
44 | if k <= MaxChar && (k < LowerInvalid || k > UpperInvalid) then Just $
cast k
46 | tryHex k (x :: xs) = case isHexDigit x of
47 | True => tryHex (k*16 + hexDigit x) xs
50 | commentControl : Char -> Bool
51 | commentControl '\127' = True
52 | commentControl x = x <= '\8' || (x >= '\10' && x <= '\31')
54 | tomlControl : Char -> Bool
55 | tomlControl '\n' = True
56 | tomlControl '\f' = True
57 | tomlControl '\b' = True
58 | tomlControl '\r' = True
59 | tomlControl '\t' = True
60 | tomlControl x = commentControl x
63 | str : SnocList Char -> AutoTok e String
64 | str sc ('\\' :: c :: xs) = case c of
65 | '"' => str (sc :< '"') xs
66 | 'n' => str (sc :< '\n') xs
67 | 'f' => str (sc :< '\f') xs
68 | 'b' => str (sc :< '\b') xs
69 | 'r' => str (sc :< '\r') xs
70 | 't' => str (sc :< '\t') xs
71 | '\\' => str (sc :< '\\') xs
73 | w::x::y::z::t' => case tryHex 0 [w,x,y,z] of
74 | Just c => str (sc :< c) t'
75 | Nothing => invalidEscape p t'
76 | _ => invalidEscape p xs
78 | s::t::u::v::w::x::y::z::t' => case tryHex 0 [s,t,u,v,w,x,y,z] of
79 | Just c => str (sc :< c) t'
80 | Nothing => invalidEscape p t'
81 | _ => invalidEscape p xs
82 | _ => invalidEscape p (c::xs)
83 | str sc ('"' :: xs) = Succ (cast sc) xs
85 | if tomlControl c then range (InvalidControl c) p xs
86 | else str (sc :< c) xs
89 | validTrim : List Char -> Bool
90 | validTrim ('\t' :: xs) = validTrim xs
91 | validTrim (' ' :: xs) = validTrim xs
92 | validTrim ('\n' :: xs) = True
93 | validTrim ('\r'::'\n'::xs) = True
97 | strML : (trim : Bool) -> SnocList Char -> AutoTok e String
98 | strML trim sc ('\\' :: c :: xs) = case c of
99 | '"' => strML False (sc :< '"') xs
100 | 'n' => strML False (sc :< '\n') xs
101 | 'f' => strML False (sc :< '\f') xs
102 | 'b' => strML False (sc :< '\b') xs
103 | 'r' => strML False (sc :< '\r') xs
104 | 't' => strML False (sc :< '\t') xs
105 | '\\' => strML False (sc :< '\\') xs
107 | w::x::y::z::t' => case tryHex 0 [w,x,y,z] of
108 | Just c => strML False (sc :< c) t'
109 | Nothing => invalidEscape p t'
110 | _ => invalidEscape p xs
112 | s::t::u::v::w::x::y::z::t' => case tryHex 0 [s,t,u,v,w,x,y,z] of
113 | Just c => strML False (sc :< c) t'
114 | Nothing => invalidEscape p t'
115 | _ => invalidEscape p xs
116 | _ => if validTrim (c::xs) then strML True sc xs
117 | else invalidEscape p (c::xs)
118 | strML trim sc ('"'::'"'::'"'::'"'::'"'::xs) = Succ (cast $
sc :< '"' :< '"') xs
119 | strML trim sc ('"'::'"'::'"'::'"'::xs) = Succ (cast $
sc :< '"') xs
120 | strML trim sc ('"'::'"'::'"'::xs) = Succ (cast sc) xs
121 | strML trim sc ('\n'::cs) =
122 | if trim then strML trim sc cs else strML trim (sc :< '\n') cs
123 | strML trim sc ('\t'::cs) =
124 | if trim then strML trim sc cs else strML trim (sc :< '\t') cs
125 | strML trim sc ('\r'::'\n'::cs) =
126 | if trim then strML trim sc cs else strML trim (sc :< '\r' :< '\n') cs
127 | strML trim sc (c ::xs) =
128 | if tomlControl c then range (InvalidControl c) p xs
129 | else if isSpace c && trim then strML trim sc xs
130 | else strML False (sc :< c) xs
131 | strML trim sc [] = eoiAt p
134 | literal : SnocList Char -> AutoTok e String
135 | literal sc ('\''::cs) = Succ (cast sc) cs
136 | literal sc (c :: cs) =
137 | if tomlControl c && c /= '\t' then range (InvalidControl c) p cs
138 | else literal (sc :< c) cs
139 | literal sc [] = eoiAt p
142 | literalML : SnocList Char -> AutoTok e String
143 | literalML sc ('\''::'\''::'\''::'\''::'\''::cs) = Succ (cast $
sc :< '\'' :< '\'') cs
144 | literalML sc ('\''::'\''::'\''::'\''::cs) = Succ (cast $
sc :< '\'') cs
145 | literalML sc ('\''::'\''::'\''::cs) = Succ (cast sc) cs
146 | literalML sc ('\n'::cs) = literalML (sc :< '\n') cs
147 | literalML sc ('\r'::'\n'::cs) = literalML (sc :< '\r' :< '\n') cs
148 | literalML sc (c :: cs) =
149 | if tomlControl c && c /= '\t' then range (InvalidControl c) p cs
150 | else literalML (sc :< c) cs
151 | literalML sc [] = eoiAt p
158 | intLit : SnocList Char -> (res,pow : Nat) -> TomlToken
159 | intLit [<] res _ = TVal (TInt $
cast res)
160 | intLit [<'-'] res _ = TVal (TInt . negate $
cast res)
161 | intLit (sx :< x) res pow = intLit sx (res + pow * digit x) (pow * 10)
164 | dblLit : SnocList Char -> TomlToken
166 | if any (\c => '.' == c || 'e' == c) sc
167 | then TVal . TDbl . Float . cast $
cast {to = String} sc
170 | num',rest,dot,exp,digs,digs1 : SnocList Char -> AutoTok e TomlToken
173 | digs sc ('_'::x::xs) =
174 | if isDigit x then digs (sc:<x) xs else unknownRange p xs
176 | if isDigit x then digs (sc:<x) xs else Succ (dblLit sc) (x::xs)
177 | digs sc [] = Succ (dblLit sc) []
180 | digs1 sc (x :: xs) = if isDigit x then digs (sc:<x) xs else unknown p
181 | digs1 sc [] = eoiAt p
185 | exp sc ('e'::'+'::xs) = digs1 (sc:<'e') xs
186 | exp sc ('E'::'+'::xs) = digs1 (sc:<'e') xs
187 | exp sc ('e'::'-'::xs) = digs1 (sc:<'e':<'-') xs
188 | exp sc ('E'::'-'::xs) = digs1 (sc:<'e':<'-') xs
189 | exp sc ('e'::xs) = digs1 (sc:<'e') xs
190 | exp sc ('E'::xs) = digs1 (sc:<'e') xs
191 | exp sc xs = Succ (dblLit sc) xs
194 | dot sc ('_'::x::xs) = if isDigit x then dot (sc:<x) xs else unknownRange p xs
195 | dot sc (x::xs) = if isDigit x then dot (sc:<x) xs else exp sc (x::xs)
196 | dot sc [] = Succ (dblLit sc) []
199 | rest sc ('.'::x::xs) = if isDigit x then dot (sc:<'.':< x) xs else unknown p
200 | rest sc ('.'::[]) = unknown p
201 | rest sc xs = exp sc xs
204 | num' sc ('_'::x::xs) = if isDigit x then num' (sc:<x) xs else unknownRange p xs
205 | num' sc (x::xs) = if isDigit x then num' (sc:<x) xs else rest sc (x::xs)
206 | num' sc [] = Succ (intLit sc 0 1) []
208 | num : Tok True e TomlToken
209 | num ('-'::'0'::t) = rest [<'-','0'] t
210 | num ('+'::'0'::t) = rest [<'0'] t
211 | num ('-'::d::t) = if isDigit d then num' [<'-',d] t else unknownRange Same t
212 | num ('+'::d::t) = if isDigit d then num' [<d] t else unknown Same
213 | num ('0'::t) = rest [<'0'] t
214 | num (d::t) = if isDigit d then num' [<d] t else unknown Same
215 | num [] = eoiAt Same
221 | comment : AutoTok e TomlToken
222 | comment [] = Succ Comment []
223 | comment ('\n' :: xs) = Succ Comment ('\n' :: xs)
224 | comment ('\r' :: xs) = Succ Comment ('\r' :: xs)
225 | comment ('\t' :: xs) = comment xs
226 | comment (x :: xs) =
227 | if commentControl x then range (InvalidControl x) p xs else comment xs
229 | validSpace : Char -> Bool
230 | validSpace ' ' = True
231 | validSpace '\n' = True
232 | validSpace '\t' = True
233 | validSpace _ = False
235 | space : AutoTok e TomlToken
236 | space ('\r'::'\n'::xs) = space xs
237 | space (x::xs) = if validSpace x then space xs else Succ Space (x::xs)
238 | space [] = Succ Space []
244 | isTime : List Char -> Bool
245 | isTime (_::_::':'::_) = True
246 | isTime (_::_::_::_::'-'::_) = True
249 | isNum : List Char -> Bool
250 | isNum ('-'::_) = True
251 | isNum ('+'::_) = True
252 | isNum (d::_) = isDigit d
256 | other : Tok True e TomlToken
257 | other ('.' :: xs) = Succ "." xs
258 | other (',' :: xs) = Succ "," xs
259 | other ('=' :: xs) = Succ "=" xs
260 | other ('[' :: xs) = Succ "[" xs
261 | other (']' :: xs) = Succ "]" xs
262 | other ('}' :: xs) = Succ "}" xs
263 | other ('#' :: xs) = comment xs
264 | other ('\r'::'\n'::xs) = space xs
265 | other (x :: xs) = if validSpace x then space xs else unknown Same
266 | other [] = eoiAt Same
271 | -> LexRes True cs e String
272 | -> LexRes True cs e TomlToken
273 | toKey x t (Succ v xs @{p}) = Succ (key1 $
KT v t $
BS x (move x p)) xs
274 | toKey _ _ (Fail x y z) = Fail x y z
278 | anyKey : Position -> Tok True e TomlToken
279 | anyKey pos ('"' :: xs) = toKey pos Quoted $
str [<] xs
280 | anyKey pos ('\'' :: xs) = toKey pos Literal $
literal [<] xs
281 | anyKey pos ('['::'[':: xs) = Succ "[[" xs
282 | anyKey pos (']'::']':: xs) = Succ "]]" xs
283 | anyKey pos (x :: xs) =
284 | if isKeyChar x then toKey pos Plain (key [<x] xs) else other (x::xs)
285 | anyKey _ xs = other xs
288 | val : Tok True e TomlToken
289 | val ('{' :: xs) = Succ "{" xs
290 | val ('"' :: '"' :: '"' :: xs) = case xs of
291 | '\n'::t => TVal . TStr <$> strML False [<] t
292 | '\r' :: '\n'::t => TVal . TStr <$> strML False [<] t
293 | _ => TVal . TStr <$> strML False [<] xs
294 | val ('\'' :: '\'' :: '\'' :: xs) = case xs of
295 | '\n'::t => TVal . TStr <$> literalML [<] t
296 | '\r' :: '\n'::t => TVal . TStr <$> literalML [<] t
297 | _ => TVal . TStr <$> literalML [<] xs
298 | val ('"' :: xs) = TVal . TStr <$> str [<] xs
299 | val ('\'' :: xs) = TVal . TStr <$> literal [<] xs
300 | val ('0'::'x':: xs) = TVal . TInt . cast <$> hexSep xs
301 | val ('0'::'b':: xs) = TVal . TInt . cast <$> binSep xs
302 | val ('0'::'o':: xs) = TVal . TInt . cast <$> octSep xs
303 | val ('t'::'r'::'u'::'e'::xs) = Succ (TVal $
TBool True) xs
304 | val ('f'::'a'::'l'::'s'::'e'::xs) = Succ (TVal $
TBool False) xs
305 | val ('n'::'a'::'n'::xs) = Succ (TVal $
TDbl NaN) xs
306 | val ('+'::'n'::'a'::'n'::xs) = Succ (TVal $
TDbl NaN) xs
307 | val ('-'::'n'::'a'::'n'::xs) = Succ (TVal $
TDbl NaN) xs
308 | val ('i'::'n'::'f'::xs) = Succ (TVal $
TDbl $
Infty Nothing) xs
309 | val ('+'::'i'::'n'::'f'::xs) = Succ (TVal $
TDbl $
Infty $
Just Plus) xs
310 | val ('-'::'i'::'n'::'f'::xs) = Succ (TVal $
TDbl $
Infty $
Just Minus) xs
312 | (TVal . TTime <$> anyTime cs) <|> num cs <|> other cs
321 | data Ctxt = Key | Value
327 | data LexState : Ctxt -> Type where
328 | TopLevel : {0 t : Ctxt} -> LexState t
329 | InArray : (outer : LexState Value) -> LexState Value
330 | InTable : {0 t : Ctxt} -> (outer : LexState Value) -> LexState t
334 | switch : LexState Key -> LexState Value
335 | switch TopLevel = TopLevel
336 | switch (InTable outer) = InTable outer
338 | adjState : (t : Ctxt) -> TomlToken -> LexState t -> (
t2 ** LexState t2)
339 | adjState Key "=" outer = (
_ ** switch outer)
340 | adjState Value NL TopLevel = (
Key ** TopLevel)
341 | adjState Value "]" (InArray outer) = (
_ ** outer)
342 | adjState _ "}" (InTable outer) = (
_ ** outer)
343 | adjState Value "[" outer = (
_ ** InArray outer)
344 | adjState Value "{" outer = (
Key ** InTable outer)
345 | adjState Value "," (InTable outer) = (
Key ** InTable outer)
346 | adjState t _ st = (
t ** st)
351 | anyTok : Position -> Ctxt -> Tok True e TomlToken
352 | anyTok pos Key = anyKey pos
353 | anyTok _ Value = val
358 | adjSpace : LexState c -> (hasNL : Bool) -> TomlToken -> Maybe TomlToken
359 | adjSpace TopLevel True Space = Just NL
360 | adjSpace (InTable _) True Space = Just NL
361 | adjSpace _ _ Space = Nothing
362 | adjSpace _ _ Comment = Nothing
363 | adjSpace _ _ t = Just t
373 | List (Bounded TomlToken)
374 | -> Bounded (List1 KeyToken)
375 | -> SnocList (Bounded TomlToken)
376 | -> List (Bounded TomlToken)
382 | List (Bounded TomlToken)
383 | -> SnocList (Bounded TomlToken)
384 | -> List (Bounded TomlToken)
386 | groupKeys ts ks (sx :< B (TKey p) bk :< B "." bd) =
387 | groupKeys ts (B (p <+> ks.val) (ks.bounds <+> bd <+> bk)) sx
388 | groupKeys ts ks sx = postProcess (map TKey ks :: ts) sx
390 | postProcess ts [<] = ts
391 | postProcess ts (sx :< x) = case x.val of
392 | TKey s => groupKeys ts (x $> s) sx
393 | Comment => postProcess ts sx
394 | _ => postProcess (x::ts) sx
404 | -> SnocList (Bounded TomlToken)
405 | -> (cs : List Char)
406 | -> (0 acc : SuffixAcc cs)
407 | -> Either (Bounded TomlErr) (List $
Bounded TomlToken)
408 | lex st pos sx [] _ = Right $
postProcess [] sx
409 | lex st pos sx xs (SA r) = case anyTok pos t xs of
410 | Succ val ys @{p} =>
411 | let pos2 := endPos pos p
412 | Just v := adjSpace st (pos2.line > pos.line) val
413 | | Nothing => lex st pos2 sx ys r
414 | (
t2 ** st2)
:= adjState t v st
415 | in lex st2 pos2 (sx :< bounded v pos pos2) ys r
416 | Fail start errEnd y => Left $
boundedErr pos start errEnd y
419 | lexToml : String -> Either (Bounded TomlErr) (List $
Bounded TomlToken)
420 | lexToml s = lex {t = Key} TopLevel begin [<] (unpack s) suffixAcc