1 | module TyRE.Text.Lexer
3 | import public Data.Bool
4 | import public Data.List
5 | import public Data.Nat
7 | import public TyRE.Text.Lexer.Core
8 | import public TyRE.Text.Quantity
9 | import public TyRE.Text.Token
12 | toTokenMap : List (Lexer, k) -> TokenMap (Token k)
13 | toTokenMap = map $
\(l, kind) => (l, Tok kind)
19 | any = pred (const True)
25 | opt : (l : Lexer) -> Recognise False
31 | non : (l : Lexer) -> Lexer
32 | non l = reject l <+> any
38 | choiceMap : {c : Bool} ->
39 | Foldable t => (a -> Recognise c) -> t a -> Recognise c
40 | choiceMap f xs = foldr (\x, acc => rewrite sym (andSameNeutral c) in
48 | Foldable t => t (Recognise c) -> Recognise c
49 | choice = Lexer.choiceMap id
55 | (xs : List (Recognise c)) -> Recognise (c && isCons xs)
56 | concat = Lexer.Core.concatMap id
61 | is : (x : Char) -> Lexer
67 | isNot : (x : Char) -> Lexer
68 | isNot x = pred (/=x)
73 | like : (x : Char) -> Lexer
74 | like x = pred (\y => toUpper x == toUpper y)
79 | notLike : (x : Char) -> Lexer
80 | notLike x = pred (\y => toUpper x /= toUpper y)
86 | exact : (str : String) -> Lexer
87 | exact str = case unpack str of
89 | (x :: xs) => concatMap is (x :: xs)
95 | approx : (str : String) -> Lexer
96 | approx str = case unpack str of
98 | (x :: xs) => concatMap like (x :: xs)
103 | oneOf : (chars : String) -> Lexer
104 | oneOf chars = pred (\x => x `elem` unpack chars)
109 | range : (start : Char) -> (end : Char) -> Lexer
110 | range start end = pred (\x => (x >= min start end)
111 | && (x <= max start end))
117 | some : Lexer -> Lexer
118 | some l = l <+> many l
124 | many : Lexer -> Recognise False
125 | many l = opt (some l)
132 | manyUntil : (stopBefore : Recognise c) -> (l : Lexer) -> Recognise False
133 | manyUntil stopBefore l = many (reject stopBefore <+> l)
140 | manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
141 | manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
148 | manyTill : (l : Lexer) -> (end : Lexer) -> Recognise False
149 | manyTill l end = end <|> opt (l <+> manyTill l end)
155 | count : (q : Quantity) -> (l : Lexer) -> Recognise (isSucc (min q))
156 | count (Qty Z Nothing) l = many l
157 | count (Qty Z (Just Z)) _ = empty
158 | count (Qty Z (Just (S max))) l = opt $
l <+> count (atMost max) l
159 | count (Qty (S min) Nothing) l = l <+> count (atLeast min) l
160 | count (Qty (S min) (Just Z)) _ = fail
161 | count (Qty (S min) (Just (S max))) l = l <+> count (between min max) l
167 | digit = pred isDigit
173 | digits = some digit
179 | hexDigit = pred isHexDigit
185 | hexDigits = some hexDigit
191 | octDigit = pred isHexDigit
197 | octDigits = some hexDigit
203 | alpha = pred isAlpha
209 | alphas = some alpha
215 | lower = pred isLower
221 | lowers = some lower
227 | upper = pred isUpper
233 | uppers = some upper
239 | alphaNum = pred isAlphaNum
245 | alphaNums = some alphaNum
251 | space = pred isSpace
257 | spaces = some space
263 | newline = let crlf = "\r\n" in
264 | exact crlf <|> oneOf crlf
270 | newlines = some newline
276 | symbol = pred (\x => not (isSpace x || isAlphaNum x))
282 | symbols = some symbol
288 | control = pred isControl
294 | controls = some control
300 | surround : (start : Lexer) -> (end : Lexer) -> (l : Lexer) -> Lexer
301 | surround start end l = start <+> manyThen end l
307 | quote : (q : Lexer) -> (l : Lexer) -> Lexer
308 | quote q l = surround q q l
313 | escape : (esc : Char) -> Lexer -> Lexer
314 | escape esc l = is esc <+> l
321 | stringLit = quote (is '"') (escape '\\' any <|> any)
328 | charLit = let q = '\'' in
329 | is q <+> (escape '\\' (control <|> any) <|> isNot q) <+> is q
331 | lexStr : List String -> Lexer
333 | lexStr (t :: ts) = exact t <|> lexStr ts
336 | control = lexStr ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
337 | "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
338 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
339 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
341 | <|> (is 'x' <+> hexDigits)
342 | <|> (is 'o' <+> octDigits)
349 | intLit = opt (is '-') <+> digits
355 | hexLit = approx "0x" <+> hexDigits
362 | lineComment : (start : Lexer) -> Lexer
363 | lineComment start = start <+> manyUntil newline any <+> opt newline
371 | blockComment : (start : Lexer) -> (end : Lexer) -> Lexer
372 | blockComment start end = start <+> middle <+> end
374 | middle : Recognise False
375 | middle = manyUntil end (blockComment start end <|> any)