3 | import Text.Lex.Manual
10 | stripQuotes : SnocList Char -> String
11 | stripQuotes (sx :< _) = case sx <>> [] of
14 | stripQuotes [<] = ""
18 | ltrim : List Char -> List Char
19 | ltrim (c :: cs) = if isSpace c then ltrim cs else (c :: cs)
23 | countHashtag : List Char -> Nat
24 | countHashtag ('#' :: t) = S $
countHashtag t
29 | rtrim : SnocList Char -> SnocList Char
30 | rtrim (sc :< c) = if isSpace c then rtrim sc else (sc :< c)
34 | dropHead : Nat -> SnocList Char -> List Char
35 | dropHead n = drop n . (<>> [])
38 | countHashtag : SnocList Char -> Nat
39 | countHashtag = countHashtag . (<>> [])
52 | non : (l : Lexer) -> Lexer
53 | non l = reject l <+> any
58 | is : (c : Char) -> Lexer
64 | isNot : (x : Char) -> Lexer
65 | isNot x = pred (/=x)
70 | space = pred isSpace
75 | digit = pred isDigit
80 | like : (x : Char) -> Lexer
81 | like x = let x' := toUpper x in pred ((x' ==) . toUpper)
86 | notLike : (x : Char) -> Lexer
87 | notLike x = let x' := toUpper x in pred ((x' /=) . toUpper)
91 | oneOf : List Char -> Lexer
92 | oneOf ts = pred (`elem` ts)
97 | range : (start : Char) -> (end : Char) -> Lexer
99 | let s := min start end
101 | in pred (\x => x >= s && x <= e)
108 | prefixBy : (fs : List (Char -> Bool)) -> Lexer
109 | prefixBy (f :: []) = pred f
110 | prefixBy (f :: fs) = pred f <+> prefixBy fs
114 | exact : String -> Lexer
116 | let cs@(_ :: _) := unpack s | [] => fail
117 | in autoLift $
exact cs
120 | approx : String -> Lexer
121 | approx = prefixBy . map check . unpack
124 | check : Char -> Char -> Bool
125 | check c d = toLower c == toLower d
130 | digits = autoLift digits1
136 | symbol = pred (\x => not (isSpace x || isAlphaNum x))
142 | symbols = preds (\x => not (isSpace x || isAlphaNum x))
148 | control = pred isControl
154 | controls = preds isControl
158 | someOf : List Char -> Lexer
159 | someOf ts = preds (`elem` ts)
164 | ranges : (start, end : Char) -> Lexer
166 | let s := min start end
168 | in preds (\x => x >= s && x <= e)
173 | spaces = preds isSpace
178 | spaceChars = preds isSpaceChar
183 | newline = Lift $
\sc,cs => case cs of
184 | '\r' :: '\n' :: t => Succ t
185 | '\n' :: t => Succ t
186 | '\r' :: t => Succ t
192 | manyTillNewline : Recognise False
193 | manyTillNewline = preds0 $
\case {
'\n' => False;
'\r' => False;
_ => True}
198 | manyTillLineFeed : Recognise False
199 | manyTillLineFeed = preds0 $
\case {
'\n' => False;
_ => True}
206 | lineComment : (pre : Lexer) -> Lexer
207 | lineComment pre = pre <+> manyTillNewline
214 | linefeedComment : (pre : Lexer) -> Lexer
215 | linefeedComment pre = pre <+> manyTillLineFeed
222 | atLeast : (n : Nat) -> Lexer -> Recognise (isSucc n)
223 | atLeast 0 f = many f
224 | atLeast (S k) f = f <+> atLeast k f
227 | exactly : (n : Nat) -> Lexer -> {auto 0 _ : IsSucc n} -> Lexer
229 | exactly (S k@(S _)) f = f <+> exactly k f
232 | manyUntil : (stopBefore : Recognise c) -> Lexer -> Recognise False
233 | manyUntil sb l = many (reject sb <+> l)
236 | someUntil : (stopBefore : Recognise c) -> Lexer -> Lexer
237 | someUntil sb l = some (reject sb <+> l)
240 | manyThen : (stopAfter : Recognise c) -> (l : Lexer) -> Recognise c
241 | manyThen stopAfter l = manyUntil stopAfter l <+> stopAfter
247 | surround : (start : Lexer) -> (end : Lexer) -> (l : Lexer) -> Lexer
248 | surround start end l = start <+> manyThen end l
254 | quote : (q : Lexer) -> (l : Lexer) -> Lexer
255 | quote q l = surround q q l
261 | escape : (esc : Lexer) -> Lexer -> Lexer
262 | escape esc l = esc <+> l
270 | stringLit = Lift string
276 | intLit = autoLift int
281 | intLitPlus = autoLift intPlus
285 | binDigits = preds isBinDigit
289 | hexDigits = preds isHexDigit
293 | octDigits = preds isOctDigit
299 | binLit = exact "0b" <+> binDigits
305 | hexLit = approx "0x" <+> hexDigits
311 | octLit = exact "0o" <+> preds isOctDigit
316 | digitsUnderscoredLit : Lexer
317 | digitsUnderscoredLit = digits <+> many (is '_' <+> digits)
322 | binUnderscoredLit : Lexer
323 | binUnderscoredLit = binLit <+> many (is '_' <+> binDigits)
328 | hexUnderscoredLit : Lexer
329 | hexUnderscoredLit = hexLit <+> many (is '_' <+> hexDigits)
334 | octUnderscoredLit : Lexer
335 | octUnderscoredLit = octLit <+> many (is '_' <+> octDigits)
342 | charLit = let q = '\'' in
343 | is q <+> (escape (is '\\') (control <|> any) <|> isNot q) <+> is q
345 | lexStr : List String -> Lexer
347 | lexStr (t :: ts) = exact t <|> lexStr ts
352 | ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
353 | "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
354 | "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
355 | "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
357 | <|> (is 'x' <+> hexDigits)
358 | <|> (is 'o' <+> octDigits)
363 | doubleLit = Lift number