0 | module Idrall.Parser.Lexer
6 | import public Text.Lexer.Tokenizer
9 | data IsMultiline = Multi | Single
11 | Eq IsMultiline where
12 | (==) Multi Multi = True
13 | (==) Single Single = True
27 | | StringBegin IsMultiline
36 | | HomeDirImport String
44 | (==) (Ident x) (Ident y) = x == y
45 | (==) (Symbol x) (Symbol y) = x == y
46 | (==) (Keyword x) (Keyword y) = x == y
47 | (==) (Builtin x) (Builtin y) = x == y
48 | (==) (TNatural x) (TNatural y) = x == y
49 | (==) (TInteger x) (TInteger y) = x == y
50 | (==) (TDouble x) (TDouble y) = x == y
51 | (==) InterpBegin InterpBegin = True
52 | (==) InterpEnd InterpEnd = True
53 | (==) (StringBegin x) (StringBegin y) = x == y
54 | (==) StringEnd StringEnd = True
55 | (==) (StringLit x) (StringLit y) = x == y
56 | (==) White White = True
57 | (==) (Comment x) (Comment y) = x == y
58 | (==) Unrecognised Unrecognised = True
59 | (==) EndInput EndInput = True
60 | (==) (RelImport x) (RelImport y) = x == y
61 | (==) (AbsImport x) (AbsImport y) = x == y
62 | (==) (HomeDirImport x) (HomeDirImport y) = x == y
63 | (==) (EnvImport x) (EnvImport y) = x == y
64 | (==) (HttpImport x) (HttpImport y) = x == y
65 | (==) (Sha x) (Sha y) = x == y
66 | (==) MissingImport MissingImport = True
71 | show (Ident x) = "Ident \{show x}"
72 | show (Symbol x) = "Symbol \{show x}"
73 | show (Keyword x) = "Keyword \{show x}"
74 | show (Builtin x) = "Builtin \{show x}"
75 | show (TNatural x) = "TNatural \{show x}"
76 | show (TInteger x) = "TInteger \{show x}"
77 | show (TDouble x) = "TDouble \{show x}"
78 | show InterpBegin = "InterpBegin"
79 | show InterpEnd = "InterpEnd"
80 | show (StringBegin x) = "StringBegin"
81 | show StringEnd = "StringEnd"
82 | show (StringLit x) = "StringLit \{show x}"
83 | show White = "White"
84 | show (Comment x) = "Comment \{show x}"
85 | show Unrecognised = "Unrecognised"
86 | show EndInput = "EndInput"
87 | show (RelImport x) = "RelImport \{show x}"
88 | show (AbsImport x) = "AbsImport \{show x}"
89 | show (HomeDirImport x) = "HomeDirImport \{show x}"
90 | show (EnvImport x) = "EnvImport \{show x}"
91 | show (HttpImport x) = "HttpImport \{show x}"
92 | show (Sha x) = "Sha \{show x}"
93 | show MissingImport = "MissingImport"
96 | TokenRawToken : Type
97 | TokenRawToken = RawToken
100 | builtins : List String
102 | [ "Type", "Kind", "Sort"
103 | , "Bool", "True", "False"
104 | , "Natural", "Natural/build", "Natural/fold", "Natural/isZero", "Natural/even"
105 | , "Natural/odd", "Natural/subtract", "Natural/toInteger", "Natural/show"
106 | , "Integer", "Integer/show", "Integer/negate", "Integer/clamp", "Integer/toDouble"
107 | , "Double", "Double/show"
108 | , "List/build", "List/fold", "List/length", "List/head"
109 | , "List/last", "List/indexed", "List/reverse", "List"
110 | , "Text", "Text/show", "Text/replace"
111 | , "Optional", "Some", "None"
116 | keywords : List String
117 | keywords = ["let", "in", "with",
118 | "if", "then", "else",
119 | "merge", "toMap", "missing", "forall",
125 | (pred $
isIdentStart) <+> (many . pred $
isIdentTrailing)
127 | isIdentStart : Char -> Bool
128 | isIdentStart '_' = True
129 | isIdentStart x = isAlpha x || x > chr 160
130 | isIdentTrailing : Char -> Bool
131 | isIdentTrailing '_' = True
132 | isIdentTrailing '/' = True
133 | isIdentTrailing x = isAlphaNum x || x > chr 160
135 | quotedIdent : Lexer
136 | quotedIdent = is '`' <+> (manyThen (is '`') (any))
138 | parseIdent : String -> RawToken
140 | let isKeyword = elem x keywords
141 | isBuiltin = elem x builtins in
142 | case (isKeyword, isBuiltin) of
143 | (True, False) => Keyword x
144 | (False, True) => Builtin x
147 | parseQuotedIdent : String -> RawToken
148 | parseQuotedIdent x = Ident $
dropQuotes x
150 | dropLast : List a -> List a
151 | dropLast xs = reverse (drop 1 $
reverse xs)
152 | dropFirst : List a -> List a
153 | dropFirst xs = drop 1 xs
154 | dropQuotes : String -> String
158 | case length str >= 2 of
159 | True => pack $
dropFirst . dropLast $
str
164 | sign = is '-' <|> is '+'
167 | exponent = is 'e' <+> opt sign <+> digits
170 | naturalLit = digits
179 | <+> ((digits <+> is '.' <+> digits <+> opt exponent)
180 | <|> (digits <+> exponent))
182 | posInfinity : Lexer
183 | posInfinity = exact "Infinity"
185 | negInfinity : Lexer
186 | negInfinity = is '-' <+> exact "Infinity"
196 | toEndComment : (k : Nat) -> Recognise (k /= 0)
197 | toEndComment Z = empty
199 | = some (pred (\c => c /= '-' && c /= '{' && c /= '"'))
200 | <+> toEndComment (S k)
201 | <|> is '{' <+> singleBrace k
202 | <|> is '-' <+> singleDash k
203 | <|> stringLit <+> toEndComment (S k)
208 | singleBrace : (k : Nat) -> Lexer
210 | = is '-' <+> many (is '-')
211 | <+> singleDash (S k)
212 | <|> toEndComment (S k)
217 | singleDash : (k : Nat) -> Lexer
219 | = is '-' <+> doubleDash k
220 | <|> is '}' <+> toEndComment k
221 | <|> toEndComment (S k)
226 | doubleDash : (k : Nat) -> Lexer
227 | doubleDash k = with Prelude.(::)
228 | many (is '-') <+> choice
229 | [ is '}' <+> toEndComment k
230 | , many (isNot '\n') <+> toEndComment (S k)
233 | blockComment : Lexer
234 | blockComment = is '{' <+> is '-' <+> toEndComment 1
236 | lineComment : Lexer
237 | lineComment = exact "--" <+> (someUntil (is '\n') (any))
240 | httpImport : Tokenizer RawToken
241 | httpImport = match (httpStart <+> (someUntil (space) charLexer)) HttpImport
244 | httpStart = exact "http://" <|> exact "https://"
249 | envImport : Tokenizer RawToken
250 | envImport = match (envStart <+> (someUntil (space) charLexer)) removePrefix
252 | removePrefix : String -> RawToken
253 | removePrefix x = EnvImport $
pack $
drop 4 (unpack x)
255 | envStart = exact "env:"
260 | pathImport : (String -> RawToken) -> Lexer -> Tokenizer RawToken
261 | pathImport f pathStart = match (pathStart <+> (someUntil (space) (escapeLexer <|> charLexer))) f
265 | escapeLexer : Lexer
266 | escapeLexer = escape (exact "\\") any
270 | relImport : Tokenizer RawToken
271 | relImport = pathImport RelImport (exact "../" <|> exact "./")
273 | absImport : Tokenizer RawToken
274 | absImport = pathImport AbsImport (exact "/")
276 | homeDirImport : Tokenizer RawToken
277 | homeDirImport = pathImport HomeDirImport (exact "~/")
280 | shaImport = (exact "sha256:" <+> (someUntil (space) (pred $
isAlphaNum)))
282 | embed : Tokenizer RawToken
283 | embed = httpImport <|> envImport <|> relImport <|> absImport <|> homeDirImport
286 | groupSymbols : List String
287 | groupSymbols = ["{", "[", ".(", ".{", "<", "("]
289 | groupClose : String -> String
290 | groupClose "{" = "}"
291 | groupClose "[" = "]"
292 | groupClose ".(" = ")"
293 | groupClose ".{" = "}"
294 | groupClose "(" = ")"
295 | groupClose "<" = ">"
298 | emptyString : Lexer
299 | emptyString = exact "\"\""
301 | stringBegin : Lexer
302 | stringBegin = is '"'
307 | stringMultiBegin : Lexer
308 | stringMultiBegin = exact "''"
310 | multilineEnd : String
311 | multilineEnd = "''"
314 | stringTokens : Bool -> Nat -> Tokenizer RawToken
315 | stringTokens multi _ =
316 | let escapeChars = "\\"
318 | escapeLexer = escape (exact escapeChars) any
319 | charLexer = non $
exact (if multi then multilineEnd else stringEnd)
320 | in match (someUntil (exact interpStart) (escapeLexer <|> charLexer)) (\x => StringLit x)
321 | <|> compose (exact interpStart)
322 | (const InterpBegin)
328 | rawTokens : Tokenizer RawToken
330 | match (blockComment <|> lineComment) Comment
331 | <|> match doubleLit (TDouble . cast)
332 | <|> match integerLit (TInteger . cast)
333 | <|> compose (choice $
exact <$> groupSymbols)
337 | (exact . groupClose)
339 | <|> match (exact "//\\\\") Symbol
340 | <|> match (exact "⩓") Symbol
341 | <|> match (exact "//") Symbol
342 | <|> match (exact "⫽") Symbol
343 | <|> match (exact "/\\") Symbol
344 | <|> match (exact "∧") Symbol
345 | <|> match (exact "\\") Symbol
346 | <|> match (exact "λ") Symbol
347 | <|> match (exact "∀") Symbol
348 | <|> match (exact "@") Symbol
350 | <|> match (exact "missing") (const MissingImport)
351 | <|> match shaImport Sha
352 | <|> match posInfinity (const $
TDouble (1.0/0.0))
353 | <|> match negInfinity (const $
TDouble (-1.0/0.0))
354 | <|> match (exact "||") Symbol
355 | <|> match (exact "&&") Symbol
356 | <|> match (exact "===") Symbol
357 | <|> match (exact "≡") Symbol
358 | <|> match (exact "==") Symbol
359 | <|> match (exact "!=") Symbol
360 | <|> match (exact "=") Symbol
361 | <|> match (exact "->") Symbol
362 | <|> match (exact "→") Symbol
363 | <|> match (exact "++") Symbol
364 | <|> match (exact "+") Symbol
365 | <|> match (exact "-") Symbol
366 | <|> match (exact "*") Symbol
367 | <|> match (exact "#") Symbol
368 | <|> match (exact "::") Symbol
369 | <|> match (exact ":") Symbol
370 | <|> match (exact "?") Symbol
371 | <|> match (exact "|") Symbol
372 | <|> match (exact ",") Symbol
373 | <|> match (exact ".") Symbol
374 | <|> match (exact "as Text") Keyword
375 | <|> match (exact "as Location") Keyword
376 | <|> match spaces (const White)
377 | <|> match naturalLit (TNatural . cast)
378 | <|> match quotedIdent parseQuotedIdent
379 | <|> match ident parseIdent
380 | <|> compose stringBegin
381 | (const $
StringBegin Single)
383 | (stringTokens False)
384 | (\hashtag => exact stringEnd <+> reject (is '"'))
386 | <|> compose stringMultiBegin
387 | (const $
StringBegin Multi)
389 | (stringTokens True)
390 | (\hashtag => exact multilineEnd <+> reject (exact "''"))
392 | <|> match any (const Unrecognised)
396 | String -> Either (StopReason, Int, Int, String) (List (WithBounds RawToken))
398 | = case lexTo reject rawTokens str of
401 | (tok, (EndInput, l, c, _)) => Right (filter notComment tok ++
402 | [MkBounded EndInput False (MkBounds l c l c)])
403 | (_, fail) => Left fail
405 | notComment : WithBounds RawToken -> Bool
406 | notComment t = case t.val of
412 | lex : String -> Either (StopReason, Int, Int, String) (List (WithBounds RawToken))
413 | lex = lexTo (pred $
const False)