0 | module Parser.Lexer.Source
2 | import public Parser.Lexer.Common
7 | import Libraries.Data.String.Extra
8 | import public Libraries.Text.Bounded
9 | import Libraries.Text.Lexer.Tokenizer
10 | import Libraries.Text.PrettyPrint.Prettyprinter
11 | import Libraries.Text.PrettyPrint.Prettyprinter.Util
14 | import Libraries.Utils.Octal
15 | import Libraries.Utils.String
22 | data IsMultiline = Multi | Single
33 | DebugLoc == DebugLoc = True
34 | DebugFile == DebugFile = True
35 | DebugLine == DebugLine = True
36 | DebugCol == DebugCol = True
44 | | IntegerLit Integer
46 | | StringBegin Nat IsMultiline
54 | | DotSepIdent Namespace String
63 | | CGDirective String
67 | | MagicDebugInfo DebugInfo
68 | | Unrecognised String
71 | Show DebugInfo where
72 | show DebugLoc = "__LOC__"
73 | show DebugFile = "__FILE__"
74 | show DebugLine = "__LINE__"
75 | show DebugCol = "__COL__"
80 | show (CharLit x) = "character " ++ show x
81 | show (DoubleLit x) = "double " ++ show x
82 | show (IntegerLit x) = "literal " ++ show x
84 | show (StringBegin hashtag Single) = "string begin"
85 | show (StringBegin hashtag Multi) = "multiline string begin"
86 | show StringEnd = "string end"
87 | show InterpBegin = "string interp begin"
88 | show InterpEnd = "string interp end"
89 | show (StringLit x) = "string " ++ show x
91 | show (HoleIdent x) = "hole identifier " ++ x
92 | show (Ident x) = "identifier " ++ x
93 | show (DotSepIdent ns n) = "namespaced identifier " ++ show ns ++ "." ++ show n
94 | show (DotIdent x) = "dot+identifier " ++ x
95 | show (Symbol x) = "symbol " ++ x
97 | show Space = "whitespace"
99 | show Comment = "comment"
100 | show (DocComment c) = "doc comment: \"" ++ c ++ "\""
102 | show (CGDirective x) = "CGDirective " ++ x
103 | show EndInput = "end of input"
104 | show (Keyword x) = x
105 | show (Pragma x) = "pragma " ++ x
106 | show (MagicDebugInfo di) = show di
107 | show (Unrecognised x) = "Unrecognised " ++ x
110 | Pretty Void Token where
112 | pretty (CharLit x) = pretty "character" <++> squotes (pretty x)
113 | pretty (DoubleLit x) = pretty "double" <++> pretty (show x)
114 | pretty (IntegerLit x) = pretty "literal" <++> pretty (show x)
116 | pretty (StringBegin hashtag Single) = reflow "string begin"
117 | pretty (StringBegin hashtag Multi) = reflow "multiline string begin"
118 | pretty StringEnd = reflow "string end"
119 | pretty InterpBegin = reflow "string interp begin"
120 | pretty InterpEnd = reflow "string interp end"
121 | pretty (StringLit x) = pretty "string" <++> dquotes (pretty x)
123 | pretty (HoleIdent x) = reflow "hole identifier" <++> pretty x
124 | pretty (Ident x) = pretty "identifier" <++> pretty x
125 | pretty (DotSepIdent ns n) = reflow "namespaced identifier" <++> pretty ns <+> dot <+> pretty n
126 | pretty (DotIdent x) = pretty "dot+identifier" <++> pretty x
127 | pretty (Symbol x) = pretty "symbol" <++> pretty x
129 | pretty Space = pretty "space"
131 | pretty Comment = pretty "comment"
132 | pretty (DocComment c) = reflow "doc comment:" <++> dquotes (pretty c)
134 | pretty (CGDirective x) = pretty "CGDirective" <++> pretty x
135 | pretty EndInput = reflow "end of input"
136 | pretty (Keyword x) = pretty x
137 | pretty (Pragma x) = pretty "pragma" <++> pretty x
138 | pretty (MagicDebugInfo di) = pretty (show di)
139 | pretty (Unrecognised x) = pretty "Unrecognised" <++> pretty x
142 | docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')
145 | holeIdent = is '?' <+> identNormal
148 | dotIdent = is '.' <+> identNormal
151 | pragma = is '%' <+> identNormal
155 | = digits <+> is '.' <+> digits <+> opt
156 | (is 'e' <+> opt (is '-' <|> is '+') <+> digits)
158 | stringBegin : Lexer
159 | stringBegin = many (is '#') <+> (is '"')
161 | stringEnd : Nat -> String
162 | stringEnd hashtag = "\"" ++ replicate hashtag '#'
164 | multilineBegin : Lexer
165 | multilineBegin = many (is '#') <+> (exact "\"\"\"") <+>
166 | manyUntil newline space <+> newline
168 | multilineEnd : Nat -> String
169 | multilineEnd hashtag = "\"\"\"" ++ replicate hashtag '#'
173 | cgDirective : Lexer
177 | some (pred isAlphaNum) <+> many space <+>
178 | is '{' <+> many (isNot '}') <+>
180 | <|> many (isNot '\n'))
182 | mkDirective : String -> Token
183 | mkDirective str = CGDirective (trim (substr 3 (length str) str))
186 | fixityKeywords : List String
187 | fixityKeywords = ["infixl", "infixr", "infix", "prefix"]
193 | keywords : List String
194 | keywords = ["data", "module", "where", "let", "in", "do", "record",
195 | "auto", "default", "implicit", "failing", "mutual", "namespace",
196 | "parameters", "with", "proof", "impossible", "case", "of",
197 | "if", "then", "else", "forall", "rewrite", "typebind", "autobind",
198 | "using", "interface", "implementation", "open", "import",
199 | "public", "export", "private"] ++
201 | ["total", "partial", "covering"]
204 | debugInfo : List (String, DebugInfo)
205 | debugInfo = map (\ di => (show di, di))
206 | [ DebugLoc, DebugFile, DebugLine, DebugCol ]
209 | special : List String
210 | special = ["%lam", "%pi", "%imppi", "%let"]
213 | symbols : List String
214 | symbols = [",", ";", "_", "`"]
217 | groupSymbols : List String
218 | groupSymbols = [".(",
220 | "@{", "[|", "(", "{", "[<", "[>", "[", "`(", "`{", "`["]
223 | groupClose : String -> String
224 | groupClose ".(" = ")"
225 | groupClose "@{" = "}"
226 | groupClose "[|" = "|]"
227 | groupClose ".[|" = "|]"
228 | groupClose "(" = ")"
229 | groupClose "[" = "]"
230 | groupClose "[<" = "]"
231 | groupClose "[>" = "]"
232 | groupClose "{" = "}"
233 | groupClose "`(" = ")"
234 | groupClose "`{" = "}"
235 | groupClose "`[" = "]"
238 | validSymbol : Lexer
239 | validSymbol = some (pred isOpChar)
243 | reservedInfixSymbols : List String
244 | reservedInfixSymbols
245 | = ["%", "\\", ":", "=", ":=", "$=", "|", "|||", "<-", "->", "=>", "?", "!",
246 | "&", "**", "..", "~", "@"]
250 | reservedSymbols : List String
253 | ++ groupSymbols ++ (groupClose <$> groupSymbols)
254 | ++ reservedInfixSymbols
256 | fromBinLit : String -> Integer
258 | = if length str <= 2
260 | else let num = assert_total (strTail (strTail str)) in
261 | fromBin . reverse . map castBin . unpack $
num
263 | castBin : Char -> Integer
264 | castBin '1' = 1;
castBin _ = 0
265 | fromBin : List Integer -> Integer
267 | fromBin (0 :: xs) = 2 * fromBin xs
268 | fromBin (x :: xs) = x + (2 * fromBin xs)
270 | fromHexLit : String -> Integer
272 | = if length str <= 2
274 | else let num = assert_total (strTail (strTail str)) in
275 | fromMaybe 0 (fromHex (reverse num))
278 | fromOctLit : String -> Integer
280 | = if length str <= 2
282 | else let num = assert_total (strTail (strTail str)) in
283 | fromMaybe 0 (fromOct (reverse num))
287 | stringTokens : Bool -> Nat -> Tokenizer Token
288 | stringTokens multi hashtag
289 | = let escapeChars = "\\" ++ replicate hashtag '#'
290 | interpStart = escapeChars ++ "{"
291 | escapeLexer = escape (exact escapeChars) any
292 | charLexer = if multi
293 | then non $
exact (multilineEnd hashtag)
294 | else non (newline <|> exact (stringEnd hashtag))
296 | match (someUntil (exact interpStart) (escapeLexer <|> charLexer)) (\x => StringLit x)
297 | <|> compose (exact interpStart)
298 | (const InterpBegin)
304 | rawTokens : Tokenizer Token
306 | match comment (const Comment)
307 | <|> match blockComment (const Comment)
308 | <|> match docComment (DocComment . removeOptionalLeadingSpace . drop 3)
309 | <|> match cgDirective mkDirective
310 | <|> match holeIdent (\x => HoleIdent (assert_total (strTail x)))
311 | <|> compose (choice $
exact <$> groupSymbols)
315 | (exact . groupClose)
317 | <|> match (choice $
(exact . fst) <$> debugInfo) (MagicDebugInfo . fromMaybe DebugLoc . flip lookup debugInfo)
318 | <|> match (choice $
exact <$> symbols) Symbol
319 | <|> match doubleLit (DoubleLit . cast)
320 | <|> match binUnderscoredLit (IntegerLit . fromBinLit . removeUnderscores)
321 | <|> match hexUnderscoredLit (IntegerLit . fromHexLit . removeUnderscores)
322 | <|> match octUnderscoredLit (IntegerLit . fromOctLit . removeUnderscores)
323 | <|> match digitsUnderscoredLit (IntegerLit . cast . removeUnderscores)
324 | <|> compose multilineBegin
325 | (\begin => StringBegin (countHashtag begin) Multi)
327 | (stringTokens True)
328 | (exact . multilineEnd)
330 | <|> compose stringBegin
331 | (\begin => StringBegin (countHashtag begin) Single)
333 | (stringTokens False)
334 | (\hashtag => exact (stringEnd hashtag) <+> reject (is '"'))
336 | <|> match charLit (CharLit . stripQuotes)
337 | <|> match dotIdent (\x => DotIdent (assert_total $
strTail x))
338 | <|> match namespacedIdent parseNamespace
339 | <|> match identNormal parseIdent
340 | <|> match pragma (\x => Pragma (assert_total $
strTail x))
341 | <|> match space (const Space)
342 | <|> match validSymbol Symbol
343 | <|> match symbol Unrecognised
345 | parseIdent : String -> Token
346 | parseIdent x = if x `elem` keywords then Keyword x
349 | parseNamespace : String -> Token
350 | parseNamespace ns = case mkNamespacedIdent ns of
351 | (Nothing, ident) => parseIdent ident
352 | (Just ns, n) => DotSepIdent ns n
354 | countHashtag : String -> Nat
355 | countHashtag = count (== '#') . unpack
357 | removeOptionalLeadingSpace : String -> String
358 | removeOptionalLeadingSpace str = case strM str of
359 | StrCons ' ' tail => tail
362 | removeUnderscores : String -> String
363 | removeUnderscores s = fastPack $
filter (/= '_') (fastUnpack s)
368 | Either (StopReason, Int, Int, String)
369 | ( List (WithBounds ())
370 | , List (WithBounds Token))
372 | = case lexTo reject rawTokens str of
373 | (toks, (EndInput, l, c, _)) =>
376 | let end = [MkBounded EndInput False (MkBounds l c l c)] in
377 | Right $
map (++ end)
380 | $
filter isNotSpace toks
381 | (_, fail) => Left fail
384 | isNotSpace : WithBounds Token -> Bool
385 | isNotSpace t = case t.val of
389 | spotComment : WithBounds Token ->
390 | Either (WithBounds ()) (WithBounds Token)
391 | spotComment t = case t.val of
392 | Comment => Left (() <$ t)
397 | Either (StopReason, Int, Int, String)
398 | ( List (WithBounds ())
399 | , List (WithBounds Token))
400 | lex = lexTo (pred $
const False)