1 | module TyRE.Text.Lexer.Core
3 | import public Control.Delayed
4 | import public Data.Bool
5 | import public Data.List
6 | import public Data.Nat
7 | import public Data.String
8 | import public Data.Maybe
||| A lexer recogniser, indexed by whether it is guaranteed to consume
||| at least one character of input (`consumes = True`). The boolean
||| index lets sequencing decide statically when laziness is safe.
||| NOTE(review): a `Fail` constructor is matched in `scan` below but is
||| not visible in this view — confirm it is declared here.
data Recognise : (consumes : Bool) -> Type where
  -- Matches the empty string; consumes nothing.
  Empty : Recognise False
  -- Positive/negative lookahead on a sub-recogniser; never consumes.
  Lookahead : (positive : Bool) -> Recognise c -> Recognise False
  -- Matches exactly one character satisfying the predicate.
  Pred : (Char -> Bool) -> Recognise True
  -- Sequencing where the first part must consume, so the second may be
  -- lazy (`Inf`) without risking non-termination.
  SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
  -- Sequencing where either part may be empty; consumes if either does.
  SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
  -- Alternation; guaranteed to consume only if both branches are.
  Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
26 | Lexer = Recognise True
||| Sequence two recognisers; the result consumes if either part does.
||| Dispatches on the implicit `c1`: when the first part must consume,
||| the second may be lazy (`SeqEat`); otherwise both are strict
||| (`SeqEmpty`). `inf c1` (from Control.Delayed) selects the laziness
||| of the second argument accordingly.
(<+>) : {c1 : Bool} ->
        Recognise c1 -> inf c1 (Recognise c2) -> Recognise (c1 || c2)
(<+>) {c1 = False} = SeqEmpty
(<+>) {c1 = True} = SeqEat
%allow_overloads (<|>)
||| Alternation: try the first recogniser, fall back to the second.
||| Guaranteed to consume only if both alternatives are.
||| NOTE(review): the defining clause (presumably `Alt`) is outside
||| this view.
(<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
56 | empty : Recognise False
62 | pred : (Char -> Bool) -> Lexer
||| Positive lookahead: succeed only when `r` would match at this
||| position, without consuming any input.
expect : Recognise c -> Recognise False
expect r = Lookahead True r
||| Negative lookahead: succeed only when `r` would NOT match at this
||| position, without consuming any input.
reject : Recognise c -> Recognise False
reject r = Lookahead False r
%allow_overloads concatMap
||| Sequence the recognisers produced by mapping `f` over `xs`.
||| The result consumes input iff each element's recogniser consumes and
||| the list is non-empty; the `rewrite` steps massage the boolean index
||| of each clause into that stated form.
concatMap : (a -> Recognise c) -> (xs : List a) ->
            Recognise (c && (isCons xs))
-- Empty list: c && False = False, so Empty fits after rewriting.
concatMap _ [] = rewrite andFalseFalse c in Empty
-- Cons: rewrite c && True to c, then c || (c && isCons xs) to
-- c && isCons (x :: xs), so a SeqEmpty of head and tail type-checks.
concatMap f (x :: xs)
    = rewrite andTrueNeutral c in
      rewrite sym (orSameAndRightNeutral c (isCons xs)) in
              SeqEmpty (f x) (Core.concatMap f xs)
||| A string paired with its cached length, so bounds checks do not
||| recompute the length on every access.
data StrLen : Type where
  MkStrLen : String -> Nat -> StrLen
95 | show (MkStrLen str n) = str ++ "(" ++ show n ++ ")"
||| Project out the underlying string, discarding the cached length.
getString : StrLen -> String
getString (MkStrLen s _) = s
||| Safely index into the string: `Nothing` when `i` is out of bounds.
||| The comparison is done in `Integer` so both `Nat`s compare on common
||| ground; the bounds check is what justifies the `assert_total` around
||| the unchecked primitive `prim__strIndex`.
strIndex : StrLen -> Nat -> Maybe Char
strIndex (MkStrLen str len) i
    = if cast {to = Integer} i >= cast len then Nothing
      else Just (assert_total (prim__strIndex str (cast i)))
||| Build a `StrLen` by measuring the string once up front.
mkStr : String -> StrLen
mkStr s = MkStrLen s (length s)
||| Drop `start` characters from the front of the string, updating the
||| cached length with saturating subtraction (`minus`).
strTail : Nat -> StrLen -> StrLen
strTail start (MkStrLen s n)
    = MkStrLen (substr start n s) (minus n start)
||| Convert a string to its list of characters via `strUncons`.
||| NOTE(review): the clause head and the `Nothing` (empty-string) case
||| of the `case` are outside this view.
unpack' : String -> List Char
    = case strUncons str of
           Just (x, xs) => x :: unpack' xs
||| Run a recogniser over the input, pushing matched characters onto
||| `tok` (so the token accumulates in reverse). On success returns the
||| extended token and the remaining input.
||| NOTE(review): several `else`/condition lines of this definition are
||| outside this view.
scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
-- Empty matches without consuming anything.
scan Empty tok str = pure (tok, str)
scan Fail tok str = Nothing
-- Lookahead: run the sub-recogniser only to test success/failure, then
-- return the ORIGINAL tok/str so nothing is consumed.
scan (Lookahead positive r) tok str
    = if isJust (scan r tok str) == positive
         then pure (tok, str)
-- Pred needs at least one character.
scan (Pred f) tok [] = Nothing
scan (Pred f) tok (c :: str)
         then Just (c :: tok, str)
-- SeqEat: the first part consumed input, so forcing the possibly
-- infinite second part terminates — hence the assert_total.
scan (SeqEat r1 r2) tok str
    = do (tok', rest) <- scan r1 tok str
         assert_total (scan r2 tok' rest)
scan (SeqEmpty r1 r2) tok str
    = do (tok', rest) <- scan r1 tok str
-- Alt: try r1 first; only on its failure fall back to r2.
scan (Alt r1 r2) tok str
    = maybe (scan r2 tok str) Just (scan r1 tok str)
||| A lexing rule table: lexers paired with functions that turn the
||| matched text into a token. Rules are tried in list order.
TokenMap : (tokenType : Type) -> Type
TokenMap tokenType = List (Lexer, String -> tokenType)
||| A token together with its source position.
||| NOTE(review): the field declarations (apparently `tok`, `line`,
||| `col`, judging from the Show instance below) are outside this view.
record TokenData a where
  constructor MkToken
-- Render as line:col:token for diagnostics.
Show a => Show (TokenData a) where
  show t = show (line t) ++ ":" ++ show (col t) ++ ":" ++ show (tok t)
||| Repeatedly match tokens from the input, threading the line/column
||| position and accumulating tokens (in reverse) onto `acc`. Stops when
||| the stopping predicate accepts a token or when no rule matches;
||| returns the tokens in source order plus the final position and any
||| unconsumed input.
||| NOTE(review): the `pred tok` condition between the `Just` match and
||| the two branches below, and any `where` keyword introducing the
||| helpers that follow, are outside this view.
tokenise : (TokenData a -> Bool) ->
           (line : Int) -> (col : Int) ->
           List (TokenData a) -> TokenMap a ->
           List Char -> (List (TokenData a), (Int, Int, List Char))
tokenise pred line col acc tmap str
    = case getFirstToken tmap str of
           Just (tok, line', col', rest) =>
                -- stopping predicate hit: return what we have, drop rest
                then (reverse acc, (line, col, []))
                -- keep lexing; `rest` is strictly shorter, hence assert_total
                else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
           Nothing => (reverse acc, (line, col, str))
-- Number of newline characters in a matched token; used to advance
-- the line counter.
countNLs : List Char -> Nat
countNLs = List.length . filter (== '\n')
||| Compute the column after consuming token `x`, given current column
||| `c`: scanning the reversed token, if no newline is found the column
||| advances by the token length; otherwise it restarts at the length of
||| the text after the last newline.
||| NOTE(review): the clause head (presumably `getCols x c`) is outside
||| this view.
getCols : List Char -> Int -> Int
    = case span (/= '\n') (reverse x) of
           (incol, []) => c + cast (length incol)
           (incol, _) => cast (length incol)
||| Try each (lexer, constructor) rule in order; the first lexer that
||| matches yields the token (note `tok` accumulates reversed, hence
||| `reverse` before `pack`) plus the updated line/column and the rest
||| of the input.
||| NOTE(review): `line` and `col` occur free here, so this is
||| presumably a `where`-local helper of `tokenise` — confirm.
getFirstToken : TokenMap a -> List Char ->
                Maybe (TokenData a, Int, Int, List Char)
getFirstToken [] str = Nothing
getFirstToken ((lex, fn) :: ts) str
    = case scan lex [] str of
           Just (tok, rest) => Just (MkToken line col (fn (pack (reverse tok))),
                                     line + cast (countNLs tok),
                                     getCols tok col, rest)
           -- this rule failed: fall through to the remaining rules
           Nothing => getFirstToken ts str
||| Lex the whole input with no early-stopping predicate (`const False`),
||| starting at line 0, column 0. Returns the tokens plus the final
||| position and any unconsumed input, repacked as a String.
||| NOTE(review): the clause head (presumably `lex tmap str`) is outside
||| this view.
lex : TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
    = let (ts, (l, c, str')) = tokenise (const False) 0 0 [] tmap (unpack str) in
          (ts, (l, c, pack str'))
||| Lex the input starting at line 0, column 0, stopping early as soon
||| as `pred` accepts a produced token. Returns the tokens plus the
||| final position and any unconsumed input, repacked as a String.
lexTo : (TokenData a -> Bool) ->
        TokenMap a -> String -> (List (TokenData a), (Int, Int, String))
lexTo pred tmap str =
  let (toks, (endLine, endCol, rest)) =
        tokenise pred 0 0 [] tmap (unpack str)
  in (toks, (endLine, endCol, pack rest))