0 | module Libraries.Text.Lexer.Core
5 | import public Libraries.Control.Delayed
6 | import public Libraries.Text.Bounded
14 | data Recognise : (consumes : Bool) -> Type where
15 | Empty : Recognise False
17 | EOF : Recognise False
18 | Lookahead : (positive : Bool) -> Recognise c -> Recognise False
19 | Pred : (Char -> Bool) -> Recognise True
20 | SeqEat : Recognise True -> Inf (Recognise e) -> Recognise True
21 | SeqEmpty : Recognise e1 -> Recognise e2 -> Recognise (e1 || e2)
22 | SeqSame : Recognise e -> Recognise e -> Recognise e
23 | Alt : Recognise e1 -> Recognise e2 -> Recognise (e1 && e2)
28 | Lexer = Recognise True
33 | (<+>) : {c1 : Bool} ->
34 | Recognise c1 -> inf c1 (Recognise c2) -> Recognise (c1 || c2)
35 | (<+>) {c1 = False} = SeqEmpty
36 | (<+>) {c1 = True} = SeqEat
41 | (<|>) : Recognise c1 -> Recognise c2 -> Recognise (c1 && c2)
51 | empty : Recognise False
56 | eof : Recognise False
61 | pred : (Char -> Bool) -> Lexer
66 | expect : Recognise c -> Recognise False
67 | expect = Lookahead True
71 | reject : Recognise c -> Recognise False
72 | reject = Lookahead False
78 | concatMap : (a -> Recognise c) -> (xs : List a) -> Recognise (isCons xs && c)
79 | concatMap _ [] = Empty
80 | concatMap f (x :: []) = f x
81 | concatMap f (x :: xs@(_ :: _)) = SeqSame (f x) (concatMap f xs)
83 | data StrLen : Type where
84 | MkStrLen : String -> Nat -> StrLen
86 | getString : StrLen -> String
87 | getString (MkStrLen str n) = str
89 | strIndex : StrLen -> Nat -> Maybe Char
90 | strIndex (MkStrLen str len) i
91 | = if cast {to = Integer} i >= cast len then Nothing
92 | else Just (assert_total (prim__strIndex str (cast i)))
94 | mkStr : String -> StrLen
95 | mkStr str = MkStrLen str (length str)
97 | strTail : Nat -> StrLen -> StrLen
98 | strTail start (MkStrLen str len)
99 | = MkStrLen (substr start len str) (minus len start)
104 | scan : Recognise c -> List Char -> List Char -> Maybe (List Char, List Char)
105 | scan Empty tok str = pure (tok, str)
106 | scan Fail tok str = Nothing
107 | scan EOF tok [] = Just (tok,[])
108 | scan EOF tok (_::_) = Nothing
109 | scan (Lookahead positive r) tok str
110 | = if isJust (scan r tok str) == positive
111 | then pure (tok, str)
113 | scan (Pred f) tok [] = Nothing
114 | scan (Pred f) tok (c :: str)
116 | then Just (c :: tok, str)
118 | scan (SeqEat r1 r2) tok str
119 | = do (tok', rest) <- scan r1 tok str
121 | assert_total (scan r2 tok' rest)
122 | scan (SeqEmpty r1 r2) tok str
123 | = do (tok', rest) <- scan r1 tok str
125 | scan (SeqSame r1 r2) tok str
126 | = do (tok', rest) <- scan r1 tok str
128 | scan (Alt r1 r2) tok str
129 | = maybe (scan r2 tok str) Just (scan r1 tok str)
136 | TokenMap : (tokenType : Type) -> Type
137 | TokenMap tokenType = List (Lexer, String -> tokenType)
139 | tokenise : (a -> Bool) ->
140 | (line : Int) -> (col : Int) ->
141 | List (WithBounds a) -> TokenMap a ->
142 | List Char -> (List (WithBounds a), (Int, Int, List Char))
143 | tokenise pred line col acc tmap str
144 | = case getFirstToken tmap str of
145 | Just (tok, line', col', rest) =>
148 | then (reverse acc, (line, col, []))
149 | else assert_total (tokenise pred line' col' (tok :: acc) tmap rest)
150 | Nothing => (reverse acc, (line, col, str))
152 | countNLs : List Char -> Nat
153 | countNLs str = List.length (filter (== '\n') str)
155 | getCols : List Char -> Int -> Int
157 | = case span (/= '\n') x of
158 | (incol, []) => c + cast (length incol)
159 | (incol, _) => cast (length incol)
161 | getFirstToken : TokenMap a -> List Char ->
162 | Maybe (WithBounds a, Int, Int, List Char)
163 | getFirstToken [] str = Nothing
164 | getFirstToken ((lex, fn) :: ts) str
165 | = case scan lex [] str of
166 | Just (tok, rest) =>
167 | let line' = line + cast (countNLs tok)
168 | col' = getCols tok col in
169 | Just (MkBounded (fn (fastPack (reverse tok))) False (MkBounds line col line' col'),
171 | Nothing => getFirstToken ts str
174 | lexTo : (a -> Bool) ->
175 | TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
176 | lexTo pred tmap str
177 | = let (ts, (l, c, str')) = tokenise pred 0 0 [] tmap (unpack str) in
178 | (ts, (l, c, fastPack str'))
185 | lex : TokenMap a -> String -> (List (WithBounds a), (Int, Int, String))
186 | lex = lexTo (const False)