0 | module Libraries.Text.Lexer.Tokenizer
4 | import Libraries.Text.Lexer.Core
5 | import Libraries.Text.PrettyPrint.Prettyprinter
7 | import public Libraries.Control.Delayed
8 | import public Libraries.Text.Bounded
-- | A tokenizer over token type @tokenType@:
--   * 'Match'   — one lexer plus a function turning the matched text into a token;
--   * 'Compose' — a bracketed region: a @begin@ lexer whose match is mapped to a
--     token and also 'tagger'-ed, a (possibly infinite, hence 'Inf') middle
--     tokenizer chosen from the tag, and a tag-dependent @end@ lexer;
--   * 'Alt'     — ordered choice of two tokenizers (second is 'Lazy').
-- NOTE(review): the result type of 'Compose' (original line 22) is not visible
-- in this excerpt; presumably `Tokenizer tokenType` — confirm against the file.
14 | data Tokenizer : (tokenType : Type) -> Type where
15 | Match : Lexer -> (String -> tokenType) -> Tokenizer tokenType
16 | Compose : (begin : Lexer) ->
17 | (mapBegin : String -> tokenType) ->
18 | (tagger : String -> tag) ->
19 | (middle : Inf (tag -> Tokenizer tokenType)) ->
20 | (end : tag -> Lexer) ->
21 | (mapEnd : String -> tokenType) ->
23 | Alt : Tokenizer tokenType -> Lazy (Tokenizer tokenType) -> Tokenizer tokenType
-- | Ordered alternative of two tokenizers; the right operand is 'Lazy' so it
-- is only forced when the left one fails.
-- NOTE(review): the implementation (presumably `(<|>) = Alt`) is not visible
-- in this excerpt.
27 | (<|>) : Tokenizer t -> Lazy (Tokenizer t) -> Tokenizer t
-- | Build a tokenizer from a single lexer and a function mapping the matched
-- text to a token.
-- NOTE(review): the implementation (presumably `match = Match`) is not visible
-- in this excerpt.
32 | match : Lexer -> (String -> a) -> Tokenizer a
-- | Build a bracketed ('Compose') tokenizer: lex an opening token with @begin@
-- (mapped by @mapBegin@), derive a @tag@ from its text, run the tag-selected
-- @middle@ tokenizer until the tag-selected @end@ lexer matches (mapped by
-- @mapEnd@). @middle@ is 'Inf' to permit productive recursion (e.g. nesting).
-- NOTE(review): the result type and the implementation (presumably
-- `Tokenizer a` / `compose = Compose`, original lines 45-46) are not visible
-- in this excerpt.
39 | compose : (begin : Lexer) ->
40 | (mapBegin : String -> a) ->
41 | (tagger : String -> tag) ->
42 | (middle : Inf (tag -> Tokenizer a)) ->
43 | (end : tag -> Lexer) ->
44 | (mapEnd : String -> a) ->
-- | Why tokenising stopped: end of input, no rule matched, or a 'Compose'
-- region's end lexer never matched. 'ComposeNotClosing' carries the
-- (line, col) bounds of the opening token that was never closed.
52 | data StopReason = EndInput | NoRuleApply | ComposeNotClosing (Int, Int) (Int, Int)
-- | Debug rendering of a 'StopReason'; constructor name followed by the
-- shown positions for 'ComposeNotClosing'.
Show StopReason where
  show EndInput = "EndInput"
  show NoRuleApply = "NoRuleApply"
  show (ComposeNotClosing start end)
      = concat ["ComposeNotClosing ", show start, " ", show end]
-- | Pretty-printer for 'StopReason'; mirrors the 'Show' instance, separating
-- the constructor name and the shown positions with single spaces.
Pretty Void StopReason where
  pretty EndInput = pretty "EndInput"
  pretty NoRuleApply = pretty "NoRuleApply"
  pretty (ComposeNotClosing start end)
      = hsep ["ComposeNotClosing", pretty (show start), pretty (show end)]
-- | Core tokenising loop: repeatedly apply the tokenizer to the remaining
-- characters, accumulating bounded tokens (in reverse; reversed on return),
-- until the @reject@ lexer matches, no rule applies, or input is exhausted.
-- Returns the tokens plus the stop reason and the final (line, col, rest).
-- NOTE(review): the head of the signature (original lines 66-67) is not
-- visible in this excerpt; the helpers below appear to be its where-block.
68 | (line, col : Int) -> List (WithBounds a) ->
70 | (List (WithBounds a), (StopReason, Int, Int, List Char))
-- Empty input: stop with EndInput.
71 | tokenise reject tokenizer line col acc [] = (reverse acc, EndInput, (line, col, []))
72 | tokenise reject tokenizer line col acc str
-- If the reject lexer matches here, stop (reported as EndInput) without
-- consuming the rejected text.
73 | = case scan reject [] str of
74 | Just _ => (reverse acc, (EndInput, line, col, str))
75 | Nothing => case getFirstMatch tokenizer str of
76 | Right (toks, line', col', rest) =>
-- assert_total: recursion is on `rest`, which getFirstMatch shortened —
-- presumably each successful match consumes at least one character; a
-- justifying comment (original line 77) is not visible in this excerpt.
78 | assert_total (tokenise reject tokenizer line' col' (toks ++ acc) rest)
79 | Left reason => (reverse acc, reason, (line, col, str))
-- Count newline characters in a (reversed) matched token, to advance `line`.
81 | countNLs : List Char -> Nat
82 | countNLs str = List.length (filter (== '\n') str)
-- Column after consuming the matched text: if it contains a newline, the new
-- column is the length of the segment after the last newline (the scanned
-- text is in reverse, so `span (/= '\n')` finds it); otherwise add to the
-- current column. NOTE(review): the clause head (original line 85,
-- presumably `getCols x c`) is not visible in this excerpt.
84 | getCols : List Char -> Int -> Int
86 | = case span (/= '\n') x of
87 | (incol, []) => c + cast (length incol)
88 | (incol, _) => cast (length incol)
-- Run one lexer at the current position; on success return the matched text
-- (re-reversed and packed) with the updated line/column and the remainder.
-- NOTE(review): the failure alternative of the let-pattern (original line 96,
-- presumably `| _ => Nothing`) is not visible in this excerpt.
93 | getNext : (lexer : Lexer) -> (line, col : Int) -> (input : List Char) -> Maybe (String, Int, Int, List Char)
94 | getNext lexer line col str =
95 | let Just (token, rest) = scan lexer [] str
97 | line' = line + cast (countNLs token)
98 | col' = getCols token col
-- scan accumulates in reverse, hence the reverse before packing.
99 | tokenStr = fastPack $
reverse token
100 | in pure (tokenStr, line', col', rest)
-- Try the tokenizer's rules in order against the input; Right gives the
-- produced tokens (reversed) plus the new position and remainder, Left the
-- reason no rule produced anything.
102 | getFirstMatch : Tokenizer a -> List Char ->
103 | Either StopReason (List (WithBounds a), Int, Int, List Char)
104 | getFirstMatch (Match lex fn) str
105 | = let Just (tok, line', col', rest) = getNext lex line col str
106 | | _ => Left NoRuleApply
107 | tok' = MkBounded (fn tok) False (MkBounds line col line' col')
108 | in Right ([tok'], line', col', rest)
-- Compose: lex the opener, pick the middle tokenizer from its tag, tokenise
-- the interior with the end lexer as the reject lexer, then lex the closer.
109 | getFirstMatch (Compose begin mapBegin tagger middleFn endFn mapEnd) str
110 | = let Just (beginTok', line', col' , rest) = getNext begin line col str
111 | | Nothing => Left NoRuleApply
112 | tag = tagger beginTok'
113 | middle = middleFn tag
115 | beginTok'' = MkBounded (mapBegin beginTok') False (MkBounds line col line' col')
116 | (midToks, (reason, line'', col'', rest'')) =
-- assert_total: the interior is strictly smaller than the input — presumably
-- because the opener was consumed; TODO confirm against the full file.
117 | assert_total $
tokenise end middle line' col' [] rest
-- NOTE(review): the `case reason of` scrutinee line (original line 118) is
-- not visible in this excerpt.
119 | ComposeNotClosing {} => Left reason
-- The interior stopped for another reason; the closer must match next,
-- otherwise the region never closed (reported with the opener's bounds).
120 | _ => let Just (endTok', lineEnd, colEnd, restEnd) =
121 | getNext end line'' col'' rest''
122 | | _ => Left $
ComposeNotClosing (line, col) (line', col')
123 | endTok'' = MkBounded (mapEnd endTok') False (MkBounds line'' col'' lineEnd colEnd)
-- Tokens are accumulated reversed, so: closer first, interior reversed,
-- opener last; the caller's `reverse acc` restores source order.
124 | in Right ([endTok''] ++ reverse midToks ++ [beginTok''], lineEnd, colEnd, restEnd)
-- Alt: try the left tokenizer; an unclosed Compose is fatal (not retried),
-- any other failure falls through to the right tokenizer.
125 | getFirstMatch (Alt t1 t2) str
126 | = case getFirstMatch t1 str of
127 | Right result => Right result
128 | Left reason@(ComposeNotClosing {}) => Left reason
129 | Left _ => getFirstMatch t2 str
-- | String-level wrapper around 'tokenise': unpack the input, tokenise from
-- position (0, 0) until the @reject@ lexer matches (or input ends / no rule
-- applies), and repack the unconsumed remainder.
-- NOTE(review): the head of the signature (original lines up to 134) is not
-- visible in this excerpt.
135 | (List (WithBounds a), (StopReason, Int, Int, String))
136 | lexTo reject tokenizer str
137 | = let (ts, reason, (l, c, str')) =
138 | tokenise reject tokenizer 0 0 [] (fastUnpack str) in
139 | (ts, reason, (l, c, fastPack str'))
-- | Tokenise a whole string: 'lexTo' with a reject lexer that never matches
-- (`pred (const False)`), so lexing only stops at end of input, an unmatched
-- rule, or an unclosed Compose region.
lex : Tokenizer a -> String -> (List (WithBounds a), (StopReason, Int, Int, String))
lex tokenizer input = lexTo (pred (const False)) tokenizer input