0 | module Text.Regex.Naive
2 | import public Data.Alternative
4 | import public Data.List.Quantifiers
5 | import public Data.List.Lazy
6 | import public Data.List1
8 | import public Data.Vect
10 | import public Syntax.IHateParens.List
12 | import public Text.Regex.Interface
13 | import public Text.Matcher
21 | record RegExp (a : Type)
24 | data RegExpOp : Type -> Type where
25 | Seq : All RegExp tys -> RegExpOp $
All Prelude.id tys
26 | Sel : All RegExp tys -> RegExpOp $
Any Prelude.id tys
28 | WordB : (l, r : Bool) -> RegExpOp ()
29 | WithMatch : RegExp a -> RegExpOp (List Char, a)
31 | Rep1 : RegExp a -> RegExpOp $
List1 a
33 | Edge : LineMode -> EdgeSide -> RegExpOp ()
34 | AnyChar : LineMode -> RegExpOp Char
35 | Sym : (Char -> Maybe a) -> RegExpOp a
38 | record RegExp a where
40 | operation : RegExpOp opTy
46 | Functor RegExp where
47 | map f = {mapping $= (f .)}
49 | splitAt' : All f ls -> All g (ls ++ rs) -> (All g ls, All g rs)
50 | splitAt' [] ps = ([], ps)
51 | splitAt' (_::xs) (p::ps) = mapFst (p::) (splitAt' xs ps)
54 | Applicative RegExp where
55 | pure x = Seq [] `RE` const x
57 | RE (Seq ls) ml <*> RE (Seq rs) mr = Seq (ls ++ rs) `RE` \xx => let (l, r) = splitAt' ls xx in ml l $
mr r
58 | RE (Seq ls) ml <*> r = Seq (ls ++ [r]) `RE` \xx => let (l, r) = splitAt' ls xx in ml l $
head r
59 | l <*> RE (Seq rs) mr = Seq (l :: rs) `RE` \(x::xs) => x $
mr xs
60 | l <*> r = Seq [l, r] `RE` \[l, r] => l r
62 | altWith : All f ls -> (Any g ls -> a) -> (Any g rs -> a) -> Any g (ls ++ rs) -> a
63 | altWith [] _ r an = r an
64 | altWith (x::xs) l r $
Here y = l $
Here y
65 | altWith (x::xs) l r $
There y = altWith xs (l . There) r y
67 | theOnly : Any p [x] -> p x
68 | theOnly $
Here y = y
71 | Alternative RegExp where
72 | empty = Sel [] `RE` \case _ impossible
74 | l <|> RE (Sel []) _ = l
75 | RE (Sel []) _ <|> r = r
76 | RE (Sel ls) ml <|> RE (Sel rs) mr = Sel (ls ++ rs) `RE` altWith ls ml mr
77 | l <|> RE (Sel rs) mr = Sel (l :: rs) `RE` \case Here x => x;
There y => mr y
78 | RE (Sel ls) ml <|> Delay r = Sel (ls ++ [r]) `RE` altWith ls ml theOnly
79 | x <|> y = Sel [x, y] `RE` \case Here x => x;
There (Here x) => x
87 | rawMatch : (multiline : Bool) -> RegExp a -> (orig, str : List Char) -> LazyList (Maybe $
Fin $
S str.length, a)
88 | rawMatch multiline r orig str with (length orig)
89 | _ | origL = go' (origL == str.length) r str where
91 | precDrop : forall a. (xs : List a) -> (n : Fin $
S xs.length) -> (ys : List a ** Fin (S ys.length) -> Fin (S xs.length))
92 | precDrop xs FZ = (
xs ** id)
93 | precDrop (x::xs) (FS i) = let (
ys ** f)
= precDrop xs i in (
ys ** FS . f)
95 | lazyAllAnies : All p xs -> LazyList (Any p xs)
96 | lazyAllAnies [] = []
97 | lazyAllAnies (x::xs) = Here x :: map There (lazyAllAnies xs)
99 | hasntMove : Maybe (Fin $
S n) -> Bool
100 | hasntMove Nothing = True
101 | hasntMove (Just FZ) = True
102 | hasntMove (Just $
FS _) = False
104 | postponeNothings : forall a, b. LazyList (Maybe a, b) -> LazyList (Maybe a, b)
105 | postponeNothings xs = filter (isJust . fst) xs ++ filter (not . isJust . fst) xs
107 | isText : LineMode -> Bool
109 | isText Line = False
111 | prev : (curr : List Char) -> Maybe Char
113 | let currL = length curr
114 | let True = origL > currL | False => Nothing
115 | let prevPos = origL `minus` S currL
116 | let Yes _ = inBounds prevPos orig | No _ => Nothing
117 | Just $
index prevPos orig
118 | go' : forall a. Bool -> RegExp a -> (str : List Char) -> LazyList (Maybe $
Fin $
S str.length, a)
119 | go : forall a. Bool -> RegExpOp a -> (str : List Char) -> LazyList (Maybe $
Fin $
S str.length, a)
120 | go' atStart (RE op ma) cs = map @{Compose} ma $
go atStart op cs
121 | go atStart (Seq []) cs = pure (Nothing, [])
122 | go atStart (Seq $
r::rs) cs = go' atStart r cs >>= \(idx, x) => do
123 | let (
ds ** f)
= precDrop cs $
fromMaybe FZ idx
124 | let convIdx : Maybe (Fin $
S ds.length) -> Maybe (Fin $
S cs.length)
125 | convIdx $
Just i = Just $
f i
126 | convIdx Nothing = idx $> f FZ
127 | postponeNothings $
bimap convIdx (x::) <$> go (atStart && hasntMove idx) (Seq rs) ds
128 | go atStart (Sel rs) cs = postponeNothings $
lazyAllAnies rs >>= \r => go' atStart (assert_smaller rs $
pushOut r) cs
129 | go atStart (WordB l r) cs = do let wL = map (charClass Word) (prev cs)
130 | let wR = map (charClass Word) (head' cs)
131 | let lB = wL /= Just True && wR /= Just False
132 | let rB = wL /= Just False && wR /= Just True
133 | flip whenT (Just 0, ()) $
l && lB || r && rB || not l && not r && not lB && not rB
134 | go atStart (WithMatch rs) cs = go' atStart rs cs <&> \(idx, x) => (idx, maybe (const []) (\i => take (finToNat i)) idx cs, x)
135 | go atStart rr@(Rep1 r) cs = do (Just idx@(FS _), x) <- go' atStart r cs | (idx, x) => pure (idx, singleton x)
136 | let (
ds ** f)
= precDrop cs idx
137 | let sub = filter (isJust . fst) $
bimap (map f) ((x:::) . toList) <$> go False rr (assert_smaller cs ds)
138 | sub ++ [(Just idx, singleton x)]
139 | go _ (Edge _ End) [] = pure (Just FZ, ())
140 | go _ (Edge Line End) (c::cs) = whenT (multiline && isNL c) (Just FZ, ())
141 | go _ (Edge Text End) cs = empty
142 | go True (Edge _ Start) cs = pure (Just FZ, ())
143 | go False (Edge Line Start) cs = whenTs multiline $
whenJs (prev cs) $
flip whenT (Just FZ, ()) . isNL
144 | go False (Edge Text Start) cs = empty
145 | go _ (AnyChar m) [] = empty
146 | go _ (AnyChar m) (c::cs) = whenT (not multiline || isText m || not (isNL c)) (Just 1, c)
147 | go _ (Sym _) [] = empty
148 | go _ (Sym f) (c::cs) = fromList $
toList $
(Just 1,) <$> f c
151 | rawMatchIn : (multiline : Bool) -> RegExp a -> (orig, curr : List Char) -> LazyList (List Char, List Char, a, List Char)
152 | rawMatchIn multiline r orig cs = lazySplits cs >>= \(pre, cs) => rawMatch multiline r orig cs <&> \(idx, x) =>
153 | let (mid, post) = splitAt (finToNat $
fromMaybe FZ idx) cs in (asList pre, mid, x, post)
155 | lazySplits : forall a. List a -> LazyList (SnocList a, List a)
156 | lazySplits [] = pure ([<], [])
157 | lazySplits xxs@(x::xs) = ([<], xxs) :: (mapFst (:< x) <$> lazySplits xs)
160 | rawMatchAll : (multiline : Bool) -> RegExp a -> (orig, curr : List Char) -> LazyList (List (List Char, List Char, a), List Char)
161 | rawMatchAll multiline r orig cs = case rawMatchIn multiline r orig cs of
162 | [] => pure ([], cs)
163 | xs => xs >>= \(pre, ms, mx, post) => case (null ms, post) of
164 | (True, []) => pure ([(pre, ms, mx)], post)
165 | (True, p::post) => rawMatchAll multiline r orig (assert_smaller cs post) <&> mapFst ((pre, ms, mx) ::) . addPreChar p
166 | (False, post) => rawMatchAll multiline r orig (assert_smaller cs post) <&> mapFst ((pre, ms, mx) ::)
168 | addPreChar : forall a. Char -> (List (List Char, List Char, a), List Char) -> (List (List Char, List Char, a), List Char)
169 | addPreChar p ([] , post) = ([] , p::post)
170 | addPreChar p ((ppre, r)::rs, post) = ((p :: ppre, r) :: rs, post)
179 | [Naive] Regex RegExp where
180 | sym' = (`RE` id) . Sym
181 | anyChar = (`RE` id) . AnyChar
182 | edge = (`RE` id) .: Edge
183 | wordBoundary = (`RE` id) .: WordB
184 | withMatch = (`RE` mapFst pack) . WithMatch
185 | all = (`RE` id) . Seq
186 | exists [x] = map Here x
187 | exists xs = (`RE` id) $
Sel xs
188 | rep1 = (`RE` id) . Rep1
190 | public export %hint RegexRegExp : Regex RegExp;
RegexRegExp = Naive
195 | [Naive] TextMatcher RegExp where
196 | matchWhole' multiline r str = do
197 | let str = unpack str
198 | (idx, x) <- head' $
rawMatch multiline r str str
199 | guard (fromMaybe FZ idx == last) $> x
200 | matchInside' multiline r str = do
201 | let str = unpack str
202 | head' (rawMatchIn multiline r str str) <&> \(pre, mid, x, post) => MkOneMatchInside (pack pre) (pack mid) x (pack post)
203 | matchAll' multiline r str = let ustr = unpack str in maybe (Stop str) (uncurry conv) $
head' $
rawMatchAll multiline r ustr ustr where
204 | conv : List (List Char, List Char, a) -> (end : List Char) -> AllMatchedInside a
205 | conv stmids end = foldr (\(pre, ms, mx), ami => Match (pack pre) (pack ms) mx ami) (Stop $
pack end) stmids
207 | public export %hint TextMatcherRegExp : TextMatcher RegExp;
TextMatcherRegExp = Naive