1 | module Data.ByteString.Search.KnuthMorrisPratt
3 | import Data.ByteString.Search.Internal.Utils
5 | import Data.Array.Core
6 | import Data.Array.Mutable
8 | import Data.ByteString
9 | import Data.Linear.Ref1
12 | %hide Data.Buffer.Core.get
13 | %hide Data.Buffer.Core.set
24 | matcher overlap pat chunks t =
25 | let bords # t := kmpBorders pat t
26 | searcher' # t := searcher Z Z pat chunks Lin bords overlap t
27 | in (searcher' <>> []) # t
30 | findMatch : (prior : Nat)
33 | -> (pat : ByteString)
34 | -> (strs : List ByteString)
35 | -> (final : SnocList Nat)
36 | -> (bords : MArray s (S (length pat)) Nat)
38 | -> F1 s (SnocList Nat)
39 | findMatch _ _ _ _ [] final _ _ t =
41 | findMatch prior pati stri pat strs@(str::rest) final bords overlap t =
42 | let patlen := length pat
43 | in case pati == patlen of
47 | case tryNatToFin patlen of
49 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.matcher.findMatch: can't index into ByteString") # t
51 | let final' := minus (plus prior stri) patlen
52 | ami # t := get bords patlen' t
55 | let final'' := final :< final'
56 | in assert_total (checkHead prior stri pat strs final'' bords overlap t)
58 | let final'' := final :< final'
59 | in assert_total (findMatch prior ami stri pat strs final'' bords overlap t)
61 | let final' := minus (plus prior stri) patlen
62 | final'' := final :< final'
63 | in assert_total (checkHead prior stri pat strs final'' bords overlap t)
65 | let strlen := length str
66 | in case stri == strlen of
68 | assert_total (searcher (plus prior strlen) pati pat rest final bords overlap t)
70 | let pati' := index pati pat
73 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.matcher.findMatch: can't index into ByteString") # t
75 | let stri' := index stri str
78 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.matcher.findMatch: can't index into ByteString") # t
80 | case stri'' == pati'' of
82 | assert_total (findMatch prior (S pati) (S stri) pat strs final bords overlap t)
84 | case tryNatToFin pati of
86 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.matcher.findMatch: can't convert Nat to Fin") # t
88 | let pati'''' # t := get bords pati''' t
89 | in case pati'''' == Z of
91 | assert_total (checkHead prior (S stri) pat strs final bords overlap t)
93 | assert_total (findMatch prior pati'''' stri pat strs final bords overlap t)
94 | checkHead : (prior : Nat)
96 | -> (pat : ByteString)
97 | -> (strs : List ByteString)
98 | -> (final : SnocList Nat)
99 | -> (bords : MArray s (S (length pat)) Nat)
100 | -> (overlap : Bool)
101 | -> F1 s (SnocList Nat)
102 | checkHead _ _ _ [] final _ _ t =
104 | checkHead prior stri pat strs@(str::rest) final bords overlap t =
105 | let strlen := length str
106 | in case stri == strlen of
108 | assert_total (searcher (plus prior strlen) Z pat rest final bords overlap t)
110 | let stri' := index stri str
113 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.matcher.checkHead: can't index into ByteString") # t
115 | let patzero := index Z pat
118 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.matcher.checkHead: can't index into ByteString") # t
120 | case stri'' == patzero' of
122 | assert_total (findMatch prior (S Z) (S stri) pat strs final bords overlap t)
124 | assert_total (checkHead prior (S stri) pat strs final bords overlap t)
125 | searcher : (prior : Nat)
127 | -> (pat : ByteString)
128 | -> (strs : List ByteString)
129 | -> (final : SnocList Nat)
130 | -> (bords : MArray s (S (length pat)) Nat)
131 | -> (overlap : Bool)
132 | -> F1 s (SnocList Nat)
133 | searcher _ _ _ [] final _ _ t =
135 | searcher prior Z pat strs final bords overlap t =
136 | assert_total (checkHead prior Z pat strs final bords overlap t)
137 | searcher prior patpos pat strs final bords overlap t =
138 | assert_total (findMatch prior patpos Z pat strs final bords overlap t)
166 | matchKMP : (pat : ByteString)
167 | -> (target : ByteString)
168 | -> {0 prfpat : So (not $
null pat)}
169 | -> {0 prftarget : So (not $
null target)}
171 | matchKMP pat target {prfpat} {prftarget} t =
172 | matcher False pat [target] t
197 | indicesKMP : (pat : ByteString)
198 | -> (target : ByteString)
199 | -> {0 prfpat : So (not $
null pat)}
200 | -> {0 prftarget : So (not $
null target)}
202 | indicesKMP pat target {prfpat} {prftarget} t =
203 | matcher True pat [target] t
217 | breakKMP : (pat : ByteString)
218 | -> (target : ByteString)
219 | -> {0 prfpat : So (not $
null pat)}
220 | -> {0 prftarget : So (not $
null target)}
221 | -> {0 prflength : So ((length target) >= (length pat))}
222 | -> F1 s (ByteString, ByteString)
223 | breakKMP pat target {prfpat} {prftarget} {prflength} t =
224 | let matcher' # t := matcher False pat [target] t
225 | in case matcher' of
227 | (target, empty) # t
229 | let target' := splitAt (cast {to=Nat} i) target
232 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.breakKMP: can't split ByteString") # t
249 | breakAfterKMP : (pat : ByteString)
250 | -> (target : ByteString)
251 | -> {0 prfpat : So (not $
null pat)}
252 | -> {0 prftarget : So (not $
null target)}
253 | -> {0 prflength : So ((length target) >= (length pat))}
254 | -> F1 s (ByteString, ByteString)
255 | breakAfterKMP pat target {prfpat} {prftarget} {prflength} t =
256 | let matcher' # t := matcher False pat [target] t
257 | in case matcher' of
259 | (target, empty) # t
261 | let target' := splitAt (plus (cast {to=Nat} i) (length pat)) target
264 | (assert_total $
idris_crash "Data.ByteString.Search.KnuthMorrisPratt.breakAfterKMP: can't split ByteString") # t
283 | splitKeepFrontKMP : (pat : ByteString)
284 | -> (target : ByteString)
285 | -> {0 prfpat : So (not $
null pat)}
286 | -> {0 prftarget : So (not $
null target)}
287 | -> {0 prflength : So ((length target) >= (length pat))}
288 | -> F1 s (List ByteString)
289 | splitKeepFrontKMP pat target {prfpat} {prftarget} {prflength} t =
290 | let splitter' # t := splitter pat target Lin t
291 | in (splitter' <>> []) # t
293 | psSplitter : (pat : ByteString)
294 | -> (target : ByteString)
295 | -> (final : SnocList ByteString)
296 | -> F1 s (SnocList ByteString)
297 | psSplitter pat target final t =
298 | let matcher' # t := matcher False pat [(drop (length pat) target)] t
299 | in case matcher' of
301 | let final' := final :< target
304 | let length' := plus (cast {to=Nat} i) (length pat)
305 | final' := final :< (take length' target)
306 | in assert_total (psSplitter pat (drop length' target) final' t)
307 | splitter : (pat : ByteString)
308 | -> (target : ByteString)
309 | -> (final : SnocList ByteString)
310 | -> F1 s (SnocList ByteString)
311 | splitter pat target final t =
312 | let matcher' # t := matcher False pat [target] t
313 | in case matcher' of
315 | let final' := final :< target
320 | assert_total (psSplitter pat target final t)
322 | let final' := final :< (take (cast {to=Nat} i) target)
323 | in assert_total (psSplitter pat (drop (cast {to=Nat} i) target) final' t)
346 | splitKeepEndKMP : (pat : ByteString)
347 | -> (target : ByteString)
348 | -> {0 prfpat : So (not $
null pat)}
349 | -> {0 prftarget : So (not $
null target)}
350 | -> {0 prflength : So ((length target) >= (length pat))}
351 | -> F1 s (List ByteString)
352 | splitKeepEndKMP pat target {prfpat} {prftarget} {prflength} t =
353 | let splitter' # t := splitter pat target Lin t
354 | in (splitter' <>> []) # t
356 | splitter : (pat : ByteString)
357 | -> (target : ByteString)
358 | -> (final : SnocList ByteString)
359 | -> F1 s (SnocList ByteString)
360 | splitter pat target final t =
361 | let matcher' # t := matcher False pat [target] t
362 | in case matcher' of
364 | let final' := final :< target
367 | let length' := plus (cast {to=Nat} i) (length pat)
368 | final' := final :< (take length' target)
369 | in assert_total (splitter pat (drop length' target) final' t)
394 | splitDropKMP : (pat : ByteString)
395 | -> (target : ByteString)
396 | -> {0 prfpat : So (not $
null pat)}
397 | -> {0 prftarget : So (not $
null target)}
398 | -> {0 prflength : So ((length target) >= (length pat))}
399 | -> F1 s (List ByteString)
400 | splitDropKMP pat target {prfpat} {prftarget} {prflength} t =
401 | let splitter' # t := splitter pat target Lin t
402 | in (splitter' <>> []) # t
404 | splitter : (pat : ByteString)
405 | -> (target : ByteString)
406 | -> (final : SnocList ByteString)
407 | -> F1 s (SnocList ByteString)
408 | splitter pat target final t =
409 | let matcher' # t := matcher False pat [target] t
410 | in case matcher' of
412 | let final' := final :< target
415 | let length' := plus (cast {to=Nat} i) (length pat)
416 | final' := final :< (take (cast {to=Nat} i) target)
417 | in assert_total (splitter pat (drop length' target) final' t)
440 | replaceKMP : (pat : ByteString)
441 | -> (sub : ByteString)
442 | -> (target : ByteString)
443 | -> {0 prfpat : So (not $
null pat)}
444 | -> {0 prftarget : So (not $
null target)}
445 | -> {0 prflength : So ((length target) >= (length pat))}
446 | -> F1 s (List ByteString)
447 | replaceKMP pat sub target {prfpat} {prftarget} {prflength} t =
448 | let replacer' # t := replacer pat sub target Lin t
449 | in (replacer' <>> []) # t
451 | replacer : (pat : ByteString)
452 | -> (sub : ByteString)
453 | -> (target : ByteString)
454 | -> (final : SnocList ByteString)
455 | -> F1 s (SnocList ByteString)
456 | replacer pat sub target final t =
457 | let matcher' # t := matcher False pat [target] t
458 | in case matcher' of
460 | let final' := final :< target
467 | assert_total (replacer pat sub (drop (length pat) target) final t)
469 | let final' := final :< sub
470 | in assert_total (replacer pat sub (drop (length pat) target) final') t
474 | let length' := plus (cast {to=Nat} i) (length pat)
475 | final' := final :< (take (cast {to=Nat} i) target)
476 | in assert_total (replacer pat sub (drop length' target) final' t)
478 | let length' := plus (cast {to=Nat} i) (length pat)
479 | final' := final :< (take (cast {to=Nat} i) target) :< sub
480 | in assert_total (replacer pat sub (drop length' target) final' t)