1 | module Data.ByteString.Search.BoyerMoore
3 | import Data.ByteString.Search.Internal.Utils
5 | import Data.Array.Core
6 | import Data.Array.Mutable
8 | import Data.ByteString
9 | import Data.Linear.Ref1
12 | %hide Data.Buffer.Core.get
13 | %hide Data.Buffer.Core.set
24 | matcher overlap pat target t =
25 | case length pat == S Z of
27 | let patzero := index Z pat
30 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher: can't index into ByteString") # t
32 | let headelem := elemIndex patzero' pat
35 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher: byte does not appear in ByteString") # t
37 | ((cast {to=Int} headelem') :: []) # t
39 | case decSo $
(not $
null pat) of
41 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher: pattern is null") # t
43 | let occurrencesarr # t := occurrences pat {prf=patprf} t
44 | suffixshiftsarr # t := suffixShifts pat {prf=patprf} t
45 | matches # t := checkEnd (cast {to=Int} (minus (length pat) (S 0))) pat target Lin occurrencesarr suffixshiftsarr overlap t
46 | in (matches <>> []) # t
49 | checkEnd : (stri : Int)
50 | -> (pat : ByteString)
51 | -> (target : ByteString)
52 | -> (final : SnocList Int)
53 | -> (occurrencesarr : MArray s 256 Int)
54 | -> (suffixshiftsarr : MArray s (length pat) Int)
56 | -> F1 s (SnocList Int)
57 | checkEnd stri pat target final occurrencesarr suffixshiftarr overlap t =
58 | let patend := (cast {to=Int} (length pat)) - 1
59 | strend := (cast {to=Int} (length target)) - 1
60 | in case strend < stri of
64 | let target' := index (cast {to=Nat} stri) target
67 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.checkEnd: can't index into ByteString") # t
69 | let pat' := index (cast {to=Nat} patend) pat
72 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.checkEnd: can't index into ByteString") # t
74 | case target'' == pat'' of
76 | assert_total (findMatch (stri - patend) (patend - 1) pat target final occurrencesarr suffixshiftarr overlap t)
78 | case tryNatToFin (cast {to=Nat} target'') of
80 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.checkEnd: can't convert Nat to Fin") # t
82 | let target'''' # t := get occurrencesarr target''' t
83 | newtarget := stri + patend + target''''
84 | in assert_total (checkEnd newtarget pat target final occurrencesarr suffixshiftarr overlap t)
85 | findMatch : (diff : Int)
87 | -> (pat : ByteString)
88 | -> (target : ByteString)
89 | -> (final : SnocList Int)
90 | -> (occurrencesarr : MArray s 256 Int)
91 | -> (suffixshiftsarr : MArray s (length pat) Int)
93 | -> F1 s (SnocList Int)
94 | findMatch diff pati pat target final occurrencesarr suffixshiftarr overlap t =
95 | let diffpati := index (cast {to=Nat} (diff + pati)) target
98 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.findMatch: can't index into ByteString") # t
100 | let pati' := index (cast {to=Nat} pati) pat
103 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.findMatch: can't index into ByteString") # t
105 | case diffpati' == pati'' of
109 | let final' := final :< diff
112 | case tryNatToFin Z of
114 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.findMatch: can't convert Nat to Fin") # t
116 | let skip # t := get suffixshiftarr zero t
117 | diff' := diff + skip
118 | maxdiff := minus (length target) (length pat)
119 | in case (cast {to=Int} maxdiff) < diff' of
123 | case skip == (cast {to=Int} (length pat)) of
125 | assert_total (checkEnd (diff' + ((cast {to=Int} (length pat)) - 1)) pat target final' occurrencesarr suffixshiftarr overlap t)
127 | assert_total (afterMatch diff' ((cast {to=Int} (length pat)) - 1) pat target final' occurrencesarr suffixshiftarr overlap t)
129 | let skip := length pat
130 | diff' := diff + (cast {to=Int} skip)
131 | maxdiff := minus (length target) (length pat)
132 | in case (cast {to=Int} maxdiff) < diff' of
136 | case skip == (length pat) of
138 | assert_total (checkEnd (diff' + ((cast {to=Int} (length pat)) - 1)) pat target final' occurrencesarr suffixshiftarr overlap t)
140 | assert_total (afterMatch diff' ((cast {to=Int} (length pat)) - 1) pat target final' occurrencesarr suffixshiftarr overlap t)
142 | assert_total (findMatch diff (pati - 1) pat target final occurrencesarr suffixshiftarr overlap t)
144 | case tryNatToFin (cast {to=Nat} diffpati') of
146 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.findMatch: can't convert Nat to Fin") # t
148 | case tryNatToFin (cast {to=Nat} pati) of
150 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.findMatch: can't convert Nat to Fin") # t
152 | let occur # t := get occurrencesarr diffpati'' t
153 | suff # t := get suffixshiftarr pati' t
154 | diff' := diff + (max (pati + occur) suff)
155 | maxdiff := minus (length target) (length pat)
156 | in case (cast {to=Int} maxdiff) < diff' of
160 | assert_total (checkEnd (diff' + ((cast {to=Int} (length pat)) - 1)) pat target final occurrencesarr suffixshiftarr overlap t)
161 | afterMatch : (diff : Int)
163 | -> (pat : ByteString)
164 | -> (target : ByteString)
165 | -> (final : SnocList Int)
166 | -> (occurrencesarr : MArray s 256 Int)
167 | -> (suffixshiftsarr : MArray s (length pat) Int)
168 | -> (overlap : Bool)
169 | -> F1 s (SnocList Int)
170 | afterMatch diff pati pat target final occurrencesarr suffixshiftarr overlap t =
171 | let diffpati := index (cast {to=Nat} (diff + pati)) target
172 | in case diffpati of
174 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.afterMatch: can't index into ByteString") # t
176 | let pati' := index (cast {to=Nat} pati) pat
179 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.afterMatch: can't index into ByteString") # t
181 | case diffpati' == pati'' of
185 | case tryNatToFin Z of
187 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.afterMatch: can't convert Nat to Fin") # t
189 | let skip # t := get suffixshiftarr zero t
190 | kept := (cast {to=Int} (length pat)) - skip
191 | in case pati == kept of
193 | let final' := final :< diff
194 | diff' := diff + skip
195 | maxdiff := minus (length target) (length pat)
196 | in case (cast {to=Int} maxdiff) < diff' of
200 | assert_total (afterMatch diff' ((cast {to=Int} (length pat)) - 1) pat target final' occurrencesarr suffixshiftarr overlap t)
202 | assert_total (afterMatch diff (pati - 1) pat target final occurrencesarr suffixshiftarr overlap t)
204 | let kept := minus (length pat) (length pat)
205 | in case pati == (cast {to=Int} kept) of
207 | let final' := final :< diff
209 | diff' := diff + (cast {to=Int} skip)
210 | maxdiff := minus (length target) (length pat)
211 | in case (cast {to=Int} maxdiff) < diff' of
215 | assert_total (afterMatch diff' ((cast {to=Int} (length pat)) - 1) pat target final' occurrencesarr suffixshiftarr overlap t)
217 | assert_total (afterMatch diff (pati - 1) pat target final occurrencesarr suffixshiftarr overlap t)
219 | case pati == ((cast {to=Int} (length pat)) - 1) of
221 | case tryNatToFin (cast {to=Nat} diffpati') of
223 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.afterMatch: can't convert Nat to Fin") # t
225 | let occur # t := get occurrencesarr diffpati'' t
226 | occur' := diff + (2 * ((cast {to=Int} (length pat)) - 1)) + occur
227 | in assert_total (checkEnd occur' pat target final occurrencesarr suffixshiftarr overlap t)
229 | case tryNatToFin (cast {to=Nat} diffpati') of
231 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.afterMatch: can't convert Nat to Fin") # t
233 | case tryNatToFin (cast {to=Nat} pati) of
235 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.matcher.afterMatch: can't convert Nat to Fin") # t
237 | let occur # t := get occurrencesarr diffpati'' t
238 | goodshift # t := get suffixshiftarr pati'' t
239 | badshift := pati + occur
240 | diff' := diff + (max badshift goodshift)
241 | maxdiff := minus (length target) (length pat)
242 | in case (cast {to=Int} maxdiff) < diff' of
246 | assert_total (checkEnd (diff + ((cast {to=Int} (length pat)) - 1)) pat target final occurrencesarr suffixshiftarr overlap t)
272 | matchBM : (pat : ByteString)
273 | -> (target : ByteString)
274 | -> {0 prfpat : So (not $
null pat)}
275 | -> {0 prftarget : So (not $
null target)}
276 | -> {0 prflength : So ((length target) >= (length pat))}
278 | matchBM pat target {prfpat} {prftarget} {prflength} t =
279 | matcher False pat target t
303 | indicesBM : (pat : ByteString)
304 | -> (target : ByteString)
305 | -> {0 prfpat : So (not $
null pat)}
306 | -> {0 prftarget : So (not $
null target)}
307 | -> {0 prflength : So ((length target) >= (length pat))}
309 | indicesBM pat target {prfpat} {prftarget} {prflength} t =
310 | matcher True pat target t
324 | breakBM : (pat : ByteString)
325 | -> (target : ByteString)
326 | -> {0 prfpat : So (not $
null pat)}
327 | -> {0 prftarget : So (not $
null target)}
328 | -> {0 prflength : So ((length target) >= (length pat))}
329 | -> F1 s (ByteString, ByteString)
330 | breakBM pat target {prfpat} {prftarget} {prflength} t =
331 | let matcher' # t := matcher False pat target t
332 | in case matcher' of
334 | (target, empty) # t
336 | let target' := splitAt (cast {to=Nat} i) target
339 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.breakBM: can't split ByteString") # t
356 | breakAfterBM : (pat : ByteString)
357 | -> (target : ByteString)
358 | -> {0 prfpat : So (not $
null pat)}
359 | -> {0 prftarget : So (not $
null target)}
360 | -> {0 prflength : So ((length target) >= (length pat))}
361 | -> F1 s (ByteString, ByteString)
362 | breakAfterBM pat target {prfpat} {prftarget} {prflength} t =
363 | let matcher' # t := matcher False pat target t
364 | in case matcher' of
366 | (target, empty) # t
368 | let target' := splitAt (plus (cast {to=Nat} i) (length pat)) target
371 | (assert_total $
idris_crash "Data.ByteString.Search.BoyerMoore.breakBM: can't split ByteString") # t
390 | splitKeepFrontBM : (pat : ByteString)
391 | -> (target : ByteString)
392 | -> {0 prfpat : So (not $
null pat)}
393 | -> {0 prftarget : So (not $
null target)}
394 | -> {0 prflength : So ((length target) >= (length pat))}
395 | -> F1 s (List ByteString)
396 | splitKeepFrontBM pat target {prfpat} {prftarget} {prflength} t =
397 | let splitter' # t := splitter pat target Lin t
398 | in (splitter' <>> []) # t
400 | psSplitter : (pat : ByteString)
401 | -> (target : ByteString)
402 | -> (final : SnocList ByteString)
403 | -> F1 s (SnocList ByteString)
404 | psSplitter pat target final t =
405 | let matcher' # t := matcher False pat (drop (length pat) target) t
406 | in case matcher' of
408 | let final' := final :< target
411 | let length' := plus (cast {to=Nat} i) (length pat)
412 | final' := final :< (take length' target)
413 | in assert_total (psSplitter pat (drop length' target) final' t)
414 | splitter : (pat : ByteString)
415 | -> (target : ByteString)
416 | -> (final : SnocList ByteString)
417 | -> F1 s (SnocList ByteString)
418 | splitter pat target final t =
419 | let matcher' # t := matcher False pat target t
420 | in case matcher' of
422 | let final' := final :< target
427 | assert_total (psSplitter pat target final t)
429 | let final' := final :< (take (cast {to=Nat} i) target)
430 | in assert_total (psSplitter pat (drop (cast {to=Nat} i) target) final' t)
453 | splitKeepEndBM : (pat : ByteString)
454 | -> (target : ByteString)
455 | -> {0 prfpat : So (not $
null pat)}
456 | -> {0 prftarget : So (not $
null target)}
457 | -> {0 prflength : So ((length target) >= (length pat))}
458 | -> F1 s (List ByteString)
459 | splitKeepEndBM pat target {prfpat} {prftarget} {prflength} t =
460 | let splitter' # t := splitter pat target Lin t
461 | in (splitter' <>> []) # t
463 | splitter : (pat : ByteString)
464 | -> (target : ByteString)
465 | -> (final : SnocList ByteString)
466 | -> F1 s (SnocList ByteString)
467 | splitter pat target final t =
468 | let matcher' # t := matcher False pat target t
469 | in case matcher' of
471 | let final' := final :< target
474 | let length' := plus (cast {to=Nat} i) (length pat)
475 | final' := final :< (take length' target)
476 | in assert_total (splitter pat (drop length' target) final' t)
501 | splitDropBM : (pat : ByteString)
502 | -> (target : ByteString)
503 | -> {0 prfpat : So (not $
null pat)}
504 | -> {0 prftarget : So (not $
null target)}
505 | -> {0 prflength : So ((length target) >= (length pat))}
506 | -> F1 s (List ByteString)
507 | splitDropBM pat target {prfpat} {prftarget} {prflength} t =
508 | let splitter' # t := splitter pat target Lin t
509 | in (splitter' <>> []) # t
511 | splitter : (pat : ByteString)
512 | -> (target : ByteString)
513 | -> (final : SnocList ByteString)
514 | -> F1 s (SnocList ByteString)
515 | splitter pat target final t =
516 | let matcher' # t := matcher False pat target t
517 | in case matcher' of
519 | let final' := final :< target
522 | let length' := plus (cast {to=Nat} i) (length pat)
523 | final' := final :< (take (cast {to=Nat} i) target)
524 | in assert_total (splitter pat (drop length' target) final' t)
547 | replaceBM : (pat : ByteString)
548 | -> (sub : ByteString)
549 | -> (target : ByteString)
550 | -> {0 prfpat : So (not $
null pat)}
551 | -> {0 prftarget : So (not $
null target)}
552 | -> {0 prflength : So ((length target) >= (length pat))}
553 | -> F1 s (List ByteString)
554 | replaceBM pat sub target {prfpat} {prftarget} {prflength} t =
555 | let replacer' # t := replacer pat sub target Lin t
556 | in (replacer' <>> []) # t
558 | replacer : (pat : ByteString)
559 | -> (sub : ByteString)
560 | -> (target : ByteString)
561 | -> (final : SnocList ByteString)
562 | -> F1 s (SnocList ByteString)
563 | replacer pat sub target final t =
564 | let matcher' # t := matcher False pat target t
565 | in case matcher' of
567 | let final' := final :< target
574 | assert_total (replacer pat sub (drop (length pat) target) final t)
576 | let final' := final :< sub
577 | in assert_total (replacer pat sub (drop (length pat) target) final') t
581 | let length' := plus (cast {to=Nat} i) (length pat)
582 | final' := final :< (take (cast {to=Nat} i) target)
583 | in assert_total (replacer pat sub (drop length' target) final' t)
585 | let length' := plus (cast {to=Nat} i) (length pat)
586 | final' := final :< (take (cast {to=Nat} i) target) :< sub
587 | in assert_total (replacer pat sub (drop length' target) final' t)