0 | module Text.Regex.Parser
6 | import Text.Regex.Interface
15 | data BadRegex : Type where
16 | RegexIsBad : (index : Nat) -> (reason : String) -> BadRegex
25 | | Class Bool CharClass
29 | matchesBracket : Char -> BracketChars -> Bool
30 | matchesBracket c $
One k = c == k
31 | matchesBracket c $
RangeOpen k = c == k
32 | matchesBracket c $
Class nn cl = nn == charClass cl c
33 | matchesBracket c $
Range l r = l <= c && c <= r
37 | bracketMatcher : Regex rx => Foldable f => (positive : Bool) -> f BracketChars -> rx Char
38 | bracketMatcher p cs = sym (\c => p == any (matchesBracket c) cs)
41 | baseNumDescr : (base : Nat) -> String
42 | baseNumDescr 10 = "decimal"
43 | baseNumDescr 16 = "hexadecimal"
44 | baseNumDescr 2 = "binary"
45 | baseNumDescr 8 = "octal"
46 | baseNumDescr b = "\{show b}-base"
49 | parseNat : (base : Nat) -> (0 _ : So $
2 <= base && base <= 36) => {default Z acc : Nat} -> (pos : Lazy Nat) -> List Char -> Either BadRegex Nat
50 | parseNat base pos [] = Left $
RegexIsBad pos "a \{baseNumDescr base} number is expected"
51 | parseNat base pos (x::xs) = do
52 | let Just n = parseDigit base x
53 | | Nothing => Left $
RegexIsBad pos "bad \{baseNumDescr base} number"
54 | let acc = acc * base + cast n
57 | _ => parseNat base {acc} (S pos) xs
62 | parseCharsSet : (startLen, origLen : Lazy Nat) -> (start : Bool) -> SnocList BracketChars -> List Char -> Either BadRegex (List Char, List BracketChars)
63 | parseCharsSet stL orL start curr [] = Left $
RegexIsBad stL "unmatched opening square bracket"
64 | parseCharsSet stL orL False curr (']' :: xs) = pure (xs, cast curr)
65 | parseCharsSet stL orL start curr $
'\\'::'n' :: xs = parseCharsSet stL orL False (curr :< One '\n') xs
66 | parseCharsSet stL orL start curr $
'\\'::'r' :: xs = parseCharsSet stL orL False (curr :< One '\r') xs
67 | parseCharsSet stL orL start curr $
'\\'::'t' :: xs = parseCharsSet stL orL False (curr :< One '\t') xs
68 | parseCharsSet stL orL start curr $
'\\'::'f' :: xs = parseCharsSet stL orL False (curr :< One '\f') xs
69 | parseCharsSet stL orL start curr $
'\\'::'v' :: xs = parseCharsSet stL orL False (curr :< One '\v') xs
70 | parseCharsSet stL orL start curr $
'\\'::'0' :: xs = parseCharsSet stL orL False (curr :< One '\0') xs
71 | parseCharsSet stL orL start curr $
'\\'::'a' :: xs = parseCharsSet stL orL False (curr :< One '\a') xs
72 | parseCharsSet stL orL start curr $
'\\'::'\\':: xs = parseCharsSet stL orL False (curr :< One '\\') xs
73 | parseCharsSet stL orL start curr $
'\\'::'w' :: xs = parseCharsSet stL orL False (curr :< Class True Word) xs
74 | parseCharsSet stL orL start curr $
'\\'::'W' :: xs = parseCharsSet stL orL False (curr :< Class False Word) xs
75 | parseCharsSet stL orL start curr $
'\\'::'s' :: xs = parseCharsSet stL orL False (curr :< Class True Space) xs
76 | parseCharsSet stL orL start curr $
'\\'::'S' :: xs = parseCharsSet stL orL False (curr :< Class False Space) xs
77 | parseCharsSet stL orL start curr $
'\\'::'d' :: xs = parseCharsSet stL orL False (curr :< Class True Digit) xs
78 | parseCharsSet stL orL start curr $
'\\'::'D' :: xs = parseCharsSet stL orL False (curr :< Class False Digit) xs
79 | parseCharsSet stL orL start curr $
'\\'::'x'::'{' :: xs = do
80 | let (hexes, rest) = span (/= '}') xs
81 | let '}' :: rest = rest | _ => Left $
RegexIsBad (orL `minus` S (length xs)) "unmatched opening curly bracket"
82 | n <- parseNat 16 (orL `minus` length xs) hexes
83 | let True = shiftR (cast {to=Integer} n) 32 == 0 | False => Left $
RegexIsBad (orL `minus` length xs) "we expect no more than a 32-bit hex number"
84 | parseCharsSet stL orL False (curr :< One (chr $
cast n)) $
assert_smaller xs rest
85 | parseCharsSet stL orL start curr $
'\\'::'x'::ulxs@(u::l :: xs) = do
86 | n <- parseNat 16 (orL `minus` length ulxs) [u,l]
87 | parseCharsSet stL orL False (curr :< One (chr $
cast n)) xs
88 | parseCharsSet stL orL start curr xxs@('\\'::'x':: xs) =
89 | Left $
RegexIsBad (orL `minus` length xxs) "bad hex character command, use formats \xFF or \x{FFF...}"
90 | parseCharsSet stL orL start curr $
'\\'::x :: xs = parseCharsSet stL orL False (curr :< One x) xs
91 | parseCharsSet stL orL start curr $
'['::':'::'u'::'p'::'p'::'e'::'r'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Upper) xs
92 | parseCharsSet stL orL start curr $
'['::':'::'l'::'o'::'w'::'e'::'r'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Lower) xs
93 | parseCharsSet stL orL start curr $
'['::':'::'a'::'l'::'p'::'h'::'a'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Alpha) xs
94 | parseCharsSet stL orL start curr $
'['::':'::'a'::'l'::'n'::'u'::'m'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Alnum) xs
95 | parseCharsSet stL orL start curr $
'['::':'::'d'::'i'::'g'::'i'::'t'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Digit) xs
96 | parseCharsSet stL orL start curr $
'['::':'::'x'::'d'::'i'::'g'::'i'::'t'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True XDigit) xs
97 | parseCharsSet stL orL start curr $
'['::':'::'p'::'u'::'n'::'c'::'t'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Punct) xs
98 | parseCharsSet stL orL start curr $
'['::':'::'b'::'l'::'a'::'n'::'k'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Blank) xs
99 | parseCharsSet stL orL start curr $
'['::':'::'s'::'p'::'a'::'c'::'e'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Space) xs
100 | parseCharsSet stL orL start curr $
'['::':'::'c'::'n'::'t'::'r'::'l'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Cntrl) xs
101 | parseCharsSet stL orL start curr $
'['::':'::'g'::'r'::'a'::'p'::'h'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Graph) xs
102 | parseCharsSet stL orL start curr $
'['::':'::'p'::'r'::'i'::'n'::'t'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Print) xs
103 | parseCharsSet stL orL start curr $
'['::':'::'a'::'s'::'c'::'i'::'i'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Ascii) xs
104 | parseCharsSet stL orL start curr $
'['::':'::'w'::'o'::'r'::'d'::':'::']' :: xs = parseCharsSet stL orL False (curr :< Class True Word) xs
105 | parseCharsSet stL orL start (curr :< RangeOpen l :< One '-') rxs@(r :: xs) = if l <= r
106 | then parseCharsSet stL orL False (curr :< Range l r) xs
107 | else Left $
RegexIsBad (orL `minus` 1 + length rxs) "invalid range, left is greater than `right"
108 | parseCharsSet stL orL start curr (l::'-' :: xs) = parseCharsSet stL orL False (curr :< RangeOpen l :< One '-') xs
109 | parseCharsSet stL orL start curr (x :: xs) = parseCharsSet stL orL False (curr :< One x) xs