7 | tail : String -> String
9 | tail str = assert_total (strTail str)
12 | splitBy : Char -> String -> (String, String)
13 | splitBy sep = mapSnd tail . break (== sep)
15 | utf8_bytelen : Bits8 -> Maybe (Bits8, Nat)
17 | if (x .&. 0b01111111) == x then Just (x, 0)
18 | else if (shiftR x 5) == 0b110 then Just (x .&. 0b0011111, 1)
19 | else if (shiftR x 4) == 0b1110 then Just (x .&. 0b0001111, 2)
20 | else if (shiftR x 3) == 0b11110 then Just (x .&. 0b0000111, 3)
23 | utf8_unmask : Bits8 -> Maybe Bits8
24 | utf8_unmask x = const (x .&. 0b00111111) <$> guard (shiftR x 6 == 0b10)
26 | utf8_pushbits : Integer -> List Bits8 -> Integer
27 | utf8_pushbits acc [] = acc
28 | utf8_pushbits acc (x::xs) = utf8_pushbits ((shiftL acc 6) .|. (cast x)) xs
31 | utf8_pack : List Bits8 -> Maybe String
34 | go : List Char -> List Bits8 -> Maybe String
35 | go acc [] = Just $
pack $
reverse acc
36 | go acc (x :: xs) = do
37 | (x, l) <- utf8_bytelen x
38 | let (y,ys) = splitAt l xs
39 | guard (length y == l)
40 | y <- traverse utf8_unmask y
41 | let c = utf8_pushbits (cast x) y
42 | go ((cast c) :: acc) ys
44 | utf8_char_bytelen : Integer -> (Nat, Bits8)
45 | utf8_char_bytelen x =
46 | if x <= 0x7F then (0, 0)
47 | else if x <= 0x07FF then (1, 0b11000000)
48 | else if x <= 0xFFFF then (2, 0b11100000)
49 | else (3, 0b11110000)
51 | utf8_encode : Integer -> List Bits8
53 | case utf8_char_bytelen i of
54 | (0, _) => [ cast i ]
55 | (n, m) => loop [] i n m where
56 | loop : List Bits8 -> Integer -> Nat -> Bits8 -> List Bits8
57 | loop acc i Z mask = (cast i .|. mask) :: acc
58 | loop acc i (S n) mask =
59 | let b = cast (i .&. 0b111111) .|. 0b10000000
61 | in loop (b :: acc) i' n mask
64 | utf8_unpack : String -> List Bits8
65 | utf8_unpack str = unpack str >>= utf8_encode . cast