4 | import Data.ByteString
5 | import Data.Linear.Ref1
6 | import Derive.Prelude
8 | import IO.Async.Loop.Epoll
9 | import IO.Async.Loop.Posix
11 | import Text.ILex.Derive
14 | import public Text.ILex
17 | %language ElabReflection
24 | data CoordinateSystem = ZeroBased
27 | %runElab derive "CoordinateSystem" [Show,Eq]
34 | data FASTAValue : Type where
35 | NL : ByteString -> FASTAValue
36 | HeaderStart : FASTAValue
37 | HeaderValue : String -> FASTAValue
38 | Adenine : Nat -> FASTAValue
39 | Thymine : Nat -> FASTAValue
40 | Guanine : Nat -> FASTAValue
41 | Cytosine : Nat -> FASTAValue
43 | %runElab derive "FASTAValue" [Show,Eq]
45 | isHeader : FASTAValue -> Bool
46 | isHeader HeaderStart = True
47 | isHeader (HeaderValue _) = True
50 | isData : FASTAValue -> Bool
51 | isData (Adenine _) = True
52 | isData (Thymine _) = True
53 | isData (Guanine _) = True
54 | isData (Cytosine _) = True
62 | record FASTALine where
63 | constructor MkFASTALine
65 | values : List FASTAValue
67 | %runElab derive "FASTALine" [Show,Eq]
69 | Interpolation FASTALine where interpolate = show
77 | FASTA = List FASTALine
83 | linebreak : RExp True
84 | linebreak = '\n' <|> '\r' <|> "\r\n"
86 | nucleotide : RExp True
87 | nucleotide = 'A' <|> 'T' <|> 'G' <|> 'C'
98 | cytosine : RExp True
106 | record FSTCK (q : Type) where
110 | psns : Ref q (SnocList Position)
111 | strs : Ref q (SnocList String)
112 | err : Ref q (Maybe $
BoundedErr Void)
113 | fastavalues : Ref q (SnocList FASTAValue)
114 | fastalines : Ref q (SnocList FASTALine)
115 | fastacounter : Ref q Nat
116 | bytes : Ref q ByteString
119 | HasPosition FSTCK where
122 | positions = FSTCK.psns
125 | HasError FSTCK Void where
129 | HasStringLits FSTCK where
133 | HasStack FSTCK (SnocList FASTALine) where
137 | HasBytes FSTCK where
138 | bytes = FSTCK.bytes
141 | fastainit : CoordinateSystem -> F1 q (FSTCK q)
142 | fastainit coordsys = T1.do
150 | fc <- case coordsys of
151 | ZeroBased => ref1 Z
152 | OneBased => ref1 (S Z)
154 | pure (F l c bs ss er fvs fls fc by)
160 | %runElab deriveParserState "FSz" "FST"
161 | ["FIni", "FBroken", "FHdr", "FHdrToNLR", "FHdrToNLS", "FHdrDone", "FD", "FDNL", "FEmpty", "FComplete"]
167 | fastaErr : Arr32 FSz (FSTCK q -> F1 q (BoundedErr Void))
169 | arr32 FSz (unexpected [])
170 | [ E FBroken $
unexpected ["character other than '>'"]
171 | , E FEmpty $
unexpected ["sequence data"]
172 | , E FHdr $
unexpected ["sequence line"]
179 | onFASTAValueHdrS : (x : FSTCK q) => FASTAValue -> F1 q FST
180 | onFASTAValueHdrS v = push1 x.fastavalues v >> pure FHdrToNLS
182 | onFASTAValueHdrR : (x : FSTCK q) => FASTAValue -> F1 q FST
183 | onFASTAValueHdrR v = push1 x.fastavalues v >> pure FHdrToNLR
185 | onFASTAValueAdenine : (x : FSTCK q) => F1 q FST
186 | onFASTAValueAdenine = T1.do
187 | fc <- read1 x.fastacounter
188 | push1 x.fastavalues (Adenine fc) >> write1 x.fastacounter (S fc) >> pure FD
190 | onFASTAValueThymine : (x : FSTCK q) => F1 q FST
191 | onFASTAValueThymine = T1.do
192 | fc <- read1 x.fastacounter
193 | push1 x.fastavalues (Thymine fc) >> write1 x.fastacounter (S fc) >> pure FD
195 | onFASTAValueGuanine : (x : FSTCK q) => F1 q FST
196 | onFASTAValueGuanine = T1.do
197 | fc <- read1 x.fastacounter
198 | push1 x.fastavalues (Guanine fc) >> write1 x.fastacounter (S fc) >> pure FD
200 | onFASTAValueCytosine : (x : FSTCK q) => F1 q FST
201 | onFASTAValueCytosine = T1.do
202 | fc <- read1 x.fastacounter
203 | push1 x.fastavalues (Cytosine fc) >> write1 x.fastacounter (S fc) >> pure FD
205 | onNLFHdr : (x : FSTCK q) => ByteString -> F1 q FST
208 | push1 x.fastavalues (NL v)
209 | fvs@(_::_) <- getList x.fastavalues | [] => pure FEmpty
210 | case Prelude.any isHeader fvs && Prelude.any isData fvs of
211 | True => pure FBroken
214 | push1 x.fastalines (MkFASTALine ln fvs)
217 | onNLFD : (x : FSTCK q) => ByteString -> F1 q FST
220 | push1 x.fastavalues (NL v)
221 | fvs@(_::_) <- getList x.fastavalues | [] => pure FEmpty
222 | case Prelude.any isHeader fvs && Prelude.any isData fvs of
223 | True => pure FBroken
226 | push1 x.fastalines (MkFASTALine ln fvs)
229 | onEOI : (x : FSTCK q) => F1 q (Either (BoundedErr Void) FST)
232 | fvs@(_::_) <- getList x.fastavalues
233 | | [] => arrFail FSTCK fastaErr FEmpty x
235 | push1 x.fastalines (MkFASTALine ln fvs)
236 | pure (Right FComplete)
238 | fastaInit : DFA q FSz FSTCK
241 | [ read '>' (\_ => onFASTAValueHdrS HeaderStart)
244 | fastaHdrStrStart : DFA q FSz FSTCK
247 | [ read dot (onFASTAValueHdrR . HeaderValue)
250 | fastaHdrStrRest : DFA q FSz FSTCK
253 | [ read dot (onFASTAValueHdrR . HeaderValue)
254 | , conv linebreak (\bs => onNLFHdr bs)
257 | fastaFDInit : DFA q FSz FSTCK
260 | [ read adenine (\_ => onFASTAValueAdenine)
261 | , read thymine (\_ => onFASTAValueThymine)
262 | , read guanine (\_ => onFASTAValueGuanine)
263 | , read cytosine (\_ => onFASTAValueCytosine)
266 | fastaFD : DFA q FSz FSTCK
269 | [ conv linebreak (\bs => onNLFD bs)
270 | , read adenine (\_ => onFASTAValueAdenine)
271 | , read thymine (\_ => onFASTAValueThymine)
272 | , read guanine (\_ => onFASTAValueGuanine)
273 | , read cytosine (\_ => onFASTAValueCytosine)
276 | fastaSteps : Lex1 q FSz FSTCK
280 | , E FHdrToNLS fastaHdrStrStart
281 | , E FHdrToNLR fastaHdrStrRest
282 | , E FHdrDone fastaFDInit
283 | , E FDNL fastaFDInit
287 | fastaEOI : FST -> FSTCK q -> F1 q (Either (BoundedErr Void) FASTA)
289 | case st == FIni || st == FHdr || st == FEmpty || st == FBroken of
290 | True => arrFail FSTCK fastaErr st x
293 | fasta <- getList x.fastalines
301 | fasta : CoordinateSystem -> P1 q (BoundedErr Void) FASTA
302 | fasta coordsys = P FIni (fastainit coordsys) fastaSteps snocChunk fastaErr fastaEOI
305 | parseFASTA : CoordinateSystem -> Origin -> String -> Either (ParseError Void) FASTA
306 | parseFASTA coordsys origin str = parseString (fasta coordsys) origin str
312 | streamFASTA : CoordinateSystem
314 | -> AsyncPull Poll Void [ParseError Void, Errno] ()
315 | streamFASTA coordsys pth =
317 | |> streamParse (fasta coordsys) (FileSrc pth)
319 | |> printLnTo Stdout
321 | streamFASTAFiles : CoordinateSystem
322 | -> AsyncPull Poll String [ParseError Void, Errno] ()
323 | -> AsyncPull Poll Void [ParseError Void, Errno] ()
324 | streamFASTAFiles coordsys pths =
325 | flatMap pths (\p => readBytes p |> streamParse (fasta coordsys) (FileSrc p))
327 | |> printLnTo Stdout