0 | module Text.Smiles.Parser
5 | import Derive.Prelude
8 | import Text.ILex.Derive
9 | import Text.Smiles.Types
12 | %language ElabReflection
19 | drop : List a -> List a
24 | doubleHead : List a -> List a
25 | doubleHead l@(h::_) = h::l
29 | data SmilesErr : Type where
30 | RingBondMismatch : SmilesErr
31 | UnclosedRing : SmilesErr
32 | ManyEntries : SmilesErr
35 | Interpolation SmilesErr where
36 | interpolate RingBondMismatch = "Ring bonds do not match"
37 | interpolate UnclosedRing = "Unclosed ring"
38 | interpolate ManyEntries = "More than one molecule"
40 | %runElab derive "SmilesErr" [Eq,Show]
43 | 0 SmilesParseErr : Type
44 | SmilesParseErr = ParseError SmilesErr
46 | data DOB : Type where
49 | Bnd : SmilesBond -> DOB
51 | record RingInfo n where
56 | bond : Maybe SmilesBond
59 | record AtomInfo n where
64 | smilesBond : (xa,ya : Bool) -> SmilesBond
65 | smilesBond True True = Arom
66 | smilesBond _ _ = Sngl
68 | ringBond : (b,c : Maybe SmilesBond) -> (x,y : SmilesAtom) -> Maybe SmilesBond
69 | ringBond Nothing Nothing x y = Just $
smilesBond (isArom x) (isArom y)
70 | ringBond Nothing (Just x) _ _ = Just x
71 | ringBond (Just x) Nothing _ _ = Just x
72 | ringBond (Just x) (Just y) _ _ = if x == y then Just x else Nothing
74 | lookupRing : RingNr -> List (RingInfo n) -> Maybe (RingInfo n)
75 | lookupRing r [] = Nothing
76 | lookupRing r (x::xs) = case compare r (nr x) of
79 | GT => lookupRing r xs
81 | insert : RingInfo n -> List (RingInfo n) -> List (RingInfo n)
83 | insert r (x::xs) = if r.nr < x.nr then r::x::xs else x::insert r xs
85 | delete : RingNr -> List (RingInfo n) -> List (RingInfo n)
87 | delete r (x::xs) = if r == x.nr then xs else x::delete r xs
89 | ringBondMismatch : Ring -> BytePos -> BBErr SmilesErr
90 | ringBondMismatch r p =
91 | let bs := BB p $
incLen (length "\{r}") p
92 | in B (Custom RingBondMismatch) bs
97 | stck : List (AtomInfo cnt)
98 | atoms : SnocVect cnt SmilesAtom
99 | bonds : List (Edge cnt SmilesBond)
100 | rings : List (RingInfo cnt)
103 | empty = S 0 [] [<] [] []
105 | toGraph : ST -> Either (BBErr SmilesErr) SmilesGraph
106 | toGraph (S n _ a b []) = Right $
G n (mkGraph (cast a) b)
107 | toGraph (S _ _ _ _ (R _ r _ b p ::_)) =
108 | let bs := BB p $
incLen (length "\{R r b}") p
109 | in Left $
B (Custom UnclosedRing) bs
111 | weakenST : List (AtomInfo n) -> List (AtomInfo $
S n)
112 | weakenST x = believe_me x
114 | weakenBS : List (Edge n e) -> List (Edge (S n) e)
115 | weakenBS x = believe_me x
117 | weakenRS : List (RingInfo n) -> List (RingInfo (S n))
118 | weakenRS x = believe_me x
120 | plainAtom : SmilesAtom -> ST -> ST
121 | plainAtom a1 (S c (A n a::ss) sa bs rs) =
122 | let bs2 := edge n (smilesBond (isArom a) (isArom a1)) :: weakenBS bs
123 | st2 := A last a1 :: weakenST ss
124 | in S (S c) st2 (sa:<a1) bs2 (weakenRS rs)
125 | plainAtom a1 st = st
127 | atomWithBond : SmilesBond -> SmilesAtom -> ST -> ST
128 | atomWithBond b a1 (S c (A n _::ss) sa bs rs) =
129 | let bs2 := edge n b :: weakenBS bs
130 | st2 := A last a1 :: weakenST ss
131 | in S (S c) st2 (sa:<a1) bs2 (weakenRS rs)
132 | atomWithBond b a1 st = st
134 | dottedAtom : SmilesAtom -> ST -> ST
135 | dottedAtom a1 (S c st sa bs rs) =
136 | S (S c) (A last a1 :: weakenST st) (sa:<a1) (weakenBS bs) (weakenRS rs)
138 | addRing : BytePos -> Ring -> ST -> Either (BBErr SmilesErr) ST
139 | addRing p (R r mb1) st =
141 | A n1 a1::_ => case lookupRing r st.rings of
142 | Just (R n2 nr a2 mb2 p) => case ringBond mb1 mb2 a1 a2 of
143 | Just b => case mkEdge n1 n2 b of
144 | Just e => Right $
{bonds $= (e::), rings $= delete r} st
145 | Nothing => Right st
146 | Nothing => Left (ringBondMismatch (R r mb1) p)
147 | Nothing => Right $
{rings $= insert (R n1 r a1 mb1 p)} st
155 | record SSTCK (q : Type) where
157 | prev_ : Ref q ByteString
158 | cur_ : Ref q ByteString
159 | offset_ : Ref q Nat
160 | relpos_ : Ref q Integer
162 | positions_ : Ref q (SnocList BytePos)
165 | mass : Ref q (Maybe MassNr)
166 | elem : Ref q AromElem
167 | chirality : Ref q Chirality
168 | hcount : Ref q HCount
169 | charge : Ref q Charge
170 | error_ : Ref q (Maybe $
BBErr SmilesErr)
171 | stack_ : Ref q (SnocList SmilesGraph)
173 | %runElab derive "SSTCK" [HasBytes, HasBBErr, HasStack]
175 | init : F1 q (SSTCK q)
186 | el <- ref1 (MkAE C False)
187 | cy <- ref1 (the Chirality None)
188 | hc <- ref1 (the HCount 0)
189 | ch <- ref1 (the Charge 0)
192 | pure (SS pr fl ro rr ll ps s db ms el cy hc ch er st)
194 | %runElab deriveParserState "SSz" "SST"
195 | [ "Chain", "NewBranch", "SRing", "Closed", "Err", "Atom"
196 | , "BMass","BElem","BChiral","BHCount","BCharge","BEnd"
199 | endGraph : SSTCK q -> F1 q (Maybe (BBErr SmilesErr))
200 | endGraph sk = T1.do
201 | st <- replace1 sk.st empty
203 | let Right g := toGraph st | Left err => pure (Just err)
204 | [<] <- replace1 sk.positions_ [<]
205 | | _:<p => pure $
Just (B (Unclosed "(") (BB p p))
208 | _ => push1 sk.stack_ g >> pure Nothing
210 | onAtom : SmilesAtom -> Step1 q SSz SSTCK
211 | onAtom a = \sk,t =>
212 | let s # t := read1 sk.st t
213 | in case read1 sk.dob t of
214 | No # t => writeAs sk.st (plainAtom a s) SRing t
216 | let _ # t := write1 sk.dob No t
217 | in writeAs sk.st (atomWithBond b a s) SRing t
219 | let _ # t := write1 sk.dob No t
220 | in writeAs sk.st (dottedAtom a s) SRing t
222 | onRing : Ring -> Step1 q SSz SSTCK
223 | onRing r = \sk,t =>
224 | let p # t := startPos t
225 | s # t := read1 sk.st t
226 | in case addRing p r s of
227 | Right s2 => writeAs sk.st s2 SRing t
228 | Left x => failWith x Err t
230 | bracket : (sk : SSTCK q) => F1 q SST
232 | let m # t := replace1 sk.mass Nothing t
233 | e # t := read1 sk.elem t
234 | cy # t := replace1 sk.chirality None t
235 | h # t := replace1 sk.hcount 0 t
236 | ch # t := replace1 sk.charge 0 t
237 | in onAtom (bracket (aromIsotope m e) cy h ch) sk t
239 | mass : (RExp True, Step q SSz SSTCK)
240 | mass = bytes (plus digit) wrt
242 | %inline wrt : (sk : SSTCK q) => ByteString ->F1 q SST
243 | wrt bs = writeAs sk.mass (refineMassNr $
cast $
decimal bs) BElem
245 | elem : List (RExp True, Step q SSz SSTCK)
246 | elem = writeVals interpolate elem BChiral values
248 | chirality : List (RExp True, Step q SSz SSTCK)
249 | chirality = writeVals interpolate chirality BHCount values
251 | hc : List (RExp True, Step q SSz SSTCK)
252 | hc = step "H1" (wrt 1) :: vals encodeH (\v => \sk,t => wrt v t) values
254 | wrt : HCount -> (sk : SSTCK q) => F1 q SST
255 | wrt c = writeAs sk.hcount c BCharge
257 | chrg : List (RExp True, Step q SSz SSTCK)
260 | :: step "-1" (wrt (-
1))
261 | :: step "++" (wrt 2)
262 | :: step "--" (wrt (-
2))
263 | :: vals encodeCharge (\v => \sk,t => wrt v t) values
265 | wrt : Charge -> (sk : SSTCK q) => F1 q SST
266 | wrt c = writeAs sk.charge c BEnd
268 | bend : List (RExp True, Step q SSz SSTCK)
269 | bend = [close ']' bracket]
271 | atom : List (RExp True, Step q SSz SSTCK)
272 | atom = opn' '[' BMass :: vals encodeAtom onAtom subset
274 | ring : List (RExp True, Step q SSz SSTCK)
275 | ring = vals interpolate onRing values
277 | bond : List (RExp True, Step q SSz SSTCK)
278 | bond = step '.' dot :: vals interpolate wrt values
280 | %inline wrt : SmilesBond -> Step1 q SSz SSTCK
281 | wrt b = \sk,t => writeAs sk.dob (Bnd b) Atom t
283 | dot : (k : SSTCK q) => F1 q SST
284 | dot = writeAs k.dob Dot Atom
286 | openClose : List (RExp True, Step q SSz SSTCK)
287 | openClose = [opn '(' op, close ')' cls]
289 | op,cls : (k : SSTCK q) => F1 q SST
290 | op = read1 k.st >>= \s => writeAs k.st ({stck $= doubleHead} s) NewBranch
291 | cls = read1 k.st >>= \s => writeAs k.st ({stck $= drop} s) Closed
293 | space : List (RExp True, Step q SSz SSTCK)
294 | space = [bytes (plus $
oneof [' ', '\t']) (const end), step nl end]
297 | nl = "\r\n" <|> '\n' <|> '\r'
299 | end : (sk : SSTCK q) => F1 q SST
301 | Nothing <- endGraph sk | Just x => failWith x Err
304 | smilesTrans : Lex1 q SSz SSTCK
307 | [ E Chain $
dfa (atom ++ space)
308 | , E Atom $
dfa atom
309 | , E SRing $
dfa (atom ++ ring ++ bond ++ openClose ++ space)
310 | , E NewBranch $
dfa (atom ++ bond)
311 | , E Closed $
dfa (atom ++ bond ++ openClose ++ space)
312 | , E BMass $
dfa (mass :: elem)
313 | , E BElem $
dfa elem
314 | , E BChiral $
dfa (chirality ++ hc ++ chrg ++ bend)
315 | , E BHCount $
dfa (hc ++ chrg ++ bend)
316 | , E BCharge $
dfa (chrg ++ bend)
317 | , E BEnd $
dfa bend
320 | smilesErr : Arr32 SSz (SSTCK q -> F1 q (BBErr SmilesErr))
322 | arr32 SSz (unexpected [])
323 | [ E BMass $
unclosedIfNLorEOI "[" []
324 | , E BElem $
unclosedIfNLorEOI "[" []
325 | , E BChiral $
unclosedIfNLorEOI "[" []
326 | , E BHCount $
unclosedIfNLorEOI "[" []
327 | , E BEnd $
unclosedIfNLorEOI "[" []
333 | -> F1 q (Either (BBErr SmilesErr) (List SmilesGraph))
335 | case st == Chain || st == SRing || st == Closed of
336 | False => arrFail SSTCK smilesErr st sk
337 | True => endGraph sk >>= \case
338 | Just x => pure (Left x)
339 | Nothing => getList sk.stack_ >>= pure . Right
342 | smiles : P1 q (BBErr SmilesErr) (List SmilesGraph)
343 | smiles = P Chain init smilesTrans snocChunk smilesErr smilesEOI
347 | parseSmiles : Origin -> String -> Either SmilesParseErr (List SmilesGraph)
348 | parseSmiles = parseString smiles
350 | test : String -> IO ()
352 | case parseSmiles Virtual s of
353 | Right x => for_ x $
\(G _ g) => putStrLn (pretty interpolate interpolate g)
354 | Left x => putStrLn "\{x}"
358 | {auto has : Has SmilesParseErr es}
361 | -> ChemRes es SmilesGraph
362 | readSmilesFrom o s =
363 | case parseSmiles o s of
364 | Left x => Left (inject x)
365 | Right [] => Right (G 0 empty)
366 | Right [g] => Right g
368 | Left (inject $
toParseError o s (B (Custom ManyEntries) NoBounds))
371 | readSmiles : Has SmilesParseErr es => String -> ChemRes es SmilesGraph
372 | readSmiles = readSmilesFrom Virtual
379 | readSmiles' : String -> Either String SmilesGraph
381 | mapFst (interpolate . project1) . readSmiles {es = [SmilesParseErr]}