0 | module Text.Smiles.Parser
5 | import Derive.Prelude
8 | import Text.ILex.Derive
9 | import Text.Smiles.Types
12 | %language ElabReflection
19 | drop : List a -> List a
24 | doubleHead : List a -> List a
25 | doubleHead l@(h::_) = h::l
29 | data SmilesErr : Type where
30 | RingBondMismatch : SmilesErr
31 | UnclosedRing : SmilesErr
32 | ManyEntries : SmilesErr
35 | Interpolation SmilesErr where
36 | interpolate RingBondMismatch = "Ring bonds do not match"
37 | interpolate UnclosedRing = "Unclosed ring"
38 | interpolate ManyEntries = "More than one molecule"
40 | %runElab derive "SmilesErr" [Eq,Show]
43 | 0 SmilesParseErr : Type
44 | SmilesParseErr = ParseError SmilesErr
46 | data DOB : Type where
49 | Bnd : SmilesBond -> DOB
51 | record RingInfo n where
56 | bond : Maybe SmilesBond
59 | record AtomInfo n where
64 | smilesBond : (xa,ya : Bool) -> SmilesBond
65 | smilesBond True True = Arom
66 | smilesBond _ _ = Sngl
68 | ringBond : (b,c : Maybe SmilesBond) -> (x,y : SmilesAtom) -> Maybe SmilesBond
69 | ringBond Nothing Nothing x y = Just $
smilesBond (isArom x) (isArom y)
70 | ringBond Nothing (Just x) _ _ = Just x
71 | ringBond (Just x) Nothing _ _ = Just x
72 | ringBond (Just x) (Just y) _ _ = if x == y then Just x else Nothing
74 | lookupRing : RingNr -> List (RingInfo n) -> Maybe (RingInfo n)
75 | lookupRing r [] = Nothing
76 | lookupRing r (x::xs) = case compare r (nr x) of
79 | GT => lookupRing r xs
81 | insert : RingInfo n -> List (RingInfo n) -> List (RingInfo n)
83 | insert r (x::xs) = if r.nr < x.nr then r::x::xs else x::insert r xs
85 | delete : RingNr -> List (RingInfo n) -> List (RingInfo n)
87 | delete r (x::xs) = if r == x.nr then xs else x::delete r xs
89 | ringBondMismatch : Ring -> Position -> BoundedErr SmilesErr
90 | ringBondMismatch r p =
91 | let bs := BS p $
addCol (length "\{r}") p
92 | in B (Custom RingBondMismatch) bs
97 | stck : List (AtomInfo cnt)
98 | atoms : SnocVect cnt SmilesAtom
99 | bonds : List (Edge cnt SmilesBond)
100 | rings : List (RingInfo cnt)
103 | empty = S 0 [] [<] [] []
105 | toGraph : ST -> Either (BoundedErr SmilesErr) SmilesGraph
106 | toGraph (S n _ a b []) = Right $
G n (mkGraph (cast a) b)
107 | toGraph (S _ _ _ _ (R _ r _ b p ::_)) =
108 | let bs := BS p $
addCol (length "\{R r b}") p
109 | in Left $
B (Custom UnclosedRing) bs
111 | weakenST : List (AtomInfo n) -> List (AtomInfo $
S n)
112 | weakenST x = believe_me x
114 | weakenBS : List (Edge n e) -> List (Edge (S n) e)
115 | weakenBS x = believe_me x
117 | weakenRS : List (RingInfo n) -> List (RingInfo (S n))
118 | weakenRS x = believe_me x
120 | plainAtom : SmilesAtom -> ST -> ST
121 | plainAtom a1 (S c (A n a::ss) sa bs rs) =
122 | let bs2 := edge n (smilesBond (isArom a) (isArom a1)) :: weakenBS bs
123 | st2 := A last a1 :: weakenST ss
124 | in S (S c) st2 (sa:<a1) bs2 (weakenRS rs)
125 | plainAtom a1 st = st
127 | atomWithBond : SmilesBond -> SmilesAtom -> ST -> ST
128 | atomWithBond b a1 (S c (A n _::ss) sa bs rs) =
129 | let bs2 := edge n b :: weakenBS bs
130 | st2 := A last a1 :: weakenST ss
131 | in S (S c) st2 (sa:<a1) bs2 (weakenRS rs)
132 | atomWithBond b a1 st = st
134 | dottedAtom : SmilesAtom -> ST -> ST
135 | dottedAtom a1 (S c st sa bs rs) =
136 | S (S c) (A last a1 :: weakenST st) (sa:<a1) (weakenBS bs) (weakenRS rs)
138 | addRing : Position -> Ring -> ST -> Either (BoundedErr SmilesErr) ST
139 | addRing p (R r mb1) st =
141 | A n1 a1::_ => case lookupRing r st.rings of
142 | Just (R n2 nr a2 mb2 p) => case ringBond mb1 mb2 a1 a2 of
143 | Just b => case mkEdge n1 n2 b of
144 | Just e => Right $
{bonds $= (e::), rings $= delete r} st
145 | Nothing => Right st
146 | Nothing => Left (ringBondMismatch (R r mb1) p)
147 | Nothing => Right $
{rings $= insert (R n1 r a1 mb1 p)} st
155 | record SSTCK (q : Type) where
159 | positions_ : Ref q (SnocList Position)
162 | bytes_ : Ref q ByteString
163 | mass : Ref q (Maybe MassNr)
164 | elem : Ref q AromElem
165 | chirality : Ref q Chirality
166 | hcount : Ref q HCount
167 | charge : Ref q Charge
168 | error_ : Ref q (Maybe $
BoundedErr SmilesErr)
169 | stack_ : Ref q (SnocList SmilesGraph)
171 | %runElab derive "SSTCK" [HasPosition, HasBytes, HasError, HasStack]
173 | init : F1 q (SSTCK q)
180 | b <- ref1 ByteString.empty
182 | el <- ref1 (MkAE C False)
183 | cy <- ref1 (the Chirality None)
184 | hc <- ref1 (the HCount 0)
185 | ch <- ref1 (the Charge 0)
188 | pure (SS l c p s db b ms el cy hc ch er st)
190 | %runElab deriveParserState "SSz" "SST"
191 | [ "Chain", "NewBranch", "SRing", "Closed", "Err", "Atom"
192 | , "BMass","BElem","BChiral","BHCount","BCharge","BEnd"
195 | endGraph : SSTCK q -> F1 q (Maybe (BoundedErr SmilesErr))
196 | endGraph sk = T1.do
197 | st <- replace1 sk.st empty
199 | let Right g := toGraph st | Left err => pure (Just err)
200 | [<] <- replace1 sk.positions_ [<]
201 | | _:<p => pure $
Just (B (Unclosed "(") (BS p (incCol p)))
204 | _ => push1 sk.stack_ g >> pure Nothing
206 | onAtom : SmilesAtom -> Step1 q SSz SSTCK
207 | onAtom a = \(sk # t) =>
208 | let s # t := read1 sk.st t
209 | in case read1 sk.dob t of
210 | No # t => writeAs sk.st (plainAtom a s) SRing t
212 | let _ # t := write1 sk.dob No t
213 | in writeAs sk.st (atomWithBond b a s) SRing t
215 | let _ # t := write1 sk.dob No t
216 | in writeAs sk.st (dottedAtom a s) SRing t
218 | onRing : Ring -> Step1 q SSz SSTCK
219 | onRing r = \(sk # t) =>
220 | let p # t := getPosition t
221 | s # t := read1 sk.st t
222 | in case addRing p r s of
223 | Right s2 => writeAs sk.st s2 SRing t
224 | Left x => failWith x Err t
226 | bracket : (sk : SSTCK q) => F1 q SST
228 | let m # t := replace1 sk.mass Nothing t
229 | e # t := read1 sk.elem t
230 | cy # t := replace1 sk.chirality None t
231 | h # t := replace1 sk.hcount 0 t
232 | ch # t := replace1 sk.charge 0 t
233 | in onAtom (bracket (aromIsotope m e) cy h ch) (sk # t)
235 | mass : (RExp True, Step q SSz SSTCK)
236 | mass = conv (plus digit) wrt
238 | %inline wrt : (sk : SSTCK q) => ByteString ->F1 q SST
239 | wrt bs = writeAs sk.mass (refineMassNr $
cast $
decimal bs) BElem
241 | elem : List (RExp True, Step q SSz SSTCK)
242 | elem = writeVals interpolate elem BChiral values
244 | chirality : List (RExp True, Step q SSz SSTCK)
245 | chirality = writeVals interpolate chirality BHCount values
247 | hc : List (RExp True, Step q SSz SSTCK)
248 | hc = cexpr "H1" (wrt 1) :: vals encodeH (\v => \(sk # t) => wrt v t) values
250 | wrt : HCount -> (sk : SSTCK q) => F1 q SST
251 | wrt c = writeAs sk.hcount c BCharge
253 | chrg : List (RExp True, Step q SSz SSTCK)
256 | :: cexpr "-1" (wrt (-
1))
257 | :: cexpr "++" (wrt 2)
258 | :: cexpr "--" (wrt (-
2))
259 | :: vals encodeCharge (\v => \(sk # t) => wrt v t) values
261 | wrt : Charge -> (sk : SSTCK q) => F1 q SST
262 | wrt c = writeAs sk.charge c BEnd
264 | bend : List (RExp True, Step q SSz SSTCK)
265 | bend = [cclose ']' bracket]
267 | atom : List (RExp True, Step q SSz SSTCK)
268 | atom = copen' '[' BMass :: vals encodeAtom onAtom subset
270 | ring : List (RExp True, Step q SSz SSTCK)
271 | ring = vals interpolate onRing values
273 | bond : List (RExp True, Step q SSz SSTCK)
274 | bond = cexpr '.' dot :: vals interpolate wrt values
276 | %inline wrt : SmilesBond -> Step1 q SSz SSTCK
277 | wrt b = \(sk # t) => writeAs sk.dob (Bnd b) Atom t
279 | dot : (k : SSTCK q) => F1 q SST
280 | dot = writeAs k.dob Dot Atom
282 | openClose : List (RExp True, Step q SSz SSTCK)
283 | openClose = [copen '(' opn, cclose ')' cls]
285 | opn,cls : (k : SSTCK q) => F1 q SST
286 | opn = read1 k.st >>= \s => writeAs k.st ({stck $= doubleHead} s) NewBranch
287 | cls = read1 k.st >>= \s => writeAs k.st ({stck $= drop} s) Closed
289 | space : List (RExp True, Step q SSz SSTCK)
290 | space = [conv (plus $
oneof [' ', '\t']) (const end), newline nl end]
293 | nl = "\r\n" <|> '\n' <|> '\r'
295 | end : (sk : SSTCK q) => F1 q SST
297 | Nothing <- endGraph sk | Just x => failWith x Err
300 | smilesTrans : Lex1 q SSz SSTCK
303 | [ E Chain $
dfa (atom ++ space)
304 | , E Atom $
dfa atom
305 | , E SRing $
dfa (atom ++ ring ++ bond ++ openClose ++ space)
306 | , E NewBranch $
dfa (atom ++ bond)
307 | , E Closed $
dfa (atom ++ bond ++ openClose ++ space)
308 | , E BMass $
dfa (mass :: elem)
309 | , E BElem $
dfa elem
310 | , E BChiral $
dfa (chirality ++ hc ++ chrg ++ bend)
311 | , E BHCount $
dfa (hc ++ chrg ++ bend)
312 | , E BCharge $
dfa (chrg ++ bend)
313 | , E BEnd $
dfa bend
316 | smilesErr : Arr32 SSz (SSTCK q -> F1 q (BoundedErr SmilesErr))
318 | arr32 SSz (unexpected [])
319 | [ E BMass $
unclosedIfNLorEOI "[" []
320 | , E BElem $
unclosedIfNLorEOI "[" []
321 | , E BChiral $
unclosedIfNLorEOI "[" []
322 | , E BHCount $
unclosedIfNLorEOI "[" []
323 | , E BEnd $
unclosedIfNLorEOI "[" []
329 | -> F1 q (Either (BoundedErr SmilesErr) (List SmilesGraph))
331 | case st == Chain || st == SRing || st == Closed of
332 | False => arrFail SSTCK smilesErr st sk
333 | True => endGraph sk >>= \case
334 | Just x => pure (Left x)
335 | Nothing => getList sk.stack_ >>= pure . Right
338 | smiles : P1 q (BoundedErr SmilesErr) (List SmilesGraph)
339 | smiles = P Chain init smilesTrans snocChunk smilesErr smilesEOI
343 | parseSmiles : Origin -> String -> Either SmilesParseErr (List SmilesGraph)
344 | parseSmiles = parseString smiles
346 | test : String -> IO ()
348 | case parseSmiles Virtual s of
349 | Right x => for_ x $
\(G _ g) => putStrLn (pretty interpolate interpolate g)
350 | Left x => putStrLn "\{x}"
354 | {auto has : Has SmilesParseErr es}
357 | -> ChemRes es SmilesGraph
358 | readSmilesFrom o s =
359 | case parseSmiles o s of
360 | Left x => Left (inject x)
361 | Right [] => Right (G 0 empty)
362 | Right [g] => Right g
364 | Left (inject $
toParseError o s (B (Custom ManyEntries) NoBounds))
367 | readSmiles : Has SmilesParseErr es => String -> ChemRes es SmilesGraph
368 | readSmiles = readSmilesFrom Virtual
375 | readSmiles' : String -> Either String SmilesGraph
377 | mapFst (interpolate . project1) . readSmiles {es = [SmilesParseErr]}