#include #include #include "dt_smiles.h" #ifndef TRUE #define TRUE 1 #define FALSE 0 #endif #define MX_SMILES 1000 /*============================================================================ * sayerrors() -- dump errors in message queue to stderr */ static void sayerrors(void) { dt_Handle sob, sos = dt_errors(DX_ERR_ERROR); dt_String str; dt_Integer lens; while (NULL_OB != (sob = dt_next(sos))) { str = dt_stringvalue(&lens, sob); if (str) fprintf(stderr, "%.*s\n", lens, str); dt_dealloc(sob); } dt_dealloc(sos); dt_errorclear(); } /*============================================================================ * sobeqv() -- are to string objects lexically equivalent? */ static dt_Boolean sobeqv(dt_Handle sob1, dt_Handle sob2) { dt_Integer lens1, lens2; dt_String s1 = dt_stringvalue(&lens1, sob1); dt_String s2 = dt_stringvalue(&lens2, sob2); return (lens1 == lens2 && 0 == strncmp(s1, s2, lens2)); } /*============================================================================ * isomeric_sob() -- does given SMILES contain any [/\@] specification(s)? */ static dt_Boolean isomeric(dt_Handle sob) { dt_Integer lens; dt_String smi = dt_stringvalue(&lens, sob); if (smi) { if (memchr(smi, '@', lens)) return TRUE; if (memchr(smi, '/', lens)) return TRUE; if (memchr(smi, '\\', lens)) return TRUE; } return FALSE; } /*============================================================================ * bondsequence() -- returns a sequence containing all bonds to an atom. * * Implicit hydrogens are included in the sequence as dt_isohydro's. */ static dt_Handle bondsequence(dt_Handle atom) { dt_Handle bonds, bond, seq; dt_Integer nh; /*** Take a stream of bonds over given atom. ***/ if (NULL_OB == (bonds = dt_stream(atom, TYP_BOND))) return NULL_OB; /*** Copy to a sequence. ***/ seq = dt_alloc_seq(); while (NULL_OB != (bond = dt_next(bonds))) dt_append(seq, bond); dt_dealloc(bonds); /*** Add implicit hydrogens. ***/ nh = dt_imp_hcount(atom); while (nh--) dt_append(seq, dt_isohydro()); /*** Clean up, reset and return sequence. ***/ dt_reset(seq); return seq; } /*============================================================================ * is_chiral() -- is given atom or bond object a specified chiral center? * * Returns TRUE iff object is *already* specified as a chiral center. */ static dt_Boolean is_chiral(dt_Handle ob) { dt_Handle atoms, bonds, b1, b2; dt_Integer chival; /*** Switch on object type. ***/ switch (dt_type(ob)) { /*** Atom: Does it already have a chiral value? ***/ case TYP_ATOM: bonds = bondsequence(ob); chival = dt_chival(ob, bonds); dt_dealloc(bonds); return (DX_CHI_NONE != chival); /*** Bond: Does it already have double bond orientation? ***/ case TYP_BOND: /*** Only true double bonds can be chiral. ***/ if (DX_BTY_DOUBLE != dt_bondtype(ob)) return FALSE; /*** Get one external bond from each end atom. ***/ atoms = dt_stream(ob, TYP_ATOM); bonds = dt_stream(dt_next(atoms), TYP_BOND); if (ob == (b1 = dt_next(bonds))) b1 = dt_next(bonds); dt_dealloc(bonds); bonds = dt_stream(dt_next(atoms), TYP_BOND); if (ob == (b2 = dt_next(bonds))) b2 = dt_next(bonds); dt_dealloc(bonds); dt_dealloc(atoms); /*** Return TRUE iff dbo is already assigned. ***/ return (DX_CHI_NO_DBO != dt_dbo(ob, b1, b2)); } /*** FALSE for objects other than atoms or bonds. ***/ return FALSE; } /*============================================================================ * is_prochiral() -- is given atom or bond object a prochiral center? * * Returns TRUE iff object *could* be a chiral center. */ static dt_Boolean is_prochiral(dt_Handle ob) { dt_Handle atoms, a1, a2; /*** Switch on object type. ***/ switch (dt_type(ob)) { /*** Atom: Is it a tetrahedral center with 0 or 1 H's? ***/ case TYP_ATOM: /*** Atom can't be chiral with more than one implicit hydrogen. ***/ if (1 < dt_imp_hcount(ob)) return FALSE; /*** Atom can't be chiral with less than four total connections. ***/ if (4 > (dt_imp_hcount(ob) + dt_count(ob, TYP_BOND))) return FALSE; /*** Call atom prochiral. ***/ return TRUE; /*** Bond: Is it a double bond substituted on each end? ***/ case TYP_BOND: /*** Only true double bonds can be chiral. ***/ if (DX_BTY_DOUBLE != dt_bondtype(ob)) return FALSE; /*** Bond can't be prochiral if either end-atom is terminal. ***/ atoms = dt_stream(ob, TYP_ATOM); a1 = dt_next(atoms); a2 = dt_next(atoms); dt_dealloc(atoms); if (1 == dt_count(a1, TYP_BOND)) return FALSE; if (1 == dt_count(a2, TYP_BOND)) return FALSE; /*** Call bond prochiral. ***/ return TRUE; } /*** FALSE for objects other than atoms or bonds. ***/ return FALSE; } /*============================================================================ * combinations() -- combine chiral objects (cobs) recursively * * Adds SMILES of resultant molecules to sequence `outs'. * Expects molecule to be in mod_on state and leaves it that way. */ static void combinations(dt_Handle mol, dt_Handle cobs, dt_Handle outs) { dt_Handle atom, atoms, bond, bonds, b1, b2, cob, cops; dt_Integer oldstate; /*** Pull next chiral object out of sequence. ***/ cob = dt_next(cobs); /*** When out of chiral objects, save absolute SMILES of molecule. ***/ if (NULL_OB == cob) { dt_String smi; dt_Integer lens; if (dt_mod_off(mol)) { smi = dt_cansmiles(&lens, mol, TRUE); dt_append(outs, dt_alloc_string(lens, smi)); } dt_mod_on(mol); return; } /*** Chiral atom: try CCW then CW and recurse. ***/ if (TYP_ATOM == dt_type(cob)) { atom = cob; /*** Create an explicit sequence of bonds in any particular order. ***/ bonds = bondsequence(atom); /*** Save previously-existing chiral state. ***/ oldstate = dt_chival(atom, bonds); /*** Set atom's chirality CCW in bondsequence order, recurse. ***/ dt_setchival(atom, bonds, DX_CHI_THCCW); cops = dt_copy(cobs); combinations(mol, cops, outs); dt_dealloc(cops); /*** Set atom's chirality CW in same bondsequence order, recurse. ***/ dt_setchival(atom, bonds, DX_CHI_THCW); cops = dt_copy(cobs); combinations(mol, cops, outs); dt_dealloc(cops); /*** Restore atom's previous chiral state before returning.. ***/ dt_setchival(atom, bonds, oldstate); dt_dealloc(bonds); /*** Chiral bond: try CIS then TRANS and recurse. ***/ } else if (TYP_BOND == dt_type(cob)) { bond = cob; /*** Get one external bond from each end atom. ***/ atoms = dt_stream(bond, TYP_ATOM); bonds = dt_stream(dt_next(atoms), TYP_BOND); if (bond == (b1 = dt_next(bonds))) b1 = dt_next(bonds); dt_dealloc(bonds); bonds = dt_stream(dt_next(atoms), TYP_BOND); if (bond == (b2 = dt_next(bonds))) b2 = dt_next(bonds); dt_dealloc(bonds); dt_dealloc(atoms); /*** Save previously-exising double bond orientation. ***/ oldstate = dt_dbo(bond, b1, b2); /*** Set bond's double bond orientation to CIS and recurse. ***/ dt_setdbo(bond, b1, b2, DX_CHI_CIS); cops = dt_copy(cobs); combinations(mol, cops, outs); dt_dealloc(cops); /*** Set bond's double bond orientation to TRANS and recurse. ***/ dt_setdbo(bond, b1, b2, DX_CHI_TRANS); cops = dt_copy(cobs); combinations(mol, cops, outs); dt_dealloc(cops); /*** Restore bond's previous orientation before returning. ***/ dt_setdbo(bond, b1, b2, oldstate); } } /*============================================================================ * du_chiralify() -- return SMILES of tetrahedral isomers for given molecule * * Returns sequence of SMILES or NULL_OB if mol is not chiral. */ dt_Handle du_chiralify(dt_Handle mol) { dt_Handle outs, cobs, atoms, atom, bonds, bond, sob, sob2; /*** Create sequence of unspecified prochiral atom and bonds. ***/ cobs = dt_alloc_seq(); atoms = dt_stream(mol, TYP_ATOM); while (NULL_OB != (atom = dt_next(atoms))) if (!is_chiral(atom) && is_prochiral(atom)) dt_append(cobs, atom); dt_dealloc(atoms); bonds = dt_stream(mol, TYP_BOND); while (NULL_OB != (bond = dt_next(bonds))) if (!is_chiral(bond) && is_prochiral(bond)) dt_append(cobs, bond); dt_dealloc(bonds); /*** Can't chiralify if there are no unspecified prochiral centers. ***/ if (0 == dt_count(cobs, TYP_ANY)) { dt_dealloc(cobs); return NULL_OB; } /*** Set mod_on mode and recursively generate combinations. ***/ outs = dt_alloc_seq(); dt_mod_on(mol); dt_reset(cobs); combinations(mol, cobs, outs); dt_mod_off(mol); dt_dealloc(cobs); /*** Remove non-isomeric entries. ***/ dt_reset(outs); while (NULL_OB != (sob = dt_next(outs))) if (!isomeric(sob)) dt_delete(outs); /*** If output sequence is empty, return NULL_OB. ***/ if (0 == dt_count(outs, TYP_STRING)) { dt_dealloc(outs); return NULL_OB; } /*** Sort sequence and remove repeats. ***/ dt_seqsort(outs); dt_reset(outs); sob = dt_next(outs); while (NULL_OB != (sob2 = dt_next(outs))) if (sobeqv(sob, sob2)) dt_delete(outs); else sob = sob2; /*** Reset output sequence and return it. ***/ dt_reset(outs); return outs; }