/****************************************************************************** *** ringsmi.c - write SMILES for ringsystems ******************************************************************************* *** Function ringsystems: *** Find rings and ring systems. A ring system is defined *** as a set of cycles (rings) where each cycle shares at *** least one atom with another cycle. For each ring system, *** a substructure object is allocated, containing each atom *** and bond. For each molecule, a sequence of these *** substructures is formed. ******************************************************************************* *** Function subsmi: *** Then, for each substructure, a copy of the molecule is *** created and "trimmed" to contain the substructure atoms *** and bonds only, and a SMILES for this ringsystem is *** written if possible. *** *** Attached atoms: *** H's are left attached. *** Double or triple-bonded atoms are left attached. *** Hetero single-bonded atoms are left attached. *** Single-bonded atoms are replaced by wildcards (*). ******************************************************************************* *** The result is a valid SMILES representing both the *** ringsystem itself and the attached atoms. To represent *** the ringsystem alone, perhaps for searching purposes, *** it is probably best to strip away the wildcards -- carefully *** -- and produce a SMARTS which may or may not be a valid SMILES. ******************************************************************************* *** Author: Jeremy Yang *** Date: 10 Nov 1997 ******************************************************************************/ #include #include "dt_smiles.h" #include "du_utils.h" dt_Handle ringsystems(dt_Handle mol); void *subsmi(dt_Handle sub, dt_Handle mol, char **subsmiles); dt_Handle stream2seq(dt_Handle stream); static void zapseq(dt_Handle seq); main() { dt_Handle mol; /*** molecule object ***/ dt_Handle subseq; /*** molecule object ***/ dt_Handle sub; /*** substructure object ***/ char *smi = NULL; /*** smiles ***/ char *subsmiles = NULL; /*** substructure smiles ***/ int count = 0; /*** smiles count ***/ int len; /*** smiles length ***/ int rsystotal = 0; /*** ringsystem count ***/ int rsyssmicount = 0; /*** ringsystem smi count ***/ int subcount = 0; /*** substructure count ***/ while (NULL != (smi = du_fgetline(&len, stdin))) { ++count; mol = dt_smilin(strlen(smi), smi); if (NULL_OB == mol) { fprintf(stderr, "ERROR: Can't interpret SMILES \"%s\"\n", smi); du_printerrors(stderr, DX_ERR_ERROR); continue; } subseq = ringsystems(mol); while(NULL_OB != (sub = dt_next(subseq))) { subsmi(sub, mol, &subsmiles); if (0 < strlen(subsmiles)) ++rsyssmicount; printf("%s\n", subsmiles); } subcount = dt_count(subseq, TYP_ANY); rsystotal += subcount; zapseq(subseq); dt_dealloc(subseq); dt_dealloc(mol); if (!((count)%1000)) fprintf(stderr, "%d...", count); fflush(stderr); } if (subsmiles) free(subsmiles); fprintf(stderr, "\nTotal SMILES read: %d.\n", count); fprintf(stderr, "Total ringsystems found: %d.\n", rsystotal); fprintf(stderr, "Total ringsystems SMILES: %d.\n", rsyssmicount); fprintf(stderr, "So long, baby!\n"); } /****************************************************************************** *** ringsystems - returns a sequence of substructures which are the *** ringsystems for the molecule. ******************************************************************************/ dt_Handle ringsystems(dt_Handle mol) { dt_Handle atom; dt_Handle atom2; dt_Handle atomstream; dt_Handle atomstream2; dt_Handle bond; dt_Handle bondstream; dt_Handle cycle; dt_Handle cyclestream; dt_Handle newsub; dt_Handle sub; dt_Handle sub2; static dt_Handle *subarray = NULL; dt_Handle subseq; int fused; int subcount = 0; int i, j; static int subarraysize = 0; subseq = dt_alloc_seq(); /*************************************************** * Examine each cycle... ***************************************************/ cyclestream = dt_stream(mol, TYP_CYCLE); while (NULL_OB != (cycle = dt_next(cyclestream))) { /*************************************************** * Compare each atom of each cycle with each atom * of each ring-system (substruct) found so far. * If any are equal, then cycle is part of ring-system. ***************************************************/ dt_reset(subseq); atomstream = dt_stream(cycle, TYP_ATOM); fused = 0; while (!fused && (NULL_OB != (sub = dt_next(subseq)))) { atomstream2 = dt_stream(sub, TYP_ATOM); while (!fused && (NULL_OB != (atom = dt_next(atomstream)))) { while (!fused && (NULL_OB != (atom2 = dt_next(atomstream2)))) { if (atom == atom2) fused = 1; } dt_reset(atomstream2); } dt_reset(atomstream); } bondstream = dt_stream(cycle, TYP_BOND); /*************************************************** * If one atom is shared, then add cycle's atoms to * the ring-system. ***************************************************/ if (fused) { while (NULL_OB != (atom = dt_next(atomstream))) dt_add(sub, atom); while (NULL_OB != (bond = dt_next(bondstream))) dt_add(sub, bond); /*************************************************** * If no atom is shared, then create new ring-system * for this cycle. ***************************************************/ } else { newsub = dt_alloc_substruct(mol); while (NULL_OB != (atom = dt_next(atomstream))) dt_add(newsub, atom); while (NULL_OB != (bond = dt_next(bondstream))) dt_add(newsub, bond); dt_append(subseq, newsub); ++subcount; } } /*************************************************** * Create array of ringsystem substructs for possible * fusing. ***************************************************/ if (!subarray) { subarray = (dt_Handle *)malloc(subcount * sizeof(dt_Handle)); } else if (subcount > subarraysize) { subarray = (dt_Handle *)realloc(subarray, subcount * sizeof(dt_Handle)); } dt_reset(subseq); for (i = 0; NULL_OB != (sub = dt_next(subseq)); ++i) subarray[i] = sub; /*************************************************** * Now fuse any ringsystems which overlap. (We may * have created these from disjoint rings in the * same ringsystem, encountered before the joining * rings.) ***************************************************/ for (i = 0; i < subcount; ++i) { sub = subarray[i]; if (NULL_OB == sub) continue; for (j = 0; j < subcount; ++j) { sub2 = subarray[j]; if (NULL_OB == sub2 || sub == sub2) continue; atomstream = dt_stream(sub, TYP_ATOM); atomstream2 = dt_stream(sub2, TYP_ATOM); fused = 0; while (!fused && (NULL_OB != (atom = dt_next(atomstream)))) { while (!fused && (NULL_OB != (atom2 = dt_next(atomstream2)))) { if (atom == atom2) fused = 1; } dt_reset(atomstream2); } /*************************************************** * If one atom is shared, then add sub2's atoms and * bonds to sub and dealloc sub2. ***************************************************/ bondstream = dt_stream(sub2, TYP_BOND); if (fused) { while (NULL_OB != (atom = dt_next(atomstream2))) dt_add(sub, atom); while (NULL_OB != (bond = dt_next(bondstream))) dt_add(sub, bond); dt_reset(subseq); while (sub2 != dt_next(subseq)) ; dt_delete(subseq); dt_dealloc(sub2); sub2 = NULL_OB; } dt_dealloc(atomstream); dt_dealloc(atomstream2); } } dt_reset(subseq); return subseq; } /******************************************************************** *** subsmi - Write a smiles for a substructure *** Note that we are adding some attached atoms here, *** hence modifying the substructure. ********************************************************************/ void *subsmi(dt_Handle sub, dt_Handle mol, char **pssmi) { dt_Handle atomstream, atomstream2, bondstream, bondstream2; dt_Handle atom, bond, atoms, bonds, atoms2, bonds2, ob, ob2; dt_Handle xatom, newatom, mol2; int len, border, atno, xatno, xhcount; static int bufsize = 0; char *p; dt_mod_on(mol2 = dt_copy(mol)); /********************************************************** *** Form sequences since we can't structurally modify *** members of atom or bond streams without dealloc-ing the *** stream. **********************************************************/ atoms = stream2seq(atomstream = dt_stream(mol, TYP_ATOM)); atoms2 = stream2seq(atomstream2 = dt_stream(mol2, TYP_ATOM)); bonds = stream2seq(bondstream = dt_stream(mol, TYP_BOND)); bonds2 = stream2seq(bondstream2 = dt_stream(mol2, TYP_BOND)); dt_dealloc(atomstream); dt_dealloc(bondstream); dt_dealloc(atomstream2); dt_dealloc(bondstream2); /********************************************************** *** Dealloc all bonds and atoms not in substructure *** or attachment points. **********************************************************/ while (NULL_OB != (ob = dt_next(bonds))) { ob2 = dt_next(bonds2); if (!dt_member(sub, ob)) dt_dealloc(ob2); else dt_setadjunct(ob, ob2); } while (NULL_OB != (ob = dt_next(atoms))) { ob2 = dt_next(atoms2); if (!dt_member(sub, ob)) dt_dealloc(ob2); else dt_setadjunct(ob, ob2); } dt_dealloc(atoms); dt_dealloc(atoms2); dt_dealloc(bonds); dt_dealloc(bonds2); /*************************************************************** *** Then, add attachment atoms (* if single-bonded, !H, and *** !(!C - ringsystem !C)). ***************************************************************/ atomstream = dt_stream(sub, TYP_ATOM); while (NULL_OB != (atom = dt_next(atomstream))) { atno = dt_number(atom); bondstream = dt_stream(atom, TYP_BOND); while (NULL_OB != (bond = dt_next(bondstream))) { if (!dt_member(sub, bond)) { border = dt_bondorder(bond); xatom = dt_xatom(atom, bond); xatno = dt_number(xatom); xhcount = dt_hcount(xatom); if ((1 == border) && (DX_ATN_H != xatno) && !((DX_ATN_C != atno) && (DX_ATN_C != xatno))) { xatno = DX_ATN_WILD; xhcount = 0; } newatom = dt_addatom(mol2, xatno, xhcount); dt_addbond(dt_adjunct(atom), newatom, border); } } dt_dealloc(bondstream); } dt_dealloc(atomstream); /********************************************************** *** Write SMILES for substructure if possible. **********************************************************/ if (!dt_mod_off(mol2)) { fprintf(stderr, "ERROR (ringsmi): Can't write smi for sub...\n"); return (""); } else { p = dt_cansmiles(&len, mol2, 1); if (!*pssmi) { *pssmi = (char *)malloc((len + 1)*sizeof(char)); } else if (bufsize < len + 1) { *pssmi = (char *)realloc(*pssmi, (len + 1)*sizeof(char)); bufsize = len + 1; } strncpy(*pssmi, p, len); (*pssmi)[len] = '\0'; dt_dealloc(mol2); return; } } /******************************************************************** *** stream2seq - Copy a stream to a new sequence. ********************************************************************/ dt_Handle stream2seq(dt_Handle stream) { dt_Handle seq = dt_alloc_seq(), ob; while (NULL_OB != (ob = dt_next(stream))) { dt_append(seq, ob); } return seq; } /******************************************************************** *** zapseq - deallocate a sequence of objects ********************************************************************/ static void zapseq(dt_Handle seq) { dt_Handle ob; dt_reset(seq); while (NULL_OB != (ob = dt_next(seq))) dt_dealloc(ob); dt_dealloc(seq); }