/* -*-C-*- */ /********************************************************************** * This source code is public domain, and may be freely distributed, * * modified, and used for any purpose. IT COMES WITH ABSOLUTELY NO * * WARRANTY OF ANY KIND. * **********************************************************************/ /*+====================================================================== | FILE: enumerate.c | DESCRIPTION: | Take a "MTZ" mixture specification on stdin, and generates | the full enumeration of molecules on stdout. The term "MTZ" | comes from the three legal input directives: | | MOLECULES: { Molecule SMILES } | TRANSFORM: { Transform string } | ZAPPAT: { molecule SMARTS } | | Each time the "MOLECULES:" directive is encountered, the set of | molecule SMILES is interpreted as a sequence of molecules. Each | new set of molecules kept until a reaction is run. | | Each time the "TRANSFORM:" directive is encountered, dt_transform() is | executed combinatorially for all of the sets of molecules input up | to that point. | | When "ZAPPAT:" is encountered, the pattern in matched against all | of the current reaction products. Any dot-separated components | which match are deleted from the products. | | Finally, lines starting with '#' are ignored as comments. +====================================================================== */ #include #include #include #include "dt_smiles.h" #include "dt_smarts.h" #include "du_smiles.h" #ifndef TRUE #define TRUE 1 #define FALSE 0 #endif dt_Handle reaction[200]; /*************************************************************************** * Prototypes ***************************************************************************/ dt_Integer recurse(dt_Handle, dt_Handle, dt_Integer, dt_Handle, dt_Integer); dt_Integer do_step(dt_Handle, dt_Integer); /*************************************************************************** * FUNCTION: main * * RETURNS: (int) ***************************************************************************/ int main(int argc, char *argv[]) { dt_Handle mol, tempatoms, tempatom; dt_Integer count, slen, did_transform, part, i; char tempstr[500]; dt_String molstr, str; dt_Handle tempmol, transform, zappat, pathset, zapseq; dt_Handle atoms, atom; dt_String ptr, new; /*** Make the starting molecules ***/ if (argc != 1) return (1); /*** Read the input ***/ count = 0; did_transform = FALSE; while (gets(tempstr)) { /*================================================================= * Comments ignored. ==========================+======================================= */ if (strncmp(tempstr, "#", 1) == 0) continue; /*================================================================= * TRANSFORM: ================================================================== */ if (strncmp(tempstr, "TRANSFORM: ", 11) == 0) { did_transform = TRUE; transform = dt_smirkin(strlen(tempstr + 11), tempstr + 11); if (transform == NULL_OB) { fprintf(stderr, "Couldn't make transformation: %s\n", tempstr); exit(1); } /*** Calculate the results from the molecule sets input to date. ***/ if (count > 0) { count = do_step(transform, count); dt_dealloc(transform); } else { fprintf(stderr, "No molecules upon which to operate.\n"); exit(1); } } /*================================================================= * ZAPPAT: ================================================================== */ if (strncmp(tempstr, "ZAPPAT: ", 8) == 0) { /*** We must have just done a transform, or it is an error. ***/ if (!did_transform) { fprintf(stderr, "Can only zap a molecule after a transform.\n"); exit(1); } /*** Make a molecule from the target. ***/ zappat = dt_smartin(strlen(tempstr + 8), tempstr + 8); if (zappat == NULL_OB) { fprintf(stderr, "Invalid pattern: %s\n", tempstr); exit(1); } /*** Go through each molecule. If it matches, remove that part. ***/ if (reaction[0] != NULL_OB) { dt_reset(reaction[0]); while (NULL_OB != (mol = dt_next(reaction[0]))) { pathset = dt_vmatch(zappat, mol, FALSE); if (pathset == NULL_OB) continue; /*** Zap only if there is a match. ***/ zapseq = dt_alloc_seq(); atoms = dt_stream(pathset, TYP_ATOM); tempatoms = dt_stream(mol, TYP_ATOM); /*** Collect all of the atoms that are in the same part as any of the hit atoms in the sequence. We'll delete all of them at the end. ***/ du_partcount(mol); /* causes regeneration of part numbers */ while (NULL_OB != (atom = dt_next(atoms))) { part = du_part(atom); dt_reset(tempatoms); while (NULL_OB != (tempatom = dt_next(tempatoms))) if (du_part(tempatom) == part) dt_setinteger(tempatom, 3, "zap", TRUE); } dt_dealloc(atoms); /*** We make the sequence in two steps so we guarantee that the sequence contains a unique set of atoms. ***/ dt_reset(tempatoms); while (NULL_OB != (tempatom = dt_next(tempatoms))) if (dt_integer(tempatom, 3, "zap")) dt_append(zapseq, tempatom); dt_dealloc(tempatoms); dt_dealloc(pathset); /*** Now delete the atoms we've collected. ***/ dt_mod_on(mol); dt_reset(zapseq); while (NULL_OB != (atom = dt_next(zapseq))) dt_dealloc(atom); dt_dealloc(zapseq); dt_mod_off(mol); if (dt_count(mol, TYP_ATOM) == 0) { dt_delete(reaction[0]); dt_dealloc(mol); } } } dt_dealloc(zappat); } /*================================================================= * MOLECULES: ================================================================== */ if (strncmp(tempstr, "MOLECULES: ", 11) == 0) { /*** There are some new molecules ***/ did_transform = FALSE; /*** Copy the string to parse it. ***/ molstr = (char *)malloc(strlen(tempstr) + 10); strcpy(molstr, tempstr + 11); reaction[count] = dt_alloc_seq(); for (ptr = molstr; new = strtok(ptr, "."); ptr = NULL) { tempmol = dt_smilin(strlen(new), new); if (tempmol != NULL_OB) dt_append(reaction[count], tempmol); } free(molstr); count++; } } /*====================================================================== * Finished with the input file. Write out the collected products. ====================================================================== */ if (reaction[0] != NULL_OB) { dt_reset(reaction[0]); while (NULL_OB != (mol = dt_next(reaction[0]))) { str = dt_cansmiles(&slen, mol, FALSE); if ((str != NULL) && (slen > 0)) printf("%.*s\n", slen, str); dt_dealloc(mol); } dt_dealloc(reaction[0]); } else printf("No molecules created.\n"); count = dt_vh_count(); fprintf(stderr, "Handle count: %d\n", count); return (0); } /*************************************************************************** * FUNCTION: do_step * * DESCRIPTION: * Take 'count' molecule streams, apply the transforms combinatorially * to generate a single sequence of molecules. Clear all of the molecule * streams, and return the result in reaction[0]. * * RETURNS: (dt_Integer) ***************************************************************************/ dt_Integer do_step(dt_Handle transform, dt_Integer count) { dt_Handle results, mols, base, tempmol; dt_Integer i; results = dt_alloc_seq(); mols = dt_alloc_seq(); recurse(results, mols, 0, transform, count); for (i = 0; i < count; i++) { if (dt_type(reaction[i]) == TYP_SEQUENCE) { dt_reset(reaction[i]); while (NULL_OB != (tempmol = dt_next(reaction[i]))) dt_dealloc(tempmol); dt_dealloc(reaction[i]); } else { base = dt_base(reaction[i]); dt_dealloc(reaction[i]); dt_dealloc(base); } } dt_dealloc(mols); if (dt_count(results, TYP_MOLECULE) > 0) { reaction[0] = results; return (1); } dt_dealloc(results); reaction[0] = NULL_OB; return(0); } /*************************************************************************** * FUNCTION: recurse * * DESCRIPTION: * Tricky, uncommented, and generally confusing. * * RETURNS: (dt_Integer) ***************************************************************************/ dt_Integer recurse(dt_Handle results, dt_Handle mols, dt_Integer this_step, dt_Handle transform, dt_Integer count) { dt_Handle mol, result_seq, result_rxn; dt_Handle result_rxn_stream, result_mol, tempmol; dt_reset(reaction[this_step]); while (NULL_OB != (mol = dt_next(reaction[this_step]))) { dt_append(mols, mol); if (this_step < (count - 1)) recurse(results, mols, this_step + 1, transform, count); else { result_seq = dt_transform(transform, mols, DX_FORWARD, TRUE); if (result_seq != NULL_OB) { dt_reset(result_seq); result_rxn = dt_next(result_seq); result_rxn_stream = dt_stream(result_rxn, TYP_MOLECULE); while (NULL_OB != (result_mol = dt_next(result_rxn_stream))) if (dt_getrole(result_mol, result_rxn) == DX_ROLE_PRODUCT) dt_append(results, dt_copy(result_mol)); dt_dealloc(result_rxn_stream); dt_dealloc(result_rxn); dt_dealloc(result_seq); } } /*** Zap the mol that we just used. ***/ dt_reset(mols); do tempmol = dt_next(mols); while (tempmol != mol); dt_delete(mols); } return(TRUE); }