/* -*-C-*-*/
/**********************************************************************
* This source code is public domain, and may be freely distributed, *
* modified, and used for any purpose. IT COMES WITH ABSOLUTELY NO *
* WARRANTY OF ANY KIND. *
**********************************************************************/
/*+======================================================================
| FILE: findrxn.c
| DESCRIPTION:
| Given a reactant and product, find all reaction trees. This
| finds reactions which may have more components than just the
| reactant and product. For example, given the
| reactant: CC(=O)O and product: CC(=O)OCC, this finds all of
| the following reaction SMILES:
|
| CC(=O)O>>CC(=O)OCC
| CC(=O)O>>CC(=O)OCC.O
| CC(=O)O.OCC>>CC(=O)OCC.O
|
| If we just looked for the reaction by reaction SMILES, we'd
| only find the first one.
+======================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dt_smiles.h"
#include "dt_thor.h"
#include "du_utils.h"
#define MX_SMILES 8000
/*============================================================================
* crash_and_burn() -- write errors to stderr and exit program in error
*/
static void crash_and_burn(dt_String s)
{
dt_errorsave("findpath", DX_ERR_ERROR, s);
du_printerrors(stderr, 0);
exit(1);
}
/*============================================================================
*
*/
main(int argc, char *argv[])
{
dt_Handle server, db;
dt_Handle smidt, rmoldt, pmoldt, rsos, psos, sa, sb, seq;
char *vala, *valb, rdbname[100], host[100];
int oops, lena, lenb, more;
char rmol[MX_SMILES], pmol[MX_SMILES];
/**** Set host and database names from arguments or set defaults. ****/
gethostname(host, 100);
strcpy(rdbname, "chemreact");
if (argc > 1) strcpy(host, argv[1]);
if (argc > 2) strcpy(rdbname, argv[2]);
/**** Connect to server and open database readonly. ****/
server = dt_thor_connect(host, "5555", "thor", "");
printf("Connected to %s, opening database...\n", host);
db = dt_thor_open(server, rdbname, "r", "");
if (server == NULL_OB || db == NULL_OB)
crash_and_burn("Can't open reaction database");
printf("Reaction database %s opened.\n", rdbname);
/**** Get the datatype objects ****/
smidt = dt_thor_getdatatype(db, 4, "$SMI");
if (NULL_OB == smidt)
crash_and_burn("Can't find SMILES datatype!");
rmoldt = dt_thor_getdatatype(db, 5, "$RMOL");
if (NULL_OB == rmoldt)
crash_and_burn("Can't find Reactant molecule datatype!");
pmoldt = dt_thor_getdatatype(db, 5, "$PMOL");
if (NULL_OB == pmoldt)
crash_and_burn("Can't find Product molecule datatype!");
/**** Read from input, print TDTs to output ****/
while (1)
{
/*** Get the cross reference sequences of strings for the reactant
and product molecule ***/
printf("Enter a reactant molecule:\n");
if (NULL == gets(rmol)) break;
rsos = dt_thor_xrefget(db, rmoldt, strlen(rmol), rmol, &oops);
if (NULL_OB == rsos)
{
printf("Reactant not found. Try again.\n");
if (dt_errorworst() > 0) du_printerrors(stderr, 0);
continue;
}
printf("Enter a product molecule:\n");
if (NULL == gets(pmol)) break;
psos = dt_thor_xrefget(db, pmoldt, strlen(pmol), pmol, &oops);
if (NULL_OB == psos)
{
printf("Product not found. Try again.\n");
if (dt_errorworst() > 0) du_printerrors(stderr, 0);
continue;
}
/*** zap the first in each (its the molecule itself) ***/
sa = dt_next(rsos);
sb = dt_next(psos);
dt_delete(rsos);
dt_delete(psos);
dt_dealloc(sa);
dt_dealloc(sb);
/*** Sort'em ***/
dt_seqsort(rsos);
dt_seqsort(psos);
dt_reset(rsos);
dt_reset(psos);
printf("Reactant found in %d reactions.\n", dt_count(rsos, TYP_ANY));
printf("Product found in %d reactions.\n", dt_count(psos, TYP_ANY));
seq = dt_alloc_seq();
more = TRUE;
sa = dt_next(rsos);
sb = dt_next(psos);
if ((sa == NULL_OB) && (sb == NULL_OB)) more = FALSE;
else more = TRUE;
/*** Compare them until we run out. Deallocate them as we go. ***/
while (more)
{
vala = dt_stringvalue(&lena, sa);
valb = dt_stringvalue(&lenb, sb);
/*** SMILES are same. ***/
if ((lena == lenb) && (strncmp(vala, valb, lena) == 0))
{
dt_append(seq, dt_copy(sa));
dt_dealloc(sa);
dt_dealloc(sb);
if (NULL_OB == (sa = dt_next(rsos))) more = FALSE;
if (NULL_OB == (sb = dt_next(psos))) more = FALSE;
}
/*** SMILES are not the same. Throw away the 'lowest' one
of the two sequences, and compare again. ***/
else if (strncmp(vala, valb, DU_MIN(lena, lenb)) == 0)
{
if (lena < lenb)
{
dt_dealloc(sa);
if (NULL_OB == (sa = dt_next(rsos))) more = FALSE;
}
else
{
dt_dealloc(sb);
if (NULL_OB == (sb = dt_next(psos))) more = FALSE;
}
}
else if (strncmp(vala, valb, DU_MIN(lena, lenb)) < 0)
{
dt_dealloc(sa);
if (NULL_OB == (sa = dt_next(rsos))) more = FALSE;
}
else
{
dt_dealloc(sb);
if (NULL_OB == (sb = dt_next(psos))) more = FALSE;
}
}
/*** Deallocate any left ***/
if (sa != NULL_OB)
{
dt_dealloc(sa);
while (NULL_OB != (sa = dt_next(rsos)))
dt_dealloc(sa);
}
if (sb != NULL_OB)
{
dt_dealloc(sb);
while (NULL_OB != (sb = dt_next(psos)))
dt_dealloc(sb);
}
/*** If any were found, print them out. ***/
if (dt_count(seq, TYP_ANY) != 0)
{
dt_reset(seq);
while (NULL_OB != (sa = dt_next(seq)))
{
vala = dt_stringvalue(&lena, sa);
printf("Reaction Found: %.*s\n", lena, vala);
}
}
else
printf("No reactions found.\n");
dt_dealloc(seq);
}
/*** Be polite to thor server; close connection before exiting. ***/
dt_dealloc(server);
return 0;
}