/* -*-C-*-*/

/**********************************************************************
 * This source code is public domain, and may be freely distributed,   *
 * modified, and used for any purpose.  IT COMES WITH ABSOLUTELY NO    *
 * WARRANTY OF ANY KIND.                                               *
 **********************************************************************/

/*+======================================================================
  | FILE:	findrxn.c
  | DESCRIPTION:
  |	Given a reactant and product, find all reaction trees.  This
  |     finds reactions which may have more components than just the
  |     reactant and product.  For example, given the
  |     reactant: CC(=O)O and product: CC(=O)OCC, this finds all of
  |     the following reaction SMILES:
  |     
  |     CC(=O)O>>CC(=O)OCC
  |     CC(=O)O>>CC(=O)OCC.O
  |     CC(=O)O.OCC>>CC(=O)OCC.O
  |
  |     If we just looked for the reaction by reaction SMILES, we'd
  |     only find the first one.
  +======================================================================
  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dt_smiles.h"
#include "dt_thor.h"

#include "du_utils.h"

#define MX_SMILES 8000

/*============================================================================
 *  crash_and_burn() -- write errors to stderr and exit program in error
 */

static void crash_and_burn(dt_String s)
{
  dt_errorsave("findpath", DX_ERR_ERROR, s);
  du_printerrors(stderr, 0);
  exit(1);
}

/*============================================================================
 *
 */

main(int argc, char *argv[])
{
  dt_Handle server, db;
  dt_Handle smidt, rmoldt, pmoldt, rsos, psos, sa, sb, seq;
  char      *vala, *valb, rdbname[100], host[100];
  int       oops, lena, lenb, more;
  char      rmol[MX_SMILES], pmol[MX_SMILES];
  
   /**** Set host and database names from arguments or set defaults. ****/
   
   gethostname(host, 100);
   strcpy(rdbname, "chemreact");
   if (argc > 1) strcpy(host,    argv[1]);
   if (argc > 2) strcpy(rdbname, argv[2]);
   
   /**** Connect to server and open database readonly. ****/
   
   server = dt_thor_connect(host, "5555", "thor", "");
   printf("Connected to %s, opening database...\n", host);
   
   db = dt_thor_open(server, rdbname, "r", "");
   if (server == NULL_OB || db == NULL_OB)
     crash_and_burn("Can't open reaction database");
   printf("Reaction database %s opened.\n", rdbname);
  
   /**** Get the datatype objects ****/

   smidt = dt_thor_getdatatype(db, 4, "$SMI");
   if (NULL_OB == smidt)
     crash_and_burn("Can't find SMILES datatype!");

   rmoldt = dt_thor_getdatatype(db, 5, "$RMOL");
   if (NULL_OB == rmoldt)
     crash_and_burn("Can't find Reactant molecule datatype!");

   pmoldt = dt_thor_getdatatype(db, 5, "$PMOL");
   if (NULL_OB == pmoldt)
     crash_and_burn("Can't find Product molecule datatype!");

   /**** Read from input, print TDTs to output ****/

   while (1) 
     {
       /*** Get the cross reference sequences of strings for the reactant
	 and product molecule ***/

       printf("Enter a reactant molecule:\n");
       if (NULL == gets(rmol)) break;
       rsos = dt_thor_xrefget(db, rmoldt, strlen(rmol), rmol, &oops);

       if (NULL_OB == rsos) 
	 {
	   printf("Reactant not found.  Try again.\n");
	   if (dt_errorworst() > 0) du_printerrors(stderr, 0);
	   continue;
	 }

       printf("Enter a product molecule:\n");
       if (NULL == gets(pmol)) break;
       psos = dt_thor_xrefget(db, pmoldt, strlen(pmol), pmol, &oops);

       if (NULL_OB == psos) 
	 {
	   printf("Product not found.  Try again.\n");
	   if (dt_errorworst() > 0) du_printerrors(stderr, 0);
	   continue;
	 }

       /*** zap the first in each (its the molecule itself) ***/

       sa = dt_next(rsos);
       sb = dt_next(psos);
       dt_delete(rsos);
       dt_delete(psos);
       dt_dealloc(sa);
       dt_dealloc(sb);

       /*** Sort'em ***/

       dt_seqsort(rsos);
       dt_seqsort(psos);
       dt_reset(rsos);
       dt_reset(psos);
       printf("Reactant found in %d reactions.\n", dt_count(rsos, TYP_ANY));
       printf("Product found in %d reactions.\n", dt_count(psos, TYP_ANY));

       seq = dt_alloc_seq();
       more = TRUE;
       sa = dt_next(rsos);
       sb = dt_next(psos);

       if ((sa == NULL_OB) && (sb == NULL_OB)) more = FALSE; 
       else more = TRUE;

       /*** Compare them until we run out.  Deallocate them as we go. ***/

       while (more)
	 {
	   vala = dt_stringvalue(&lena, sa);
	   valb = dt_stringvalue(&lenb, sb);
	   
	   /*** SMILES are same. ***/

	   if ((lena == lenb) && (strncmp(vala, valb, lena) == 0))
	     {
	       dt_append(seq, dt_copy(sa));
	       dt_dealloc(sa);
	       dt_dealloc(sb);
	       if (NULL_OB == (sa = dt_next(rsos))) more = FALSE;
	       if (NULL_OB == (sb = dt_next(psos))) more = FALSE;
	     }

	   /*** SMILES are not the same.  Throw away the 'lowest' one
	     of the two sequences, and compare again. ***/

	   else if (strncmp(vala, valb, DU_MIN(lena, lenb)) == 0)
	     {
	       if (lena < lenb)
		 {	       
		   dt_dealloc(sa);
		   if (NULL_OB == (sa = dt_next(rsos))) more = FALSE;
		 }
	       else 
		 {
		   dt_dealloc(sb);
		   if (NULL_OB == (sb = dt_next(psos))) more = FALSE;
		 }
	     }   
	   else if (strncmp(vala, valb, DU_MIN(lena, lenb)) < 0)
	     {
	       dt_dealloc(sa);
	       if (NULL_OB == (sa = dt_next(rsos))) more = FALSE;
	     }
	   else
	     {
	       dt_dealloc(sb);
	       if (NULL_OB == (sb = dt_next(psos))) more = FALSE;
	     }
	 }
       
       /*** Deallocate any left ***/

       if (sa != NULL_OB)
	 {
	   dt_dealloc(sa);
	   while (NULL_OB != (sa = dt_next(rsos)))
	     dt_dealloc(sa);
	 }

       if (sb != NULL_OB)
	 {
	   dt_dealloc(sb);
	   while (NULL_OB != (sb = dt_next(psos)))
	     dt_dealloc(sb);
	 }

       /*** If any were found, print them out. ***/

       if (dt_count(seq, TYP_ANY) != 0)
	 {
	   dt_reset(seq);
	   while (NULL_OB != (sa = dt_next(seq)))
	     {
	       vala = dt_stringvalue(&lena, sa);
	       printf("Reaction Found: %.*s\n", lena, vala);
	     }
	 }
       else
	 printf("No reactions found.\n");
       dt_dealloc(seq);
     }

   /*** Be polite to thor server; close connection before exiting. ***/
   
   dt_dealloc(server);
   return 0;
 }