/******************************************************************** *** merq.c - Merlin query filter. Similarity, superstructure, or *** smarts search on input smiles. *** Normal output is: smiles, number of hits, then each hit smiles, *** space separated. HITSONLY omits the first two fields. *** ONEHITPERLINE omits the first two fields, and adds a newline *** after each hit smiles, to facilitate postprocessing (more *** smarts filtering perhaps). *** *** Author: Jeremy Yang *** Rev: 10 Nov 2000 ********************************************************************/ #include #include #include #include "dt_smiles.h" #include "dt_merlin.h" #include "du_utils.h" #define MAX(x,y) (((x) > (y)) ? (x) : (y)) #define MIN(x,y) (((x) < (y)) ? (x) : (y)) /*** function declarations ***/ int sameword(char *p, char *q) {while(*p||*q) {if (tolower(*p++)!=tolower(*q++)) return 0;} return 1;} dt_Handle get_ftype(dt_Handle pool, char *tag); int simsearch(char *smi, dt_Handle hits, float minsim); int supsearch(char *smi, dt_Handle hits); int init_hitlist(dt_Handle hits); int printhits(dt_Handle hits, int n); void progress(dt_Handle server); void help() { fprintf(stderr, "\t************************************************************\n" "\t| merq - find most similar structures in db |\n" "\t************************************************************\n" "\t| |\n" "\t| merq [options] [hits.out] |\n" "\t| |\n" "\t| options: |\n" "\t| -i (required for passwd input) |\n" "\t| -o |\n" "\t| -search_type SIM|SUP|SMA [default is SIM] |\n" "\t| -maxhits # ... max # of hits to show [10] |\n" "\t| -db db ... database (pool) |\n" "\t| -minsim # ... minimum sim [0.9] (for simsearch) |\n" "\t| -hits_only ... don't print query smiles/smarts |\n" "\t| -output_format ONESMIPERLINE|NORMAL |\n" "\t| -HELP ... this help |\n" "\t| |\n" "\t************************************************************\n" "\t| Daylight CIS, Inc. |\n" "\t| Toolkit Contrib Program |\n" "\t************************************************************\n"); exit(1); } /*** Global variables ***/ char *smi=NULL; /*** smiles ***/ dt_Handle ftype_smi=NULL_OB; /*** smiles fieldtype ***/ dt_Handle col_smi=NULL_OB; /*** smiles column ***/ dt_Handle ftype_sim=NULL_OB; /*** similarity fieldtype ***/ dt_Handle col_sim=NULL_OB; /*** similarity column ***/ char *prog; /*** program name ***/ FILE *fin, *fout; /*** file ptrs ***/ int hitsonly=0; /*** output format ***/ int onesmiperline=0,normal=1; /*** output formats ***/ main(int argc, char *argv[]) { dt_Handle pool; /*** db (pool) object ***/ dt_Handle hits=NULL_OB; /*** Hitlist object ***/ dt_Handle svr; /*** Server object ***/ char *db=NULL; /*** database (pool) full spec ***/ char dbname[100]=""; /*** Database name ***/ char dbpw[100]=""; /*** Database password ***/ char host[100]=""; /*** Hostname ***/ char service[100]=""; /*** Thor service ***/ char user[100]=""; /*** Thor user ***/ char userpw[100]=""; /*** Thor user password ***/ char *p; /*** generic char * ***/ char *finname = NULL; /*** input filename ***/ char *foutname = NULL; /*** output filename ***/ int count=0; /*** input smiles count ***/ int yescount=0; /*** successful searches ***/ int totalhits=0; /*** total hit smiles printed ***/ int nhits; /*** hits for a search ***/ int i; /*** Generic int ***/ int len; /*** String length ***/ int isnew; /*** ***/ int maxhits=10; /*** max hits to list ***/ int sim_mode=0,sup_mode=0,sma_mode=0;/*** search modes ***/ float minsim=0.9; /*** min sim coeff ***/ /************************************** *** Parse command-line **************************************/ prog = *argv; for (++argv; --argc; ++argv) { if (sameword(*argv,"-help")) help(); if (sameword(*argv,"-maxhits")) { if (!--argc) help(); maxhits = atoi(*++argv); } else if (sameword(*argv,"-minsim")) { if (!--argc) help(); minsim = atof(*++argv); } else if (sameword(*argv,"-db")) { if (!--argc) help(); db = *++argv; } else if (sameword(*argv, "-i")) { if (!--argc) help(); finname = *++argv; fprintf(stderr, "NOTE: input file set to %s...\n", finname); } else if (sameword(*argv, "-o")) { if (!--argc) help(); foutname = *++argv; fprintf(stderr, "NOTE: output file set to %s...\n", foutname); } else if (sameword(*argv,"-search_type")) { if (!--argc) help(); if (sameword("sup",*++argv)) sup_mode = 1; else if (sameword("sma",*argv)) sma_mode = 1; else if (sameword("sim",*argv)) sim_mode = 1; else help(); } else if (sameword(*argv,"-hitsonly")) { hitsonly = 1; } else if (sameword(*argv,"-output_format")) { if (!--argc) help(); normal = 0; if (sameword("onesmiperline",*++argv)) onesmiperline = 1; else if (sameword("normal",*argv)) normal = 1; else help(); } else { fprintf(stderr,"ERROR(%s): Bad option \"%s\".\n",prog,*argv); help(); } } if (!db) { fprintf(stderr,"ERROR(%s): No database specified.\n",prog); help(); } if (finname) { if (!(fin=fopen(finname,"r"))) { fprintf(stderr,"Error opening file \"%s\"\n",finname); help(); } } else { fin = stdin; } if (foutname) { if (!(fout=fopen(foutname,"w"))) { fprintf(stderr,"Error opening file \"%s\"\n",foutname); help(); } } else { fout = stdout; } if (sup_mode) { fprintf(stderr, "NOTE: SUPERSTRUCTURE search mode...\n"); } else if (sma_mode) { fprintf(stderr, "NOTE: SMARTS search mode...\n"); } else { sim_mode = 1; fprintf(stderr, "NOTE: SIMILARITY search mode...\n"); } fprintf(stderr,"Minimum similarity: %.2f\n",minsim); fprintf(stderr,"Maximum hits per search: %d\n",maxhits); /************************************** *** Parse db specification **************************************/ if (!du_parse_db(db,dbname,dbpw,host,service,user,userpw)) help(); if (sameword(service,"thor")) strcpy(service,"merlin"); /****************************************************** *** Connect to server. ******************************************************/ svr = dt_mer_server(strlen(host),host,strlen(service),service, strlen(user),user,strlen(userpw),userpw,&isnew); if (NULL_OB==svr) { dt_errorclear(); fprintf(stderr,"Enter server passwd for %s:%s:%s >> ",host,service,user); system("stty -echo"); fgets(userpw,50,stdin); userpw[strlen(userpw)-1] = '\0'; /***chop newline***/ system("stty echo"); fprintf(stderr,"\n"); svr = dt_mer_server(strlen(host),host,strlen(service),service, strlen(user),user,strlen(userpw),userpw,&isnew); if (NULL_OB==svr) { fprintf(stderr,"ERROR(%s): Failed to connect to server \"%s:%s:%s\".\n", prog,host,service,user); goto exit; } } if (!dt_isopen(svr,strlen(dbname),dbname)) { fprintf(stderr,"ERROR(%s): Pool not loaded: \"%s@%s:%s\"\n", prog,dbname,host,service); goto exit; } /****************************************************** *** Open database. ******************************************************/ pool = dt_open(svr,strlen(dbname),dbname,1,"r",strlen(dbpw),dbpw,&isnew); if (NULL_OB==pool) { dt_errorclear(); fprintf(stderr,"Enter database password for %s@%s >> ",dbname,host); system("stty -echo"); fgets(dbpw,50,stdin); dbpw[strlen(dbpw)-1] = '\0'; /***chop newline***/ system("stty echo"); fprintf(stderr, "\n"); pool = dt_open(svr,strlen(dbname),dbname,1,"r",strlen(dbpw),dbpw,&isnew); if (NULL_OB==pool) { fprintf(stderr,"ERROR(%s): Failed to open \"%s@%s:%s:%s\"\n", prog,dbname,host,service,user); goto exit; } } fprintf(stderr,"Database \"%s@%s:%s:%s\" opened.\n",dbname,host,service,user); hits = dt_mer_alloc_hitlist(pool); if (NULL_OB==hits) { fprintf(stderr,"ERROR(%s): Could not create hitlist.\n",prog); goto exit; } /****************************************************** *** Main smiles loop ******************************************************/ for (count=0; NULL!=(smi=du_fgetline(&len,fin)); ++count) { if (NULL!=(p=strchr(smi,' '))) *p = '\0'; if (sup_mode) { nhits = supsearch(smi,hits); } else if (sma_mode) { nhits = smasearch(smi,hits); /*** "smi" is smarts ***/ } else { nhits = simsearch(smi,hits,minsim); } if (nhits>0) { ++yescount; } if (!hitsonly) fprintf(fout,"%s %d",smi,dt_mer_length(hits)); totalhits += printhits(hits,MIN(nhits,maxhits)); du_printerrors(stderr,DX_ERR_ERROR); dt_errorclear(); if (!dt_ping(svr,7,"whassup")) { /*** Make sure connection ok. ***/ pool = svr = NULL_OB; fprintf(stderr,"ERROR(%s): Lost server; exiting.\n",prog); goto exit; } } exit: du_printerrors(stderr,DX_ERR_ERROR); dt_dealloc(pool); dt_dealloc(svr); fflush(fout); fclose(fout); fclose(fin); fprintf(stderr,"%s results:\n",prog); fprintf(stderr,"\ttotal searches: %5d\n",count); fprintf(stderr,"\tsearches returning at least one hit: %5d\n",yescount); fprintf(stderr,"\ttotal output hit smiles: %5d\n",totalhits); fprintf(stderr,"Adios!\n"); return(0); } /******************************************************************** *** simsearch - similarity select and sort. Return number of hits. ********************************************************************/ int simsearch(char *smi, dt_Handle hits, float minsim) { int status, dsort, ndone, nhits; dt_Handle pool = dt_parent(hits); if (!init_hitlist(hits)) return(-1); dt_mer_clear(hits); ndone = dt_mer_similarselect(hits,col_sim, DX_SIMILAR_TANIMOTO,DX_ACTION_ADD_HITS, -1,&status,strlen(smi),smi,minsim,0.0,0.0); if (status==DX_STATUS_NOT_FOUND) { ; /*** Do nothing. ***/ } else if (status==DX_STATUS_ERROR) { fprintf(stderr,"ERROR(%s): Similarity search error.\n",prog); } else { progress(dt_server(pool)); dsort = dt_mer_defaultsort(col_sim); ndone = dt_mer_sort(hits,col_sim,dsort,0,&status); while (DX_STATUS_IN_PROGRESS==status) dt_continue(dt_server(pool),&status); if (status==DX_STATUS_ERROR) { fprintf(stderr,"ERROR(%s): Similarity sort error.\n",prog); } } nhits = dt_mer_length(hits); return(nhits); } /******************************************************************** *** supsearch - superstructure select and sort by similarity. *** Return number of hits. ********************************************************************/ int supsearch(char *smi, dt_Handle hits) { int status, dsort, ndone, nhits; dt_Handle pool = dt_parent(hits); if (!init_hitlist(hits)) return(-1); dt_mer_clear(hits); ndone = dt_mer_superselect(hits,col_smi, DX_SUPER_SMILES,DX_ACTION_ADD_HITS, -1,&status,strlen(smi),smi); if (status==DX_STATUS_NOT_FOUND) { ; /*** Do nothing. ***/ } else if (status==DX_STATUS_ERROR) { fprintf(stderr,"ERROR(%s): Superstructure search error.\n",prog); } else { progress(dt_server(pool)); dsort = dt_mer_defaultsort(col_sim); ndone = dt_mer_sort(hits,col_sim,dsort,0,&status); while (DX_STATUS_IN_PROGRESS==status) dt_continue(dt_server(pool),&status); if (status==DX_STATUS_ERROR) { fprintf(stderr,"ERROR(%s): Similarity sort error.\n",prog); } } nhits = dt_mer_length(hits); return(nhits); } /******************************************************************** *** smasearch - smarts select and sort by similarity. *** Return number of hits. ********************************************************************/ int smasearch(char *sma, dt_Handle hits) { int status, dsort, ndone, nhits, old_pct_done=0, pct_done; dt_Handle pool = dt_parent(hits); if (!init_hitlist(hits)) return(-1); dt_mer_clear(hits); ndone = dt_mer_superselect(hits,col_smi, DX_SUPER_SMARTS,DX_ACTION_ADD_HITS, -1,&status,strlen(sma),sma); if (status==DX_STATUS_NOT_FOUND) { ; /*** Do nothing. ***/ } else if (status==DX_STATUS_ERROR) { fprintf(stderr,"ERROR(%s): SMARTS search error.\n",prog); } else { progress(dt_server(pool)); dsort = dt_mer_defaultsort(col_sim); ndone = dt_mer_sort(hits,col_sim,dsort,0,&status); while (DX_STATUS_IN_PROGRESS==status) dt_continue(dt_server(pool),&status); if (status==DX_STATUS_ERROR) { fprintf(stderr,"ERROR(%s): Similarity sort error.\n",prog); } } nhits = dt_mer_length(hits); return(nhits); } /******************************************************************** *** init_hitlist - initialize hitlist ********************************************************************/ int init_hitlist(dt_Handle hits) { dt_Handle pool = dt_parent(hits); /******************************************************************** *** Initialize ftypes and cols if first search. ********************************************************************/ if (!ftype_sim) ftype_sim = get_ftype(pool,"SIMILARITY"); if (!col_sim) col_sim = dt_mer_alloc_column(pool,ftype_sim,DX_FUNC_FIRST); if (!ftype_smi) ftype_smi = get_ftype(pool,"$SMI"); if (!col_smi) col_smi = dt_mer_alloc_column(pool,ftype_smi,DX_FUNC_FIRST); if (!ftype_sim||!ftype_smi||!col_sim||!col_smi) return(0); return(1); } /******************************************************************** *** printhits - print nhits and hit smiles on one line. ********************************************************************/ int printhits(dt_Handle hits, int n) { int i, j=0, len; char *hitsmi; for (i=0; i0) { fprintf(fout,"%s%.*s",onesmiperline?"\n":" ",len,hitsmi); ++j; } } fprintf(fout,"\n"); return(j); } /******************************************************************** *** get_ftype - get first fieldtype for given tag, pool. ********************************************************************/ dt_Handle get_ftype(dt_Handle pool, char *tag) { dt_Handle dtype, ftypes, ftype; dtype = dt_getdatatype(pool, strlen(tag), tag); ftypes = dt_stream(dtype, TYP_FIELDTYPE); if (NULL_OB == (ftype = dt_next(ftypes))) { fprintf(stderr,"ERROR(%s): Can't get fieldtype for \"%s\".\n",prog,tag); du_printerrors(stderr, DX_ERR_ERROR); return(NULL_OB); } dt_dealloc(ftypes); return (ftype); } /******************************************************************** *** progress - wait for server to be done; show progress wheel. ********************************************************************/ void progress(dt_Handle server) { int done_when, ndone, status, pct_done, old_pct_done=0; int i=0; static char c[] = "|/-\\|/-\\"; done_when = dt_done_when(server); ndone = dt_continue(server,&status); while (DX_STATUS_IN_PROGRESS==status) { pct_done = (int)((float)(100*ndone)/(float)done_when); if (pct_done!=old_pct_done) { if (i) fputc(8,stderr); fputc(c[i++%8],stderr); } old_pct_done = pct_done; ndone = dt_continue(server,&status); } fputc(8,stderr); return; }