/*
  Calculation of polar surface area based on fragment contributions (TPSA)
  Peter Ertl, Novartis Pharma AG, Basel, August 2000
  peter.ertl@pharma.novartis.com || peter.ertl@altavista.net
  The program uses the SMILES Daylight Toolkit module.

  For description of the methodology see :
  P. Ertl, B. Rohde, P. Selzer
  Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based
  Contributions and Its Application to the Prediction of Drug Transport 
  Properties, J.Med.Chem. 43, 3714-3717, 2000

  The program reads a list of SMILES strings from stdin, and writes SMILES
  and the calculated TPSA on stdout.
  The program expects "problematic" nitrogens in pentavalent form, i.e. nitro 
  groups as -N(=O)=O, N-oxides as c1ccccn1=O, and azides as -N=N#N.  

  This contrib program comes with absolutely no warranty of any kind.

  Rev: 29 Mar 2001
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "dt_smiles.h"
#include "du_utils.h"

float calculatePSA(char *);

main(int argc, char *argv[]) {
  char *smiles;
  float tpsa;
  int len;

  while (NULL != (smiles = du_fgetline(&len, stdin))) {
    tpsa = calculatePSA(smiles);
    if (tpsa < 0.)
      printf("%s ERROR\n",smiles);
    else
      printf("%s %.2f\n",smiles,tpsa);
  }
}

float calculatePSA(char *smi) {
  dt_Handle mol,atom,atoms,bond,bonds,ring,rings,ratom,ratoms;
  float psa,p;
  int an,nh,q,nv,bt;
  int nsingle,ndouble,ntriple,naromatic;
  int isAromatic,isIn3ring;

  mol = dt_smilin(strlen(smi),smi);
  if (NULL_OB==mol) {
    /*fprintf(stderr, "Error in smiles `%s'\n", smi);*/
    return -1.;
  }

  atoms = dt_stream(mol,TYP_ATOM);
  rings = dt_stream(mol,TYP_CYCLE);

  psa = 0.;
  while (NULL_OB != (atom = dt_next(atoms))) {
    an = dt_number(atom);
    if (an != 8 && an != 7) continue; /* only O and N atoms considered */
    nh = dt_hcount(atom);
    q = dt_charge(atom);
    isAromatic = dt_aromatic(atom);
    /* checking whether this atom is in a 3-membered ring */
    isIn3ring = 0;
    while (NULL_OB != (ring = dt_next(rings))) {
      if (dt_count(ring,TYP_ATOM) > 3) continue;
      ratoms = dt_stream(ring,TYP_ATOM);
      while (NULL_OB != (ratom = dt_next(ratoms))) {
	if (atom==ratom) {isIn3ring = 1; break;}
      }
      if (isIn3ring) break;
    }
    dt_reset(rings);

    nv = 0; /* number of neighbours */
    /* finding the number of -,=,#,: bonds originating from the atom */
    nsingle = 0; ndouble = 0; ntriple = 0; naromatic = 0;
    bonds = dt_stream(atom,TYP_BOND);
    while (NULL_OB != (bond = dt_next(bonds))) {
      nv++;
      bt = dt_bondtype(bond);
      if (bt==DX_BTY_SINGLE) nsingle++;
      else if (bt==DX_BTY_DOUBLE) ndouble++;
      else if (bt==DX_BTY_TRIPLE) ntriple++;
      else naromatic++;
    }
    /* finding the psa contribution for this fragment (atom with hydrogens) */
    p = -1.;
    switch (an) {
      case 7:
        if (nv == 1) {
          if (nh==0 && ntriple==1 && q==0) p = 23.79; /* N# */
          else if (nh==1 && ndouble==1 && q==0) p = 23.85; /* [NH]= */
          else if (nh==2 && nsingle==1 && q==0) p = 26.02; /* [NH2]- */
          else if (nh==2 && ndouble==1 && q==1) p = 25.59; /* [NH2+]= */
          else if (nh==3 && nsingle==1 && q==1) p = 27.64; /* [NH3+]- */
        }
        else if (nv == 2) {
          if (nh==0 && nsingle==1 && ndouble==1 && q==0) p = 12.36; /* =N- */
          else if (nh==0 && ntriple==1 && ndouble==1 && q==0) p = 13.60; /* =N# */
          else if (nh==1 && nsingle==2 && q==0 && !isIn3ring) p = 12.03; /* -[NH]- */
          else if (nh==1 && nsingle==2 && q==0 && isIn3ring) p = 21.94; /* -[NH]-r3 */
          else if (nh==0 && ntriple==1 && nsingle==1 && q==1) p = 4.36; /* -[N+]# */
          else if (nh==1 && ndouble==1 && nsingle==1 && q==1) p = 13.97; /* -[NH+]= */
          else if (nh==2 && nsingle==2 && q==1) p = 16.61; /* -[NH2+]- */
          else if (nh==0 && naromatic==2 && q==0) p = 12.89; /* :[n]: */
          else if (nh==1 && naromatic==2 && q==0) p = 15.79; /* :[nH]: */
          else if (nh==1 && naromatic==2 && q==1) p = 14.14; /* :[nH+]: */
        }
        else if (nv == 3) {
          if (nh==0 && nsingle==3 && q==0 && !isIn3ring) p = 3.24; /* -N(-)- */
          else if (nh==0 && nsingle==3 && q==0 && isIn3ring) p = 3.01; /* -N(-)-r3 */
          else if (nh==0 && nsingle==1 && ndouble==2 && q==0) p = 11.68; /* -N(=)= */
          else if (nh==0 && nsingle==2 && ndouble==1 && q==1) p = 3.01; /* =[N+](-)- */
          else if (nh==1 && nsingle==3 && q==1) p = 4.44; /* -[NH+](-)- */
          else if (nh==0 && naromatic==3 && q==0) p = 4.41; /* :[n](:): */
          else if (nh==0 && nsingle==1 && naromatic==2 && q==0) p = 4.93; /* -:[n](:): */
          else if (nh==0 && ndouble==1 && naromatic==2 && q==0) p = 8.39; /* =:[n](:): */
          else if (nh==0 && naromatic==3 && q==1) p = 4.10; /* :[n+](:): */
          else if (nh==0 && nsingle==1 && naromatic==2 && q==1) p = 3.88; /* -:[n+](:): */
        }
        else if (nv == 4) {
          if (nh==0 && nsingle==4 && q==1) p = 0.00; /* -[N+](-)(-)- */
        }
        if (p < 0.) { /* N with non-standard valency */
          p = 30.5 - nv * 8.2 + nh * 1.5;
          if (p < 0.) p = 0.;
        }
        break;
      case 8:
        if (nv == 1) {
          if (nh==0 && ndouble==1 && q==0) p = 17.07; /* O= */
          else if (nh==1 && nsingle==1 && q==0) p = 20.23; /* [OH]- */
          else if (nh==0 && nsingle==1 && q==-1) p = 23.06; /* [O-]- */
        }
        else if (nv == 2) {
          if (nh==0 && nsingle==2 && q==0 && !isIn3ring) p = 9.23; /* -O- */
          else if (nh==0 && nsingle==2 && q==0 && isIn3ring) p = 12.53; /* -O-r3 */
          else if (nh==0 && naromatic==2 && q==0) p = 13.14; /* :o: */
        }
        if (p < 0.) { /* O with non-standard valency */
          p = 28.5 - nv * 8.6 + nh * 1.5;
          if (p < 0.) p = 0.;
        }
        break;
    }

    psa += p;
  }

  dt_dealloc(mol);

  return psa;
}
