/* Calculation of polar surface area based on fragment contributions (TPSA) Peter Ertl, Novartis Pharma AG, Basel, August 2000 peter.ertl@pharma.novartis.com || peter.ertl@altavista.net The program uses the SMILES Daylight Toolkit module. For description of the methodology see : P. Ertl, B. Rohde, P. Selzer Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based Contributions and Its Application to the Prediction of Drug Transport Properties, J.Med.Chem. 43, 3714-3717, 2000 The program reads a list of SMILES strings from stdin, and writes SMILES and the calculated TPSA on stdout. The program expects "problematic" nitrogens in pentavalent form, i.e. nitro groups as -N(=O)=O, N-oxides as c1ccccn1=O, and azides as -N=N#N. This contrib program comes with absolutely no warranty of any kind. Rev: 29 Mar 2001 */ #include #include #include #include "dt_smiles.h" #include "du_utils.h" float calculatePSA(char *); main(int argc, char *argv[]) { char *smiles; float tpsa; int len; while (NULL != (smiles = du_fgetline(&len, stdin))) { tpsa = calculatePSA(smiles); if (tpsa < 0.) printf("%s ERROR\n",smiles); else printf("%s %.2f\n",smiles,tpsa); } } float calculatePSA(char *smi) { dt_Handle mol,atom,atoms,bond,bonds,ring,rings,ratom,ratoms; float psa,p; int an,nh,q,nv,bt; int nsingle,ndouble,ntriple,naromatic; int isAromatic,isIn3ring; mol = dt_smilin(strlen(smi),smi); if (NULL_OB==mol) { /*fprintf(stderr, "Error in smiles `%s'\n", smi);*/ return -1.; } atoms = dt_stream(mol,TYP_ATOM); rings = dt_stream(mol,TYP_CYCLE); psa = 0.; while (NULL_OB != (atom = dt_next(atoms))) { an = dt_number(atom); if (an != 8 && an != 7) continue; /* only O and N atoms considered */ nh = dt_hcount(atom); q = dt_charge(atom); isAromatic = dt_aromatic(atom); /* checking whether this atom is in a 3-membered ring */ isIn3ring = 0; while (NULL_OB != (ring = dt_next(rings))) { if (dt_count(ring,TYP_ATOM) > 3) continue; ratoms = dt_stream(ring,TYP_ATOM); while (NULL_OB != (ratom = dt_next(ratoms))) { if (atom==ratom) {isIn3ring = 1; break;} } if (isIn3ring) break; } dt_reset(rings); nv = 0; /* number of neighbours */ /* finding the number of -,=,#,: bonds originating from the atom */ nsingle = 0; ndouble = 0; ntriple = 0; naromatic = 0; bonds = dt_stream(atom,TYP_BOND); while (NULL_OB != (bond = dt_next(bonds))) { nv++; bt = dt_bondtype(bond); if (bt==DX_BTY_SINGLE) nsingle++; else if (bt==DX_BTY_DOUBLE) ndouble++; else if (bt==DX_BTY_TRIPLE) ntriple++; else naromatic++; } /* finding the psa contribution for this fragment (atom with hydrogens) */ p = -1.; switch (an) { case 7: if (nv == 1) { if (nh==0 && ntriple==1 && q==0) p = 23.79; /* N# */ else if (nh==1 && ndouble==1 && q==0) p = 23.85; /* [NH]= */ else if (nh==2 && nsingle==1 && q==0) p = 26.02; /* [NH2]- */ else if (nh==2 && ndouble==1 && q==1) p = 25.59; /* [NH2+]= */ else if (nh==3 && nsingle==1 && q==1) p = 27.64; /* [NH3+]- */ } else if (nv == 2) { if (nh==0 && nsingle==1 && ndouble==1 && q==0) p = 12.36; /* =N- */ else if (nh==0 && ntriple==1 && ndouble==1 && q==0) p = 13.60; /* =N# */ else if (nh==1 && nsingle==2 && q==0 && !isIn3ring) p = 12.03; /* -[NH]- */ else if (nh==1 && nsingle==2 && q==0 && isIn3ring) p = 21.94; /* -[NH]-r3 */ else if (nh==0 && ntriple==1 && nsingle==1 && q==1) p = 4.36; /* -[N+]# */ else if (nh==1 && ndouble==1 && nsingle==1 && q==1) p = 13.97; /* -[NH+]= */ else if (nh==2 && nsingle==2 && q==1) p = 16.61; /* -[NH2+]- */ else if (nh==0 && naromatic==2 && q==0) p = 12.89; /* :[n]: */ else if (nh==1 && naromatic==2 && q==0) p = 15.79; /* :[nH]: */ else if (nh==1 && naromatic==2 && q==1) p = 14.14; /* :[nH+]: */ } else if (nv == 3) { if (nh==0 && nsingle==3 && q==0 && !isIn3ring) p = 3.24; /* -N(-)- */ else if (nh==0 && nsingle==3 && q==0 && isIn3ring) p = 3.01; /* -N(-)-r3 */ else if (nh==0 && nsingle==1 && ndouble==2 && q==0) p = 11.68; /* -N(=)= */ else if (nh==0 && nsingle==2 && ndouble==1 && q==1) p = 3.01; /* =[N+](-)- */ else if (nh==1 && nsingle==3 && q==1) p = 4.44; /* -[NH+](-)- */ else if (nh==0 && naromatic==3 && q==0) p = 4.41; /* :[n](:): */ else if (nh==0 && nsingle==1 && naromatic==2 && q==0) p = 4.93; /* -:[n](:): */ else if (nh==0 && ndouble==1 && naromatic==2 && q==0) p = 8.39; /* =:[n](:): */ else if (nh==0 && naromatic==3 && q==1) p = 4.10; /* :[n+](:): */ else if (nh==0 && nsingle==1 && naromatic==2 && q==1) p = 3.88; /* -:[n+](:): */ } else if (nv == 4) { if (nh==0 && nsingle==4 && q==1) p = 0.00; /* -[N+](-)(-)- */ } if (p < 0.) { /* N with non-standard valency */ p = 30.5 - nv * 8.2 + nh * 1.5; if (p < 0.) p = 0.; } break; case 8: if (nv == 1) { if (nh==0 && ndouble==1 && q==0) p = 17.07; /* O= */ else if (nh==1 && nsingle==1 && q==0) p = 20.23; /* [OH]- */ else if (nh==0 && nsingle==1 && q==-1) p = 23.06; /* [O-]- */ } else if (nv == 2) { if (nh==0 && nsingle==2 && q==0 && !isIn3ring) p = 9.23; /* -O- */ else if (nh==0 && nsingle==2 && q==0 && isIn3ring) p = 12.53; /* -O-r3 */ else if (nh==0 && naromatic==2 && q==0) p = 13.14; /* :o: */ } if (p < 0.) { /* O with non-standard valency */ p = 28.5 - nv * 8.6 + nh * 1.5; if (p < 0.) p = 0.; } break; } psa += p; } dt_dealloc(mol); return psa; }