################################################################################
##
## PACKAGE CALCMW
##	
##	This package contains a routine which will, given a smiles code, 
##	calculate either the monoisotopic or average molecular weight.
##
## Author: Alex Wong
## Date:   15 March 1996
##
## (C) 1996 CHIRON CORPORATION
##	
################################################################################
package CalcMW;

	use DayPerl;

	# Package-wide boolean constants (plain package globals, so they stay
	# reachable as $CalcMW::TRUE / $CalcMW::FALSE).
	($TRUE, $FALSE) = (1, 0);


#//////////////////////////////////////////////////////////////////////////////#
#
#	The following hash table contains the atomic masses of most atoms.
#	There  are  several masses for each atom.  The ones with the plain
#	symbol represent the mass  of  the  most  common  isotope.   Those
#	entries  with  preceding  numbers  are  isotopic  values.    Those
#	beginning with '_' are average masses.
#
#//////////////////////////////////////////////////////////////////////////////#

	# Key conventions:
	#	'X'		mass used for "exact" (monoisotopic) calculations
	#	'_X'	average atomic weight of element X
	#	'nnX'	mass of the isotope of X with mass number nn
	#
	# NOTE: for many elements (e.g. Li, B, Mg, Fe and most heavy elements)
	# the 'X' and '_X' entries below are identical, so "exact" results for
	# molecules containing them are really based on the average weight.
	#
	# Corrections relative to the original table:
	#	'2H'	was truncated to 2.0140
	#	'_F'	was 18.99846 (digit typo; F has a single stable isotope)
	#	'Si'	held the average weight instead of the 28Si mass
	#	'Se'	held the 79Br mass instead of the 80Se mass
	%ATOMIC_MASS = (
		'1H' 	=>		1.007825,
		'H' 	=>	1.007825,			# Hydrogen
		'_H' 	=>	1.008,			# Ave wt of Hydrogen
		'2H' 	=>		2.014102,		# Deuterium
		'3H' 	=>		3.01605,
		'He' 	=>	4.0026,			# Helium
		'_He' 	=>	4.0026,			# Ave wt of Helium
		'Li' 	=>	6.94,			# Lithium
		'_Li' 	=>	6.94,			# Ave wt of Lithium
		'Be' 	=>	9.01218,			# Beryllium
		'_Be' 	=>	9.01218,			# Ave wt of Beryllium
		'B' 	=>	10.81,			# Boron
		'_B' 	=>	10.81,			# Ave wt of Boron
		'12C' 	=>		12.000000,
		'C' 	=>	12.000,			# Carbon
		'_C' 	=>	12.011,			# Ave wt of Carbon
		'13C' 	=>		13.003355,
		'14N' 	=>		14.003074,
		'N' 	=>	14.003074,			# Nitrogen
		'14C' 	=>		14.003241,
		'_N' 	=>	14.0067,			# Ave wt of Nitrogen
		'15N' 	=>		15.000108,
		'15C' 	=>		15.010599,
		'O' 	=>	15.9949146,			# Oxygen
		'16O' 	=>		15.994915,
		'_O' 	=>	15.9994,			# Ave wt of Oxygen
		'16C' 	=>		16.014701,
		'17O' 	=>		16.999131,
		'18O' 	=>		17.999160,
		'19F' 	=>		18.998403,
		'F' 	=>	18.9984033,			# Fluorine
		'_F' 	=>	18.998403,			# Ave wt of Fluorine (monoisotopic element)
		'Ne' 	=>	20.17,			# Neon
		'_Ne' 	=>	20.17,			# Ave wt of Neon
		'Na' 	=>	22.98977,			# Sodium
		'_Na' 	=>	22.98977,			# Ave wt of Sodium
		'Mg' 	=>	24.305,			# Magnesium
		'_Mg' 	=>	24.305,			# Ave wt of Magnesium
		'Al' 	=>	26.9815,			# Aluminum
		'_Al' 	=>	26.9815,			# Ave wt of Aluminum
		'28Si' 	=>		27.976927,
		'Si' 	=>	27.976927,			# Silicon (most common isotope, 28Si)
		'_Si' 	=>	28.086,			# Ave wt of Silicon
		'29Si' 	=>		28.976495,
		'30Si' 	=>		29.973770,
		'31P' 	=>		30.973762,
		'P' 	=>	30.9738,			# Phosphorus
		'_P' 	=>	30.9738,			# Ave wt of Phosphorus
		'32S' 	=>		31.972070,
		'S' 	=>	31.9720718,			# Sulfur
		'_S' 	=>	32.06,			# Ave wt of Sulfur
		'33S' 	=>		32.971456,
		'34S' 	=>		33.967866,
		'35Cl' 	=>		34.968852,
		'Cl' 	=>	34.9688527,			# Chlorine
		'_Cl' 	=>	35.453,			# Ave wt of Chlorine
		'37Cl' 	=>		36.965903,
		'K' 	=>	39.1,			# Potassium
		'_K' 	=>	39.1,			# Ave wt of Potassium
		'Ar' 	=>	39.948,			# Argon
		'_Ar' 	=>	39.948,			# Ave wt of Argon
		'Ca' 	=>	40.08,			# Calcium
		'_Ca' 	=>	40.08,			# Ave wt of Calcium
		'Sc' 	=>	44.9559,			# Scandium
		'_Sc' 	=>	44.9559,			# Ave wt of Scandium
		'Ti' 	=>	47.9,			# Titanium
		'_Ti' 	=>	47.9,			# Ave wt of Titanium
		'V' 	=>	50.941,			# Vanadium
		'_V' 	=>	50.941,			# Ave wt of Vanadium
		'Cr' 	=>	51.996,			# Chromium
		'_Cr' 	=>	51.996,			# Ave wt of Chromium
		'Mn' 	=>	54.938,			# Manganese
		'_Mn' 	=>	54.938,			# Ave wt of Manganese
		'Fe' 	=>	55.847,			# Iron
		'_Fe' 	=>	55.847,			# Ave wt of Iron
		'Ni' 	=>	58.71,			# Nickel
		'_Ni' 	=>	58.71,			# Ave wt of Nickel
		'Co' 	=>	58.9332,			# Cobalt
		'_Co' 	=>	58.9332,			# Ave wt of Cobalt
		'Cu' 	=>	63.543,			# Copper
		'_Cu' 	=>	63.543,			# Ave wt of Copper
		'Zn' 	=>	65.38,			# Zinc
		'_Zn' 	=>	65.38,			# Ave wt of Zinc
		'Ga' 	=>	69.72,			# Gallium
		'_Ga' 	=>	69.72,			# Ave wt of Gallium
		'Ge' 	=>	72.59,			# Germanium
		'_Ge' 	=>	72.59,			# Ave wt of Germanium
		'As' 	=>	74.9216,			# Arsenic
		'_As' 	=>	74.9216,			# Ave wt of Arsenic
		'79Br' 	=>		78.918336,
		'Se' 	=>	79.916521,			# Selenium (most common isotope, 80Se)
		'_Se' 	=>	78.96,			# Ave wt of Selenium
		'Br' 	=>	78.918336,			# Bromine
		'_Br' 	=>	79.904,			# Ave wt of Bromine
		'81Br' 	=>		80.916289,
		'Kr' 	=>	83.8,			# Krypton
		'_Kr' 	=>	83.8,			# Ave wt of Krypton
		'Rb' 	=>	85.467,			# Rubidium
		'_Rb' 	=>	85.467,			# Ave wt of Rubidium
		'Sr' 	=>	87.62,			# Strontium
		'_Sr' 	=>	87.62,			# Ave wt of Strontium
		'Y' 	=>	88.9059,			# Yttrium
		'_Y' 	=>	88.9059,			# Ave wt of Yttrium
		'Zr' 	=>	91.22,			# Zirconium
		'_Zr' 	=>	91.22,			# Ave wt of Zirconium
		'Nb' 	=>	92.9064,			# Niobium
		'_Nb' 	=>	92.9064,			# Ave wt of Niobium
		'Mo' 	=>	95.94,			# Molybdenum
		'_Mo' 	=>	95.94,			# Ave wt of Molybdenum
		'Tc' 	=>	98.9062,			# Technetium
		'_Tc' 	=>	98.9062,			# Ave wt of Technetium
		'Ru' 	=>	101.07,			# Ruthenium
		'_Ru' 	=>	101.07,			# Ave wt of Ruthenium
		'Rh' 	=>	102.9055,			# Rhodium
		'_Rh' 	=>	102.9055,			# Ave wt of Rhodium
		'Pd' 	=>	106.4,			# Palladium
		'_Pd' 	=>	106.4,			# Ave wt of Palladium
		'Ag' 	=>	107.868,			# Silver
		'_Ag' 	=>	107.868,			# Ave wt of Silver
		'Cd' 	=>	112.4,			# Cadmium
		'_Cd' 	=>	112.4,			# Ave wt of Cadmium
		'In' 	=>	114.82,			# Indium
		'_In' 	=>	114.82,			# Ave wt of Indium
		'Sn' 	=>	118.69,			# Tin
		'_Sn' 	=>	118.69,			# Ave wt of Tin
		'Sb' 	=>	121.75,			# Antimony
		'_Sb' 	=>	121.75,			# Ave wt of Antimony
		'127I' 	=>		126.904473,
		'I' 	=>	126.9045,			# Iodine
		'_I' 	=>	126.9045,			# Ave wt of Iodine
		'Te' 	=>	127.6,			# Tellurium
		'_Te' 	=>	127.6,			# Ave wt of Tellurium
		'Xe' 	=>	131.3,			# Xenon
		'_Xe' 	=>	131.3,			# Ave wt of Xenon
		'Cs' 	=>	132.9055,			# Cesium
		'_Cs' 	=>	132.9055,			# Ave wt of Cesium
		'Ba' 	=>	137.34,			# Barium
		'_Ba' 	=>	137.34,			# Ave wt of Barium
		'La' 	=>	138.9055,			# Lanthanum
		'_La' 	=>	138.9055,			# Ave wt of Lanthanum
		'Ce' 	=>	140.12,			# Cerium
		'_Ce' 	=>	140.12,			# Ave wt of Cerium
		'Pr' 	=>	140.9077,			# Praseodymium
		'_Pr' 	=>	140.9077,			# Ave wt of Praseodymium
		'Nd' 	=>	144.24,			# Neodymium
		'_Nd' 	=>	144.24,			# Ave wt of Neodymium
		'Pm' 	=>	145.,			# Promethium
		'_Pm' 	=>	145.,			# Ave wt of Promethium
		'Sm' 	=>	150.4,			# Samarium
		'_Sm' 	=>	150.4,			# Ave wt of Samarium
		'Eu' 	=>	151.96,			# Europium
		'_Eu' 	=>	151.96,			# Ave wt of Europium
		'Gd' 	=>	157.25,			# Gadolinium
		'_Gd' 	=>	157.25,			# Ave wt of Gadolinium
		'Tb' 	=>	158.9254,			# Terbium
		'_Tb' 	=>	158.9254,			# Ave wt of Terbium
		'Dy' 	=>	162.5,			# Dysprosium
		'_Dy' 	=>	162.5,			# Ave wt of Dysprosium
		'Ho' 	=>	164.9303,			# Holmium
		'_Ho' 	=>	164.9303,			# Ave wt of Holmium
		'Er' 	=>	167.26,			# Erbium
		'_Er' 	=>	167.26,			# Ave wt of Erbium
		'Tm' 	=>	168.9342,			# Thulium
		'_Tm' 	=>	168.9342,			# Ave wt of Thulium
		'Yb' 	=>	173.04,			# Ytterbium
		'_Yb' 	=>	173.04,			# Ave wt of Ytterbium
		'Lu' 	=>	174.97,			# Lutetium
		'_Lu' 	=>	174.97,			# Ave wt of Lutetium
		'Hf' 	=>	178.49,			# Hafnium
		'_Hf' 	=>	178.49,			# Ave wt of Hafnium
		'Ta' 	=>	180.947,			# Tantalum
		'_Ta' 	=>	180.947,			# Ave wt of Tantalum
		'W' 	=>	183.85,			# Tungsten
		'_W' 	=>	183.85,			# Ave wt of Tungsten
		'Re' 	=>	186.2,			# Rhenium
		'_Re' 	=>	186.2,			# Ave wt of Rhenium
		'Os' 	=>	190.2,			# Osmium
		'_Os' 	=>	190.2,			# Ave wt of Osmium
		'Ir' 	=>	192.22,			# Iridium
		'_Ir' 	=>	192.22,			# Ave wt of Iridium
		'Pt' 	=>	195.09,			# Platinum
		'_Pt' 	=>	195.09,			# Ave wt of Platinum
		'Au' 	=>	196.9665,			# Gold
		'_Au' 	=>	196.9665,			# Ave wt of Gold
		'Hg' 	=>	200.59,			# Mercury
		'_Hg' 	=>	200.59,			# Ave wt of Mercury
		'Tl' 	=>	204.37,			# Thallium
		'_Tl' 	=>	204.37,			# Ave wt of Thallium
		'Pb' 	=>	207.2,			# Lead
		'_Pb' 	=>	207.2,			# Ave wt of Lead
		'Bi' 	=>	208.9806,			# Bismuth
		'_Bi' 	=>	208.9806,			# Ave wt of Bismuth
		'Po' 	=>	209.,			# Polonium
		'_Po' 	=>	209.,			# Ave wt of Polonium
		'At' 	=>	210.,			# Astatine
		'_At' 	=>	210.,			# Ave wt of Astatine
		'Rn' 	=>	222.,			# Radon
		'_Rn' 	=>	222.,			# Ave wt of Radon
		'Fr' 	=>	223.,			# Francium
		'_Fr' 	=>	223.,			# Ave wt of Francium
		'Ra' 	=>	226.,			# Radium
		'_Ra' 	=>	226.,			# Ave wt of Radium
		'Ac' 	=>	227.,			# Actinium
		'_Ac' 	=>	227.,			# Ave wt of Actinium
		'Pa' 	=>	231.0359,			# Protactinium
		'_Pa' 	=>	231.0359,			# Ave wt of Protactinium
		'Th' 	=>	232.0381,			# Thorium
		'_Th' 	=>	232.0381,			# Ave wt of Thorium
		'Np' 	=>	237.0408,			# Neptunium
		'_Np' 	=>	237.0408,			# Ave wt of Neptunium
		'U' 	=>	238.029,			# Uranium
		'_U' 	=>	238.029,			# Ave wt of Uranium
		'Am' 	=>	243.,			# Americium
		'_Am' 	=>	243.,			# Ave wt of Americium
		'Pu' 	=>	244.,			# Plutonium
		'_Pu' 	=>	244.,			# Ave wt of Plutonium
		'Bk' 	=>	247.,			# Berkelium
		'Cm' 	=>	247.,			# Curium
		'_Bk' 	=>	247.,			# Ave wt of Berkelium
		'_Cm' 	=>	247.,			# Ave wt of Curium
		'Cf' 	=>	251.,			# Californium
		'_Cf' 	=>	251.,			# Ave wt of Californium
		'Es' 	=>	254.09,			# Einsteinium
		'_Es' 	=>	254.09,			# Ave wt of Einsteinium
		'Fm' 	=>	257.08,			# Fermium
		'_Fm' 	=>	257.08,			# Ave wt of Fermium
		'Md' 	=>	258.098,			# Mendelevium
		'_Md' 	=>	258.098,			# Ave wt of Mendelevium
		'No' 	=>	259.10,			# Nobelium
		'_No' 	=>	259.10,			# Ave wt of Nobelium
		'Lr' 	=>	260.1,			# Lawrencium
		'_Lr' 	=>	260.1,			# Ave wt of Lawrencium
	);


#//////////////////////////////////////////////////////////////////////////////#
#	
#	calc_molweight_of_smiles
#	 
#		This  routine  reads  a  smiles  and  a  type,  which  is   either
#		'ave[rage]'  or  'exact',  and  calculates  either   the   average
#		molecular weight or monoisotopic molecular weight, accordingly.
#	
#	usage: $mw = calc_molweight_of_smiles($smiles, 'exact'|'ave');
#	
#//////////////////////////////////////////////////////////////////////////////#

sub calc_molweight_of_smiles {
	#
	#	Sums the per-atom masses from %ATOMIC_MASS over every atom of the
	#	molecule built from a smiles, plus the mass of each atom's
	#	implicit hydrogens.
	#
	#	Arguments:
	#		$smiles	smiles string handed to dt_smilin
	#		$type	any string containing 'exact' (case-insensitive) selects
	#				the monoisotopic weight; anything else, including
	#				undef, selects the average weight
	#
	#	Returns the molecular weight as a number, or 0.0 when dt_smilin
	#	returns NULL_OB (presumably an unparsable smiles).
	#
	#	Notes:
	#	  - Average mode ignores isotope labels: [13C] is weighed as '_C'.
	#	  - Exact mode with an isotope that is not in %ATOMIC_MASS falls
	#		back to the isotope's mass number (an approximation) and warns,
	#		instead of silently adding nothing for that atom.
	#	  - An atom whose symbol is not in the table contributes 0 and
	#		produces a warning.
	#
	my $smiles = shift;
	my $type = shift;
	my $mw = 0.0;			# running molecular weight

	# Plain boolean; an undefined $type quietly means "average".
	my $ave = !(defined $type && $type =~ /exact/i);

	my $molh = dt_smilin($smiles);
	return 0.0 if $molh == NULL_OB;

	# Hydrogen mass is loop-invariant: average or 1H depending on mode.
	my $h_mass = $ATOMIC_MASS{$ave ? '_H' : 'H'};

	my $atomstream = dt_stream($molh, TYP_ATOM);
	my $atomh;

	while (($atomh = dt_next($atomstream)) != NULL_OB) {
		my $symbol = dt_symbol($atomh);
		my $isotope = 0;	# isotope label; 0 means none was specified
		my $key;

		if ($ave) {
			$key = "_$symbol";
		} else {
			$isotope = dt_weight($atomh);
			$key = $isotope ? "$isotope$symbol" : $symbol;
		}

		if (exists $ATOMIC_MASS{$key}) {
			$mw += $ATOMIC_MASS{$key};
		} elsif ($isotope) {
			# Isotope not tabulated: its mass number is far closer to the
			# true isotopic mass than dropping the atom altogether.
			warn "CalcMW: no mass for isotope '$key'; using mass number $isotope\n";
			$mw += $isotope;
		} else {
			warn "CalcMW: no mass for atom '$symbol'; it contributes 0\n";
		}

		# Only implicit hydrogens are added here; this assumes explicit
		# hydrogens appear in the atom stream as atoms in their own right
		# -- TODO confirm against the toolkit documentation.
		$mw += dt_imp_hcount($atomh) * $h_mass;
	}
	dt_dealloc($atomstream);
	dt_dealloc($molh);

	return $mw;		# total molecular weight
}
