################################################################################
##
##  PACKAGE CALCMW
##
##  This package contains a routine which, given a SMILES string,
##  calculates either the monoisotopic or the average molecular weight.
##
##  Author: Alex Wong
##  Date:   15 March 1996
##
##  (C) 1996 CHIRON CORPORATION
##
################################################################################

package CalcMW;

use DayPerl;

$TRUE  = 1;
$FALSE = 0;

#//////////////////////////////////////////////////////////////////////////////#
#
#  The following hash table contains the atomic masses of most atoms.
#  There are several kinds of key for each atom:
#
#    'C'     plain symbol     - mass used when an atom carries no isotope
#                               label in "exact" mode (the most common
#                               isotope where that value is tabulated)
#    '13C'   number + symbol  - mass of that specific isotope
#    '_C'    '_' + symbol     - average (natural abundance) atomic weight
#
#  NOTE(review): for many elements (e.g. Li, B, and most metals) the
#  plain-symbol entry holds the same value as the '_' average entry, so
#  "exact" masses of molecules containing them are only as good as the
#  average weight.  Replace them with true isotope masses when needed.
#
#//////////////////////////////////////////////////////////////////////////////#

%ATOMIC_MASS = (
    '1H'   => 1.007825,
    'H'    => 1.007825,       # Hydrogen
    '_H'   => 1.008,          # Ave wt of Hydrogen
    '2H'   => 2.0140,
    '3H'   => 3.01605,
    'He'   => 4.0026,         # Helium
    '_He'  => 4.0026,         # Ave wt of Helium
    'Li'   => 6.94,           # Lithium
    '_Li'  => 6.94,           # Ave wt of Lithium
    'Be'   => 9.01218,        # Beryllium
    '_Be'  => 9.01218,        # Ave wt of Beryllium
    'B'    => 10.81,          # Boron
    '_B'   => 10.81,          # Ave wt of Boron
    '12C'  => 12.000000,
    'C'    => 12.000,         # Carbon
    '_C'   => 12.011,         # Ave wt of Carbon
    '13C'  => 13.003355,
    '14N'  => 14.003074,
    'N'    => 14.003074,      # Nitrogen
    '14C'  => 14.003241,
    '_N'   => 14.0067,        # Ave wt of Nitrogen
    '15N'  => 15.000108,
    '15C'  => 15.010599,
    'O'    => 15.9949146,     # Oxygen
    '16O'  => 15.994915,
    '_O'   => 15.9994,        # Ave wt of Oxygen
    '16C'  => 16.014701,
    '17O'  => 16.999131,
    '18O'  => 17.999160,
    '19F'  => 18.998403,
    'F'    => 18.9984033,     # Fluorine
    '_F'   => 18.99846,       # Ave wt of Fluorine
    'Ne'   => 20.17,          # Neon
    '_Ne'  => 20.17,          # Ave wt of Neon
    'Na'   => 22.98977,       # Sodium
    '_Na'  => 22.98977,       # Ave wt of Sodium
    'Mg'   => 24.305,         # Magnesium
    '_Mg'  => 24.305,         # Ave wt of Magnesium
    'Al'   => 26.9815,        # Aluminum
    '_Al'  => 26.9815,        # Ave wt of Aluminum
    '28Si' => 27.976927,
    'Si'   => 27.976927,      # Silicon (28Si; was the average weight 28.086)
    '_Si'  => 28.086,         # Ave wt of Silicon
    '29Si' => 28.976495,
    '30Si' => 29.973770,
    '31P'  => 30.973762,
    'P'    => 30.9738,        # Phosphorus
    '_P'   => 30.9738,        # Ave wt of Phosphorus
    '32S'  => 31.972070,
    'S'    => 31.9720718,     # Sulfur
    '_S'   => 32.06,          # Ave wt of Sulfur
    '33S'  => 32.971456,
    '34S'  => 33.967866,
    '35Cl' => 34.968852,
    'Cl'   => 34.9688527,     # Chlorine
    '_Cl'  => 35.453,         # Ave wt of Chlorine
    '37Cl' => 36.965903,
    'K'    => 39.1,           # Potassium
    '_K'   => 39.1,           # Ave wt of Potassium
    'Ar'   => 39.948,         # Argon
    '_Ar'  => 39.948,         # Ave wt of Argon
    'Ca'   => 40.08,          # Calcium
    '_Ca'  => 40.08,          # Ave wt of Calcium
    'Sc'   => 44.9559,        # Scandium
    '_Sc'  => 44.9559,        # Ave wt of Scandium
    'Ti'   => 47.9,           # Titanium
    '_Ti'  => 47.9,           # Ave wt of Titanium
    'V'    => 50.941,         # Vanadium
    '_V'   => 50.941,         # Ave wt of Vanadium
    'Cr'   => 51.996,         # Chromium
    '_Cr'  => 51.996,         # Ave wt of Chromium
    'Mn'   => 54.938,         # Manganese
    '_Mn'  => 54.938,         # Ave wt of Manganese
    'Fe'   => 55.847,         # Iron
    '_Fe'  => 55.847,         # Ave wt of Iron
    'Ni'   => 58.71,          # Nickel
    '_Ni'  => 58.71,          # Ave wt of Nickel
    'Co'   => 58.9332,        # Cobalt
    '_Co'  => 58.9332,        # Ave wt of Cobalt
    'Cu'   => 63.543,         # Copper
    '_Cu'  => 63.543,         # Ave wt of Copper
    'Zn'   => 65.38,          # Zinc
    '_Zn'  => 65.38,          # Ave wt of Zinc
    'Ga'   => 69.72,          # Gallium
    '_Ga'  => 69.72,          # Ave wt of Gallium
    'Ge'   => 72.59,          # Germanium
    '_Ge'  => 72.59,          # Ave wt of Germanium
    'As'   => 74.9216,        # Arsenic
    '_As'  => 74.9216,        # Ave wt of Arsenic
    '79Br' => 78.918336,
    '80Se' => 79.916521,
    'Se'   => 79.916521,      # Selenium (80Se; was 78.9183361, the 79Br mass)
    '_Se'  => 78.96,          # Ave wt of Selenium
    'Br'   => 78.918336,      # Bromine
    '_Br'  => 79.904,         # Ave wt of Bromine
    '81Br' => 80.916289,
    'Kr'   => 83.8,           # Krypton
    '_Kr'  => 83.8,           # Ave wt of Krypton
    'Rb'   => 85.467,         # Rubidium
    '_Rb'  => 85.467,         # Ave wt of Rubidium
    'Sr'   => 87.62,          # Strontium
    '_Sr'  => 87.62,          # Ave wt of Strontium
    'Y'    => 88.9059,        # Yttrium
    '_Y'   => 88.9059,        # Ave wt of Yttrium
    'Zr'   => 91.22,          # Zirconium
    '_Zr'  => 91.22,          # Ave wt of Zirconium
    'Nb'   => 92.9064,        # Niobium
    '_Nb'  => 92.9064,        # Ave wt of Niobium
    'Mo'   => 95.94,          # Molybdenum
    '_Mo'  => 95.94,          # Ave wt of Molybdenum
    'Tc'   => 98.9062,        # Technetium
    '_Tc'  => 98.9062,        # Ave wt of Technetium
    'Ru'   => 101.07,         # Ruthenium
    '_Ru'  => 101.07,         # Ave wt of Ruthenium
    'Rh'   => 102.9055,       # Rhodium
    '_Rh'  => 102.9055,       # Ave wt of Rhodium
    'Pd'   => 106.4,          # Palladium
    '_Pd'  => 106.4,          # Ave wt of Palladium
    'Ag'   => 107.868,        # Silver
    '_Ag'  => 107.868,        # Ave wt of Silver
    'Cd'   => 112.4,          # Cadmium
    '_Cd'  => 112.4,          # Ave wt of Cadmium
    'In'   => 114.82,         # Indium
    '_In'  => 114.82,         # Ave wt of Indium
    'Sn'   => 118.69,         # Tin
    '_Sn'  => 118.69,         # Ave wt of Tin
    'Sb'   => 121.75,         # Antimony
    '_Sb'  => 121.75,         # Ave wt of Antimony
    '127I' => 126.904473,
    'I'    => 126.9045,       # Iodine
    '_I'   => 126.9045,       # Ave wt of Iodine
    'Te'   => 127.6,          # Tellurium
    '_Te'  => 127.6,          # Ave wt of Tellurium
    'Xe'   => 131.3,          # Xenon
    '_Xe'  => 131.3,          # Ave wt of Xenon
    'Cs'   => 132.9055,       # Cesium
    '_Cs'  => 132.9055,       # Ave wt of Cesium
    'Ba'   => 137.34,         # Barium
    '_Ba'  => 137.34,         # Ave wt of Barium
    'La'   => 138.9055,       # Lanthanum
    '_La'  => 138.9055,       # Ave wt of Lanthanum
    'Ce'   => 140.12,         # Cerium
    '_Ce'  => 140.12,         # Ave wt of Cerium
    'Pr'   => 140.9077,       # Praseodymium
    '_Pr'  => 140.9077,       # Ave wt of Praseodymium
    'Nd'   => 144.24,         # Neodymium
    '_Nd'  => 144.24,         # Ave wt of Neodymium
    'Pm'   => 145.,           # Promethium
    '_Pm'  => 145.,           # Ave wt of Promethium
    'Sm'   => 150.4,          # Samarium
    '_Sm'  => 150.4,          # Ave wt of Samarium
    'Eu'   => 151.96,         # Europium
    '_Eu'  => 151.96,         # Ave wt of Europium
    'Gd'   => 157.25,         # Gadolinium
    '_Gd'  => 157.25,         # Ave wt of Gadolinium
    'Tb'   => 158.9254,       # Terbium
    '_Tb'  => 158.9254,       # Ave wt of Terbium
    'Dy'   => 162.5,          # Dysprosium
    '_Dy'  => 162.5,          # Ave wt of Dysprosium
    'Ho'   => 164.9303,       # Holmium
    '_Ho'  => 164.9303,       # Ave wt of Holmium
    'Er'   => 167.26,         # Erbium
    '_Er'  => 167.26,         # Ave wt of Erbium
    'Tm'   => 168.9342,       # Thulium
    '_Tm'  => 168.9342,       # Ave wt of Thulium
    'Yb'   => 173.04,         # Ytterbium
    '_Yb'  => 173.04,         # Ave wt of Ytterbium
    'Lu'   => 174.97,         # Lutetium
    '_Lu'  => 174.97,         # Ave wt of Lutetium
    'Hf'   => 178.49,         # Hafnium
    '_Hf'  => 178.49,         # Ave wt of Hafnium
    'Ta'   => 180.947,        # Tantalum
    '_Ta'  => 180.947,        # Ave wt of Tantalum
    'W'    => 183.85,         # Tungsten
    '_W'   => 183.85,         # Ave wt of Tungsten
    'Re'   => 186.2,          # Rhenium
    '_Re'  => 186.2,          # Ave wt of Rhenium
    'Os'   => 190.2,          # Osmium
    '_Os'  => 190.2,          # Ave wt of Osmium
    'Ir'   => 192.22,         # Iridium
    '_Ir'  => 192.22,         # Ave wt of Iridium
    'Pt'   => 195.09,         # Platinum
    '_Pt'  => 195.09,         # Ave wt of Platinum
    'Au'   => 196.9665,       # Gold
    '_Au'  => 196.9665,       # Ave wt of Gold
    'Hg'   => 200.59,         # Mercury
    '_Hg'  => 200.59,         # Ave wt of Mercury
    'Tl'   => 204.37,         # Thallium
    '_Tl'  => 204.37,         # Ave wt of Thallium
    'Pb'   => 207.2,          # Lead
    '_Pb'  => 207.2,          # Ave wt of Lead
    'Bi'   => 208.9806,       # Bismuth
    '_Bi'  => 208.9806,       # Ave wt of Bismuth
    'Po'   => 209.,           # Polonium
    '_Po'  => 209.,           # Ave wt of Polonium
    'At'   => 210.,           # Astatine
    '_At'  => 210.,           # Ave wt of Astatine
    'Rn'   => 222.,           # Radon
    '_Rn'  => 222.,           # Ave wt of Radon
    'Fr'   => 223.,           # Francium
    '_Fr'  => 223.,           # Ave wt of Francium
    'Ra'   => 226.,           # Radium
    '_Ra'  => 226.,           # Ave wt of Radium
    'Ac'   => 227.,           # Actinium
    '_Ac'  => 227.,           # Ave wt of Actinium
    'Pa'   => 231.0359,       # Protactinium
    '_Pa'  => 231.0359,       # Ave wt of Protactinium
    'Th'   => 232.0381,       # Thorium
    '_Th'  => 232.0381,       # Ave wt of Thorium
    'Np'   => 237.0408,       # Neptunium
    '_Np'  => 237.0408,       # Ave wt of Neptunium
    'U'    => 238.029,        # Uranium
    '_U'   => 238.029,        # Ave wt of Uranium
    'Am'   => 243.,           # Americium
    '_Am'  => 243.,           # Ave wt of Americium
    'Pu'   => 244.,           # Plutonium
    '_Pu'  => 244.,           # Ave wt of Plutonium
    'Bk'   => 247.,           # Berkelium
    'Cm'   => 247.,           # Curium
    '_Bk'  => 247.,           # Ave wt of Berkelium
    '_Cm'  => 247.,           # Ave wt of Curium
    'Cf'   => 251.,           # Californium
    '_Cf'  => 251.,           # Ave wt of Californium
    'Es'   => 254.09,         # Einsteinium
    '_Es'  => 254.09,         # Ave wt of Einsteinium
    'Fm'   => 257.08,         # Fermium
    '_Fm'  => 257.08,         # Ave wt of Fermium
    'Md'   => 258.098,        # Mendelevium
    '_Md'  => 258.098,        # Ave wt of Mendelevium
    'No'   => 259.10,         # Nobelium
    '_No'  => 259.10,         # Ave wt of Nobelium
    'Lr'   => 260.1,          # Lawrencium
    '_Lr'  => 260.1,          # Ave wt of Lawrencium
);

#//////////////////////////////////////////////////////////////////////////////#
#
#  _lookup_atom_mass  (private helper)
#
#  Returns the mass to use for one atom.
#
#    $symbol        atomic symbol as returned by dt_symbol()
#    $isotope       isotope mass number as returned by dt_weight();
#                   0 or undef means "no isotope specified"
#    $want_average  true  -> average weight ('_' . symbol entry)
#                   false -> isotope entry if labelled, else plain symbol
#
#  A key missing from %ATOMIC_MASS used to add undef (i.e. 0) to the
#  total without any notice.  Now a warning is emitted, and:
#    - an untabulated isotope falls back to its mass number, which is
#      far closer to the true mass than 0;
#    - an unknown symbol still contributes 0, as before.
#
#//////////////////////////////////////////////////////////////////////////////#

sub _lookup_atom_mass {
    my ($symbol, $isotope, $want_average) = @_;

    my $labelled = (defined $isotope && $isotope != 0) ? $TRUE : $FALSE;

    my $key = $want_average ? "_$symbol"
            : $labelled     ? "$isotope$symbol"
            :                 $symbol;

    my $mass = $ATOMIC_MASS{$key};
    return $mass if defined $mass;

    if (!$want_average && $labelled) {
        warn "CalcMW: no mass tabulated for isotope '$key'; "
           . "using mass number $isotope\n";
        return $isotope;
    }

    warn "CalcMW: no mass tabulated for '$key'; atom contributes 0\n";
    return 0;
}

#//////////////////////////////////////////////////////////////////////////////#
#
#  calc_molweight_of_smiles
#
#  This routine reads a SMILES and a type, which is either
#  'ave[rage]' or 'exact', and calculates either the average
#  molecular weight or the monoisotopic molecular weight, accordingly.
#  Any type that does not contain 'exact' (case-insensitive), including
#  an omitted one, selects the average weight.
#
#  Implicit hydrogens reported by dt_imp_hcount() are added for every
#  atom.  In 'exact' mode an isotope label on an atom (dt_weight() != 0)
#  selects the matching isotope entry of %ATOMIC_MASS; in average mode
#  isotope labels are not consulted.
#
#  Returns 0.0 when dt_smilin() cannot produce a molecule object.
#
#  usage:  $mw = calc_molweight_of_smiles($smiles, 'exact'|'ave');
#
#//////////////////////////////////////////////////////////////////////////////#

sub calc_molweight_of_smiles {
    my ($smiles, $type) = @_;

    # Decide once, outside the loop, which kind of mass is wanted.
    my $want_average = (defined $type && $type =~ /exact/i) ? $FALSE : $TRUE;

    my $molh = dt_smilin($smiles);    # molecule object parsed from the SMILES
    return 0.0 if $molh == NULL_OB;

    # The hydrogen mass is loop-invariant: average or 1H, depending on mode.
    my $hydrogen_mass = $want_average ? $ATOMIC_MASS{'_H'} : $ATOMIC_MASS{'H'};

    my $mw         = 0.0;             # resulting molecular weight
    my $atomstream = dt_stream($molh, TYP_ATOM);

    while ((my $atomh = dt_next($atomstream)) != NULL_OB) {
        my $symbol  = dt_symbol($atomh);
        my $isotope = $want_average ? 0 : dt_weight($atomh);

        $mw += _lookup_atom_mass($symbol, $isotope, $want_average);
        $mw += dt_imp_hcount($atomh) * $hydrogen_mass;
    }

    # Release the toolkit objects allocated above.
    dt_dealloc($atomstream);
    dt_dealloc($molh);

    return $mw;                       # return calculated molecular weight
}

1;