I'm trying RDKit to calculate 3D descriptors, but I get significant different descriptors if I read molecules from a SMILES file (and clean/optimize the 3D structure before calculating the descriptors) or if I read the SDF file obtained from exactly the same SMILES file using exactly the same code to optimize the structures.
Scripts attached. Running smiltodesc_check.py produces descr_myfile.txt Running gen3D_check.py and then descr_from_sdf_check.py produces myfile_descr.txt But the two files are significantly different. Why aren't they the same? Which is wrong? JSousa
myfile.smi
Description: application/smil
import rdkit
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem import AllChem
md = rdMolStandardize.MetalDisconnector()
lfc = rdMolStandardize.LargestFragmentChooser(preferOrganic=True)
u = rdMolStandardize.Uncharger()
suppl = Chem.rdmolfiles.SmilesMolSupplier('myfile.smi', nameColumn=-1, titleLine=False)
foutput = open("descr_myfile.txt", "w")
foutput.close()
for mol in suppl:
mol = md.Disconnect(mol)
mol = lfc.choose(mol)
mol = u.uncharge(mol)
rdMolStandardize.Cleanup(mol)
mol = Chem.AddHs(mol)
Chem.SanitizeMol(mol)
AllChem.EmbedMolecule(mol,useRandomCoords=True)
AllChem.MMFFOptimizeMolecule(mol,'MMFF94s',maxIters=5000)
descriptorsRDF=rdMolDescriptors.CalcRDF(mol)
descriptorsMORSE=rdMolDescriptors.CalcMORSE(mol)
descriptorsMW=rdMolDescriptors.CalcExactMolWt(mol)
descriptorsWHIM=rdMolDescriptors.CalcWHIM(mol)
descriptorsAUTOCORR3D=rdMolDescriptors.CalcAUTOCORR3D(mol)
descriptorsGETAWAY=rdMolDescriptors.CalcGETAWAY(mol)
descriptorsPEOE=rdMolDescriptors.PEOE_VSA_(mol)
descriptorsSMR=rdMolDescriptors.SMR_VSA_(mol)
foutput = open("descr_myfile.txt", "a")
for item in descriptorsRDF:
foutput.write("%s," % item)
for item in descriptorsMORSE:
foutput.write("%s," % item)
for item in descriptorsWHIM:
foutput.write("%s," % item)
for item in descriptorsAUTOCORR3D:
foutput.write("%s," % item)
for item in descriptorsGETAWAY:
foutput.write("%s," % item)
for item in descriptorsPEOE:
foutput.write("%s," % item)
for item in descriptorsSMR:
foutput.write("%s," % item)
foutput.write("{:f},".format(descriptorsMW))
mol = Chem.RemoveHs(mol)
foutput.write(Chem.MolToSmiles(mol))
foutput.write("\n")
foutput.close()
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.MolStandardize import rdMolStandardize
md = rdMolStandardize.MetalDisconnector()
lfc = rdMolStandardize.LargestFragmentChooser(preferOrganic=True)
u = rdMolStandardize.Uncharger()
suppl = Chem.rdmolfiles.SmilesMolSupplier('myfile.smi', nameColumn=-1, titleLine=False)
w = Chem.SDWriter('myfile_3D.sdf')
for mol in suppl:
mol = md.Disconnect(mol)
mol = lfc.choose(mol)
mol = u.uncharge(mol)
rdMolStandardize.Cleanup(mol)
mol = Chem.AddHs(mol)
Chem.SanitizeMol(mol)
AllChem.EmbedMolecule(mol,useRandomCoords=True)
AllChem.MMFFOptimizeMolecule(mol,'MMFF94s',maxIters=5000)
w.write(mol)
import rdkit
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
suppl = Chem.SDMolSupplier('myfile_3D.sdf')
foutput = open("myfile_descr.txt", "w")
foutput.close()
for mol in suppl:
descriptorsRDF=rdMolDescriptors.CalcRDF(mol)
descriptorsMORSE=rdMolDescriptors.CalcMORSE(mol)
descriptorsMW=rdMolDescriptors.CalcExactMolWt(mol)
descriptorsWHIM=rdMolDescriptors.CalcWHIM(mol)
descriptorsAUTOCORR3D=rdMolDescriptors.CalcAUTOCORR3D(mol)
descriptorsGETAWAY=rdMolDescriptors.CalcGETAWAY(mol)
descriptorsPEOE=rdMolDescriptors.PEOE_VSA_(mol)
descriptorsSMR=rdMolDescriptors.SMR_VSA_(mol)
foutput = open("myfile_descr.txt", "a")
for item in descriptorsRDF:
foutput.write("%s," % item)
for item in descriptorsMORSE:
foutput.write("%s," % item)
for item in descriptorsWHIM:
foutput.write("%s," % item)
for item in descriptorsAUTOCORR3D:
foutput.write("%s," % item)
for item in descriptorsGETAWAY:
foutput.write("%s," % item)
for item in descriptorsPEOE:
foutput.write("%s," % item)
for item in descriptorsSMR:
foutput.write("%s," % item)
foutput.write("{:f},".format(descriptorsMW))
mol = Chem.RemoveHs(mol)
foutput.write(Chem.MolToSmiles(mol))
foutput.write("\n")
foutput.close()
_______________________________________________ Rdkit-discuss mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

