Dear RDKit Developers,
First and foremost thank you for your excellent offering of a singularly
useful set of molecular descriptors!
As one of the authors cited by Dr Labute
(http://www.chemcomp.com/journal/vsadesc.htm), I've appreciated RDKit's
implementation of the "Labute descriptor" set. However, I've noticed that two
particular descriptors in that set, specifically SlogP_VSA9 and SMR_VSA8,
appear to always return a value of 0.0 no matter which of a substantial set
of small molecules is tested via:
from rdkit.Chem import Descriptors
Descriptors.SlogP_VSA9(molecule)
Descriptors.SMR_VSA8(molecule)
All other descriptors I've tried seem to produce sensible results (with
those same molecules).
Of course it's always possible that it's something I've mis-coded. I'm
attaching a simple Python program which returns the anomalous descriptor
values for me.
Thank you very much for all your efforts!
--
jeff godden
#!/usr/bin/python
# output some Labute descriptors via RDKit
from rdkit import Chem
from rdkit.Chem import Descriptors
import sys
# The script needs exactly one thing from the command line: the SD file to read.
if len(sys.argv) < 2:
    # Report the usage error on stderr so that it never ends up inside the
    # descriptor table when stdout is redirected to a file.
    sys.stderr.write("need an SDF filename on the commandline.\n")
    sys.exit(-1)
# Labute descriptors function pointers, keyed by descriptor name.
# The table is generated family by family from (name prefix, number of bins),
# giving SlogP_VSA1-12, SMR_VSA1-10 and PEOE_VSA1-14 in that insertion order.
dl = {
    prefix + str(index): getattr(Descriptors, prefix + str(index))
    for prefix, count in (('SlogP_VSA', 12), ('SMR_VSA', 10), ('PEOE_VSA', 14))
    for index in range(1, count + 1)
}
def row_format(n_descriptors, name_spec, mw_spec, value_spec):
    """Return the ``str.format`` template for one line of the output table.

    A line consists of a name column, a molecular-weight column and then
    ``n_descriptors`` descriptor columns, separated by single spaces.

    Each ``*_spec`` argument is a format spec without the braces, e.g.
    ``'13s'`` or ``'12.6f'``.  Building the template from the column count
    keeps the header and the data rows aligned with each other, whatever the
    number of descriptors is.
    """
    specs = [name_spec, mw_spec] + [value_spec] * n_descriptors
    return ' '.join('{:' + spec + '}' for spec in specs)


def main(sdf_path):
    """Print a fixed-width table of every descriptor in ``dl``.

    One header line is written first, then one line per molecule read from
    the SD file at ``sdf_path``.  Records that RDKit cannot parse are reported
    on stderr and skipped.
    """
    names = list(dl)
    print(row_format(len(names), '13s', '7s', '12s').format('name', 'mw', *names))

    data_format = row_format(len(names), '13s', '7.3f', '12.6f')
    with open(sdf_path, 'rb') as fin:  # 'rb' rdkit wants this to be bytes *sigh*
        for index, m in enumerate(Chem.ForwardSDMolSupplier(fin), 1):
            if m is None:
                # The supplier hands back None for a record it could not
                # parse; calling m.GetProp() on it would abort the whole run.
                sys.stderr.write('skipping unreadable SD record #%d\n' % index)
                continue
            name = m.GetProp('_Name')
            mw = Descriptors.MolWt(m)
            # dl.values() iterates in the same order as the header names.
            values = [f(m) for f in dl.values()]
            print(data_format.format(name, mw, *values))


if __name__ == '__main__':
    main(sys.argv[1])
# import pandas as pd
# d = pd.read_fwf('data')
# for k in d.keys():
# if 'name' not in k:
# print(k, d[k].mean())
# output from 3133 'random' ZINC database molecules
## descriptor mean
# mw 319.9400134014041
# SlogP_VSA1 9.619846179961822
# SlogP_VSA2 34.997968996171075
# SlogP_VSA3 9.793027915762599
# SlogP_VSA4 6.049321886407102
# SlogP_VSA5 25.947096066368807
# SlogP_VSA6 32.45248890906195
# SlogP_VSA7 0.7908396506062566
# SlogP_VSA8 4.296596145181884
# SlogP_VSA9 0.0
# SlogP_VSA10 4.070114919272454
# SlogP_VSA11 2.3151243225909495
# SlogP_VSA12 4.265721061263566
# SMR_VSA1 12.63901541544354
# SMR_VSA2 0.25907870899808505
# SMR_VSA3 10.056625507019914
# SMR_VSA4 2.714953877153796
# SMR_VSA5 25.574662986917733
# SMR_VSA6 19.44462612635612
# SMR_VSA7 44.14966515762617
# SMR_VSA8 0.0
# SMR_VSA9 4.360574342054924
# SMR_VSA10 15.398944225590286
# PEOE_VSA1 12.060297876515577
# PEOE_VSA2 6.086653529674543
# PEOE_VSA3 5.443402438098357
# PEOE_VSA4 2.052723992022967
# PEOE_VSA5 1.953408211550721
# PEOE_VSA6 18.40846272878091
# PEOE_VSA7