Hi All,
I've been trying to calculate BCUT2D descriptors in parallel with Dask and
get the following error with the code below:
TypeError: cannot pickle 'Boost.Python.function' object
Everything works if I call mw_df, which calculates molecular weight, but I
get the error above if I call bcut_df. Does anyone have a workaround?
Thanks,
Pat
#!/usr/bin/env python
import sys
import dask.dataframe as dd
import pandas as pd
from rdkit import Chem
from rdkit.Chem.Descriptors import MolWt
from rdkit.Chem.rdMolDescriptors import BCUT2D
import time
# -- molecular weight functions
def calc_mw(smi):
    """Return the molecular weight for the SMILES string *smi*.

    The string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to ``MolWt``.
    """
    return MolWt(Chem.MolFromSmiles(smi))
def mw_df(df):
    """Run ``calc_mw`` over every entry of the ``SMILES`` column of *df*.

    Returns the result of ``Series.apply``, i.e. one value per row.
    """
    smiles_column = df.SMILES
    return smiles_column.apply(calc_mw)
# -- bcut functions
def bcut_df(df):
    """Run ``calc_bcut`` over every entry of the ``SMILES`` column of *df*.

    Returns the result of ``Series.apply``, i.e. one value per row.
    """
    # DataFrame.apply would pass whole columns (Series objects) to calc_bcut
    # rather than individual SMILES strings, so select the SMILES column
    # first -- the same pattern mw_df uses.
    return df.SMILES.apply(calc_bcut)
def calc_bcut(smi):
    """Return the BCUT2D descriptors for the SMILES string *smi*.

    The string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed to ``rdMolDescriptors.BCUT2D``.
    """
    # Look BCUT2D up through its module at call time instead of through a
    # module-level global. A global bound directly to the Boost.Python
    # function appears to be what triggers "cannot pickle
    # 'Boost.Python.function' object" when this function is shipped to
    # worker processes; modules, by contrast, are pickled by reference.
    from rdkit.Chem import rdMolDescriptors

    mol = Chem.MolFromSmiles(smi)
    return rdMolDescriptors.BCUT2D(mol)
def main():
    """Compute MW and BCUT columns for the SMILES file named in ``sys.argv[1]``.

    The input is read as a space-separated file with the columns ``SMILES``
    and ``Name``, split into 16 Dask partitions, and each descriptor is
    computed per partition with the ``processes`` scheduler. The elapsed
    wall-clock time and the head of the resulting frame are printed.
    """
    start = time.time()
    df = pd.read_csv(sys.argv[1], sep=" ", names=["SMILES", "Name"])
    ddf = dd.from_pandas(df, npartitions=16)

    # Each assignment must be a single logical line; the parentheses keep the
    # long call chain attached to its left-hand side.
    ddf['MW'] = (
        ddf.map_partitions(mw_df, meta='float').compute(scheduler='processes')
    )
    # NOTE(review): BCUT2D presumably yields several values per molecule, so
    # meta='float' may not describe this column accurately -- confirm.
    ddf['BCUT'] = (
        ddf.map_partitions(bcut_df, meta='float').compute(scheduler='processes')
    )

    print(time.time() - start)
    print(ddf.head())
# Run the benchmark only when executed as a script, not when imported
# (worker processes may import this module).
if __name__ == "__main__":
    main()
_______________________________________________
Rdkit-discuss mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss