Hi all,

since the new release I'm experiencing problems with exact structure search in 
the cartridge. If an index is defined on the mol column exact structure search 
( @= ) doesn't work (i.e. yields no results). I tried it with rdkit compiled 
from source under CentOS 6.5 and with the RPMs from Gianluca Sforna for Fedora 
20. In both cases postgres 9.3 was used. Can anyone confirm this or am I 
missing something?

Here's what I've done (based on the emolecules example from the docs):

RDKit 2014.09 (not working):
#####################
[moe@localhost db]$ createdb emolecules
[moe@localhost db]$ psql -c 'create extension rdkit' emolecules
CREATE EXTENSION
[moe@localhost db]$ psql -c 'SELECT rdkit_version()' emolecules
rdkit_version
---------------
0.73.0
(1 row)

[moe@localhost db]$ wget 
http://downloads.emolecules.com/free/2014-10-01/version.smi.gz
[...]
2014-10-31 10:52:29 (1,08 MB/s) - 'version.smi.gz' saved [88871202/88871202]

[moe@localhost db]$ psql -c 'create table raw_data (id SERIAL, smiles text, 
emol_id integer, parent_id integer)' emolecules
CREATE TABLE
[moe@localhost db]$ zcat version.smi.gz | sed '1d; s/\\/\\\\/g' | psql -c "copy 
raw_data (smiles,emol_id,parent_id) from stdin with delimiter ' '" emolecules
[moe@localhost db]$ psql emolecules
psql (9.3.5)
Type "help" for help.

emolecules=# SELECT * INTO mols FROM (SELECT 
id,mol_from_smiles(smiles::cstring) m FROM raw_data) tmp WHERE m IS NOT null 
LIMIT 1000;
SELECT 1000
emolecules=# SELECT * FROM mols WHERE m @= Mol_From_Smiles('CCOC(=N)CC');
id  |     m
-----+------------
383 | CCOC(=N)CC
(1 row)

emolecules=# CREATE INDEX molidx ON mols USING gist(m);
CREATE INDEX
emolecules=# SELECT * FROM mols WHERE m @= Mol_From_Smiles('CCOC(=N)CC');
id | m
----+---
(0 rows)

emolecules=# DROP INDEX molidx; DROP TABLE mols; SELECT * INTO mols FROM 
(SELECT id,mol_from_smiles(smiles::cstring) m FROM raw_data) tmp WHERE m IS NOT 
null LIMIT 1000;  SELECT * FROM mols WHERE m @= Mol_From_Smiles('CCOC(=N)CC');
DROP INDEX
DROP TABLE
SELECT 1000
id  |     m
-----+------------
383 | CCOC(=N)CC
(1 row)
#####################


RDKit 2014.03 (working):
#####################
-bash-4.1$ createdb emolecules_test
-bash-4.1$ psql -c 'SELECT rdkit_version()' emolecules_test
rdkit_version
---------------
0.72.0
(1 row)

-bash-4.1$ psql -c 'create table raw_data (id SERIAL, smiles text, emol_id 
integer, parent_id integer)' emolecules_test
CREATE TABLE
-bash-4.1$ zcat version.smi.gz | sed '1d; s/\\/\\\\/g' | psql -c "copy raw_data 
(smiles,emol_id,parent_id) from stdin with delimiter ' '" emolecules_test
-bash-4.1$ psql emolecules_test
psql (9.3.4)
Type "help" for help.

emolecules_test=# SELECT * INTO mols FROM (SELECT 
id,mol_from_smiles(smiles::cstring) m FROM raw_data) tmp WHERE m IS NOT null 
LIMIT 1000;
SELECT 1000
emolecules_test=# SELECT * FROM mols WHERE m @= Mol_From_Smiles('CCOC(=N)CC');
id  |     m
-----+------------
383 | CCOC(=N)CC
(1 row)

emolecules_test=# CREATE INDEX molidx ON mols USING gist(m);
CREATE INDEX
emolecules_test=# SELECT * FROM mols WHERE m @= Mol_From_Smiles('CCOC(=N)CC');
id  |     m
-----+------------
383 | CCOC(=N)CC
(1 row)

emolecules_test=# DROP INDEX molidx; DROP TABLE mols; SELECT * INTO mols FROM 
(SELECT id,mol_from_smiles(smiles::cstring) m FROM raw_data) tmp WHERE m IS NOT 
null LIMIT 1000;  SELECT * FROM mols WHERE m @= Mol_From_Smiles('CCOC(=N)CC');
DROP INDEX
DROP TABLE
SELECT 1000
id  |     m
-----+------------
383 | CCOC(=N)CC
(1 row)
#####################



Best,
Daniel

------------------------------------------------------------------------------
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to