Hi Stephanie, It seems like the problem is with the SMILESReader. Where your code has:
SMILESReader sr = new SMILESReader(new StringReader(structure)); MoleculeSet set = (MoleculeSet) sr.read(builder.newMoleculeSet()); mol = set.getMolecule(0); you could use instead: SmilesParser parser = new SmilesParser(builder); parser.setPreservingAromaticity(true); mol = parser.parseSmiles(structure); AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol); which will preserve the double-bond structure in your smiles. The last method call is necessary for the hydrogen adder. gilleain On 10/3/11, Stephanie Canny <[email protected]> wrote: > I saw the note in the SMARTSQueryTool saying that because of > CDKHueckelAromaticityDetector<http://pele.farmbio.uu.se/nightly-1.3.1/cdk-javadoc-1.4.0/org/openscience/cdk/aromaticity/CDKHueckelAromaticityDetector.html>, > the query tool might not be able to properly detect patterns in polycyclic > molecules. But version 1.0.3 does seem to be able to detect correct > patterns in polycyclic molecules. Can someone please explain the > discrepancy?? > > Thanks > > From: Stephanie Canny [mailto:[email protected]] > Sent: Friday, September 30, 2011 2:26 PM > To: [email protected] > Subject: [Cdk-user] UniversalIsomorphismTester differences between CDK 1.0.3 > and 1.4.4 > > We are experiencing different results using the UniversalIsomorphismTester > between CDK versions 1.0.3 and 1.4.4, particularly with aromatic groups. If > the test code is correct then it is possible that version 1.4.4 is wrong. > The test code and output is below. 
> > public static void main(String[] args) { > String[] fgSmiles = { "O=C1C=CC=CC1=O", "O=C1C=CC(=O)C=C1", > "ClC=N" }; > // SIDs 26755257, 29215022 > String[] testSmiles = { > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)[NH3+])O", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O.Cl", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O.Cl", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O", > "C1CC2=CC=CC=C2C1NC3=C(C(=O)C4=C(C3=O)N=CC=C4)Cl", > > "CC1(OCC(O1)CSC2=C(C(=O)C3=C(C2=O)C=CC=N3)SCC4COC(O4)(C)C)C", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O", > "CC1COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C", > "C1=CC=C2C(=C1)C=CC3=C2C=CC4=C3C(=O)C=CC4=O", > "CC1=CC(=O)C2=C(C1=O)C=CC=C2O", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O", > > "C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N.[Na+].[Na+].[Na+]", > > "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5O)O)(C(=O)C)O)N)O", > > "C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N", > > "C1=CC(=C(C=C1CN=C(N)NC(=O)C2=C(N=C(C(=N2)Cl)N)N)Cl)Cl.Cl", > "C1=CC2=C(C(=O)C=CC2=O)C(=C1)O" }; > try { > > System.out.print("Smiles\t"); > for (String group : fgSmiles) { > System.out.print(group + "\t"); > } > System.out.println(); > for (String smiles : testSmiles) { > System.out.print(smiles); > IMolecule mol1 = getMolecule(smiles); > for (String ss : fgSmiles) { > IMolecule mol2 = getMolecule(ss); > System.out.print("\t" + > UniversalIsomorphismTester.isSubgraph(mol1, mol2) ); > } > System.out.println(); > } > > } catch (Exception e) { > // TODO Auto-generated 
catch block > e.printStackTrace(); > } > } > > /* CDK Version 1.4.4 */ > public static IMolecule getMolecule(String structure) throws > CDKException, IOException { > IMolecule mol = null; > IChemObjectBuilder builder = > DefaultChemObjectBuilder.getInstance(); > > SMILESReader sr = new SMILESReader(new StringReader(structure)); > MoleculeSet set = (MoleculeSet) sr.read(builder.newInstance(new > MoleculeSet().getClass())); > mol = set.getMolecule(0); > > CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(builder); > adder.addImplicitHydrogens(mol); > > return mol; > } > > /* CDK Version 1.0.3 */ > public static IMolecule getMolecule(String structure) throws > CDKException, IOException { > IMolecule mol = null; > DefaultChemObjectBuilder builder = > DefaultChemObjectBuilder.getInstance(); > SMILESReader sr = new SMILESReader(new StringReader(structure)); > MoleculeSet set = (MoleculeSet) > sr.read(builder.newMoleculeSet()); > mol = set.getMolecule(0); > > HydrogenAdder adder = new HydrogenAdder(); > adder.addImplicitHydrogensToSatisfyValency(mol); > > return mol; > } > > > CDK 1.4.4 > > CDK 1.0.3 > > CDK 1.4.4 > > CDK 1.0.3 > > CDK 1.4.4 > > CDK 1.0.3 > > Smiles > > O=C1C=CC(=O)C=C1 > > O=C1C=CC(=O)C=C1 > > O=C1C=CC=CC1=O > > O=C1C=CC=CC1=O > > ClC=N > > ClC=N > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)[NH3+])O > > FALSE > > TRUE > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O > > FALSE > > TRUE > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O.Cl > > FALSE > > TRUE > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O.Cl > > FALSE > > TRUE > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O > > FALSE > > TRUE > > C1CC2=CC=CC=C2C1NC3=C(C(=O)C4=C(C3=O)N=CC=C4)Cl > > FALSE > > TRUE > > CC1(OCC(O1)CSC2=C(C(=O)C3=C(C2=O)C=CC=N3)SCC4COC(O4)(C)C)C > > FALSE > > TRUE > > 
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O > > FALSE > > TRUE > > C1=CC=C2C(=C1)C=CC3=C2C=CC4=C3C(=O)C=CC4=O > > FALSE > > TRUE > > CC1=CC(=O)C2=C(C1=O)C=CC=C2O > > FALSE > > TRUE > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O > > FALSE > > TRUE > > CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5O)O)(C(=O)C)O)N)O > > FALSE > > TRUE > > C1=CC2=C(C(=O)C=CC2=O)C(=C1)O > > FALSE > > TRUE > > CC1COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C > > FALSE > > TRUE > > C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N.[Na+].[Na+].[Na+] > > FALSE > > TRUE > > C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N > > FALSE > > TRUE > > C1=CC(=C(C=C1CN=C(N)NC(=O)C2=C(N=C(C(=N2)Cl)N)N)Cl)Cl.Cl > > FALSE > > TRUE > > > > ------------------------------------------------------------------------------ All the data continuously generated in your IT infrastructure contains a definitive record of customers, application performance, security threats, fraudulent activity and more. Splunk takes this data and makes sense of it. Business sense. IT sense. Common sense. http://p.sf.net/sfu/splunk-d2dcopy1 _______________________________________________ Cdk-user mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/cdk-user

