Hi Gustavo,

you should be able to address this with a custom AtomCompare (and
BondCompare, if you want to use bond queries too) class, that now is also
supported from Python.
You can take a look at Code/GraphMol/FMCS/Wrap/testFMCS.py for inspiration
how to use it; here's something that seems to work for your example:

from rdkit import Chem
from rdkit.Chem import rdFMCS

template =
Chem.MolFromSmarts('[a]1(-[S](-*)(=[O])=[O]):[a]:[a]:[a]:[a]:[a]:1')
# This should give a sulfone connected to an aromatic ring and
# some other (any) element. Notice that the ring may have
# any atoms (N,C,O), but for me it is important to have the SO2 group.

template
[image: image.png]

mol1 = Chem.MolFromSmiles('CS(=O)(=O)c1ccc(C2=C(c3ccccc3)CCN2)cc1')
# This molecule has the pattern.

mol1
[image: image.png]

compare = [template, mol1]
res = rdFMCS.FindMCS(compare,
                atomCompare=rdFMCS.AtomCompare.CompareElements,
                bondCompare=rdFMCS.BondCompare.CompareAny,
                ringMatchesRingOnly=False,
                completeRingsOnly=False)
res.smartsString
# gives: '[#16](=[#8])=[#8]'

# Let's address the problem with a custom AtomCompare class:

class CompareQueryAtoms(rdFMCS.MCSAtomCompare):
    def __call__(self, p, mol1, atom1, mol2, atom2):
        a1 = mol1.GetAtomWithIdx(atom1)
        a2 = mol2.GetAtomWithIdx(atom2)
        if ((not a1.HasQuery()) and (not a2.HasQuery()) and
a1.GetAtomicNum() != a2.GetAtomicNum()):
            return False
        if (p.MatchValences and a1.GetTotalValence() !=
a2.GetTotalValence()):
            return False
        if (p.MatchChiralTag and not self.CheckAtomChirality(p, mol1,
atom1, mol2, atom2)):
            return False
        if (p.MatchFormalCharge and (not a1.HasQuery()) and (not
a2.HasQuery()) and not self.CheckAtomCharge(p, mol1, atom1, mol2, atom2)):
            return False
        if p.RingMatchesRingOnly:
            return self.CheckAtomRingMatch(p, mol1, atom1, mol2, atom2)
        if ((a1.HasQuery() or a2.HasQuery()) and (not a1.Match(a2))):
            return False
        return True

params = rdFMCS.MCSParameters()
params.AtomCompareParameters.RingMatchesRingOnly = False
params.BondCompareParameters.RingMatchesRingOnly = False
params.AtomCompareParameters.CompleteRingsOnly = False
params.BondCompareParameters.CompleteRingsOnly = False
params.BondTyper = rdFMCS.BondCompare.CompareAny
params.AtomTyper = CompareQueryAtoms()

compare = [template, mol1]
res = rdFMCS.FindMCS(compare, params)
res.smartsString

'[#16](-[#0,#6])(=[#8])(=[#8])-[#0,#6]1:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#6]:1'


# the queryMol returned by MCS will match the template, but the
original template query

# has many more details, so we extract the MCS part of the original
template and use that

# as query instead

def trim_template(template, query):
    template_mcs_core = Chem.ReplaceSidechains(template, query)
    for a in template_mcs_core.GetAtoms():
        if (not a.GetAtomicNum()) and a.GetIsotope():
            a.SetAtomicNum(1)
            a.SetIsotope(0)
    return Chem.RemoveAllHs(template_mcs_core)


query_mol = trim_template(template, res.queryMol)
template.GetSubstructMatch(query_mol)

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)


# here there seems to a be a bug with the 2D depiction, but that's another story

template

[image: image.png]

mol1.GetSubstructMatches(query_mol)

((4, 1, 0, 2, 3, 5, 6, 7, 19, 20),)


mol1

[image: image.png]


mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1')
compare = [template, mol2]


mol2

[image: image.png]


res = rdFMCS.FindMCS(compare, params)
res.smartsString

'[#0,#6]1:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#7]:[#0,#7]:1'


query_mol = trim_template(template, res.queryMol)

query_mol

[image: image.png]


mol2.GetSubstructMatches(query_mol)

((1, 2, 3, 4, 20, 21), (10, 11, 12, 13, 18, 19))


mol2

[image: image.png]


I hope the above helps, cheers

p.


On Thu, Jul 22, 2021 at 7:45 PM Gustavo Seabra <gustavo.sea...@gmail.com>
wrote:

> Hi all,,
>
> I would appreciate some pointers on how it would be possible to find the
> maximum common substructure of 2 molecules, where in the template structure
> some atoms may be *any*, but some other atoms must be fixed.
>
> Currently, I'm trying to use rdFMCS module. For example:
>
> from rdkit import Chem
> from rdkit.Chem import rdFMCS
>
> template =
> Chem.MolFromSmarts('[a]1(-[S](-*)(=[O])=[O]):[a]:[a]:[a]:[a]:[a]:1')
> # This should give a sulfone connected to an aromatic ring and
> # some other (any) element. Notice that the ring may have
> # any atoms (N,C,O), but for me it is important to have the SO2 group.
>
> mol1 = Chem.MolFromSmiles('CS(=O)(=O)c1ccc(C2=C(c3ccccc3)CCN2)cc1')
> # This molecule has the pattern.
>
> # Now, if I try to find a substructure match, I use:
> compare = [template, mol1]
> res = rdFMCS.FindMCS(compare,
>                 atomCompare=rdFMCS.AtomCompare.CompareElements,
>                 bondCompare=rdFMCS.BondCompare.CompareAny,
>                 ringMatchesRingOnly=False,
>                 completeRingsOnly=False)
> res.smartsString
> # gives: '[#16](=[#8])=[#8]'
>
> # Notice that the only match is the SO2, it does not match the ring.
> However, if I try that with another structure that has a CF3 in place of
> the SO2, I get:
> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1')
> compare = [template,mol2]
> res = rdFMCS.FindMCS(compare,
>                 atomCompare=rdFMCS.AtomCompare.CompareElements,
>                 bondCompare=rdFMCS.BondCompare.CompareAny,
>                 ringMatchesRingOnly=False,
>                 completeRingsOnly=False)
> res.smartsString
> # Returns: '' (empty string)
>
> # if I change to AtomCompare.CompareAny, now a CF3 will also match
> # in the SO2-X:
> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1')
> compare = [template,mol2]
> res = rdFMCS.FindMCS(compare,
>                 atomCompare=rdFMCS.AtomCompare.CompareAny,
>                 bondCompare=rdFMCS.BondCompare.CompareAny,
>                 ringMatchesRingOnly=False,
>                 completeRingsOnly=False)
> res.smartsString
> # Returns:
> '[#16,#6](-[#0,#6])(=,-[#8,#9])(=,-[#8,#9])-[#0,#6]1:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#6]:[#0,#7]:1'
>
> But now theCF3 is counted in place of the SO2. The result I'd like to get
> here would be just the ring, as in the case:
> new_template = Chem.MolFromSmarts('CS(=O)(=O)c1cnccc1')
> mol2 = Chem.MolFromSmiles('Cc1ccc(C2=CCNC2c2ccc(C(C)(F)F)nc2)nn1')
> compare = [new_template,mol2]
> res = rdFMCS.FindMCS(compare,
>                 atomCompare=rdFMCS.AtomCompare.CompareElements,
>                 bondCompare=rdFMCS.BondCompare.CompareAny,
>                 ringMatchesRingOnly=False,
>                 completeRingsOnly=False)
> res.smartsString
> # Returns: '[#6]1:[#6]:[#7]:[#6]:[#6]:[#6]:1' (just the ring)
>
> Notice that if I use CompareElements, there seems to be no way to match
> the ring with either N or C.
>
> Does anyone have a suggestion on how I can specify flexibility (similar to
> AtomCompare.CompareAny) only for a portion of the molecule and still
> enforce specific atoms in another portion?
>
> Thank you so much!
> --
> Gustavo Seabra.
> _______________________________________________
> Rdkit-discuss mailing list
> Rdkit-discuss@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/rdkit-discuss
>
_______________________________________________
Rdkit-discuss mailing list
Rdkit-discuss@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rdkit-discuss

Reply via email to