RDKit
Open-source cheminformatics and machine learning.
SubstructMatch.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SUBSTRUCTMATCH_H
12 #define RD_SUBSTRUCTMATCH_H
13 
14 // std bits
15 #include <vector>
16 #include <functional>
17 #include <unordered_map>
18 #include <cstdint>
19 #include "GraphMol/StereoGroup.h"
20 #include <string>
21 
22 namespace RDKit {
23 class ROMol;
24 class Atom;
25 class Bond;
26 class ResonanceMolSupplier;
27 class MolBundle;
28 
29 //! \brief Used to return matches from substructure searching.
30 //! Each entry of the vector is a (queryAtomIdx, molAtomIdx) pair.
31 typedef std::vector<std::pair<int, int>> MatchVectType;
32 
33 struct RDKIT_SUBSTRUCTMATCH_EXPORT SubstructMatchParameters {
34  bool useChirality = false; //!< Use chirality in determining whether or not
35  //!< atoms/bonds match
36  bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
37  //!< determining whether atoms/bonds match
38  bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
39  //!< match each other
40  bool useQueryQueryMatches = false; //!< Consider query-query matches, not
41  //!< just simple matches
42  bool useGenericMatchers = false; //!< Looks for generic atoms in the query
43  //!< and uses them as part of the matching
44  bool recursionPossible = true; //!< Allow recursive queries
45  bool uniquify = true; //!< uniquify (by atom index) match results
46  unsigned int maxMatches = 1000; //!< maximum number of matches to return
47  int numThreads = 1; //!< number of threads to use when multi-threading
48  //!< is possible. 0 selects the number of
49  //!< concurrent threads supported by the hardware;
50  //!< negative values are added to the number of
51  //!< concurrent threads supported by the hardware
52  std::function<bool(const ROMol &mol,
53  const std::vector<unsigned int> &match)>
54  extraFinalCheck; //!< a function to be called at the end to validate a
55  //!< match
56 
57  SubstructMatchParameters() {}
58 };
59 
60 RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(
61  SubstructMatchParameters &params, const std::string &json);
62 
63 //! Find substructure matches for a query in a molecule
64 /*!
65  \param mol The ROMol to be searched
66  \param query The query ROMol
67  \param params Parameters controlling the matching
68 
69  \return The matches, if any
70 
71 */
72 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
73  const ROMol &mol, const ROMol &query,
74  const SubstructMatchParameters &params = SubstructMatchParameters());
75 
76 //! Find all substructure matches for a query in a ResonanceMolSupplier object
77 /*!
78  \param resMolSuppl The ResonanceMolSupplier object to be searched
79  \param query The query ROMol
80  \param params Parameters controlling the matching
81 
82  \return The matches, if any
83 
84 */
85 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
86  ResonanceMolSupplier &resMolSuppl, const ROMol &query,
87  const SubstructMatchParameters &params = SubstructMatchParameters());
88 
89 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
90  const MolBundle &bundle, const ROMol &query,
91  const SubstructMatchParameters &params = SubstructMatchParameters());
92 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
93  const ROMol &mol, const MolBundle &query,
94  const SubstructMatchParameters &params = SubstructMatchParameters());
95 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
96  const MolBundle &bundle, const MolBundle &query,
97  const SubstructMatchParameters &params = SubstructMatchParameters());
98 
99 //! Find a substructure match for a query
100 /*!
101  \param mol The object to be searched
102  \param query The query
103  \param matchVect Used to return the match
104  (pre-existing contents will be deleted)
105  \param recursionPossible flags whether or not recursive matches are allowed
106  \param useChirality use atomic CIP codes as part of the comparison
107  \param useQueryQueryMatches if set, the contents of atom and bond queries
108  will be used as part of the matching
109 
110  \return whether or not a match was found
111 
112 */
113 template <typename T1, typename T2>
114 bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
115  bool recursionPossible = true, bool useChirality = false,
116  bool useQueryQueryMatches = false) {
117  SubstructMatchParameters params;
118  params.recursionPossible = recursionPossible;
119  params.useChirality = useChirality;
120  params.useQueryQueryMatches = useQueryQueryMatches;
121  params.maxMatches = 1; // only the first match is needed here
122  std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
123  if (matchVects.size()) {
124  matchVect = matchVects.front();
125  } else {
126  matchVect.clear();
127  }
128  return matchVect.size() != 0;
129 }
130 
131 //! Find all substructure matches for a query
132 /*!
133  \param mol The object to be searched
134  \param query The query
135  \param matchVect Used to return the matches
136  (pre-existing contents will be deleted)
137  \param uniquify Toggles uniquification (by atom index) of the results
138  \param recursionPossible flags whether or not recursive matches are allowed
139  \param useChirality use atomic CIP codes as part of the comparison
140  \param useQueryQueryMatches if set, the contents of atom and bond queries
141  will be used as part of the matching
142  \param maxMatches The maximum number of matches that will be returned.
143  In high-symmetry cases with medium-sized molecules, it is very
144  easy to end up with a combinatorial explosion in the number of
145  possible matches. This argument prevents that from having
146  unintended consequences
147  \param numThreads The number of threads to use when multi-threading is
148  possible (forwarded to SubstructMatchParameters::numThreads)
149 
150  \return the number of matches found
151 
152 */
153 template <typename T1, typename T2>
154 unsigned int SubstructMatch(T1 &mol, const T2 &query,
155  std::vector<MatchVectType> &matchVect,
156  bool uniquify = true, bool recursionPossible = true,
157  bool useChirality = false,
158  bool useQueryQueryMatches = false,
159  unsigned int maxMatches = 1000,
160  int numThreads = 1) {
161  SubstructMatchParameters params;
162  params.uniquify = uniquify;
163  params.recursionPossible = recursionPossible;
164  params.useChirality = useChirality;
165  params.useQueryQueryMatches = useQueryQueryMatches;
166  params.maxMatches = maxMatches;
167  params.numThreads = numThreads;
168  matchVect = SubstructMatch(mol, query, params);
169  return static_cast<unsigned int>(matchVect.size());
170 }
171 
172 // ----------------------------------------------
173 //
174 // find one match in ResonanceMolSupplier object
175 //
176 template <>
177 inline bool SubstructMatch(ResonanceMolSupplier &resMolSupplier,
178  const ROMol &query, MatchVectType &matchVect,
179  bool recursionPossible, bool useChirality,
180  bool useQueryQueryMatches) {
181  SubstructMatchParameters params;
182  params.recursionPossible = recursionPossible;
183  params.useChirality = useChirality;
184  params.useQueryQueryMatches = useQueryQueryMatches;
185  params.maxMatches = 1; // only the first match is needed here
186  std::vector<MatchVectType> matchVects =
187  SubstructMatch(resMolSupplier, query, params);
188  if (matchVects.size()) {
189  matchVect = matchVects.front();
190  } else {
191  matchVect.clear();
192  }
193  return matchVect.size() != 0;
194 }
195 
196 template <>
197 inline unsigned int SubstructMatch(ResonanceMolSupplier &resMolSupplier,
198  const ROMol &query,
199  std::vector<MatchVectType> &matchVect,
200  bool uniquify, bool recursionPossible,
201  bool useChirality, bool useQueryQueryMatches,
202  unsigned int maxMatches, int numThreads) {
203  SubstructMatchParameters params;
204  params.uniquify = uniquify;
205  params.recursionPossible = recursionPossible;
206  params.useChirality = useChirality;
207  params.useQueryQueryMatches = useQueryQueryMatches;
208  params.maxMatches = maxMatches;
209  params.numThreads = numThreads;
210  matchVect = SubstructMatch(resMolSupplier, query, params);
211  return static_cast<unsigned int>(matchVect.size()); // number of matches
212 }
213 
214 //! Class used as a final step to confirm whether or not a given atom->atom
215 //! mapping is a valid substructure match.
216 class RDKIT_SUBSTRUCTMATCH_EXPORT MolMatchFinalCheckFunctor {
217  public:
218  MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
219  const SubstructMatchParameters &ps);
220 
221  bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const;
222 
223  private:
224  const ROMol &d_query;
225  const ROMol &d_mol;
226  const SubstructMatchParameters &d_params;
227  std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
228 };
229 
230 } // namespace RDKit
231 
232 #endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition: MolBundle.h:39
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition: export.h:489
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck