RDKit
Open-source cheminformatics and machine learning.
RGroupDecomp.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2017-2021, Novartis Institutes for BioMedical Research Inc.
3 // and other RDKit contributors
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef RDKIT_RGROUPDECOMP_H
13 #define RDKIT_RGROUPDECOMP_H
14 
15 #include "../RDKitBase.h"
17 #include <chrono>
18 
19 namespace RDKit {
20 
21 //! Compute the isomorphic degenerative points in the
22 //! molecule. These are points that are symmetrically
23 //! equivalent.
24 /*!
25  \param mol Molecule to compute the degenerative points
26 
27  \return the set of degenerative points (set<unsigned int>)
28 */
29 
30 typedef enum {
31  IsotopeLabels = 0x01,
32  AtomMapLabels = 0x02,
36  DummyAtomLabels = 0x20, // These are rgroups but will get relabelled
37  AutoDetect = 0xFF,
38 } RGroupLabels;
39 
40 typedef enum {
41  Greedy = 0x01,
42  GreedyChunks = 0x02,
43  Exhaustive = 0x04, // not really useful for large sets
45  GA = 0x10,
47 
48 typedef enum {
49  AtomMap = 0x01,
50  Isotope = 0x02,
51  MDLRGroup = 0x04,
53 
54 typedef enum {
55  // DEPRECATED, remove the following line in release 2021.03
56  None = 0x0,
57  NoAlignment = 0x0,
58  MCS = 0x01,
60 
61 typedef enum {
62  Match = 0x1,
64 } RGroupScore;
65 
67  const bool success;
68  const double score;
//! Stores the given \c success flag and \c score in the const members of
//! the same names; both are fixed for the lifetime of the object.
69 RGroupDecompositionProcessResult(const bool success, const double score)
70 : success(success), score(score) {}
71 };
72 
73 struct RGroupMatch;
74 
76  unsigned int labels = AutoDetect;
77  unsigned int matchingStrategy = GreedyChunks;
78  unsigned int scoreMethod = Match;
79  unsigned int rgroupLabelling = AtomMap | MDLRGroup;
80  unsigned int alignment = MCS;
81 
82  unsigned int chunkSize = 5;
83  //! only allow rgroup decomposition at the specified rgroups
84  bool onlyMatchAtRGroups = false;
85  //! remove all user-defined rgroups that only have hydrogens
86  bool removeAllHydrogenRGroups = true;
87  //! remove all user-defined rgroups that only have hydrogens,
88  //! and also remove the corresponding labels from the core
89  bool removeAllHydrogenRGroupsAndLabels = true;
90  //! remove all hydrogens from the output molecules
91  bool removeHydrogensPostMatch = true;
92  //! allow labelled Rgroups of degree 2 or more
93  bool allowNonTerminalRGroups = false;
94  // unlabelled core atoms can have multiple rgroups
95  bool allowMultipleRGroupsOnUnlabelled = false;
96 
97  double timeout = -1.0; ///< timeout in seconds. <=0 indicates no timeout
98 
99  // Determine how to assign the rgroup labels from the given core
100  unsigned int autoGetLabels(const RWMol &);
101 
102  // Prepare the core for substructure searching and rgroup assignment
103  bool prepareCore(RWMol &, const RWMol *alignCore);
104 
105  // Add r groups to unlabelled atoms if allowMultipleRGroupsOnUnlabelled is set
107 
108  // Parameters specific to GA
109 
110  // GA population size or -1 to use best guess
111  int gaPopulationSize = -1;
112  // GA maximum number of operations or -1 to use best guess
113  int gaMaximumOperations = -1;
114  // GA number of operations permitted without improvement before exiting (-1
115  // for best guess)
116  int gaNumberOperationsWithoutImprovement = -1;
117  // GA random number seed (-1 for default, -2 for random seed)
118  int gaRandomSeed = -1;
119  // Number of runs
120  int gaNumberRuns = 1;
121  // Sequential or parallel runs?
122 #ifdef RDK_BUILD_THREADSAFE_SSS
123  bool gaParallelRuns = true;
124 #else
125  bool gaParallelRuns = false;
126 #endif
127  // Controls the way substructure matching with the core is done
129 
//! Default constructor. All other members keep their in-class default
//! values; the only extra step is to switch on \c useChirality in
//! \c substructmatchParams.
130 RGroupDecompositionParameters() { substructmatchParams.useChirality = true; }
131 
132  private:
133  int indexOffset{-1};
134  void checkNonTerminal(const Atom &atom) const;
135 };
136 
137 typedef std::map<std::string, ROMOL_SPTR> RGroupRow;
138 typedef std::vector<ROMOL_SPTR> RGroupColumn;
139 
140 typedef std::vector<RGroupRow> RGroupRows;
141 typedef std::map<std::string, RGroupColumn> RGroupColumns;
142 
144  public:
//! Builds the label map from \c mapping.
/*!
  Every *value* of \c mapping (\c rl.second) becomes a key of \c d_map.
  Its entry is a pair of flags:
    - first:  "is used", always initialised to false here
    - second: "is user defined", true when the corresponding \c mapping
              key (\c rl.first) is strictly positive

  If several entries of \c mapping carry the same value, the entry visited
  last (largest \c mapping key) overwrites the earlier ones.
  NOTE(review): presumably the values of \c mapping are unique - confirm
  against the caller.
*/
145 UsedLabelMap(const std::map<int, int> &mapping) {
146 for (const auto &rl : mapping) {
147 d_map[rl.second] = std::make_pair(false, (rl.first > 0));
148 }
149 }
//! \return true if \c label is a key of the map, false otherwise.
//! Never inserts and never throws for an unknown label.
150 bool has(int label) const { return d_map.find(label) != d_map.end(); }
//! \return the "is used" flag (first element of the stored pair) for
//! \c label. Uses \c std::map::at, so an unknown label raises
//! \c std::out_of_range.
151 bool getIsUsed(int label) const { return d_map.at(label).first; }
//! Sets the "is used" flag for \c label to true.
//! Uses \c operator[], so a label that is not yet in the map is inserted
//! (with its "is user defined" flag value-initialised to false) rather
//! than rejected.
152 void setIsUsed(int label) { d_map[label].first = true; }
//! \return the "is user defined" flag (second element of the stored pair)
//! for \c label. Uses \c std::map::at, so an unknown label raises
//! \c std::out_of_range.
153 bool isUserDefined(int label) const { return d_map.at(label).second; }
154 
155  private:
156  std::map<int, std::pair<bool, bool>> d_map;
157 };
158 
159 struct RGroupDecompData;
161  private:
162  RGroupDecompData *data; // implementation details
163  RGroupDecomposition(const RGroupDecomposition &); // no copy construct
164  RGroupDecomposition &operator=(
165  const RGroupDecomposition &); // Prevent assignment
166  RWMOL_SPTR outputCoreMolecule(const RGroupMatch &match,
167  const UsedLabelMap &usedRGroupMap) const;
168  std::map<int, bool> getBlankRGroupMap() const;
169 
170  public:
172  const RGroupDecompositionParameters &params =
174  RGroupDecomposition(const std::vector<ROMOL_SPTR> &cores,
175  const RGroupDecompositionParameters &params =
177 
179 
180 //! Returns the index of the added molecule in the RGroupDecomposition
181 /// or a negative error code
182 /// :param mol: Molecule to add to the decomposition
183 /// :result: index of the molecule or
184 /// -1 if none of the cores match
185 /// -2 if the matched molecule has no sidechains, i.e. is the
186 /// same as the scaffold
187  int add(const ROMol &mol);
189  bool process();
190 
192  //! return the current group labels
193  std::vector<std::string> getRGroupLabels() const;
194 
195  //! return rgroups in row order group[row][attachment_point] = ROMol
197  //! return rgroups in column order group[attachment_point][row] = ROMol
199 };
200 
202  const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
203  RGroupRows &rows, std::vector<unsigned int> *unmatched = nullptr,
204  const RGroupDecompositionParameters &options =
206 
208  const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
209  RGroupColumns &columns, std::vector<unsigned int> *unmatched = nullptr,
210  const RGroupDecompositionParameters &options =
212 
//! Checks whether the time budget of an operation has been used up.
/*!
  \param t0             time point at which the timed operation started
  \param timeout        budget in seconds; a value <= 0 means "no timeout"
                        and makes the function return false immediately
  \param throwOnTimeout if true (the default), an exhausted budget is
                        reported by throwing instead of by the return value

  \return false when no timeout is set or when less than \c timeout seconds
          have elapsed since \c t0; true when the budget is exhausted and
          \c throwOnTimeout is false

  \throws std::runtime_error with the message "operation timed out" when
          the budget is exhausted and \c throwOnTimeout is true
*/
inline bool checkForTimeout(const std::chrono::steady_clock::time_point &t0,
                            double timeout, bool throwOnTimeout = true) {
  if (timeout <= 0) {
    return false;
  }
  // measure on the monotonic clock so wall-clock adjustments cannot produce
  // a spurious (or a missed) timeout
  const std::chrono::duration<double> elapsed =
      std::chrono::steady_clock::now() - t0;
  // keep the comparison in ">=" form: a NaN timeout then compares false and
  // is treated as "not expired"
  const bool expired = elapsed.count() >= timeout;
  if (expired && throwOnTimeout) {
    throw std::runtime_error("operation timed out");
  }
  return expired;
}
228 
229 } // namespace RDKit
230 
231 #endif
RGroupRows getRGroupsAsRows() const
return rgroups in row order group[row][attachment_point] = ROMol
RGroupDecomposition(const std::vector< ROMOL_SPTR > &cores, const RGroupDecompositionParameters &params=RGroupDecompositionParameters())
RGroupColumns getRGroupsAsColumns() const
return rgroups in column order group[attachment_point][row] = ROMol
const RGroupDecompositionParameters & params() const
RGroupDecomposition(const ROMol &core, const RGroupDecompositionParameters &params=RGroupDecompositionParameters())
int add(const ROMol &mol)
RGroupDecompositionProcessResult processAndScore()
std::vector< std::string > getRGroupLabels() const
return the current group labels
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
void setIsUsed(int label)
Definition: RGroupDecomp.h:152
bool getIsUsed(int label) const
Definition: RGroupDecomp.h:151
bool isUserDefined(int label) const
Definition: RGroupDecomp.h:153
UsedLabelMap(const std::map< int, int > &mapping)
Definition: RGroupDecomp.h:145
bool has(int label) const
Definition: RGroupDecomp.h:150
#define RDKIT_RGROUPDECOMPOSITION_EXPORT
Definition: export.h:401
Std stuff.
Definition: Abbreviations.h:19
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RGroupCoreAlignment
Definition: RGroupDecomp.h:54
@ NoAlignment
Definition: RGroupDecomp.h:57
RGroupMatching
Definition: RGroupDecomp.h:40
@ NoSymmetrization
Definition: RGroupDecomp.h:44
@ Greedy
Definition: RGroupDecomp.h:41
@ Exhaustive
Definition: RGroupDecomp.h:43
@ GreedyChunks
Definition: RGroupDecomp.h:42
std::map< std::string, ROMOL_SPTR > RGroupRow
Definition: RGroupDecomp.h:137
std::vector< ROMOL_SPTR > RGroupColumn
Definition: RGroupDecomp.h:138
std::map< std::string, RGroupColumn > RGroupColumns
Definition: RGroupDecomp.h:141
RGroupLabels
Definition: RGroupDecomp.h:30
@ MDLRGroupLabels
Definition: RGroupDecomp.h:35
@ AtomMapLabels
Definition: RGroupDecomp.h:32
@ AtomIndexLabels
Definition: RGroupDecomp.h:33
@ RelabelDuplicateLabels
Definition: RGroupDecomp.h:34
@ AutoDetect
Definition: RGroupDecomp.h:37
@ DummyAtomLabels
Definition: RGroupDecomp.h:36
@ IsotopeLabels
Definition: RGroupDecomp.h:31
RGroupLabelling
Definition: RGroupDecomp.h:48
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:213
RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose(const std::vector< ROMOL_SPTR > &cores, const std::vector< ROMOL_SPTR > &mols, RGroupRows &rows, std::vector< unsigned int > *unmatched=nullptr, const RGroupDecompositionParameters &options=RGroupDecompositionParameters())
std::vector< RGroupRow > RGroupRows
Definition: RGroupDecomp.h:140
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition: RWMol.h:217
void addDummyAtomsToUnlabelledCoreAtoms(RWMol &core)
unsigned int autoGetLabels(const RWMol &)
bool prepareCore(RWMol &, const RWMol *alignCore)
SubstructMatchParameters substructmatchParams
Definition: RGroupDecomp.h:128
RGroupDecompositionProcessResult(const bool success, const double score)
Definition: RGroupDecomp.h:69
RGroupMatch is the decomposition for a single molecule.
Definition: RGroupMatch.h:19