RDKit
Open-source cheminformatics and machine learning.
LinkNode.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/Invariant.h>
11 
12 #include <map>
13 #include <boost/lexical_cast.hpp>
14 #include <boost/tokenizer.hpp>
15 #include <boost/format.hpp>
16 #include <algorithm>
17 
18 typedef boost::tokenizer<boost::char_separator<char>> tokenizer;
19 
20 namespace RDKit {
21 namespace MolEnumerator {
22 
//! Plain data record describing one LINKNODE entry read from a molecule.
/*!
  The fields are filled by getMolLinkNodes() from the whitespace-separated
  values of a single entry: the first three values populate minRep, maxRep
  and nBonds, the remaining values form the atom pairs in bondAtoms.
*/
struct LinkNode {
  //! first value of the entry: smallest allowed repeat count
  unsigned int minRep = 0;
  //! second value of the entry: largest allowed repeat count
  unsigned int maxRep = 0;
  //! third value of the entry: number of bonds listed for the node
  unsigned int nBonds = 0;
  //! one (begin atom, end atom) index pair per bond
  std::vector<std::pair<unsigned int, unsigned int>> bondAtoms;
};
29 
30 namespace utils {
31 inline std::vector<LinkNode> getMolLinkNodes(
32  const ROMol &mol, bool strict = true,
33  const std::map<unsigned, Atom *> *atomIdxMap = nullptr) {
34  std::vector<LinkNode> res;
35  std::string pval;
37  return res;
38  }
39  std::vector<int> mapping;
40 
41  boost::char_separator<char> pipesep("|");
42  boost::char_separator<char> spacesep(" ");
43  for (auto linknodetext : tokenizer(pval, pipesep)) {
44  LinkNode node;
45  tokenizer tokens(linknodetext, spacesep);
46  std::vector<unsigned int> data;
47  try {
48  std::transform(tokens.begin(), tokens.end(), std::back_inserter(data),
49  [](const std::string &token) -> unsigned int {
50  return boost::lexical_cast<unsigned int>(token);
51  });
52  } catch (boost::bad_lexical_cast &) {
53  std::ostringstream errout;
54  errout << "Cannot convert values in LINKNODE '" << linknodetext
55  << "' to unsigned ints";
56  if (strict) {
57  throw ValueErrorException(errout.str());
58  } else {
59  BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
60  continue;
61  }
62  }
63  // the second test here is for the atom-pairs defining the bonds
64  // data[2] contains the number of bonds
65  if (data.size() < 5 || data.size() < 3 + 2 * data[2]) {
66  std::ostringstream errout;
67  errout << "not enough values in LINKNODE '" << linknodetext << "'";
68  if (strict) {
69  throw ValueErrorException(errout.str());
70  } else {
71  BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
72  continue;
73  }
74  }
75 
76  node.minRep = data[0];
77  node.maxRep = data[1];
78  if (node.minRep == 0 || node.maxRep < node.minRep) {
79  std::ostringstream errout;
80  errout << "bad counts in LINKNODE '" << linknodetext << "'";
81  if (strict) {
82  throw ValueErrorException(errout.str());
83  } else {
84  BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
85  continue;
86  }
87  }
88  node.nBonds = data[2];
89  if (node.nBonds != 2) {
90  if (strict) {
92  "only link nodes with 2 bonds are currently supported");
93  } else {
95  << "only link nodes with 2 bonds are currently supported"
96  << std::endl;
97  continue;
98  }
99  }
100  // both bonds must start from the same atom:
101  if (data[3] != data[5]) {
102  std::ostringstream errout;
103  errout << "bonds don't start at the same atom for LINKNODE '"
104  << linknodetext << "'";
105  if (strict) {
106  throw ValueErrorException(errout.str());
107  } else {
108  BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
109  continue;
110  }
111  }
112 
113  if (atomIdxMap) {
114  // map the indices back to the original atom numbers
115  for (unsigned int i = 3; i <= 6; ++i) {
116  const auto aidx = atomIdxMap->find(data[i] - 1);
117  if (aidx == atomIdxMap->end()) {
118  std::ostringstream errout;
119  errout << "atom index " << data[i]
120  << " cannot be found in molecule for LINKNODE '"
121  << linknodetext << "'";
122  if (strict) {
123  throw ValueErrorException(errout.str());
124  } else {
125  BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
126  continue;
127  }
128  } else {
129  data[i] = aidx->second->getIdx();
130  }
131  }
132  } else {
133  for (unsigned int i = 3; i <= 6; ++i) {
134  --data[i];
135  }
136  }
137  node.bondAtoms.push_back(std::make_pair(data[3], data[4]));
138  node.bondAtoms.push_back(std::make_pair(data[5], data[6]));
139  if (!mol.getBondBetweenAtoms(data[4], data[3]) ||
140  !mol.getBondBetweenAtoms(data[6], data[5])) {
141  std::ostringstream errout;
142  errout << "bond not found between atoms in LINKNODE '" << linknodetext
143  << "'";
144  if (strict) {
145  throw ValueErrorException(errout.str());
146  } else {
147  BOOST_LOG(rdWarningLog) << errout.str() << std::endl;
148  continue;
149  }
150  }
151  res.push_back(std::move(node));
152  }
153  return res;
154 }
155 
156 } // namespace utils
157 } // namespace MolEnumerator
158 
159 } // namespace RDKit
#define UNDER_CONSTRUCTION(fn)
Definition: Invariant.h:125
boost::tokenizer< boost::char_separator< char > > tokenizer
Definition: LinkNode.h:18
#define BOOST_LOG(__arg__)
Definition: RDLog.h:92
RDKIT_RDGENERAL_EXPORT RDLogger rdWarningLog
bool getPropIfPresent(const std::string &key, T &res) const
Definition: RDProps.h:121
Bond * getBondBetweenAtoms(unsigned int idx1, unsigned int idx2)
returns a pointer to the bond between two atoms, Null on failure
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
std::vector< LinkNode > getMolLinkNodes(const ROMol &mol, bool strict=true, const std::map< unsigned, Atom * > *atomIdxMap=nullptr)
Definition: LinkNode.h:31
RDKIT_RDGENERAL_EXPORT const std::string molFileLinkNodes
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< unsigned int, unsigned int > > bondAtoms
Definition: LinkNode.h:27