RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/export.h>
13 #include <RDGeneral/hanoiSort.h>
14 #include <GraphMol/ROMol.h>
15 #include <GraphMol/RingInfo.h>
17 #include <cstdint>
18 #include <boost/dynamic_bitset.hpp>
20 #include <cstring>
21 #include <iostream>
22 #include <cassert>
23 #include <cstring>
24 #include <vector>
25 
26 // #define VERBOSE_CANON 1
27 
28 namespace RDKit {
29 namespace Canon {
30 class canon_atom;
31 
33  Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
34  unsigned int bondStereo{
35  static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
36  unsigned int nbrSymClass{0};
37  unsigned int nbrIdx{0};
38  Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
39  const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
40  const std::string *p_symbol{
41  nullptr}; // if provided, this is used to order bonds
42  unsigned int bondIdx{0};
43 
45  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
46  unsigned int nsc, unsigned int bidx)
47  : bondType(bt),
48  bondStereo(static_cast<unsigned int>(bs)),
49  nbrSymClass(nsc),
50  nbrIdx(ni),
51  bondIdx(bidx) {}
52  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
53  unsigned int nsc, unsigned int bidx)
54  : bondType(bt),
55  bondStereo(bs),
56  nbrSymClass(nsc),
57  nbrIdx(ni),
58  bondIdx(bidx) {}
59 
60  int compareStereo(const bondholder &o) const;
61 
62  bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
63  static bool greater(const bondholder &lhs, const bondholder &rhs) {
64  return compare(lhs, rhs) > 0;
65  }
66 
67  static int compare(const bondholder &x, const bondholder &y,
68  unsigned int div = 1) {
69  if (x.p_symbol && y.p_symbol) {
70  if ((*x.p_symbol) < (*y.p_symbol)) {
71  return -1;
72  } else if ((*x.p_symbol) > (*y.p_symbol)) {
73  return 1;
74  }
75  }
76  if (x.bondType < y.bondType) {
77  return -1;
78  } else if (x.bondType > y.bondType) {
79  return 1;
80  }
81  if (x.bondStereo < y.bondStereo) {
82  return -1;
83  } else if (x.bondStereo > y.bondStereo) {
84  return 1;
85  }
86  auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
87  if (scdiv) {
88  return scdiv;
89  }
90  if (x.bondStereo && y.bondStereo) {
91  auto cs = x.compareStereo(y);
92  if (cs) {
93  return cs;
94  }
95  }
96  return 0;
97  }
98 };
100  public:
101  const Atom *atom{nullptr};
102  int index{-1};
103  unsigned int degree{0};
104  unsigned int totalNumHs{0};
105  bool hasRingNbr{false};
106  bool isRingStereoAtom{false};
107  int *nbrIds{nullptr};
108  const std::string *p_symbol{
109  nullptr}; // if provided, this is used to order atoms
110  std::vector<int> neighborNum;
111  std::vector<int> revistedNeighbors;
112  std::vector<bondholder> bonds;
113 
115 
116  ~canon_atom() { free(nbrIds); }
117 };
118 
120  canon_atom *atoms, std::vector<bondholder> &nbrs);
121 
123  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
124  std::vector<std::pair<unsigned int, unsigned int>> &result);
125 
126 /*
127  * Different types of atom compare functions:
128  *
129  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
130  *dependent chirality
131  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
132  *highly symmetrical graphs/molecules
133  * - AtomCompareFunctor: Basic atom compare function which also allows to
134  *include neighbors within the ranking
135  */
136 
138  public:
139  Canon::canon_atom *dp_atoms{nullptr};
140  const ROMol *dp_mol{nullptr};
141  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
142  *dp_bondsInPlay{nullptr};
143 
146  Canon::canon_atom *atoms, const ROMol &m,
147  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
148  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
149  : dp_atoms(atoms),
150  dp_mol(&m),
151  dp_atomsInPlay(atomsInPlay),
152  dp_bondsInPlay(bondsInPlay) {}
153  int operator()(int i, int j) const {
154  PRECONDITION(dp_atoms, "no atoms");
155  PRECONDITION(dp_mol, "no molecule");
156  PRECONDITION(i != j, "bad call");
157  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
158  return 0;
159  }
160 
161  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
162  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
163  }
164  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
165  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
166  }
167  for (unsigned int ii = 0;
168  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
169  int cmp =
170  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
171  if (cmp) {
172  return cmp;
173  }
174  }
175 
176  std::vector<std::pair<unsigned int, unsigned int>> swapsi;
177  std::vector<std::pair<unsigned int, unsigned int>> swapsj;
178  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
179  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
180  }
181  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
182  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
183  }
184  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
185  int cmp = swapsi[ii].second - swapsj[ii].second;
186  if (cmp) {
187  return cmp;
188  }
189  }
190  return 0;
191  }
192 };
193 
195  public:
196  Canon::canon_atom *dp_atoms{nullptr};
197  const ROMol *dp_mol{nullptr};
198  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
199  *dp_bondsInPlay{nullptr};
200 
203  Canon::canon_atom *atoms, const ROMol &m,
204  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
205  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
206  : dp_atoms(atoms),
207  dp_mol(&m),
208  dp_atomsInPlay(atomsInPlay),
209  dp_bondsInPlay(bondsInPlay) {}
210  int operator()(int i, int j) const {
211  PRECONDITION(dp_atoms, "no atoms");
212  PRECONDITION(dp_mol, "no molecule");
213  PRECONDITION(i != j, "bad call");
214  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
215  return 0;
216  }
217 
218  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
219  return -1;
220  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
221  return 1;
222  }
223 
224  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
225  return -1;
226  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
227  return 1;
228  }
229 
230  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
231  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
232  }
233  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
234  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
235  }
236  for (unsigned int ii = 0;
237  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
238  int cmp =
239  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
240  if (cmp) {
241  return cmp;
242  }
243  }
244 
245  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
246  return -1;
247  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
248  return 1;
249  }
250  return 0;
251  }
252 };
253 
254 namespace {
255 unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
256  unsigned int i) {
257  unsigned int res = 0;
258  std::vector<unsigned int> perm;
259  perm.reserve(dp_atoms[i].atom->getDegree());
260  for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
261  auto rnk = dp_atoms[nbr->getIdx()].index;
262  // make sure we don't have duplicate ranks
263  if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
264  break;
265  } else {
266  perm.push_back(rnk);
267  }
268  }
269  if (perm.size() == dp_atoms[i].atom->getDegree()) {
270  auto ctag = dp_atoms[i].atom->getChiralTag();
271  if (ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ||
272  ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CCW) {
273  auto sortedPerm = perm;
274  std::sort(sortedPerm.begin(), sortedPerm.end());
275  auto nswaps = countSwapsToInterconvert(perm, sortedPerm);
276  res = ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ? 2 : 1;
277  if (nswaps % 2) {
278  res = res == 2 ? 1 : 2;
279  }
280  }
281  }
282  return res;
283 }
284 } // namespace
286  unsigned int getAtomRingNbrCode(unsigned int i) const {
287  if (!dp_atoms[i].hasRingNbr) {
288  return 0;
289  }
290 
291  int *nbrs = dp_atoms[i].nbrIds;
292  unsigned int code = 0;
293  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
294  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
295  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
296  }
297  }
298  return code;
299  }
300 
301  int basecomp(int i, int j) const {
302  unsigned int ivi, ivj;
303 
304  // always start with the current class:
305  ivi = dp_atoms[i].index;
306  ivj = dp_atoms[j].index;
307  if (ivi < ivj) {
308  return -1;
309  } else if (ivi > ivj) {
310  return 1;
311  }
312  // use the atom-mapping numbers if they were assigned
313  /* boost::any_cast FILED here:
314  std::string molAtomMapNumber_i="";
315  std::string molAtomMapNumber_j="";
316  */
317  int molAtomMapNumber_i = 0;
318  int molAtomMapNumber_j = 0;
319  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
320  molAtomMapNumber_i);
321  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
322  molAtomMapNumber_j);
323  if (molAtomMapNumber_i < molAtomMapNumber_j) {
324  return -1;
325  } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
326  return 1;
327  }
328  // start by comparing degree
329  ivi = dp_atoms[i].degree;
330  ivj = dp_atoms[j].degree;
331  if (ivi < ivj) {
332  return -1;
333  } else if (ivi > ivj) {
334  return 1;
335  }
336  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
337  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
338  return -1;
339  } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
340  return 1;
341  } else {
342  return 0;
343  }
344  }
345 
346  // move onto atomic number
347  ivi = dp_atoms[i].atom->getAtomicNum();
348  ivj = dp_atoms[j].atom->getAtomicNum();
349  if (ivi < ivj) {
350  return -1;
351  } else if (ivi > ivj) {
352  return 1;
353  }
354  // isotopes if we're using them
355  if (df_useIsotopes) {
356  ivi = dp_atoms[i].atom->getIsotope();
357  ivj = dp_atoms[j].atom->getIsotope();
358  if (ivi < ivj) {
359  return -1;
360  } else if (ivi > ivj) {
361  return 1;
362  }
363  }
364 
365  // nHs
366  ivi = dp_atoms[i].totalNumHs;
367  ivj = dp_atoms[j].totalNumHs;
368  if (ivi < ivj) {
369  return -1;
370  } else if (ivi > ivj) {
371  return 1;
372  }
373  // charge
374  ivi = dp_atoms[i].atom->getFormalCharge();
375  ivj = dp_atoms[j].atom->getFormalCharge();
376  if (ivi < ivj) {
377  return -1;
378  } else if (ivi > ivj) {
379  return 1;
380  }
381  // chirality if we're using it
382  if (df_useChirality) {
383  // first atom stereochem:
384  ivi = 0;
385  ivj = 0;
386  // can't actually use values here, because they are arbitrary
387  ivi = dp_atoms[i].atom->getChiralTag() != 0;
388  ivj = dp_atoms[j].atom->getChiralTag() != 0;
389  if (ivi < ivj) {
390  return -1;
391  } else if (ivi > ivj) {
392  return 1;
393  }
394  // stereo set
395  if (ivi && ivj) {
396  if (ivi) {
397  ivi = getChiralRank(dp_mol, dp_atoms, i);
398  }
399  if (ivj) {
400  ivj = getChiralRank(dp_mol, dp_atoms, j);
401  }
402  if (ivi < ivj) {
403  return -1;
404  } else if (ivi > ivj) {
405  return 1;
406  }
407  }
408  }
409 
410  if (df_useChiralityRings) {
411  // ring stereochemistry
412  ivi = getAtomRingNbrCode(i);
413  ivj = getAtomRingNbrCode(j);
414  if (ivi < ivj) {
415  return -1;
416  } else if (ivi > ivj) {
417  return 1;
418  } // bond stereo is taken care of in the neighborhood comparison
419  }
420  return 0;
421  }
422 
423  public:
424  Canon::canon_atom *dp_atoms{nullptr};
425  const ROMol *dp_mol{nullptr};
426  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
427  *dp_bondsInPlay{nullptr};
428  bool df_useNbrs{false};
429  bool df_useIsotopes{true};
430  bool df_useChirality{true};
431  bool df_useChiralityRings{true};
432 
435  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
436  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
437  : dp_atoms(atoms),
438  dp_mol(&m),
439  dp_atomsInPlay(atomsInPlay),
440  dp_bondsInPlay(bondsInPlay),
441  df_useNbrs(false),
442  df_useIsotopes(true),
443  df_useChirality(true),
444  df_useChiralityRings(true) {}
445  int operator()(int i, int j) const {
446  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
447  return 0;
448  }
449  int v = basecomp(i, j);
450  if (v) {
451  return v;
452  }
453 
454  if (df_useNbrs) {
455  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
456  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
457  }
458  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
459  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
460  }
461 
462  for (unsigned int ii = 0;
463  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
464  ++ii) {
465  int cmp =
466  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
467  if (cmp) {
468  return cmp;
469  }
470  }
471 
472  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
473  return -1;
474  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
475  return 1;
476  }
477  }
478  return 0;
479  }
480 };
481 
482 /*
483  * A compare function to discriminate chiral atoms, similar to the CIP rules.
484  * This functionality is currently not used.
485  */
486 
487 const unsigned int ATNUM_CLASS_OFFSET = 10000;
489  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
490  for (unsigned j = 0; j < nbrs.size(); ++j) {
491  unsigned int nbrIdx = nbrs[j].nbrIdx;
492  if (nbrIdx == ATNUM_CLASS_OFFSET) {
493  // Ignore the Hs
494  continue;
495  }
496  const Atom *nbr = dp_atoms[nbrIdx].atom;
497  nbrs[j].nbrSymClass =
498  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
499  }
500  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
501  // FIX: don't want to be doing this long-term
502  }
503 
504  int basecomp(int i, int j) const {
505  PRECONDITION(dp_atoms, "no atoms");
506  unsigned int ivi, ivj;
507 
508  // always start with the current class:
509  ivi = dp_atoms[i].index;
510  ivj = dp_atoms[j].index;
511  if (ivi < ivj) {
512  return -1;
513  } else if (ivi > ivj) {
514  return 1;
515  }
516 
517  // move onto atomic number
518  ivi = dp_atoms[i].atom->getAtomicNum();
519  ivj = dp_atoms[j].atom->getAtomicNum();
520  if (ivi < ivj) {
521  return -1;
522  } else if (ivi > ivj) {
523  return 1;
524  }
525 
526  // isotopes:
527  ivi = dp_atoms[i].atom->getIsotope();
528  ivj = dp_atoms[j].atom->getIsotope();
529  if (ivi < ivj) {
530  return -1;
531  } else if (ivi > ivj) {
532  return 1;
533  }
534 
535  // atom stereochem:
536  ivi = 0;
537  ivj = 0;
538  std::string cipCode;
539  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
540  cipCode)) {
541  ivi = cipCode == "R" ? 2 : 1;
542  }
543  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
544  cipCode)) {
545  ivj = cipCode == "R" ? 2 : 1;
546  }
547  if (ivi < ivj) {
548  return -1;
549  } else if (ivi > ivj) {
550  return 1;
551  }
552 
553  // bond stereo is taken care of in the neighborhood comparison
554  return 0;
555  }
556 
557  public:
558  Canon::canon_atom *dp_atoms{nullptr};
559  const ROMol *dp_mol{nullptr};
560  bool df_useNbrs{false};
563  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
564  int operator()(int i, int j) const {
565  PRECONDITION(dp_atoms, "no atoms");
566  PRECONDITION(dp_mol, "no molecule");
567  PRECONDITION(i != j, "bad call");
568  int v = basecomp(i, j);
569  if (v) {
570  return v;
571  }
572 
573  if (df_useNbrs) {
574  getAtomNeighborhood(dp_atoms[i].bonds);
575  getAtomNeighborhood(dp_atoms[j].bonds);
576 
577  // we do two passes through the neighbor lists. The first just uses the
578  // atomic numbers (by passing the optional 10000 to bondholder::compare),
579  // the second takes the already-computed index into account
580  for (unsigned int ii = 0;
581  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
582  ++ii) {
583  int cmp = bondholder::compare(
584  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
585  if (cmp) {
586  return cmp;
587  }
588  }
589  for (unsigned int ii = 0;
590  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
591  ++ii) {
592  int cmp =
593  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
594  if (cmp) {
595  return cmp;
596  }
597  }
598  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
599  return -1;
600  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
601  return 1;
602  }
603  }
604  return 0;
605  }
606 };
607 
608 /*
609  * Basic canonicalization function to organize the partitions which will be
610  * sorted next.
611  * */
612 
613 template <typename CompareFunc>
614 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
615  int mode, int *order, int *count, int &activeset,
616  int *next, int *changed, char *touchedPartitions) {
617  unsigned int nAtoms = mol.getNumAtoms();
618  int partition;
619  int symclass = 0;
620  int *start;
621  int offset;
622  int index;
623  int len;
624  int i;
625  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
626 
627  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
628  while (activeset != -1) {
629  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
630  // std::cerr<<" next: ";
631  // for(unsigned int ii=0;ii<nAtoms;++ii){
632  // std::cerr<<ii<<":"<<next[ii]<<" ";
633  // }
634  // std::cerr<<std::endl;
635  // for(unsigned int ii=0;ii<nAtoms;++ii){
636  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
637  // "<<atoms[order[ii]].index<<std::endl;
638  // }
639 
640  partition = activeset;
641  activeset = next[partition];
642  next[partition] = -2;
643 
644  len = count[partition];
645  offset = atoms[partition].index;
646  start = order + offset;
647  // std::cerr<<"\n\n**************************************************************"<<std::endl;
648  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
649  // for(unsigned int ii=0;ii<len;++ii){
650  // std::cerr<<" "<<order[offset+ii]+1;
651  // }
652  // std::cerr<<std::endl;
653  // for(unsigned int ii=0;ii<nAtoms;++ii){
654  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
655  // "<<atoms[order[ii]].index<<std::endl;
656  // }
657  hanoisort(start, len, count, changed, compar);
658  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
659  // std::cerr<<" result:";
660  // for(unsigned int ii=0;ii<nAtoms;++ii){
661  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
662  // "<<atoms[order[ii]].index<<std::endl;
663  // }
664  for (int k = 0; k < len; ++k) {
665  changed[start[k]] = 0;
666  }
667 
668  index = start[0];
669  // std::cerr<<" len:"<<len<<" index:"<<index<<"
670  // count:"<<count[index]<<std::endl;
671  for (i = count[index]; i < len; i++) {
672  index = start[i];
673  if (count[index]) {
674  symclass = offset + i;
675  }
676  atoms[index].index = symclass;
677  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
678  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
679  // activeset=index;
680  //}
681  for (unsigned j = 0; j < atoms[index].degree; ++j) {
682  changed[atoms[index].nbrIds[j]] = 1;
683  }
684  }
685  // std::cerr<<std::endl;
686 
687  if (mode) {
688  index = start[0];
689  for (i = count[index]; i < len; i++) {
690  index = start[i];
691  for (unsigned j = 0; j < atoms[index].degree; ++j) {
692  unsigned int nbor = atoms[index].nbrIds[j];
693  touchedPartitions[atoms[nbor].index] = 1;
694  }
695  }
696  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
697  if (touchedPartitions[ii]) {
698  partition = order[ii];
699  if ((count[partition] > 1) && (next[partition] == -2)) {
700  next[partition] = activeset;
701  activeset = partition;
702  }
703  touchedPartitions[ii] = 0;
704  }
705  }
706  }
707  }
708 } // end of RefinePartitions()
709 
710 template <typename CompareFunc>
711 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
712  int mode, int *order, int *count, int &activeset, int *next,
713  int *changed, char *touchedPartitions) {
714  unsigned int nAtoms = mol.getNumAtoms();
715  int partition;
716  int offset;
717  int index;
718  int len;
719  int oldPart = 0;
720 
721  for (unsigned int i = 0; i < nAtoms; i++) {
722  partition = order[i];
723  oldPart = atoms[partition].index;
724  while (count[partition] > 1) {
725  len = count[partition];
726  offset = atoms[partition].index + len - 1;
727  index = order[offset];
728  atoms[index].index = offset;
729  count[partition] = len - 1;
730  count[index] = 1;
731 
732  // test for ions, water molecules with no
733  if (atoms[index].degree < 1) {
734  continue;
735  }
736  for (unsigned j = 0; j < atoms[index].degree; ++j) {
737  unsigned int nbor = atoms[index].nbrIds[j];
738  touchedPartitions[atoms[nbor].index] = 1;
739  changed[nbor] = 1;
740  }
741 
742  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
743  if (touchedPartitions[ii]) {
744  int npart = order[ii];
745  if ((count[npart] > 1) && (next[npart] == -2)) {
746  next[npart] = activeset;
747  activeset = npart;
748  }
749  touchedPartitions[ii] = 0;
750  }
751  }
752  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
753  changed, touchedPartitions);
754  }
755  // not sure if this works each time
756  if (atoms[partition].index != oldPart) {
757  i -= 1;
758  }
759  }
760 } // end of BreakTies()
761 
763  int *order, int *count,
764  canon_atom *atoms);
765 
766 RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
767  int *count, int &activeset,
768  int *next, int *changed);
769 
771  std::vector<unsigned int> &res,
772  bool breakTies = true,
773  bool includeChirality = true,
774  bool includeIsotopes = true);
775 
777  const ROMol &mol, std::vector<unsigned int> &res,
778  const boost::dynamic_bitset<> &atomsInPlay,
779  const boost::dynamic_bitset<> &bondsInPlay,
780  const std::vector<std::string> *atomSymbols,
781  const std::vector<std::string> *bondSymbols, bool breakTies,
782  bool includeChirality, bool includeIsotope);
783 
784 inline void rankFragmentAtoms(
785  const ROMol &mol, std::vector<unsigned int> &res,
786  const boost::dynamic_bitset<> &atomsInPlay,
787  const boost::dynamic_bitset<> &bondsInPlay,
788  const std::vector<std::string> *atomSymbols = nullptr,
789  bool breakTies = true, bool includeChirality = true,
790  bool includeIsotopes = true) {
791  rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
792  breakTies, includeChirality, includeIsotopes);
793 };
794 
796  std::vector<unsigned int> &res);
797 
799  std::vector<Canon::canon_atom> &atoms,
800  bool includeChirality = true);
801 
802 namespace detail {
804  std::vector<Canon::canon_atom> &atoms,
805  bool includeChirality,
806  const std::vector<std::string> *atomSymbols,
807  const std::vector<std::string> *bondSymbols,
808  const boost::dynamic_bitset<> &atomsInPlay,
809  const boost::dynamic_bitset<> &bondsInPlay,
810  bool needsInit);
811 void freeCanonAtoms(std::vector<Canon::canon_atom> &atoms);
812 template <typename T>
813 void rankWithFunctor(T &ftor, bool breakTies, int *order,
814  bool useSpecial = false, bool useChirality = false,
815  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
816  const boost::dynamic_bitset<> *bondsInPlay = nullptr);
817 
818 } // namespace detail
819 
820 } // namespace Canon
821 } // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing atoms.
Definition: Atom.h:68
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:126
BondType
the type of Bond
Definition: Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:434
int operator()(int i, int j) const
Definition: new_canon.h:445
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:562
int operator()(int i, int j) const
Definition: new_canon.h:564
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:145
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:202
std::vector< bondholder > bonds
Definition: new_canon.h:112
std::vector< int > revistedNeighbors
Definition: new_canon.h:111
std::vector< int > neighborNum
Definition: new_canon.h:110
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:413
CXXAtomIterator< const MolGraph, Atom *const, MolGraph::adjacency_iterator > atomNeighbors(Atom const *at) const
Definition: ROMol.h:282
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
void freeCanonAtoms(std::vector< Canon::canon_atom > &atoms)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int >> &result)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:487
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:711
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:614
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:19
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition: utils.h:54
const std::string * p_symbol
Definition: new_canon.h:40
Bond::BondType bondType
Definition: new_canon.h:33
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:63
bool operator<(const bondholder &o) const
Definition: new_canon.h:62
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:52
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:45
unsigned int bondStereo
Definition: new_canon.h:34
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:67
unsigned int nbrSymClass
Definition: new_canon.h:36