RDKit
Open-source cheminformatics and machine learning.
MultiFPBReader.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2016 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_MULTIFPBREADER_H_APR2016
12 #define RD_MULTIFPBREADER_H_APR2016
13 /*! \file MultiFPBReader.h
14 
15  \brief contains a class for reading and searching collections of FPB files
16 
17  \b Note that this functionality is experimental and the API may change
18  in future releases.
19 */
20 
21 #include <RDGeneral/Exceptions.h>
23 #include <DataStructs/FPBReader.h>
24 #include <boost/tuple/tuple.hpp>
25 
26 namespace RDKit {
27 
28 //! class for reading and searching multiple FPB files
29 /*!
30  basic usage:
31  \code
32  FPBReader r1("foo1.fpb"),r2("foo2.fpb");
33  std::vector<FPBReader *> readers;
34 readers.push_back(&r1);
35 readers.push_back(&r2);
36  MultiFPBReader fpbs(readers);
37  fpbs.init();
38  boost::shared_ptr<ExplicitBitVect> ebv = fpbs.getReader(0)->getFP(95);
39  std::vector<boost::tuple<double,unsigned int, unsigned int> > nbrs =
40  fpbs.getTanimotoNeighbors(*ebv.get(), 0.70);
41  \endcode
42 
43  \b Note: this functionality is experimental and the API may change
44  in future releases.
45 
46  <b>Note on thread safety</b>
47  Operations that involve reading from FPB files are not thread safe.
48  This means that the \c init() method is not thread safe and none of the
49  search operations are thread safe when an \c FPBReader is initialized in
50  \c lazyRead mode.
51 
52 */
54  public:
55  typedef boost::tuple<double, unsigned int, unsigned int> ResultTuple;
57 
58 /*! Creates a collection with no readers; \c df_init and \c df_takeOwnership start out false.
59 \param initOnSearch: if this is true, the \c init() method on child readers
60 will not be called until the first search is done. This is useful with large
61 FPB readers.
62 */
63 MultiFPBReader(bool initOnSearch)
64 : df_init(false),
65 df_initOnSearch(initOnSearch),
66 df_takeOwnership(false) {}
67  /*!
68  \param readers: the set of FPBReader objects to use.
69  \param takeOwnership: if true, we own the memory for the FPBReaders
70  \param initOnSearch: if this is true, the \c init() method on child readers
71  will not be called until the first search is done. This is useful with large
72  FPB readers.
73  */
74  MultiFPBReader(std::vector<FPBReader *> &readers, bool takeOwnership = false,
75  bool initOnSearch = false);
76 
78  df_init = false;
79  if (df_takeOwnership) {
80  for (auto &rdr : d_readers) {
81  delete rdr;
82  }
83  d_readers.clear();
84  }
85  }
86 
87  //! Read the data from the file and initialize internal data structures
88  /*!
89  This must be called before most of the other methods of this class.
90  It calls the \c init() method on each of the child FPBReaders
91 
92  */
93  void init();
94 
95 //! returns the number of child FPBReaders currently stored in this collection
96 unsigned int length() const { return d_readers.size(); }
97  //! returns the number of bits in our fingerprints (all readers are expected
98  //! to have the same length)
99  unsigned int nBits() const;
100 
101  //! returns a particular reader
102  /*!
103 
104  \param which: the reader to return
105 
106  */
107  FPBReader *getReader(unsigned int which);
108 
109 //! adds a new FPBReader to our list
110 /*!
111 
112 Apart from the non-null precondition, this does no error checking on the reader, so be careful.
113 
114 If this object was constructed with \c takeOwnership set to \c true then we will take ownership of the memory.
115 
116 \param rdr: the reader to add; must not be null. If we have already been initialized, the
117 reader's \c init() method will be called
118 
119 \returns a count of the current number of readers
120 */
121 unsigned int addReader(FPBReader *rdr) {
122 PRECONDITION(rdr, "no reader provided");
123 d_readers.push_back(rdr);
124 if (df_init) {  // only initialize the new reader here if this collection's init flag is already set
125 rdr->init();
126 }
127 return d_readers.size();
128 }
129 
130  //! returns tanimoto neighbors that are within a similarity threshold
131  /*!
132  The result vector of (similarity,index,reader) tuples is sorted in order
133  of decreasing similarity
134 
135  \param bv the query fingerprint
136  \param threshold the minimum similarity to return
137 \param numThreads Sets the number of threads to use (more than one thread
138 will only be used if the RDKit was built with multithread support). If set to
139 zero, the max supported by the system will be used.
140 
141  */
142  std::vector<ResultTuple> getTanimotoNeighbors(const std::uint8_t *bv,
143  double threshold = 0.7,
144  int numThreads = 1) const;
145 //! \overload
146 std::vector<ResultTuple> getTanimotoNeighbors(
147 boost::shared_array<std::uint8_t> bv, double threshold = 0.7,
148 int numThreads = 1) const {
149 return getTanimotoNeighbors(bv.get(), threshold, numThreads);  // forward the raw byte pointer to the const std::uint8_t * overload
150 }
151  //! \overload
152  std::vector<ResultTuple> getTanimotoNeighbors(const ExplicitBitVect &ebv,
153  double threshold = 0.7,
154  int numThreads = 1) const;
155 
156  //! returns Tversky neighbors that are within a similarity threshold
157  /*!
158 The result vector of (similarity,index,reader) tuples is sorted in order
159 of decreasing similarity
160 
161  \param bv the query fingerprint
162  \param ca the Tversky a coefficient
163 \param cb the Tversky b coefficient
164  \param threshold the minimum similarity to return
165 \param numThreads Sets the number of threads to use (more than one thread
166 will only be used if the RDKit was built with multithread support). If set to
167 zero, the max supported by the system will be used.
168 
169  */
170  std::vector<ResultTuple> getTverskyNeighbors(const std::uint8_t *bv,
171  double ca, double cb,
172  double threshold = 0.7,
173  int numThreads = 1) const;
174 //! \overload
175 std::vector<ResultTuple> getTverskyNeighbors(
176 boost::shared_array<std::uint8_t> bv, double ca, double cb,
177 double threshold = 0.7, int numThreads = 1) const {
178 return getTverskyNeighbors(bv.get(), ca, cb, threshold, numThreads);  // forward the raw byte pointer to the const std::uint8_t * overload
179 }
180  //! \overload
181  std::vector<ResultTuple> getTverskyNeighbors(const ExplicitBitVect &ebv,
182  double ca, double cb,
183  double threshold = 0.7,
184  int numThreads = 1) const;
185 
186  //! returns indices of all fingerprints that completely contain this one
187  /*! (i.e. where all the bits set in the query are also set in the db
188  molecule)
189  */
190  std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
191  const std::uint8_t *bv, int numThreads = 1) const;
192 //! \overload
193 std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
194 boost::shared_array<std::uint8_t> bv, int numThreads = 1) const {
195 return getContainingNeighbors(bv.get(), numThreads);  // forward the raw byte pointer to the const std::uint8_t * overload
196 }
197  //! \overload
198  std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
199  const ExplicitBitVect &ebv, int numThreads = 1) const;
200 
201  private:
202  std::vector<FPBReader *> d_readers;
203  bool df_init{false}, df_initOnSearch{false}, df_takeOwnership{false};
204 
205  // disable automatic copy constructors and assignment operators
206  // for this class and its subclasses. They will likely be
207  // carrying around stream pointers and copying those is a recipe
208  // for disaster.
209  MultiFPBReader(const MultiFPBReader &);
210  MultiFPBReader &operator=(const MultiFPBReader &);
211 };
212 } // namespace RDKit
213 #endif
contains a simple class for reading and searching FPB files
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
a class for bit vectors that are densely occupied
class for reading and searching FPB files
Definition: FPBReader.h:58
void init()
Read the data from the file and initialize internal data structures.
class for reading and searching multiple FPB files
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(boost::shared_array< std::uint8_t > bv, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::vector< ResultTuple > getTverskyNeighbors(const ExplicitBitVect &ebv, double ca, double cb, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::vector< ResultTuple > getTanimotoNeighbors(boost::shared_array< std::uint8_t > bv, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
unsigned int length() const
returns the number of readers
std::vector< ResultTuple > getTverskyNeighbors(boost::shared_array< std::uint8_t > bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
std::vector< ResultTuple > getTanimotoNeighbors(const std::uint8_t *bv, double threshold=0.7, int numThreads=1) const
returns tanimoto neighbors that are within a similarity threshold
std::vector< ResultTuple > getTanimotoNeighbors(const ExplicitBitVect &ebv, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
unsigned int addReader(FPBReader *rdr)
adds a new FPBReader to our list
FPBReader * getReader(unsigned int which)
returns a particular reader
void init()
Read the data from the file and initialize internal data structures.
MultiFPBReader(std::vector< FPBReader * > &readers, bool takeOwnership=false, bool initOnSearch=false)
MultiFPBReader(bool initOnSearch)
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(const std::uint8_t *bv, int numThreads=1) const
returns indices of all fingerprints that completely contain this one
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(const ExplicitBitVect &ebv, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
unsigned int nBits() const
boost::tuple< double, unsigned int, unsigned int > ResultTuple
std::vector< ResultTuple > getTverskyNeighbors(const std::uint8_t *bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
returns Tversky neighbors that are within a similarity threshold
#define RDKIT_DATASTRUCTS_EXPORT
Definition: export.h:81
Std stuff.
Definition: Abbreviations.h:19