libpappsomspp
Library for mass spectrometry
msrundatasettree.cpp
Go to the documentation of this file.
1 // GPL 3+
2 // Filippo Rusconi
3 
4 #include <map>
5 #include <limits>
6 #include <iostream>
7 #include <iomanip>
8 
9 #include "msrundatasettree.h"
10 
11 #include "../pappsoexception.h"
12 #include "../exception/exceptionnotpossible.h"
13 
14 
15 namespace pappso
16 {
17 
18 
20  : mcsp_msRunId(ms_run_id_csp)
21 {
22 }
23 
24 
26 {
27  // qDebug();
28 
29  for(auto &&node : m_rootNodes)
30  {
31  // Each node is responsible for freeing its children nodes!
32 
33  delete node;
34  }
35 
36  m_rootNodes.clear();
37 
38  // Beware not to delete the node member of the map, as we have already
39  // destroyed them above!
40  //
41  // for(auto iterator = m_indexNodeMap.begin(); iterator !=
42  // m_indexNodeMap.end();
43  //++iterator)
44  //{
45  // delete(iterator->second);
46  //}
47 
48  // qDebug();
49 }
50 
51 
54  QualifiedMassSpectrumCstSPtr mass_spectrum_csp)
55 {
56  // qDebug();
57 
58  if(mass_spectrum_csp == nullptr)
59  qFatal("Cannot be nullptr");
60 
61  if(mass_spectrum_csp.get() == nullptr)
62  qFatal("Cannot be nullptr");
63 
64  // We need to get the precursor spectrum index, in case this spectrum is a
65  // fragmentation index.
66 
67  MsRunDataSetTreeNode *new_node_p = nullptr;
68 
69  std::size_t precursor_spectrum_index =
70  mass_spectrum_csp->getPrecursorSpectrumIndex();
71 
72  // qDebug() << "The precursor_spectrum_index:" << precursor_spectrum_index;
73 
74  if(precursor_spectrum_index == std::numeric_limits<std::size_t>::max())
75  {
76  // This spectrum is a full scan spectrum, not a fragmentation spectrum.
77  // Create a new node with no parent and push it back to the root nodes
78  // vector.
79 
80  new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, nullptr);
81 
82  // Since there is no parent in this overload, it is assumed that the node
83  // to be populated with the new node is the root node.
84 
85  m_rootNodes.push_back(new_node_p);
86 
87  // qDebug() << "to the roots node vector.";
88  }
89  else
90  {
91  // This spectrum is a fragmentation spectrum.
92 
93  // Sanity check
94 
95  if(mass_spectrum_csp->getMsLevel() <= 1)
96  {
98  "msrundatasettree.cpp -- ERROR the MS level needs to be > 1 in a "
99  "fragmentation spectrum.");
100  }
101 
102  // Get the node that contains the precursor ion mass spectrum.
103  MsRunDataSetTreeNode *parent_node_p = findNode(precursor_spectrum_index);
104 
105  if(parent_node_p == nullptr)
106  {
107  throw ExceptionNotPossible(
108  "msrundatasettree.cpp -- ERROR could not find "
109  "a tree node matching the index.");
110  }
111 
112  // qDebug() << "Fragmentation spectrum"
113  //<< "Found parent node:" << parent_node_p
114  //<< "for precursor index:" << precursor_spectrum_index;
115 
116  // At this point, create a new node with the right parent.
117 
118  new_node_p = new MsRunDataSetTreeNode(mass_spectrum_csp, parent_node_p);
119 
120  parent_node_p->m_children.push_back(new_node_p);
121  }
122 
123  // And now document that addition in the node index map.
124  m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
125  mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
126 
127  // We also want to document the new node relating to the
128  // retention time.
129 
131  mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
132 
133  // Likewise for the drift time.
134 
136  mass_spectrum_csp->getDtInMilliSeconds(), new_node_p, DataKind::dt);
137 
138  ++m_spectrumCount;
139 
140  // qDebug() << "New index/node map:"
141  //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
142  //<< new_node_p;
143 
144  return new_node_p;
145 }
146 
147 
148 const std::map<std::size_t, MsRunDataSetTreeNode *> &
150 {
151  return m_indexNodeMap;
152 }
153 
154 
155 std::size_t
157 {
158  // We have a node and we want to get the matching mass spectrum index.
159 
160  if(node == nullptr)
161  throw("Cannot be that the node pointer is nullptr");
162 
163  std::map<std::size_t, MsRunDataSetTreeNode *>::const_iterator iterator =
164  std::find_if(
165  m_indexNodeMap.begin(),
166  m_indexNodeMap.end(),
167  [node](const std::pair<std::size_t, MsRunDataSetTreeNode *> pair) {
168  return pair.second == node;
169  });
170 
171  if(iterator != m_indexNodeMap.end())
172  return iterator->first;
173 
174  return std::numeric_limits<std::size_t>::max();
175 }
176 
177 
178 std::size_t
180  QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp) const
181 {
182  MsRunDataSetTreeNode *node_p = findNode(qualified_mass_spectrum_csp);
183 
184  return massSpectrumIndex(node_p);
185 }
186 
187 
188 const std::vector<MsRunDataSetTreeNode *> &
190 {
191  return m_rootNodes;
192 }
193 
194 
195 void
197 {
198  // qDebug() << "Going to call node->accept(visitor) for each root node.";
199 
200  for(auto &&node : m_rootNodes)
201  {
202  // qDebug() << "Calling accept for root node:" << node;
203 
204  if(visitor.shouldStop())
205  break;
206 
207  node->accept(visitor);
208  }
209 }
210 
211 
212 void
215  std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_begin_iterator,
216  std::vector<MsRunDataSetTreeNode *>::const_iterator nodes_end_iterator)
217 {
218  // qDebug() << "Visitor:" << &visitor << "The distance is between iterators
219  // is:"
220  //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
221 
222  using Iterator = std::vector<MsRunDataSetTreeNode *>::const_iterator;
223 
224  Iterator iter = nodes_begin_iterator;
225 
226  // Inform the visitor of the number of nodes to work on.
227 
228  std::size_t node_count =
229  std::distance(nodes_begin_iterator, nodes_end_iterator);
230 
231  visitor.setNodesToProcessCount(node_count);
232 
233  while(iter != nodes_end_iterator)
234  {
235  // qDebug() << "Visitor:" << &visitor
236  //<< "The distance is between iterators is:"
237  //<< std::distance(nodes_begin_iterator, nodes_end_iterator);
238 
239  // qDebug() << "Node visited:" << (*iter)->toString();
240 
241  if(visitor.shouldStop())
242  break;
243 
244  (*iter)->accept(visitor);
245  ++iter;
246  }
247 }
248 
249 
252 {
253  // qDebug();
254 
255  for(auto &node : m_rootNodes)
256  {
257  // qDebug() << "In one node of the root nodes.";
258 
259  MsRunDataSetTreeNode *iterNode = node->findNode(mass_spectrum_csp);
260  if(iterNode != nullptr)
261  return iterNode;
262  }
263 
264  return nullptr;
265 }
266 
267 
269 MsRunDataSetTree::findNode(std::size_t spectrum_index) const
270 {
271  // qDebug();
272 
273  for(auto &node : m_rootNodes)
274  {
275  // qDebug() << "In one node of the root nodes.";
276 
277  MsRunDataSetTreeNode *iterNode = node->findNode(spectrum_index);
278  if(iterNode != nullptr)
279  return iterNode;
280  }
281 
282  return nullptr;
283 }
284 
285 
286 std::vector<MsRunDataSetTreeNode *>
288 {
289  // We want to push back all the nodes of the tree in a flat vector of nodes.
290 
291  std::vector<MsRunDataSetTreeNode *> nodes;
292 
293  for(auto &&node : m_rootNodes)
294  {
295  // The node will store itself and all of its children.
296  node->flattenedView(nodes, true /* with_descendants */);
297  }
298 
299  return nodes;
300 }
301 
302 
303 std::vector<MsRunDataSetTreeNode *>
305  bool with_descendants)
306 {
307  std::vector<MsRunDataSetTreeNode *> nodes;
308 
309  // Logically, ms_level cannot be 0.
310 
311  if(!ms_level)
312  {
313  throw ExceptionNotPossible(
314  "msrundatasettree.cpp -- ERROR the MS level cannot be 0.");
315 
316  return nodes;
317  }
318 
319  // The depth of the tree at which we are right at this point is 0, we have not
320  // gone into the children yet.
321 
322  std::size_t depth = 0;
323 
324  // If ms_level is 1, then that means that we want the nodes starting right at
325  // the root nodes with or without the descendants.
326 
327  // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
328  //<< "ms_level: " << ms_level << " depth: " << depth << std::endl;
329 
330  if(ms_level == 1)
331  {
332  for(auto &&node : m_rootNodes)
333  {
334  // std::cout << __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__
335  //<< " () "
336  //<< "Handling one of the root nodes at ms_level = 1."
337  //<< std::endl;
338 
339  node->flattenedView(nodes, with_descendants);
340  }
341 
342  return nodes;
343  }
344 
345  // At this point, we know that we want the descendants of the root nodes since
346  // we want ms_level > 1, so we need go to to the children of the root nodes.
347 
348  // Let depth to 0, because if we go to the children of the root nodes we will
349  // still be at depth 0, that is MS level 1.
350 
351  for(auto &node : m_rootNodes)
352  {
353  // std::cout
354  //<< __FILE__ << " @ " << __LINE__ << " " << __FUNCTION__ << " () "
355  //<< std::setprecision(15)
356  //<< "Requesting a flattened view of the root's child nodes with depth: "
357  //<< depth << std::endl;
358 
359  node->flattenedViewMsLevelNodes(ms_level, depth, nodes, with_descendants);
360  }
361 
362  return nodes;
363 }
364 
365 
368  std::size_t product_spectrum_index)
369 {
370 
371  // qDebug();
372 
373  // Find the node that holds the mass spectrum that was acquired as the
374  // precursor that when fragmented gave a spectrum at spectrum_index;
375 
376  // Get the node that contains the product_spectrum_index first.
377  MsRunDataSetTreeNode *node = nullptr;
378  node = findNode(product_spectrum_index);
379 
380  // Now get the node that contains the precursor_spectrum_index.
381 
382  return findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex());
383 }
384 
385 
386 std::vector<MsRunDataSetTreeNode *>
388  std::size_t precursor_spectrum_index)
389 {
390  std::vector<MsRunDataSetTreeNode *> nodes;
391 
392  // First get the node of the precursor spectrum index.
393 
394  MsRunDataSetTreeNode *precursor_node = findNode(precursor_spectrum_index);
395 
396  if(precursor_node == nullptr)
397  return nodes;
398 
399  nodes.assign(precursor_node->m_children.begin(),
400  precursor_node->m_children.end());
401 
402  return nodes;
403 }
404 
405 
406 std::vector<MsRunDataSetTreeNode *>
408  PrecisionPtr precision_ptr)
409 {
410 
411  // Find all the precursor nodes holding a mass spectrum that contained a
412  // precursor mz-value.
413 
414  if(precision_ptr == nullptr)
415  throw ExceptionNotPossible(
416  "msrundatasettree.cpp -- ERROR precision_ptr cannot be nullptr.");
417 
418  std::vector<MsRunDataSetTreeNode *> product_nodes;
419 
420  // As a first step, find all the nodes that hold a mass spectrum that was
421  // acquired as a fragmentation spectrum of an ion of mz, that is, search all
422  // the product ion nodes for which precursor was mz.
423 
424  for(auto &&node : m_rootNodes)
425  {
426  node->productNodesByPrecursorMz(mz, precision_ptr, product_nodes);
427  }
428 
429  // Now, for each node found get the precursor node
430 
431  std::vector<MsRunDataSetTreeNode *> precursor_nodes;
432 
433  for(auto &&node : product_nodes)
434  {
435  precursor_nodes.push_back(
436  findNode(node->mcsp_massSpectrum->getPrecursorSpectrumIndex()));
437  }
438 
439  return precursor_nodes;
440 }
441 
442 
443 bool
445  MsRunDataSetTreeNode *node_p,
446  DataKind data_kind)
447 {
448  // qDebug();
449 
450  using NodeVector = std::vector<MsRunDataSetTreeNode *>;
451  using DoubleNodeVectorMap = std::map<double, NodeVector>;
452  using MapPair = std::pair<double, NodeVector>;
453  using MapIterator = DoubleNodeVectorMap::iterator;
454 
455  DoubleNodeVectorMap *map_p;
456 
457  if(data_kind == DataKind::rt)
458  {
459  map_p = &m_rtDoubleNodeVectorMap;
460  }
461  else if(data_kind == DataKind::dt)
462  {
463  map_p = &m_dtDoubleNodeVectorMap;
464  }
465  else
466  qFatal("Programming error.");
467 
468  // There are two possibilities:
469  //
470  // 1. The time was never encountered yet. We won't find it. We need to
471  // allocate a vector of Node's and set it associated to time in the map.
472  //
473  // 2. The time was encountered already, we will find it in the maps, we'll
474  // just push_back the Node in the vector of nodes.
475 
476  MapIterator found_iterator = map_p->find(time);
477 
478  if(found_iterator != map_p->end())
479  {
480  // The time value was encountered already.
481 
482  found_iterator->second.push_back(node_p);
483 
484  // qDebug() << "Found iterator for time:" << time;
485  }
486  else
487  {
488  // We need to create a new vector with the node.
489 
490  NodeVector node_vector = {node_p};
491 
492  map_p->insert(MapPair(time, node_vector));
493 
494  // qDebug() << "Inserted new time:node_vector pair.";
495  }
496 
497  return true;
498 }
499 
500 
503  QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
504  MsRunDataSetTreeNode *parent_p)
505 {
506  // qDebug();
507 
508  // We want to add a mass spectrum. Either the parent_p argument is nullptr or
509  // not. If it is nullptr, then we just append the mass spectrum to the vector
510  // of root nodes. If it is not nullptr, we need to append the mass spectrum to
511  // that node.
512 
513  MsRunDataSetTreeNode *new_node_p =
514  new MsRunDataSetTreeNode(mass_spectrum_csp, parent_p);
515 
516  if(parent_p == nullptr)
517  {
518  m_rootNodes.push_back(new_node_p);
519 
520  // qDebug() << "Pushed back" << new_node << "to root nodes:" <<
521  // &m_rootNodes;
522  }
523  else
524  {
525  parent_p->m_children.push_back(new_node_p);
526 
527  // qDebug() << "Pushed back" << new_node << "with parent:" << parent_p;
528  }
529 
530  ++m_spectrumCount;
531 
532  // And now document that addition in the node index map.
533  m_indexNodeMap.insert(std::pair<std::size_t, MsRunDataSetTreeNode *>(
534  mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex(), new_node_p));
535 
536  // We also want to document the new node relating to the
537  // retention time.
538 
540  mass_spectrum_csp->getRtInMinutes(), new_node_p, DataKind::rt);
541 
542  // Likewise for the drift time.
543 
545  mass_spectrum_csp->getDtInMilliSeconds(), new_node_p, DataKind::dt);
546 
547  // qDebug() << "New index/node map:"
548  //<< mass_spectrum_csp->getMassSpectrumId().getSpectrumIndex() << "/"
549  //<< new_node;
550 
551  return new_node_p;
552 }
553 
554 
557  QualifiedMassSpectrumCstSPtr mass_spectrum_csp,
558  std::size_t precursor_spectrum_index)
559 {
560  // qDebug();
561 
562  // First get the node containing the mass spectrum that was acquired at index
563  // precursor_spectrum_index.
564 
565  // qDebug() << "Need to find the precursor's mass spectrum node for precursor
566  // "
567  //"spectrum index:"
568  //<< precursor_spectrum_index;
569 
570  MsRunDataSetTreeNode *mass_spec_data_node_p =
571  findNode(precursor_spectrum_index);
572 
573  // qDebug() << "Found node" << mass_spec_data_node_p
574  //<< "for precursor index:" << precursor_spectrum_index;
575 
576  if(mass_spec_data_node_p == nullptr)
577  {
578  throw ExceptionNotPossible(
579  "msrundatasettree.cpp -- ERROR could not find a a "
580  "tree node matching the index.");
581  }
582 
583  // qDebug() << "Calling addMassSpectrum with parent node:"
584  //<< mass_spec_data_node_p;
585 
586  return addMassSpectrum(mass_spectrum_csp, mass_spec_data_node_p);
587 }
588 
589 
590 std::size_t
592  double end,
593  NodeVector &nodes,
594  DataKind data_kind) const
595 {
596  using NodeVector = std::vector<MsRunDataSetTreeNode *>;
597  using DoubleNodeVectorMap = std::map<double, NodeVector>;
598  using MapIterator = DoubleNodeVectorMap::const_iterator;
599 
600  const DoubleNodeVectorMap *map_p;
601 
602  if(data_kind == DataKind::rt)
603  {
604  map_p = &m_rtDoubleNodeVectorMap;
605  }
606  else if(data_kind == DataKind::dt)
607  {
608  map_p = &m_dtDoubleNodeVectorMap;
609  }
610  else
611  qFatal("Programming error.");
612 
613  std::size_t added_nodes = 0;
614 
615  // Get the iterator to the map item that has the key greater or equal to
616  // start.
617 
618  MapIterator start_iterator = map_p->lower_bound(start);
619 
620  if(start_iterator == map_p->end())
621  return 0;
622 
623  // Now get the end of the map useful range of items.
624 
625  MapIterator end_iterator = map_p->upper_bound(end);
626 
627  // Now that we have the iterator range, iterate in it and get the mass spectra
628  // from each item's pair.second node vector.
629 
630  for(MapIterator iterator = start_iterator; iterator != end_iterator;
631  ++iterator)
632  {
633  // We are iterating in MapPair items.
634 
635  NodeVector node_vector = iterator->second;
636 
637  // All the nodes in the node vector need to be copied to the mass_spectra
638  // vector passed as parameter.
639 
640  for(auto &&node_p : node_vector)
641  {
642  nodes.push_back(node_p);
643 
644  ++added_nodes;
645  }
646  }
647 
648  return added_nodes;
649 }
650 
651 
652 std::size_t
654  double start, double end, NodeVector &nodes, DataKind data_kind) const
655 {
656  using NodeVector = std::vector<MsRunDataSetTreeNode *>;
657  using NodeVectorIterator = NodeVector::iterator;
658 
659  using DoubleNodeVectorMap = std::map<double, NodeVector>;
660  using MapIterator = DoubleNodeVectorMap::const_iterator;
661 
662  const DoubleNodeVectorMap *map_p;
663 
664  if(data_kind == DataKind::rt)
665  {
666  map_p = &m_rtDoubleNodeVectorMap;
667  }
668  else if(data_kind == DataKind::dt)
669  {
670  map_p = &m_dtDoubleNodeVectorMap;
671  }
672  else
673  qFatal("Programming error.");
674 
675  std::size_t removed_vector_items = 0;
676 
677  // We want to remove from the nodes vector all the nodes that contain a mass
678  // spectrum acquired at a time range outside of [ start-end ], that is, the
679  // time values [begin() - start [ and ]end -- end()[.
680 
681  // Get the iterator to the map item that has the key less to
682  // start (we want to keep the map item having key == start).
683 
684  MapIterator first_end_iterator = (*map_p).upper_bound(start);
685 
686  // Now that we have the first_end_iterator, we can iterate between [begin --
687  // first_end_iterator[
688 
689  for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
690  ++iterator)
691  {
692  // Remove from the nodes vector the nodes.
693 
694  // We are iterating in MapPair items.
695 
696  NodeVector node_vector = iterator->second;
697 
698  // All the nodes in the node vector need to be removed from the
699  // mass_spectra vector passed as parameter if found.
700 
701  for(auto &&node_p : node_vector)
702  {
703  NodeVectorIterator iterator =
704  std::find(nodes.begin(), nodes.end(), node_p);
705 
706  if(iterator != nodes.end())
707  {
708  // We found the node: remove it.
709 
710  nodes.erase(iterator);
711 
712  ++removed_vector_items;
713  }
714  }
715  }
716 
717  // Now the second begin iterator, so that we can remove all the items
718  // contained in the second range, that is, ]end--end()[.
719 
720  // The second_first_iterator will point to the item having its time value less
721  // or equal to end. But we do not want to get items having their time equal to
722  // end, only < end. So, if the iterator is not begin(), we just need to
723  // decrement it once.
724  MapIterator second_first_iterator = map_p->upper_bound(end);
725  if(second_first_iterator != map_p->begin())
726  --second_first_iterator;
727 
728  for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
729  ++iterator)
730  {
731  // We are iterating in MapPair items.
732 
733  NodeVector node_vector = iterator->second;
734 
735  // All the nodes in the node vector need to be removed from the
736  // mass_spectra vector passed as parameter if found.
737 
738  for(auto &&node_p : node_vector)
739  {
740  NodeVectorIterator iterator =
741  std::find(nodes.begin(), nodes.end(), node_p);
742 
743  if(iterator != nodes.end())
744  {
745  // We found the node: remove it.
746 
747  nodes.erase(iterator);
748 
749  ++removed_vector_items;
750  }
751  }
752  }
753 
754  return removed_vector_items;
755 }
756 
757 
758 std::size_t
760  double start,
761  double end,
762  QualMassSpectraVector &mass_spectra,
763  DataKind data_kind) const
764 {
765  // qDebug() << "With start:" << start << "and end:" << end;
766 
767  if(start == end)
768  qDebug() << "Special case, start and end are equal:" << start;
769 
770  // We will use the maps that relate rt | dt to a vector of data tree nodes.
771  // Indeed, we may have more than one mass spectrum acquired for a given rt, in
772  // case of ion mobility mass spectrometry. Same for dt: we will have as many
773  // spectra for each dt as there are retention time values...
774 
775  using DoubleNodeVectorMap = std::map<double, NodeVector>;
776  using MapIterator = DoubleNodeVectorMap::const_iterator;
777 
778  const DoubleNodeVectorMap *map_p;
779 
780  if(data_kind == DataKind::rt)
781  {
782  map_p = &m_rtDoubleNodeVectorMap;
783 
784  // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
785  // start
786  //<< "end:" << end;
787  }
788  else if(data_kind == DataKind::dt)
789  {
790  map_p = &m_dtDoubleNodeVectorMap;
791 
792  // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
793  // start
794  //<< "end:" << end;
795  }
796  else
797  qFatal("Programming error.");
798 
799  // qDebug() << "The rt |dt / mass spectra map has size:" << map_p->size()
800  //<< "The start:" << start << "the end:" << end;
801 
802  std::size_t added_mass_spectra = 0;
803 
804  // Get the iterator to the map item that has the key greater or equal to
805  // start.
806 
807  MapIterator start_iterator = map_p->lower_bound(start);
808 
809  if(start_iterator == map_p->end())
810  {
811  qDebug() << "The start iterator is end()!";
812  return 0;
813  }
814 
815  // qDebug() << "The start_iterator points to:" << start_iterator->first
816  //<< "as a rt|dt time.";
817 
818  // Now get the end of the map's useful range of items.
819 
820  // Returns an iterator pointing to the first element in the container whose
821  // key is considered to go after 'end'.
822 
823  MapIterator end_iterator = map_p->upper_bound(end);
824 
825  // Immediately very if there is no distance between start and end.
826  if(!std::distance(start_iterator, end_iterator))
827  {
828  qDebug() << "No range of mass spectra could be selected.";
829  return 0;
830  }
831 
832  if(end_iterator == map_p->end())
833  {
834  // qDebug() << "The end_iterator points to the end of the map."
835  //<< "The last map item is prev() at key value: "
836  //<< std::prev(end_iterator)->first;
837  }
838  else
839  {
840  // qDebug() << "The end_iterator points to:" << end_iterator->first
841  //<< "as a rt|dt time and the accounted key value is actually"
842  //<< std::prev(end_iterator)->first;
843  }
844 
845  // qDebug() << "The number of time values to iterate through:"
846  //<< std::distance(start_iterator, end_iterator)
847  //<< "with values: start: " << start_iterator->first
848  //<< "and end: " << std::prev(end_iterator)->first;
849 
850  // Now that we have the iterator range, iterate in it and get the mass
851  // spectra from each item's pair.second node vector.
852 
853  for(MapIterator iterator = start_iterator; iterator != end_iterator;
854  ++iterator)
855  {
856  // We are iterating in MapPair items.
857 
858  NodeVector node_vector = iterator->second;
859 
860  // All the nodes' mass spectra in the node vector need to be copied to
861  // the mass_spectra vector passed as parameter.
862 
863  for(auto &&node_p : node_vector)
864  {
865  QualifiedMassSpectrumCstSPtr qualified_mass_spectrum_csp =
866  node_p->getQualifiedMassSpectrum();
867 
868 #if 0
869  // Sanity check only for deep debugging.
870 
871  if(qualified_mass_spectrum_csp == nullptr ||
872  qualified_mass_spectrum_csp.get() == nullptr)
873  {
874  throw ExceptionNotPossible(
875  "The QualifiedMassSpectrumCstSPtr cannot be nullptr.");
876  }
877  else
878  {
879  //qDebug() << "Current mass spectrum is valid with rt:"
880  //<< qualified_mass_spectrum_csp->getRtInMinutes();
881  }
882 #endif
883 
884  mass_spectra.push_back(qualified_mass_spectrum_csp);
885 
886  ++added_mass_spectra;
887  }
888  }
889 
890  // qDebug() << "Returning added_mass_spectra:" << added_mass_spectra;
891 
892  return added_mass_spectra;
893 }
894 
895 
896 std::size_t
898  double start,
899  double end,
900  QualMassSpectraVector &mass_spectra,
901  DataKind data_kind) const
902 {
903  using QualMassSpectraVectorIterator = QualMassSpectraVector::iterator;
904 
905  using DoubleNodeVectorMap = std::map<double, NodeVector>;
906  using MapIterator = DoubleNodeVectorMap::const_iterator;
907 
908  const DoubleNodeVectorMap *map_p;
909 
910  if(data_kind == DataKind::rt)
911  {
912  map_p = &m_rtDoubleNodeVectorMap;
913 
914  // qDebug() << "The RT map has size:" << map_p->size() << "start:" <<
915  // start
916  //<< "end:" << end;
917  }
918  else if(data_kind == DataKind::dt)
919  {
920  map_p = &m_dtDoubleNodeVectorMap;
921 
922  // qDebug() << "The DT map has size:" << map_p->size() << "start:" <<
923  // start
924  //<< "end:" << end;
925  }
926  else
927  qFatal("Programming error.");
928 
929  std::size_t removed_vector_items = 0;
930 
931  // We want to remove from the nodes vector all the nodes that contain a mass
932  // spectrum acquired at a time range outside of [ start-end ], that is, the
933  // time values [begin() - start [ and ]end -- end()[.
934 
935  // Looking for an iterator that points to an item having a time < start.
936 
937  // lower_bound returns an iterator pointing to the first element in the
938  // range [first, last) that is not less than (i.e. greater or equal to)
939  // value, or last if no such element is found.
940 
941  MapIterator first_end_iterator = (*map_p).lower_bound(start);
942 
943  // first_end_iterator points to the item that has the next time value with
944  // respect to start. This is fine because we'll not remove that point
945  // because the for loop below will stop one item short of
946  // first_end_iterator. That means that we effectively remove all the items
947  // [begin() -> start[ (start not include). Exactly what we want.
948 
949  // qDebug() << "lower_bound for start:" << first_end_iterator->first;
950 
951  // Now that we have the first_end_iterator, we can iterate between [begin --
952  // first_end_iterator[
953 
954  for(MapIterator iterator = map_p->begin(); iterator != first_end_iterator;
955  ++iterator)
956  {
957  // Remove from the nodes vector the nodes.
958 
959  // We are iterating in MapPair items.
960 
961  NodeVector node_vector = iterator->second;
962 
963  // All the nodes in the node vector need to be removed from the
964  // mass_spectra vector passed as parameter if found.
965 
966  for(auto &&node_p : node_vector)
967  {
968  QualMassSpectraVectorIterator iterator =
969  std::find(mass_spectra.begin(),
970  mass_spectra.end(),
971  node_p->getQualifiedMassSpectrum());
972 
973  if(iterator != mass_spectra.end())
974  {
975  // We found the mass spectrum: remove it.
976 
977  mass_spectra.erase(iterator);
978 
979  ++removed_vector_items;
980  }
981  }
982  }
983 
984  // Now the second begin iterator, so that we can remove all the items
985  // contained in the second range, that is, ]end--end()[.
986 
987  // The second_first_iterator will point to the item having its time value
988  // less or equal to end. But we do not want to get items having their time
989  // equal to end, only < end. So, if the iterator is not begin(), we just
990  // need to decrement it once.
991 
992  MapIterator second_first_iterator = map_p->upper_bound(end);
993 
994  // second_first_iterator now points to the item after the one having time
995  // end. Which is exactly what we want: we want to remove ]end--end()[ and
996  // this is exactly what the loop starting a the point after end below.
997 
998  // qDebug() << "second_first_iterator for end:" <<
999  // second_first_iterator->first;
1000 
1001  for(MapIterator iterator = second_first_iterator; iterator != map_p->end();
1002  ++iterator)
1003  {
1004  // We are iterating in MapPair items.
1005 
1006  NodeVector node_vector = iterator->second;
1007 
1008  // All the nodes in the node vector need to be removed from the
1009  // mass_spectra vector passed as parameter if found.
1010 
1011  for(auto &&node_p : node_vector)
1012  {
1013  QualMassSpectraVectorIterator iterator =
1014  std::find(mass_spectra.begin(),
1015  mass_spectra.end(),
1016  node_p->getQualifiedMassSpectrum());
1017 
1018  if(iterator != mass_spectra.end())
1019  {
1020  // We found the node: remove it.
1021 
1022  mass_spectra.erase(iterator);
1023 
1024  ++removed_vector_items;
1025  }
1026  }
1027  }
1028 
1029  return removed_vector_items;
1030 }
1031 
1032 
1033 std::size_t
1035 {
1036  // We want to know what is the depth of the tree, that is the highest level
1037  // of MSn, that is, n.
1038 
1039  if(!m_rootNodes.size())
1040  return 0;
1041 
1042  // qDebug() << "There are" << m_rootNodes.size() << "root nodes";
1043 
1044  // By essence, we are at MS0: only if we have at least one root node do we
1045  // know we have MS1 data. So we already know that we have at least one
1046  // child, so start with depth 1.
1047 
1048  std::size_t depth = 1;
1049  std::size_t tmp_depth = 0;
1050  std::size_t greatest_depth = 0;
1051 
1052  for(auto &node : m_rootNodes)
1053  {
1054  tmp_depth = node->depth(depth);
1055 
1056  // qDebug() << "Returned depth:" << tmp_depth;
1057 
1058  if(tmp_depth > greatest_depth)
1059  greatest_depth = tmp_depth;
1060  }
1061 
1062  return greatest_depth;
1063 }
1064 
1065 
1066 std::size_t
1068 {
1069 
1070  std::size_t cumulative_node_count = 0;
1071 
1072  for(auto &node : m_rootNodes)
1073  {
1074  node->size(cumulative_node_count);
1075 
1076  // qDebug() << "Returned node_count:" << node_count;
1077  }
1078 
1079  return cumulative_node_count;
1080 }
1081 
1082 
1083 std::size_t
1085 {
1086  return m_indexNodeMap.size();
1087 }
1088 
1089 
1090 std::size_t
1092 {
1093  return m_spectrumCount;
1094 }
1095 
1096 
1097 } // namespace pappso
virtual void setNodesToProcessCount(std::size_t)=0
QualifiedMassSpectrumCstSPtr mcsp_massSpectrum
MsRunDataSetTreeNode * findNode(std::size_t spectrum_index)
std::vector< MsRunDataSetTreeNode * > m_children
MsRunDataSetTreeNode * findNode(QualifiedMassSpectrumCstSPtr mass_spectrum_csp) const
const std::vector< MsRunDataSetTreeNode * > & getRootNodes() const
std::vector< QualifiedMassSpectrumCstSPtr > QualMassSpectraVector
std::vector< MsRunDataSetTreeNode * > flattenedViewMsLevel(std::size_t ms_level, bool with_descendants=false)
std::size_t indexNodeMapSize() const
void accept(MsRunDataSetTreeNodeVisitorInterface &visitor)
MsRunDataSetTree(MsRunIdCstSPtr ms_run_id_csp)
bool documentNodeInDtRtMap(double time, MsRunDataSetTreeNode *node_p, DataKind data_kind)
std::vector< MsRunDataSetTreeNode * > flattenedView()
std::size_t getSpectrumCount() const
std::map< std::size_t, MsRunDataSetTreeNode * > m_indexNodeMap
std::vector< MsRunDataSetTreeNode * > m_rootNodes
std::size_t addDataSetTreeNodesInsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
std::map< double, NodeVector > DoubleNodeVectorMap
std::size_t removeDataSetTreeNodesOutsideDtRtRange(double start, double end, NodeVector &nodes, DataKind data_kind) const
std::vector< MsRunDataSetTreeNode * > precursorNodesByPrecursorMz(pappso_double mz, PrecisionPtr precision_ptr)
std::vector< MsRunDataSetTreeNode * > NodeVector
std::size_t addDataSetQualMassSpectraInsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
MsRunDataSetTreeNode * precursorNodeByProductSpectrumIndex(std::size_t product_spectrum_index)
const std::map< std::size_t, MsRunDataSetTreeNode * > & getIndexNodeMap() const
MsRunDataSetTreeNode * addMassSpectrum(QualifiedMassSpectrumCstSPtr mass_spectrum)
std::size_t removeDataSetQualMassSpectraOutsideDtRtRange(double start, double end, QualMassSpectraVector &mass_spectra, DataKind data_kind) const
std::size_t massSpectrumIndex(const MsRunDataSetTreeNode *node) const
DoubleNodeVectorMap m_rtDoubleNodeVectorMap
std::vector< MsRunDataSetTreeNode * > productNodesByPrecursorSpectrumIndex(std::size_t precursor_spectrum_index)
DoubleNodeVectorMap m_dtDoubleNodeVectorMap
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< const QualifiedMassSpectrum > QualifiedMassSpectrumCstSPtr
DataKind
Definition: types.h:172
@ dt
Drift time.
@ rt
Retention time.