libpappsomspp
Library for mass spectrometry
msrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/msrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief base interface to read MSrun files
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #include <QDebug>
29 #include <QObject>
30 
31 #include "msrunreader.h"
32 #include "../../pappsomspp/exception/exceptionnotfound.h"
33 
34 
36  qRegisterMetaType<pappso::MsRunReaderSPtr>("pappso::MsRunReaderSPtr");
37 
38 
39 namespace pappso
40 {
41 
42 
43 bool
45 {
46  return false;
47 }
48 void
50 {
51 }
52 void
54  [[maybe_unused]] std::size_t size)
55 {
56 }
57 void
59 {
60  m_isReadAhead = is_read_ahead;
61 }
62 
63 bool
65 {
66  return m_isReadAhead;
67 }
68 
69 bool
71  unsigned int ms_level) const
72 {
73  if(needPeakList() == true)
74  {
75  if(ms_level < m_needPeakListByMsLevel.size())
76  {
77  return m_needPeakListByMsLevel[ms_level];
78  }
79  else
80  return true;
81  }
82  else
83  {
84  return false;
85  }
86 }
87 void
89  unsigned int ms_level, bool want_peak_list)
90 {
91  if(ms_level < m_needPeakListByMsLevel.size())
92  {
93  m_needPeakListByMsLevel[ms_level] = want_peak_list;
94  }
95 }
96 
97 bool
99 {
100  return false;
101 }
102 
103 
104 void
106 {
107  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
108  //<< "The data loading process ended.";
109 }
110 
111 
112 void
114  const QualifiedMassSpectrum &qspectrum)
115 {
116  // The vector[0] contains the number of spectra at MS
117  // The vector[1] contains the number of spectra at MS^2
118  // The vector[2] contains the number of spectra at MS^3
119  // ...
120 
121  unsigned int ms_level = qspectrum.getMsLevel();
122  if(ms_level == 0)
123  return;
124  if(ms_level > m_countMsLevelSpectrum.size())
125  {
126  m_countMsLevelSpectrum.resize(ms_level);
127  }
128  m_countMsLevelSpectrum[ms_level - 1]++;
129 }
130 
131 
132 unsigned long
133 MsRunSimpleStatistics::getMsLevelCount(unsigned int ms_level) const
134 {
135  if(ms_level == 0)
136  return 0;
137  if(ms_level > m_countMsLevelSpectrum.size())
138  return 0;
139  return (m_countMsLevelSpectrum[ms_level - 1]);
140 }
141 
142 
143 unsigned long
145 {
146  unsigned long total = 0;
147  for(unsigned long count : m_countMsLevelSpectrum)
148  {
149  total += count;
150  }
151  return total;
152 }
153 
154 
156 {
157  // qDebug();
158 }
159 
160 
162 {
163  // qDebug();
164 }
165 
166 
167 bool
169 {
170  return false;
171 }
172 
173 void
175  const QualifiedMassSpectrum &qspectrum)
176 {
177  qDebug() << " " << qspectrum.getMassSpectrumId().getNativeId();
178 
179  QStringList native_id_list =
180  qspectrum.getMassSpectrumId().getNativeId().split("=");
181  if(native_id_list.size() < 2)
182  {
183  return;
184  }
185  else
186  {
187  std::size_t scan_number = native_id_list.back().toULong();
188  m_mmap_scan2index.insert(std::pair<std::size_t, std::size_t>(
189  scan_number, qspectrum.getMassSpectrumId().getSpectrumIndex()));
190 
191  qDebug() << "scan number " << scan_number << "=>"
192  << qspectrum.getMassSpectrumId().getSpectrumIndex();
193  }
194 }
195 
196 std::size_t
198  std::size_t scan_number) const
199 {
200 
201  qDebug() << m_mmap_scan2index.size();
202 
203  auto it = m_mmap_scan2index.find(scan_number);
204 
205  if(it == m_mmap_scan2index.end())
206  {
207  throw ExceptionNotFound(
208  QObject::tr("scan number %1 not found").arg(scan_number));
209  }
210 
211  std::size_t index = it->second;
212 
213  it++;
214  if((it != m_mmap_scan2index.end()) && (it->first == scan_number))
215  {
216  throw PappsoException(
217  QObject::tr("scan number %1 found multiple times").arg(scan_number));
218  }
219  return index;
220 }
221 
222 
224 {
225  // qDebug();
226 }
227 
228 
230 {
231  // qDebug();
232 }
233 
234 
235 bool
237 {
238  return false;
239 }
240 
241 
242 void
244  const QualifiedMassSpectrum &qspectrum)
245 {
246  qDebug() << " " << qspectrum.getMassSpectrumId().getNativeId();
247 
248  m_retention_time_list.push_back(qspectrum.getRtInSeconds());
249 }
250 
251 const std::vector<double> &
253 {
254  return m_retention_time_list;
255 }
256 
257 
258 MsRunReader::MsRunReader(MsRunIdCstSPtr &ms_run_id) : mcsp_msRunId(ms_run_id)
259 {
260 }
261 
263  : mcsp_msRunId(other.mcsp_msRunId)
264 {
265  mpa_multiMapScanNumber = nullptr;
267 }
268 
269 
270 const MsRunIdCstSPtr &
272 {
273  return mcsp_msRunId;
274 }
275 
276 
278 {
279  if(mpa_multiMapScanNumber == nullptr)
280  delete mpa_multiMapScanNumber;
281 }
282 
283 void
284 MsRunReader::setMonoThread(bool is_mono_thread)
285 {
286  m_isMonoThread = is_mono_thread;
287 }
288 
289 bool
291 {
292  return m_isMonoThread;
293 }
294 
295 
296 std::size_t
298 {
299  qDebug() << " " << mpa_multiMapScanNumber;
300 
301  if(mpa_multiMapScanNumber == nullptr)
302  {
305  }
306  try
307  {
309  scan_number);
310  }
311 
312  catch(ExceptionNotFound &error)
313  {
314  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
315  .arg(mcsp_msRunId.get()->getFileName())
316  .arg(error.qwhat()));
317  }
318  catch(PappsoException &error)
319  {
320  throw PappsoException(QObject::tr("error reading file %1 : %2")
321  .arg(mcsp_msRunId.get()->getFileName())
322  .arg(error.qwhat()));
323  }
324 }
325 
326 
327 bool
329 {
330  return false;
331 }
332 
333 std::vector<double>
335 {
336  qDebug();
337 
338  try
339  {
340 
341  MsRunReaderRetentionTimeLine reader_timeline;
342 
343  readSpectrumCollectionByMsLevel(reader_timeline, 1);
344 
345  return reader_timeline.getRetentionTimeLine();
346  }
347 
348  catch(ExceptionNotFound &error)
349  {
350  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
351  .arg(mcsp_msRunId.get()->getFileName())
352  .arg(error.qwhat()));
353  }
354  catch(PappsoException &error)
355  {
356  throw PappsoException(QObject::tr("error reading file %1 : %2")
357  .arg(mcsp_msRunId.get()->getFileName())
358  .arg(error.qwhat()));
359  }
360 }
361 
362 
363 Trace
365 {
366  qDebug();
367 
368  try
369  {
370  MsRunReaderTicChromatogram ms_run_reader;
371 
372  readSpectrumCollection(ms_run_reader);
373 
374  return ms_run_reader.getTicChromatogram();
375  }
376 
377  catch(ExceptionNotFound &error)
378  {
379  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
380  .arg(mcsp_msRunId.get()->getFileName())
381  .arg(error.qwhat()));
382  }
383  catch(PappsoException &error)
384  {
385  throw PappsoException(QObject::tr("error reading file %1 : %2")
386  .arg(mcsp_msRunId.get()->getFileName())
387  .arg(error.qwhat()));
388  }
389 }
390 
391 
393 {
394 }
395 
396 
398 {
399 }
400 
401 
402 bool
404 {
405  return true;
406 }
407 
408 
409 void
411  const QualifiedMassSpectrum &qualified_mass_spectrum)
412 {
413  // In this specialized reader we want to compute the total ion current
414  // chromatogram that plot the sum of all the ion intensities in the spectra as
415  // a function of the retention time.
416 
417  uint spectrum_ms_level = qualified_mass_spectrum.getMsLevel();
418 
419  if(spectrum_ms_level != 1)
420  return;
421 
422  double sumY = qualified_mass_spectrum.getMassSpectrumSPtr()->sumY();
423 
424  if(!sumY)
425  return;
426 
427  double rt = qualified_mass_spectrum.getRtInMinutes();
428 
429  using Pair = std::pair<double, double>;
430  using Map = std::map<double, double>;
431  using Iterator = Map::iterator;
432 
433  std::pair<Iterator, bool> res = m_ticChromMapTrace.insert(Pair(rt, sumY));
434 
435  if(!res.second)
436  {
437  // One other same rt value was seen already (like in ion mobility mass
438  // spectrometry, for example). Only increment the y value.
439 
440  res.first->second += sumY;
441  }
442 }
443 
444 
445 Trace
447 {
448  return m_ticChromMapTrace.toTrace();
449 }
450 
451 
452 } // namespace pappso
Trace toTrace() const
Definition: maptrace.cpp:219
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
collect retention times along MS run
Definition: msrunreader.h:151
const std::vector< double > & getRetentionTimeLine() const
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
std::vector< double > m_retention_time_list
Definition: msrunreader.h:153
provides a multimap to find quickly spectrum index from scan number
Definition: msrunreader.h:133
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
std::size_t getSpectrumIndexFromScanNumber(std::size_t scan_number) const
std::multimap< std::size_t, std::size_t > m_mmap_scan2index
Definition: msrunreader.h:135
calculate a TIC chromatogram
Definition: msrunreader.h:169
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &qualified_mass_spectrum) override
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Base class to read MSrun files. The only way to build a MsRunReader object is to use the MsRunReaderFactory.
Definition: msrunreader.h:192
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:301
MsRunReaderScanNumberMultiMap * mpa_multiMapScanNumber
Definition: msrunreader.h:302
virtual bool hasScanNumbers() const
Tells if spectra can be accessed using scan numbers. By default, it returns false. Only overridden func...
void setMonoThread(bool is_mono_thread)
set only one is_mono_thread to true
virtual std::vector< double > getRetentionTimeLine()
retention timeline get retention times along the MSrun in seconds
virtual std::size_t scanNumber2SpectrumIndex(std::size_t scan_number)
if possible, converts a scan number into a spectrum index This is a convenient function to help trans...
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
bool isMonoThread() const
virtual Trace getTicChromatogram()
get a TIC chromatogram
MsRunReader(MsRunIdCstSPtr &ms_run_id)
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
const MsRunIdCstSPtr & getMsRunId() const
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
unsigned long getTotalCount() const
virtual void loadingEnded() override
std::vector< unsigned long > m_countMsLevelSpectrum
Definition: msrunreader.h:116
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Definition: msrunreader.cpp:98
unsigned long getMsLevelCount(unsigned int ms_level) const
virtual const QString & qwhat() const
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
pappso_double getRtInMinutes() const
Get the retention time in minutes.
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual bool isReadAhead() const
tells if we want to read ahead spectrum
Definition: msrunreader.cpp:64
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual bool needMsLevelPeakList(unsigned int ms_level) const final
tells if we need the peak list (if we want the binary data) for each spectrum, given an MS level
Definition: msrunreader.cpp:70
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
Definition: msrunreader.cpp:58
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
Definition: msrunreader.cpp:88
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:53
A simple container of DataPoint instances.
Definition: trace.h:148
int msRunReaderSPtrMetaTypeId
Definition: msrunreader.cpp:35
base interface to read MSrun files
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
@ rt
Retention time.
unsigned int uint
Definition: types.h:56