libpappsomspp
Library for mass spectrometry
pwizmsrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief MSrun file reader based on the proteowizard library
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 
32 #include <QDebug>
33 
34 #include "pwizmsrunreader.h"
35 
36 #include <pwiz/data/msdata/DefaultReaderList.hpp>
37 
38 
39 #include "../../utils.h"
40 #include "../../pappsoexception.h"
41 #include "../../exception/exceptionnotfound.h"
42 #include "../../exception/exceptionnotpossible.h"
43 
44 
45 // int pwizMsRunReaderMetaTypeId =
46 // qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47 
48 
49 namespace pappso
50 {
51 
52 
54  : MsRunReader(msrun_id_csp)
55 {
56  // The initialization needs to be done immediately so that we get the pwiz
57  // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58  // pointer will be set to msp_msData.
59 
60  initialize();
61 }
62 
63 
64 void
66 {
67  std::string file_name_std =
69 
70  // Make a backup of the current locale
71  std::string env_backup = setlocale(LC_ALL, "");
72  // struct lconv *lc = localeconv();
73 
74  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75  //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76  //<< lc->decimal_point;
77 
78  // Now actually search the useful MSDataPtr to the member variable.
79 
80  pwiz::msdata::DefaultReaderList defaultReaderList;
81 
82  std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83 
84  try
85  {
86  defaultReaderList.read(file_name_std, msDataPtrVector);
87  }
88  catch(std::exception &error)
89  {
90  qDebug() << QString("Failed to read the data from file %1")
91  .arg(QString::fromStdString(file_name_std));
92 
93  throw(PappsoException(
94  QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
95  .arg(mcsp_msRunId->getFileName())
96  .arg(mcsp_msRunId.get()->toString())
97  .arg(error.what())));
98  }
99 
100  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
101  //<< "The number of runs is:" << msDataPtrVector.size()
102  //<< "The number of spectra in first run is:"
103  //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
104 
105  // Single-run file handling here.
106 
107  // Specific case of the MGF data format: we do not have a run id for that kind
108  // of data. In this case there must be a single run!
109 
110  if(mcsp_msRunId->getRunId().isEmpty())
111  {
112  if(msDataPtrVector.size() != 1)
113  throw(
114  ExceptionNotPossible("For the kind of file at hand there can only be "
115  "one run in the file."));
116 
117  // At this point we know the single msDataPtr is the one we are looking
118  // for.
119 
120  msp_msData = msDataPtrVector.front();
121  }
122  else
123  {
124  // Multi-run file handling here.
125  for(auto &msDataPtr : msDataPtrVector)
126  {
127  if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
128  {
129  msp_msData = msDataPtr;
130 
131  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
132  //<< "Found the right MSDataPtr for run id.";
133 
134  break;
135  }
136  }
137  }
138 
139  if(msp_msData == nullptr)
140  {
141  throw(ExceptionNotPossible(
142  QString("Could not find a MSDataPtr matching the requested run id : %1")
143  .arg(mcsp_msRunId.get()->toString())));
144  }
145 
146 
147  // check if this MS run can be used with scan numbers
148  // MS:1000490 Agilent instrument model
149  pwiz::cv::CVID native_id_format =
150  pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
151 
152  // msp_msData.get()->getDefaultNativeIDFormat();
153 
154  if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
155  {
156  m_hasScanNumbers = true;
157  }
158  else
159  {
160  m_hasScanNumbers = false;
161  }
162 
163  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
164  {
165  m_hasScanNumbers = true;
166  }
167 }
168 
169 
171 {
172 }
173 
174 
175 pwiz::msdata::SpectrumPtr
176 PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
177  std::size_t spectrum_index,
178  bool want_binary_data) const
179 {
180  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
181 
182  try
183  {
184  native_pwiz_spectrum_sp =
185  p_spectrum_list->spectrum(spectrum_index, want_binary_data);
186  }
187  catch(std::runtime_error &error)
188  {
189  qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
190  << typeid(error).name();
191 
192  throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
193  "MS file std::runtime_error :\n%2")
194  .arg(spectrum_index)
195  .arg(error.what()));
196  }
197  catch(std::exception &error)
198  {
199  qDebug() << "getPwizSpectrumPtr error " << error.what()
200  << typeid(error).name();
201 
202  throw ExceptionNotFound(
203  QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
204  .arg(spectrum_index)
205  .arg(error.what()));
206  }
207 
208  if(native_pwiz_spectrum_sp.get() == nullptr)
209  {
210  throw ExceptionNotFound(
211  QObject::tr(
212  "Pwiz spectrum index %1 not found in MS file : null pointer")
213  .arg(spectrum_index));
214  }
215 
216  return native_pwiz_spectrum_sp;
217 }
218 
219 
220 bool
222  pwiz::msdata::Spectrum *spectrum_p,
223  QualifiedMassSpectrum &qualified_mass_spectrum) const
224 {
225 
226  // We now have to set the retention time at which this mass spectrum
227  // was acquired. This is the scan start time.
228 
229  if(!spectrum_p->scanList.scans[0].hasCVParam(
230  pwiz::msdata::MS_scan_start_time))
231  {
232  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
233  { // MGF could not have scan start time
234  qualified_mass_spectrum.setRtInSeconds(-1);
235  }
236  else
237  {
238  throw(ExceptionNotPossible(
239  "The spectrum has no scan start time value set."));
240  }
241  }
242  else
243  {
244  pwiz::data::CVParam retention_time_cv_param =
245  spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
246 
247  // Try to get the units of the retention time value.
248 
249  std::string unit_name = retention_time_cv_param.unitsName();
250  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
251  //<< "Unit name for the retention time:"
252  //<< QString::fromStdString(unit_name);
253 
254  if(unit_name == "second")
255  {
256  qualified_mass_spectrum.setRtInSeconds(
257  retention_time_cv_param.valueAs<double>());
258  }
259  else if(unit_name == "minute")
260  {
261  qualified_mass_spectrum.setRtInSeconds(
262  retention_time_cv_param.valueAs<double>() * 60);
263  }
264  else
265  throw(
266  ExceptionNotPossible("Could not determine the unit for the "
267  "scan start time value."));
268  }
269 
270  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
271  //<< "Retention time for spectrum is:"
272  //<< qualified_mass_spectrum.getRtInSeconds();
273 
274  // Old version not checking unit (by default unit is minutes for RT,
275  // not seconds)
276  //
277  // pappso_double retentionTime =
278  // QString(spectrum_p->scanList.scans[0]
279  //.cvParam(pwiz::msdata::MS_scan_start_time)
280  //.value.c_str())
281  //.toDouble();
282  // qualified_mass_spectrum.setRtInSeconds(retentionTime);
283 
284  return true;
285 }
286 
287 
288 bool
290  pwiz::msdata::Spectrum *spectrum_p,
291  QualifiedMassSpectrum &qualified_mass_spectrum) const
292 {
293  // Not all the acquisitions have ion mobility data. We need to test
294  // that:
295 
296  if(spectrum_p->scanList.scans[0].hasCVParam(
297  pwiz::msdata::MS_ion_mobility_drift_time))
298  {
299 
300  // qDebug() << "as strings:"
301  //<< QString::fromStdString(
302  // spectrum_p->scanList.scans[0]
303  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
304  //.valueAs<std::string>());
305 
306  pappso_double driftTime =
307  spectrum_p->scanList.scans[0]
308  .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
309  .valueAs<double>();
310 
311  // qDebug() << "driftTime:" << driftTime;
312 
313  // Old version requiring use of QString.
314  // pappso_double driftTime =
315  // QString(spectrum_p->scanList.scans[0]
316  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
317  //.value.c_str())
318  //.toDouble();
319 
320  // Now make positively sure that the obtained value is correct.
321  // Note that I suffered a lot with Waters Synapt data that
322  // contained apparently correct drift time XML element that in
323  // fact contained either NaN or inf. When such mass spectra were
324  // encountered, the mz,i data were bogus and crashed the data
325  // loading functions. We just want to skip this kind of bogus mass
326  // spectrum by letting the caller know that the drift time was
327  // bogus ("I" is Filippo Rusconi).
328 
329  if(std::isnan(driftTime) || std::isinf(driftTime))
330  {
331  // qDebug() << "detected as nan or inf.";
332 
333  return false;
334  }
335  else
336  {
337  // The mzML standard stipulates that drift times are in
338  // milliseconds.
339  qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
340  }
341  }
342  // End of
343  // if(spectrum_p->scanList.scans[0].hasCVParam(
344  // pwiz::msdata::MS_ion_mobility_drift_time))
345  else
346  {
347  // Not a bogus mass spectrum but also not a drift spectrum, set -1
348  // as the drift time value.
349  qualified_mass_spectrum.setDtInMilliSeconds(-1);
350  }
351 
352  return true;
353 }
354 
355 
358  const MassSpectrumId &massSpectrumId,
359  pwiz::msdata::Spectrum *spectrum_p,
360  bool want_binary_data,
361  bool &ok) const
362 {
363  // qDebug();
364 
365  std::string env;
366  env = setlocale(LC_ALL, "");
367  setlocale(LC_ALL, "C");
368 
369  QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
370 
371  try
372  {
373 
374  // We want to store the ms level for this spectrum
375 
376  int msLevel =
377  (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
378 
379  qualified_mass_spectrum.setMsLevel(msLevel);
380 
381  // We want to know if this spectrum is a fragmentation spectrum obtained
382  // from a selected precursor ion.
383 
384  std::size_t precursor_list_size = spectrum_p->precursors.size();
385 
386  // qDebug() << "For spectrum at index:" <<
387  // massSpectrumId.getSpectrumIndex()
388  //<< "msLevel:" << msLevel
389  //<< "with number of precursors:" << precursor_list_size;
390 
391  if(precursor_list_size > 0)
392  {
393 
394  // Sanity check
395  if(msLevel < 2)
396  {
397  qDebug() << "Going to throw: msLevel cannot be less than two for "
398  "a spectrum that has items in its Precursor list.";
399 
400  throw(ExceptionNotPossible(
401  "msLevel cannot be less than two for "
402  "a spectrum that has items in its Precursor list."));
403  }
404 
405  // See what is the first precursor in the list.
406 
407  for(auto &precursor : spectrum_p->precursors)
408  {
409 
410  // Set this variable ready as we need that default value in
411  // certain circumstances.
412 
413  std::size_t precursor_spectrum_index =
414  std::numeric_limits<std::size_t>::max();
415 
416  // The spectrum ID of the precursor might be empty.
417 
418  if(precursor.spectrumID.empty())
419  {
420  // qDebug() << "The precursor's spectrum ID is empty.";
421 
422  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
423  {
424  // qDebug()
425  //<< "Format is MGF, precursor's spectrum ID can be
426  // empty.";
427  }
428  else
429  {
430  // When performing Lumos Fusion fragmentation experiments
431  // in Tune mode and with recording, the first spectrum of
432  // the list is a fragmentation spectrum (ms level 2) that
433  // has no identity for the precursor spectrum because
434  // there is no full scan acquisition.
435  }
436  }
437  // End of
438  // if(precursor.spectrumID.empty())
439  else
440  {
441  // We could get a native precursor spectrum id, so convert
442  // that native id to a spectrum index.
443 
444  qualified_mass_spectrum.setPrecursorNativeId(
445  QString::fromStdString(precursor.spectrumID));
446 
447  if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
448  {
449  // qDebug() << "The native id of the precursor spectrum is
450  // empty.";
451  }
452 
453  // Get the spectrum index of the spectrum that contained the
454  // precursor ion.
455 
456  precursor_spectrum_index =
457  msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
458 
459  // Note that the Mascot MGF format has a peculiar handling of
460  // the precursor ion stuff so we cannot throw.
461  if(precursor_spectrum_index ==
462  msp_msData->run.spectrumListPtr->size())
463  {
464  if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
465  {
466  throw(ExceptionNotPossible(
467  "Failed to find the index of the "
468  "precursor ion's spectrum."));
469  }
470  }
471 
472  qualified_mass_spectrum.setPrecursorSpectrumIndex(
473  precursor_spectrum_index);
474 
475  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
476  // "()"
477  //<< "Set the precursor spectrum index to:"
478  //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
479  //<< "for qualified mass spectrum:"
480  //<< &qualified_mass_spectrum;
481  }
482 
483  if(!precursor.selectedIons.size())
484  {
485  qDebug()
486  << "Going to throw The spectrum has msLevel > 1 but the "
487  "precursor ions's selected ions list is empty..";
488 
489  throw(
490  ExceptionNotPossible("The spectrum has msLevel > 1 but the "
491  "precursor ions's selected ions "
492  "list is empty."));
493  }
494 
495  pwiz::msdata::SelectedIon &ion =
496  *(precursor.selectedIons.begin());
497 
498  // selected ion m/z
499 
500  pappso_double selected_ion_mz =
501  QString(
502  ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
503  .toDouble();
504 
505  // selected ion peak intensity
506 
507  pappso_double selected_ion_peak_intensity =
508  QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
509  .toDouble();
510 
511  // charge state
512 
513  unsigned int selected_ion_charge_state =
514  QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
515  .toUInt();
516 
517  // At this point we can craft a new PrecursorIonData instance and
518  // push it back to the vector.
519 
520  PrecursorIonData precursor_ion_data(selected_ion_mz,
521  selected_ion_charge_state,
522  selected_ion_peak_intensity);
523 
524  qualified_mass_spectrum.appendPrecursorIonData(
525  precursor_ion_data);
526 
527  // General sum-up
528 
529  // qDebug()
530  //<< "Appended new PrecursorIonData:"
531  //<< "mz:"
532  //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
533  //<< "charge:"
534  //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
535  //<< "intensity:"
536  //<< qualified_mass_spectrum.getPrecursorIonData()
537  //.back()
538  //.intensity;
539  }
540  // End of
541  // for(auto &precursor : spectrum_p->precursors)
542  }
543  // End of
544  // if(precursor_list_size > 0)
545  else
546  {
547  // Sanity check
548 
549  // Unfortunately, logic here is defeated by some vendors that have
550  // files with MS2 spectra without <precursorList>. Thus we have
551  // spectrum_p->precursors.size() == 0 and msLevel > 1.
552 
553  // if(msLevel != 1)
554  //{
555  // throw(
556  // ExceptionNotPossible("msLevel cannot be different than 1 if "
557  //"there is not a single precursor ion."));
558  //}
559  }
560 
561  // Sanity check.
562 
563  if(precursor_list_size !=
564  qualified_mass_spectrum.getPrecursorIonData().size())
565  {
566  qDebug() << "Going to throw The number of precursors in the file is "
567  "different from the number of precursors in memory.";
568 
570  QObject::tr("The number of precursors in the file is different "
571  "from the number of precursors in memory."));
572  }
573 
574  // if(precursor_list_size == 1)
575  //{
576  // qDebug() << "Trying to get the mz value of the unique precursor ion:"
577  //<< qualified_mass_spectrum.getPrecursorMz();
578  //}
579 
580  processRetentionTime(spectrum_p, qualified_mass_spectrum);
581 
582  processDriftTime(spectrum_p, qualified_mass_spectrum);
583 
584  // for(pwiz::data::CVParam cv_param : ion.cvParams)
585  //{
586  // pwiz::msdata::CVID param_id = cv_param.cvid;
587  // qDebug() << param_id;
588  // qDebug() << cv_param.cvid.c_str();
589  // qDebug() << cv_param.name().c_str();
590  // qDebug() << cv_param.value.c_str();
591  //}
592 
593  if(want_binary_data)
594  {
595 
596  // Fill-in MZIntensityPair vector for convenient access to binary
597  // data
598 
599  std::vector<pwiz::msdata::MZIntensityPair> pairs;
600  spectrum_p->getMZIntensityPairs(pairs);
601 
602  MassSpectrum spectrum;
603  double tic = 0;
604  // std::size_t iterCount = 0;
605 
606  // Iterate through the m/z-intensity pairs
607  for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
608  it = pairs.begin(),
609  end = pairs.end();
610  it != end;
611  ++it)
612  {
613  //++iterCount;
614 
615  // qDebug() << "it->mz " << it->mz << " it->intensity" <<
616  // it->intensity;
617  if(it->intensity)
618  {
619  spectrum.push_back(DataPoint(it->mz, it->intensity));
620  tic += it->intensity;
621  }
622  }
623 
624  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
625  {
626  // Sort peaks by mz
627  spectrum.sortMz();
628  }
629 
630  // lc = localeconv ();
631  // qDebug() << " env=" << localeconv () << " lc->decimal_point "
632  // << lc->decimal_point;
633  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
634  // "<< spectrum.size();
635  MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
636  qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
637 
638  // double sumY =
639  // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
640  // <<
641  // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
642  //<< "iterCount:" << iterCount << "Spectrum size "
643  //<< spectrum.size() << "with tic:" << tic
644  //<< "and sumY:" << sumY;
645  }
646  else
647  qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
648  }
649  catch(PappsoException &errorp)
650  {
651  qDebug() << "Going to throw";
652 
654  QObject::tr("Error reading data using the proteowizard library: %1")
655  .arg(errorp.qwhat()));
656  }
657  catch(std::exception &error)
658  {
659  qDebug() << "Going to throw";
660 
662  QObject::tr("Error reading data using the proteowizard library: %1")
663  .arg(error.what()));
664  }
665 
666  // setlocale(LC_ALL, env.c_str());
667 
668  ok = true;
669 
670  // qDebug() << "QualifiedMassSpectrum: " <<
671  // qualified_mass_spectrum.toString();
672  return qualified_mass_spectrum;
673 }
674 
675 
678  bool want_binary_data,
679  bool &ok) const
680 {
681 
682  std::string env;
683  env = setlocale(LC_ALL, "");
684  // struct lconv *lc = localeconv();
685 
686  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
687  //<< "env=" << env.c_str()
688  //<< "lc->decimal_point:" << lc->decimal_point;
689 
690  setlocale(LC_ALL, "C");
691 
692  MassSpectrumId massSpectrumId(mcsp_msRunId);
693 
694  if(msp_msData == nullptr)
695  {
696  setlocale(LC_ALL, env.c_str());
697  return (QualifiedMassSpectrum(massSpectrumId));
698  }
699 
700  // const bool want_binary_data = true;
701 
702  pwiz::msdata::SpectrumListPtr spectrum_list_p =
703  msp_msData->run.spectrumListPtr;
704 
705  if(spectrum_index == spectrum_list_p.get()->size())
706  {
707  setlocale(LC_ALL, env.c_str());
708  throw ExceptionNotFound(
709  QObject::tr("The spectrum index cannot be equal to the size of the "
710  "spectrum list."));
711  }
712 
713  // At this point we know the spectrum index might be sane, so store it in
714  // the mass spec id object.
715  massSpectrumId.setSpectrumIndex(spectrum_index);
716 
717  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
718  getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
719 
720  setlocale(LC_ALL, env.c_str());
721 
722  massSpectrumId.setNativeId(
723  QString::fromStdString(native_pwiz_spectrum_sp->id));
724 
726  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
727 }
728 
729 
730 bool
731 PwizMsRunReader::accept(const QString &file_name) const
732 {
733  // We want to know if we can handle the file_name.
734  pwiz::msdata::ReaderList reader_list;
735 
736  std::string reader_type = reader_list.identify(file_name.toStdString());
737 
738  if(!reader_type.empty())
739  return true;
740 
741  return false;
742 }
743 
744 
746 PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
747 {
748  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
749  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
750 }
751 
753 PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
754 {
755  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
756  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
757 }
758 
760 PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
761  bool want_binary_data) const
762 {
763 
764  QualifiedMassSpectrum spectrum;
765  bool ok = false;
766 
767  spectrum =
768  qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
769 
770  if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
771  {
772  if(spectrum.getRtInSeconds() == 0)
773  {
774  // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
775  }
776  }
777 
778  // if(!ok)
779  // qDebug() << "Encountered a mass spectrum for which the status is bad.";
780 
781  return spectrum;
782 }
783 
784 
785 void
788 {
790 }
791 
792 
793 void
795  SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
796 {
797 
798  acquireDevice();
799  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
800 
801  // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
802  // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
803  // spectrum has been fully qualified (that is, the member data have been
804  // set), it is transferred to the handler passed as parameter to this
805  // function for the consumer to do what it wants with it.
806 
807  // Does the handler consuming the mass spectra read from file want these
808  // mass spectra to hold the binary data arrays (mz/i vectors)?
809 
810  const bool want_binary_data = handler.needPeakList();
811 
812 
813  std::string env;
814  env = setlocale(LC_ALL, "");
815  setlocale(LC_ALL, "C");
816 
817 
818  // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
819  // run member of msp_msData.
820 
821  pwiz::msdata::SpectrumListPtr spectrum_list_p =
822  msp_msData->run.spectrumListPtr;
823 
824  // We'll need it to perform the looping in the spectrum list.
825  std::size_t spectrum_list_size = spectrum_list_p.get()->size();
826 
827  // qDebug() << "The spectrum list has size:" << spectrum_list_size;
828 
829  // Inform the handler of the spectrum list so that it can handle feedback to
830  // the user.
831  handler.spectrumListHasSize(spectrum_list_size);
832 
833  // Iterate in the full list of spectra.
834 
835  for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
836  {
837 
838  // If the user of this reader instance wants to stop reading the
839  // spectra, then break this loop.
840  if(handler.shouldStop())
841  {
842  qDebug() << "The operation was cancelled. Breaking the loop.";
843  break;
844  }
845 
846  // Get the native pwiz-spectrum from the spectrum list.
847  // Note that this pointer is a shared pointer from pwiz.
848 
849  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
850  getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
851 
852  /*
853  * we want to load metadata of the spectrum even if it does not contain
854  peaks
855 
856  * if(!native_pwiz_spectrum_sp->hasBinaryData())
857  {
858  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
859  "
860  ()"
861  //<< "native pwiz spectrum is empty, continuing.";
862  continue;
863  }
864  */
865 
866  // Instantiate the mass spectrum id that will hold critical information
867  // like the the native id string and the spectrum index.
868 
869  MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
870 
871  // Get the spectrum native id as a QString to store it in the mass
872  // spectrum id class. This is will allow later to refer to the same
873  // spectrum starting back from the file.
874 
875  QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
876  massSpectrumId.setNativeId(native_id);
877 
878  // Finally, instantiate the qualified mass spectrum with its id. This
879  // function will continue performing pappso-spectrum detailed
880  // qualification.
881 
882  bool ok = false;
883 
884  QualifiedMassSpectrum qualified_mass_spectrum =
886  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
887 
888  if(!ok)
889  {
890  // qDebug() << "Encountered a mass spectrum for which the returned "
891  //"status is bad.";
892  continue;
893  }
894 
895  // Before handing the mass spectrum out to the handler, see if the
896  // native mass spectrum was empty or not.
897 
898  // if(!native_pwiz_spectrum_sp->defaultArrayLength)
899  // qDebug() << "The mass spectrum has not defaultArrayLength";
900 
901  qualified_mass_spectrum.setEmptyMassSpectrum(
902  !native_pwiz_spectrum_sp->defaultArrayLength);
903 
904  // The handler will receive the index of the mass spectrum in the
905  // current run via the mass spectrum id member datum.
906  if(ms_level == 0)
907  {
908  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
909  }
910  else
911  {
912  if(qualified_mass_spectrum.getMsLevel() == ms_level)
913  {
914  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
915  }
916  }
917  }
918 
919  setlocale(LC_ALL, env.c_str());
920  // End of
921  // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
922 
923  // Now let the loading handler know that the loading of the data has ended.
924  // The handler might need this "signal" to perform additional tasks or to
925  // cleanup cruft.
926 
927  // qDebug() << "Loading ended";
928  handler.loadingEnded();
929 }
930 
931 std::size_t
933 {
934  return msp_msData->run.spectrumListPtr.get()->size();
935 }
936 
937 bool
939 {
940  return m_hasScanNumbers;
941 }
942 
943 bool
945 {
946  msp_msData = nullptr;
947  return true;
948 }
949 
950 bool
952 {
953  if(msp_msData == nullptr)
954  {
955  initialize();
956  }
957  return true;
958 }
959 
960 
963  std::size_t spectrum_index, pappso::PrecisionPtr precision) const
964 {
965 
966  QualifiedMassSpectrum mass_spectrum =
967  qualifiedMassSpectrum(spectrum_index, false);
968 
969  return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
970 }
971 
974  const pappso::QualifiedMassSpectrum &mass_spectrum,
975  pappso::PrecisionPtr precision) const
976 {
977  XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
978 
979  xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
980 
981  xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
982 
983  return xic_coord;
984 }
985 
986 } // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
base class to read MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:192
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:301
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
release data back end device; if the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
Get the precursor m/z ratio.
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectrums from the MsRunReader class
Definition: msrunreader.h:56
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:53
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition: utils.cpp:143
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
std::shared_ptr< XicCoord > XicCoordSPtr
Definition: xiccoord.h:41
MSrun file reader based on the proteowizard library.