libpappsomspp
Library for mass spectrometry
mzintegrationparams.cpp
Go to the documentation of this file.
1 /* BEGIN software license
2  *
3  * msXpertSuite - mass spectrometry software suite
4  * -----------------------------------------------
5  * Copyright(C) 2009,...,2018 Filippo Rusconi
6  *
7  * http://www.msxpertsuite.org
8  *
9  * This file is part of the msXpertSuite project.
10  *
11  * The msXpertSuite project is the successor of the massXpert project. This
12  * project now includes various independent modules:
13  *
14  * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15  * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16  *
17  * This program is free software: you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation, either version 3 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program. If not, see <http://www.gnu.org/licenses/>.
29  *
30  * END software license
31  */
32 
33 
34 /////////////////////// StdLib includes
35 #include <map>
36 #include <cmath>
37 
38 
39 /////////////////////// Qt includes
40 #include <QDebug>
41 #include <QString>
42 #include <QFile>
43 #include <QDateTime>
44 
45 
46 /////////////////////// pappsomspp includes
47 #include "../../utils.h"
48 #include "../../massspectrum/massspectrum.h"
49 
50 
51 /////////////////////// Local includes
52 #include "mzintegrationparams.h"
53 
54 
56  qRegisterMetaType<pappso::MzIntegrationParams>("pappso::MzIntegrationParams");
58  qRegisterMetaType<pappso::MzIntegrationParams *>(
59  "pappso::MzIntegrationParams *");
60 
61 
62 namespace pappso
63 {
64 
65 
66 //! Map relating each BinningType value to its textual representation (looked up by MzIntegrationParams::toString()).
67 std::map<BinningType, QString> binningTypeMap{
68  {BinningType::NONE, "NONE"},
69  {BinningType::DATA_BASED, "DATA_BASED"},
70  {BinningType::ARBITRARY, "ARBITRARY"}};
71 
72 
74 {
77 }
78 
79 
82  BinningType binningType,
83  int decimalPlaces,
84  pappso::PrecisionPtr precisionPtr,
85  bool applyMzShift,
86  pappso::pappso_double mzShift,
87  bool removeZeroValDataPoints)
88  : m_smallestMz(minMz),
89  m_greatestMz(maxMz),
90  m_binningType(binningType),
91  m_decimalPlaces(decimalPlaces),
92  mp_precision(precisionPtr),
93  m_applyMzShift(applyMzShift),
94  m_mzShift(mzShift),
95  m_removeZeroValDataPoints(removeZeroValDataPoints)
96 {
97  if(mp_precision == nullptr)
99 }
100 
101 
103  : m_smallestMz(other.m_smallestMz),
104  m_greatestMz(other.m_greatestMz),
105  m_binningType(other.m_binningType),
106  m_decimalPlaces(other.m_decimalPlaces),
107  mp_precision(other.mp_precision),
108  m_applyMzShift(other.m_applyMzShift),
109  m_mzShift(other.m_mzShift),
110  m_removeZeroValDataPoints(other.m_removeZeroValDataPoints)
111 {
112  if(mp_precision == nullptr)
114 }
115 
116 
118 {
119 }
120 
121 
124 {
125  if(this == &other)
126  return *this;
127 
128  m_smallestMz = other.m_smallestMz;
129  m_greatestMz = other.m_greatestMz;
131 
133 
134  mp_precision = other.mp_precision;
135  if(mp_precision == nullptr)
137 
139  m_mzShift = other.m_mzShift;
141 
142  return *this;
143 }
144 
145 
146 void
148 {
149  m_smallestMz = value;
150 }
151 
152 
153 void
155 {
156  m_smallestMz = m_smallestMz > value ? value : m_smallestMz;
157 }
158 
159 
162 {
163  return m_smallestMz;
164 }
165 
166 
167 void
169 {
170  m_greatestMz = value;
171 }
172 
173 
174 void
176 {
177  m_greatestMz = m_greatestMz < value ? value : m_greatestMz;
178 }
179 
180 
183 {
184  return m_greatestMz;
185 }
186 
187 void
189 {
190  m_binningType = binningType;
191 }
192 
195 {
196  return m_binningType;
197 }
198 
199 void
201 {
202  m_decimalPlaces = decimal_places;
203 }
204 
205 
206 int
208 {
209  return m_decimalPlaces;
210 }
211 
212 void
214 {
215  mp_precision = precisionPtr;
216 
217  if(mp_precision == nullptr)
219 }
220 
223 {
224  return mp_precision;
225 }
226 
227 
228 void
230 {
231  m_applyMzShift = applyMzShift;
232 }
233 
234 
235 bool
237 {
238  return m_applyMzShift;
239 }
240 
241 
242 void
244 {
245  m_removeZeroValDataPoints = removeOrNot;
246 }
247 
248 
249 bool
251 {
253 }
254 
255 
256 void
258 {
259  m_mzShift = value;
260 }
261 
262 
263 double
265 {
266  return m_mzShift;
267 }
268 
269 
270 //! Reset the instance to default values.
271 void
273 {
274  m_smallestMz = std::numeric_limits<double>::min();
275  m_greatestMz = std::numeric_limits<double>::min();
277 
278  // Special case for this member datum
280 
281  m_applyMzShift = false;
282  m_mzShift = 0;
284 }
285 
286 
287 bool
289 {
290  int errors = 0;
291 
293  {
294  // qDebug() << "m_smallestMz:" << m_smallestMz;
295  // qDebug() << "smallest is max:" << (m_smallestMz ==
296  // std::numeric_limits<double>::max());
297 
298  errors += (m_smallestMz == std::numeric_limits<double>::max() ? 1 : 0);
299 
300  // qDebug() << "m_greatestMz:" << m_greatestMz;
301  // qDebug() << "greatest is min:" << (m_greatestMz ==
302  // std::numeric_limits<double>::min());
303  errors += (m_greatestMz == std::numeric_limits<double>::min() ? 1 : 0);
304 
305  // if(mp_precision != nullptr)
306  // qDebug() << mp_precision->toString();
307 
308  errors += (mp_precision == nullptr ? 1 : 0);
309  }
310 
311  if(errors)
312  {
313  qDebug()
314  << "The m/z integration parameters are not valid or do not apply...";
315  }
316 
317  return !errors;
318 }
319 
320 
321 bool
323 {
324  return (m_smallestMz != std::numeric_limits<double>::max()) &&
325  (m_greatestMz != std::numeric_limits<double>::min());
326 }
327 
328 
329 std::vector<double>
331 {
332 
333  // qDebug();
334 
335  std::vector<double> bins;
336 
338  {
339  // If no binning is to be performed, fine.
340  return bins;
341  }
343  {
344  // Use only data in the MzIntegrationParams member data.
345  return createArbitraryBins();
346  }
348  {
349  // qDebug();
350 
351  qFatal("Programming error.");
352  }
353 
354  return bins;
355 }
356 
357 
358 std::vector<double>
360 {
361 
362  // qDebug();
363 
364  std::vector<double> bins;
365 
367  {
368  // If no binning is to be performed, fine.
369  return bins;
370  }
372  {
373  // Use only data in the MzIntegrationParams member data.
374  return createArbitraryBins();
375  }
377  {
378  // qDebug();
379 
380  // Use the first spectrum to perform the data-based bins
381 
382  return createDataBasedBins(mass_spectrum_csp);
383  }
384 
385  return bins;
386 }
387 
388 
389 std::vector<double>
391 {
392 
393  // qDebug();
394 
395  // Now starts the tricky stuff. Depending on how the binning has been
396  // configured, we need to take diverse actions.
397 
398  // qDebug() << "Bin specification:" << mp_precision->toString();
399 
402 
403  // qDebug() << QString::asprintf("min_mz: %.6f\n", min_mz)
404  //<< QString::asprintf("max_mz: %.6f\n", max_mz);
405 
406  pappso::pappso_double binSize = mp_precision->delta(min_mz);
407 
408  // qDebug() << QString::asprintf(
409  //"binSize is the precision delta for min_mz: %.6f\n", binSize);
410 
411  // Only compute the decimal places if they were not configured already.
412  if(m_decimalPlaces == -1)
413  {
414  // qDebug() << "Now checking how many decimal places are needed.";
415 
416  // We want as many decimal places as there are 0s between the integral
417  // part of the double and the first non-0 cipher. For example, if
418  // binSize is 0.004, zero decimals is 2 and m_decimalPlaces is set to 3,
419  // because we want decimals up to 4 included.
420 
422 
423  // qDebug() << "With binSize" << binSize
424  //<< " m_decimalPlaces was computed to be:" << m_decimalPlaces;
425  }
426 
427  // Now that we have defined the value of m_decimalPlaces, let's use that
428  // value.
429 
430  double first_mz = ceil((min_mz * std::pow(10, m_decimalPlaces)) - 0.49) /
431  pow(10, m_decimalPlaces);
432  double last_mz =
433  ceil((max_mz * pow(10, m_decimalPlaces)) - 0.49) / pow(10, m_decimalPlaces);
434 
435  // qDebug() << "After having accounted for the decimals, new min/max values:"
436  //<< QString::asprintf("Very first data point: %.6f\n", first_mz)
437  //<< QString::asprintf("Very last data point to reach: %.6f\n",
438  // last_mz);
439 
440  // Instantiate the vector of mz doubles that we'll feed with the bins.
441 
442  std::vector<pappso::pappso_double> bins;
443 
444  // Store that very first value for later use in the loop.
445  // The bins are nothing more than:
446  //
447  // 1. The first mz (that is, the smallest mz value found in all the spectra);
448  // 2. A sequence of mz values corresponding to that first mz value
449  // incremented by the bin size.
450 
451  // Seed the root of the bin vector with the first mz value rounded above as
452  // requested.
453  pappso::pappso_double previous_mz_bin = first_mz;
454 
455  bins.push_back(previous_mz_bin);
456 
457  // Now continue adding mz values until we have reached the end of the
458  // spectrum, that is the max_mz value, as converted using the decimals to
459  // last_mz.
460 
461  // debugCount value used below for debugging purposes.
462  // int debugCount = 0;
463 
464  while(previous_mz_bin <= last_mz)
465  {
466 
467  // qDebug() << "Now starting the bin creation loop.";
468 
469  // Calculate dynamically the precision delta according to the current mz
470  // value.
471 
472  // double precision_delta = mp_precision->delta(previous_mz_bin);
473  // qDebug() << "precision_delta: " << precision_delta;
474 
475  double current_mz =
476  previous_mz_bin + mp_precision->delta(previous_mz_bin);
477 
478  // qDebug() << QString::asprintf(
479  //"previous_mzBin: %.6f and current_mz: %.6f\n",
480  // previous_mz_bin,
481  // current_mz);
482 
483  // Now apply on the obtained mz value the decimals that were either set
484  // or computed earlier.
485 
486  double current_rounded_mz =
487  ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
488  pow(10, m_decimalPlaces);
489 
490  // qDebug() << QString::asprintf(
491  //"current_mz: %.6f and current_rounded_mz: %.6f and previous_mzBin "
492  //": % .6f\n ",
493  // current_mz,
494  // current_rounded_mz,
495  // previous_mz_bin);
496 
497  // If rounding makes the new value identical to the previous one, then
498  // the rounding is too coarse: use one more decimal place and round again.
499 
500  if(current_rounded_mz == previous_mz_bin)
501  {
502  ++m_decimalPlaces;
503 
504  current_rounded_mz =
505  ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
506  pow(10, m_decimalPlaces);
507 
508  // qDebug().noquote()
509  //<< "Had to increment decimal places by one while creating the bins "
510  //"in BinningType::ARBITRARY mode..";
511  }
512 
513  bins.push_back(current_rounded_mz);
514 
515  // Store current_rounded_mz as the previous mz bin for the next iteration.
516  previous_mz_bin = current_rounded_mz;
517  }
518 
519 
520 #if 0
521 
522  QString fileName = "/tmp/massSpecArbitraryBins.txt-at-" +
523  QDateTime::currentDateTime().toString("yyyyMMdd-HH-mm-ss");
524 
525  qDebug() << "Writing the list of bins setup in the "
526  "mass spectrum in file "
527  << fileName;
528 
529  QFile file(fileName);
530  file.open(QIODevice::WriteOnly);
531 
532  QTextStream fileStream(&file);
533 
534  for(auto &&bin : bins)
535  fileStream << QString("%1\n").arg(bin, 0, 'f', 10);
536 
537  fileStream.flush();
538  file.close();
539 
540 #endif
541 
542  // qDebug() << "Prepared bins with " << bins.size() << "elements."
543  //<< "starting with mz" << bins.front() << "ending with mz"
544  //<< bins.back();
545 
546  return bins;
547 }
548 
549 
550 std::vector<double>
552  pappso::MassSpectrumCstSPtr mass_spectrum_csp)
553 {
554  // qDebug();
555 
556  // The bins in *this mass spectrum must be calculated starting from the
557  // data in the mass_spectrum_csp parameter.
558 
559  // Instantiate the vector of mz doubles that we'll feed with the bins.
560 
561  std::vector<pappso::pappso_double> bins;
562 
563  if(mass_spectrum_csp->size() < 2)
564  return bins;
565 
566  // Make sure the spectrum is sorted, as this function takes for granted
567  // that the DataPoint instances are sorted in ascending x (== mz) value
568  // order.
569  pappso::MassSpectrum local_mass_spectrum = *mass_spectrum_csp;
570  local_mass_spectrum.sortMz();
571 
573 
574  // qDebug() << "The min_mz:" << min_mz;
575 
576  if(m_decimalPlaces != -1)
577  min_mz = ceil((min_mz * pow(10, m_decimalPlaces)) - 0.49) /
578  pow(10, m_decimalPlaces);
579 
580 
581  // Two values for the definition of a MassSpectrumBin.
582 
583  // The first value of the mz range that defines the bin. This value is part
584  // of the bin.
585  pappso::pappso_double start_mz_in = min_mz;
586 
587  // The second value of the mz range that defines the bin. This value is
588  // *not* part of the bin.
589  pappso::pappso_double end_mz_out;
590 
591  std::vector<pappso::DataPoint>::const_iterator it =
592  local_mass_spectrum.begin();
593 
594  pappso::pappso_double prev_mz = it->x;
595 
596  if(m_decimalPlaces != -1)
597  prev_mz = ceil((prev_mz * pow(10, m_decimalPlaces)) - 0.49) /
598  pow(10, m_decimalPlaces);
599 
600  ++it;
601 
602  while(it != local_mass_spectrum.end())
603  {
604  pappso::pappso_double next_mz = it->x;
605 
606  if(m_decimalPlaces != -1)
607  next_mz = ceil((next_mz * pow(10, m_decimalPlaces)) - 0.49) /
608  pow(10, m_decimalPlaces);
609 
610  pappso::pappso_double step = next_mz - prev_mz;
611  end_mz_out = start_mz_in + step;
612 
613  if(m_decimalPlaces != -1)
614  end_mz_out = ceil((end_mz_out * pow(10, m_decimalPlaces)) - 0.49) /
615  pow(10, m_decimalPlaces);
616 
617  // The data point that is crafted has a 0 y-value. The binning must
618  // indeed not create artificial intensity data.
619 
620  // qDebug() << "Pushing back bin:" << start_mz_in << end_mz_out;
621 
622  bins.push_back(start_mz_in);
623 
624  // Prepare next bin
625  start_mz_in = end_mz_out;
626 
627  // Update prev_mz to be the current one for next iteration.
628  prev_mz = next_mz;
629 
630  // Now go to the next DataPoint instance.
631  ++it;
632  }
633 
634 #if 0
635 
636  QString fileName = "/tmp/massSpecDataBasedBins.txt";
637 
638  qDebug() << "Writing the list of bins setup in the "
639  "mass spectrum in file "
640  << fileName;
641 
642  QFile file(fileName);
643  file.open(QIODevice::WriteOnly);
644 
645  QTextStream fileStream(&file);
646 
647  for(auto &&bin : m_bins)
648  fileStream << QString("[%1-%2]\n")
649  .arg(bin.startMzIn, 0, 'f', 10)
650  .arg(bin.endMzOut, 0, 'f', 10);
651 
652  fileStream.flush();
653  file.close();
654 
655  qDebug() << "elements."
656  << "starting with mz" << m_bins.front().startMzIn << "ending with mz"
657  << m_bins.back().endMzOut;
658 
659 #endif
660 
661  return bins;
662 }
663 
664 
665 QString
666 MzIntegrationParams::toString(int offset, const QString &spacer) const
667 {
668  QString lead;
669 
670  for(int iter = 0; iter < offset; ++iter)
671  lead += spacer;
672 
673  QString text = lead;
674  text += "m/z integration parameters:\n";
675 
676  text += lead;
677  text += spacer;
678  if(m_smallestMz != std::numeric_limits<double>::max())
679  text.append(
680  QString::asprintf("Smallest (first) m/z: %.6f\n", m_smallestMz));
681 
682  text += lead;
683  text += spacer;
684  if(m_greatestMz != std::numeric_limits<double>::min())
685  text.append(QString::asprintf("Greatest (last) m/z: %.6f\n", m_greatestMz));
686 
687  text += lead;
688  text += spacer;
689  text.append(QString("Decimal places: %1\n").arg(m_decimalPlaces));
690 
691  std::map<BinningType, QString>::iterator it;
692  it = binningTypeMap.find(m_binningType);
693 
694  if(it == binningTypeMap.end())
695  qFatal("Programming error.");
696 
697  text += lead;
698  text += spacer;
699  text.append(QString("Binning type: %1\n").arg(it->second.toLatin1().data()));
700 
701  // Only provide the details relative to the ARBITRARY binning type.
702 
704  {
705  text += lead;
706  text += spacer;
707  text += spacer;
708  text.append(QString("Bin nominal size: %1\n")
709  .arg(mp_precision->getNominal(), 0, 'f', 6));
710 
711  text += lead;
712  text += spacer;
713  text += spacer;
714  text.append(QString("Bin size: %2\n")
715  .arg(mp_precision->toString().toLatin1().data()));
716  }
717 
718  // Now other data that are independent of the bin settings.
719 
720  text += lead;
721  text += spacer;
722  text +=
723  QString("Apply m/z shift: %1\n").arg(m_applyMzShift ? "true" : "false");
724 
725  if(m_applyMzShift)
726  {
727  text += lead;
728  text += spacer;
729  text += spacer;
730  text += QString("m/z shift: %1").arg(m_mzShift, 0, 'f', 6);
731  }
732 
733  text += lead;
734  text += spacer;
735  text += QString("Remove 0-val data points: %1\n")
736  .arg(m_removeZeroValDataPoints ? "true" : "false");
737 
738  return text;
739 }
740 
741 } // namespace pappso
742 
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
The MzIntegrationParams class provides the parameters defining how m/z !
Q_INVOKABLE BinningType getBinningType() const
Q_INVOKABLE int getDecimalPlaces() const
pappso::pappso_double getSmallestMz() const
pappso::pappso_double m_smallestMz
MzIntegrationParams & operator=(const MzIntegrationParams &other)
Q_INVOKABLE pappso::pappso_double getGreatestMz() const
pappso::pappso_double m_greatestMz
Q_INVOKABLE pappso::PrecisionPtr getPrecision() const
Q_INVOKABLE bool isApplyMzShift() const
std::vector< double > createArbitraryBins()
Q_INVOKABLE void setPrecision(pappso::PrecisionPtr precisionPtr)
Q_INVOKABLE double getMzShift() const
Q_INVOKABLE void updateSmallestMz(pappso::pappso_double value)
Q_INVOKABLE void updateGreatestMz(pappso::pappso_double value)
Q_INVOKABLE bool isRemoveZeroValDataPoints() const
Q_INVOKABLE bool isValid() const
Q_INVOKABLE bool hasValidMzRange() const
Q_INVOKABLE QString toString(int offset=0, const QString &spacer=QString()) const
pappso::PrecisionPtr mp_precision
Q_INVOKABLE void setSmallestMz(pappso::pappso_double value)
Q_INVOKABLE void setBinningType(BinningType binningType)
Q_INVOKABLE void reset()
Reset the instance to default values.
std::vector< double > createDataBasedBins(pappso::MassSpectrumCstSPtr massSpectrum)
Q_INVOKABLE void setApplyMzShift(bool applyMzShift)
Q_INVOKABLE void setDecimalPlaces(int decimal_places)
Q_INVOKABLE std::vector< pappso::pappso_double > createBins()
Q_INVOKABLE void setMzShift(double value)
Q_INVOKABLE void setRemoveZeroValDataPoints(bool removeOrNot=true)
Q_INVOKABLE void setGreatestMz(pappso::pappso_double value)
pappso::pappso_double m_mzShift
virtual QString toString() const =0
virtual pappso_double getNominal() const final
Definition: precision.cpp:65
virtual pappso_double delta(pappso_double value) const =0
static PrecisionPtr getPpmInstance(pappso_double value)
get a ppm precision pointer
Definition: precision.cpp:150
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
static int zeroDecimalsInValue(pappso_double value)
0.11 would return 0 (no empty decimal); 2.001 would return 2; 1000.0001254 would return 3
Definition: utils.cpp:82
int mzIntegrationParamsMetaTypeId
int mzIntegrationParamsPtrMetaTypeId
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
BinningType
Type of binning when performing integrations to a mass spectrum.
@ DATA_BASED
binning based on mass spectral data
@ ARBITRARY
binning based on arbitrary bin size value
@ NONE
no binning
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::map< BinningType, QString > binningTypeMap
Map relating the BinningType to a textual representation.