libpappsomspp
Library for mass spectrometry
filterlowintensitysignalremoval.cpp
Go to the documentation of this file.
1 /* BEGIN software license
2  *
3  * msXpertSuite - mass spectrometry software suite
4  * -----------------------------------------------
5  * Copyright(C) 2009,...,2021 Filippo Rusconi
6  *
7  * http://www.msxpertsuite.org
8  *
9  * This file is part of the msXpertSuite project.
10  *
11  * The msXpertSuite project is the successor of the massXpert project. This
12  * project now includes various independent modules:
13  *
14  * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15  * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16  *
17  * This program is free software: you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation, either version 3 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program. If not, see <http://www.gnu.org/licenses/>.
29  *
30  * END software license
31  */
32 
33 
34 #include <qmath.h>
35 
36 #include <limits>
37 #include <iterator>
38 
39 #include <QVector>
40 #include <QDebug>
41 
42 #include "../../exception/exceptionnotrecognized.h"
43 
45 #include "../../trace/maptrace.h"
46 
47 
48 namespace pappso
49 {
50 
51 
53  double mean, double std_dev, double threshold)
54 {
55  m_noiseMean = mean;
56  m_noiseStdDev = std_dev;
57  m_threshold = threshold;
58 }
59 
60 
62  const QString &parameters)
63 {
64  buildFilterFromString(parameters);
65 }
66 
67 
70 {
71  m_noiseMean = other.m_noiseMean;
73  m_threshold = other.m_threshold;
74 }
75 
76 
78 {
79 }
80 
81 
85 {
86  if(&other == this)
87  return *this;
88 
89  m_noiseMean = other.m_noiseMean;
91  m_threshold = other.m_threshold;
92 
93  return *this;
94 }
95 
96 
97 void
99  const QString &parameters)
100 {
101  // Typical string: "FloorAmplitudePercentage|15"
102  if(parameters.startsWith(QString("%1|").arg(name())))
103  {
104  QStringList params = parameters.split("|").back().split(";");
105 
106  m_noiseMean = params.at(0).toDouble();
107  m_noiseStdDev = params.at(1).toDouble();
108  m_threshold = params.at(2).toDouble();
109  }
110  else
111  {
113  QString(
114  "Building of FilterLowIntensitySignalRemoval from string %1 failed")
115  .arg(parameters));
116  }
117 }
118 
119 
120 std::size_t
122 {
123  // We want to iterate in the trace and check if we can get the apicess
124  // isolated one by one. An apex is nothing more than the top cusp of a
125  // triangle. We only store apices that have a value > m_threshold.
126 
127  m_clusters.clear();
128 
129  std::size_t trace_size = trace.size();
130  if(trace_size <= 2)
131  {
132  //qDebug() << "The original trace has less than 3 points. Returning it "
133  //"without modification.";
134  return m_clusters.size();
135  }
136  //else
137  //qDebug() << "The original trace has" << trace_size << "data points";
138 
139  // Seed the system with the first point of the trace.
140  m_curIter = trace.cbegin();
141  m_prevIter = trace.cbegin();
142 
143  //qDebug() << "First trace point: "
144  //<< "(" << m_prevIter->x << "," << m_prevIter->y << ");";
145 
146  // We still have not found any apex!
147  m_prevApex = trace.cend();
148 
149  // We will need to know if we were ascending to an apex.
150  bool was_ascending_to_apex = false;
151 
152  // Prepare a first apex cluster.
153  ApicesSPtr cluster_apices = std::make_shared<ClusterApices>();
154 
155  // Now that we have seeded the system with the first point of the original
156  // trace, go to the next one.
157  ++m_curIter;
158 
159  // And now iterate in the original trace and make sure we detect all the
160  // apicess.
161 
162  while(m_curIter != trace.cend())
163  {
164  //qDebug() << "Current trace point: "
165  //<< "(" << m_curIter->x << "," << m_curIter->y << ");";
166 
167  // We monitor if we are going up or down a peak.
168 
169  if(m_curIter->y > m_prevIter->y)
170  // We are ascending a peak. We do not know if we are at the apex.
171  {
172  //qDebug().noquote() << "We are ascending to an apex.\n";
173  was_ascending_to_apex = true;
174  }
175  else
176  // We are descending a peak.
177  {
178  //qDebug().noquote() << "Descending a peak. ";
179 
180  // There are two situations:
181  //
182  // 1. Either we were ascending to an apex, and m_prev is that apex,
183  //
184  // 2. Or we were not ascending to an apex and in fact all we are doing
185  // is going down an apex that occurred more than one trace point ago.
186 
187  if(!was_ascending_to_apex)
188  // We were not ascending to an apex.
189  {
190  // Do nothing here.
191 
192  //qDebug().noquote()
193  //<< "But, we were not ascending to an apex, so do nothing.\n";
194  }
195  else
196  // We are effectively descending a peak right after the apex was
197  // reached at the previous iteration.
198  {
200 
201  //qDebug().noquote()
202  //<< "And, we were ascending to an apex, so "
203  //"m_curApex has become m_prevIter: ("
204  //<< m_curApex->x << "," << m_curApex->y << ")\n";
205 
206  // We might have two situations:
207 
208  // 1. We had already encountered an apex.
209  // 2. We had not yet encountered an apex.
210 
211  if(m_prevApex != trace.cend())
212  // We had already encountered an apex.
213  {
214  // Was that apex far on the left of the current apex ?
215 
216  if(m_curApex->x - m_prevApex->x >
218  // The distance is not compatible with both apices to belong
219  // to the same apices cluster.
220  {
221  // We are creating a new isotopic apices.
222 
223  // But, since another apex had been encountered already,
224  // that means that an isotopic apices was cooking
225  // already. We must store it.
226 
227  if(!cluster_apices->size())
228  qFatal("Cannot be that the apices has no apex.");
229 
230  // Store the crafted apices cluster.
231  m_clusters.push_back(cluster_apices);
232 
233  //qDebug().noquote()
234  //<< "There was a previous apex already, BUT "
235  //"outside of apices range. "
236  //"Pushing the cooking apices that has size:"
237  //<< cluster_apices->size();
238 
239  // Now create a brand new apices for later work.
240  cluster_apices = std::make_shared<ClusterApices>();
241 
242  //qDebug() << "Created a brand new apices.";
243 
244  // We only start the new apices with the current apex if
245  // that apex is above the threshold.
246 
247  if(m_curApex->y > m_threshold)
248  {
249  // And start it with the current apex.
250  cluster_apices->push_back(m_curApex);
251 
252  //qDebug()
253  //<< "Since the current apex is above the threshold, "
254  //"we PUSH it to the newly created apices: ("
255  //<< m_curApex->x << "," << m_curApex->y << ")";
256 
258 
259  //qDebug() << "Set prev apex to be cur apex.";
260  }
261  else
262  {
263  //qDebug()
264  //<< "Since the current apex is below the threshold, "
265  //"we DO NOT push it to the newly created "
266  //"apices: ("
267  //<< m_curApex->x << "," << m_curApex->y << ")";
268 
269  // Since previous apex went to the closed apices, we
270  // need to reset it.
271 
272  m_prevApex = trace.cend();
273 
274  //qDebug() << "Since the previous apex went to the "
275  //"closed apices, and cur apex has too "
276  //"small an intensity, we reset prev apex "
277  //"to trace.cend().";
278  }
279  }
280  else
281  // The distance is compatible with both apices to belong to
282  // the same isotopic apices.
283  {
284 
285  // But we only push back the current apex to the apices
286  // if its intensity is above the threshold.
287 
288  if(m_curApex->y > m_threshold)
289  // The current apex was above the threshold
290  {
291  cluster_apices->push_back(m_curApex);
292 
293  //qDebug().noquote()
294  //<< "There was an apex already inside of apices "
295  //"range. "
296  //"AND, since the current apex was above the "
297  //"threshold, we indeed push it to apices.\n";
298 
299  //qDebug().noquote()
300  //<< "Current apex PUSHED: " << m_curApex->x << ", "
301  //<< m_curApex->y;
302 
304 
305  //qDebug() << "We set prev apex to be cur apex.";
306  }
307  else
308  {
309  //qDebug().noquote()
310  //<< "There was an apex already inside of apices "
311  //"range. "
312  //"BUT, since the current apex was below the "
313  //"threshold, we do not push it to apices.\n";
314 
315  //qDebug().noquote()
316  //<< "Current apex NOT pushed: " << m_curApex->x
317  //<< ", " << m_curApex->y;
318  }
319  }
320  }
321  else
322  // No apex was previously found. We are fillin-up a new isotopic
323  // apices.
324  {
325  if(m_curApex->y > m_threshold)
326  // We can actually add that apex to a new isotopic apices.
327  {
328  if(cluster_apices->size())
329  qCritical(
330  "At this point, the apices should be new and "
331  "empty.");
332 
333  cluster_apices->push_back(m_curApex);
334 
335  //qDebug().noquote()
336  //<< "No previous apex was found. Since current apex' "
337  //"intensity is above threshold, we push it back to "
338  //"the "
339  //"apices.\n";
340 
341  // Store current apex as previous apex for next rounds.
343 
344  //qDebug().noquote() << "And thus we store the current "
345  //"apex as previous apex.\n";
346  }
347  else
348  {
349  //qDebug().noquote()
350  //<< "No previous apex was found. Since current apex' "
351  //"intensity is below threshold, we do nothing.\n";
352  }
353  }
354  }
355  // End of
356  // ! if(!was_ascending_to_apex)
357  // That is, we were ascending to an apex.
358 
359  // Tell what it is: we were not ascending to an apex.
360  was_ascending_to_apex = false;
361  }
362  // End of
363  // ! if(m_curIter->y > m_prevIter->y)
364  // That is, we are descending a peak.
365 
366  // At this point, prepare next round.
367 
368  //qDebug().noquote()
369  //<< "Preparing next round, with m_prevIter = m_curIter and ++index.\n";
370 
372 
373  ++m_curIter;
374  }
375  // End of
376  // while(index < trace_size)
377 
378  // At this point, if a apices had been cooking, add it.
379 
380  if(cluster_apices->size())
381  m_clusters.push_back(cluster_apices);
382 
383  return m_clusters.size();
384 }
385 
386 
387 Trace::const_iterator
389  Trace::const_iterator iter,
390  double distance_threshold)
391 {
392  // We receive an iterator that points to an apex. We want iterate back in the
393  // trace working copy and look if there are apices that are distant less than
394  // 1.1~Th.
395 
396  Trace::const_iterator init_iter = iter;
397 
398  if(iter == trace.cbegin())
399  return iter;
400 
401  // Seed the previous iter to iter, because we'll move from there right away.
402  Trace::const_iterator prev_iter = init_iter;
403 
404  Trace::const_iterator last_apex_iter = init_iter;
405 
406  // We will need to know if we were ascending to an apex.
407  bool was_ascending_to_apex = false;
408 
409  // Now that we have seeded the system, we can move iter one data point:
410  --iter;
411 
412  while(iter != trace.cbegin())
413  {
414  // If we are already outside of distance_threshold, return the last apex
415  // that was found (or initial iter if none was encountered)
416 
417  if(abs(init_iter->x - iter->x) >= distance_threshold)
418  return last_apex_iter;
419 
420  if(iter->y > prev_iter->y)
421  {
422  // New data point has greater intensity. Just record that fact.
423  was_ascending_to_apex = true;
424  }
425  else
426  {
427  // New data point has smaller intensity. We are descending a peak.
428 
429  if(was_ascending_to_apex)
430  {
431  // We had an apex at previous iter. We are inside the distance
432  // threshold. That is good. But we still could find another apex
433  // while moving along the trace that is in the distance threshold.
434  // This is why we keep going, but we store the previous iter as
435  // the last encountered apex.
436 
437  last_apex_iter = prev_iter;
438  }
439  }
440 
441  prev_iter = iter;
442 
443  // Move.
444  --iter;
445  }
446 
447  //qDebug() << "Init m/z: " << init_iter->x
448  //<< "Returning m/z: " << last_apex_iter->x
449  //<< "at distance:" << std::distance(last_apex_iter, init_iter);
450 
451  // At this point last_apex_iter is the same as the initial iter.
452  return last_apex_iter;
453 }
454 
455 
456 Trace::const_iterator
458  Trace::const_iterator iter,
459  double distance_threshold)
460 {
461  // We receive an iterator that points to an apex. We want iterate back in the
462  // trace working copy and look if there are apices that are distant less than
463  // 1.1~Th.
464 
465  Trace::const_iterator init_iter = iter;
466 
467  if(iter == trace.cend())
468  return iter;
469 
470  // Seed the previous iter to iter, because we'll move from there right away.
471  Trace::const_iterator prev_iter = init_iter;
472 
473  Trace::const_iterator last_apex_iter = init_iter;
474 
475  // We will need to know if we were ascending to an apex.
476  bool was_ascending_to_apex = false;
477 
478  // Now that we have seeded the system, we can move iter one data point:
479  ++iter;
480 
481  while(iter != trace.cend())
482  {
483  // If we are already outside of distance_threshold, return the last apex
484  // that was found (or initial iter if none was encountered)
485 
486  // FIXME: maybe we should compare prev_iter->x with iter->x so that we
487  // continue moving if all the succcessive apices found are each one in the
488  // distance_threshold from the previous one ?
489 
490  if(abs(init_iter->x - iter->x) >= distance_threshold)
491  return last_apex_iter;
492 
493  if(iter->y > prev_iter->y)
494  {
495  // New data point has greater intensity. Just record that fact.
496  was_ascending_to_apex = true;
497  }
498  else
499  {
500  // New data point has smaller intensity. We are descending a peak.
501 
502  if(was_ascending_to_apex)
503  {
504  // We had an apex at previous iter. We are inside the distance
505  // threshold. That is good. But we still could find another apex
506  // while moving along the trace that is in the distance threshold.
507  // This is why we keep going, but we store the previous iter as
508  // the last encountered apex.
509 
510  last_apex_iter = prev_iter;
511  }
512  }
513 
514  prev_iter = iter;
515 
516  // Move.
517  ++iter;
518  }
519 
520  // At this point last_apex_iter is the same as the initial iter.
521  return last_apex_iter;
522 }
523 
524 
525 Trace
527 {
528  // We start from the vector of apices and try to remake a high resolution
529  // trace out of these apices.
530 
531  MapTrace map_trace;
532 
533  // The general idea is that apices were detected, and only apices having
534  // their intensity above the threshold were stored. That means that we need to
535  // add points to the trace to reconstruct a meaningful trace. Indeed, imagine
536  // a heavy peptide isotopic cluster: the first peak's apex is below threshold
537  // and was not stored. The second peak is also below. But the third isotopic
538  // cluster peak is above and was stored.
539  //
540  // How do we reconstruct the trace to have all these points that were
541  // preceding the first isotopic cluster apex that was detected.
542  //
543  // The same applies to the last peaks of the cluster that are below the
544  // threshold whil the preceeding ones were above!
545 
546  // The general idea is to iterate in the vector of apices and for each apex
547  // that is encountered, ask if there were apices
548 
549  // m_clusters is a vector that contains vectors of Trace::const_iter.
550  // Each vector in m_clusters should represent all the apices of a cluster.
551 
552  //qDebug() << "Reconstructing trace with " << m_clusters.size() << "clusters.";
553 
554  Trace::const_iterator left_begin_iter = trace.cend();
555  Trace::const_iterator right_end_iter = trace.cend();
556 
557  for(auto &&cluster_apices : m_clusters)
558  {
559  // cluster_apices is a vector of Trace::const_iterator. If we want to
560  // reconstruct the Trace, we need to iterate through all the DataPoint
561  // objects in between cluster_apices.begin() and cluster_apices.end().
562 
563  Trace::const_iterator begin_iter = *(cluster_apices->begin());
564  Trace::const_iterator end_iter = *(std::prev(cluster_apices->end()));
565 
566  //qDebug() << "Iterating in a new cluster apices with boundaries:"
567  //<< begin_iter->x << "-" << end_iter->x;
568 
569  left_begin_iter =
571 
572  //qDebug() << "After backwardFindApex, left_begin_iter points to:"
573  //<< left_begin_iter->toString() << "with distance:"
574  //<< std::distance(left_begin_iter, begin_iter);
575 
576  right_end_iter = forwardFindApex(
577  trace, end_iter, 1.5 * INTRA_CLUSTER_INTER_PEAK_DISTANCE);
578 
579  for(Trace::const_iterator iter = left_begin_iter; iter != right_end_iter;
580  ++iter)
581  {
582  map_trace[iter->x] = iter->y;
583  }
584 
585  // Now reset the left and right iters to intensity 0 to avoid having
586  // disgraceful oblique lines connnecting trace segments.
587 
588  map_trace[left_begin_iter->x] = 0;
589  map_trace[std::prev(right_end_iter)->x] = 0;
590 
591  }
592 
593  return map_trace.toTrace();
594 }
595 
596 
597 Trace &
599 {
600  //qDebug();
601 
602  // Horrible hack to have a non const filtering process.
603  return const_cast<FilterLowIntensitySignalRemoval *>(this)->nonConstFilter(
604  trace);
605 }
606 
607 
608 Trace &
610 {
611  //qDebug();
612 
613  if(trace.size() <= 2)
614  {
615  //qDebug() << "The original trace has less than 3 points. Returning it "
616  //"without modification.";
617  return trace;
618  }
619  //else
620  //qDebug() << "The original trace had" << trace.size() << "data points";
621 
622  std::size_t cluster_count = detectClusterApices(trace);
623 
624  //qDebug() << "Number of detected cluster apices: " << cluster_count;
625 
626  if(!cluster_count)
627  return trace;
628 
629  // At this point we want to work on the apices and reconstruct a full
630  // trace.
631 
632  Trace reconstructed_trace = reconstructTrace(trace);
633 
634  trace = std::move(reconstructed_trace);
635 
636  return trace;
637 }
638 
639 
640 double
642 {
643  return m_threshold;
644 }
645 
646 
647 //! Return a string with the textual representation of the configuration data.
648 QString
650 {
651  return QString("%1|%2").arg(name()).arg(QString::number(m_threshold, 'f', 2));
652 }
653 
654 
655 QString
657 {
658  return "FilterLowIntensitySignalRemoval";
659 }
660 
661 } // namespace pappso
excetion to use when an item type is not recognized
Redefines the floor intensity of the Trace.
Trace::const_iterator backwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval(double mean, double std_dev, double threshold)
Trace & filter(Trace &data_points) const override
Trace::const_iterator forwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval & operator=(const FilterLowIntensitySignalRemoval &other)
void buildFilterFromString(const QString &strBuildParams) override
build this filter using a string
QString toString() const override
Return a string with the textual representation of the configuration data.
Trace toTrace() const
Definition: maptrace.cpp:219
A simple container of DataPoint instances.
Definition: trace.h:148
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39