libpappsomspp
Library for mass spectrometry
msrunxicextractordisk.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/xicextractor/private/msrunxicextractordisk.cpp
3  * \date 12/05/2018
4  * \author Olivier Langella
5  * \brief proteowizard based XIC extractor featuring disk cache
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include "msrunxicextractordisk.h"
32 #include <QDebug>
33 #include "../../pappsoexception.h"
34 #include "../../massspectrum/massspectrum.h"
35 
36 namespace pappso
37 {
38 
40  const QDir &temporary_dir)
41  : pappso::MsRunXicExtractor(msrun_reader)
42 {
43  mpa_temporaryDirectory = nullptr;
44  m_temporaryDirectory = temporary_dir.absolutePath();
45 }
46 
48  : pappso::MsRunXicExtractor(other)
49 {
50 
52  mpa_temporaryDirectory = new QTemporaryDir(
53  QString("%1/msrun_%2_")
55  .arg(msp_msrun_reader.get()->getMsRunId().get()->getXmlId()));
56 }
57 
59 {
60  if(mpa_temporaryDirectory != nullptr)
61  {
63  }
64 }
65 
66 void
68 {
69  qDebug();
70  try
71  {
73  // msp_msrun_reader = nullptr;
74  }
75  catch(pappso::PappsoException &errora)
76  {
77  qDebug();
79  QObject::tr("Error reading file (%1) : %2")
80  .arg(msp_msrun_reader.get()->getMsRunId().get()->toString())
81  .arg(errora.qwhat()));
82  }
83  catch(std::exception &error)
84  {
85  qDebug();
87  QObject::tr("Error reading file (%1) using : %2")
88  .arg(msp_msrun_reader.get()->getMsRunId().get()->toString())
89  .arg(error.what()));
90  }
91 }
92 
93 
94 void
96  UiMonitorInterface &monitor,
97  std::vector<XicCoordSPtr>::iterator it_xic_coord_list_begin,
98  std::vector<XicCoordSPtr>::iterator it_xic_coord_list_end)
99 {
100  // sort xic by mz:
101  std::sort(it_xic_coord_list_begin,
102  it_xic_coord_list_end,
103  [](XicCoordSPtr &a, XicCoordSPtr &b) {
104  return a.get()->mzRange.getMz() < b.get()->mzRange.getMz();
105  });
106 
107  for(auto it = it_xic_coord_list_begin; it != it_xic_coord_list_end; it++)
108  {
109  extractOneXicCoord(*(it->get()));
110  monitor.count();
111  }
112 }
113 
114 void
116 {
117  std::shared_ptr<Xic> msrunxic_sp = xic_coord.xicSptr;
118 
119  double rt_begin = xic_coord.rtTarget - m_retentionTimeAroundTarget;
120  double rt_end = xic_coord.rtTarget + m_retentionTimeAroundTarget;
121 
122 
123  std::vector<MsRunSliceSPtr> slice_list;
124  slice_list = acquireSlices(xic_coord.mzRange);
125 
126  if(slice_list.size() == 0)
127  {
129  QObject::tr("Error getMsRunXicSp slice_list.size() == 0"));
130  }
131 
132  for(std::size_t i = 0; i < m_retentionTimeList.size(); i++)
133  {
134 
135  DataPoint xic_element;
136  xic_element.x = m_retentionTimeList[i];
137  xic_element.y = 0;
138  if((xic_element.x < rt_begin) || (xic_element.x > rt_end))
139  continue;
140 
141  for(auto &&msrun_slice : slice_list)
142  {
143  const MassSpectrum &spectrum = msrun_slice.get()->getSpectrum(i);
144  for(auto &&peak : spectrum)
145  {
146  if(xic_coord.mzRange.contains(peak.x))
147  {
149  {
150  xic_element.y += peak.y;
151  }
152  else
153  {
154  if(xic_element.y < peak.y)
155  {
156  xic_element.y = peak.y;
157  }
158  }
159  }
160  }
161  }
162  msrunxic_sp.get()->push_back(xic_element);
163  }
164 }
165 
166 void
168 {
169  qDebug();
170  m_minMz = 5000;
171  m_maxMz = 0;
172 
173  unsigned int slice_number;
174  std::map<unsigned int, MassSpectrum> spectrum_map;
175 
176  /*
177  const pwiz::msdata::SpectrumList *p_spectrum_list =
178  p_msdatafile->run.spectrumListPtr.get();
179 
180  std::size_t spectrum_list_size = p_spectrum_list->size();
181  pwiz::msdata::SpectrumPtr pwiz_spectrum;
182  */
183 
184  m_rtSize = m_msrun_points.size();
185 
186 
187  MassSpectrumCstSPtr spectrum;
188  for(auto &&msrun_point : m_msrun_points)
189  {
190 
191  spectrum_map.clear();
192 
193  m_retentionTimeList.push_back(msrun_point.rt);
194 
195  spectrum =
196  msp_msrun_reader.get()->massSpectrumCstSPtr(msrun_point.spectrum_index);
197 
198  const MassSpectrum *p_spectrum = spectrum.get();
199  if(p_spectrum->size() > 0)
200  {
201  if(p_spectrum->begin()->x < m_minMz)
202  {
203  m_minMz = p_spectrum->begin()->x;
204  }
205  // iterate through the m/z-intensity pairs
206 
207  if(p_spectrum->back().x > m_maxMz)
208  {
209  m_maxMz = p_spectrum->back().x;
210  }
211 
212  for(auto &peak : *p_spectrum)
213  {
214 
215  slice_number = peak.x;
216 
217  std::pair<std::map<unsigned int, MassSpectrum>::iterator, bool>
218  ret = spectrum_map.insert(std::pair<unsigned int, MassSpectrum>(
219  slice_number, MassSpectrum()));
220 
221  ret.first->second.push_back(peak);
222  // auto ret = spectrum_map.insert(std::pair<unsigned int,
223  // MassSpectrum>(slice_number,MassSpectrum()));
224  // ret.first->second.push_back(peak);
225  }
226 
227  // slices are ready for this retention time
228  storeSlices(spectrum_map, m_retentionTimeList.size() - 1);
229  }
230  }
231 
232  endPwizRead();
233  qDebug();
234 }
235 
236 
237 void
239  std::map<unsigned int, MassSpectrum> &spectrum_map, std::size_t ipos)
240 {
241  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
242 
243  for(auto &&spectrum_pair : spectrum_map)
244  {
245  appendSliceOnDisk(spectrum_pair.first, spectrum_pair.second, ipos);
246  }
247 
248  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
249 }
250 
251 void
253  MassSpectrum &spectrum,
254  std::size_t ipos)
255 {
256  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
257  QFile slice_file(
258  QString("%1/%2").arg(mpa_temporaryDirectory->path()).arg(slice_number));
259  bool new_file = false;
260  if(!slice_file.exists())
261  {
262  new_file = true;
263  }
264  if(!slice_file.open(QIODevice::WriteOnly | QIODevice::Append))
265  {
267  QObject::tr("unable to open file %1").arg(slice_file.fileName()));
268  }
269  QDataStream stream(&slice_file);
270 
271  if(new_file)
272  {
273  stream << (quint32)slice_number;
274  stream << (quint32)m_rtSize;
275  }
276 
277  stream << (quint32)ipos;
278  stream << spectrum;
279 
280  slice_file.flush();
281  slice_file.close();
282  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
283 }
284 
286 MsRunXicExtractorDisk::unserializeSlice(unsigned int slice_number)
287 {
288  qDebug();
289  try
290  {
291  std::shared_ptr<MsRunSlice> msrun_slice_sp =
292  std::make_shared<MsRunSlice>(MsRunSlice());
293 
294  QFile slice_file(
295  QString("%1/%2").arg(mpa_temporaryDirectory->path()).arg(slice_number));
296  if(!slice_file.exists())
297  {
298  msrun_slice_sp.get()->setSize(m_rtSize);
299  msrun_slice_sp.get()->setSliceNumber(slice_number);
300  return msrun_slice_sp;
301  }
302  if(!slice_file.open(QIODevice::ReadOnly))
303  {
305  QObject::tr("unable to open file %1 in readonly")
306  .arg(slice_file.fileName()));
307  }
308  QDataStream stream(&slice_file);
309 
310  stream >> *(msrun_slice_sp.get());
311 
312  slice_file.close();
313 
314  return msrun_slice_sp;
315  }
316  catch(pappso::PappsoException &error)
317  {
319  QObject::tr("error unserializing slice %1:\n%2")
320  .arg(slice_number)
321  .arg(error.qwhat()));
322  }
323  qDebug();
324 }
325 
326 std::vector<MsRunSliceSPtr>
328 {
329  QMutexLocker lock(&m_mutex);
330  std::vector<MsRunSliceSPtr> slice_list;
331  for(unsigned int i = mz_range.lower(); i <= mz_range.upper(); i++)
332  {
333  auto it = std::find_if(m_msRunSliceListCache.begin(),
334  m_msRunSliceListCache.end(),
335  [i](const MsRunSliceSPtr &slice_sp) {
336  return slice_sp.get()->getSliceNumber() == i;
337  });
338  if(it != m_msRunSliceListCache.end())
339  {
340  slice_list.push_back(*it);
341  m_msRunSliceListCache.push_back(*it);
342  }
343  else
344  {
345  MsRunSliceSPtr slice_sp = unserializeSlice(i);
346  slice_list.push_back(slice_sp);
347  m_msRunSliceListCache.push_back(slice_sp);
348  }
349  }
350 
351  if(m_msRunSliceListCache.size() > 20)
352  {
353  m_msRunSliceListCache.pop_front();
354  }
355  return slice_list;
356 }
357 
358 
359 void
361 {
362  msp_msrun_reader.get()->releaseDevice();
363 }
364 } // namespace pappso
Class to represent a mass spectrum.
Definition: massspectrum.h:71
std::vector< MsRunSliceSPtr > acquireSlices(const MzRange &mz_range)
retrieve all the slices corresponding to a given mz_range
std::vector< pappso::pappso_double > m_retentionTimeList
MsRunXicExtractorDisk(MsRunReaderSPtr &msrun_reader)
std::deque< MsRunSliceSPtr > m_msRunSliceListCache
virtual void storeSlices(std::map< unsigned int, MassSpectrum > &slice_vector, std::size_t ipos)
store MassSpectrum slices (by daltons) for a given retention time
void extractOneXicCoord(XicCoord &xic_coord)
void appendSliceOnDisk(unsigned int slice_number, MassSpectrum &spectrum, std::size_t ipos)
append a slice on disk (in a file)
virtual void protectedExtractXicCoordSPtrList(UiMonitorInterface &monitor, std::vector< XicCoordSPtr >::iterator it_xic_coord_list_begin, std::vector< XicCoordSPtr >::iterator it_xic_coord_list_end) override
MsRunSliceSPtr unserializeSlice(unsigned int slice_number)
get one slice from disk by her slice number (dalton)
std::vector< MsRunXicExtractorPoints > m_msrun_points
pappso_double lower() const
Definition: mzrange.h:71
pappso_double upper() const
Definition: mzrange.h:77
bool contains(pappso_double) const
Definition: mzrange.cpp:120
virtual const QString & qwhat() const
virtual void count()=0
count steps report when a step is computed in an algorithm
MsRunReader based XIC extractor featuring disk cache.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition: msrunreader.h:185
std::shared_ptr< const MsRunSlice > MsRunSliceSPtr
Definition: msrunslice.h:39
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
@ sum
sum of intensities
std::shared_ptr< XicCoord > XicCoordSPtr
Definition: xiccoord.h:41
pappso_double x
Definition: datapoint.h:23
pappso_double y
Definition: datapoint.h:24
coordinates of the XIC to extract and the resulting XIC after extraction
Definition: xiccoord.h:54
XicSPtr xicSptr
extracted xic
Definition: xiccoord.h:113
double rtTarget
the targeted retention time to extract around intended in seconds, and related to one msrun....
Definition: xiccoord.h:109
MzRange mzRange
the mass to extract
Definition: xiccoord.h:103