libpappsomspp
Library for mass spectrometry
timsdata.h
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/vendors/tims/timsdata.h
3  * \date 27/08/2019
4  * \author Olivier Langella
5  * \brief main Tims data handler
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #pragma once
29 
30 #include <QDir>
31 #include <QSqlDatabase>
32 #include "timsbindec.h"
33 #include "timsframe.h"
34 #include "../../massspectrum/qualifiedmassspectrum.h"
35 #include "../../processing/filters/filterinterface.h"
36 #include "../../msrun/xiccoord/xiccoordtims.h"
37 #include "../../msrun/msrunreader.h"
38 #include <deque>
39 #include <QMutex>
40 #include <QSqlQuery>
42 
43 namespace pappso
44 {
45 
46 class TimsData;
47 
48 /** \brief shared pointer on a TimsData object
49  */
50 typedef std::shared_ptr<TimsData> TimsDataSp;
51 
52 
54 {
55  std::size_t m_frameId; // frame id
56  std::size_t m_size; // frame size (number of TOF scans in frame)
57  std::size_t m_cumulSize; // cumulative size
58 };
59 
60 /**
61  * @todo write docs
62  */
64 {
65  public:
66  /** @brief build using the tims data directory
67  */
68  TimsData(QDir timsDataDirectory);
69 
70  /**
71  * Copy constructor
72  *
73  * @param other TODO
74  */
75  TimsData(const TimsData &other);
76 
77  /**
78  * Destructor
79  */
80  virtual ~TimsData();
81 
82 
83  /** @brief get a mass spectrum given its spectrum index
84 * @param raw_index a number beginning at 0, corresponding to a Tims Scan in
85 * the order they lie in the binary data file
86  */
88  getMassSpectrumCstSPtrByRawIndex(std::size_t raw_index);
89 
90  /** @brief get a mass spectrum given the tims frame database id and scan
91  * number within tims frame
92  */
93  pappso::MassSpectrumCstSPtr getMassSpectrumCstSPtr(std::size_t timsId,
94  std::size_t scanNum);
95 
96  /** @brief Get total number of frames
97  */
98  std::size_t getTotalNumberOfFrames() const;
99 
100  /** @brief get the total number of scans
101  */
102  std::size_t getTotalNumberOfScans() const;
103 
104  /** @brief get the number of precursors analyzed by PASEF
105  */
106  std::size_t getTotalNumberOfPrecursors() const;
107 
108  /** @brief guess possible precursor ids given a charge, m/z, retention time
109  * and k0
110  * @return a list of possible precursor ids
111  */
112  std::vector<std::size_t> getPrecursorsFromMzRtCharge(int charge,
113  double mz_val,
114  double rt_sec,
115  double k0);
116 
117  unsigned int getMsLevelBySpectrumIndex(std::size_t spectrum_index);
118 
119  void getQualifiedMassSpectrumByRawIndex(const MsRunIdCstSPtr &msrun_id,
120  QualifiedMassSpectrum &mass_spectrum,
121  std::size_t spectrum_index,
122  bool want_binary_data);
123 
124  Trace getTicChromatogram() const;
125 
127  {
128  std::size_t parent_frame = 0;
129  std::size_t precursor_id = 0;
130  std::size_t scan_mobility_start = 0;
131  std::size_t scan_mobility_end = 0;
132  std::size_t ms1_index = 0;
133  std::size_t ms2_index = 0;
134  double isolationMz = 0;
135  double isolationWidth = 0;
136  float collisionEnergy = 0;
137  std::vector<std::size_t> tims_frame_list;
139  };
140 
141  void
142  getQualifiedMs2MassSpectrumByPrecursorId(const MsRunIdCstSPtr &msrun_id,
143  QualifiedMassSpectrum &mass_spectrum,
144  const SpectrumDescr &spectrum_descr,
145  bool want_binary_data);
146 
147  void
148  getQualifiedMs1MassSpectrumByPrecursorId(const MsRunIdCstSPtr &msrun_id,
149  QualifiedMassSpectrum &mass_spectrum,
150  const SpectrumDescr &spectrum_descr,
151  bool want_binary_data);
152 
153  /** @brief filter interface to apply just after raw MS2 spectrum extraction
154  * the filter can be a list of filters inside a FilterSuite object
155  */
156  void setMs2FilterCstSPtr(pappso::FilterInterfaceCstSPtr &filter);
157 
158  /** @brief filter interface to apply just after raw MS1 spectrum extraction
159  * the filter can be a list of filters inside a FilterSuite object
160  */
161  void setMs1FilterCstSPtr(pappso::FilterInterfaceCstSPtr &filter);
162 
163  /** @brief enable or disable simple centroid filter on raw tims data for MS2
164  */
165  void setMs2BuiltinCentroid(bool centroid);
166 
167 
168  /** @brief tells if simple centroid filter on raw tims data for MS2 is enabled
169  * or not
170  */
171  bool getMs2BuiltinCentroid() const;
172 
173 
174  std::vector<std::size_t> getTimsMS1FrameIdRange(double rt_begin,
175  double rt_end) const;
176 
177 
178  /** @brief get a Tims frame with its database ID
179  * but look in the cache first
180  *
181  * thread safe
182  */
183  TimsFrameCstSPtr getTimsFrameCstSPtrCached(std::size_t timsId);
184 
185  /** @brief get a Tims frame with its database ID
186  *
187  * this function is not thread safe
188  */
189  TimsFrameCstSPtr getTimsFrameCstSPtr(std::size_t timsId);
190 
191  XicCoordTims getXicCoordTimsFromPrecursorId(std::size_t precursor_id,
192  PrecisionPtr precision_ptr);
193 
194 
195  /** @brief function to visit an MsRunReader and get each Spectrum in a
196  * spectrum collection handler by Ms Levels
197  *
198 * this function will retrieve processed qualified spectra depending on each
199 * Bruker precursor
200  */
201  void ms2ReaderSpectrumCollectionByMsLevel(
202  const MsRunIdCstSPtr &msrun_id,
204  unsigned int ms_level);
205 
206 
207  /** @brief function to visit an MsRunReader and get each raw Spectrum in a
208  * spectrum collection handler by Ms Levels
209  *
210 * this function will retrieve every scan as a qualified mass spectrum
211  */
212  void rawReaderSpectrumCollectionByMsLevel(
213  const MsRunIdCstSPtr &msrun_id,
215  unsigned int ms_level);
216 
217  /** @brief get cumulated raw signal for a given precursor
218  * only to use to see the raw signal
219  *
220  * @param precursor_index precursor index to extract signal from
221  * @result a map of integers, x=time of flights, y= intensities
222  */
223  std::map<quint32, quint32>
224  getRawMs2ByPrecursorId(std::size_t precursor_index);
225 
226  /** @brief get raw signal for a spectrum index
227  * only to use to see the raw signal
228  *
229 * @param spectrum_index spectrum index
230  * @result a map of integers, x=time of flights, y= intensities
231  */
232  std::map<quint32, quint32>
233  getRawMsBySpectrumIndex(std::size_t spectrum_index);
234 
235 
236  /** @brief retention timeline
237  * get retention times along the MSrun in seconds
238  * @return vector of retention times (seconds)
239  */
240  virtual std::vector<double> getRetentionTimeLine() const;
241 
242  /** @brief get an intermediate structure describing a spectrum
243  */
244  SpectrumDescr getSpectrumDescrWithPrecursorId(std::size_t precursor_id);
245 
246  /** @brief set only one is_mono_thread to true
247  *
248 * this avoids using QtConcurrent
249  */
250  void setMonoThread(bool is_mono_thread);
251 
252  const std::vector<FrameIdDescr> &getFrameIdDescrList() const;
253 
254 
255  private:
256  SpectrumDescr getSpectrumDescrWithScanCoordinate(
257  const std::pair<std::size_t, std::size_t> &scan_coordinate);
258 
259 
260  std::pair<std::size_t, std::size_t>
261  getScanCoordinateFromRawIndex(std::size_t spectrum_index) const;
262 
263  std::size_t getRawIndexFromCoordinate(std::size_t frame_id,
264  std::size_t scan_num) const;
265 
266  QSqlDatabase openDatabaseConnection() const;
267 
268 
269  /** @brief get a Tims frame base (no binary data file access) with its
270 * database ID
271  */
272  TimsFrameBaseCstSPtr getTimsFrameBaseCstSPtr(std::size_t timsId);
273 
274 
275  TimsFrameBaseCstSPtr getTimsFrameBaseCstSPtrCached(std::size_t timsId);
276 
277 
278  std::vector<std::size_t>
279  getMatchPrecursorIdByKo(std::vector<std::vector<double>> ids,
280  double ko_value);
281 
282  /** @todo documentation
283  */
284  std::vector<std::size_t>
285  getClosestPrecursorIdByMz(std::vector<std::vector<double>> ids,
286  double mz_value);
287 
288 
289  /** @brief private function to fill m_frameIdDescrList
290  */
291  void fillFrameIdDescrList();
292 
293 
294  void ms2ReaderGenerateMS1MS2Spectrum(
295  const MsRunIdCstSPtr &msrun_id,
296  std::vector<QualifiedMassSpectrum> &qualified_mass_spectrum_list,
298  const SpectrumDescr &spectrum_descr,
299  unsigned int ms_level);
300 
301  void fillSpectrumDescriptionWithSqlRecord(SpectrumDescr &spectrum_descr,
302  QSqlQuery &qprecursor_list);
303 
305  TimsBinDec *mpa_timsBinDec = nullptr;
306  // QSqlDatabase *mpa_qdb = nullptr;
307  std::size_t m_totalNumberOfScans;
310  std::size_t m_cacheSize = 60;
311  std::deque<TimsFrameCstSPtr> m_timsFrameCache;
312  std::deque<TimsFrameBaseCstSPtr> m_timsFrameBaseCache;
313 
314  pappso::FilterInterfaceCstSPtr mcsp_ms2Filter = nullptr;
315  pappso::FilterInterfaceCstSPtr mcsp_ms1Filter = nullptr;
316 
317  /** @brief enable builtin centroid on raw tims integers by default
318  */
319  bool m_builtinMs2Centroid = true;
320 
321 
322  std::map<int, QSqlRecord> m_mapMzCalibrationRecord;
323  std::map<int, QSqlRecord> m_mapTimsCalibrationRecord;
324  std::vector<TimsFrameRecord> m_mapFramesRecord;
325  std::map<std::size_t, QSqlRecord> m_mapXicCoordRecord;
326 
328 
329 
330  /** @brief store every frame id and corresponding sizes
331  */
332  std::vector<FrameIdDescr> m_frameIdDescrList;
333 
334  /** @brief index to quickly find a frameId in the description list with the
335 * raw index of spectrum modulo 1000
336  * @key thousands of TOF scans
337  * @value corresponding m_frameIdDescrList index
338  */
339  std::map<std::size_t, std::size_t> m_thousandIndexToFrameIdDescrListIndex;
340 
341 
342  /** @brief tells if someone is loading a frame id
343  */
344  std::vector<std::size_t> m_someoneIsLoadingFrameId;
345 
346  bool m_isMonoThread = false;
347 
349 
350  QMutex m_mutex;
351 };
352 } // namespace pappso
Class representing a fully specified mass spectrum.
interface to collect spectrums from the MsRunReader class
Definition: msrunreader.h:56
std::vector< FrameIdDescr > m_frameIdDescrList
store every frame id and corresponding sizes
Definition: timsdata.h:332
std::map< std::size_t, QSqlRecord > m_mapXicCoordRecord
Definition: timsdata.h:325
std::size_t m_totalNumberOfFrames
Definition: timsdata.h:309
std::size_t m_totalNumberOfScans
Definition: timsdata.h:307
std::deque< TimsFrameCstSPtr > m_timsFrameCache
Definition: timsdata.h:311
std::vector< TimsFrameRecord > m_mapFramesRecord
Definition: timsdata.h:324
std::map< int, QSqlRecord > m_mapMzCalibrationRecord
Definition: timsdata.h:322
std::map< int, QSqlRecord > m_mapTimsCalibrationRecord
Definition: timsdata.h:323
QMutex m_mutex
Definition: timsdata.h:350
std::vector< std::size_t > m_someoneIsLoadingFrameId
tells if someone is loading a frame id
Definition: timsdata.h:344
bool m_hasPrecursorTable
Definition: timsdata.h:348
TimsData(const TimsData &other)
QDir m_timsDataDirectory
Definition: timsdata.h:304
MzCalibrationStore * mpa_mzCalibrationStore
Definition: timsdata.h:327
std::deque< TimsFrameBaseCstSPtr > m_timsFrameBaseCache
Definition: timsdata.h:312
std::map< std::size_t, std::size_t > m_thousandIndexToFrameIdDescrListIndex
index to find quickly a frameId in the description list with the raw index of spectrum modulo 1000 @k...
Definition: timsdata.h:339
std::size_t m_totalNumberOfPrecursors
Definition: timsdata.h:308
A simple container of DataPoint instances.
Definition: trace.h:148
#define PMSPP_LIB_DECL
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const TimsFrameBase > TimsFrameBaseCstSPtr
Definition: timsframebase.h:41
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
std::shared_ptr< TimsData > TimsDataSp
shared pointer on a TimsData object
Definition: timsdata.h:46
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::shared_ptr< const FilterInterface > FilterInterfaceCstSPtr
std::shared_ptr< const TimsFrame > TimsFrameCstSPtr
Definition: timsframe.h:42
std::size_t m_cumulSize
Definition: timsdata.h:57
std::size_t m_size
Definition: timsdata.h:56
std::size_t m_frameId
Definition: timsdata.h:55
std::vector< std::size_t > tims_frame_list
Definition: timsdata.h:137
PrecursorIonData precursor_ion_data
Definition: timsdata.h:138
coordinates of the XIC to extract and the resulting XIC after extraction
Definition: xiccoordtims.h:51
binary file handler of Bruker's TimsTof raw data
handle a single Bruker's TimsTof frame