libpappsomspp
Library for mass spectrometry
mzxmloutput.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/output/mzxmloutput.cpp
3  * \date 23/11/2019
4  * \author Olivier Langella
5  * \brief write msrun peaks into mzxml output stream
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include "mzxmloutput.h"
32 #include <QDebug>
33 #include <QStringList>
34 #include <algorithm>
35 #include <cstdio>
36 #include "../../config.h"
37 
38 using namespace pappso;
39 
40 
/**
 * @brief Reverse the byte order of a value.
 *
 * Used to convert between little-endian and big-endian ("network")
 * representations before the peak data is serialized.
 *
 * @tparam T type of the value; its object representation is reversed byte
 *           by byte, so it is meant for plain arithmetic types.
 * @param in value to convert (taken by copy, the caller's object is left
 *           untouched)
 * @return a copy of @p in whose bytes are in the opposite order
 */
template <class T>
T
change_endian(T in)
{
  // Accessing the object representation through unsigned char is always
  // permitted, whatever T is.
  unsigned char *const bytes = reinterpret_cast<unsigned char *>(&in);
  std::reverse(bytes, bytes + sizeof(T));
  return in;
}
50 
51 
53 {
54  mp_output = p_mzxml_output;
55 }
57 {
58 }
59 void
61  const QualifiedMassSpectrum &spectrum)
62 {
63  qDebug();
64  mp_output->m_monitor.count();
65  mp_output->writeQualifiedMassSpectrum(spectrum);
66  qDebug();
67 }
68 bool
70 {
71  return true;
72 }
73 
74 
76  QIODevice *p_output_device)
77  : m_monitor(monitor)
78 {
79 
80  mpa_outputStream = new QXmlStreamWriter(p_output_device);
81  mpa_outputStream->setAutoFormatting(true);
82 
83  mpa_outputStream->writeStartDocument("1.0");
84 }
85 
87 {
88  close();
89  delete mpa_outputStream;
90 }
91 
92 void
93 MzxmlOutput::setReadAhead(bool isReadAhead)
94 {
95  m_isReadAhead = isReadAhead;
96 }
97 void
99 {
100  qDebug();
101  m_monitor.setTotalSteps(p_msrunreader->spectrumListSize());
102  writeHeader(p_msrunreader);
103 
104  Translater translater(this);
105 
106  translater.setReadAhead(m_isReadAhead);
107 
108  translater.setNeedMsLevelPeakList(1, !m_ms1IsMasked);
109  // translater.setNeedMsLevelPeakList(1, false);
110  // translater.setNeedMsLevelPeakList(2, false);
111  p_msrunreader->readSpectrumCollection(translater);
112 
114  qDebug();
115 }
116 
117 void
119 {
120 
121  mpa_outputStream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance",
122  "xsi");
123  // xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0"
124  // xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0
125  // http://sashimi.sourceforge.net/schema_revision/mzXML_2.0/mzXML_idx_2.0.xsd"
126  /*
127 114 writer.setPrefix("xsi", xmlnsxsi);
128 115 writer.setDefaultNamespace(namespaceURI);
129 mpa_outputStream->writeStartElement("mzXML");
130 117 writer.writeNamespace("xsi", xmlnsxsi);
131 118 writer.writeDefaultNamespace(namespaceURI);
132 119
133 120 writer.writeAttribute(xmlnsxsi, "schemaLocation",
134 xsischemaLocation); 121 */
135  mpa_outputStream->writeStartElement("mzXML");
136  mpa_outputStream->writeAttribute(
137  "xmlns", "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2");
138  mpa_outputStream->writeAttribute(
139  "xsi:schemaLocation",
140  "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2 "
141  "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2/"
142  "mzXML_idx_3.2.xsd");
143 
144  mpa_outputStream->writeStartElement("msRun");
145  mpa_outputStream->writeAttribute(
146  "scanCount", QString("%1").arg(p_msrunreader->spectrumListSize()));
147  //<msRun scanCount="16576" startTime="PT0.292553S" endTime="PT3000.34S">
148  // writer.writeAttribute("scanCount",
149  // ms_run.getSpectrumCount(this.controller).toString());
150 
151  /*
152  * # < parentFile fileName = #
153  * "file://SEQUEST1/raw/vidal/20060411_VIDAL_JEAN_1_PEPCR1_42140.RAW" #
154  * fileType = "RAWData" fileSha1 = #
155  * "23c1620d4ad3f4f0103b0141b7caec1e8b7eebf5" / >
156  */
157  mpa_outputStream->writeStartElement("parentFile");
158  mpa_outputStream->writeAttribute("fileName",
159  p_msrunreader->getMsRunId()->getFileName());
160  mpa_outputStream->writeAttribute("fileType", "RAWData");
161  mpa_outputStream->writeEndElement();
162  /*
163 144
164 145 MsInstrumentList instrument_list =
165 ms_run.getMsInstruments(controller); 146 for (MsInstrument
166 instrument : instrument_list) { 147 this.write(instrument); 148 }
167 */
168 
169  mpa_outputStream->writeStartElement("msInstrument");
170  mpa_outputStream->writeAttribute("msInstrumentID", "1");
171  //<msManufacturer category="msManufacturer" value="Thermo Scientific"/>
172  mpa_outputStream->writeStartElement("msManufacturer");
173  mpa_outputStream->writeAttribute("category", "msManufacturer");
174  mpa_outputStream->writeAttribute("value", "unknown");
175  mpa_outputStream->writeEndElement();
176  //<msModel category="msModel" value="Q Exactive"/>
177  // <msIonisation category="msIonisation" value="nanoelectrospray"/>
178  // <msMassAnalyzer category="msMassAnalyzer" value="quadrupole"/>
179  // <msDetector category="msDetector" value="inductive detector"/>
180  // <software type="acquisition" name="Xcalibur"
181  // version="2.1-152001/2.1.0.1520"/>
182  mpa_outputStream->writeEndElement();
183  /*
184 149
185 150 // #< dataProcessing centroided ="1" >
186 151 // my $ref_data_processings =
187 $ms_run_description->dataProcessing(); 152 MsDataProcessingList
188 dataProcList = ms_run.getMsDataProcessings(controller); 153 for
189 (MsDataProcessing msDataProc : dataProcList) { 154 this.write(msDataProc); 155 }
190 */
191  mpa_outputStream->writeStartElement("dataProcessing");
192  //<dataProcessing centroided="1">
193  mpa_outputStream->writeAttribute("centroided", "1");
194  // <software type="conversion" name="ProteoWizard" version="3.0.3706"/>
195  mpa_outputStream->writeStartElement("software");
196  mpa_outputStream->writeAttribute("type", "conversion");
197  mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
198  mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
199  mpa_outputStream->writeEndElement();
200  //<processingOperation name="Conversion to mzML"/>
201  mpa_outputStream->writeStartElement("processingOperation");
202  mpa_outputStream->writeAttribute("name", "Conversion to mzXML");
203  //<software type="processing" name="ProteoWizard" version="3.0.3706"/>
204  mpa_outputStream->writeStartElement("software");
205  mpa_outputStream->writeAttribute("type", "processing");
206  mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
207  mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
208  mpa_outputStream->writeEndElement();
209  //<comment>Thermo/Xcalibur peak picking</comment>
210  mpa_outputStream->writeStartElement("comment");
211  mpa_outputStream->writeCharacters("pappso::MzxmlOutput");
212  mpa_outputStream->writeEndElement();
213  //</dataProcessing>
214  mpa_outputStream->writeEndElement();
215  mpa_outputStream->writeEndElement();
216  // Peaks
217 }
218 
219 
220 void
222 {
223  mpa_outputStream->writeEndDocument();
224 }
225 
226 
227 std::size_t
228 MzxmlOutput::getScanNumberFromNativeId(const QString &native_id) const
229 {
230  QStringList native_id_list = native_id.split("=");
231  if(native_id_list.size() < 2)
232  {
233  }
234  else
235  {
236  return native_id_list.back().toULong();
237  }
238  return std::numeric_limits<std::size_t>::max();
239 }
240 
241 std::size_t
243 {
244  std::size_t scan_number =
246  if(scan_number == std::numeric_limits<std::size_t>::max())
247  {
248  scan_number = spectrum.getMassSpectrumId().getSpectrumIndex() + 1;
249  }
250  return scan_number;
251 }
252 
253 std::size_t
255 {
256 
257  std::size_t scan_number =
259  if(scan_number == std::numeric_limits<std::size_t>::max())
260  {
261  scan_number = spectrum.getPrecursorSpectrumIndex() + 1;
262  }
263  return scan_number;
264 }
265 
266 void
268  const pappso::QualifiedMassSpectrum &spectrum)
269 {
270  qDebug();
271  mpa_outputStream->writeStartElement("scan");
272  /*
273  <scan num="1"
274  scanType="Full"
275  centroided="1"
276  msLevel="1"
277  peaksCount="1552"
278  polarity="+"
279  retentionTime="PT0.292553S"
280  lowMz="400.153411865234"
281  highMz="1013.123352050781"
282  basePeakMz="445.12003"
283  basePeakIntensity="2.0422125e06"
284  totIonCurrent="1.737798e07">*/
285  mpa_outputStream->writeAttribute("num",
286  QString("%1").arg(getScanNumber(spectrum)));
287  mpa_outputStream->writeAttribute("centroided", QString("1"));
288  mpa_outputStream->writeAttribute("msLevel",
289  QString("%1").arg(spectrum.getMsLevel()));
290  if(spectrum.getMassSpectrumCstSPtr().get() == nullptr)
291  {
292  mpa_outputStream->writeAttribute("peaksCount", "0");
293  }
294  else
295  {
296  mpa_outputStream->writeAttribute("peaksCount",
297  QString("%1").arg(spectrum.size()));
298 
299  if(spectrum.size() > 0)
300  {
301  mpa_outputStream->writeAttribute(
302  "lowMz",
303  QString::number(
304  spectrum.getMassSpectrumCstSPtr().get()->front().x, 'f', 12));
305 
306  mpa_outputStream->writeAttribute(
307  "highMz",
308  QString::number(
309  spectrum.getMassSpectrumCstSPtr().get()->back().x, 'f', 12));
310  // mpa_outputStream->writeAttribute("highMz",
311  // QString::number(spectrum.getMassSpectrumCstSPtr().get()->back().x,
312  // 'f', 10)); basePeakMz="245.1271988"
313  // basePeakIntensity="5810.7739"
314  // totIonCurrent="57803.815999999999">
315  }
316  }
317  mpa_outputStream->writeAttribute("polarity", "+");
318  mpa_outputStream->writeAttribute(
319  "retentionTime",
320  QString("PT%1S").arg(QString::number(spectrum.getRtInSeconds(), 'f', 2)));
321 
322  if(spectrum.getMsLevel() > 1)
323  {
324 
325  //<precursorMz precursorScanNum="16574"
326  // precursorIntensity="58403.04296875" precursorCharge="2"
327  ////activationMethod="HCD">994.690619901808</precursorMz>
328  mpa_outputStream->writeStartElement("precursorMz");
329  mpa_outputStream->writeAttribute(
330  "precursorScanNum",
331  QString("%1").arg(getPrecursorScanNumber(spectrum)));
332  mpa_outputStream->writeAttribute(
333  "precursorIntensity",
334  QString::number(spectrum.getPrecursorIntensity(), 'f', 4));
335  mpa_outputStream->writeAttribute(
336  "precursorCharge", QString("%1").arg(spectrum.getPrecursorCharge()));
337  mpa_outputStream->writeCharacters(
338  QString::number(spectrum.getPrecursorMz(), 'f', 12));
339  mpa_outputStream->writeEndElement();
340  }
341 
342  /*<peaks compressionType="none"
343  compressedLen="0"
344  precision="64"
345  byteOrder="network"
346  contentType="m/z-int"></peaks>*/
347 
348  mpa_outputStream->writeStartElement("peaks");
349  mpa_outputStream->writeAttribute("compressionType", "none");
350  mpa_outputStream->writeAttribute("compressedLen", "0");
351  mpa_outputStream->writeAttribute("precision", "64");
352  mpa_outputStream->writeAttribute("byteOrder", "network");
353  mpa_outputStream->writeAttribute("contentType", "m/z-int");
354 
355  if(spectrum.getMassSpectrumCstSPtr().get() != nullptr)
356  {
357  QByteArray byte_array;
358  if(QSysInfo::ByteOrder == QSysInfo::LittleEndian)
359  {
360  for(const DataPoint &peak :
361  *(spectrum.getMassSpectrumCstSPtr().get()))
362  {
363  double swap = change_endian(peak.x);
364  byte_array.append((char *)&swap, 8);
365  swap = change_endian(peak.y);
366  byte_array.append((char *)&swap, 8);
367  }
368  }
369  else
370  {
371  for(const DataPoint &peak :
372  *(spectrum.getMassSpectrumCstSPtr().get()))
373  {
374  byte_array.append((char *)&peak.x, 8);
375  byte_array.append((char *)&peak.y, 8);
376  }
377  }
378  mpa_outputStream->writeCharacters(byte_array.toBase64());
379  }
380  mpa_outputStream->writeEndElement();
381 
382  // scan
383  mpa_outputStream->writeEndElement();
384  qDebug();
385 }
386 
387 void
388 MzxmlOutput::maskMs1(bool mask_ms1)
389 {
390  m_ms1IsMasked = mask_ms1;
391 }
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
base class to read MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:192
virtual std::size_t spectrumListSize() const =0
get the total number of spectra contained in the MSrun data file
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
const MsRunIdCstSPtr & getMsRunId() const
Translater(MzxmlOutput *p_mzxml_output)
Definition: mzxmloutput.cpp:52
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Definition: mzxmloutput.cpp:69
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
Definition: mzxmloutput.cpp:60
void setReadAhead(bool read_ahead)
Definition: mzxmloutput.cpp:93
std::size_t getScanNumberFromNativeId(const QString &native_id) const
MzxmlOutput(UiMonitorInterface &monitor, QIODevice *p_output_device)
Definition: mzxmloutput.cpp:75
void write(MsRunReader *p_msrunreader)
Definition: mzxmloutput.cpp:98
UiMonitorInterface & m_monitor
Definition: mzxmloutput.h:90
std::size_t getScanNumber(const QualifiedMassSpectrum &spectrum) const
QXmlStreamWriter * mpa_outputStream
Definition: mzxmloutput.h:91
void writeQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)
void maskMs1(bool mask_ms1)
std::size_t getPrecursorScanNumber(const QualifiedMassSpectrum &spectrum) const
void writeHeader(MsRunReader *p_msrunreader)
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
uint getPrecursorCharge(bool *ok=nullptr) const
Get the precursor charge.
const QString & getPrecursorNativeId() const
pappso_double getPrecursorIntensity(bool *ok=nullptr) const
Get the intensity of the precursor ion.
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
pappso_double getPrecursorMz(bool *ok=nullptr) const
Get the precursor m/z ratio.
std::size_t getPrecursorSpectrumIndex() const
Get the scan number of the precursor ion.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
Definition: msrunreader.cpp:58
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
Definition: msrunreader.cpp:88
virtual void setTotalSteps(std::size_t total_number_of_steps)
use it if the number of steps is known in an algorithm; the total number of steps is useful to report...
#define PAPPSOMSPP_VERSION
Definition: config.h:4
#define PAPPSOMSPP_NAME
Definition: config.h:3
T change_endian(T in)
Definition: mzxmloutput.cpp:43
write msrun peaks into mzxml output stream
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39