libpappsomspp
Library for mass spectrometry
aamodification.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/amino_acid/aamodification.h
3  * \date 7/3/2015
4  * \author Olivier Langella
5  * \brief amino acid modification model
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include <QRegularExpression>
32 #include <QDebug>
33 #include <cmath>
34 
35 #include "aamodification.h"
36 #include "aa.h"
37 #include "../pappsoexception.h"
38 #include "../mzrange.h"
39 #include "../peptide/peptide.h"
40 #include "../obo/filterobopsimodsink.h"
41 #include "../obo/filterobopsimodtermaccession.h"
42 #include "../exception/exceptionnotfound.h"
43 
44 /*
45 
46 inline void initMyResource() {
47  Q_INIT_RESOURCE(resources);
48 }
49 */
50 
51 namespace pappso
52 {
53 
55 
56 AaModification::AaModification(const QString &accession, pappso_double mass)
57  : m_accession(accession), m_mass(mass)
58 {
64 
65  m_mapIsotope = {{Isotope::C13, 0},
66  {Isotope::H2, 0},
67  {Isotope::N15, 0},
68  {Isotope::O17, 0},
69  {Isotope::O18, 0},
70  {Isotope::S33, 0},
71  {Isotope::S34, 0},
72  {Isotope::S36, 0}};
73 }
74 
75 
76 AaModification::AaModification(AaModification &&toCopy) // move constructor
77  : m_accession(toCopy.m_accession),
78  m_name(toCopy.m_name),
79  m_mass(toCopy.m_mass),
80  m_atomCount(std::move(toCopy.m_atomCount)),
81  m_mapIsotope(toCopy.m_mapIsotope)
82 {
83  m_origin = toCopy.m_origin;
84 }
85 
87 {
88 }
89 
90 const QString &
92 {
93 
94  qDebug();
95  return m_accession;
96 }
97 const QString &
99 {
100  return m_name;
101 }
104  MapAccessionModifications ret;
105 
106  return ret;
107  }();
108 
111 {
112  AaModification *new_mod;
113  // qDebug() << " AaModification::createInstance begin";
114  new_mod = new AaModification(term.m_accession, term.m_diffMono);
115  // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
116  new_mod->setDiffFormula(term.m_diffFormula);
117  new_mod->setXrefOrigin(term.m_origin);
118  new_mod->m_name = term.m_name;
119  return new_mod;
120 }
121 
123 AaModification::createInstance(const QString &accession)
124 {
125  if(accession == "internal:Nter_hydrolytic_cleavage_H")
126  {
127  OboPsiModTerm term;
128  term.m_accession = accession;
129  term.m_diffFormula = "H 1";
130  term.m_diffMono = MPROTIUM;
131  term.m_name = "Nter hydrolytic cleavage H+";
132  return (AaModification::createInstance(term));
133  }
134  if(accession == "internal:Cter_hydrolytic_cleavage_HO")
135  {
136  OboPsiModTerm term;
137  term.m_accession = accession;
138  term.m_diffFormula = "H 1 O 1";
139  term.m_diffMono = MPROTIUM + MASSOXYGEN;
140  term.m_name = "Cter hydrolytic cleavage HO";
141  return (AaModification::createInstance(term));
142  }
143  if(accession.startsWith("MUTATION:"))
144  {
145  QRegularExpression regexp_mutation("^MUTATION:([A-Z])=>([A-Z])$");
146  QRegularExpressionMatch match = regexp_mutation.match(accession);
147  if(match.hasMatch())
148  {
149  qDebug() << match.capturedTexts()[1].at(0) << " "
150  << match.capturedTexts()[2].at(0);
151 
152  Aa aa_from(match.capturedTexts()[1].toStdString().c_str()[0]);
153  Aa aa_to(match.capturedTexts()[2].toStdString().c_str()[0]);
154  AaModificationP instance_mutation =
155  createInstanceMutation(aa_from, aa_to);
156  return instance_mutation;
157  // m_psiModLabel<<"|";
158  }
159  }
160  // initMyResource();
161  FilterOboPsiModSink term_list;
162  FilterOboPsiModTermAccession filterm_accession(term_list, accession);
163 
164  OboPsiMod psimod(filterm_accession);
165 
166  try
167  {
168  return (AaModification::createInstance(term_list.getOne()));
169  }
170  catch(ExceptionNotFound &e)
171  {
172  throw ExceptionNotFound(QObject::tr("modification not found : [%1]\n%2")
173  .arg(accession)
174  .arg(e.qwhat()));
175  }
176 }
177 
178 void
179 AaModification::setXrefOrigin(const QString &origin)
180 {
181  // xref: Origin: "N"
182  // xref: Origin: "X"
183  m_origin = origin;
184 }
185 void
186 AaModification::setDiffFormula(const QString &diff_formula)
187 {
188  QRegularExpression rx("(^|\\s)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
189  QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);
190 
191  while(i.hasNext())
192  {
193  QRegularExpressionMatch match = i.next();
194 
195  qDebug() << match.captured(2) << " " << match.captured(2) << " "
196  << match.captured(3);
197 
198  if(match.captured(2) == "C")
199  {
200  m_atomCount[AtomIsotopeSurvey::C] = match.captured(3).toInt();
201  }
202  else if(match.captured(2) == "H")
203  {
204  m_atomCount[AtomIsotopeSurvey::H] = match.captured(3).toInt();
205  }
206  else if(match.captured(2) == "N")
207  {
208  m_atomCount[AtomIsotopeSurvey::N] = match.captured(3).toInt();
209  }
210  else if(match.captured(2) == "O")
211  {
212  m_atomCount[AtomIsotopeSurvey::O] = match.captured(3).toInt();
213  }
214  else if(match.captured(2) == "S")
215  {
216  m_atomCount[AtomIsotopeSurvey::S] = match.captured(3).toInt();
217  }
218  }
219 
220  // look for isotopes :
221  rx.setPattern("\\(([-]{0,1}\\d+)\\)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
222 
223  i = rx.globalMatch(diff_formula);
224 
225  while(i.hasNext())
226  {
227  QRegularExpressionMatch match = i.next();
228 
229  qDebug() << match.captured(1) << " " << match.captured(2) << " "
230  << match.captured(3);
231 
232  int number_of_isotopes = match.captured(3).toInt();
233 
234  if(match.captured(2) == "C")
235  {
236  if(match.captured(1) == "13")
237  {
238  m_mapIsotope.at(Isotope::C13) = number_of_isotopes;
239  }
240  m_atomCount[AtomIsotopeSurvey::C] += number_of_isotopes;
241  }
242  else if(match.captured(2) == "H")
243  {
244  if(match.captured(1) == "2")
245  {
246  m_mapIsotope.at(Isotope::H2) = number_of_isotopes;
247  }
248  m_atomCount[AtomIsotopeSurvey::H] += number_of_isotopes;
249  }
250  else if(match.captured(2) == "N")
251  {
252  if(match.captured(1) == "15")
253  {
254  m_mapIsotope.at(Isotope::N15) = number_of_isotopes;
255  }
256  m_atomCount[AtomIsotopeSurvey::N] += number_of_isotopes;
257  }
258  else if(match.captured(2) == "O")
259  {
260  if(match.captured(1) == "17")
261  {
262  m_mapIsotope.at(Isotope::O17) = number_of_isotopes;
263  }
264  else if(match.captured(1) == "18")
265  {
266  m_mapIsotope.at(Isotope::O18) = number_of_isotopes;
267  }
268  m_atomCount[AtomIsotopeSurvey::O] += number_of_isotopes;
269  }
270  else if(match.captured(2) == "S")
271  {
272  if(match.captured(1) == "33")
273  {
274  m_mapIsotope.at(Isotope::S33) = number_of_isotopes;
275  }
276  else if(match.captured(1) == "34")
277  {
278  m_mapIsotope.at(Isotope::S34) = number_of_isotopes;
279  }
280  else if(match.captured(1) == "36")
281  {
282  m_mapIsotope.at(Isotope::S36) = number_of_isotopes;
283  }
284  m_atomCount[AtomIsotopeSurvey::S] += number_of_isotopes;
285  }
286  }
287 
289 }
290 
291 
292 void
294 {
295  pappso_double theoreticalm_mass = 0;
296  std::map<AtomIsotopeSurvey, int>::const_iterator it_atom =
298  if(it_atom != m_atomCount.end())
299  {
300  theoreticalm_mass += MASSCARBON * (it_atom->second);
301  }
302  it_atom = m_atomCount.find(AtomIsotopeSurvey::H);
303  if(it_atom != m_atomCount.end())
304  {
305  theoreticalm_mass += MPROTIUM * (it_atom->second);
306  }
307 
308  it_atom = m_atomCount.find(AtomIsotopeSurvey::O);
309  if(it_atom != m_atomCount.end())
310  {
311  theoreticalm_mass += MASSOXYGEN * (it_atom->second);
312  }
313 
314  it_atom = m_atomCount.find(AtomIsotopeSurvey::N);
315  if(it_atom != m_atomCount.end())
316  {
317  theoreticalm_mass += MASSNITROGEN * (it_atom->second);
318  }
319  it_atom = m_atomCount.find(AtomIsotopeSurvey::S);
320  if(it_atom != m_atomCount.end())
321  {
322  theoreticalm_mass += MASSSULFUR * (it_atom->second);
323  }
324 
325  qDebug() << theoreticalm_mass;
326 
327  theoreticalm_mass += DIFFC12C13 * m_mapIsotope.at(Isotope::C13);
328  theoreticalm_mass += DIFFH1H2 * m_mapIsotope.at(Isotope::H2);
329  theoreticalm_mass += DIFFN14N15 * m_mapIsotope.at(Isotope::N15);
330  theoreticalm_mass += DIFFO16O17 * m_mapIsotope.at(Isotope::O17);
331  theoreticalm_mass += DIFFO16O18 * m_mapIsotope.at(Isotope::O18);
332  theoreticalm_mass += DIFFS32S33 * m_mapIsotope.at(Isotope::S33);
333  theoreticalm_mass += DIFFS32S34 * m_mapIsotope.at(Isotope::S34);
334  theoreticalm_mass += DIFFS32S36 * m_mapIsotope.at(Isotope::S36);
335 
336 
337  pappso_double diff = std::fabs((pappso_double)m_mass - theoreticalm_mass);
338  if(diff < 0.001)
339  {
340  m_mass = theoreticalm_mass;
341  qDebug() << diff;
342  }
343  else
344  {
345  qDebug()
346  << "ERROR in AaModification::calculateMassFromChemicalComponents theo="
347  << theoreticalm_mass << " m=" << m_mass << " diff=" << diff
348  << " accession=" << m_accession;
349  }
350 }
351 
354 {
355  QString accession = QString("%1").arg(modificationMass);
356  qDebug() << accession;
357  QMutexLocker locker(&m_mutex);
358  if(m_mapAccessionModifications.find(accession) ==
360  {
361  // not found
362  m_mapAccessionModifications.insert(std::pair<QString, AaModification *>(
363  accession, new AaModification(accession, modificationMass)));
364  }
365  else
366  {
367  // found
368  }
369  return m_mapAccessionModifications.at(accession);
370 }
371 
373 AaModification::getInstance(const QString &accession)
374 {
375  try
376  {
377  QMutexLocker locker(&m_mutex);
378  MapAccessionModifications::iterator it =
379  m_mapAccessionModifications.find(accession);
380  if(it == m_mapAccessionModifications.end())
381  {
382 
383  // not found
384  std::pair<MapAccessionModifications::iterator, bool> insert_res =
386  std::pair<QString, AaModificationP>(
387  accession, AaModification::createInstance(accession)));
388  it = insert_res.first;
389  }
390  else
391  {
392  // found
393  }
394  return it->second;
395  }
396  catch(ExceptionNotFound &e)
397  {
398  throw ExceptionNotFound(
399  QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
400  .arg(accession)
401  .arg(e.qwhat()));
402  }
403  catch(PappsoException &e)
404  {
405  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
406  .arg(accession)
407  .arg(e.qwhat()));
408  }
409  catch(std::exception &e)
410  {
411  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
412  .arg(accession)
413  .arg(e.what()));
414  }
415 }
416 
419 {
420 
421  QMutexLocker locker(&m_mutex);
422  MapAccessionModifications::iterator it =
424  if(it == m_mapAccessionModifications.end())
425  {
426  // not found
427  std::pair<MapAccessionModifications::iterator, bool> insert_res =
428  m_mapAccessionModifications.insert(std::pair<QString, AaModificationP>(
429  oboterm.m_accession, AaModification::createInstance(oboterm)));
430  it = insert_res.first;
431  }
432  else
433  {
434  // found
435  }
436  return it->second;
437 }
438 
439 
442  pappso_double mass,
443  const PeptideSp &peptide_sp,
444  unsigned int position)
445 {
447  if(MzRange(mass, precision).contains(getInstance("MOD:00719")->getMass()))
448  {
449  if(type == "M")
450  {
451  return getInstance("MOD:00719");
452  }
453  if(type == "K")
454  {
455  return getInstance("MOD:01047");
456  }
457  }
458  // accession== "MOD:00057"
459  if(MzRange(mass, precision).contains(getInstance("MOD:00408")->getMass()))
460  {
461  // id: MOD:00394
462  // name: acetylated residue
463  // potential N-terminus modifications
464  if(position == 0)
465  {
466  return getInstance("MOD:00408");
467  }
468  }
469  if(MzRange(mass, precision).contains(getInstance("MOD:01160")->getMass()))
470  {
471  //-17.02655
472  // loss of ammonia [MOD:01160] -17.026549
473  return getInstance("MOD:01160");
474  }
475 
476  if(MzRange(mass, precision).contains(getInstance("MOD:01060")->getMass()))
477  {
478  //// iodoacetamide [MOD:00397] 57.021464
479  if(type == "C")
480  {
481  return getInstance("MOD:01060");
482  }
483  else
484  {
485  return getInstance("MOD:00397");
486  }
487  }
488  if(MzRange(mass, precision).contains(getInstance("MOD:00704")->getMass()))
489  {
490  // loss of water
491  /*
492  if (position == 0) {
493  if (peptide_sp.get()->getSequence().startsWith("EG")) {
494  return getInstance("MOD:00365");
495  }
496  if (peptide_sp.get()->getSequence().startsWith("ES")) {
497  return getInstance("MOD:00953");
498  }
499  if (type == "E") {
500  return getInstance("MOD:00420");
501  }
502  }
503  */
504  // dehydrated residue [MOD:00704] -18.010565
505  return getInstance("MOD:00704");
506  }
507  if(MzRange(mass, precision).contains(getInstance("MOD:00696")->getMass()))
508  {
509  // phosphorylated residue [MOD:00696] 79.966330
510  return getInstance("MOD:00696");
511  }
512  bool isCter = false;
513  if(peptide_sp.get()->size() == (position + 1))
514  {
515  isCter = true;
516  }
517  if((position == 0) || isCter)
518  {
519  if(MzRange(mass, precision).contains(getInstance("MOD:00429")->getMass()))
520  {
521  // dimethyl
522  return getInstance("MOD:00429");
523  }
524  if(MzRange(mass, precision).contains(getInstance("MOD:00552")->getMass()))
525  {
526  // 4x(2)H labeled dimethyl residue
527  return getInstance("MOD:00552");
528  }
529  if(MzRange(mass, precision).contains(getInstance("MOD:00638")->getMass()))
530  {
531  // 2x(13)C,6x(2)H-dimethylated arginine
532  return getInstance("MOD:00638");
533  }
534  }
535  throw PappsoException(
536  QObject::tr("tandem modification not found : %1 %2 %3 %4")
537  .arg(type)
538  .arg(mass)
539  .arg(peptide_sp.get()->getSequence())
540  .arg(position));
541 }
542 
545 {
546  return m_mass;
547 }
548 
549 
550 int
552 {
553  // qDebug() << "AaModification::getNumberOfAtom(AtomIsotopeSurvey atom) NOT
554  // IMPLEMENTED";
555  return m_atomCount.at(atom);
556 }
557 
558 
559 int
561 {
562  try
563  {
564  return m_mapIsotope.at(isotope);
565  }
566  catch(std::exception &e)
567  {
568  throw PappsoException(
569  QObject::tr("ERROR in AaModification::getNumberOfIsotope %2")
570  .arg(e.what()));
571  }
572 }
573 
574 
575 bool
577 {
578  if(m_accession.startsWith("internal:"))
579  {
580  return true;
581  }
582  return false;
583 }
584 
586 AaModification::createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
587 {
588  QString accession(
589  QString("MUTATION:%1=>%2").arg(aa_from.getLetter()).arg(aa_to.getLetter()));
590  double diffMono = aa_to.getMass() - aa_from.getMass();
591  // not found
592  AaModification *instance_mutation;
593  // qDebug() << " AaModification::createInstance begin";
594  instance_mutation = new AaModification(accession, diffMono);
595  // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
596 
597  for(std::int8_t atomInt = (std::int8_t)AtomIsotopeSurvey::C;
598  atomInt != (std::int8_t)AtomIsotopeSurvey::last;
599  atomInt++)
600  {
601  AtomIsotopeSurvey atom = static_cast<AtomIsotopeSurvey>(atomInt);
602  instance_mutation->m_atomCount[atom] =
603  aa_to.getNumberOfAtom(atom) - aa_from.getNumberOfAtom(atom);
604  }
605  instance_mutation->m_name = QString("mutation from %1 to %2")
606  .arg(aa_from.getLetter())
607  .arg(aa_to.getLetter());
608  return instance_mutation;
609 }
610 
611 
613 AaModification::getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
614 {
615  QString accession(QString("MUTATION:%1=>%2").arg(mut_from).arg(mut_to));
616  try
617  {
618  QMutexLocker locker(&m_mutex);
619  MapAccessionModifications::iterator it =
620  m_mapAccessionModifications.find(accession);
621  if(it == m_mapAccessionModifications.end())
622  {
623  Aa aa_from(mut_from.toLatin1());
624  Aa aa_to(mut_to.toLatin1());
625  AaModificationP instance_mutation =
626  createInstanceMutation(aa_from, aa_to);
627 
628  std::pair<MapAccessionModifications::iterator, bool> insert_res =
630  std::pair<QString, AaModificationP>(accession,
631  instance_mutation));
632  it = insert_res.first;
633  }
634  else
635  {
636  // found
637  }
638  return it->second;
639  }
640  catch(ExceptionNotFound &e)
641  {
642  throw ExceptionNotFound(
643  QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
644  .arg(accession)
645  .arg(e.qwhat()));
646  }
647  catch(PappsoException &e)
648  {
649  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
650  .arg(accession)
651  .arg(e.qwhat()));
652  }
653  catch(std::exception &e)
654  {
655  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
656  .arg(accession)
657  .arg(e.what()));
658  }
659 } // namespace pappso
660 
661 } // namespace pappso
amino acid modification model
virtual const char & getLetter() const
Definition: aabase.cpp:434
const QString & getName() const
static AaModificationP getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
get a fake modification coding a mutation from an amino acid to an other
static AaModificationP createInstance(const QString &saccession)
std::map< Isotope, int > m_mapIsotope
const QString & getAccession() const
static AaModificationP getInstanceXtandemMod(const QString &type, pappso_double mass, const PeptideSp &peptide_sp, unsigned int position)
AaModification(AaModification &&toCopy)
std::map< AtomIsotopeSurvey, int > m_atomCount
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
pappso_double getMass() const
void setXrefOrigin(const QString &origin)
set list of amino acid on which this modification takes place
std::map< QString, AaModificationP > MapAccessionModifications
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
const QString m_accession
void setDiffFormula(const QString &diff_formula)
static AaModificationP createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
void calculateMassFromChemicalComponents()
static MapAccessionModifications m_mapAccessionModifications
int getNumberOfIsotope(Isotope isotope) const override final
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
Definition: aa.h:45
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
Definition: aa.cpp:166
pappso_double getMass() const override
Definition: aa.cpp:79
const OboPsiModTerm & getOne()
virtual const QString & qwhat() const
const char * what() const noexcept override
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSSULFUR(31.9720711741)
std::shared_ptr< const Peptide > PeptideSp
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const AaModification * AaModificationP
AtomIsotopeSurvey
Definition: types.h:77
double pappso_double
A type definition for doubles.
Definition: types.h:49
Isotope
Definition: types.h:92
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)