libpappsomspp
Library for mass spectrometry
protein.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/protein/protein.cpp
3  * \date 2/7/2015
4  * \author Olivier Langella
5  * \brief object to handle a protein
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include "protein.h"
32 #include "../peptide/peptide.h"
33 #include <QStringList>
34 #include <algorithm>
35 #include "../pappsoexception.h"
36 
37 namespace pappso
38 {
39 
40 QRegularExpression Protein::m_removeTranslationStopRegExp("\\*$");
41 
42 /*
43  * http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml
44  */
45 // For those programs that use amino acid query sequences (BLASTP and
46 // TBLASTN), the accepted amino acid codes are:
47 //
48 // A alanine P proline
49 // B aspartate/asparagine Q glutamine
50 // C cysteine R arginine
51 // D aspartate S serine
52 // E glutamate T threonine
53 // F phenylalanine U selenocysteine
54 // G glycine V valine
55 // H histidine W tryptophan
56 // I isoleucine Y tyrosine
57 // K lysine Z glutamate/glutamine
58 // L leucine X any
59 // M methionine * translation stop
60 // N asparagine - gap of indeterminate length
61 
63 {
64 }
65 Protein::Protein(const QString &description, const QString &sequence)
66  : m_description(description.simplified()),
67  m_accession(m_description.split(" ").at(0)),
68  m_sequence(sequence)
69 {
70  m_description = m_description.remove(0, m_accession.size()).simplified();
71  // m_sequence.replace(m_removeTranslationStopRegExp, "");
72  m_length = m_sequence.size();
73 }
74 Protein::Protein(const Protein &protein)
75  : m_description(protein.m_description),
76  m_accession(protein.m_accession),
77  m_sequence(protein.m_sequence),
78  m_length(protein.m_length)
79 {
80 }
81 
82 Protein &
84 {
86  return (*this);
87 }
88 
89 Protein &
91 {
92  std::reverse(m_sequence.begin(), m_sequence.end());
93  return (*this);
94 }
95 
98 {
99  return std::make_shared<Protein>(*this);
100 }
101 
102 
103 bool
104 Protein::operator==(const Protein &other) const
105 {
106  return (m_accession == other.m_accession);
107 }
108 
109 void
110 Protein::setSequence(const QString &sequence)
111 {
112  m_sequence = sequence.simplified();
113  m_length = m_sequence.size();
114 }
115 unsigned int
117 {
118  return m_length;
119 }
120 
121 const QString &
123 {
124  return m_sequence;
125 }
126 const QString &
128 {
129  return m_accession;
130 }
131 void
132 Protein::setAccession(const QString &accession)
133 {
134  m_accession = accession.simplified();
135 }
136 const QString &
138 {
139  return m_description;
140 }
141 void
142 Protein::setDescription(const QString &description)
143 {
144  m_description = description.simplified();
145 }
147 {
148 }
151 {
152  try
153  {
154  // qDebug() << "ProteinXtp::getMass() begin " <<
155  // getOnlyAminoAcidSequence().replace("[BZX]","E");
156  // replace amino acid wildcard by E, just to give an random mass (assumed
157  // it is not perfect)
158  QString sequence(m_sequence);
159  sequence.replace(QRegularExpression("[^WGASPVTLINDKQEMHFRCYUBZX]"), "");
160  pappso::Peptide peptide(sequence.replace(QRegularExpression("[BZX]"), "E"));
161  return peptide.getMass();
162  }
163  catch(pappso::PappsoException &error)
164  {
166  QObject::tr("Error computing mass for protein %1 :\n%2")
167  .arg(getAccession())
168  .arg(error.qwhat()));
169  }
170 }
171 } // namespace pappso
virtual const QString & qwhat() const
pappso_double getMass()
Definition: peptide.cpp:207
const QString & getAccession() const
Definition: protein.cpp:127
const QString & getDescription() const
Definition: protein.cpp:137
Protein & removeTranslationStop()
remove * characters at the end of the sequence
Definition: protein.cpp:83
unsigned int m_length
number of amino acid
Definition: protein.h:60
pappso_double getMass() const
get monoisotopic mass of the protein
Definition: protein.cpp:150
QString m_sequence
the amino acid sequence
Definition: protein.h:58
static QRegularExpression m_removeTranslationStopRegExp
Definition: protein.h:62
unsigned int size() const
protein amino acid sequence size
Definition: protein.cpp:116
Protein & reverse()
reverse characters in the sequence
Definition: protein.cpp:90
QString m_accession
a single unique identifier of the protein (usually the first word of description)
Definition: protein.h:56
bool operator==(const Protein &other) const
Definition: protein.cpp:104
QString m_description
free text to describe the protein
Definition: protein.h:53
ProteinSp makeProteinSp() const
Definition: protein.cpp:97
void setSequence(const QString &sequence)
Definition: protein.cpp:110
const QString & getSequence() const
Definition: protein.cpp:122
void setDescription(const QString &description)
Definition: protein.cpp:142
virtual void setAccession(const QString &accession)
Definition: protein.cpp:132
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition: protein.h:43
object to handle a protein