libpappsomspp
Library for mass spectrometry
enzyme.h
Go to the documentation of this file.
1 /*******************************************************************************
2  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
3  *
4  * This file is part of the PAPPSOms++ library.
5  *
6  * PAPPSOms++ is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * PAPPSOms++ is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Contributors:
20  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
21  *implementation
22  ******************************************************************************/
23 
24 #pragma once
25 
26 #include "enzymeproductinterface.h"
27 #include <QRegularExpression>
28 
29 namespace pappso
30 {
32 {
33  public:
34  /** \brief build the default enzyme (trypsin) with recognition_site =
35  * "([KR])([^P])"
36  * */
37  Enzyme();
38 
39  /** \brief build any enzyme given a recognition_site
40  * \param recognition_site is a regular expression that must identify 2 motifs
41  * : one on Nter side one on Cter side
42  * */
43  Enzyme(const QString &recognition_site);
44  ~Enzyme();
45 
46  /** \brief digest a protein into enzyme products
47  * \param sequence_database_id integer that references the sequence database
48  * (file, stream, url...) \param protein_sp is the original protein to be
49  * digested \param is_decoy tell if the current protein is a decoy (true) or
50  * normal (false) protein \param enzyme_product is the object that will
51  * receive the digestion products
52  * */
53  void eat(std::int8_t sequence_database_id,
54  const ProteinSp &protein_sp,
55  bool is_decoy,
56  EnzymeProductInterface &enzyme_product) const;
57 
58  /** \brief sets the maximum number of missed cleavages allowed in the digestion
59  * \param miscleavage maximum number of missed cleavages to allow (default is
60  * 0)
61  * */
62  void setMiscleavage(unsigned int miscleavage);
63 
64  /** \brief gets the maximum number of missed cleavages allowed in the digestion
65  * @return the maximum number of missed cleavages allowed (default is
66  * 0)
67  * */
68  unsigned int getMiscleavage() const;
69 
70 
71  /** \brief choose whether only the first wildcard possibility is taken
72  * \param take_only_first_wildcard true : switch to take only the first
73  * possibility if there are X, B or Z wildcards in sequence
74  */
75  void setTakeOnlyFirstWildcard(bool take_only_first_wildcard);
76 
77  /** \brief if there are wildcards in the protein sequence : restrict the
78  * number of possible peptide sequences \param max_peptide_variant_list_size
79  * maximum number of peptide variants (default is 100)
80  */
81  void setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size);
82 
83 
84  const QRegularExpression &getQRegExpRecognitionSite() const;
85 
86 
87  private:
88  /** \brief recognition site regular expression, e.g. "([KR])([^P])" for trypsin */
89  QRegularExpression m_recognitionSite;
90  unsigned int m_miscleavage = 0;
91  bool m_takeOnlyFirstWildcard = false;
92 
93  std::size_t m_maxPeptideVariantListSize = 100;
94 
95 
96  std::vector<char> m_wildCardX;
97  std::vector<char> m_wildCardB;
98  std::vector<char> m_wildCardZ;
99 
100  void sanityCheck(EnzymeProductInterface &enzyme_product,
101  std::int8_t sequence_database_id,
102  const ProteinSp &protein_sp,
103  bool is_decoy,
104  const PeptideStr &peptide,
105  unsigned int start,
106  bool is_nter,
107  unsigned int missed_cleavage_number,
108  bool semi_enzyme) const;
109  void replaceWildcards(std::vector<std::string> *p_peptide_variant_list) const;
110 };
111 
112 } // namespace pappso
QRegularExpression m_recognitionSite
example with a kinase == [K,R]
Definition: enzyme.h:89
std::vector< char > m_wildCardB
Definition: enzyme.h:97
std::vector< char > m_wildCardZ
Definition: enzyme.h:98
std::vector< char > m_wildCardX
Definition: enzyme.h:96
#define PMSPP_LIB_DECL
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
QString PeptideStr
A type definition for PeptideStr.
Definition: types.h:44
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition: protein.h:43