25 #include <QStringList>
27 #include "../exception/exceptionnotpossible.h"
// Table of 20 distinct one-letter residue codes.
// NOTE(review): these look like the 20 standard amino acids, presumably the
// expansion set for the 'X' wildcard (cf. m_wildCardX); the code consuming
// this table is not visible in this excerpt -- confirm.
// The stray listing line numbers that were fused into this declaration have
// been removed so that it is valid C++ again; values and order are unchanged.
char vv1[] = {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
              'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
// Two one-letter residue codes: 'N' and 'D'.
// NOTE(review): presumably the expansion set for the 'B' wildcard
// (cf. m_wildCardB); the consuming code is not visible here -- confirm.
// The stray listing line number fused into this declaration has been removed.
char vv2[] = {'N', 'D'};
// Two one-letter residue codes: 'Q' and 'E'.
// NOTE(review): presumably the expansion set for the 'Z' wildcard
// (cf. m_wildCardZ); the consuming code is not visible here -- confirm.
// The stray listing line number fused into this declaration has been removed.
char vv3[] = {'Q', 'E'};
// Table of 20 distinct one-letter residue codes.
// NOTE(review): these look like the 20 standard amino acids, presumably the
// expansion set for the 'X' wildcard (cf. m_wildCardX); the code consuming
// this table is not visible in this excerpt -- confirm.
// The stray listing line numbers that were fused into this declaration have
// been removed so that it is valid C++ again; values and order are unchanged.
char vv1[] = {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
              'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
// Two one-letter residue codes: 'N' and 'D'.
// NOTE(review): presumably the expansion set for the 'B' wildcard
// (cf. m_wildCardB); the consuming code is not visible here -- confirm.
// The stray listing line number fused into this declaration has been removed.
char vv2[] = {'N', 'D'};
// Two one-letter residue codes: 'Q' and 'E'.
// NOTE(review): presumably the expansion set for the 'Z' wildcard
// (cf. m_wildCardZ); the consuming code is not visible here -- confirm.
// The stray listing line number fused into this declaration has been removed.
char vv3[] = {'Q', 'E'};
105 qDebug() <<
"Enzyme::eat begin ";
106 const QString sequence = protein_sp.get()->getSequence();
107 qDebug() << sequence;
108 QStringList peptide_list;
110 int peptide_start = 0;
111 int peptide_size = sequence.size();
112 QRegularExpressionMatch match_recognition_site =
114 while(match_recognition_site.hasMatch())
116 pos = match_recognition_site.capturedStart(0);
118 pos + match_recognition_site.captured(1).length() - peptide_start;
124 peptide_list.append(sequence.mid(peptide_start, peptide_size));
126 peptide_start += peptide_size;
130 peptide_size = sequence.size() - peptide_start;
133 peptide_list.append(sequence.mid(peptide_start, peptide_size));
136 unsigned int start = 1;
138 foreach(
const QString &peptide, peptide_list)
143 sequence_database_id,
152 start += peptide.size();
155 unsigned int miscleavage_i = 0;
159 qDebug() <<
"miscleavage_i=" << miscleavage_i;
160 int chunk_number = miscleavage_i + 1;
161 unsigned int start = 1;
164 for(
auto i = 0; i < peptide_list.size(); ++i)
166 qDebug() <<
"start=" << start;
167 QStringList peptide_mis_list;
168 for(
auto j = 0; (j < chunk_number) && ((i + j) < peptide_list.size());
171 peptide_mis_list << peptide_list.at(i + j);
173 if(peptide_mis_list.size() == chunk_number)
179 sequence_database_id,
182 peptide_mis_list.join(
""),
189 start += peptide_list.at(i).size();
197 std::string new_peptide = p_peptide_variant_list->at(0);
198 qDebug() <<
"Enzyme::replaceWildcards begin " << new_peptide.c_str();
199 std::vector<std::string> old_peptide_variant_list;
200 old_peptide_variant_list.assign(p_peptide_variant_list->begin(),
201 p_peptide_variant_list->end());
204 for(
char wildcard : {
'X',
'B',
'Z'})
207 std::size_t position = new_peptide.find(wildcard);
208 if(position == std::string::npos)
214 p_peptide_variant_list->clear();
221 const std::vector<char> *p_x_replace_wildcard =
nullptr;
226 else if(wildcard ==
'B')
230 else if(wildcard ==
'Z')
235 if(p_x_replace_wildcard !=
nullptr)
237 for(std::string orig_peptide : old_peptide_variant_list)
239 for(
char replace : *p_x_replace_wildcard)
241 orig_peptide[position] = replace;
242 p_peptide_variant_list->push_back(orig_peptide);
249 QObject::tr(
"x_replace_wildcard is empty"));
261 std::vector<std::string>().swap(
262 old_peptide_variant_list);
265 qDebug() <<
"Enzyme::replaceWildcards end " << new_peptide.c_str();
277 std::int8_t sequence_database_id,
283 unsigned int missed_cleavage_number,
284 bool semi_enzyme)
const
286 if(peptide.contains(
'X') || peptide.contains(
'B') || peptide.contains(
'Z'))
289 std::vector<std::string> peptide_variant_list;
290 peptide_variant_list.push_back(peptide.toStdString());
292 while((peptide_variant_list.at(0).find(
'X') != std::string::npos) ||
293 (peptide_variant_list.at(0).find(
'B') != std::string::npos) ||
294 (peptide_variant_list.at(0).find(
'Z') != std::string::npos))
300 peptide_variant_list.shrink_to_fit();
307 enzyme_product.
setPeptide(sequence_database_id,
310 QString(peptide_variant_list.at(0).c_str()),
313 missed_cleavage_number,
318 std::string peptide_variant = peptide_variant_list.back();
319 while(peptide_variant_list.size() > 0)
321 enzyme_product.
setPeptide(sequence_database_id,
324 QString(peptide_variant.c_str()),
327 missed_cleavage_number,
329 peptide_variant_list.pop_back();
330 if(peptide_variant_list.size() > 0)
332 peptide_variant = peptide_variant_list.back();
336 std::vector<std::string>().swap(
337 peptide_variant_list);
341 enzyme_product.
setPeptide(sequence_database_id,
347 missed_cleavage_number,
352 const QRegularExpression &
virtual void setPeptide(std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme)=0
function to give the products of a protein digestion by an enzyme
QRegularExpression m_recognitionSite
example with a kinase == [K,R]
std::size_t m_maxPeptideVariantListSize
unsigned int getMiscleavage() const
get the maximum number of missed cleavages allowed in the digestion
Enzyme()
build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"
void setMiscleavage(unsigned int miscleavage)
sets the maximum number of missed cleavages allowed in the digestion
std::vector< char > m_wildCardB
std::vector< char > m_wildCardZ
std::vector< char > m_wildCardX
void sanityCheck(EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
const QRegularExpression & getQRegExpRecognitionSite() const
void replaceWildcards(std::vector< std::string > *p_peptide_variant_list) const
void setTakeOnlyFirstWildcard(bool take_only_first_wildcard)
sets m_takeOnlyFirstWildcard: whether to take only the first wildcard
void eat(std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, EnzymeProductInterface &enzyme_product) const
digest a protein into enzyme products
unsigned int m_miscleavage
bool m_takeOnlyFirstWildcard
void setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size)
if there are wildcards in the protein sequence, restrict the number of possible peptide sequences
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
QString PeptideStr
A type definition for PeptideStr.
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object