libpappsomspp
Library for mass spectrometry
grpexperiment.cpp
Go to the documentation of this file.
1 
2 /*******************************************************************************
3  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
4  *
5  * This file is part of the PAPPSOms++ library.
6  *
7  * PAPPSOms++ is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * PAPPSOms++ is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
19  *
20  * Contributors:
21  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
22  *implementation
23  ******************************************************************************/
24 
25 #include "grpexperiment.h"
26 #include "grpprotein.h"
27 #include "grppeptide.h"
28 
29 #include "grpgroup.h"
30 #include "grpsubgroup.h"
31 #include "../pappsoexception.h"
32 
33 #include <QObject>
34 
35 using namespace pappso;
36 
38 {
39  mp_monitor = p_monitor;
40 }
41 
// NOTE(review): empty function body whose signature line is absent from this
// listing. Presumably the GrpExperiment destructor — confirm against the
// original file.
43 {
44 }
// NOTE(review): the signature line and the single body statement of this void
// function are absent from this listing. Presumably this is
// GrpExperiment::setRemoveNonInformativeSubgroups(bool ok), storing `ok` in
// m_isRemoveNonInformativeSubgroups — confirm against the original file.
45 void
47 {
49 }
50 
// Builds a GrpPeptideSet from the raw pointer held by sp_protein.
// NOTE(review): the signature line and the statement that consumes
// peptide_set are absent from this listing. Presumably this is one of
// addPostGroupingGrpProteinSpRemoval / addPreGroupingGrpProteinSpRemoval,
// adding the set to the matching m_grp*GroupingProteinListRemoval member —
// confirm which one against the original file.
51 void
53 {
54  GrpPeptideSet peptide_set(sp_protein.get());
56 }
57 
58 
// Builds a GrpPeptideSet from the raw pointer held by sp_protein.
// NOTE(review): the signature line and the statement that consumes
// peptide_set are absent from this listing. Presumably this is one of
// addPostGroupingGrpProteinSpRemoval / addPreGroupingGrpProteinSpRemoval,
// adding the set to the matching m_grp*GroupingProteinListRemoval member —
// confirm which one against the original file.
59 void
61 {
62  GrpPeptideSet peptide_set(sp_protein.get());
64 }
65 
66 std::vector<GrpGroupSpConst>
68 {
69  std::vector<GrpGroupSpConst> grp_list;
70  for(GrpGroupSp group : m_grpGroupSpList)
71  {
72  grp_list.push_back(group);
73  }
74  return grp_list;
75 }
76 
78 GrpExperiment::getGrpProteinSp(const QString &accession,
79  const QString &description)
80 {
81  GrpProtein grpProtein(accession, description);
82  auto insertedPair = m_mapProteins.insert(std::pair<QString, GrpProteinSp>(
83  accession, std::make_shared<GrpProtein>(grpProtein)));
84  if(insertedPair.second)
85  {
86  m_grpProteinList.push_back(insertedPair.first->second);
87  m_remainingGrpProteinList.push_back(insertedPair.first->second.get());
88  }
89  return (insertedPair.first->second);
90 }
91 
94  const QString &sequence,
95  pappso_double mass)
96 {
97  proteinSp.get()->countPlus();
98  GrpPeptideSp sp_grppeptide =
99  std::make_shared<GrpPeptide>(GrpPeptide(sequence, mass));
100 
101  auto insertedPair = m_mapPeptides.insert(
102  std::pair<QString, std::map<unsigned long, GrpPeptideSp>>(
103  sp_grppeptide.get()->m_sequence,
104  std::map<unsigned long, GrpPeptideSp>()));
105  auto secondInsertedPair =
106  insertedPair.first->second.insert(std::pair<unsigned long, GrpPeptideSp>(
107  (unsigned long)(mass * 100), sp_grppeptide));
108  if(secondInsertedPair.second)
109  {
110  m_grpPeptideList.push_back(secondInsertedPair.first->second);
111  }
112  proteinSp.get()->push_back(secondInsertedPair.first->second.get());
113  return (secondInsertedPair.first->second);
114 }
115 
// Runs the grouping over m_remainingGrpProteinList (the qDebug strings name
// this function GrpExperiment::startGrouping).
//
// Visible steps: mark grouping as started and clear the two lookup maps; for
// each remaining protein call strip() and, unless its m_count is 0, build a
// GrpSubGroup and hand it to addSubGroupSp(); pass m_grpGroupSpList to
// grp_map_peptide_to_group.clear(); drop every group for which
// containsAny(m_grpPostGroupingProteinListRemoval) is true; call numbering().
//
// NOTE(review): this listing has lost several lines (the signature, the
// statements governed by each `if(mp_monitor != nullptr)`, and two `if`
// conditions). As printed, the remaining lines do not form the original
// control flow — restore the missing lines from the original file before
// editing the logic.
116 void
118 {
119  qDebug() << "GrpExperiment::startGrouping begin";
120  if(mp_monitor != nullptr)
// NOTE(review): the first line of the monitor call is missing; only the tail
// of its argument list remains on the next line.
122  m_grpPeptideList.size());
123  m_isGroupingStarted = true;
124  m_mapPeptides.clear();
125  m_mapProteins.clear();
126  qDebug() << "GrpExperiment::startGrouping sort protein list "
127  "m_remainingGrpProteinList.size() "
128  << m_remainingGrpProteinList.size();
129  // m_remainingGrpProteinList.sort();
130  // m_remainingGrpProteinList.unique();
131 
// NOTE(review): the condition guarding this (empty) block is missing;
// presumably it tests m_grpPreGroupingProteinListRemoval — confirm.
133  {
134  // TODO clean protein list to remove contaminant peptides before grouping
135  }
136 
137 
138  GrpMapPeptideToGroup grp_map_peptide_to_group;
139  qDebug() << "GrpExperiment::startGrouping grouping begin";
140  for(auto p_grpProtein : m_remainingGrpProteinList)
141  {
142  p_grpProtein->strip();
143  if(p_grpProtein->m_count == 0)
144  {
145  // no peptides : do not group this protein
146  }
147  else
148  {
149  GrpSubGroupSp grpSubGroupSp =
150  GrpSubGroup(p_grpProtein).makeGrpSubGroupSp();
151 
152  if(mp_monitor != nullptr)
// NOTE(review): the monitor call governed by the `if` above is missing; the
// addSubGroupSp call below is presumably unconditional in the original.
154  this->addSubGroupSp(grp_map_peptide_to_group, grpSubGroupSp);
155  }
156  }
// Presumably hands the groups built in the map over to m_grpGroupSpList —
// confirm against GrpMapPeptideToGroup::clear.
157  grp_map_peptide_to_group.clear(m_grpGroupSpList);
158  qDebug() << "GrpExperiment::startGrouping grouping end";
159 
160  qDebug() << "GrpExperiment::startGrouping grouping m_grpGroupSpList.size() "
161  << m_grpGroupSpList.size();
162 
// NOTE(review): both the condition and the body of this block are missing;
// presumably it runs removeNonInformativeSubGroups() when
// m_isRemoveNonInformativeSubgroups is set — confirm.
164  {
166  }
167 
168  // post grouping protein group removal
169  // remove any group containing contaminants
170  m_grpGroupSpList.remove_if([this](GrpGroupSp &groupSp) {
171  return (
172  groupSp.get()->containsAny(this->m_grpPostGroupingProteinListRemoval));
173  });
174 
175 
176  numbering();
177  if(mp_monitor != nullptr)
// NOTE(review): the monitor call governed by the `if` above is missing.
179  // GrpGroup(this, *m_remainingGrpProteinList.begin());
180  qDebug() << "GrpExperiment::startGrouping end";
181 }
182 
183 
// Predicate object: holds a peptide set and, when called with a group, returns
// that group's containsAny(_peptide_set).
// NOTE(review): the line opening this type and the line declaring the
// _peptide_set member are absent from this listing (presumably
// `struct ContainsAny` and `GrpPeptideSet _peptide_set;`) — confirm against
// the original file. No use of this type is visible in this listing.
185 {
186  ContainsAny(const GrpPeptideSet &peptide_set) : _peptide_set(peptide_set)
187  {
188  }
189 
190  typedef bool result_type;
191 
192  bool
193  operator()(const GrpGroupSp &testGroupSp)
194  {
195  return testGroupSp.get()->containsAny(_peptide_set);
196  }
197 
199 };
200 
201 
202 void
204  GrpSubGroupSp &grpSubGroupSp) const
205 {
206  qDebug() << "GrpExperiment::addSubGroupSp begin "
207  << grpSubGroupSp.get()->getFirstAccession();
208 
209  std::list<GrpGroupSp> new_group_list;
210  grp_map_peptide_to_group.getGroupList(grpSubGroupSp.get()->getPeptideSet(),
211  new_group_list);
212 
213  if(new_group_list.size() == 0)
214  {
215  qDebug() << "GrpExperiment::addSubGroupSp create a new group";
216  // create a new group
217  GrpGroupSp sp_group = GrpGroup(grpSubGroupSp).makeGrpGroupSp();
218  // m_grpGroupSpList.push_back(sp_group);
219 
220  grp_map_peptide_to_group.set(grpSubGroupSp.get()->getPeptideSet(),
221  sp_group);
222  }
223  else
224  {
225  qDebug() << "GrpExperiment::addSubGroupSp fusion groupList.size() "
226  << new_group_list.size();
227  // fusion group and add the subgroup
228  auto itGroup = new_group_list.begin();
229  GrpGroupSp p_keepGroup = *itGroup;
230  qDebug() << "GrpExperiment::addSubGroupSp "
231  "p_keepGroup->addSubGroupSp(grpSubGroupSp) "
232  << p_keepGroup.get();
233  p_keepGroup->addSubGroupSp(grpSubGroupSp);
234  grp_map_peptide_to_group.set(grpSubGroupSp.get()->getPeptideSet(),
235  p_keepGroup);
236 
237  itGroup++;
238  while(itGroup != new_group_list.end())
239  {
240  qDebug()
241  << "GrpExperiment::addSubGroupSp p_keepGroup->addGroup(*itGroup) "
242  << itGroup->get();
243  p_keepGroup->addGroup(itGroup->get());
244  grp_map_peptide_to_group.set((*itGroup)->getGrpPeptideSet(),
245  p_keepGroup);
246 
247  // m_grpGroupSpList.remove_if([itGroup](GrpGroupSp & groupSp) {
248  // return (itGroup->get() == groupSp.get()) ;
249  //});
250  itGroup++;
251  }
252  }
253 
254  qDebug() << "GrpExperiment::addSubGroupSp end";
255 }
256 
// Numbers the groups (the qDebug strings name this function
// GrpExperiment::numbering): calls numbering() on every group, sorts
// m_grpGroupSpList with the groups' operator<, then assigns group numbers
// 1, 2, 3, ... in the sorted order.
// NOTE(review): the signature line and the monitor call governed by
// `if(mp_monitor != nullptr)` are absent from this listing; as printed, that
// `if` would wrongly govern the first loop — restore from the original file.
257 void
259 {
260  qDebug() << "GrpExperiment::numbering begin";
261  if(mp_monitor != nullptr)
263  for(auto &&group_sp : m_grpGroupSpList)
264  {
265  group_sp.get()->numbering();
266  }
// Order the groups before assigning their numbers.
267  m_grpGroupSpList.sort([](GrpGroupSp &first, GrpGroupSp &second) {
268  return ((*first.get()) < (*second.get()));
269  });
// Group numbers start at 1.
270  unsigned int i = 1;
271  for(auto &&group_sp : m_grpGroupSpList)
272  {
273  group_sp.get()->setGroupNumber(i);
274  i++;
275  }
276 
277  qDebug() << "GrpExperiment::numbering end";
278 }
279 
// Returns the proteins of m_grpProteinList whose getGroupNumber() is greater
// than 0, in list order.
// Throws PappsoException ("unable to get grouped protein list before
// grouping") under a condition that is missing from this listing.
// NOTE(review): the signature line and the `if` condition guarding the throw
// are absent; presumably getGrpProteinSpList() const and a test on
// m_isGroupingStarted — confirm against the original file.
280 std::vector<GrpProteinSpConst>
282 {
283  std::vector<GrpProteinSpConst> grouped_protein_list;
285  {
286  throw PappsoException(
287  QObject::tr("unable to get grouped protein list before grouping"));
288  }
289  for(auto &&protein_sp : m_grpProteinList)
290  {
// Only proteins with a positive group number are returned.
291  if(protein_sp.get()->getGroupNumber() > 0)
292  {
293  grouped_protein_list.push_back(protein_sp);
294  }
295  }
296  return grouped_protein_list;
297 }
298 
// Rebuilds m_grpGroupSpList after asking each group to remove its
// non-informative subgroups (the qDebug strings name this function
// GrpExperiment::removeNonInformativeSubGroups).
//
// The current list is copied and cleared. For each group of the copy: when its
// removeNonInformativeSubGroups() returns true, its remaining subgroups are
// regrouped from scratch through addSubGroupSp() with a fresh map and the map's
// clear() is given m_grpGroupSpList; otherwise the group is appended unchanged.
//
// NOTE(review): the signature line and the statements governed by each
// `if(mp_monitor != nullptr)` are absent from this listing (two of them leave
// only the tail `m_grpGroupSpList.size());`). As printed, the `if` inside the
// loop would wrongly govern the removal test — restore from the original file.
299 void
301 {
302  qDebug() << "GrpExperiment::removeNonInformativeSubGroups begin";
303  if(mp_monitor != nullptr)
305  m_grpGroupSpList.size());
306 
// Work on a copy so the member list can be rebuilt while iterating.
307  std::list<GrpGroupSp> old_grp_group_sp_list(m_grpGroupSpList);
308  m_grpGroupSpList.clear();
309  auto it_group = old_grp_group_sp_list.begin();
310  while(it_group != old_grp_group_sp_list.end())
311  {
312  if(mp_monitor != nullptr)
314  if(it_group->get()->removeNonInformativeSubGroups())
315  {
316  // need to regroup it
317  GrpGroupSp old_group_sp = *it_group;
318  GrpMapPeptideToGroup grp_map_peptide_to_group;
319 
320  std::list<GrpSubGroupSp> dispatch_sub_group_set =
321  old_group_sp.get()->getSubGroupSpList();
322  for(GrpSubGroupSp &grp_subgroup : dispatch_sub_group_set)
323  {
324  addSubGroupSp(grp_map_peptide_to_group, grp_subgroup);
325  }
// Presumably appends the regrouped result to m_grpGroupSpList — confirm
// against GrpMapPeptideToGroup::clear.
326  grp_map_peptide_to_group.clear(m_grpGroupSpList);
327  }
328  else
329  {
330  qDebug() << "GrpExperiment::removeNonInformativeSubGroups no "
331  "removeNonInformativeSubGroups";
332  m_grpGroupSpList.push_back(*it_group);
333  }
334  it_group++;
335  }
336  if(mp_monitor != nullptr)
338  m_grpGroupSpList.size());
339 
340  qDebug() << "GrpExperiment::removeNonInformativeSubGroups end";
341 }
void addSubGroupSp(GrpMapPeptideToGroup &grp_map_peptide_to_group, GrpSubGroupSp &grpSubGroupSp) const
std::list< GrpGroupSp > m_grpGroupSpList
Definition: grpexperiment.h:58
GrpPeptideSp & setGrpPeptide(const GrpProteinSp &proteinSp, const QString &sequence, pappso_double mass)
GrpGroupingMonitorInterface * mp_monitor
Definition: grpexperiment.h:44
GrpProteinSp & getGrpProteinSp(const QString &acc, const QString &description)
void addPostGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
protein to remove with its entire group after grouping is completed typically : to use with protein c...
bool m_isRemoveNonInformativeSubgroups
Definition: grpexperiment.h:45
std::list< GrpPeptideSp > m_grpPeptideList
Definition: grpexperiment.h:49
std::vector< GrpGroupSpConst > getGrpGroupSpList() const
void setRemoveNonInformativeSubgroups(bool ok)
GrpPeptideSet m_grpPreGroupingProteinListRemoval
Definition: grpexperiment.h:56
GrpExperiment(GrpGroupingMonitorInterface *p_monitor)
void addPreGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
protein peptides to remove before grouping typically : remove protein contaminants in special metapro...
std::map< QString, std::map< unsigned long, GrpPeptideSp > > m_mapPeptides
Definition: grpexperiment.h:47
std::list< GrpProteinSp > m_grpProteinList
Definition: grpexperiment.h:50
GrpPeptideSet m_grpPostGroupingProteinListRemoval
Definition: grpexperiment.h:54
std::list< GrpProtein * > m_remainingGrpProteinList
Definition: grpexperiment.h:52
std::vector< GrpProteinSpConst > getGrpProteinSpList() const
std::map< QString, GrpProteinSp > m_mapProteins
Definition: grpexperiment.h:46
GrpGroupSp makeGrpGroupSp()
Definition: grpgroup.cpp:98
virtual void removingNonInformativeSubGroupsInGroup()=0
virtual void startNumberingAllGroups(std::size_t total_number_group)=0
virtual void startRemovingNonInformativeSubGroupsInAllGroups(std::size_t total_number_group)=0
virtual void startGrouping(std::size_t total_number_protein, std::size_t total_number_peptide)=0
virtual void stopRemovingNonInformativeSubGroupsInAllGroups(std::size_t total_number_group)=0
void clear(std::list< GrpGroupSp > &grp_group_list)
void getGroupList(const GrpPeptideSet &peptide_set_in, std::list< GrpGroupSp > &impacted_group_list) const
get all groups concerned by a list of peptides
void set(const GrpPeptideSet &peptide_set_in, GrpGroupSp grp_group)
set peptide keys pointing on the group
unsigned int size() const
Definition: grppeptideset.h:54
void addAll(const GrpPeptideSet &peptideSet)
GrpSubGroupSp makeGrpSubGroupSp()
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< GrpProtein > GrpProteinSp
Definition: grpprotein.h:37
std::shared_ptr< GrpSubGroup > GrpSubGroupSp
Definition: grpsubgroup.h:39
std::shared_ptr< GrpPeptide > GrpPeptideSp
Definition: grppeptide.h:40
double pappso_double
A type definition for doubles.
Definition: types.h:49
std::shared_ptr< GrpGroup > GrpGroupSp
Definition: grpgroup.h:38
bool operator()(const GrpGroupSp &testGroupSp)
GrpPeptideSet _peptide_set
ContainsAny(const GrpPeptideSet &peptide_set)