libpappsomspp
Library for mass spectrometry
fastafileindexer.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/fasta/fastafileindexer.cpp
3  * \date 22/06/2019
4  * \author Olivier Langella
5  * \brief Quick random access to sequences in a fasta file using an index
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #include "fastafileindexer.h"
29 
30 #include <QDebug>
31 #include <QTextStream>
32 #include <QDataStream>
33 #include <QFileInfo>
34 #include "../exception/exceptionoutofrange.h"
35 #include "fastareader.h"
36 
37 namespace pappso
38 {
/**
 * \brief Constructor taking the FASTA file as a QFileInfo.
 *
 * m_fasta_file is initialised from the absolute path of \p fastaFile. The file
 * is then opened read-only and closed again before the constructor returns.
 *
 * \param fastaFile file information designating the FASTA file
 * \throw PappsoException if the file name is empty or if the file cannot be
 *        opened for reading
 */
39 FastaFileIndexer::FastaFileIndexer(const QFileInfo &fastaFile)
40  : m_fasta_file(fastaFile.absoluteFilePath())
41 {
42 
// Reject an empty file name before trying to open anything.
43  if(m_fasta_file.fileName().isEmpty())
44  {
45  throw PappsoException(QObject::tr("No FASTA file name specified"));
46  }
47  if(m_fasta_file.open(QIODevice::ReadOnly))
48  {
// NOTE(review): listing line 49 is absent from this extract. Presumably it is
// the call that fills m_indexArray while the file is open -- confirm against
// the original source.
50  m_fasta_file.close();
51  }
52  else
53  {
// The message is built from fastaFile.fileName(), not from the absolute path
// stored in m_fasta_file.
54  throw PappsoException(QObject::tr("ERROR opening FASTA file %1 for read")
55  .arg(fastaFile.fileName()));
56  }
57 }
58 
// Initialiser list and body of a constructor that copies from `other`.
// NOTE(review): its signature (listing line 59) is missing from this extract.
// A new file object is created on the same file name as other.m_fasta_file.
60  : m_fasta_file(other.m_fasta_file.fileName())
61 {
62 
// The offsets already recorded by `other` are copied as they are.
63  m_indexArray = other.m_indexArray;
// The new object starts without a text stream of its own.
64  mpa_sequenceTxtIn = nullptr;
65 }
67 {
// NOTE(review): the signature of this function (listing line 66) is missing
// from this extract; presumably it is the destructor -- confirm.
// Its only action is to call close().
68  close();
69 }
70 
71 
/**
 * \brief Scans m_fasta_file byte by byte and records where each line starting
 *        with '>' begins.
 *
 * NOTE(review): the line carrying the function name (listing line 73) is
 * missing from this extract.
 *
 * A QDataStream is attached to m_fasta_file; this function neither opens nor
 * closes the file. \c position counts the bytes read before \c char_in, so it
 * is the offset of \c char_in from the point where reading started. Each time
 * the byte examined at the top of the main loop is '>', \c position is
 * appended to m_indexArray.
 *
 * Bytes whose value is below 21 are treated as line-break bytes.
 */
72 void
74 {
75 
76  qDebug();
77  QDataStream bin_in(&m_fasta_file);
78  qint64 position = 0;
79 
80  // QChar first_char;
81  // txt_in >> first_char;
// NOTE(review): qint8 is a signed type, so bytes of 0x80 and above compare as
// negative and therefore count as "below 21" in the tests below. Confirm this
// is acceptable for headers containing non-ASCII text.
82  qint8 char_in;
83  bin_in >> char_in;
// Skip every leading byte below 21 so that char_in holds the first other byte.
84  while(!bin_in.atEnd() && (char_in < (qint8)21))
85  { // eat Windows \r\n
86  position++;
87  bin_in >> char_in;
88  }
// Main loop: at the top of each pass char_in is expected to be the first byte
// of a line.
// NOTE(review): both loops stop as soon as atEnd() is true, so the last byte
// read is never examined.
89  while(!bin_in.atEnd())
90  {
91  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
92  // << " first_char=" << first_char;
93  if(char_in == (qint8)'>')
94  {
95 
96  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
97  // << " index=" << m_indexArray.size()
98  // << " position=" << position;
// Record the offset of this '>' byte.
99  m_indexArray.push_back(position);
100  }
101  // eat line
102  position++;
103  bin_in >> char_in;
// Consume the rest of the line: stop on the first byte that is not above 20.
104  while(!bin_in.atEnd() && (char_in > (qint8)20))
105  {
106  position++;
107  bin_in >> char_in;
108  }
// Step past the line-break byte that stopped the loop above.
109  position++;
110  bin_in >> char_in;
111 
// Step past at most one more byte below 21 (the second byte of a \r\n pair).
// NOTE(review): if further bytes below 21 follow (e.g. two empty lines in a
// row), the next pass starts on a line-break byte and the "eat line" step
// reads the following '>' without testing it, so that entry looks like it
// would not be indexed -- verify.
112  if(!bin_in.atEnd() && (char_in < (qint8)21))
113  { // eat Windows \r\n
114  position++;
115  bin_in >> char_in;
116  }
117  }
118  qDebug();
119 }
120 
/**
 * \brief Opens m_fasta_file read-only and creates the text stream
 *        mpa_sequenceTxtIn on it.
 *
 * NOTE(review): the line carrying the function name (listing line 122) is
 * missing from this extract; presumably this is open() -- confirm.
 *
 * Returns immediately when mpa_sequenceTxtIn is already set.
 *
 * \throw PappsoException if the file cannot be opened for reading
 */
121 void
123 {
// A stream already exists: nothing to do.
124  if(mpa_sequenceTxtIn != nullptr)
125  return;
126  if(m_fasta_file.open(QIODevice::ReadOnly))
127  {
// Allocated with new; the matching delete is in the block that resets
// mpa_sequenceTxtIn to nullptr.
128  mpa_sequenceTxtIn = new QTextStream(&m_fasta_file);
129  }
130  else
131  {
132  throw PappsoException(QObject::tr("ERROR opening FASTA file %1 for read")
133  .arg(m_fasta_file.fileName()));
134  }
135 }
136 
/**
 * \brief Destroys the text stream, if any, and closes m_fasta_file.
 *
 * NOTE(review): the line carrying the function name (listing line 138) is
 * missing from this extract; presumably this is close() -- confirm.
 *
 * Does nothing when mpa_sequenceTxtIn is already nullptr.
 */
137 void
139 {
140  if(mpa_sequenceTxtIn != nullptr)
141  {
// Delete the stream first, then reset the pointer, then close the file.
142  delete mpa_sequenceTxtIn;
143  mpa_sequenceTxtIn = nullptr;
144  m_fasta_file.close();
145  }
146 }
147 
/**
 * \brief Positions the text stream on the recorded offset of entry \p index
 *        and builds a FastaReader on \p fasta_handler.
 *
 * NOTE(review): the line carrying the function name and the first parameter
 * (listing line 149) is missing from this extract. The cross-reference list
 * at the end of the page gives the signature as
 * getSequenceByIndex(FastaHandlerInterface &fasta_handler, std::size_t index).
 *
 * \param index position of the wanted entry in m_indexArray
 * \throw ExceptionOutOfRange if \p index is not below m_indexArray.size() or
 *        if the seek returns false
 */
148 void
150  std::size_t index)
151 {
// Make sure the stream is available before using mpa_sequenceTxtIn.
152  open();
153 
// NOTE(review): m_indexArray[index] is evaluated here, before the range check
// on `index` below, so an out-of-range index is read unchecked by this debug
// statement.
154  qDebug() << " goto=" << index << " pos=" << m_indexArray[index];
155  bool seek_ok;
// seek_ok is assigned inside the condition; && only evaluates the seek when
// the index is in range.
156  if((index < m_indexArray.size()) &&
157  (seek_ok = mpa_sequenceTxtIn->seek(m_indexArray[index])))
158  {
159 
160  qDebug() << " realpos=" << mpa_sequenceTxtIn->pos();
161  ;
// NOTE(review): this branch is only entered when seek_ok is true, so the test
// below cannot succeed. A failed seek goes to the else branch and is reported
// with the "sequence index unreachable" message instead.
162  if(!seek_ok)
163  {
164 
165  throw PappsoException(QObject::tr("ERROR FastaFileIndexer : seek to "
166  "sequence %1, position %2 failed")
167  .arg(index)
168  .arg(m_indexArray[index]));
169  }
170  FastaReader reader(fasta_handler);
// NOTE(review): listing line 171 is absent from this extract; presumably it is
// the call that reads one entry from mpa_sequenceTxtIn -- confirm against the
// original source.
172  }
173  else
174  {
175  throw ExceptionOutOfRange(
176  QObject::tr("ERROR reading FASTA file %1 : sequence index %2 "
177  "unreachable, array size=%3")
178  .arg(m_fasta_file.fileName())
179  .arg(index)
180  .arg(m_indexArray.size()));
181  }
182 }
183 
184 
187 {
// NOTE(review): the signature lines (listing lines 185-186) are missing from
// this extract. The cross-reference list at the end of the page names this
// function makeFastaFileIndexerSPtr() const, returning FastaFileIndexerSPtr.
188 
// Returns a shared pointer to a new FastaFileIndexer constructed from *this.
189  return std::make_shared<FastaFileIndexer>(*this);
190 }
191 } // namespace pappso
FastaFileIndexer(const QFileInfo &fastaFile)
FastaFileIndexerSPtr makeFastaFileIndexerSPtr() const
QTextStream * mpa_sequenceTxtIn
std::vector< qint64 > m_indexArray
void getSequenceByIndex(FastaHandlerInterface &fasta_handler, std::size_t index) override
void parseOnlyOne(QTextStream &p_in)
Definition: fastareader.cpp:99
Quick random access to sequences in a fasta file using an index.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< FastaFileIndexer > FastaFileIndexerSPtr