// Three small character tables. The numbers at the start of some lines
// (38, 39, 42, 45) are line numbers carried over from the source listing.
//
// vv1: the one-letter codes of the 20 standard amino acids.
// NOTE(review): presumably the replacement set for the 'X' wildcard
// (see m_wildCardX) — the code that consumes vv1 is not visible here; confirm.
38 char vv1[] = {
'A',
'R',
'N',
'D',
'C',
'Q',
'E',
'G',
'H',
'I',
39 'L',
'K',
'M',
'F',
'P',
'S',
'T',
'W',
'Y',
'V'};
// vv2: the two residues N and D.
// NOTE(review): presumably the replacement set for the 'B' wildcard
// (see m_wildCardB) — confirm against the code that uses it.
42 char vv2[] = {
'N',
'D'};
// vv3: the two residues Q and E.
// NOTE(review): presumably the replacement set for the 'Z' wildcard
// (see m_wildCardZ) — confirm against the code that uses it.
45 char vv3[] = {
'Q',
'E'};
// Second copy of the same three character tables (same names and contents
// as the vv1/vv2/vv3 tables at listing lines 38-45).
// NOTE(review): the enclosing scope is not visible in this excerpt; the two
// copies presumably live in different functions — confirm.
//
// vv1: the one-letter codes of the 20 standard amino acids.
55 char vv1[] = {
'A',
'R',
'N',
'D',
'C',
'Q',
'E',
'G',
'H',
'I',
56 'L',
'K',
'M',
'F',
'P',
'S',
'T',
'W',
'Y',
'V'};
// vv2: the two residues N and D.
59 char vv2[] = {
'N',
'D'};
// vv3: the two residues Q and E.
62 char vv3[] = {
'Q',
'E'};
// Excerpt of the digestion routine (its debug message names it Enzyme::eat).
// The listing is partial: braces and several statements between the numbered
// lines are missing, so the comments below describe only what is shown.
105 qDebug() <<
"Enzyme::eat begin ";
// Sequence of the protein being digested.
106 const QString sequence = protein_sp.get()->getSequence();
107 qDebug() << sequence;
// Peptides obtained by cutting the sequence at each recognition-site match,
// stored in sequence order.
108 QStringList peptide_list;
110 int peptide_start = 0;
111 int peptide_size = sequence.size();
// First search for the recognition site, starting at offset pos
// (pos is declared on a line that is not part of this excerpt).
112 QRegularExpressionMatch match_recognition_site =
m_recognitionSite.match(sequence, pos);
113 while(match_recognition_site.hasMatch())
// The current peptide starts at peptide_start and ends
// captured(1).length() characters past the start of the whole match.
115 pos = match_recognition_site.capturedStart(0);
116 peptide_size = pos + match_recognition_site.captured(1).length() - peptide_start;
122 peptide_list.append(sequence.mid(peptide_start, peptide_size));
// The next peptide begins right after the one just stored.
124 peptide_start += peptide_size;
// NOTE(review): the statement that runs the next match for the loop
// condition is not visible in this excerpt.
// Whatever remains after the last cut becomes the final peptide.
128 peptide_size = sequence.size() - peptide_start;
131 peptide_list.append(sequence.mid(peptide_start, peptide_size));
// First pass over the individual peptides. start begins at 1 and is advanced
// by each peptide's length, so it is the 1-based offset of the current
// peptide within the sequence.
134 unsigned int start = 1;
136 foreach(
const QString &peptide, peptide_list)
// Argument of a call whose beginning is not visible in this excerpt.
141 sequence_database_id,
150 start += peptide.size();
// Second pass: peptides joined across cut sites.
// NOTE(review): the loop that advances miscleavage_i is not visible here;
// presumably it runs up to m_miscleavage — confirm.
153 unsigned int miscleavage_i = 0;
157 qDebug() <<
"miscleavage_i=" << miscleavage_i;
// Number of consecutive peptides concatenated into one product.
158 int chunk_number = miscleavage_i + 1;
159 unsigned int start = 1;
162 for(
auto i = 0; i < peptide_list.size(); ++i)
164 qDebug() <<
"start=" << start;
// Collect up to chunk_number consecutive peptides starting at index i,
// stopping early at the end of the list.
165 QStringList peptide_mis_list;
166 for(
auto j = 0; (j < chunk_number) && ((i + j) < peptide_list.size()); j++)
168 peptide_mis_list << peptide_list.at(i + j);
// Only a full window of chunk_number peptides is used; shorter windows
// near the end of the list are skipped.
170 if(peptide_mis_list.size() == chunk_number)
// Arguments of a call whose beginning is not visible; the joined
// peptides form the sequence that is passed on.
176 sequence_database_id,
179 peptide_mis_list.join(
""),
// Move start forward by the length of the first peptide of the window.
186 start += peptide_list.at(i).size();
// Excerpt of the wildcard expansion routine (its debug messages name it
// Enzyme::replaceWildcards). Braces and some branch bodies are missing from
// the listing; the comments below describe only what is shown.
//
// The first variant is used to look for wildcard positions.
194 std::string new_peptide = p_peptide_variant_list->at(0);
195 qDebug() <<
"Enzyme::replaceWildcards begin " << new_peptide.c_str();
// Snapshot of the current variants; the caller's list is rebuilt from it.
196 std::vector<std::string> old_peptide_variant_list;
197 old_peptide_variant_list.assign(p_peptide_variant_list->begin(), p_peptide_variant_list->end());
// Wildcards are tried in the fixed order X, B, Z.
200 for(
char wildcard : {
'X',
'B',
'Z'})
// Index of the first occurrence of this wildcard in the first variant.
203 std::size_t position = new_peptide.find(wildcard);
// NOTE(review): the body of this test is not visible; presumably it moves
// on to the next wildcard when none is found — confirm.
204 if(position == std::string::npos)
// The output list is emptied before the expanded variants are pushed back.
210 p_peptide_variant_list->clear();
// Points at the replacement characters for the current wildcard; stays
// nullptr unless one of the branches below assigns it.
217 const std::vector<char> *p_x_replace_wildcard =
nullptr;
// NOTE(review): the leading `if` of this chain and the branch bodies are not
// visible; presumably they select m_wildCardX / m_wildCardB / m_wildCardZ.
222 else if(wildcard ==
'B')
226 else if(wildcard ==
'Z')
231 if(p_x_replace_wildcard !=
nullptr)
// Each old variant is taken by value so it can be modified in place.
233 for(std::string orig_peptide : old_peptide_variant_list)
235 for(
char replace : *p_x_replace_wildcard)
// position was found in the first variant and is reused for every variant.
// NOTE(review): this assumes all variants carry the wildcard at the same
// index — confirm.
237 orig_peptide[position] = replace;
238 p_peptide_variant_list->push_back(orig_peptide);
// Swapping with an empty temporary releases the snapshot's storage.
256 std::vector<std::string>().swap(
257 old_peptide_variant_list);
260 qDebug() <<
"Enzyme::replaceWildcards end " << new_peptide.c_str();
// Excerpt of a const member function; only part of its parameter list is
// shown. NOTE(review): the visible parameters match the sanityCheck
// signature listed in the member index — confirm.
// Braces, branch conditions and some call arguments are missing from the
// listing; the comments below describe only what is shown.
272 std::int8_t sequence_database_id,
278 unsigned int missed_cleavage_number,
279 bool semi_enzyme)
const
// Peptides containing any of the wildcard letters X, B or Z get special
// handling.
281 if(peptide.contains(
'X') || peptide.contains(
'B') || peptide.contains(
'Z'))
// The variant list starts with the original peptide as its only element.
284 std::vector<std::string> peptide_variant_list;
285 peptide_variant_list.push_back(peptide.toStdString());
// Loop while the first variant still contains a wildcard.
// NOTE(review): the loop body is not visible; presumably it calls
// replaceWildcards and enforces m_maxPeptideVariantListSize — confirm.
287 while((peptide_variant_list.at(0).find(
'X') != std::string::npos) ||
288 (peptide_variant_list.at(0).find(
'B') != std::string::npos) ||
289 (peptide_variant_list.at(0).find(
'Z') != std::string::npos))
// Ask the vector to give back unused capacity.
295 peptide_variant_list.shrink_to_fit();
// Report only the first variant.
// NOTE(review): the condition guarding this call is not visible; presumably
// it is m_takeOnlyFirstWildcard — confirm.
302 enzyme_product.
setPeptide(sequence_database_id,
305 QString(peptide_variant_list.at(0).c_str()),
308 missed_cleavage_number,
// Otherwise every variant is reported, taken from the back of the list and
// popped as it is consumed, until the list is empty.
313 std::string peptide_variant = peptide_variant_list.back();
314 while(peptide_variant_list.size() > 0)
316 enzyme_product.
setPeptide(sequence_database_id,
319 QString(peptide_variant.c_str()),
322 missed_cleavage_number,
324 peptide_variant_list.pop_back();
// Fetch the next variant only if one remains; back() on an empty vector
// is undefined.
325 if(peptide_variant_list.size() > 0)
327 peptide_variant = peptide_variant_list.back();
// Swapping with an empty temporary releases the list's storage.
331 std::vector<std::string>().swap(
332 peptide_variant_list);
// NOTE(review): presumably the branch for peptides without wildcards, which
// are reported as they are — the branch structure is not visible; confirm.
336 enzyme_product.
setPeptide(sequence_database_id,
342 missed_cleavage_number,
347const QRegularExpression &
virtual void setPeptide(std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme)=0
function to give the products of a protein digestion by an enzyme
QRegularExpression m_recognitionSite
example with trypsin == [K,R]
std::size_t m_maxPeptideVariantListSize
unsigned int getMiscleavage() const
get the maximum number of missed cleavages allowed in the digestion
Enzyme()
build the default enzyme (trypsin) with recognition_site = "([KR])([^P])"
void setMiscleavage(unsigned int miscleavage)
sets the maximum number of missed cleavages allowed in the digestion
std::vector< char > m_wildCardB
std::vector< char > m_wildCardZ
std::vector< char > m_wildCardX
void sanityCheck(EnzymeProductInterface &enzyme_product, std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, const PeptideStr &peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const
const QRegularExpression & getQRegExpRecognitionSite() const
void replaceWildcards(std::vector< std::string > *p_peptide_variant_list) const
void setTakeOnlyFirstWildcard(bool take_only_first_wildcard)
sets m_takeOnlyFirstWildcard, i.e. whether only the first wildcard replacement is taken
void eat(std::int8_t sequence_database_id, const ProteinSp &protein_sp, bool is_decoy, EnzymeProductInterface &enzyme_product) const
digest a protein into enzyme products
unsigned int m_miscleavage
bool m_takeOnlyFirstWildcard
void setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size)
if there are wildcards in the protein sequence: restricts the number of possible peptide sequences
tries to keep as many monoisotopes as possible, removing any possible C13 peaks and changes multichar...
QString PeptideStr
A type definition for PeptideStr.
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object