42 const QJsonObject &parameters)
47 if(parameters.value(
"fragment_tolerance_unit").toString() ==
"dalton")
49 m_fragmentTolerance = pappso::PrecisionFactory::getDaltonInstance(
50 parameters.value(
"fragment_tolerance").toDouble());
52 else if(parameters.value(
"fragment_tolerance_unit").toString() ==
"ppm")
55 pappso::PrecisionFactory::getPpmInstance(parameters.value(
"fragment_tolerance").toDouble());
58 QJsonObject spectrum_param = parameters.value(
"spectrum").toObject();
60 m_minimumMz = spectrum_param.value(
"minimum_mz").toDouble();
62 m_deisotope = spectrum_param.value(
"deisotope").toBool();
106 new_decoy_protein = it_protein.second;
108 std::make_shared<Protein>(*new_decoy_protein.
protein_sp.get());
109 new_decoy_protein.
protein_sp.get()->reverse();
110 new_decoy_protein.
protein_sp.get()->setAccession(
163 if(!keys().contains(
"id"))
167 if(keys().contains(
"psm_list"))
175 *(qualified_mass_spectrum.get()->getMassSpectrumSPtr().get()));
178 std::make_shared<pappso::specpeptidoms::SpOMSSpectrum>(
179 *qualified_mass_spectrum.get(),
190 QCborArray new_psm_arr;
191 for(QCborValue cbor_psm : value(
"psm_list").toArray())
193 QCborMap old_cbor_psm_map = cbor_psm.toMap();
196 if(!old_cbor_psm_map.keys().contains(
"proforma"))
199 QObject::tr(
"missing proforma in psm %1").arg(old_cbor_psm_map.keys().size()));
202 old_cbor_psm_map.value(
"proforma").toString());
206 peptide_sp.get()->getSequence(),
212 experimental_spectrum,
213 semi_global_alignment,
218 QString sequence = peptide_sp.get()->getSequence();
219 std::reverse(sequence.begin(), sequence.end());
222 m_decoyPrefix + old_cbor_psm_map.value(
"proforma").toString(),
229 experimental_spectrum,
230 semi_global_alignment,
239 remove(QString(
"psm_list"));
240 insert(QString(
"psm_list"), new_psm_arr);
251 const QCborMap &old_cbor_psm_map,
252 QCborArray &new_psm_arr,
258 std::vector<pappso::specpeptidoms::Location> locations;
259 std::vector<double> potential_mass_errors;
260 const QString &sequence = protein_ptr->
getSequence();
265 if((sequence.size() >= 8) &&
269 semi_global_alignment.
fastAlign(*experimental_spectrum.get(), protein_ptr);
273 qDebug() <<
"locations.size():" << locations.size();
274 for(
auto loc : locations)
276 QCborMap new_cbor_psm;
277 qDebug() <<
"beginning=" << loc.beginning <<
"length=" << loc.length
278 <<
"tree=" << loc.tree <<
"score=" << loc.score
279 <<
"protein=" << loc.proteinPtr->getAccession();
281 *experimental_spectrum.get(), loc.proteinPtr, loc.beginning, loc.length);
282 qDebug() <<
"Completed preciseAlign";
290 if(best_alignment.
end > (std::size_t)sequence.size())
293 "(std::size_t)sequence.size() : %1 %2")
294 .arg(best_alignment.
end)
295 .arg(sequence.size()));
298 best_alignment.
shifts.size() > 0)
301 potential_mass_errors =
309 potential_mass_errors);
311 qDebug() <<
"semi_global_alignment.getBestAlignment()";
314 if(best_post_processed_alignment.
SPC > best_alignment.
SPC)
316 qDebug() <<
"Best post-processed alignment"
318 << best_post_processed_alignment.
score <<
"SPC"
319 << best_post_processed_alignment.
SPC;
324 best_post_processed_alignment);
328 qDebug() <<
"no improvement in post-processing";
346 if(!new_cbor_psm.isEmpty())
348 new_psm_arr.push_back(new_cbor_psm);
360 struct sortPsmResults
366 QCborArray old_psm_arr = value(
"psm_list").toArray();
367 QCborArray new_psm_arr;
371 std::vector<sortPsmResults> sort_psm_list;
372 for(
auto it_psm : old_psm_arr)
374 QCborMap psm_map = it_psm.toMap();
376 psm_map.value(
"eval").toMap().value(
"peptidoms").toMap().value(
"score").toInteger();
377 sort_psm_list.push_back({score, psm_map});
380 std::sort(sort_psm_list.begin(), sort_psm_list.end(), [](sortPsmResults &
a, sortPsmResults &
b) {
381 return a.score > b.score;
385 auto it_end = sort_psm_list.begin() + max_psm;
388 qDebug() << sort_psm_list.size();
389 for(
auto it = sort_psm_list.begin(); it != sort_psm_list.end() && it != it_end; it++)
392 new_psm_arr.append(it->psm);
397 remove(QString(
"psm_list"));
398 insert(QString(
"psm_list"), new_psm_arr);
405 const QCborMap &old_cbor_psm,
406 QCborMap &new_cbor_psm,
407 const QString &accession,
410 qDebug() << accession;
412 if(alignment.
score > 0)
448 new_cbor_psm.insert(QString(
"proforma"), peptide_key);
451 new_cbor_psm.insert(QString(
"protein_list"), old_cbor_psm.value(
"protein_list"));
457 QCborMap cbor_peptidoms;
464 cbor_peptidoms.insert(QString(
"spc"), (qint64)alignment.
SPC);
465 cbor_peptidoms.insert(QString(
"score"), alignment.
score);
470 cbor_eval.insert(QString(
"matcher"), old_cbor_psm.value(
"eval").toMap().value(
"matcher"));
474 cbor_eval.insert(QString(
"peptidoms"), cbor_peptidoms);
476 new_cbor_psm.insert(QString(
"eval"), cbor_eval);
482 QCborMap &new_cbor_psm,
483 std::size_t offset_position)
const
485 QCborArray new_protein_list;
486 for(
auto qcbor_protein : new_cbor_psm.value(
"protein_list").toArray())
490 QCborArray positions;
493 protein.insert(QString(
"accession"),
494 m_decoyPrefix + qcbor_protein.toMap().value(
"accession").toString());
495 for(
auto position : qcbor_protein.toMap().value(
"positions").toArray())
497 positions.append(position.toInteger() + (qint64)offset_position);
503 protein.insert(QString(
"accession"), qcbor_protein.toMap().value(
"accession"));
504 for(
auto position : qcbor_protein.toMap().value(
"positions").toArray())
506 positions.append(position.toInteger() + (qint64)offset_position);
510 protein.insert(QString(
"positions"), positions.toCborValue());
512 new_protein_list.append(protein);
514 new_cbor_psm.insert(QString(
"protein_list"), new_protein_list);
collection of integer codes for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
static AaModificationP getInstance(const QString &accession)
Trace & filter(Trace &data_points) const override
get all the datapoints, remove the different isotopes, add their intensities and change to charge = 1 ...
keep N datapoints from the greatest intensities to the lowest
Trace & filter(Trace &data_points) const override
Trace & filter(Trace &trace) const override
Class to represent a mass spectrum.
virtual void setStatus(const QString &status)=0
current status of the process
overrides QCborStreamWriter base class to provide convenient functions
void filterPsmListUniqueUniqueProforma()
CborScanMapBase(const PsmFileScanProcess &psm_file_scan_process)
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
PsmProteinMap m_proteinMap
QCborMap m_cborParameterMap
void delayProteinMapInMemory()
CborStreamWriter * mp_cborOutput
PsmFileScanProcessAndCopy(std::size_t buffer_scan_size, CborStreamWriter *cbor_output_p, const QString &operation)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor) override
virtual void processBufferScanDone(pappso::UiMonitorInterface &monitor) override
std::size_t m_bufferScanSize
void sequenceAlignment(bool is_reverse, const QCborMap &old_cbor_psm_map, QCborArray &new_psm_arr, pappso::specpeptidoms::SpOMSSpectrumCsp &experimental_spectrum, pappso::specpeptidoms::SemiGlobalAlignment &semi_global_alignment, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
void storeAlignment(bool is_reverse, const QCborMap &old_cbor_psm, QCborMap &new_cbor_psm, const QString &accession, const pappso::specpeptidoms::Alignment &alignment)
void fixPositionStart(bool is_reverse, QCborMap &new_cbor_psm, std::size_t offset_position) const
virtual ~PsmSpecPeptidOmsScan()
virtual void filterAndSortPsmList() override
const PsmSpecPeptidOms * mp_psmSpecPeptidOms
PsmSpecPeptidOmsScan(const PsmSpecPeptidOms &psm_specpeptidoms, pappso::PrecisionPtr fragment_tolerance)
void parameterMapReady(pappso::UiMonitorInterface &monitor) override
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor) override
PsmSpecPeptidOms(std::size_t buffer_scan_size, CborStreamWriter *cbor_output_p, const QJsonObject &parameters)
const pappso::AaCode & getAaCode() const
std::size_t m_countScanProcessed
friend PsmSpecPeptidOmsScan
void filterMassSpectrum(pappso::MassSpectrum &mass_spectrum) const
std::size_t m_nMostIntense
virtual ~PsmSpecPeptidOms()
pappso::PrecisionPtr m_fragmentTolerance
std::size_t m_maxInterpretationsPerSpectrum
QJsonObject m_specpeptidomsParameters
CborScanMapBase * newCborScanMap() override
virtual void processBufferScanDone(pappso::UiMonitorInterface &monitor) override
std::vector< Location > getLocations() const
Returns a vector containing the saved locations.
QString toProForma() const
QString toInterpretation() const
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
void postProcessingAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing: generates corrected spectra and aligns them
void preciseAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
void fastAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
static bool checkSequenceDiversity(const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
check that the sequence has a minimum of amino acid diversity
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
const QString & getSequence() const
const QString getAccession() const
std::shared_ptr< const SpOMSSpectrum > SpOMSSpectrumCsp
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
const PrecisionBase * PrecisionPtr
std::shared_ptr< Protein > protein_sp
double getNonAlignedMass() const
convenient function to get the remaining non explained mass shift
std::vector< double > shifts
std::size_t getPositionStart() const
get position of start on the protein sequence
PeptideModel m_peptideModel