/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/


// ./tests/peptidoms/catch2-only-peptidoms [precisealign] -s


#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/matchers/catch_matchers_vector.hpp>

#include <QString>
#include "../common.h"
#include <pappsomspp/core/amino_acid/aastringcodemassmatching.h>
#include <pappsomspp/core/fasta/fastareader.h>
#include <pappsomspp/core/fasta/fastafileindexer.h>
#include <pappsomspp/core/massspectrum/massspectrum.h>
#include <pappsomspp/core/processing/filters/filterresample.h>
#include <pappsomspp/core/processing/filters/filterpass.h>
#include <pappsomspp/core/processing/filters/filterchargedeconvolution.h>
#include <pappsomspp/core/processing/specpeptidoms/correctiontree.h>
#include <pappsomspp/core/processing/specpeptidoms/locationsaver.h>
#include <pappsomspp/core/processing/specpeptidoms/semiglobalalignment.h>
#include <pappsomspp/core/processing/specpeptidoms/spomsspectrum.h>
#include <pappsomspp/core/processing/specpeptidoms/types.h>
#include <pappsomspp/core/protein/protein.h>
#include <pappsomspp/core/msfile/msfileaccessor.h>


/**
 * Precise-alignment regression tests for pappso::specpeptidoms::SemiGlobalAlignment.
 *
 * Every section follows the same steps:
 *  1. open the A01 mzML run and read one spectrum by index;
 *  2. apply FilterChargeDeconvolution, FilterResampleKeepGreater(150) and
 *     FilterGreatestY(120) to the spectrum, in that order;
 *  3. run fastAlign() of the spectrum against a single protein sequence and
 *     check the locations that were saved;
 *  4. run preciseAlign() on every saved location and compare the best
 *     alignment against hard-coded expected values.
 *
 * NOTE: the mzML file is referenced through an absolute path, so these sections
 * can only run on a host where that path is available.
 */
TEST_CASE("test for peptidoms alignment.", "[precisealign]")
{
  // Set the debugging message formatting pattern.
  qSetMessagePattern(QString("%{file}@%{line}, %{function}(): %{message}"));

  SECTION("..:: Check precise alignment for GRMZM2G123558_P01 on spectrum 2037 ::..",
          "[precisealign]")
  {
    // Protein "GRMZM2G123558_P01", spectrum_index= 2037
    pappso::MsFileAccessor file_access(
      "/gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML", "");
    pappso::MsRunReaderSPtr msrunA01 = file_access.getMsRunReaderSPtrByRunId("", "runa01");

    // Amino acid code with modification MOD:00397 registered on residue 'C'.
    pappso::AaCode aa_code;
    aa_code.addAaModification('C', pappso::AaModification::getInstance("MOD:00397"));

    pappso::specpeptidoms::SpOMSProtein protein(
      "protein_id",
      "MTSVWKTKVLPGLNKIFDKDGKKAAAAGFLKSFNKEEFDKEIEDKKTELEPKVVEAYEASPPEVKALFKDKKPVKVSKKNSAAVTKFLDELA"
      "KIDFPGAKVVSEAVAKSGTSPLLPAITFILDKVAPFIPKEEPKEEPAAEATSREVAVEEKKEEAEPAAATEAAPAPAETPSEAAVVEEKKEE"
      "EKKEEEDKPAAEEAAPPAAAAAAAEEK",
      aa_code);

    pappso::PrecisionPtr precision_ptr = pappso::PrecisionFactory::getDaltonInstance(0.02);

    // Read the spectrum, then filter it in place before building the SpOMS spectrum.
    pappso::QualifiedMassSpectrum spectrum_simple = msrunA01->qualifiedMassSpectrum(2037);
    pappso::FilterChargeDeconvolution(precision_ptr).filter(*spectrum_simple.getMassSpectrumSPtr());
    pappso::FilterResampleKeepGreater(150).filter(*spectrum_simple.getMassSpectrumSPtr());
    pappso::FilterGreatestY(120).filter(*spectrum_simple.getMassSpectrumSPtr());

    pappso::specpeptidoms::SpOMSSpectrum experimental_spectrum(
      spectrum_simple, precision_ptr, aa_code);

    pappso::specpeptidoms::ScoreValues score_values;

    pappso::specpeptidoms::SemiGlobalAlignment semi_global_alignment(
      score_values, precision_ptr, aa_code);

    qDebug();
    // First alignment pass.
    semi_global_alignment.fastAlign(experimental_spectrum, &protein);

    qDebug();
    // Retrieve the sub-sequences worth a more precise alignment.
    std::vector<pappso::specpeptidoms::Location> locations =
      semi_global_alignment.getLocationSaver().getLocations();

    qDebug();
    REQUIRE(locations.size() == 4);
    REQUIRE(locations.at(0).score == 16);
    REQUIRE(locations.at(0).beginning == 22);
    REQUIRE(locations.at(0).length == 31);

    // Collect one interpretation string per location, in location order.
    QStringList interpretation_list;
    for(const auto &loc : locations)
      {
        qDebug() << "loc.beginning=" << loc.beginning << " loc.length=" << loc.length;
        // Second alignment pass, restricted to the current location.
        semi_global_alignment.preciseAlign(
          experimental_spectrum, &protein, loc.beginning, loc.length);
        qDebug();
        // Retrieve the best alignment found for this location.
        pappso::specpeptidoms::Alignment best_alignment =
          semi_global_alignment.getBestAlignment();

        interpretation_list << best_alignment.m_peptideModel.toInterpretation();
      }

    // Check the size first so that the indexed accesses below are known to be in range.
    REQUIRE(interpretation_list.size() == 4);
    REQUIRE(interpretation_list.at(0).toStdString() == "[938.524]KEEE[150.078]");
    REQUIRE(interpretation_list.at(1).toStdString() ==
            "[168.09]EEE[D][-115.028]KPAA[16.0826]EEAAPPA");
    REQUIRE(locations.at(1).beginning == 1);
    REQUIRE(interpretation_list.at(3).toStdString() == "[554.268]EPAAE[552.347]");
  }

  SECTION("..:: Check precise alignment for AC207738.3_FGP009 on spectrum 504 ::..",
          "[precisealign]")
  {
    // /gorgone/pappso/moulon/database/Genome_Z_mays_5a.fasta
    /*
     * >AC207738.3_FGP009 seq=translation; coord=5:150095245..150095532:-1;
     * parent_transcript=AC207738.3_FGT009; parent_gene=AC207738.3_FG009
     * MLAFFLAKVAVDSQVLRRVLAGTGGSESDGVFLTVTVTAGQLGSGNGNGDKAAGRRTDGV
     * LGGGEDGFDFTGLLTGGGGQTSDATLHVQCGQRLL
     */

    // /gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML
    // beginning= 53 length= 18 tree= 5 score= 20 protein= "AC207738.3_FGP009"  spectrum_index= 504

    pappso::MsFileAccessor file_access(
      "/gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML", "");
    pappso::MsRunReaderSPtr msrunA01 = file_access.getMsRunReaderSPtrByRunId("", "runa01");

    // Amino acid code with modification MOD:00397 registered on residue 'C'.
    pappso::AaCode aa_code;
    aa_code.addAaModification('C', pappso::AaModification::getInstance("MOD:00397"));

    pappso::specpeptidoms::SpOMSProtein protein(
      "protein_id",
      "MLAFFLAKVAVDSQVLRRVLAGTGGSESDGVFLTVTVTAGQLGSGNGNGDKAAGRRTDGVLGGGEDGFDFTGLLTGGGGQTSDATLHVQCGQ"
      "RLL",
      aa_code);

    pappso::PrecisionPtr precision_ptr = pappso::PrecisionFactory::getDaltonInstance(0.02);

    // Read the spectrum, then filter it in place before building the SpOMS spectrum.
    pappso::QualifiedMassSpectrum spectrum_simple = msrunA01->qualifiedMassSpectrum(504);
    pappso::FilterChargeDeconvolution(precision_ptr).filter(*spectrum_simple.getMassSpectrumSPtr());
    pappso::FilterResampleKeepGreater(150).filter(*spectrum_simple.getMassSpectrumSPtr());
    pappso::FilterGreatestY(120).filter(*spectrum_simple.getMassSpectrumSPtr());

    pappso::specpeptidoms::SpOMSSpectrum experimental_spectrum(
      spectrum_simple, precision_ptr, aa_code);

    pappso::specpeptidoms::ScoreValues score_values;

    pappso::specpeptidoms::SemiGlobalAlignment semi_global_alignment(
      score_values, precision_ptr, aa_code);

    // First alignment pass.
    semi_global_alignment.fastAlign(experimental_spectrum, &protein);
    // Retrieve the sub-sequences worth a more precise alignment.
    std::vector<pappso::specpeptidoms::Location> locations =
      semi_global_alignment.getLocationSaver().getLocations();
    REQUIRE(locations.size() == 1);
    REQUIRE(locations.at(0).score == 20);
    REQUIRE(locations.at(0).beginning == 53);
    REQUIRE(locations.at(0).length == 18);

    for(const auto &loc : locations)
      {
        // Second alignment pass, restricted to the current location.
        semi_global_alignment.preciseAlign(
          experimental_spectrum, &protein, loc.beginning, loc.length);
        // Retrieve the best alignment found for this location.
        pappso::specpeptidoms::Alignment best_alignment =
          semi_global_alignment.getBestAlignment();

        // The expected best alignment is empty: no interpretation, no peaks, no
        // shifts, and every numeric field equal to zero.
        REQUIRE(best_alignment.m_peptideModel.toInterpretation().toStdString() == "");
        REQUIRE(best_alignment.SPC == 0);
        REQUIRE_THAT(best_alignment.peaks, Catch::Matchers::Approx(std::vector<std::size_t>({})));
        REQUIRE_THAT(best_alignment.shifts,
                     Catch::Matchers::Approx(std::vector<double>({})).margin(0.001));
        REQUIRE(best_alignment.score == 0);
        REQUIRE(best_alignment.beginning == 0);
        REQUIRE(best_alignment.end == 0);
        REQUIRE(protein.getSequence()
                  .mid(best_alignment.beginning, best_alignment.end - best_alignment.beginning)
                  .toStdString() == "");
        REQUIRE(best_alignment.begin_shift == Catch::Approx(0.0));
        REQUIRE(best_alignment.end_shift == Catch::Approx(0.0));
      }
  }

  // This section used to carry the same name as the AC207738.3_FGP009 section
  // above although it aligns a different protein; the name now says what it tests.
  SECTION(
    "..:: Check precise alignment and post-processing for GRMZM2G499900_P01 on spectrum 504 ::..",
    "[precisealign]")
  {
    // /gorgone/pappso/moulon/database/Genome_Z_mays_5a.fasta
    /*
     * >GRMZM2G499900_P01
     * MKENFCGDKDVNKTSPKYRTVRSAAATVTVTSFLYPSVNYMGSFLFLT
     */

    // /gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML
    // Values asserted below: beginning= 13 length= 18 score= 20
    // protein= "GRMZM2G499900_P01"  spectrum_index= 504

    pappso::MsFileAccessor file_access(
      "/gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML", "");
    pappso::MsRunReaderSPtr msrunA01 = file_access.getMsRunReaderSPtrByRunId("", "runa01");

    // Amino acid code with modification MOD:00397 registered on residue 'C'.
    pappso::AaCode aa_code;
    aa_code.addAaModification('C', pappso::AaModification::getInstance("MOD:00397"));

    pappso::specpeptidoms::SpOMSProtein protein(
      "protein_id", "MKENFCGDKDVNKTSPKYRTVRSAAATVTVTSFLYPSVNYMGSFLFLT", aa_code);

    pappso::PrecisionPtr precision_ptr = pappso::PrecisionFactory::getDaltonInstance(0.02);

    // Read the spectrum, then filter it in place before building the SpOMS spectrum.
    pappso::QualifiedMassSpectrum spectrum_simple = msrunA01->qualifiedMassSpectrum(504);
    pappso::FilterChargeDeconvolution(precision_ptr).filter(*spectrum_simple.getMassSpectrumSPtr());
    pappso::FilterResampleKeepGreater(150).filter(*spectrum_simple.getMassSpectrumSPtr());
    pappso::FilterGreatestY(120).filter(*spectrum_simple.getMassSpectrumSPtr());

    pappso::specpeptidoms::SpOMSSpectrum experimental_spectrum(
      spectrum_simple, precision_ptr, aa_code);

    pappso::specpeptidoms::ScoreValues score_values;

    pappso::specpeptidoms::SemiGlobalAlignment semi_global_alignment(
      score_values, precision_ptr, aa_code);

    // First alignment pass.
    semi_global_alignment.fastAlign(experimental_spectrum, &protein);
    // Retrieve the sub-sequences worth a more precise alignment.
    std::vector<pappso::specpeptidoms::Location> locations =
      semi_global_alignment.getLocationSaver().getLocations();
    REQUIRE(locations.size() == 1);
    REQUIRE(locations.at(0).score == 20);
    REQUIRE(locations.at(0).beginning == 13);
    REQUIRE(locations.at(0).length == 18);

    for(const auto &loc : locations)
      {
        // Second alignment pass, restricted to the current location.
        semi_global_alignment.preciseAlign(
          experimental_spectrum, &protein, loc.beginning, loc.length);
        // Retrieve the best alignment found for this location.
        pappso::specpeptidoms::Alignment best_alignment =
          semi_global_alignment.getBestAlignment();

        // The expected best alignment is empty: no interpretation, no peaks, no
        // shifts, and every numeric field equal to zero.
        REQUIRE(best_alignment.m_peptideModel.toInterpretation().toStdString() == "");
        REQUIRE(best_alignment.SPC == 0);
        REQUIRE_THAT(best_alignment.peaks, Catch::Matchers::Approx(std::vector<std::size_t>({})));
        REQUIRE_THAT(best_alignment.shifts,
                     Catch::Matchers::Approx(std::vector<double>({})).margin(0.001));
        REQUIRE(best_alignment.score == 0);
        REQUIRE(best_alignment.beginning == 0);
        REQUIRE(best_alignment.end == 0);
        REQUIRE(protein.getSequence()
                  .mid(best_alignment.beginning, best_alignment.end - best_alignment.beginning)
                  .toStdString() == "");
        REQUIRE(best_alignment.begin_shift == Catch::Approx(0.0));
        REQUIRE(best_alignment.end_shift == Catch::Approx(0.0));

        qDebug();
        // Post-processing pass: realign the same location using the potential mass
        // errors derived from the best precise alignment. Its outcome is only
        // logged; nothing is asserted on it.
        std::vector<double> potential_mass_errors =
          semi_global_alignment.getPotentialMassErrors(
            aa_code, best_alignment, protein.getSequence());
        semi_global_alignment.postProcessingAlign(
          experimental_spectrum, &protein, loc.beginning, loc.length, potential_mass_errors);
        pappso::specpeptidoms::Alignment best_post_processed_alignment =
          semi_global_alignment.getBestAlignment();
        if(best_post_processed_alignment.SPC > best_alignment.SPC)
          {
            qDebug() << "Best post-processed alignment"
                     << best_post_processed_alignment.m_peptideModel.toInterpretation()
                     << best_post_processed_alignment.score << "SPC"
                     << best_post_processed_alignment.SPC;
          }
        else
          {
            qDebug() << "no improvement in post-processing";
          }
      }
  }
}
