/**
 * BCL to FASTQ file converter
 * Copyright (c) 2007-2015 Illumina, Inc.
 *
 * This software is covered by the accompanying EULA
 * and certain third party copyright/licenses, and any user of this
 * source file is bound by the terms therein.
 *
 * \file FastqCreator.hh
 *
 * \brief Declaration of FASTQ creator.
 *
 * \author Marek Balint
 * \author Mauricio Varea
 */


#ifndef BCL2FASTQ_CONVERSION_FASTQCREATOR_HH
#define BCL2FASTQ_CONVERSION_FASTQCREATOR_HH

#include "layout/Layout.hh"
#include "stats/TileStats.hpp"
#include "stats/BarcodeHits.hh"
#include "conversion/BclBuffer.hh"
#include "conversion/FastqBuffer.hh"
#include "conversion/Stage.hh"
#include "conversion/Task.hh"
#include "conversion/SampleIndex.hh"
#include "conversion/FastqIterator.hh"
#include "conversion/AdapterLocator.hh"

#include <boost/ptr_container/ptr_vector.hpp>

namespace bcl2fastq
{

namespace io
{
    class GzipCompressor;
}

namespace conversion
{

/// \brief Task: Convert BCLs to FASTQs.
///
/// Several data members are references (BCL buffers, flowcell/lane/read
/// meta data, adapters, statistics containers and the output buffer); the
/// referenced objects must therefore outlive the task.
class FastqCreateTask : public Task
{
public:

    /// \brief BaseCalling statistics (one per thread).
    ///
    /// \c first holds the tile/barcode statistics, \c second holds the
    /// read/barcode statistics.
    typedef std::pair< std::vector<stats::TileBarcodeStats>,
                       std::vector<stats::ReadBarcodeStats> > ConversionStats;

public:

    /// \brief Constructor.
    /// \param bclBuffers BCL buffers.
    /// \param cyclesIndex Beginning of BCL cycles to process.
    /// \param cycleIndexEnd End of BCL cycles to process.
    /// \param flowcellInfo Flowcell meta data.
    /// \param laneInfo Lane meta data.
    /// \param currentReadInfo Read meta data.
    /// \param offsetsBegin Beginning of FASTQ offsets range.
    /// \param offsetsEnd End of FASTQ offsets range.
    /// \param maskShortAdapterReads Maximum number of useful bases in read after adapter trimming for which whole read is masked.
    /// \param adapterStringency Adapter stringency.
    /// \param maskAdapters Mask adapters.
    /// \param trimAdapters Trim adapters.
    /// \param generateReverseComplementFastqs Generate reverse complement FASTQs flag.
    /// \param includeNonPfClusters Include non-PF clusters in created FASTQ files flag.
    /// \param useBgzf If true, use BGZF compression
    /// \param compressionLevel Compression level used by zlib
    /// \param findAdaptersWithSlidingWindow If true, find adapters with the sliding window algorithm.
    /// \param tileStats Conversion statistics (a vector of ConversionStats; see the typedef above).
    /// \param unknownBarcodes Demultiplexing statistics (barcode hits).
    /// \param outputBuffer FASTQ output buffer.
    FastqCreateTask(
        const BclBufferVec& bclBuffers,
        size_t cyclesIndex,
        size_t cycleIndexEnd,
        const layout::FlowcellInfo &flowcellInfo,
        const layout::LaneInfo &laneInfo,
        const layout::ReadInfo& currentReadInfo,
        SampleIndex::FastqOffsetsContainer::const_iterator offsetsBegin,
        SampleIndex::FastqOffsetsContainer::const_iterator offsetsEnd,
        std::size_t maskShortAdapterReads,
        float adapterStringency,
        const boost::ptr_vector<AdapterLocator>& maskAdapters,
        const boost::ptr_vector<AdapterLocator>& trimAdapters,
        bool generateReverseComplementFastqs,
        bool includeNonPfClusters,
        bool useBgzf,
        int compressionLevel,
        bool findAdaptersWithSlidingWindow,
        std::vector< FastqCreateTask::ConversionStats > &tileStats,
        std::vector<stats::BarcodeHits> &unknownBarcodes,
        FastqBuffer::FastqsContainer::value_type::value_type::value_type &outputBuffer
    );

public:

    /// \brief Execute the task.
    /// \param threadNum Thread number.
    /// \return NOTE(review): meaning of the result is defined by the Task
    ///         interface, which is not visible in this header -- confirm there.
    virtual bool execute(common::ThreadVector::size_type threadNum);

private:

    /// \brief Update tileStats_ accordingly.
    /// \param threadNum Thread number.
    /// \param basesBegin Beginning of data.
    /// \param basesEnd End of data.
    /// \param offset Offset of the FASTQ in the input buffer.
    /// \param filterFlag Whether data passes chastity filter or not.
    /// \param trimmedCount Number of trimmed bases.
    /// \return Whether or not the barcode has been found in the table.
    bool computeStatistics(
        common::ThreadVector::size_type threadNum,
        FastqConstIterator basesBegin,
        FastqConstIterator basesEnd,
        SampleIndex::FastqOffsetsContainer::value_type offset,
        bool filterFlag,
        size_t trimmedCount
    );

    /// \brief Create FASTQ.
    /// \param threadNum Thread number.
    /// \param offset Offset of the FASTQ in the input buffer.
    /// \param compressor Gzip compressor.
    void fastqCreate(
        common::ThreadVector::size_type threadNum,
        SampleIndex::FastqOffsetsContainer::value_type offset,
        io::GzipCompressor& compressor
    );

    /// \brief Create the barcode strings for the current read.
    /// \param barcodeStrings Vector of barcode strings (output; passed by non-const reference).
    /// \param offset Offset of the FASTQ in the input buffer.
    void createBarcodeStrings(std::vector<std::string>&                      barcodeStrings,
                              SampleIndex::FastqOffsetsContainer::value_type offset);

    /// \brief Write a header element.
    /// \param element Element to write.
    void writeHeaderElement(const std::string& element);

    /// \brief Create the header.
    /// \param offset Offset of the FASTQ in the input buffer.
    /// \param filterFlag True if filter passed.
    void createHeader(SampleIndex::FastqOffsetsContainer::value_type offset,
                      bool                                           filterFlag);

    /// \brief Create the bases and quality scores.
    /// \param offset Offset of the FASTQ in the input buffer.
    /// \param compressor Gzip compressor.
    /// \param trimmedCount Number of trimmed bases (output; passed by non-const reference).
    void createBasesAndQualities(SampleIndex::FastqOffsetsContainer::value_type offset,
                                 io::GzipCompressor& compressor,
                                 size_t& trimmedCount);

    /// \brief Identify adapter sequence with a sliding window.
    /// \param basesBegin Beginning of data.
    /// \param basesEnd End of data.
    /// \param adapter Adapter to be identified.
    /// \param adapterStringency Adapter stringency.
    /// \return Iterator pointing to the first base of identified adapter (end iterator otherwise).
    static FastqConstIterator identifyAdapterWithSlidingWindow(
        FastqConstIterator basesBegin,
        FastqConstIterator basesEnd,
        const std::string &adapter,
        float adapterStringency
    );

private:

    // NOTE: data members are initialized in declaration order; do not reorder
    // them without checking the constructor's initializer list.

    /// \brief BCL buffers.
    const BclBufferVec& bclBuffers_;

    /// \brief Beginning of BCL cycles to process.
    const size_t cycleIndex_;

    /// \brief End of BCL cycles to process.
    const size_t cycleIndexEnd_;

    /// \brief Flowcell meta data.
    const layout::FlowcellInfo &flowcellInfo_;

    /// \brief Lane meta data.
    const layout::LaneInfo &laneInfo_;

    /// \brief Read meta data.
    const layout::ReadInfo &readInfo_;

    /// \brief Sample meta data.
    ///
    /// NOTE(review): not a constructor parameter; presumably derived from the
    /// other constructor arguments -- confirm in the constructor definition.
    const layout::BarcodeTranslationTable::SampleMetadata sampleMetadata_;

    /// \brief Beginning of FASTQ offsets range.
    const SampleIndex::FastqOffsetsContainer::const_iterator offsetsBegin_;

    /// \brief End of FASTQ offsets range.
    const SampleIndex::FastqOffsetsContainer::const_iterator offsetsEnd_;

    /// \brief Buffer to write FASTQs to.
    FastqBuffer::FastqsContainer::value_type::value_type::value_type &outputBuffer_;

    /// \brief Intermediate buffer (owned by the task; same type as the output buffer).
    FastqBuffer::FastqsContainer::value_type::value_type::value_type buffer_;

    /// \brief Use BGZF compression if true.
    bool useBgzf_;

    /// \brief Compression level.
    int compressionLevel_;

    /// \brief Find adapters with the sliding window algorithm if true.
    bool findAdaptersWithSlidingWindow_;

    /// \brief Minimum read length after adapter trimming.
    ///
    /// NOTE(review): not a constructor parameter; its source is not visible
    /// in this header.
    std::size_t minimumTrimmedReadLength_;

    /// \brief Maximum number of useful bases in read after adapter trimming for which whole read is masked.
    std::size_t maskShortAdapterReads_;

    /// \brief Adapter stringency.
    float adapterStringency_;

    /// \brief Mask adapters.
    const boost::ptr_vector<AdapterLocator>& maskAdapters_;

    /// \brief Trim adapters.
    const boost::ptr_vector<AdapterLocator>& trimAdapters_;

    /// \brief Generate reverse complement FASTQs flag.
    bool generateReverseComplementFastqs_;

    /// \brief Include non-PF clusters in created FASTQ files.
    bool includeNonPfClusters_;

    /// \brief Conversion statistics.
    std::vector< FastqCreateTask::ConversionStats > &tileStats_;

    /// \brief Demultiplexing statistics.
    std::vector<stats::BarcodeHits> &unknownBarcodes_;

    /// \brief UMI cycles.
    std::vector< common::CycleRange > umiCycles_;
};


/// \brief FASTQ creator.
///
/// Intermediate pipeline stage that takes BclBufferVec as input buffer type
/// and FastqBuffer as output buffer type. It keeps both summary statistics
/// (held by reference) and statistics containers documented as one entry per
/// thread (owned by the stage).
class FastqCreator : public IntermediateStage<BclBufferVec, FastqBuffer>
{
public:

    /// \brief Constructor.
    /// \param threadsCount Number of threads.
    /// \param inputMediator Input mediator.
    /// \param outputMediator Output mediator.
    /// \param layout Flowcell layout.
    /// \param laneInfo Lane meta data.
    /// \param maskShortAdapterReads Maximum number of useful bases in read after adapter trimming for which whole read is masked.
    /// \param adapterStringency Adapter stringency.
    /// \param generateReverseComplementFastqs Generate reverse complement FASTQs.
    /// \param includeNonPfClusters Include non-PF clusters in created FASTQ files flag.
    /// \param createFastqsForIndexReads Create FASTQ files also for index reads flag.
    /// \param useBgzf If true, use BGZF compression
    /// \param compressionLevel Compression level used by zlib
    /// \param findAdaptersWithSlidingWindow If true, find adapters with the sliding window algorithm.
    /// \param summaryTileStats Conversion statistics (summary).
    /// \param unknownBarcodeHits Counts of unknown barcodes
    FastqCreator(
        common::ThreadVector::size_type threadsCount,
        StageMediator<InputBuffer> &inputMediator,
        StageMediator<OutputBuffer> &outputMediator,
        const layout::Layout &layout,
        const layout::LaneInfo &laneInfo,
        std::size_t maskShortAdapterReads,
        float adapterStringency,
        bool generateReverseComplementFastqs,
        bool includeNonPfClusters,
        bool createFastqsForIndexReads,
        bool useBgzf,
        int compressionLevel,
        bool findAdaptersWithSlidingWindow,
        FastqCreateTask::ConversionStats &summaryTileStats,
        stats::BarcodeHits &unknownBarcodeHits
    );

public:

    /// \brief Hook run before execution of the stage.
    /// \return NOTE(review): meaning of the result is defined by the stage
    ///         base class, which is not visible in this header -- confirm there.
    virtual bool preExecute();

    /// \brief Hook run after execution of the stage.
    /// \return NOTE(review): meaning of the result is defined by the stage
    ///         base class, which is not visible in this header -- confirm there.
    virtual bool postExecute();

private:

    /// \brief Create tile statistics.
    ///
    /// NOTE(review): presumably populates tileStats_ for the given sample;
    /// the definition is not visible in this header -- confirm.
    /// \param sampleMetadata Sample meta data.
    /// \param nonIndexReadLength Non-index read length.
    /// \param numNonIndexReads Number of non-index reads.
    void createTileStats(const layout::BarcodeTranslationTable::SampleMetadata &sampleMetadata,
                         size_t nonIndexReadLength,
                         size_t numNonIndexReads);

    /// \brief Create adapter locators.
    ///
    /// NOTE(review): presumably creates one AdapterLocator per adapter in
    /// [begin, end) and stores it in \p adapters -- confirm in the definition.
    /// \param adapters Container of adapter locators (output; passed by non-const reference).
    /// \param begin Beginning of the adapters range.
    /// \param end End of the adapters range.
    void createAdapters(boost::ptr_vector<AdapterLocator>& adapters,
                        layout::ReadInfo::AdaptersContainer::const_iterator begin,
                        layout::ReadInfo::AdaptersContainer::const_iterator end) const;

private:

    // NOTE: data members are initialized in declaration order; do not reorder
    // them without checking the constructor's initializer list.

    /// \brief Layout.
    const layout::Layout &layout_;

    /// \brief Current lane.
    const layout::LaneInfo &laneInfo_;

    /// \brief Number of threads.
    const common::ThreadVector::size_type threadsCount_;

    /// \brief Sample index.
    SampleIndex sampleIndex_;

    /// \brief Maximum number of useful bases in read after adapter trimming for which whole read is masked.
    std::size_t maskShortAdapterReads_;

    /// \brief Adapter stringency.
    float adapterStringency_;

    /// \brief Mask adapters (a container of adapter-locator containers).
    boost::ptr_vector< boost::ptr_vector<AdapterLocator> > maskAdapters_;

    /// \brief Trim adapters (a container of adapter-locator containers).
    boost::ptr_vector< boost::ptr_vector<AdapterLocator> > trimAdapters_;

    /// \brief Generate reverse complement FASTQs flag.
    bool generateReverseComplementFastqs_;

    /// \brief Include non-PF clusters in created FASTQ files.
    bool includeNonPfClusters_;

    /// \brief Create FASTQ files also for index reads.
    bool createFastqsForIndexReads_;

    /// \brief Use BGZF compression if true.
    bool useBgzf_;

    /// \brief Compression level used by zlib
    int compressionLevel_;

    /// \brief Find adapters with the sliding window algorithm if true.
    bool findAdaptersWithSlidingWindow_;

    /// \brief Conversion statistics (summary).
    FastqCreateTask::ConversionStats &summaryTileStats_;

    /// \brief Conversion statistics (one per thread).
    std::vector< FastqCreateTask::ConversionStats > tileStats_;

    /// \brief Unknown barcodes statistics (summary).
    stats::BarcodeHits &unknownBarcodesHits_;

    /// \brief Unknown barcodes statistics (one per thread).
    std::vector<stats::BarcodeHits> unknownBarcodes_;
};


} // namespace conversion
} // namespace bcl2fastq


#endif // BCL2FASTQ_CONVERSION_FASTQCREATOR_HH


