/**
 * BCL to FASTQ file converter
 * Copyright (c) 2007-2015 Illumina, Inc.
 *
 * This software is covered by the accompanying EULA
 * and certain third party copyright/licenses, and any user of this
 * source file is bound by the terms therein.
 *
 * \file BclLoader.cpp
 *
 * \brief Implementation of BCL loader.
 *
 * \author Marek Balint
 * \author Mauricio Varea
 */


#include <errno.h>
#include <algorithm>
#include <utility>

#include <boost/format.hpp>
#include <boost/foreach.hpp>
#include <boost/filesystem.hpp>

#include "common/Debug.hh"
#include "common/Exceptions.hh"
#include "common/Types.hh"
#include "conversion/BclLoader.hh"


namespace bcl2fastq {


namespace conversion {


/**
 * \brief Create a task that loads the BCL data of one cycle.
 *
 * Every member is initialised from the argument of the same name; the
 * constructor body itself does nothing.
 */
BclLoadTask::BclLoadTask(
    data::BclFile& bclFile,
    data::CycleBCIFile& cycleBciFile,
    const boost::filesystem::path& inputDir,
    bool aggregateTilesFlag,
    const layout::LaneInfo& laneInfo,
    const layout::CycleInfo& cycleInfo,
    layout::LaneInfo::TileInfosContainer::const_iterator& tileInfoIter,
    const std::vector<common::ClustersCount>& clustersCounts,
    bool ignoreMissingBcls,
    size_t bclIdx,
    BclBufferVec& outputBuffer
) :
    Task(),
    bclFile_(bclFile),
    cycleBciFile_(cycleBciFile),
    inputDir_(inputDir),
    aggregateTilesFlag_(aggregateTilesFlag),
    laneInfo_(laneInfo),
    cycleInfo_(cycleInfo),
    tileInfoIter_(tileInfoIter),
    clustersCounts_(clustersCounts),
    ignoreMissingBcls_(ignoreMissingBcls),
    bclIdx_(bclIdx),
    outputBuffer_(outputBuffer)
{
}

/**
 * \brief Load this task's cycle into slot bclIdx_ of every buffer in outputBuffer_.
 *
 * For each buffer, in order:
 *  - aggregate-tiles mode: the cluster count comes from clustersCounts_, the
 *    lane/cycle BCL file is opened if it is not open yet, and, when the current
 *    tile reports skipped tiles, the file is positioned using the record the
 *    cycle BCI file holds for that tile;
 *  - otherwise: the per-tile BCL file is opened and supplies the cluster count.
 * The data is then read into the buffer and the tile iterator is advanced.
 *
 * A short read is either logged and padded with zeros (ignoreMissingBcls_) or
 * reported as common::InputDataError.
 *
 * \param threadNum Not used by this task.
 * \return Always true; failures are signalled by exception.
 */
bool BclLoadTask::execute(common::ThreadVector::size_type threadNum)
{
    size_t bufferIndex = 0;
    for (auto& outputBuffer : outputBuffer_)
    {
        common::ClustersCount clustersCount = 0;

        if (aggregateTilesFlag_)
        {
            clustersCount = clustersCounts_[bufferIndex];
            if (!bclFile_.isOpen())
            {
                bclFile_.openFile(inputDir_,
                                  laneInfo_.getNumber(),
                                  cycleInfo_.getNumber(),
                                  ignoreMissingBcls_);
            }

            if (tileInfoIter_->getSkippedTilesCount())
            {
                if (!cycleBciFile_.isOpen())
                {
                    cycleBciFile_.openFile(inputDir_, laneInfo_.getNumber(), cycleInfo_.getNumber());
                }
                data::CycleBCIFile::Record record = cycleBciFile_.getRecord(tileInfoIter_->getIndex());
                bclFile_.seek(record.compressedOffset, record.uncompressedOffset);
            }
        }
        else
        {
            bclFile_.openFile(inputDir_,
                              laneInfo_.getNumber(),
                              tileInfoIter_->getNumber(),
                              cycleInfo_.getNumber(),
                              ignoreMissingBcls_);

            clustersCount = bclFile_.getClustersCount();
        }

        auto& bcls = outputBuffer.bcls_[bclIdx_];
        bcls.resize(clustersCount);
        // data() rather than &*begin(): the container is empty when clustersCount
        // is 0, and dereferencing begin() of an empty container is undefined.
        std::streamsize result = bclFile_.read(bcls.data(), clustersCount);
        // Capture errno immediately, before any later call can overwrite it.
        int errnum = errno;
        if (result != static_cast<std::streamsize>(clustersCount))
        {
            if (ignoreMissingBcls_)
            {
                BCL2FASTQ_LOG(common::LogLevel::WARNING) << "BCL file '" << bclFile_.getPath() << "' truncated: bytes_read=" << result << " bytes_expected=" << clustersCount << ":" << std::strerror(errnum) << " (" << errnum << ")" << std::endl;
                // Keep what was read, then zero-fill up to the expected size.
                bcls.resize(result > 0 ? result : 0);
                bcls.resize(clustersCount, 0);
            }
            else
            {
                BOOST_THROW_EXCEPTION(common::InputDataError(errnum, (boost::format("BCL file '%s' truncated: bytes_read=%d bytes_expected=%d") % bclFile_.getPath().string() % result % clustersCount).str()));
            }
        }

        ++bufferIndex;
        ++tileInfoIter_;
    }
    return true;
}


/**
 * \brief Create a task that loads cluster positions.
 *
 * Every member is initialised from the argument of the same name; the
 * constructor body itself does nothing.
 */
PositionsLoadTask::PositionsLoadTask(
    boost::shared_ptr<data::PositionsFile>& positionsFile,
    const boost::filesystem::path& intensitiesDir,
    bool aggregateTilesFlag,
    bool isPatternedFlowcell,
    const layout::LaneInfo& laneInfo,
    layout::LaneInfo::TileInfosContainer::const_iterator& tileInfoIter,
    const std::vector<common::ClustersCount>& clustersCounts,
    bool ignoreMissingPositions,
    BclBufferVec& outputBuffer,
    BclBuffer::PositionsContainer& patternedFlowcellPositions
) :
    Task(),
    positionsFile_(positionsFile),
    intensitiesDir_(intensitiesDir),
    aggregateTilesFlag_(aggregateTilesFlag),
    isPatternedFlowcell_(isPatternedFlowcell),
    laneInfo_(laneInfo),
    tileInfoIter_(tileInfoIter),
    clustersCounts_(clustersCounts),
    ignoreMissingPositions_(ignoreMissingPositions),
    outputBuffer_(outputBuffer),
    patternedFlowcellPositions_(patternedFlowcellPositions)
{
}

/**
 * \brief Task entry point: load positions for the current batch of tiles.
 *
 * On a patterned flowcell a single load is done into
 * patternedFlowcellPositions_, using the first entry of clustersCounts_.
 * Otherwise each buffer in outputBuffer_ gets its own load, paired with the
 * matching entry of clustersCounts_, and the tile iterator is advanced after
 * each one.
 *
 * \param threadNum Not used by this task.
 * \return The result of the single load for patterned flowcells, true otherwise.
 */
bool PositionsLoadTask::execute(common::ThreadVector::size_type threadNum)
{
    if (isPatternedFlowcell_)
    {
        return execute(patternedFlowcellPositions_, clustersCounts_[0]);
    }

    auto countIter = clustersCounts_.begin();
    for (auto& tileBuffer : outputBuffer_)
    {
        execute(tileBuffer.positions_, *countIter);
        ++countIter;
        ++tileInfoIter_;
    }

    return true;
}

/**
 * \brief Read the positions of the current tile into \p outputBuffer.
 *
 * A positions file is (re)created through the factory when none is held yet or
 * when tiles are not aggregated. If the factory yields nothing, the buffer is
 * cleared (ignoreMissingPositions_) or common::InputDataError is raised.
 *
 * With aggregated tiles on a non-patterned flowcell, the records of the
 * clusters the tile reports as skipped are read and discarded first and
 * \p clustersCount is used as given; in every other case the count is taken
 * from the positions file itself.
 *
 * A short read is either logged and padded with zeroed records
 * (ignoreMissingPositions_) or reported as common::InputDataError.
 *
 * \param outputBuffer  Destination container; resized to the cluster count.
 * \param clustersCount Expected number of records (may be overridden, see above).
 * \return Always true; failures are signalled by exception.
 */
bool PositionsLoadTask::execute(BclBuffer::PositionsContainer& outputBuffer,
                                common::ClustersCount clustersCount)
{
    if (!positionsFile_ || !aggregateTilesFlag_)
    {
        positionsFile_ = data::PositionsFileFactory::createPositionsFile(intensitiesDir_,
                                                                         aggregateTilesFlag_,
                                                                         isPatternedFlowcell_,
                                                                         laneInfo_.getNumber(),
                                                                         tileInfoIter_->getNumber(),
                                                                         ignoreMissingPositions_);

        if (!positionsFile_)
        {
            if (ignoreMissingPositions_)
            {
                outputBuffer.clear();
                return true;
            }
            else
            {
                BOOST_THROW_EXCEPTION(common::InputDataError(
                    errno,
                    (boost::format("Could not find positions file for lane %s and tile %s.")
                        % laneInfo_.getNumber() % tileInfoIter_->getNumber()).str() ));
            }
        }
    }

    if (aggregateTilesFlag_ && !isPatternedFlowcell_)
    {
        std::size_t skippedClustersCount = tileInfoIter_->getSkippedClustersCount();
        if (skippedClustersCount)
        {
            // Consume the skipped records; their contents are not needed.
            std::vector<data::PositionsFile::Record> buffer;
            buffer.resize(skippedClustersCount);
            positionsFile_->read(buffer.data(), skippedClustersCount);
        }
    }
    else
    {
        clustersCount = positionsFile_->getClustersCount();
    }

    outputBuffer.resize(clustersCount);
    // data() rather than &*begin(): the container is empty when clustersCount
    // is 0, and dereferencing begin() of an empty container is undefined.
    std::size_t recordsRead = positionsFile_->read(outputBuffer.data(), clustersCount);

    // Capture errno immediately, before any later call can overwrite it.
    int errnum = errno;
    if (recordsRead != clustersCount)
    {
        if (ignoreMissingPositions_)
        {
            BCL2FASTQ_LOG(common::LogLevel::WARNING) << "Positions file '" << positionsFile_->getPath() << "' truncated: records_read=" << recordsRead << " records_expected=" << clustersCount << ":" << std::strerror(errnum) << " (" << errnum << ")" << std::endl;
            // Keep what was read, then pad with zeroed records.
            outputBuffer.resize(recordsRead);
            static const data::PositionsFile::Record defaultRecord = { /* .x_ = */ 0, /* .y_ = */ 0 };
            outputBuffer.resize(clustersCount, defaultRecord);
        }
        else
        {
            BOOST_THROW_EXCEPTION(common::InputDataError(errnum, (boost::format("Positions file '%s' truncated: records_real=%d records_expected=%d") % positionsFile_->getPath().string() % recordsRead % clustersCount).str()));
        }
    }

    return true;
}


/**
 * \brief Create a task that loads filter records.
 *
 * Every member is initialised from the argument of the same name; the
 * constructor body itself does nothing.
 */
FilterLoadTask::FilterLoadTask(
    boost::shared_ptr<data::FilterFile>& filterFile,
    const boost::filesystem::path& inputDir,
    bool aggregateTilesFlag,
    const layout::LaneInfo& laneInfo,
    layout::LaneInfo::TileInfosContainer::const_iterator& tileInfoIter,
    const std::vector<common::ClustersCount>& clustersCounts,
    bool ignoreMissingFilters,
    BclBufferVec& outputBuffer
) :
    Task(),
    filterFile_(filterFile),
    inputDir_(inputDir),
    aggregateTilesFlag_(aggregateTilesFlag),
    laneInfo_(laneInfo),
    tileInfoIter_(tileInfoIter),
    clustersCounts_(clustersCounts),
    ignoreMissingFilters_(ignoreMissingFilters),
    outputBuffer_(outputBuffer)
{
}

/**
 * \brief Task entry point: load filter records for every buffer in outputBuffer_.
 *
 * When a filter file is already held, it is read in aggregate-tiles mode.
 * Otherwise the file is looked up on disk: if it is absent the buffer is left
 * untouched, if present it is read either as an aggregate file or as a
 * per-tile file depending on aggregateTilesFlag_. The tile iterator is
 * advanced after each buffer.
 *
 * \param threadNum Not used by this task.
 * \return Always true; failures are signalled by exception.
 */
bool FilterLoadTask::execute(common::ThreadVector::size_type threadNum)
{
    size_t tileIdx = 0;
    for (auto& tileBuffer : outputBuffer_)
    {
        if (filterFile_)
        {
            BOOST_ASSERT(aggregateTilesFlag_);
            readAggregateTilesFilterFile(*tileInfoIter_,
                                         clustersCounts_[tileIdx],
                                         tileBuffer);
        }
        else
        {
            boost::filesystem::path filterPath;
            if (!data::FilterFile::doesFileExist(inputDir_,
                                                 aggregateTilesFlag_,
                                                 laneInfo_.getNumber(),
                                                 tileInfoIter_->getNumber(),
                                                 filterPath))
            {
                // No filter file on disk: nothing is loaded for this tile.
            }
            else if (aggregateTilesFlag_)
            {
                readAggregateTilesFilterFile(*tileInfoIter_,
                                             clustersCounts_[tileIdx],
                                             tileBuffer,
                                             filterPath);
            }
            else
            {
                readTileFilterFile(filterPath,
                                   *tileInfoIter_,
                                   tileBuffer);
            }
        }

        ++tileIdx;
        ++tileInfoIter_;
    }

    return true;
}

/**
 * \brief Read one tile's filter records from the aggregate filter file.
 *
 * Opens the file at \p filePath if no filter file is held yet, reads and
 * discards the records of the clusters \p tileInfo reports as skipped, then
 * reads \p clustersCount records into \p outputBuffer.
 *
 * \param tileInfo      Tile being loaded; supplies the skipped-clusters count.
 * \param clustersCount Number of records expected for the tile.
 * \param outputBuffer  Buffer whose filters_ container receives the records.
 * \param filePath      Path used only when the filter file must be created.
 */
void FilterLoadTask::readAggregateTilesFilterFile(const layout::TileInfo& tileInfo,
                                                  common::ClustersCount clustersCount,
                                                  BclBuffer& outputBuffer,
                                                  const boost::filesystem::path& filePath)
{
    if (!filterFile_)
    {
        filterFile_ = boost::shared_ptr<data::FilterFile>(new data::FilterFile(filePath,
                                                                               ignoreMissingFilters_));
    }

    // Use the tile that was passed in rather than the member iterator, so the
    // skip count always belongs to the tile whose records are read below.
    std::size_t skippedClustersCount = tileInfo.getSkippedClustersCount();
    if (skippedClustersCount)
    {
        // Consume the skipped records; their contents are not needed.
        std::vector<data::FilterFile::Record> buffer;
        buffer.resize(skippedClustersCount);
        filterFile_->read(buffer.data(), skippedClustersCount);
    }

    readFilterFile(*filterFile_,
                   tileInfo,
                   clustersCount,
                   outputBuffer);
}

/**
 * \brief Read a per-tile filter file in full.
 *
 * Opens the file at \p filePath and reads as many records as the file itself
 * reports into \p outputBuffer.
 *
 * \param filePath     Path of the per-tile filter file.
 * \param tileInfo     Tile being loaded; forwarded to readFilterFile().
 * \param outputBuffer Buffer whose filters_ container receives the records.
 */
void FilterLoadTask::readTileFilterFile(const boost::filesystem::path& filePath,
                                        const layout::TileInfo& tileInfo,
                                        BclBuffer& outputBuffer)
{
    data::FilterFile tileFilterFile(filePath, ignoreMissingFilters_);

    readFilterFile(tileFilterFile,
                   tileInfo,
                   tileFilterFile.getClustersCount(),
                   outputBuffer);
}

/**
 * \brief Read \p clustersCount filter records from \p filterFile.
 *
 * The destination container is resized first; the number of records actually
 * read is then checked by validateFilterRecords().
 *
 * \param filterFile    File to read from.
 * \param tileInfo      Not used here.
 * \param clustersCount Number of records expected.
 * \param outputBuffer  Buffer whose filters_ container receives the records.
 */
void FilterLoadTask::readFilterFile(data::FilterFile& filterFile,
                                    const layout::TileInfo& tileInfo,
                                    common::ClustersCount clustersCount,
                                    BclBuffer& outputBuffer)
{
    outputBuffer.filters_.resize(clustersCount);
    // data() rather than &*begin(): the container is empty when clustersCount
    // is 0, and dereferencing begin() of an empty container is undefined.
    std::size_t recordsRead = filterFile.read(outputBuffer.filters_.data(), clustersCount);
    validateFilterRecords(recordsRead,
                          clustersCount,
                          filterFile.getPath(),
                          outputBuffer);
}

/**
 * \brief Check that a filter read delivered the expected number of records.
 *
 * Nothing happens when the counts match. On a mismatch the task either logs a
 * warning and pads filters_ up to \p clustersCount with records built from
 * 0x01 (ignoreMissingFilters_), or raises common::InputDataError.
 *
 * \param recordsRead   Number of records the read returned.
 * \param clustersCount Number of records expected.
 * \param filePath      Path of the file, used in the diagnostics.
 * \param outputBuffer  Buffer whose filters_ container is padded.
 */
void FilterLoadTask::validateFilterRecords(std::size_t                    recordsRead,
                                           common::ClustersCount          clustersCount,
                                           const boost::filesystem::path& filePath,
                                           BclBuffer&                     outputBuffer)
{
    // Capture errno first, before anything below can overwrite it.
    const int errnum = errno;

    if (recordsRead == clustersCount)
    {
        return;
    }

    if (!ignoreMissingFilters_)
    {
        BOOST_THROW_EXCEPTION(common::InputDataError(errnum, (boost::format("Filter file '%s' truncated: records_real=%d records_expected=%d") % filePath.string() % recordsRead % clustersCount).str()));
    }

    BCL2FASTQ_LOG(common::LogLevel::WARNING) << "Filter file '" << filePath << "' truncated: records_read=" << recordsRead << " records_expected=" << clustersCount << ":" << std::strerror(errnum) << " (" << errnum << ")" << std::endl;

    // Keep what was read, then pad the remainder with the default record.
    static const data::FilterFile::Record defaultRecord(0x01);
    outputBuffer.filters_.resize(recordsRead);
    outputBuffer.filters_.resize(clustersCount, defaultRecord);
}

/**
 * \brief Create a task that loads control records.
 *
 * Every member is initialised from the argument of the same name; the
 * constructor body itself does nothing.
 */
ControlLoadTask::ControlLoadTask(
    boost::shared_ptr<data::ControlFile>& controlFile,
    const boost::filesystem::path& inputDir,
    const layout::LaneInfo& laneInfo,
    layout::LaneInfo::TileInfosContainer::const_iterator& tileInfoIter,
    bool ignoreMissingControls,
    BclBufferVec& outputBuffer
) :
    Task(),
    controlFile_(controlFile),
    inputDir_(inputDir),
    laneInfo_(laneInfo),
    tileInfoIter_(tileInfoIter),
    ignoreMissingControls_(ignoreMissingControls),
    outputBuffer_(outputBuffer)
{
}

/**
 * \brief Task entry point: load control records for every buffer in outputBuffer_.
 *
 * For each buffer a control file is requested from the factory for the current
 * tile and read in full into controls_; the tile iterator is then advanced.
 * A short read is either logged and padded with default-constructed records
 * (ignoreMissingControls_) or reported as common::InputDataError.
 *
 * \param threadNum Not used by this task.
 * \return Always true; failures are signalled by exception.
 */
bool ControlLoadTask::execute(common::ThreadVector::size_type threadNum)
{
    for (auto& outputBuffer : outputBuffer_)
    {
        controlFile_ = data::ControlFileFactory::createControlFile(inputDir_,
                                                                   laneInfo_.getNumber(),
                                                                   tileInfoIter_->getNumber(),
                                                                   ignoreMissingControls_);

        if (!controlFile_)
        {
            // There might not be a control file.
            // NOTE(review): this ends the whole task, so buffers after this one
            // are not loaded either -- presumably control files exist for all
            // tiles or for none; confirm.
            return true;
        }

        common::ClustersCount clustersCount = controlFile_->getClustersCount();

        outputBuffer.controls_.resize(clustersCount);
        // data() rather than &*begin(): the container is empty when clustersCount
        // is 0, and dereferencing begin() of an empty container is undefined.
        std::size_t recordsRead = controlFile_->read(outputBuffer.controls_.data(), clustersCount);

        // Capture errno immediately, before any later call can overwrite it.
        int errnum = errno;
        if (recordsRead != clustersCount)
        {
            if (ignoreMissingControls_)
            {
                BCL2FASTQ_LOG(common::LogLevel::WARNING) << "Control file '" << controlFile_->getPath() << "' truncated: records_read=" << recordsRead << " records_expected=" << clustersCount << ":" << std::strerror(errnum) << " (" << errnum << ")" << std::endl;
                // Keep what was read, then pad with default-constructed records.
                outputBuffer.controls_.resize(recordsRead);
                outputBuffer.controls_.resize(clustersCount);
            }
            else
            {
                BOOST_THROW_EXCEPTION(common::InputDataError(errnum, (boost::format("Control file '%s' truncated: records_real=%d records_expected=%d") % controlFile_->getPath().string() % recordsRead % clustersCount).str()));
            }
        }
        ++tileInfoIter_;
    }
    return true;
}


/**
 * \brief Create the BCL loading stage for one lane.
 *
 * Members are initialised from the matching arguments, the tile cursor starts
 * at the lane's first tile, and one BCL file object plus one cycle BCI file
 * object is allocated for every cycle the lane reports as to be loaded.
 * Asserts that there is at least one such cycle.
 */
BclLoader::BclLoader(
    common::ThreadVector::size_type threadsCount,
    StageMediator<OutputBuffer>& outputMediator,
    const layout::Layout& layout,
    const layout::LaneInfo& laneInfo,
    bool ignoreMissingBcls,
    bool ignoreMissingFilters,
    bool ignoreMissingPositions,
    bool ignoreMissingControls,
    const boost::filesystem::path& inputDir,
    const boost::filesystem::path& intensitiesDir
) :
    SourceStage<BclBufferVec>(threadsCount, "Bcl loading", outputMediator),
    layout_(layout),
    laneInfo_(laneInfo),
    ignoreMissingBcls_(ignoreMissingBcls),
    ignoreMissingFilters_(ignoreMissingFilters),
    ignoreMissingPositions_(ignoreMissingPositions),
    ignoreMissingControls_(ignoreMissingControls),
    currentTileInfo_(laneInfo_.getTileInfos().begin()),
    inputDir_(inputDir),
    intensitiesDir_(intensitiesDir),
    bclFiles_(),
    cycleBciFiles_(),
    positionsFile_(),
    filterFile_(),
    controlFile_(),
    patternedFlowcellPositions_(),
    uniqueFailedReadIndex_(0),
    clustersCounts_()
{
    const common::CycleNumber cyclesToLoad = laneInfo.getNumCyclesToLoad();
    BCL2FASTQ_ASSERT_MSG(cyclesToLoad != 0, "There are no cycles to be processed");

    // One BCL file and one BCI file object per cycle, at matching indices.
    for (common::CycleNumber cycle = 0; cycle < cyclesToLoad; ++cycle)
    {
        bclFiles_.push_back(new data::BclFile);
        cycleBciFiles_.push_back(new data::CycleBCIFile);
    }
}

bool BclLoader::preExecute()
{
    if (currentTileInfo_ == laneInfo_.getTileInfos().end())
    {
        return false;
    }

    TaskQueue &taskQueue = this->getTaskQueue();
    BclBufferVec &outputBuffer = this->getOutputBuffer();
    outputBuffer.clear();

    const bool aggregateTilesFlag = layout_.getFlowcellInfo().getAggregateTilesFlag();
    const bool isPatternedFlowcell = layout_.getFlowcellInfo().isPatternedFlowcell();

    // We want a minimum number of clusters per sample
    size_t desiredClustersCount = ClustersPerTask * std::distance(laneInfo_.sampleInfosBegin(), laneInfo_.sampleInfosEnd());
    layout::LaneInfo::TileInfosContainer::const_iterator tileInfoIter = currentTileInfo_;
    size_t totalClustersInBuffer = 0;
    clustersCounts_.clear();
    while (totalClustersInBuffer < desiredClustersCount && tileInfoIter != laneInfo_.getTileInfos().end())
    {
        const bool haveClustersCount = tileInfoIter->haveClustersCount();
        const common::ClustersCount clustersCount = haveClustersCount ? tileInfoIter->getClustersCount() : 0;

        if (clustersCount == 0)
        {
            // We don't have the count, let's get out of here.
            break;
        }
        else
        {
            clustersCounts_.push_back(clustersCount);
            totalClustersInBuffer += clustersCount;
            ++tileInfoIter;
        }
    }

        if (outputBuffer.size() <= clustersCounts_.size())
        {
            if (clustersCounts_.empty())
            {
                clustersCounts_.resize(1, 0);
                outputBuffer.resize(1);
            }
            else
            {
                outputBuffer.resize(clustersCounts_.size());
            }
        }

        for (auto& buffer : outputBuffer)
        {
            buffer.bcls_.resize(laneInfo_.getNumCyclesToLoad());
        }

        BclFilesContainer::size_type bclIdx = 0;
        for (const auto& readInfo : laneInfo_.readInfos())
        {
            // We want all the cycles for the index reads, even if they were masked.
            for (const auto& cycleInfo : readInfo.cyclesToLoad())
            {
                taskQueue.addTask(new BclLoadTask(
                    bclFiles_.at(bclIdx),
                    cycleBciFiles_.at(bclIdx),
                    inputDir_,
                    aggregateTilesFlag,
                    laneInfo_,
                    cycleInfo,
                    currentTileInfo_,
                    clustersCounts_,
                    ignoreMissingBcls_,
                    bclIdx,
                    outputBuffer
                ));

                ++bclIdx;
            }
        }

        taskQueue.addTask(new FilterLoadTask(
            filterFile_,
            inputDir_,
            aggregateTilesFlag,
            laneInfo_,
            currentTileInfo_,
            clustersCounts_,
            ignoreMissingFilters_,
            outputBuffer
        ));

        if (!isPatternedFlowcell || !positionsFile_)
        {
            taskQueue.addTask(new PositionsLoadTask(
                positionsFile_,
                intensitiesDir_,
                aggregateTilesFlag,
                isPatternedFlowcell,
                laneInfo_,
                currentTileInfo_,
                clustersCounts_,
                ignoreMissingPositions_,
                outputBuffer,
                patternedFlowcellPositions_
            ));
        }

        taskQueue.addTask(new ControlLoadTask(
            controlFile_,
            inputDir_,
            laneInfo_,
            currentTileInfo_,
            ignoreMissingControls_,
            outputBuffer
        ));

    return true;
}

/**
 * \brief Report a size mismatch between a loaded buffer and the expected size.
 *
 * The mismatch is only logged as a warning when the "ignore missing" option
 * belonging to \p fileType ("BCL", "positions", "filter" or "control") is set;
 * in every other case common::InputDataError is raised.
 *
 * \param fileType     Kind of file the buffer came from.
 * \param cycleNumber  Cycle number printed in the message.
 * \param realSize     Size actually found.
 * \param expectedSize Size that was expected.
 */
void BclLoader::bclMismatchCount(std::string fileType,
                                 common::CycleNumber cycleNumber,
                                 BclBuffer::BclsContainer::value_type::size_type realSize,
                                 BclBuffer::BclsContainer::value_type::size_type expectedSize)
{
    // Pick the option that governs this file type; unknown types are never tolerated.
    bool tolerated = false;
    if ("BCL" == fileType)
    {
        tolerated = ignoreMissingBcls_;
    }
    else if ("positions" == fileType)
    {
        tolerated = ignoreMissingPositions_;
    }
    else if ("filter" == fileType)
    {
        tolerated = ignoreMissingFilters_;
    }
    else if ("control" == fileType)
    {
        tolerated = ignoreMissingControls_;
    }

    if (!tolerated)
    {
        BOOST_THROW_EXCEPTION(common::InputDataError((boost::format("Mismatching cluster count in %s file: Cycle #%d: bytes_real=%d bytes_expected=%d") % fileType % cycleNumber % realSize % expectedSize).str()));
    }

    BCL2FASTQ_LOG(common::LogLevel::WARNING) << "Mismatching cluster count in " << fileType << " file: Cycle #" << cycleNumber << ": bytes_real=" << realSize << " bytes_expected=" << expectedSize << std::endl;
}

/**
 * \brief Give every position in the buffer a distinct y value.
 *
 * positions_ is resized to \p bufferSize and each entry's y_ is set from the
 * running counter uniqueFailedReadIndex_, which is incremented per entry.
 * x_ is not written.
 *
 * \param outputBuffer Buffer whose positions_ container is rewritten.
 * \param bufferSize   Number of positions the buffer must hold.
 */
void BclLoader::createUniqueFakePositions(BclBuffer& outputBuffer,
                                          BclBuffer::BclsContainer::value_type::size_type bufferSize)
{
    outputBuffer.positions_.resize(bufferSize);

    for (auto& position : outputBuffer.positions_)
    {
        position.y_ = uniqueFailedReadIndex_++;
    }
}

bool BclLoader::postExecute()
{
    for (BclBuffer& outputBuffer : this->getOutputBuffer())
    {
        if (layout_.getFlowcellInfo().isPatternedFlowcell())
        {
            outputBuffer.positions_ = patternedFlowcellPositions_;
        }

        const BclBuffer::BclsContainer::const_iterator largestBuffer = std::max_element(
            outputBuffer.bcls_.begin(),
            outputBuffer.bcls_.end(),
            boost::bind(&BclBuffer::BclsContainer::value_type::size, _1) < boost::bind(&BclBuffer::BclsContainer::value_type::size, _2)
        );
        const BclBuffer::BclsContainer::value_type::size_type bufferSize = largestBuffer->size();

        common::CycleNumber cycleNumber = 1;
        BOOST_FOREACH (BclBuffer::BclsContainer::value_type &bclBuffer, std::make_pair(outputBuffer.bcls_.begin(), outputBuffer.bcls_.end()))
        {
            const BclBuffer::BclsContainer::value_type::size_type currentSize = bclBuffer.size();
            if (bufferSize != currentSize)
            {
                bclMismatchCount("BCL",cycleNumber,currentSize,bufferSize);
                bclBuffer.resize(bufferSize, 0);
            }
            ++cycleNumber;
        }
        if (outputBuffer.positions_.size() != bufferSize)
        {
            bclMismatchCount("positions",1,outputBuffer.positions_.size(),bufferSize);
            createUniqueFakePositions(outputBuffer, bufferSize);
        }
        if (outputBuffer.filters_.size() != bufferSize)
        {
            bclMismatchCount("filter",1,outputBuffer.filters_.size(),bufferSize);
            outputBuffer.filters_.resize(bufferSize, data::FilterFile::Record(1));
        }
        if (outputBuffer.controls_.size() != bufferSize)
        {
            if (controlFile_)
            {
                bclMismatchCount("control",1,outputBuffer.controls_.size(),bufferSize);
            }
            outputBuffer.controls_.resize(bufferSize);
        }

        outputBuffer.tileInfo_ = currentTileInfo_;
        ++currentTileInfo_;
    }

    return true;
}


} // namespace conversion


} // namespace bcl2fastq


