/**
 * BCL to FASTQ file converter
 * Copyright (c) 2007-2015 Illumina, Inc.
 *
 * This software is covered by the accompanying EULA
 * and certain third party copyright/licenses, and any user of this
 * source file is bound by the terms therein.
 *
 * \file PositionsFile.cpp
 *
 * \brief Implementation of positions file.
 *
 * \author Marek Balint
 * \author Mauricio Varea
 * \author Aaron Day
 */


#include "data/PositionsFile.hh"

#include <vector>
#include <algorithm>
#include <numeric>
#include <utility>

#include <boost/format.hpp>
#include <boost/foreach.hpp>
#include <boost/filesystem.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/shared_ptr.hpp>

#include <boost/algorithm/string.hpp>



namespace bcl2fastq {


namespace data {


/// \brief Default constructor.
///
/// Hands an empty path and a cleared ignore-errors flag to FileReaderBase.
/// The concrete readers in this file initialize FileReaderBase themselves
/// with the real path.
PositionsFile::PositionsFile()
    : FileReaderBase("", false)
{
}


namespace detail {


///////////////////////////////////////////////////////////////////////////////
// positions file type: LOCS
///////////////////////////////////////////////////////////////////////////////

/// \brief LOCS file.
///
/// Positions reader for the binary layout declared below: a packed Header
/// followed by packed Record entries holding two floats each. read()
/// converts buffered records into PositionsFile::Record values.
class LocsFile : public PositionsFile, public BinaryAllClustersFileReader
{

public:

    /// \brief Constructor. Calls readFile() before returning.
    /// \param path Path to the LOCS file.
    /// \param ignoreErrors Suppress errors opening file and/or reading it.
    explicit LocsFile(
        const boost::filesystem::path &path,
        bool ignoreErrors
    );

    /// \brief Read records from the file to buffer.
    /// \param targetBuffer Target buffer to read to.
    /// \param targetSize Maximum number of records to be read.
    /// \return Number of records read.
    virtual std::size_t read(
        PositionsFile::Record *targetBuffer,
        std::size_t targetSize
    );

private:

    /// \brief LOCS header type definition.
    // Header and Record are declared with 1-byte packing so that their
    // sizeof() carries no padding.
#pragma pack(push, 1)
    struct Header
    {
        /// \brief Value of the first constant field.
        static const uint32_t FIELD1;

        /// \brief Value of the second constant field.
        static const float FIELD2;

        /// \brief The first constant field.
        uint32_t field1_;

        /// \brief The second constant field.
        float field2_;

        /// \brief Clusters count.
        uint32_t clustersCount_;
    };

    /// \brief LOCS record type definition
    struct Record
    {
        /// \brief X-coordinate.
        float x_;

        /// \brief Y-coordinate.
        float y_;

    };
#pragma pack(pop)

    /// \brief Get the number of bytes in a record
    /// \return Number of bytes in a record
    virtual std::size_t getRecordBytes() const { return sizeof(Record); }

    /// \brief Read and validate the header, then store its clusters count.
    /// \return true on success, false if reading or validation failed.
    virtual bool readHeader();

    /// \brief Validate the header. Throw an exception on failure.
    /// \param header Header to check against FIELD1 and FIELD2.
    /// \return true when both constant fields carry the expected values.
    virtual bool validateHeader(Header& header);

    /// \brief Read the data records from the file (not the header)
    virtual void readRecords();

    /// \brief Gets the file type string used for error messages.
    /// \return File type string
    virtual std::string getFileTypeStr() const { return "LOCS"; }

    /// \brief Transform the Record type into a PositionsFile::Record
    /// \param record Record to transform
    /// \return Converted record (each coordinate scaled by 10 and offset by 1000.5).
    static PositionsFile::Record transformRecord(const Record &record);

    /// \brief Internal buffer (raw record bytes).
    std::vector<char> buffer_;

    /// \brief Current position in internal buffer.
    std::vector<char>::const_iterator bufferPosition_;
};

/// \brief Construct the reader and load the file immediately.
/// \param path Path to the LOCS file.
/// \param ignoreErrors Suppress errors opening file and/or reading it.
LocsFile::LocsFile(const boost::filesystem::path &path, bool ignoreErrors)
    : FileReaderBase(path, ignoreErrors)
    , PositionsFile()
    , BinaryAllClustersFileReader(path, ignoreErrors)
    , buffer_()
    , bufferPosition_()
{
    // readFile() is declared outside this file; presumably it drives
    // readHeader() and readRecords() -- confirm against its definition.
    readFile();
}

bool LocsFile::readHeader()
{
    Header header;
    if (!BinaryAllClustersFileReader::readHeader(header) ||
        !validateHeader(header))
    {
        return false;
    }

    clustersCount_ = header.clustersCount_;

    return true;
}

/// \brief Check the two constant header fields of a LOCS file.
/// \param header Header as read from the file.
/// \return true when both fields match; a mismatch raises
///         common::InputDataError carrying the errno value sampled on entry.
bool LocsFile::validateHeader(LocsFile::Header& header)
{
    // Sample errno first so later calls cannot overwrite it.
    const int savedErrno = errno;

    if (header.field1_ != Header::FIELD1)
    {
        // Copy the packed member to a local before handing it to boost::format.
        const uint32_t actualField1 = header.field1_;
        BOOST_THROW_EXCEPTION(
            common::InputDataError(
                savedErrno,
                (boost::format("Corrupted header of LOCS file '%s': header_field_1=%d")
                    % this->getPath().string() % actualField1).str()));

        return false;
    }

    if (header.field2_ != Header::FIELD2)
    {
        const float actualField2 = header.field2_;
        BOOST_THROW_EXCEPTION(
            common::InputDataError(
                savedErrno,
                (boost::format("Corrupted header of LOCS file '%s': header_field_2=%f")
                    % this->getPath().string() % actualField2).str()));

        return false;
    }

    // Same value that readHeader() stores after this call returns.
    clustersCount_ = header.clustersCount_;

    return true;
}

/// \brief Load the record bytes into buffer_ and rewind the read position.
void LocsFile::readRecords()
{
    // The outcome reported by readClusters() is not inspected here.
    readClusters(buffer_, getClustersCount());

    // Subsequent read() calls start from the first buffered byte.
    bufferPosition_ = buffer_.begin();
}

/// \brief Read records from the file to buffer.
///
/// Converts up to \p targetSize buffered LOCS records and advances the
/// internal read position past them.
///
/// \param targetBuffer Target buffer to read to.
/// \param targetSize Maximum number of records to be read.
/// \return Number of records read; 0 when nothing is left, when
///         \p targetSize is 0, or when a validateCondition() check fails
///         without throwing.
std::size_t LocsFile::read(
    PositionsFile::Record *targetBuffer,
    std::size_t targetSize
)
{
    // NOTE(review): bufferPosition_ is only assigned in readRecords(); if the
    // constructor's readFile() never got that far these checks operate on a
    // value-initialized iterator -- confirm readFile() guarantees the call.
    if (!validateCondition((buffer_.end() - bufferPosition_) >= 0, "Invalid buffer size.")) { return 0; }
    if (!validateCondition((buffer_.end() - bufferPosition_) % sizeof(Record) == 0, "Invalid buffer size.")) { return 0; }

    const std::size_t recordsToRead = std::min(
        static_cast<std::size_t>(buffer_.end() - bufferPosition_) / sizeof(Record),
        targetSize
    );

    // Nothing to deliver: leave before '&*bufferPosition_' below, which would
    // dereference the past-the-end iterator once the buffer is exhausted.
    if (recordsToRead == 0)
    {
        return 0;
    }

    const Record * const begin = reinterpret_cast<const Record *>(&*bufferPosition_);
    const Record * const end = begin + recordsToRead;

    std::transform(begin, end, targetBuffer, &LocsFile::transformRecord);
    bufferPosition_ += (recordsToRead * sizeof(Record));

    return recordsToRead;
}

/// \brief Value LocsFile::validateHeader() requires in Header::field1_.
const uint32_t LocsFile::Header::FIELD1 = 1;

/// \brief Value LocsFile::validateHeader() requires in Header::field2_.
const float LocsFile::Header::FIELD2 = 1.0;

/// \brief Convert a raw LOCS record into a PositionsFile::Record.
///
/// Each float coordinate is multiplied by 10.0, offset by 1000.5 and then
/// cast to PositionsFile::Record::ClusterCoordinate.
///
/// \param record Record to transform
/// \return Converted record.
PositionsFile::Record LocsFile::transformRecord(const Record &record)
{
    typedef PositionsFile::Record::ClusterCoordinate Coordinate;

    // Arithmetic is carried out in double precision before the cast.
    const double scaledX = record.x_ * 10.0 + 1000.5;
    const double scaledY = record.y_ * 10.0 + 1000.5;

    PositionsFile::Record converted;
    converted.x_ = static_cast<Coordinate>(scaledX);
    converted.y_ = static_cast<Coordinate>(scaledY);

    return converted;
}


///////////////////////////////////////////////////////////////////////////////
// positions file type: CLOCS
///////////////////////////////////////////////////////////////////////////////

/// \brief CLOCS file.
///
/// Positions reader for the binary layout declared below: a packed Header
/// (version byte and tile count), then for every tile a one-byte cluster
/// count followed by that tile's packed Record entries. Records are
/// buffered per tile and converted by read().
class ClocsFile : public PositionsFile, public BinaryAllClustersFileReader
{

public:

    /// \brief Constructor. Calls readFile() before returning.
    /// \param path Path to the CLOCS file.
    /// \param ignoreErrors Suppress errors opening file and/or reading it.
    ClocsFile(
        const boost::filesystem::path& path,
        bool ignoreErrors
    );

    /// \brief Read records from the file to buffer.
    /// \param targetBuffer Target buffer to read to.
    /// \param targetSize Maximum number of records to be read.
    /// \return Number of records read.
    virtual std::size_t read(
        PositionsFile::Record *targetBuffer,
        std::size_t targetSize
    );

private:

    /// \brief CLOCS header type definition.
    // Header and Record are declared with 1-byte packing so that their
    // sizeof() carries no padding.
#pragma pack(push, 1)
    struct Header
    {
    public:
        /// \brief supported version number
        static const uint8_t VERSION;

        /// \brief Version of the format.
        uint8_t version_;

        /// \brief Number of tiles
        uint32_t numTiles_;
    };

    /// \brief CLOCS record type definition.
    struct Record
    {
    public:

        /// \brief x-offset
        uint8_t xOffset_;

        /// \brief y-offset
        uint8_t yOffset_;

    };
#pragma pack(pop)

private:

    /// \brief Get the number of bytes in a record
    /// \return Number of bytes in a record
    virtual std::size_t getRecordBytes() const { return sizeof(Record); }

    /// \brief Read and validate the header, then store the number of tiles.
    /// \return true on success, false if reading or validation failed.
    virtual bool readHeader();

    /// \brief Validate the header. Throw an exception on failure.
    /// \param header Header whose version is compared with Header::VERSION.
    /// \return true when the version matches.
    virtual bool validateHeader(Header& header);

    /// \brief Read the data records from the file (not the header)
    virtual void readRecords();

    /// \brief Read the number of clusters in the tile.
    /// \return The one-byte count read from the file; 0 if no byte was read.
    uint8_t readClusterCount();

    /// \brief Gets the file type string used for error messages.
    /// \return File type string
    virtual std::string getFileTypeStr() const { return "CLOCS"; }

    /// \brief Transform the Record type into a PositionsFile::Record
    /// \param record Record to transform
    /// \param tileIndex Index of the tile (within buffer_) the record belongs to.
    /// \return Converted record.
    static PositionsFile::Record transformRecord(const Record &record, size_t tileIndex);

private:

    /// \brief Internal buffer: one vector of raw record bytes per tile.
    std::vector< std::vector<char> > buffer_;

    /// \brief Current tile position in internal buffer
    std::vector< std::vector<char> >::const_iterator bufferTilePosition_;

    /// \brief Current cluster position in buffer
    std::vector<char>::const_iterator bufferClusterPosition_;

    /// \brief Number of tiles
    uint32_t numTiles_;
};

/// \brief Construct the reader and load the file immediately.
/// \param path Path to the CLOCS file.
/// \param ignoreErrors Suppress errors opening file and/or reading it.
ClocsFile::ClocsFile(const boost::filesystem::path &path, bool ignoreErrors)
    : FileReaderBase(path, ignoreErrors)
    , PositionsFile()
    , BinaryAllClustersFileReader(path, ignoreErrors)
    , buffer_()
    , bufferTilePosition_()
    , bufferClusterPosition_()
    , numTiles_(0)
{
    // readFile() is declared outside this file; presumably it drives
    // readHeader() and readRecords() -- confirm against its definition.
    readFile();
}

bool ClocsFile::readHeader()
{
    Header header;
    if (!BinaryAllClustersFileReader::readHeader(header) ||
        !validateHeader(header))
    {
        return false;
    }

    numTiles_ = header.numTiles_;

    return true;
}

/// \brief Check that the CLOCS header carries the supported format version.
/// \param header Header as read from the file.
/// \return true when the version equals Header::VERSION; a mismatch raises
///         common::InputDataError carrying the errno value sampled on entry.
bool ClocsFile::validateHeader(ClocsFile::Header& header)
{
    // Sample errno first so later calls cannot overwrite it.
    int errnum = errno;
    if (header.version_ != Header::VERSION)
    {
        // boost::format streams its arguments: a uint8_t would be written as
        // a single (usually unprintable) character even under "%d". Widen it
        // so the message shows the numeric version.
        const unsigned int version = static_cast<unsigned int>(header.version_);
        BOOST_THROW_EXCEPTION(
            common::InputDataError(errnum,
                                   (boost::format("Corrupted header of CLOCS file '%s': header_version=%d")
                                       % this->getPath().string() % version).str()));

        // Kept from the original: only reached if the throw macro returns.
        return false;
    }

    return true;
}

/// \brief Read the data records from the file (not the header).
///
/// For each of the numTiles_ tiles a one-byte cluster count is read, followed
/// by that tile's records, which go into buffer_[tile]. Afterwards the read
/// cursors are placed on the first tile that holds data, or on buffer_.end()
/// when no tile does.
void ClocsFile::readRecords()
{
    buffer_.resize(numTiles_);
    common::ClustersCount totalClustersCount = 0;
    for (size_t i = 0; i < numTiles_; ++i)
    {
        uint8_t clustersCount = readClusterCount();

        // Tiles without clusters have no record bytes to consume.
        if (clustersCount == 0)
        {
            continue;
        }

        totalClustersCount += clustersCount;

        bool readSuccess = readClusters(buffer_[i],
                                        clustersCount);

        // Stop at the first failed tile; remaining tiles stay empty.
        if (!readSuccess)
        {
            break;
        }
    }

    clustersCount_ = totalClustersCount;

    // Skip leading empty tiles. The end test must come BEFORE the
    // dereference: when every tile is empty the cursor reaches buffer_.end(),
    // which must not be dereferenced.
    bufferTilePosition_ = buffer_.begin();
    while (bufferTilePosition_ != buffer_.end() && bufferTilePosition_->empty())
    {
        ++bufferTilePosition_;
    }

    if (bufferTilePosition_ != buffer_.end())
    {
        bufferClusterPosition_ = bufferTilePosition_->begin();
    }
}

/// \brief Read the one-byte cluster count of the next tile.
/// \return The byte read from fileBuf_; stays 0 when no byte was read, in
///         which case logError() is also called with the actual and the
///         expected lengths.
uint8_t ClocsFile::readClusterCount()
{
    uint8_t count = 0;
    const std::streamsize expectedBytes = sizeof(count);

    const std::streamsize bytesRead =
        io::read(fileBuf_, reinterpret_cast<char *>(&count), sizeof(count));

    if (bytesRead != expectedBytes)
    {
        logError(bytesRead, sizeof(count));
    }

    return count;
}

/// \brief Read records from the file to buffer.
///
/// Walks the per-tile buffers in order, converting up to \p targetSize
/// records and advancing the tile/cluster cursors past them.
///
/// \param targetBuffer Target buffer to read to.
/// \param targetSize Maximum number of records to be read.
/// \return Number of records read; 0 once all tiles are consumed or when a
///         validateCondition() check fails without throwing.
std::size_t ClocsFile::read(
    PositionsFile::Record *targetBuffer,
    std::size_t targetSize
)
{
    if (!validateCondition(numTiles_ > 0, "No tiles read from clocs file.")) { return 0; }
    if (!validateCondition(buffer_.size() > 0, "No data read from clocs file.")) { return 0; }
    if (!validateCondition((buffer_.end() - bufferTilePosition_) >= 0, "Invalid buffer size.")) { return 0; }

    // Every tile has been consumed (or none held data). Return before the
    // next check, which dereferences the tile cursor.
    if (bufferTilePosition_ == buffer_.end())
    {
        return 0;
    }

    if (!validateCondition((bufferTilePosition_->end() - bufferClusterPosition_) % sizeof(Record) == 0, "Invalid buffer size.")) { return 0; }

    size_t recordsRead = 0;
    while (recordsRead < targetSize && bufferTilePosition_ != buffer_.end())
    {
        if (bufferTilePosition_->empty() ||
            bufferTilePosition_->end() - bufferClusterPosition_ == 0)
        {
            // Current tile is empty or exhausted: move on. The new position
            // may be buffer_.end(), so test that before dereferencing it.
            ++bufferTilePosition_;
            if (bufferTilePosition_ != buffer_.end() && !bufferTilePosition_->empty())
            {
                bufferClusterPosition_ = bufferTilePosition_->begin();
            }
            continue;
        }

        const std::size_t recordsToRead = std::min(
            static_cast<std::size_t>(bufferTilePosition_->end() - bufferClusterPosition_) / sizeof(Record),
            targetSize - recordsRead
        );

        const Record * const begin = reinterpret_cast<const Record *>(&*bufferClusterPosition_);
        const Record * const end = begin + recordsToRead;

        // The tile's index within buffer_ feeds the coordinate conversion.
        size_t tileIndex = std::distance(std::vector< std::vector<char> >::const_iterator(buffer_.begin()), bufferTilePosition_);
        std::transform(begin, end, targetBuffer+recordsRead, boost::bind(&ClocsFile::transformRecord, _1, tileIndex));
        bufferClusterPosition_ += (recordsToRead * sizeof(Record));
        recordsRead += recordsToRead;
    }

    return recordsRead;
}


/// \brief The only header version ClocsFile::validateHeader() accepts.
const uint8_t ClocsFile::Header::VERSION = 0x01;

/// \brief Convert a raw CLOCS record into a PositionsFile::Record.
///
/// The tile index is split into a column (index % 82) and a row
/// (index / 82); each is multiplied by 250 and added to the record's byte
/// offset. That sum is cast to ClusterCoordinate, then 1000.5 is added.
///
/// \param record Record to transform
/// \param tileIndex Index of the tile the record was read from.
/// \return Converted record.
PositionsFile::Record ClocsFile::transformRecord(const Record &record, size_t tileIndex)
{
    typedef PositionsFile::Record::ClusterCoordinate Coordinate;

    // NOTE(review): 82 and 250 are taken verbatim from the original
    // expression; their meaning is presumably tiles-per-row and tile
    // pitch -- confirm against the CLOCS format description.
    const size_t gridColumns = 82;
    const size_t gridPitch = 250;

    const size_t column = tileIndex % gridColumns;
    const size_t row = tileIndex / gridColumns;

    PositionsFile::Record converted;
    // 1000.5 is added after the cast, as in the original; the double result
    // is converted implicitly on assignment.
    converted.x_ = static_cast<Coordinate>(record.xOffset_ + column * gridPitch) + 1000.5;
    converted.y_ = static_cast<Coordinate>(record.yOffset_ + row * gridPitch) + 1000.5;

    return converted;
}


/// \brief POS file.
///
/// Positions reader for the text format parsed by readRecords(): one record
/// per line, two values separated by a tab or a space. All records are
/// parsed at construction and handed out by read(). FileReader is inherited
/// privately.
class PosFile : public PositionsFile, FileReader
{
public:
    /// \brief Constructor. Opens the file and parses all records.
    /// \param filePath Path to the POS file.
    /// \param ignoreErrors Suppress errors opening file and/or reading it.
    PosFile(const boost::filesystem::path& filePath,
            bool                           ignoreErrors);

    /// \brief Read records from file to buffer.
    /// \param targetBuffer Target buffer to read to.
    /// \param targetSize Maximum number of records to be read.
    /// \return Number of records read.
    virtual std::size_t read(
        PositionsFile::Record *targetBuffer,
        std::size_t targetSize
    );

private:
    /// \brief Gets the file type string used for error messages.
    /// \return File type string
    virtual std::string getFileTypeStr() const { return "POS"; }

    /// \brief Read the data records from the file
    virtual void readRecords();

    /// \brief Internal buffer (parsed records).
    std::vector<PositionsFile::Record> records_;

    /// \brief Position of the next record read() will hand out.
    std::vector<PositionsFile::Record>::const_iterator recordsPos_;
};

/// \brief Construct the reader, open the POS file and parse every record.
///
/// Failures while opening or parsing go through the CATCH_AND_IGNORE*
/// macros (defined elsewhere). Whatever happens, recordsPos_ is anchored to
/// records_ before the constructor returns normally, so read() never works
/// on a value-initialized iterator.
///
/// \param filePath Path to the POS file.
/// \param ignoreErrors Suppress errors opening file and/or reading it.
PosFile::PosFile(const boost::filesystem::path& filePath,
                 bool                           ignoreErrors)
: FileReaderBase(filePath,
                 ignoreErrors)
, PositionsFile()
, FileReader(std::ios_base::in,
             filePath,
             ignoreErrors)
, records_()
, recordsPos_()
{
    static const std::string errMsgBegin = "Ignoring read failure for POS file '";

    try
    {
        if (openFileBuf())
        {
            readRecords();
        }
    }
    CATCH_AND_IGNORE(common::IoError, errMsgBegin)
    CATCH_AND_IGNORE(std::ios_base::failure, errMsgBegin)
    CATCH_AND_IGNORE_ALL(errMsgBegin)

    // Set the cursor only after loading has finished, because push_back()
    // during parsing may reallocate records_. This also covers the paths
    // where the file could not be opened or an ignored exception cut the
    // load short; read() then serves what was parsed (possibly nothing).
    recordsPos_ = records_.begin();
}

void PosFile::readRecords()
{
    std::string line;
    std::basic_istream<char> is(&fileBuf_);

    while (std::getline(is, line))
    {
        std::vector<std::string> strs;
        boost::split(strs, line, boost::is_any_of("\t "));

        if (strs.size() != 2)
        {
            std::string errMsg = "Formatting error for POS file: " + this->getPath().string();
            if (ignoreErrors_)
            {
                BCL2FASTQ_LOG(common::LogLevel::WARNING) << errMsg << std::endl;
                return;
            }
            else
            {
                int errnum = errno;
                BOOST_THROW_EXCEPTION(common::InputDataError(errnum, errMsg));
            }
        }

        PositionsFile::Record record;
        record.x_ = boost::lexical_cast<PositionsFile::Record::ClusterCoordinate>(strs[0]);
        record.y_ = boost::lexical_cast<PositionsFile::Record::ClusterCoordinate>(strs[1]);
        records_.push_back(record);
    }
}

/// \brief Copy the next parsed records into the caller's buffer.
/// \param targetBuffer Target buffer to read to.
/// \param targetSize Maximum number of records to be read.
/// \return Number of records copied: the smaller of \p targetSize and the
///         number of records not yet handed out.
std::size_t PosFile::read(
    PositionsFile::Record *targetBuffer,
    std::size_t targetSize
)
{
    typedef std::vector<PositionsFile::Record>::const_iterator RecordIterator;

    const RecordIterator recordsEnd = records_.end();
    const std::size_t remaining = static_cast<std::size_t>(std::distance(recordsPos_, recordsEnd));
    const std::size_t count = (remaining < targetSize) ? remaining : targetSize;

    const RecordIterator stop = recordsPos_ + count;
    std::copy(recordsPos_, stop, targetBuffer);

    recordsPos_ = stop;
    return count;
}

} // namespace detail


/// \brief Tell whether any supported positions file exists for a tile.
///
/// Probes in order: "locs", "clocs", then the POS text file. Probing stops at
/// the first hit.
///
/// \param intensitiesDir Directory searched for the positions files.
/// \param aggregateTilesFlag Forwarded to binaryFilePathExists().
/// \param isPatternedFlowcell Forwarded to binaryFilePathExists().
/// \param laneNumber Lane number used to build the file names.
/// \param tileNumber Tile number used to build the file names.
/// \return true if one of the candidate files exists.
bool PositionsFileFactory::doesFileExist(
    const boost::filesystem::path& intensitiesDir,
    bool                           aggregateTilesFlag,
    bool                           isPatternedFlowcell,
    common::LaneNumber             laneNumber,
    common::TileNumber             tileNumber
)
{
    // The helpers report the probed path through this out-parameter; only
    // their boolean result is of interest here.
    boost::filesystem::path probedPath;

    if (binaryFilePathExists(intensitiesDir, aggregateTilesFlag, isPatternedFlowcell,
                             laneNumber, tileNumber, "locs", probedPath))
    {
        return true;
    }

    if (binaryFilePathExists(intensitiesDir, aggregateTilesFlag, isPatternedFlowcell,
                             laneNumber, tileNumber, "clocs", probedPath))
    {
        return true;
    }

    return posFilePathExists(intensitiesDir, laneNumber, tileNumber, probedPath);
}

/// \brief Build the path of a binary positions file and test whether it exists.
///
/// Path layouts, relative to \p intensitiesDir:
///  - patterned flowcell: s.<ext>
///  - aggregated tiles:   L<lane, 3 digits>/s_<lane>.<ext>
///  - otherwise:          L<lane, 3 digits>/s_<lane>_<tile>.<ext>
///
/// \param intensitiesDir Base directory.
/// \param aggregateTilesFlag Selects the per-lane file name (ignored for patterned flowcells).
/// \param isPatternedFlowcell Selects the single "s.<ext>" file.
/// \param laneNumber Lane number.
/// \param tileNumber Tile number.
/// \param fileExtension File extension without the leading dot.
/// \param positionsFilePath [out] Receives the path that was built.
/// \return true if the built path exists.
bool PositionsFileFactory::binaryFilePathExists(
    const boost::filesystem::path& intensitiesDir,
    bool                           aggregateTilesFlag,
    bool                           isPatternedFlowcell,
    common::LaneNumber             laneNumber,
    common::TileNumber             tileNumber,
    const std::string&             fileExtension,
    boost::filesystem::path&       positionsFilePath
)
{
    namespace fs = boost::filesystem;

    if (isPatternedFlowcell)
    {
        positionsFilePath = intensitiesDir / fs::path("s." + fileExtension);
        return fs::exists(positionsFilePath);
    }

    // Both remaining layouts live in the per-lane sub-directory.
    const fs::path laneDir((boost::format("L%03d") % laneNumber).str());

    const std::string fileName = aggregateTilesFlag
        ? (boost::format("s_%d." + fileExtension) % laneNumber).str()
        : (boost::format("s_%d_%d." + fileExtension) % laneNumber % tileNumber).str();

    positionsFilePath = intensitiesDir / laneDir / fs::path(fileName);
    return fs::exists(positionsFilePath);
}

/// \brief Build the path of a POS text file and test whether it exists.
///
/// The file is looked up directly in \p intensitiesDir under the name
/// s_<lane>_<tile, 3 digits>_pos.txt.
///
/// \param intensitiesDir Base directory.
/// \param laneNumber Lane number.
/// \param tileNumber Tile number.
/// \param positionsFilePath [out] Receives the path that was built.
/// \return true if the built path exists.
bool PositionsFileFactory::posFilePathExists(
    const boost::filesystem::path& intensitiesDir,
    common::LaneNumber             laneNumber,
    common::TileNumber             tileNumber,
    boost::filesystem::path&       positionsFilePath
)
{
    const std::string fileName =
        (boost::format("s_%d_%03d_pos.txt") % laneNumber % tileNumber).str();

    positionsFilePath = intensitiesDir / boost::filesystem::path(fileName);

    return boost::filesystem::exists(positionsFilePath);
}


/// \brief Create a reader for the first positions file found for a tile.
///
/// Candidates are probed in the order "locs", "clocs", POS text file; the
/// first existing one decides the reader type.
///
/// \param intensitiesDir Directory searched for the positions files.
/// \param aggregateTilesFlag Forwarded to binaryFilePathExists().
/// \param isPatternedFlowcell Forwarded to binaryFilePathExists().
/// \param laneNumber Lane number used to build the file names.
/// \param tileNumber Tile number used to build the file names.
/// \param ignoreErrors Passed on to the reader that gets created.
/// \return Reader for the file found, or an empty pointer if none exists.
boost::shared_ptr<PositionsFile> PositionsFileFactory::createPositionsFile(
    const boost::filesystem::path& intensitiesDir,
    bool                           aggregateTilesFlag,
    bool                           isPatternedFlowcell,
    common::LaneNumber             laneNumber,
    common::TileNumber             tileNumber,
    bool                           ignoreErrors
)
{
    boost::filesystem::path foundPath;
    boost::shared_ptr<PositionsFile> positionsFile;

    if (binaryFilePathExists(intensitiesDir, aggregateTilesFlag, isPatternedFlowcell,
                             laneNumber, tileNumber, "locs", foundPath))
    {
        positionsFile.reset(new detail::LocsFile(foundPath, ignoreErrors));
    }
    else if (binaryFilePathExists(intensitiesDir, aggregateTilesFlag, isPatternedFlowcell,
                                  laneNumber, tileNumber, "clocs", foundPath))
    {
        positionsFile.reset(new detail::ClocsFile(foundPath, ignoreErrors));
    }
    else if (posFilePathExists(intensitiesDir, laneNumber, tileNumber, foundPath))
    {
        positionsFile.reset(new detail::PosFile(foundPath, ignoreErrors));
    }

    // Still empty when none of the candidate files exists.
    return positionsFile;
}


} // namespace data


} // namespace bcl2fastq


