import os
import re
from collections import defaultdict
from dataclasses import dataclass
from os import PathLike
from typing import List, Tuple, Dict, Union

# Pattern applied to a FASTQ base name (directory and extensions already
# removed), e.g. ``NAME_S1_L001_R1_001``. Capture groups:
#   1: everything up to and including ``_S<digits>``
#   2: the three-digit value after ``_L``
#   3: the read number after ``_R`` (``1`` or ``2``)
#   4: the trailing three-digit value
# NOTE(review): this looks like the Illumina bcl2fastq naming scheme -- confirm.
FQ_PATTERN = re.compile(r'(.+_S[0-9]+)_L([0-9]{3})_R([1-2])_([0-9]{3})$')


@dataclass
class Fastq:
    """One row of the FASTQ list: a read group and its read files.

    ``read1`` / ``read2`` default to the empty string, which the rest of this
    module treats as "no file recorded for this read yet".
    """

    # Read-group identifier; emitted in the RGID column.
    rgid: str
    # Lane number; emitted in the Lane column.
    lane: int
    # File for read 1; emitted in the Read1File column.
    read1: str = ""
    # File for read 2; emitted in the Read2File column.
    read2: str = ""


def write_fastq_list(
        samples: Dict[str, List[Fastq]],
        output_dir: Union[str, bytes, PathLike],
        fastq_list_file: str
) -> str:
    """Write a FASTQ list CSV for ``samples`` and return the path written.

    The file starts with the header
    ``RGID,RGSM,RGLB,Lane,Read1File,Read2File`` followed by one line per
    ``Fastq`` entry, in the iteration order of ``samples`` and of each
    sample's list. The RGSM column is the sample's dictionary key and the
    RGLB column is always the literal ``RGLB``.

    Args:
        samples: Mapping of sample name to the entries of that sample.
        output_dir: Existing directory to write into. ``str``, ``bytes`` and
            path-like values are all accepted.
        fastq_list_file: File name of the list inside ``output_dir``.

    Returns:
        The path of the written file, as a ``str``.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    sample_sheet_header = 'RGID,RGSM,RGLB,Lane,Read1File,Read2File\n'
    sample_sheet_formatter = '{rgid},{rgsm},{rglb},{lane},{r1},{r2}\n'

    # Collect the pieces and join once instead of growing a string with `+=`.
    lines = [sample_sheet_header]
    for sample_name, fastqs in samples.items():
        for fastq in fastqs:
            lines.append(sample_sheet_formatter.format(
                rgid=fastq.rgid,
                rgsm=sample_name,
                rglb='RGLB',
                lane=fastq.lane,
                r1=fastq.read1,
                r2=fastq.read2
            ))

    # os.path.join refuses to mix bytes and str components, so a bytes
    # `output_dir` (allowed by the signature) is decoded to str first.
    # fsdecode leaves str / path-like inputs equivalent to their str form.
    sample_sheet_file = os.path.join(os.fsdecode(output_dir), fastq_list_file)
    with open(sample_sheet_file, 'w') as f:
        f.write(''.join(lines))

    return sample_sheet_file


def get_basename(filename: str) -> str:
    """/a/b/c/basename.ext1.ext2 -> basename

    Drops everything up to the last ``/`` and then the final two
    dot-separated extensions. A name with fewer than two extensions yields
    the empty string.
    """
    _, _, leaf = filename.rpartition('/')
    pieces = leaf.rsplit('.', 2)
    # Fewer than three pieces means there were not two extensions to strip.
    if len(pieces) < 3:
        return ''
    return pieces[0]


def parse_fastq(filename: str) -> Tuple[str, int, int, str]:
    """Returns (sample_name, lane, read_number, sample_id)

    The base name of ``filename`` (see ``get_basename``) is matched against
    ``FQ_PATTERN``:

    * ``sample_name`` is capture group 1,
    * ``lane`` and ``read_number`` are groups 2 and 3 converted to ``int``,
    * ``sample_id`` is ``<group 1>_L<group 2>_<group 4>``, built from the
      matched text so the lane keeps its zero padding.

    Raises:
        ValueError: If the base name does not match ``FQ_PATTERN``.
    """
    name = get_basename(filename)
    re_match = FQ_PATTERN.match(name)
    # Check for "no match" explicitly rather than catching the AttributeError
    # from calling .group() on None, which would also hide unrelated errors.
    if re_match is None:
        raise ValueError(f'Could not parse FASTQ file name {filename}')

    sample_name, lane, read_number, suffix = re_match.groups()
    return (
        sample_name,
        int(lane),
        int(read_number),
        f'{sample_name}_L{lane}_{suffix}'
    )


# Maps a parsed read number to the name of the Fastq attribute that stores
# the file for that read.
_READ_MAPPING = {
    1: 'read1',
    2: 'read2'
}


def construct_samples(fastq_files: List[str]) -> Dict[str, List[Fastq]]:
    """Group FASTQ files into per-sample ``Fastq`` entries.

    Each file name is parsed with ``parse_fastq``. Files that share the same
    sample name, lane and read-group ID are merged into one ``Fastq`` entry,
    with the file stored in ``read1`` or ``read2`` according to its read
    number. Entries are appended to their sample's list in the order their
    first file is encountered.

    Args:
        fastq_files: FASTQ file names or paths.

    Returns:
        Mapping of sample name to that sample's ``Fastq`` entries.

    Raises:
        ValueError: If a file name cannot be parsed, if its read number is
            not 1 or 2, or if two files claim the same read of the same
            entry.
    """
    samples: Dict[str, List[Fastq]] = defaultdict(list)
    # Index of entries already created, so pairing a file with its mate is a
    # dict lookup instead of a scan over the sample's list.
    entries: Dict[Tuple[str, int, str], Fastq] = {}

    for fastq_file in fastq_files:
        sample_name, lane, read_number, sample_id = parse_fastq(fastq_file)

        # Validate the read number once, up front, so new and existing
        # entries fail in the same way.
        try:
            read_attr = _READ_MAPPING[read_number]
        except KeyError:
            raise ValueError(
                f'The read number from {fastq_file} was not 1 or 2'
            ) from None

        key = (sample_name, lane, sample_id)
        entry = entries.get(key)
        if entry is None:
            entry = Fastq(rgid=sample_id, lane=lane)
            entries[key] = entry
            samples[sample_name].append(entry)
        elif getattr(entry, read_attr) != '':
            # This read slot is already filled by an earlier file.
            raise ValueError(f'Sample ID for {fastq_file} is not unique')

        setattr(entry, read_attr, fastq_file)

    return samples


def process_fastqs(
        fastq_files: List[str],
        output_dir: Union[str, bytes, PathLike],
        fastq_list_file: str = 'fastq_list.csv'
) -> Tuple[List[str], str]:
    """Group FASTQ files by sample and write the FASTQ list file.

    Args:
        fastq_files: FASTQ file names or paths to group.
        output_dir: Directory the list file is written into.
        fastq_list_file: Name of the list file inside ``output_dir``.

    Returns:
        A tuple of (sample names, path of the written FASTQ list).
    """
    grouped = construct_samples(fastq_files)
    list_path = write_fastq_list(grouped, output_dir, fastq_list_file)
    sample_names = [*grouped]
    return sample_names, list_path
