"""Run Paladin inference on a single slide.

Given per-tile feature embeddings for one slide, run one Paladin model (or a
table of models selected by histology) and emit a CSV of per-target scores.
"""

import csv
import pickle  # nosec
import sys
from argparse import ArgumentParser
from collections import defaultdict
from pathlib import Path
from typing import Any, List, Optional

import numpy  # NOTE(review): duplicate of `numpy as np` below; kept for compatibility
import pandas as pd
import numpy as np
import torch
from loguru import logger
from torch.utils.data import DataLoader

from data import SiteType, TileFeatureTensorDataset

# Constants
BATCH_SIZE = 8
NUM_WORKERS = 16


class UsageError(Exception):
    """A UsageError is raised when there's a problem with the command-line arguments."""

    pass


def load_model_map(model_map_path: str) -> dict[Any, Any]:
    """Load the table mapping histologies and targets to the paladin model (a
    pickle file) that predicts that target for that cancer subtype.

    The CSV must contain the columns ``cancer_subtype``, ``target_name``, and
    ``model_path``.

    A dict is returned, mapping each histology to a table mapping a target to
    the pathname for the model that predicts it.
    """
    models: dict[str, dict[str, str]] = defaultdict(dict)
    with Path(model_map_path).open() as fp:
        rdr = csv.DictReader(fp)
        for row in rdr:
            histology = row["cancer_subtype"]
            target = row["target_name"]
            model = row["model_path"]
            models[histology][target] = model
    return models


def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
    """Load the output table from a single-slide Aeon run, listing Oncotree
    histologies and their confidence values.

    The DataFrame must contain the columns ``Cancer Subtype`` and ``Confidence``.

    A dict is returned, mapping each histology to its confidence score.
    """
    score = {}
    for _, row in df.iterrows():
        subtype = row["Cancer Subtype"]
        confidence = row["Confidence"]
        score[subtype] = confidence
    return score


def select_histologies(aeon_scores: dict[str, float]) -> list[str]:
    """Return the three top-scoring histologies, based on the given Aeon scores.

    Ties on score are broken by histology name (descending), since the sort key
    is the (score, histology) pair.
    """
    sorted_histologies = list(
        sorted([(v, k) for k, v in aeon_scores.items()], reverse=True)
    )
    return [histology for score, histology in sorted_histologies[:3]]


def select_models(histologies: list[str], model_map: dict[Any, Any]) -> list[Any]:
    """Return (histology, target, model_path) triples for every model in the
    model map that predicts a target for one of the given histologies.

    ``model_map`` is the nested dict produced by :func:`load_model_map`.
    """
    # BUG FIX: the previous version unpacked three values from
    # model_map.items(), which yields (histology, {target: model}) pairs and
    # raised ValueError on any non-empty map. Iterate the nested dict instead.
    models = []
    for histology, targets in model_map.items():
        if histology in histologies:
            for target, model in sorted(targets.items()):
                models.append((histology, target, model))
    return models


def run_model(device, dataset, model_path: str, num_workers, batch_size) -> float:
    """Run inference for the given embeddings and model.

    The point estimate is returned.

    SECURITY NOTE: the model file is deserialized with pickle, which executes
    arbitrary code; only load models from trusted sources.
    """
    logger.debug(f"[loading model {model_path}]")
    with Path(model_path).open("rb") as f:
        model = pickle.load(f)  # nosec
        # model = CPU_Unpickler(f).load()  # nosec
    model.to(device)
    model.eval()

    dataloader = DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )
    results_df = []
    # NOTE(review): only the first batch is consumed — presumably the dataset
    # yields a single slide-level item; confirm if datasets can be larger.
    batch = next(iter(dataloader))
    with torch.no_grad():
        batch["tile_tensor"] = batch["tile_tensor"].to(device)
        outputs = model(batch)
        logits = outputs["logits"]
        # Apply softplus to ensure positive values for beta-binomial parameters
        logits = torch.nn.functional.softplus(logits) + 1.0  # enforce concavity
        point_estimates = logits_to_point_estimates(logits)
        # sample_id = batch['sample_id'][0]
        class_assignment = point_estimates[0].item()
    return class_assignment


def logits_to_point_estimates(logits):
    """Convert paired (alpha, beta) logits to point estimates alpha/(alpha+beta).

    ``logits`` is a tensor of shape (batch_size, 2 * (n_clf_tasks + n_reg_tasks));
    the even columns are alpha and the odd columns beta. A tensor of shape
    (batch_size, n_clf_tasks + n_reg_tasks) is returned.
    """
    return logits[:, ::2] / (logits[:, ::2] + logits[:, 1::2])


def run_paladin(
    features: np.ndarray,
    aeon_results: Optional[pd.DataFrame] = None,
    histology_codes: Optional[List[str]] = None,
    model_map_path: Optional[str] = None,
    model_path: Optional[str] = None,
    metastatic: bool = False,
    batch_size: int = BATCH_SIZE,
    num_workers: int = NUM_WORKERS,
    use_cpu: bool = False,
):
    """Run Paladin inference on a single slide, using the given embeddings and
    either a single model or a table mapping histologies and targets to models.

    If histology_codes is given, it is a list of OncoTree codes for the slide.
    If aeon_results is given, it is a DataFrame with the output of an Aeon run
    on the slide.

    A DataFrame with columns "Cancer Subtype", "Biomarker", and "Score" is
    returned.
    """
    if aeon_results is not None:
        aeon_scores = load_aeon_scores(aeon_results)
        target_histologies = select_histologies(aeon_scores)
    else:
        # Fall back to an empty list so the model-map loop below doesn't
        # iterate None when neither source of histologies was supplied.
        target_histologies = histology_codes or []

    # Build a dataset to feed to the model
    site = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
    dataset = TileFeatureTensorDataset(
        tile_features=features,
        site_type=site,
        n_max_tiles=20000,
    )
    device = torch.device(
        "cuda" if not use_cpu and torch.cuda.is_available() else "cpu"
    )

    results = []
    if model_path:
        # Single-model mode: histology/target are unknown.
        histology, target = "None", "None"
        try:
            score = run_model(device, dataset, model_path, num_workers, batch_size)
            results.append((histology, target, score))
            logger.info(f"histology: {histology} target: {target} score: {score}")
        except Exception as exc:
            logger.error(f"Unable to run model for {histology} target {target}\n{exc}")
    elif model_map_path:
        # Model-map mode: run every model for each selected histology.
        model_map = load_model_map(model_map_path)
        for histology in target_histologies:
            if histology not in model_map:
                logger.warning(f"Warning: no models found for {histology}")
                continue
            for target, model in sorted(model_map[histology].items()):
                try:
                    score = run_model(device, dataset, model, num_workers, batch_size)
                    results.append((histology, target, score))
                    logger.info(
                        f"histology: {histology} target: {target} score: {score}"
                    )
                except Exception as exc:
                    logger.error(
                        f"Unable to run model for {histology} target {target}\n{exc}"
                    )
    df = pd.DataFrame(results, columns=["Cancer Subtype", "Biomarker", "Score"])
    return df


def parse_args():
    """Parse command-line arguments, raising UsageError for invalid combinations."""
    parser = ArgumentParser(description="Run Paladin inference on a single slide")
    parser.add_argument(
        "-i",
        "--features-path",
        required=True,
        help="Pathname to a .pt file with optimus embeddings for this slide",
    )
    parser.add_argument(
        "-o",
        "--output-path",
        help="The filename for the Paladin predictions file (CSV)",
        required=True,
    )
    parser.add_argument(
        "-c",
        "--histology-codes",
        help="One or more histologies (OncoTree codes, comma-separated)",
    )
    parser.add_argument(
        "-a",
        "--aeon-predictions-path",
        help="Pathname to an aeon-predictions file (CSV) for this slide",
    )
    parser.add_argument(
        "-mm",
        "--model-map-path",
        help="A CSV file mapping histologies and targets to Paladin models (.pkl files). Contains columns 'cancer_subtype', 'target_name', and 'model_path'.",
    )
    parser.add_argument(
        "-m",
        "--model-path",
        help="The filename for a Paladin model to run inference with",
    )
    parser.add_argument(
        "--metastatic", action="store_true", help="Tissue is from a metastatic site"
    )
    parser.add_argument("--batch-size", type=int, default=BATCH_SIZE, help="Batch size")
    parser.add_argument(
        "--num-workers",
        type=int,
        default=NUM_WORKERS,
        help="Number of workers for data loading",
    )
    parser.add_argument("--use-cpu", action="store_true", help="Use CPU")
    opt = parser.parse_args()
    if opt.histology_codes and opt.aeon_predictions_path:
        raise UsageError(
            "You may specify either --histology-codes or --aeon-predictions-path, but not both."
        )
    if opt.histology_codes:
        opt.histology_codes = opt.histology_codes.split(",")
    if opt.model_path is None and opt.model_map_path is None:
        raise UsageError("You must specify either --model-path or --model-map-path")
    return opt


def main():
    """Entry point: load inputs, run inference, and write the predictions CSV."""
    opt = parse_args()
    features = torch.load(opt.features_path)
    logger.info(f"Loaded features from {opt.features_path}")
    aeon_results = None
    if opt.aeon_predictions_path:
        aeon_results = pd.read_csv(opt.aeon_predictions_path)
        logger.info(f"Loaded Aeon results from {opt.aeon_predictions_path}")
    df = run_paladin(
        features=features,
        aeon_results=aeon_results,
        histology_codes=opt.histology_codes,
        model_map_path=opt.model_map_path,
        model_path=opt.model_path,
        metastatic=opt.metastatic,
        batch_size=opt.batch_size,
        num_workers=opt.num_workers,
        use_cpu=opt.use_cpu,
    )
    df.to_csv(opt.output_path, index=False)
    logger.info(f"Wrote {opt.output_path}")


if __name__ == "__main__":
    main()