fast-mri/code/FROC/cal_froc_from_np.py

import argparse
import multiprocessing
import numpy as np
import SimpleITK as sitk
import time
import os
import itertools
import concurrent.futures

from typing import List
import sys
sys.path.append('./../code')
from utils_quintin import *

# from fastMRI_PCa.visualization.visualize import write_array2nifti_t2
from tensorflow.keras.models import load_model
from functools import partial
from focal_loss import BinaryFocalLoss
from scipy import ndimage
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from sklearn.metrics import roc_curve, auc

# from fastMRI_PCa.visualization import save_slice_3d
# from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np
# from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask
# from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s
# from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric

from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized

try:
    import numpy.typing as npt
except ImportError:
    pass


###############################  PREPROCESSING  BLOBS  #########################
"""
Extract lesion candidates from a softmax prediction
Authors: anindox8, matinhz, joeranbosma
"""


# Preprocess Softmax Volume (Clipping, Max Confidence)
def preprocess_softmax_static(softmax: np.ndarray,
                              threshold: float = 0.10,
                              min_voxels_detection: int = 10,
                              max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
    # Load and Preprocess Softmax Image
    all_hard_blobs = np.zeros_like(softmax)
    confidences = []
    clipped_softmax = softmax.copy()
    clipped_softmax[softmax < threshold] = 0
    blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))

    if num_blobs > 0:  # For Each Prediction
        for tumor in range(1, num_blobs+1):
            # determine mask for current lesion
            hard_mask = np.zeros_like(blobs_index)
            hard_mask[blobs_index == tumor] = 1

            if np.count_nonzero(hard_mask) <= min_voxels_detection:
                # remove tiny detection of <= 0.009 cm^3
                blobs_index[hard_mask.astype(bool)] = 0
                continue

            # add sufficiently sized detection
            hard_blob = hard_mask * clipped_softmax
            max_prob = np.max(hard_blob)
            if max_prob_round_decimals is not None:
                max_prob = np.round(max_prob, max_prob_round_decimals)
            hard_blob[hard_blob > 0] = max_prob
            all_hard_blobs += hard_blob
            confidences.append((tumor, max_prob))
    return all_hard_blobs, confidences, blobs_index


def preprocess_softmax_dynamic(softmax: np.ndarray,
                               min_voxels_detection: int = 10,
                               num_lesions_to_extract: int = 5,
                               dynamic_threshold_factor: float = 2.5,
                               max_prob_round_decimals: Optional[int] = None,
                               remove_adjacent_lesion_candidates: bool = True,
                               max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
    """
    Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
    Author: Joeran Bosma
    """
    working_softmax = softmax.copy()
    dynamic_hard_blobs = np.zeros_like(softmax)
    confidences: List[Tuple[int, float]] = []
    dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)

    while len(confidences) < num_lesions_to_extract:
        tumor_index = 1 + len(confidences)

        # determine max. softmax
        max_prob = np.max(working_softmax)

        if max_prob < max_prob_failsafe_stopping_threshold:
            break

        # set dynamic threshold to half the max
        threshold = max_prob / dynamic_threshold_factor

        # extract blobs for dynamix threshold
        all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
                                                         min_voxels_detection=min_voxels_detection,
                                                         max_prob_round_decimals=max_prob_round_decimals)

        # select blob with max. confidence
        # note: the max_prob is re-computed in the (unlikely) case that the max. prob
        # was inside a 'lesion candidate' of less than min_voxels_detection, which is
        # thus removed in preprocess_softmax_static.
        max_prob = np.max(all_hard_blobs)
        mask_current_lesion = (all_hard_blobs == max_prob)

        # ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
        mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
        mask_current_lesion = (mask_current_lesion_indexed == 1)

        # create mask with its confidence
        hard_blob = (all_hard_blobs * mask_current_lesion)

        # Detect whether the extractted mask is a ring/hollow sphere
        # around an existing lesion candidate. For confident lesions,
        # the surroundings of the prediction are still quite confident,
        # and can become a second 'detection'. For an # example, please
        # see extracted lesion candidates nr. 4 and 5 at:
        # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
        # Detection method: grow currently extracted lesions by one voxel,
        # and check if they overlap with the current extracted lesion.
        extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
        current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()

        # Check if lesion candidate should be retained
        if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
            # store extracted lesion
            dynamic_hard_blobs += hard_blob
            confidences += [(tumor_index, max_prob)]
            dynamic_indexed_blobs += (mask_current_lesion * tumor_index)

        # remove extracted lesion from working-softmax
        working_softmax = (working_softmax * (~mask_current_lesion))

    return dynamic_hard_blobs, confidences, dynamic_indexed_blobs


def preprocess_softmax(softmax: np.ndarray,
                       threshold: Union[str, float] = 0.10,
                       min_voxels_detection: int = 10,
                       num_lesions_to_extract: int = 5,
                       dynamic_threshold_factor: float = 2.5,
                       max_prob_round_decimals: Optional[int] = None,
                       remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
    """
    Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
    """
    if threshold == 'dynamic':
        all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
                                                                               dynamic_threshold_factor=dynamic_threshold_factor,
                                                                               num_lesions_to_extract=num_lesions_to_extract,
                                                                               remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
                                                                               max_prob_round_decimals=max_prob_round_decimals)
    elif threshold == 'dynamic-fast':
        # determine max. softmax and set a per-case 'static' threshold based on that
        max_prob = np.max(softmax)
        threshold = float(max_prob / dynamic_threshold_factor)
        all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
                                                                              min_voxels_detection=min_voxels_detection,
                                                                              max_prob_round_decimals=max_prob_round_decimals)
    else:
        threshold = float(threshold)  # convert threshold to float, if it wasn't already
        all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
                                                                              min_voxels_detection=min_voxels_detection,
                                                                              max_prob_round_decimals=max_prob_round_decimals)

    return all_hard_blobs, confidences, indexed_pred


################################################################################


# Evaluate all cases
def evaluate(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    min_overlap=0.10,
    overlap_func: str = 'DSC',
    case_confidence: str = 'max',
    multiple_lesion_candidates_selection_criteria='overlap',
    allow_unmatched_candidates_with_minimal_overlap=True,
    flat: Optional[bool] = None
    ) -> Dict[str, Any]:

    # Make list out of numpy array so that it can be mapped in parallel with multiple CPUs.
    y_true_list = y_true #[y_true[mri_idx] for mri_idx in range(y_true.shape[0])]
    y_pred_list = y_pred #[y_pred[mri_idx] for mri_idx in range(y_pred.shape[0])]

    roc_true, roc_pred = {}, {}
    y_list = []
    num_lesions = 0

    subject_idxs = list(range(len(y_true_list)))
    N_CPUS = 2
    with ThreadPoolExecutor(max_workers=N_CPUS) as pool:
        # define the functions that need to be processed: compute_pred_vector, with each individual
        # detection_map prediction, ground truth label and parameters
        future_to_args = {
            pool.submit(evaluate_case,
                        y_true,
                        y_pred,
                        min_overlap=min_overlap,
                        overlap_func=overlap_func,
                        multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
                        allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
            for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs)
        }

        # process the cases in parallel
        iterator = concurrent.futures.as_completed(future_to_args)
        iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list))
        for future in iterator:
            try:
                res = future.result()
            except Exception as e:
                print(f"Exception: {e}")
            else:
                # unpack results
                y_list_pat, num_lesions_gt = res
                # note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]

                # aggregate results
                idx = future_to_args[future]
                roc_true[idx] = np.max([a[0] for a in y_list_pat])
                if case_confidence == 'max':
                    # take highest lesion confidence as case-level confidence
                    roc_pred[idx] = np.max([a[1] for a in y_list_pat])
                elif case_confidence == 'bayesian':
                    # if a_i is the probability the i-th lesion is csPCa, then the case-level
                    # probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
                    roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
                else:
                    raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")

                # accumulate outputs
                y_list += y_list_pat
                num_lesions += num_lesions_gt

    # calculate statistics
    num_patients = len(roc_true)

    # get lesion-level results
    sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
        y_list=y_list, num_patients=num_patients
    )

    # calculate recall, precision and average precision
    AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)

    # calculate case-level AUROC
    fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs],
                            y_score=[roc_pred[s] for s in subject_idxs],
                            pos_label=1)
    auc_score = auc(fpr, tpr)

    flat = True
    if flat:
        # flatten roc_true and roc_pred
        roc_true_flat = [roc_true[s] for s in subject_idxs]
        roc_pred_flat = [roc_pred[s] for s in subject_idxs]

    metrics = {
        "FP_per_case": convert_np_to_list(FP_per_case),
        "sensitivity": convert_np_to_list(sensitivity),
        "thresholds": convert_np_to_list(thresholds),
        "num_lesions": int(num_lesions),
        "num_patients": int(num_patients),
        "roc_true": convert_np_to_list(roc_true_flat if flat else roc_true),
        "roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred),
        "AP": float(AP),
        "precision": convert_np_to_list(precision),
        "recall": convert_np_to_list(recall),

        # patient-level predictions
        'auroc': float(auc_score),
        'tpr': convert_np_to_list(tpr),
        'fpr': convert_np_to_list(fpr),
    }

    return metrics


def convert_np_to_list(flat_numpy_arr):
    ans = []
    for elem in flat_numpy_arr:
        ans.append(float(elem))
    return ans


# Calculate FROC metrics (FP rate, sensitivity)
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
    # calculate counts
    TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
        y_list=y_list, thresholds=thresholds
    )

    # calculate FROC metrics from counts
    sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
    FP_per_case = FP / num_patients

    return sensitivity, FP_per_case, thresholds, num_lesions


def ap_from_lesion_evaluations(y_list, thresholds=None):
    # calculate counts
    TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
        y_list=y_list, thresholds=thresholds
    )

    # calculate precision (lesion-level)
    precision = TP / (TP + FP)
    precision = np.append(precision, [0])

    # calculate recall (lesion-level)
    FN = num_lesions - TP
    recall = TP / (TP + FN)
    recall = np.append(recall, recall[-1:])

    # calculate average precission (lesion-level)
    AP = np.trapz(y=precision, x=recall)

    return AP, precision, recall, thresholds


# Calculate macro metrics (true positives (TP), false positives (FP))
def counts_from_lesion_evaluations(
    y_list: List[Tuple[int, float, float]],
    thresholds: "Optional[npt.NDArray[np.float64]]" = None
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
    """
    Calculate true positives (TP) and false positives (FP) as function of threshold,
    based on the case evaluations from `evaluate_case`.
    """
    # sort predictions
    y_list.sort()

    # collect targets and predictions
    y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
    y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])

    # calculate number of lesions
    num_lesions = y_true.sum()

    if thresholds is None:
        # collect thresholds for FROC analysis
        thresholds = np.unique(y_pred)
        thresholds[::-1].sort()  # sort thresholds in descending order (inplace)

        # for >10,000 thresholds: resample to 10,000 unique thresholds, while also
        # keeping all thresholds higher than 0.8 and the first 20 thresholds
        if len(thresholds) > 10_000:
            rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
            st = [thresholds[i] for i in rng]
            low_thresholds = thresholds[-20:]
            thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])

    # define placeholders
    FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
    TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)

    # for each threshold: count FPs and calculate the sensitivity
    for i, th in enumerate(thresholds):
        if th > 0:
            y_pred_thresholded = (y_pred >= th).astype(int)
            tp = np.sum(y_true*y_pred_thresholded)
            fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)

            # update FROC with new point
            FP[i] = fp
            TP[i] = tp
        else:
            # extend FROC curve to infinity
            # TP[i] = TP[-2] #note: aangepast stefan 11-04-2022
            TP[i] = 1
            FP[i] = np.inf

    return TP, FP, thresholds, num_lesions


# Compute base prediction metrics TP/FP/FN with associated model confidences
def evaluate_case(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    min_overlap: float = 0.10,
    overlap_func: str = 'DSC',
    multiple_lesion_candidates_selection_criteria: str = 'overlap',
    allow_unmatched_candidates_with_minimal_overlap: bool = True
    ) -> Tuple[List[Tuple[int, float, float]], int]:
    """
    Gather the list of lesion candidates, and classify in TP/FP/FN.
    - multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
                                                     ground truth mask, use 'overlap' or 'confidence' to choose
                                                     which lesion is matched against the ground truth mask.

    Returns:
    - a list of tuples with:
        (is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
    - number of ground truth lesions
    """
    y_list: List[Tuple[int, float, float]] = []

    if overlap_func == 'IoU':
        overlap_func = calculate_iou
    elif overlap_func == 'DSC':
        overlap_func = calculate_dsc
    else:
        raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")

    # convert dtype to float32
    y_true = y_true.astype('int32')
    y_pred = y_pred.astype('float32')

    if y_pred.shape[0] < y_true.shape[0]:
        print("Warning: padding prediction to match label!")
        y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape)


    confidences, indexed_pred = parse_detection_map(y_pred)

    lesion_candidates_best_overlap: Dict[str, float] = {}

    if y_true.any():
        # for each malignant scan
        labeled_gt, num_gt_lesions = ndimage.label(y_true,  np.ones((3, 3, 3)))

        for lesiong_id in range(1, num_gt_lesions+1):
            # for each lesion in ground-truth (GT) label
            gt_lesion_mask = (labeled_gt == lesiong_id)

            # collect indices of lesion candidates that have any overlap with the current GT lesion
            overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
            overlapping_lesion_candidate_indices -= {0}  # remove index 0, if present

            # collect lesion candidates for current GT lesion
            lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
            for lesion_candidate_id, lesion_confidence in confidences:
                if lesion_candidate_id in overlapping_lesion_candidate_indices:
                    # calculate overlap between lesion candidate and GT mask
                    lesion_pred_mask = (indexed_pred == lesion_candidate_id)
                    overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)

                    # keep track of the highest overlap a lesion candidate has with any GT lesion
                    lesion_candidates_best_overlap[lesion_candidate_id] = max(
                        overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
                    )

                    # store lesion candidate info for current GT mask
                    lesion_candidates_for_target_gt.append({
                        'id': lesion_candidate_id,
                        'confidence': lesion_confidence,
                        'overlap': overlap_score,
                    })
            print(lesion_candidates_for_target_gt)

            if len(lesion_candidates_for_target_gt) == 0:
                # no lesion candidate matched with GT mask. Add FN.
                y_list.append((1, 0., 0.))
            elif len(lesion_candidates_for_target_gt) == 1:
                # single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
                candidate_info = lesion_candidates_for_target_gt[0]
                lesion_pred_mask = (indexed_pred == candidate_info['id'])

                if candidate_info['overlap'] > min_overlap:
                    # overlap between lesion candidate and GT mask is sufficient, add TP
                    indexed_pred[lesion_pred_mask] = 0  # remove lesion candidate after assignment
                    y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
                else:
                    # overlap between lesion candidate and GT mask is insufficient, add FN
                    y_list.append((1, 0., 0.))
            else:
                # multiple predictions for current GT lesion
                # sort lesion candidates based on overlap or confidence
                key = multiple_lesion_candidates_selection_criteria
                lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)

                gt_lesion_matched = False
                for candidate_info in lesion_candidates_for_target_gt:
                    lesion_pred_mask = (indexed_pred == candidate_info['id'])

                    if candidate_info['overlap'] > min_overlap:
                        indexed_pred[lesion_pred_mask] = 0
                        y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
                        gt_lesion_matched = True
                        break

                if not gt_lesion_matched:
                    # ground truth lesion not matched to a lesion candidate. Add FN.
                    y_list.append((1, 0., 0.))

        # Remaining lesions are FPs
        remaining_lesions = set(np.unique(indexed_pred))
        remaining_lesions -= {0}  # remove index 0, if present
        for lesion_candidate_id, lesion_confidence in confidences:
            if lesion_candidate_id in remaining_lesions:
                overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
                if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
                    # The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
                    # with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
                    # In this operation mode, this lesion candidate is not considered as a false positive.
                    pass
                else:
                    y_list.append((0, lesion_confidence, 0.))  # add FP

    else:
        # for benign case, all predictions are FPs
        num_gt_lesions = 0
        if len(confidences) > 0:
            for _, lesion_confidence in confidences:
                y_list.append((0, lesion_confidence, 0.))
        else:
            y_list.append((0, 0., 0.))  # avoid empty list

    return y_list, num_gt_lesions


# Calculate Intersection over Union (IoU) for N-D Arrays
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
    epsilon = 1e-8
    iou_num = np.sum(predictions[labels == 1])
    iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
    return float((iou_num + epsilon) / (iou_denom + epsilon))


# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
    epsilon = 1e-8
    dsc_num = np.sum(predictions[labels == 1]) * 2.0
    dsc_denom = np.sum(predictions) + np.sum(labels)
    return float((dsc_num + epsilon) / (dsc_denom + epsilon))


# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
    assert isinstance(image, np.ndarray)
    assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
        "Target size doesn't fit image size"

    rank = len(img_size)  # image dimensions

    # placeholders for new shape
    from_indices = [[0, image.shape[dim]] for dim in range(rank)]
    to_padding = [[0, 0] for _ in range(rank)]
    slicer = [slice(None)] * rank

    # for each dimension, determine process (cropping or padding)
    for i in range(rank):
        if image.shape[i] < img_size[i]:
            to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
            to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
        else:
            from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
            from_indices[i][1] = from_indices[i][0] + img_size[i]

        # create slicer object to crop/leave each dimension
        slicer[i] = slice(from_indices[i][0], from_indices[i][1])

    # pad cropped image to extend missing dimension
    return np.pad(image[tuple(slicer)], to_padding, **kwargs)


# Parse Detection Maps to Individual Lesions + Likelihoods
def parse_detection_map(detection_map):
    # Label All Non-Connected Components in Detection Map
    blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))

    confidences = []
    if num_blobs > 0:
        # For Each Lesion Detection
        for tumor_index in range(1, num_blobs+1):

            # Extract Mask of Current Lesion
            # hard_blob = np.zeros_like(blobs_index)
            # hard_blob[blobs_index == tumor] = 1
            # TODO: replace above with the following? Is faster I think.
            # hard_blob = (blobs_index == tumor).astype(int)

            # Extract Max Predicted Likelihood for Lesion Detection
            # max_prob = np.max(hard_blob)  # <- this is always 1
            # hard_blob[hard_blob > 0] = max_prob  # <- this line does nothing, as hard_blob is not used

            # Store Predicted Likelihood per Lesion Detection
            max_prob = detection_map[blobs_index == tumor_index].max()
            confidences.append((tumor_index, max_prob))
    return confidences, blobs_index

###############################  CONSTANTS  ####################################
# Quitin code removed
###############################  CONSTANTS  ####################################

def move_dims(arr):
    # UMCG numpy dimensions convention: dims = (batch, width, heigth, depth)
    # Joeran numpy dimensions convention: dims = (batch, depth, heigth, width)
    arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently.
    arr = np.moveaxis(arr, 3, 2)
    return arr

###############################  CONSTANTS  ####################################


# # Reconstruction model directory
# RECON_DIR = "11_new_recon_multiprocess"
# T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5"

# # Path to file with train, validation and test indexes.
# IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml"

# # Diagnostic models
# diag_models = [r"models/14_long_diag/best-direct-fold0.h5",
#                r"models/15_long_diag_early_stopping/best-direct-fold0.h5"]


# # x_true_val = np.load("temp/x_true_val.npy")
# print("loading numpy arrays")
# y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy"))
# y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy"))
# y_true_val = move_dims(y_true_val)
# y_pred_val = move_dims(y_pred_val)

# # Preprocess the blobs individually
# for mri_idx in range(y_pred_val.shape[0]):
#     print(f"preprocessing mri idx {mri_idx}")
#     y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0]

# print(f"Start FROC evaluation.")
# metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val)

# dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True)