import argparse import multiprocessing import numpy as np import SimpleITK as sitk import time import os import itertools import concurrent.futures from typing import List import sys sys.path.append('./../code') from utils_quintin import * # from fastMRI_PCa.visualization.visualize import write_array2nifti_t2 from tensorflow.keras.models import load_model from functools import partial from focal_loss import BinaryFocalLoss from scipy import ndimage from pathlib import Path from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor from sklearn.metrics import roc_curve, auc # from fastMRI_PCa.visualization import save_slice_3d # from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np # from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask # from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s # from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized try: import numpy.typing as npt except ImportError: pass ############################### PREPROCESSING BLOBS ######################### """ Extract lesion candidates from a softmax prediction Authors: anindox8, matinhz, joeranbosma """ # Preprocess Softmax Volume (Clipping, Max Confidence) def preprocess_softmax_static(softmax: np.ndarray, threshold: float = 0.10, min_voxels_detection: int = 10, max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: # Load and Preprocess Softmax Image all_hard_blobs = np.zeros_like(softmax) confidences = [] clipped_softmax = softmax.copy() clipped_softmax[softmax < threshold] = 0 blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3))) if num_blobs > 0: # For Each Prediction for tumor in range(1, num_blobs+1): # determine mask for current lesion hard_mask = np.zeros_like(blobs_index) hard_mask[blobs_index == tumor] = 1 if np.count_nonzero(hard_mask) <= min_voxels_detection: # remove tiny detection of <= 0.009 cm^3 blobs_index[hard_mask.astype(bool)] = 0 continue # add sufficiently sized detection hard_blob = hard_mask * clipped_softmax max_prob = np.max(hard_blob) if max_prob_round_decimals is not None: max_prob = np.round(max_prob, max_prob_round_decimals) hard_blob[hard_blob > 0] = max_prob all_hard_blobs += hard_blob confidences.append((tumor, max_prob)) return all_hard_blobs, confidences, blobs_index def preprocess_softmax_dynamic(softmax: np.ndarray, min_voxels_detection: int = 10, num_lesions_to_extract: int = 5, dynamic_threshold_factor: float = 2.5, max_prob_round_decimals: Optional[int] = None, remove_adjacent_lesion_candidates: bool = True, max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: """ Generate detection proposals using a dynamic threshold to determine the location and size of lesions. Author: Joeran Bosma """ working_softmax = softmax.copy() dynamic_hard_blobs = np.zeros_like(softmax) confidences: List[Tuple[int, float]] = [] dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int) while len(confidences) < num_lesions_to_extract: tumor_index = 1 + len(confidences) # determine max. softmax max_prob = np.max(working_softmax) if max_prob < max_prob_failsafe_stopping_threshold: break # set dynamic threshold to half the max threshold = max_prob / dynamic_threshold_factor # extract blobs for dynamix threshold all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold, min_voxels_detection=min_voxels_detection, max_prob_round_decimals=max_prob_round_decimals) # select blob with max. confidence # note: the max_prob is re-computed in the (unlikely) case that the max. prob # was inside a 'lesion candidate' of less than min_voxels_detection, which is # thus removed in preprocess_softmax_static. max_prob = np.max(all_hard_blobs) mask_current_lesion = (all_hard_blobs == max_prob) # ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob) mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3))) mask_current_lesion = (mask_current_lesion_indexed == 1) # create mask with its confidence hard_blob = (all_hard_blobs * mask_current_lesion) # Detect whether the extractted mask is a ring/hollow sphere # around an existing lesion candidate. For confident lesions, # the surroundings of the prediction are still quite confident, # and can become a second 'detection'. For an # example, please # see extracted lesion candidates nr. 4 and 5 at: # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49 # Detection method: grow currently extracted lesions by one voxel, # and check if they overlap with the current extracted lesion. extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0) current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any() # Check if lesion candidate should be retained if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap): # store extracted lesion dynamic_hard_blobs += hard_blob confidences += [(tumor_index, max_prob)] dynamic_indexed_blobs += (mask_current_lesion * tumor_index) # remove extracted lesion from working-softmax working_softmax = (working_softmax * (~mask_current_lesion)) return dynamic_hard_blobs, confidences, dynamic_indexed_blobs def preprocess_softmax(softmax: np.ndarray, threshold: Union[str, float] = 0.10, min_voxels_detection: int = 10, num_lesions_to_extract: int = 5, dynamic_threshold_factor: float = 2.5, max_prob_round_decimals: Optional[int] = None, remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: """ Generate detection proposals using a dynamic or static threshold to determine the size of lesions. """ if threshold == 'dynamic': all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection, dynamic_threshold_factor=dynamic_threshold_factor, num_lesions_to_extract=num_lesions_to_extract, remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates, max_prob_round_decimals=max_prob_round_decimals) elif threshold == 'dynamic-fast': # determine max. softmax and set a per-case 'static' threshold based on that max_prob = np.max(softmax) threshold = float(max_prob / dynamic_threshold_factor) all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold, min_voxels_detection=min_voxels_detection, max_prob_round_decimals=max_prob_round_decimals) else: threshold = float(threshold) # convert threshold to float, if it wasn't already all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold, min_voxels_detection=min_voxels_detection, max_prob_round_decimals=max_prob_round_decimals) return all_hard_blobs, confidences, indexed_pred ################################################################################ # Evaluate all cases def evaluate( y_true: np.ndarray, y_pred: np.ndarray, min_overlap=0.10, overlap_func: str = 'DSC', case_confidence: str = 'max', multiple_lesion_candidates_selection_criteria='overlap', allow_unmatched_candidates_with_minimal_overlap=True, flat: Optional[bool] = None ) -> Dict[str, Any]: # Make list out of numpy array so that it can be mapped in parallel with multiple CPUs. y_true_list = y_true #[y_true[mri_idx] for mri_idx in range(y_true.shape[0])] y_pred_list = y_pred #[y_pred[mri_idx] for mri_idx in range(y_pred.shape[0])] roc_true, roc_pred = {}, {} y_list = [] num_lesions = 0 subject_idxs = list(range(len(y_true_list))) N_CPUS = 2 with ThreadPoolExecutor(max_workers=N_CPUS) as pool: # define the functions that need to be processed: compute_pred_vector, with each individual # detection_map prediction, ground truth label and parameters future_to_args = { pool.submit(evaluate_case, y_true, y_pred, min_overlap=min_overlap, overlap_func=overlap_func, multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria, allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs) } # process the cases in parallel iterator = concurrent.futures.as_completed(future_to_args) iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list)) for future in iterator: try: res = future.result() except Exception as e: print(f"Exception: {e}") else: # unpack results y_list_pat, num_lesions_gt = res # note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels] # aggregate results idx = future_to_args[future] roc_true[idx] = np.max([a[0] for a in y_list_pat]) if case_confidence == 'max': # take highest lesion confidence as case-level confidence roc_pred[idx] = np.max([a[1] for a in y_list_pat]) elif case_confidence == 'bayesian': # if a_i is the probability the i-th lesion is csPCa, then the case-level # probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i} roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat]) else: raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.") # accumulate outputs y_list += y_list_pat num_lesions += num_lesions_gt # calculate statistics num_patients = len(roc_true) # get lesion-level results sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations( y_list=y_list, num_patients=num_patients ) # calculate recall, precision and average precision AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds) # calculate case-level AUROC fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs], y_score=[roc_pred[s] for s in subject_idxs], pos_label=1) auc_score = auc(fpr, tpr) flat = True if flat: # flatten roc_true and roc_pred roc_true_flat = [roc_true[s] for s in subject_idxs] roc_pred_flat = [roc_pred[s] for s in subject_idxs] metrics = { "FP_per_case": convert_np_to_list(FP_per_case), "sensitivity": convert_np_to_list(sensitivity), "thresholds": convert_np_to_list(thresholds), "num_lesions": int(num_lesions), "num_patients": int(num_patients), "roc_true": convert_np_to_list(roc_true_flat if flat else roc_true), "roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred), "AP": float(AP), "precision": convert_np_to_list(precision), "recall": convert_np_to_list(recall), # patient-level predictions 'auroc': float(auc_score), 'tpr': convert_np_to_list(tpr), 'fpr': convert_np_to_list(fpr), } return metrics def convert_np_to_list(flat_numpy_arr): ans = [] for elem in flat_numpy_arr: ans.append(float(elem)) return ans # Calculate FROC metrics (FP rate, sensitivity) def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None): # calculate counts TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations( y_list=y_list, thresholds=thresholds ) # calculate FROC metrics from counts sensitivity = TP / num_lesions if num_lesions > 0 else np.nan FP_per_case = FP / num_patients return sensitivity, FP_per_case, thresholds, num_lesions def ap_from_lesion_evaluations(y_list, thresholds=None): # calculate counts TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations( y_list=y_list, thresholds=thresholds ) # calculate precision (lesion-level) precision = TP / (TP + FP) precision = np.append(precision, [0]) # calculate recall (lesion-level) FN = num_lesions - TP recall = TP / (TP + FN) recall = np.append(recall, recall[-1:]) # calculate average precission (lesion-level) AP = np.trapz(y=precision, x=recall) return AP, precision, recall, thresholds # Calculate macro metrics (true positives (TP), false positives (FP)) def counts_from_lesion_evaluations( y_list: List[Tuple[int, float, float]], thresholds: "Optional[npt.NDArray[np.float64]]" = None ) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]": """ Calculate true positives (TP) and false positives (FP) as function of threshold, based on the case evaluations from `evaluate_case`. """ # sort predictions y_list.sort() # collect targets and predictions y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list]) y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list]) # calculate number of lesions num_lesions = y_true.sum() if thresholds is None: # collect thresholds for FROC analysis thresholds = np.unique(y_pred) thresholds[::-1].sort() # sort thresholds in descending order (inplace) # for >10,000 thresholds: resample to 10,000 unique thresholds, while also # keeping all thresholds higher than 0.8 and the first 20 thresholds if len(thresholds) > 10_000: rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32) st = [thresholds[i] for i in rng] low_thresholds = thresholds[-20:] thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds]) # define placeholders FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32) TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32) # for each threshold: count FPs and calculate the sensitivity for i, th in enumerate(thresholds): if th > 0: y_pred_thresholded = (y_pred >= th).astype(int) tp = np.sum(y_true*y_pred_thresholded) fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded) # update FROC with new point FP[i] = fp TP[i] = tp else: # extend FROC curve to infinity # TP[i] = TP[-2] #note: aangepast stefan 11-04-2022 TP[i] = 1 FP[i] = np.inf return TP, FP, thresholds, num_lesions # Compute base prediction metrics TP/FP/FN with associated model confidences def evaluate_case( y_true: np.ndarray, y_pred: np.ndarray, min_overlap: float = 0.10, overlap_func: str = 'DSC', multiple_lesion_candidates_selection_criteria: str = 'overlap', allow_unmatched_candidates_with_minimal_overlap: bool = True ) -> Tuple[List[Tuple[int, float, float]], int]: """ Gather the list of lesion candidates, and classify in TP/FP/FN. - multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same ground truth mask, use 'overlap' or 'confidence' to choose which lesion is matched against the ground truth mask. Returns: - a list of tuples with: (is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label) - number of ground truth lesions """ y_list: List[Tuple[int, float, float]] = [] if overlap_func == 'IoU': overlap_func = calculate_iou elif overlap_func == 'DSC': overlap_func = calculate_dsc else: raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'") # convert dtype to float32 y_true = y_true.astype('int32') y_pred = y_pred.astype('float32') if y_pred.shape[0] < y_true.shape[0]: print("Warning: padding prediction to match label!") y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape) confidences, indexed_pred = parse_detection_map(y_pred) lesion_candidates_best_overlap: Dict[str, float] = {} if y_true.any(): # for each malignant scan labeled_gt, num_gt_lesions = ndimage.label(y_true, np.ones((3, 3, 3))) for lesiong_id in range(1, num_gt_lesions+1): # for each lesion in ground-truth (GT) label gt_lesion_mask = (labeled_gt == lesiong_id) # collect indices of lesion candidates that have any overlap with the current GT lesion overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask])) overlapping_lesion_candidate_indices -= {0} # remove index 0, if present # collect lesion candidates for current GT lesion lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = [] for lesion_candidate_id, lesion_confidence in confidences: if lesion_candidate_id in overlapping_lesion_candidate_indices: # calculate overlap between lesion candidate and GT mask lesion_pred_mask = (indexed_pred == lesion_candidate_id) overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask) # keep track of the highest overlap a lesion candidate has with any GT lesion lesion_candidates_best_overlap[lesion_candidate_id] = max( overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0) ) # store lesion candidate info for current GT mask lesion_candidates_for_target_gt.append({ 'id': lesion_candidate_id, 'confidence': lesion_confidence, 'overlap': overlap_score, }) print(lesion_candidates_for_target_gt) if len(lesion_candidates_for_target_gt) == 0: # no lesion candidate matched with GT mask. Add FN. y_list.append((1, 0., 0.)) elif len(lesion_candidates_for_target_gt) == 1: # single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise. candidate_info = lesion_candidates_for_target_gt[0] lesion_pred_mask = (indexed_pred == candidate_info['id']) if candidate_info['overlap'] > min_overlap: # overlap between lesion candidate and GT mask is sufficient, add TP indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment y_list.append((1, candidate_info['confidence'], candidate_info['overlap'])) else: # overlap between lesion candidate and GT mask is insufficient, add FN y_list.append((1, 0., 0.)) else: # multiple predictions for current GT lesion # sort lesion candidates based on overlap or confidence key = multiple_lesion_candidates_selection_criteria lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True) gt_lesion_matched = False for candidate_info in lesion_candidates_for_target_gt: lesion_pred_mask = (indexed_pred == candidate_info['id']) if candidate_info['overlap'] > min_overlap: indexed_pred[lesion_pred_mask] = 0 y_list.append((1, candidate_info['confidence'], candidate_info['overlap'])) gt_lesion_matched = True break if not gt_lesion_matched: # ground truth lesion not matched to a lesion candidate. Add FN. y_list.append((1, 0., 0.)) # Remaining lesions are FPs remaining_lesions = set(np.unique(indexed_pred)) remaining_lesions -= {0} # remove index 0, if present for lesion_candidate_id, lesion_confidence in confidences: if lesion_candidate_id in remaining_lesions: overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0) if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap: # The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap # with a GT lesion. The GT lesion is, however, matched to another lesion candidate. # In this operation mode, this lesion candidate is not considered as a false positive. pass else: y_list.append((0, lesion_confidence, 0.)) # add FP else: # for benign case, all predictions are FPs num_gt_lesions = 0 if len(confidences) > 0: for _, lesion_confidence in confidences: y_list.append((0, lesion_confidence, 0.)) else: y_list.append((0, 0., 0.)) # avoid empty list return y_list, num_gt_lesions # Calculate Intersection over Union (IoU) for N-D Arrays def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float: epsilon = 1e-8 iou_num = np.sum(predictions[labels == 1]) iou_denom = np.sum(predictions) + np.sum(labels) - iou_num return float((iou_num + epsilon) / (iou_denom + epsilon)) # Calculate Dice Similarity Coefficient (DSC) for N-D Arrays def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float: epsilon = 1e-8 dsc_num = np.sum(predictions[labels == 1]) * 2.0 dsc_denom = np.sum(predictions) + np.sum(labels) return float((dsc_num + epsilon) / (dsc_denom + epsilon)) # Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK] def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs): assert isinstance(image, np.ndarray) assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \ "Target size doesn't fit image size" rank = len(img_size) # image dimensions # placeholders for new shape from_indices = [[0, image.shape[dim]] for dim in range(rank)] to_padding = [[0, 0] for _ in range(rank)] slicer = [slice(None)] * rank # for each dimension, determine process (cropping or padding) for i in range(rank): if image.shape[i] < img_size[i]: to_padding[i][0] = (img_size[i] - image.shape[i]) // 2 to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0] else: from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.)) from_indices[i][1] = from_indices[i][0] + img_size[i] # create slicer object to crop/leave each dimension slicer[i] = slice(from_indices[i][0], from_indices[i][1]) # pad cropped image to extend missing dimension return np.pad(image[tuple(slicer)], to_padding, **kwargs) # Parse Detection Maps to Individual Lesions + Likelihoods def parse_detection_map(detection_map): # Label All Non-Connected Components in Detection Map blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3))) confidences = [] if num_blobs > 0: # For Each Lesion Detection for tumor_index in range(1, num_blobs+1): # Extract Mask of Current Lesion # hard_blob = np.zeros_like(blobs_index) # hard_blob[blobs_index == tumor] = 1 # TODO: replace above with the following? Is faster I think. # hard_blob = (blobs_index == tumor).astype(int) # Extract Max Predicted Likelihood for Lesion Detection # max_prob = np.max(hard_blob) # <- this is always 1 # hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used # Store Predicted Likelihood per Lesion Detection max_prob = detection_map[blobs_index == tumor_index].max() confidences.append((tumor_index, max_prob)) return confidences, blobs_index ############################### CONSTANTS #################################### # Quitin code removed ############################### CONSTANTS #################################### def move_dims(arr): # UMCG numpy dimensions convention: dims = (batch, width, heigth, depth) # Joeran numpy dimensions convention: dims = (batch, depth, heigth, width) arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently. arr = np.moveaxis(arr, 3, 2) return arr ############################### CONSTANTS #################################### # # Reconstruction model directory # RECON_DIR = "11_new_recon_multiprocess" # T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5" # # Path to file with train, validation and test indexes. # IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml" # # Diagnostic models # diag_models = [r"models/14_long_diag/best-direct-fold0.h5", # r"models/15_long_diag_early_stopping/best-direct-fold0.h5"] # # x_true_val = np.load("temp/x_true_val.npy") # print("loading numpy arrays") # y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy")) # y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy")) # y_true_val = move_dims(y_true_val) # y_pred_val = move_dims(y_pred_val) # # Preprocess the blobs individually # for mri_idx in range(y_pred_val.shape[0]): # print(f"preprocessing mri idx {mri_idx}") # y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0] # print(f"Start FROC evaluation.") # metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val) # dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True)