commit before migration to habrok

Stefan
2023-03-29 13:05:32 +02:00
parent 9468dadfa3
commit ad595bb25e
24 changed files with 4098 additions and 1 deletion

code/FROC/__init__.py (Executable file, 6 lines added)

@@ -0,0 +1,6 @@
from . import image_utils
from . import froc
from . import data_utils
from . import cal_froc_from_np
from . import blob_preprocess
from . import analysis_utils

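For context, a minimal sketch of how the package might be consumed downstream, assuming `code/` is on the import path (mirroring the `sys.path.append('./../code')` used in `cal_froc_from_np.py` below; everything beyond the module names above is illustrative):

import sys
sys.path.append('./../code')

import FROC
# the submodules re-exported above are then available, e.g.:
# FROC.froc.froc(y_det=..., y_true=...)          # FROC evaluation pipeline
# FROC.blob_preprocess.preprocess_softmax(...)   # lesion candidate extraction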
code/FROC/analysis_utils.py (Executable file, 313 lines added)

@@ -0,0 +1,313 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from sklearn.metrics import auc
from scipy import ndimage
try:
import numpy.typing as npt
except ImportError: # pragma: no cover
pass
# Parse Detection Maps to Individual Lesions + Likelihoods
def parse_detection_map(detection_map):
# Label All Non-Connected Components in Detection Map
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
confidences = []
if num_blobs > 0:
# For Each Lesion Detection
for tumor_index in range(1, num_blobs+1):
# Extract Mask of Current Lesion
# hard_blob = np.zeros_like(blobs_index)
# hard_blob[blobs_index == tumor] = 1
# TODO: replace above with the following? Is faster I think.
# hard_blob = (blobs_index == tumor).astype(int)
# Extract Max Predicted Likelihood for Lesion Detection
# max_prob = np.max(hard_blob) # <- this is always 1
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
# Store Predicted Likelihood per Lesion Detection
max_prob = detection_map[blobs_index == tumor_index].max()
confidences.append((tumor_index, max_prob))
return confidences, blobs_index
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
dsc_num = np.sum(predictions[labels == 1]) * 2.0
dsc_denom = np.sum(predictions) + np.sum(labels)
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
# Calculate Intersection over Union (IoU) for N-D Arrays
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
iou_num = np.sum(predictions[labels == 1])
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
return float((iou_num + epsilon) / (iou_denom + epsilon))
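# Illustrative sketch (a hypothetical helper, not part of the original pipeline):
# sanity-check the two overlap metrics on toy masks. For half-overlapping binary
# masks of equal size, DSC = 0.5 and IoU = 1/3 (up to epsilon).
def _demo_overlap_metrics():
    pred = np.array([0, 1, 1, 0], dtype=np.float32)
    label = np.array([0, 0, 1, 1], dtype=np.int32)
    assert abs(calculate_dsc(pred, label) - 0.5) < 1e-6  # 2*1 / (2+2)
    assert abs(calculate_iou(pred, label) - 1/3) < 1e-6  # 1 / (2+2-1)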
# Calculate Operating Points for Curve
def calculate_operating_points(y, x, op_match=None, verbose=1):
"""
Input:
- y: (monotonically increasing) performance metric, such as the True Positive Rate
- x: (monotonically increasing) performance metric, such as the False Positive Rate
- op_match: dictionary that specifies the target operating point: {
'x': target x value, 'y': target y value
}
Returns:
- dictionary with operating point(s): {
'op_closest_xy_y': y_op, # y value at operating point that matches both x and y of target operating point
'op_closest_xy_x': x_op, # x value at operating point that matches both x and y of target operating point
...
}
"""
    # TODO: currently, a lower sensitivity is preferred over a higher sensitivity if that means the operating point is matched better.
# Would be better to go for the best sensitivity/specificity, if that can be done without hurting the other performance metric.
# In practice, this should not be an issue, as we have many points then.
y = np.array(y)
x = np.array(x)
operating_points = {}
if not np.all(np.diff(y) >= 0) and verbose:
print("Warning: y performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
if not np.all(np.diff(x) >= 0) and verbose:
print("Warning: x performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
# Grab Index of Intersection -> Compute y/TPR and x/FPR @ Index -> Store
op_best_roc_idx = np.argmin(np.abs(y - (1 - x)))
op_best_roc_y = y[op_best_roc_idx]
op_best_roc_x = x[op_best_roc_idx]
operating_points.update(dict(
op_best_roc_idx=op_best_roc_idx,
op_best_roc_y=op_best_roc_y,
op_best_roc_x=op_best_roc_x
))
if op_match is not None:
# calculate operating point closest to target operating point
abs_deficit_x, abs_deficit_y = None, None
optional_x_keys = ['x', 'fpr', 'FPR']
optional_y_keys = ['y', 'tpr', 'TPR', 'sensitivity', 'sens']
        # if the target x value is specified, calculate the difference between target and observed value
for key in optional_x_keys:
if key in op_match:
op_match_x = op_match[key]
abs_deficit_x = np.abs(x - op_match_x)
break
        # if the target y value is specified, calculate the difference between target and observed value
for key in optional_y_keys:
if key in op_match:
op_match_y = op_match[key]
abs_deficit_y = np.abs(y - op_match_y)
break
# if both target x and y values are specified, calculate the difference between the target pair and observed pair
# at the best match, store the observed x and y values
if abs_deficit_x is not None and abs_deficit_y is not None:
            # determine the index of the closest point to the target pair
abs_deficit = abs_deficit_x + abs_deficit_y
op_closest_xy_idx = np.argmin(abs_deficit)
op_closest_xy_y = y[op_closest_xy_idx]
op_closest_xy_x = x[op_closest_xy_idx]
# store
operating_points.update(dict(
op_closest_xy_idx=op_closest_xy_idx,
op_closest_xy_y=op_closest_xy_y,
op_closest_xy_x=op_closest_xy_x
))
# same for matching x only
if abs_deficit_x is not None:
            # determine the index of the closest point to the target value
op_closest_x_idx = np.argmin(abs_deficit_x)
op_closest_x_y = y[op_closest_x_idx]
op_closest_x_x = x[op_closest_x_idx]
# store
operating_points.update(dict(
op_closest_x_idx=op_closest_x_idx,
op_closest_x_y=op_closest_x_y,
op_closest_x_x=op_closest_x_x
))
# same for matching y only
if abs_deficit_y is not None:
            # determine the index of the closest point to the target value
op_closest_y_idx = np.argmin(abs_deficit_y)
op_closest_y_y = y[op_closest_y_idx]
op_closest_y_x = x[op_closest_y_idx]
# store
operating_points.update(dict(
op_closest_y_idx=op_closest_y_idx,
op_closest_y_y=op_closest_y_y,
op_closest_y_x=op_closest_y_x
))
return operating_points
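# Illustrative sketch (a hypothetical helper): match an operating point on a toy
# ROC curve. With op_match={'x': 0.1}, the 'op_closest_x_*' entries give the
# curve point whose x value (FPR) is closest to 0.1.
def _demo_operating_points():
    tpr = [0.0, 0.5, 0.8, 0.9, 1.0]
    fpr = [0.0, 0.05, 0.1, 0.3, 1.0]
    ops = calculate_operating_points(y=tpr, x=fpr, op_match={'x': 0.1})
    assert ops['op_closest_x_x'] == 0.1 and ops['op_closest_x_y'] == 0.8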
# Calculate Statistics for Multiple Curves
def calculate_statistics(metrics, op_match=None, x_start=0., x_end=1., verbose=1):
"""
Calculate statistics, such as the area under the curve, for multiple (independent) curves.
To calculate shared statistics, the curves must be translated to a shared x domain. To
achieve this with virtually no loss of the step-like nature of curves like ROC and FROC,
    the shared x values are derived from the input, and offset by ±1e-10 (the `eps` below).
Input:
- metrics should be a list of tuples with the y & x coordinates for each run:
[([y1, y2, y3, ...], [x1, x2, x3]), # run 1
([y1, y2, y3, ...], [x1, x2, x3]), # run 2
]
- op_match: {
'y': value of y metric (e.g., TPR/sensitivity) to match,
'x': value of x metric (e.g., FPR/false positive rate) to match,
}
    Note: mean and 95% CI are calculated as a function of the shared x.
"""
# construct the array of shared x values
eps = 1e-10
x_shared = np.array([xi for _, x in metrics for xi in x], dtype=np.float64) # collect list of all possible x-values
x_shared = np.ravel(x_shared) # flatten list, if necessary
x_shared = np.append(x_shared, [x_start, x_end]) # add x_start and x_end to ensure correct pAUC calculation
x_shared = np.concatenate((x_shared+eps, x_shared-eps))
x_shared = np.unique(x_shared) # only keep unique x values
x_shared.sort() # sort in ascending order (inplace)
# validate x_start and x_end
assert x_start < x_end, f"x_start must be smaller than x_end! Got x_start={x_start} and x_end={x_end}."
# convert the per-model y (e.g., TPR) vs x (e.g., FPR) to a shared domain
y_shared_all = np.zeros(shape=(len(metrics), len(x_shared)), dtype=np.float32)
auroc_all = []
individually_matched_operating_points = []
for i, (y, x) in enumerate(metrics):
# if necessary, unpack x and y
if len(y) == 1:
y = y[0]
if len(x) == 1:
x = x[0]
# interpolate the y values to the shared x values
y_shared_domain = np.interp(x_shared, x, y)
y_shared_all[i] = y_shared_domain
# calculate AUROC for macro stats
mask = (x_shared >= x_start) & (x_shared <= x_end)
auc_score = auc(x_shared[mask], y_shared_domain[mask])
auroc_all += [auc_score]
# match operating point for each run individually
operating_points = calculate_operating_points(y=y, x=x, op_match=op_match)
individually_matched_operating_points += [operating_points]
# calculate statistics in shared domain
y_shared_mean = np.mean(y_shared_all, axis=0)
y_shared_std = np.std(y_shared_all, axis=0)
y_shared_CI_lower = np.percentile(y_shared_all, 2.5, axis=0)
y_shared_CI_higher = np.percentile(y_shared_all, 97.5, axis=0)
auroc_mean = np.mean(auroc_all)
auroc_std = np.std(auroc_all)
# calculate operating points in shared domain
operating_points = calculate_operating_points(y=y_shared_mean, x=x_shared,
op_match=op_match, verbose=verbose)
# collect results
results = {
# overview statistics
'auroc_mean': auroc_mean,
'auroc_std': auroc_std,
'auroc_all': auroc_all,
# for plotting
'x_shared': x_shared,
'y_shared_all': y_shared_all,
'y_shared_mean': y_shared_mean,
'y_shared_std': y_shared_std,
'y_shared_CI_lower': y_shared_CI_lower,
'y_shared_CI_higher': y_shared_CI_higher,
# individually matched operating point
'individually_matched_operating_points': individually_matched_operating_points,
}
results.update(operating_points)
# calculate standard deviation of each metric (op_closest_xy_y, etc.) between individual runs
individually_matched_operating_points_std = {
f"{key}_std": np.std([
operating_point_info[key]
for operating_point_info in individually_matched_operating_points
])
for key in individually_matched_operating_points[0].keys()
}
results.update(individually_matched_operating_points_std)
return results
# Calculate (partial) Area Under Curve (pAUC) using (x,y) coordinates from the given curve
def calculate_pAUC_from_graph(x, y, pAUC_start: float = 0.0, pAUC_end: float = 1.0, full: bool = False):
"""
Input:
For a single curve:
- x: x values of a curve (e.g., the False Positive Rate points). [x1, x2, .., xn]
- y: y values of a curve (e.g., the True Positive Rate points). [y1, y2, .., yn]
For multiple curves:
- list of x curves, for example the x values observed across multiple runs. [[x1m1, x2m1, .., xnm1], [x1m2, x2m2, ...., xnm2], ..]
- list of y curves, for example the y values observed across multiple runs. [[y1m1, y2m1, .., ynm1], [y1m2, y2m2, ...., ynm2], ..]
- pAUC_start: lower bound of x (e.g., FPR) to compute pAUC
- pAUC_end: higher bound of x (e.g., FPR) to compute pAUC
Returns:
- if (full==False): List of pAUC values for each set of ([x1, ..], [y1, ..]) coordinates
- if (full==True): Metrics as returned by `calculate_statistics` [see there]
Note: function is not specific to the FROC curve
"""
if not isinstance(x[0], (list, np.ndarray)) or not isinstance(y[0], (list, np.ndarray)):
# Have a single set of (x,y) coordinates
assert not isinstance(x[0], (list, np.ndarray)) and not isinstance(y[0], (list, np.ndarray)), \
"Either provide multiple sequences of (x,y) coordinates, or a single sequence. Obtained a mix of both now. "
# Pack coordinates in format expected by `calculate_statistics`
coordinates_joined = [(y, x)]
else:
# Have multiple sets of (x,y) coordinates
# Pack coordinates in format expected by `calculate_statistics`
coordinates_joined = list(zip(y, x))
# Calculate AUC in Given Ranges
results = calculate_statistics(metrics=coordinates_joined, x_start=pAUC_start, x_end=pAUC_end)
if full:
return results
return results['auroc_all']
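# Illustrative sketch (a hypothetical helper): the pAUC of the diagonal y = x
# over x in [0, 0.5] is 0.5 * 0.5 * 0.5 = 0.125. A single (x, y) curve yields a
# single-element list of pAUC values.
def _demo_pauc():
    x = [0.0, 0.5, 1.0]
    y = [0.0, 0.5, 1.0]
    (pauc,) = calculate_pAUC_from_graph(x, y, pAUC_start=0.0, pAUC_end=0.5)
    assert abs(pauc - 0.125) < 1e-4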

code/FROC/blob_preprocess.py (Executable file, 148 lines added)

@@ -0,0 +1,148 @@
import numpy as np
from scipy import ndimage
from typing import List, Tuple, Optional, Union
"""
Extract lesion candidates from a softmax prediction
Authors: anindox8, matinhz, joeranbosma
"""
# Preprocess Softmax Volume (Clipping, Max Confidence)
def preprocess_softmax_static(softmax: np.ndarray,
threshold: float = 0.10,
min_voxels_detection: int = 10,
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
# Load and Preprocess Softmax Image
all_hard_blobs = np.zeros_like(softmax)
confidences = []
clipped_softmax = softmax.copy()
clipped_softmax[softmax < threshold] = 0
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
if num_blobs > 0: # For Each Prediction
for tumor in range(1, num_blobs+1):
# determine mask for current lesion
hard_mask = np.zeros_like(blobs_index)
hard_mask[blobs_index == tumor] = 1
if np.count_nonzero(hard_mask) <= min_voxels_detection:
                # remove tiny detections of <= min_voxels_detection voxels (~0.009 cm^3)
blobs_index[hard_mask.astype(bool)] = 0
continue
# add sufficiently sized detection
hard_blob = hard_mask * clipped_softmax
max_prob = np.max(hard_blob)
if max_prob_round_decimals is not None:
max_prob = np.round(max_prob, max_prob_round_decimals)
hard_blob[hard_blob > 0] = max_prob
all_hard_blobs += hard_blob
confidences.append((tumor, max_prob))
return all_hard_blobs, confidences, blobs_index
def preprocess_softmax_dynamic(softmax: np.ndarray,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True,
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
Author: Joeran Bosma
"""
working_softmax = softmax.copy()
dynamic_hard_blobs = np.zeros_like(softmax)
confidences: List[Tuple[int, float]] = []
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
while len(confidences) < num_lesions_to_extract:
tumor_index = 1 + len(confidences)
# determine max. softmax
max_prob = np.max(working_softmax)
if max_prob < max_prob_failsafe_stopping_threshold:
break
        # set the dynamic threshold relative to the current maximum (max_prob / dynamic_threshold_factor)
        threshold = max_prob / dynamic_threshold_factor
        # extract blobs for dynamic threshold
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
# select blob with max. confidence
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
# thus removed in preprocess_softmax_static.
max_prob = np.max(all_hard_blobs)
mask_current_lesion = (all_hard_blobs == max_prob)
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
mask_current_lesion = (mask_current_lesion_indexed == 1)
# create mask with its confidence
hard_blob = (all_hard_blobs * mask_current_lesion)
        # Detect whether the extracted mask is a ring/hollow sphere
        # around an existing lesion candidate. For confident lesions,
        # the surroundings of the prediction are still quite confident,
        # and can become a second 'detection'. For an example, please
        # see extracted lesion candidates nr. 4 and 5 at:
        # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
# Detection method: grow currently extracted lesions by one voxel,
# and check if they overlap with the current extracted lesion.
        extracted_lesions_grown = ndimage.binary_dilation(dynamic_hard_blobs > 0)
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
# Check if lesion candidate should be retained
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
# store extracted lesion
dynamic_hard_blobs += hard_blob
confidences += [(tumor_index, max_prob)]
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
# remove extracted lesion from working-softmax
working_softmax = (working_softmax * (~mask_current_lesion))
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
def preprocess_softmax(softmax: np.ndarray,
threshold: Union[str, float] = 0.10,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
"""
if threshold == 'dynamic':
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
dynamic_threshold_factor=dynamic_threshold_factor,
num_lesions_to_extract=num_lesions_to_extract,
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
max_prob_round_decimals=max_prob_round_decimals)
elif threshold == 'dynamic-fast':
# determine max. softmax and set a per-case 'static' threshold based on that
max_prob = np.max(softmax)
threshold = float(max_prob / dynamic_threshold_factor)
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
else:
threshold = float(threshold) # convert threshold to float, if it wasn't already
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
return all_hard_blobs, confidences, indexed_pred
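# Illustrative usage sketch (a hypothetical helper): extract lesion candidates
# from a toy softmax volume with the dynamic-threshold strategy. Two
# well-separated blobs come back as two candidates, each with its maximum
# probability as confidence (approximately [(1, 0.9), (2, 0.3)] here).
def _demo_preprocess_softmax():
    softmax = np.zeros((16, 32, 32), dtype=np.float32)
    softmax[4:8, 4:8, 4:8] = 0.9        # strong candidate (64 voxels)
    softmax[10:14, 20:24, 20:24] = 0.3  # weaker candidate (64 voxels)
    hard_blobs, confidences, indexed = preprocess_softmax(softmax, threshold='dynamic')
    assert len(confidences) == 2
    assert hard_blobs.shape == softmax.shape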

code/FROC/cal_froc_from_np.py (Executable file, 643 lines added)

@@ -0,0 +1,643 @@
import argparse
import multiprocessing
import numpy as np
import SimpleITK as sitk
import time
import os
import itertools
import concurrent.futures
from typing import List
import sys
sys.path.append('./../code')
from utils_quintin import *
# from fastMRI_PCa.visualization.visualize import write_array2nifti_t2
from tensorflow.keras.models import load_model
from functools import partial
from focal_loss import BinaryFocalLoss
from scipy import ndimage
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from sklearn.metrics import roc_curve, auc
# from fastMRI_PCa.visualization import save_slice_3d
# from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np
# from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask
# from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s
# from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
try:
import numpy.typing as npt
except ImportError:
pass
############################### PREPROCESSING BLOBS #########################
"""
Extract lesion candidates from a softmax prediction
Authors: anindox8, matinhz, joeranbosma
"""
# Preprocess Softmax Volume (Clipping, Max Confidence)
def preprocess_softmax_static(softmax: np.ndarray,
threshold: float = 0.10,
min_voxels_detection: int = 10,
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
# Load and Preprocess Softmax Image
all_hard_blobs = np.zeros_like(softmax)
confidences = []
clipped_softmax = softmax.copy()
clipped_softmax[softmax < threshold] = 0
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
if num_blobs > 0: # For Each Prediction
for tumor in range(1, num_blobs+1):
# determine mask for current lesion
hard_mask = np.zeros_like(blobs_index)
hard_mask[blobs_index == tumor] = 1
if np.count_nonzero(hard_mask) <= min_voxels_detection:
                # remove tiny detections of <= min_voxels_detection voxels (~0.009 cm^3)
blobs_index[hard_mask.astype(bool)] = 0
continue
# add sufficiently sized detection
hard_blob = hard_mask * clipped_softmax
max_prob = np.max(hard_blob)
if max_prob_round_decimals is not None:
max_prob = np.round(max_prob, max_prob_round_decimals)
hard_blob[hard_blob > 0] = max_prob
all_hard_blobs += hard_blob
confidences.append((tumor, max_prob))
return all_hard_blobs, confidences, blobs_index
def preprocess_softmax_dynamic(softmax: np.ndarray,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True,
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
Author: Joeran Bosma
"""
working_softmax = softmax.copy()
dynamic_hard_blobs = np.zeros_like(softmax)
confidences: List[Tuple[int, float]] = []
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
while len(confidences) < num_lesions_to_extract:
tumor_index = 1 + len(confidences)
# determine max. softmax
max_prob = np.max(working_softmax)
if max_prob < max_prob_failsafe_stopping_threshold:
break
        # set the dynamic threshold relative to the current maximum (max_prob / dynamic_threshold_factor)
        threshold = max_prob / dynamic_threshold_factor
        # extract blobs for dynamic threshold
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
# select blob with max. confidence
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
# thus removed in preprocess_softmax_static.
max_prob = np.max(all_hard_blobs)
mask_current_lesion = (all_hard_blobs == max_prob)
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
mask_current_lesion = (mask_current_lesion_indexed == 1)
# create mask with its confidence
hard_blob = (all_hard_blobs * mask_current_lesion)
        # Detect whether the extracted mask is a ring/hollow sphere
        # around an existing lesion candidate. For confident lesions,
        # the surroundings of the prediction are still quite confident,
        # and can become a second 'detection'. For an example, please
        # see extracted lesion candidates nr. 4 and 5 at:
        # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
# Detection method: grow currently extracted lesions by one voxel,
# and check if they overlap with the current extracted lesion.
        extracted_lesions_grown = ndimage.binary_dilation(dynamic_hard_blobs > 0)
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
# Check if lesion candidate should be retained
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
# store extracted lesion
dynamic_hard_blobs += hard_blob
confidences += [(tumor_index, max_prob)]
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
# remove extracted lesion from working-softmax
working_softmax = (working_softmax * (~mask_current_lesion))
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
def preprocess_softmax(softmax: np.ndarray,
threshold: Union[str, float] = 0.10,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
"""
if threshold == 'dynamic':
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
dynamic_threshold_factor=dynamic_threshold_factor,
num_lesions_to_extract=num_lesions_to_extract,
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
max_prob_round_decimals=max_prob_round_decimals)
elif threshold == 'dynamic-fast':
# determine max. softmax and set a per-case 'static' threshold based on that
max_prob = np.max(softmax)
threshold = float(max_prob / dynamic_threshold_factor)
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
else:
threshold = float(threshold) # convert threshold to float, if it wasn't already
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
return all_hard_blobs, confidences, indexed_pred
################################################################################
# Evaluate all cases
def evaluate(
y_true: np.ndarray,
y_pred: np.ndarray,
min_overlap=0.10,
overlap_func: str = 'DSC',
case_confidence: str = 'max',
multiple_lesion_candidates_selection_criteria='overlap',
allow_unmatched_candidates_with_minimal_overlap=True,
flat: Optional[bool] = None
) -> Dict[str, Any]:
    # treat the stacked arrays as sequences of per-case 3D volumes,
    # so the cases can be processed in parallel below
    y_true_list = y_true
    y_pred_list = y_pred
roc_true, roc_pred = {}, {}
y_list = []
num_lesions = 0
subject_idxs = list(range(len(y_true_list)))
N_CPUS = 2
with ThreadPoolExecutor(max_workers=N_CPUS) as pool:
# define the functions that need to be processed: compute_pred_vector, with each individual
# detection_map prediction, ground truth label and parameters
future_to_args = {
pool.submit(evaluate_case,
y_true,
y_pred,
min_overlap=min_overlap,
overlap_func=overlap_func,
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs)
}
# process the cases in parallel
iterator = concurrent.futures.as_completed(future_to_args)
iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list))
for future in iterator:
try:
res = future.result()
except Exception as e:
print(f"Exception: {e}")
else:
# unpack results
y_list_pat, num_lesions_gt = res
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
# aggregate results
idx = future_to_args[future]
roc_true[idx] = np.max([a[0] for a in y_list_pat])
if case_confidence == 'max':
# take highest lesion confidence as case-level confidence
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
elif case_confidence == 'bayesian':
# if a_i is the probability the i-th lesion is csPCa, then the case-level
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
else:
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
# accumulate outputs
y_list += y_list_pat
num_lesions += num_lesions_gt
# calculate statistics
num_patients = len(roc_true)
# get lesion-level results
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
y_list=y_list, num_patients=num_patients
)
# calculate recall, precision and average precision
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
# calculate case-level AUROC
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs],
y_score=[roc_pred[s] for s in subject_idxs],
pos_label=1)
auc_score = auc(fpr, tpr)
    if flat is None:
        flat = True
if flat:
# flatten roc_true and roc_pred
roc_true_flat = [roc_true[s] for s in subject_idxs]
roc_pred_flat = [roc_pred[s] for s in subject_idxs]
metrics = {
"FP_per_case": convert_np_to_list(FP_per_case),
"sensitivity": convert_np_to_list(sensitivity),
"thresholds": convert_np_to_list(thresholds),
"num_lesions": int(num_lesions),
"num_patients": int(num_patients),
"roc_true": convert_np_to_list(roc_true_flat if flat else roc_true),
"roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred),
"AP": float(AP),
"precision": convert_np_to_list(precision),
"recall": convert_np_to_list(recall),
# patient-level predictions
'auroc': float(auc_score),
'tpr': convert_np_to_list(tpr),
'fpr': convert_np_to_list(fpr),
}
return metrics
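# Illustrative usage sketch (a hypothetical helper): evaluate two toy cases, one
# with a single ground-truth lesion hit by a confident candidate, one benign
# case without predictions.
def _demo_evaluate():
    y_true = np.zeros((2, 8, 16, 16), dtype=np.int32)
    y_true[0, 2:6, 2:6, 2:6] = 1    # one GT lesion in case 0
    y_pred = np.zeros((2, 8, 16, 16), dtype=np.float32)
    y_pred[0, 2:6, 2:6, 2:6] = 0.8  # perfectly overlapping candidate
    metrics = evaluate(y_true=y_true, y_pred=y_pred)
    assert metrics['num_lesions'] == 1 and metrics['num_patients'] == 2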
def convert_np_to_list(flat_numpy_arr):
ans = []
for elem in flat_numpy_arr:
ans.append(float(elem))
return ans
# Calculate FROC metrics (FP rate, sensitivity)
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate FROC metrics from counts
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
FP_per_case = FP / num_patients
return sensitivity, FP_per_case, thresholds, num_lesions
def ap_from_lesion_evaluations(y_list, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate precision (lesion-level)
precision = TP / (TP + FP)
precision = np.append(precision, [0])
# calculate recall (lesion-level)
FN = num_lesions - TP
recall = TP / (TP + FN)
recall = np.append(recall, recall[-1:])
    # calculate average precision (lesion-level)
AP = np.trapz(y=precision, x=recall)
return AP, precision, recall, thresholds
# Calculate macro metrics (true positives (TP), false positives (FP))
def counts_from_lesion_evaluations(
y_list: List[Tuple[int, float, float]],
thresholds: "Optional[npt.NDArray[np.float64]]" = None
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
"""
Calculate true positives (TP) and false positives (FP) as function of threshold,
based on the case evaluations from `evaluate_case`.
"""
# sort predictions
y_list.sort()
# collect targets and predictions
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
# calculate number of lesions
num_lesions = y_true.sum()
if thresholds is None:
# collect thresholds for FROC analysis
thresholds = np.unique(y_pred)
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
        # for >10,000 thresholds: resample to 10,000 unique thresholds, while also
        # keeping all thresholds higher than 0.8 and the 20 lowest thresholds
if len(thresholds) > 10_000:
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
st = [thresholds[i] for i in rng]
low_thresholds = thresholds[-20:]
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
# define placeholders
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
# for each threshold: count FPs and calculate the sensitivity
for i, th in enumerate(thresholds):
if th > 0:
y_pred_thresholded = (y_pred >= th).astype(int)
tp = np.sum(y_true*y_pred_thresholded)
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
# update FROC with new point
FP[i] = fp
TP[i] = tp
else:
# extend FROC curve to infinity
            # TP[i] = TP[-2]  # note: adjusted by Stefan, 11-04-2022
TP[i] = 1
FP[i] = np.inf
return TP, FP, thresholds, num_lesions
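# Illustrative sketch (a hypothetical helper): three case evaluations (two GT
# lesions, one false positive) produce per-threshold TP/FP counts. At threshold
# 0.9 only the confident hit counts; at 0.4 the second hit and the false
# positive are included as well.
def _demo_counts():
    y_list = [(1, 0.9, 0.5), (1, 0.4, 0.3), (0, 0.4, 0.)]
    TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(y_list)
    assert num_lesions == 2
    assert TP[0] == 1 and FP[0] == 0  # threshold 0.9
    assert TP[1] == 2 and FP[1] == 1  # threshold 0.4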
# Compute base prediction metrics TP/FP/FN with associated model confidences
def evaluate_case(
y_true: np.ndarray,
y_pred: np.ndarray,
min_overlap: float = 0.10,
overlap_func: str = 'DSC',
multiple_lesion_candidates_selection_criteria: str = 'overlap',
allow_unmatched_candidates_with_minimal_overlap: bool = True
) -> Tuple[List[Tuple[int, float, float]], int]:
"""
    Gather the list of lesion candidates, and classify each as TP/FP/FN.
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
ground truth mask, use 'overlap' or 'confidence' to choose
which lesion is matched against the ground truth mask.
Returns:
- a list of tuples with:
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
- number of ground truth lesions
"""
y_list: List[Tuple[int, float, float]] = []
if overlap_func == 'IoU':
overlap_func = calculate_iou
elif overlap_func == 'DSC':
overlap_func = calculate_dsc
else:
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
    # convert dtypes: int32 for the label, float32 for the prediction
y_true = y_true.astype('int32')
y_pred = y_pred.astype('float32')
if y_pred.shape[0] < y_true.shape[0]:
print("Warning: padding prediction to match label!")
y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape)
confidences, indexed_pred = parse_detection_map(y_pred)
lesion_candidates_best_overlap: Dict[str, float] = {}
if y_true.any():
# for each malignant scan
labeled_gt, num_gt_lesions = ndimage.label(y_true, np.ones((3, 3, 3)))
        for lesion_id in range(1, num_gt_lesions+1):
            # for each lesion in ground-truth (GT) label
            gt_lesion_mask = (labeled_gt == lesion_id)
# collect indices of lesion candidates that have any overlap with the current GT lesion
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
# collect lesion candidates for current GT lesion
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in overlapping_lesion_candidate_indices:
# calculate overlap between lesion candidate and GT mask
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
# keep track of the highest overlap a lesion candidate has with any GT lesion
lesion_candidates_best_overlap[lesion_candidate_id] = max(
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
)
# store lesion candidate info for current GT mask
lesion_candidates_for_target_gt.append({
'id': lesion_candidate_id,
'confidence': lesion_confidence,
'overlap': overlap_score,
})
if len(lesion_candidates_for_target_gt) == 0:
# no lesion candidate matched with GT mask. Add FN.
y_list.append((1, 0., 0.))
elif len(lesion_candidates_for_target_gt) == 1:
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
candidate_info = lesion_candidates_for_target_gt[0]
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
# overlap between lesion candidate and GT mask is sufficient, add TP
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
else:
# overlap between lesion candidate and GT mask is insufficient, add FN
y_list.append((1, 0., 0.))
else:
# multiple predictions for current GT lesion
# sort lesion candidates based on overlap or confidence
key = multiple_lesion_candidates_selection_criteria
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
gt_lesion_matched = False
for candidate_info in lesion_candidates_for_target_gt:
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
indexed_pred[lesion_pred_mask] = 0
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
gt_lesion_matched = True
break
if not gt_lesion_matched:
# ground truth lesion not matched to a lesion candidate. Add FN.
y_list.append((1, 0., 0.))
# Remaining lesions are FPs
remaining_lesions = set(np.unique(indexed_pred))
remaining_lesions -= {0} # remove index 0, if present
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in remaining_lesions:
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
# In this operation mode, this lesion candidate is not considered as a false positive.
pass
else:
y_list.append((0, lesion_confidence, 0.)) # add FP
else:
# for benign case, all predictions are FPs
num_gt_lesions = 0
if len(confidences) > 0:
for _, lesion_confidence in confidences:
y_list.append((0, lesion_confidence, 0.))
else:
y_list.append((0, 0., 0.)) # avoid empty list
return y_list, num_gt_lesions
# Calculate Intersection over Union (IoU) for N-D Arrays
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
iou_num = np.sum(predictions[labels == 1])
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
return float((iou_num + epsilon) / (iou_denom + epsilon))
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
dsc_num = np.sum(predictions[labels == 1]) * 2.0
dsc_denom = np.sum(predictions) + np.sum(labels)
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
assert isinstance(image, np.ndarray)
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
"Target size doesn't fit image size"
rank = len(img_size) # image dimensions
# placeholders for new shape
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
to_padding = [[0, 0] for _ in range(rank)]
slicer = [slice(None)] * rank
# for each dimension, determine process (cropping or padding)
for i in range(rank):
if image.shape[i] < img_size[i]:
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
else:
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
from_indices[i][1] = from_indices[i][0] + img_size[i]
# create slicer object to crop/leave each dimension
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
# pad cropped image to extend missing dimension
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
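# Illustrative sketch (a hypothetical helper): crop the first axis and zero-pad
# the second in a single call; keyword arguments are forwarded to np.pad.
def _demo_resize():
    image = np.ones((10, 4, 8), dtype=np.float32)
    resized = resize_image_with_crop_or_pad(image, img_size=(8, 6, 8), mode='constant')
    assert resized.shape == (8, 6, 8)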
# Parse Detection Maps to Individual Lesions + Likelihoods
def parse_detection_map(detection_map):
# Label All Non-Connected Components in Detection Map
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
confidences = []
if num_blobs > 0:
# For Each Lesion Detection
for tumor_index in range(1, num_blobs+1):
# Extract Mask of Current Lesion
# hard_blob = np.zeros_like(blobs_index)
# hard_blob[blobs_index == tumor] = 1
# TODO: replace above with the following? Is faster I think.
# hard_blob = (blobs_index == tumor).astype(int)
# Extract Max Predicted Likelihood for Lesion Detection
# max_prob = np.max(hard_blob) # <- this is always 1
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
# Store Predicted Likelihood per Lesion Detection
max_prob = detection_map[blobs_index == tumor_index].max()
confidences.append((tumor_index, max_prob))
return confidences, blobs_index
############################### CONSTANTS ####################################
# Quintin code removed
############################### CONSTANTS ####################################
def move_dims(arr):
    # UMCG numpy dimensions convention: dims = (batch, width, height, depth)
    # Joeran numpy dimensions convention: dims = (batch, depth, height, width)
arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently.
arr = np.moveaxis(arr, 3, 2)
return arr
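# Illustrative sketch (a hypothetical helper): shapes before and after the axis
# reordering described above.
def _demo_move_dims():
    arr = np.zeros((2, 160, 192, 24))                 # (batch, width, height, depth)
    assert move_dims(arr).shape == (2, 24, 192, 160)  # (batch, depth, height, width)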
############################### CONSTANTS ####################################
# # Reconstruction model directory
# RECON_DIR = "11_new_recon_multiprocess"
# T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5"
# # Path to file with train, validation and test indexes.
# IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml"
# # Diagnostic models
# diag_models = [r"models/14_long_diag/best-direct-fold0.h5",
# r"models/15_long_diag_early_stopping/best-direct-fold0.h5"]
# # x_true_val = np.load("temp/x_true_val.npy")
# print("loading numpy arrays")
# y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy"))
# y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy"))
# y_true_val = move_dims(y_true_val)
# y_pred_val = move_dims(y_pred_val)
# # Preprocess the blobs individually
# for mri_idx in range(y_pred_val.shape[0]):
# print(f"preprocessing mri idx {mri_idx}")
# y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0]
# print(f"Start FROC evaluation.")
# metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val)
# dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True)

code/FROC/data_utils.py (Executable file, 39 lines added)

@@ -0,0 +1,39 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import json
def save_metrics(metrics, file_path=None):
# convert dtypes to stock Python
save_metrics = sterilize(metrics)
# save metrics using safe file write
file_path_tmp = file_path + '.tmp'
with open(file_path_tmp, 'w') as fp:
json.dump(save_metrics, fp, indent=4)
os.rename(file_path_tmp, file_path)
def sterilize(obj):
if isinstance(obj, dict):
return {k: sterilize(v) for k, v in obj.items()}
elif isinstance(obj, (list, tuple, np.ndarray)):
return [sterilize(v) for v in obj]
elif isinstance(obj, (str, int, bool, float)):
return obj
else:
return obj.__repr__()
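# Illustrative sketch (a hypothetical helper): numpy containers and float64
# scalars are converted to stock Python types, so the result can be passed to
# json.dump. Note that np.float32 scalars are not Python floats and would fall
# through to repr().
def _demo_sterilize():
    metrics = {'auroc': np.float64(0.87), 'sensitivity': np.array([0.5, 1.0])}
    clean = sterilize(metrics)
    assert clean == {'auroc': 0.87, 'sensitivity': [0.5, 1.0]}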

code/FROC/froc.py (Executable file, 505 lines added)

@@ -0,0 +1,505 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from tqdm import tqdm
from scipy import ndimage
from sklearn.metrics import roc_curve, auc
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import itertools
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
try:
import numpy.typing as npt
except ImportError:
pass
from image_utils import (
resize_image_with_crop_or_pad, read_label, read_prediction
)
from analysis_utils import (
parse_detection_map, calculate_iou, calculate_dsc
)
# Compute base prediction metrics TP/FP/FN with associated model confidences
def evaluate_case(
detection_map: "Union[npt.NDArray[np.float32], str]",
label: "Union[npt.NDArray[np.int32], str]",
min_overlap: float = 0.10,
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
multiple_lesion_candidates_selection_criteria: str = 'overlap',
allow_unmatched_candidates_with_minimal_overlap: bool = True
) -> Tuple[List[Tuple[int, float, float]], int]:
"""
    Gather the list of lesion candidates, and classify each as TP/FP/FN.
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
ground truth mask, use 'overlap' or 'confidence' to choose
which lesion is matched against the ground truth mask.
Returns:
- a list of tuples with:
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
- number of ground truth lesions
"""
y_list: List[Tuple[int, float, float]] = []
if isinstance(label, str):
label = read_label(label)
if isinstance(detection_map, str):
detection_map = read_prediction(detection_map)
if overlap_func == 'IoU':
overlap_func = calculate_iou
elif overlap_func == 'DSC':
overlap_func = calculate_dsc
else:
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
    # convert dtypes: int32 for the label, float32 for the prediction
label = label.astype('int32')
detection_map = detection_map.astype('float32')
if detection_map.shape[0] < label.shape[0]:
print("Warning: padding prediction to match label!")
detection_map = resize_image_with_crop_or_pad(detection_map, label.shape)
confidences, indexed_pred = parse_detection_map(detection_map)
lesion_candidates_best_overlap: Dict[str, float] = {}
    # note to Stefan: check whether the if statements are correct and that the appends are performed correctly
if label.any():
# for each malignant scan
labeled_gt, num_gt_lesions = ndimage.label(label, np.ones((3, 3, 3)))
# print("test3, werkt if label.any", num_gt_lesions) WE
        for lesion_id in range(1, num_gt_lesions+1):
            # for each lesion in ground-truth (GT) label
            gt_lesion_mask = (labeled_gt == lesion_id)
# collect indices of lesion candidates that have any overlap with the current GT lesion
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
# collect lesion candidates for current GT lesion
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in overlapping_lesion_candidate_indices:
# calculate overlap between lesion candidate and GT mask
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
# keep track of the highest overlap a lesion candidate has with any GT lesion
lesion_candidates_best_overlap[lesion_candidate_id] = max(
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
)
# store lesion candidate info for current GT mask
lesion_candidates_for_target_gt.append({
'id': lesion_candidate_id,
'confidence': lesion_confidence,
'overlap': overlap_score,
})
print("test 4, lesion_candidates_for_target_gt:",lesion_candidates_for_target_gt)
# Min overlap wordt niet behaald: +- 0.001
if len(lesion_candidates_for_target_gt) == 0:
# no lesion candidate matched with GT mask. Add FN.
y_list.append((1, 0., 0.))
elif len(lesion_candidates_for_target_gt) == 1:
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
candidate_info = lesion_candidates_for_target_gt[0]
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
# overlap between lesion candidate and GT mask is sufficient, add TP
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
else:
# overlap between lesion candidate and GT mask is insufficient, add FN
y_list.append((1, 0., 0.))
else:
# multiple predictions for current GT lesion
# sort lesion candidates based on overlap or confidence
key = multiple_lesion_candidates_selection_criteria
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
gt_lesion_matched = False
for candidate_info in lesion_candidates_for_target_gt:
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
indexed_pred[lesion_pred_mask] = 0
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
gt_lesion_matched = True
break
if not gt_lesion_matched:
# ground truth lesion not matched to a lesion candidate. Add FN.
y_list.append((1, 0., 0.))
# Remaining lesions are FPs
remaining_lesions = set(np.unique(indexed_pred))
remaining_lesions -= {0} # remove index 0, if present
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in remaining_lesions:
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
# In this operation mode, this lesion candidate is not considered as a false positive.
pass
else:
y_list.append((0, lesion_confidence, 0.)) # add FP
# print("test 4, gaat alles hiernaartoe?") == JA
# print("test 3, hoe ziet y_list eruit na labels",y_list)
else:
# for benign case, all predictions are FPs
num_gt_lesions = 0
if len(confidences) > 0:
for _, lesion_confidence in confidences:
y_list.append((0, lesion_confidence, 0.))
else:
y_list.append((0, 0., 0.)) # avoid empty list
return y_list, num_gt_lesions
# Calculate macro metrics (true positives (TP), false positives (FP))
def counts_from_lesion_evaluations(
y_list: List[Tuple[int, float, float]],
thresholds: "Optional[npt.NDArray[np.float64]]" = None
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
"""
Calculate true positives (TP) and false positives (FP) as function of threshold,
based on the case evaluations from `evaluate_case`.
"""
# sort predictions
# print("test 4, zitten er predicties bij leasions voor sort?",y_list)
y_list.sort()
# print("test 4, zitten er predicties bij leasions na sort?",y_list,'len y_lsit:',len(y_list))
# collect targets and predictions
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
# print("test,zijn de laatste y-pred hoog?", y_pred)
# calculate number of lesions
num_lesions = y_true.sum()
if thresholds is None:
# collect thresholds for FROC analysis
thresholds = np.unique(y_pred)
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
        # for >10,000 thresholds: resample to 10,000 unique thresholds, while also
        # keeping all thresholds higher than 0.8 and the 20 lowest thresholds
if len(thresholds) > 10_000:
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
st = [thresholds[i] for i in rng]
low_thresholds = thresholds[-20:]
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
# define placeholders
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
# for each threshold: count FPs and calculate the sensitivity
for i, th in enumerate(thresholds):
if th > 0:
y_pred_thresholded = (y_pred >= th).astype(int)
tp = np.sum(y_true*y_pred_thresholded)
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
# print("test, is y_pred_thresholded altijd 0?",y_pred_thresholded)
# update FROC with new point
FP[i] = fp
TP[i] = tp
else:
# extend FROC curve to infinity
TP[i] = TP[-2]
FP[i] = np.inf
# print("test if tp werkt",TP)
# print("test if fp werkt",FP)
return TP, FP, thresholds, num_lesions
# Calculate FROC metrics (FP rate, sensitivity)
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate FROC metrics from counts
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
    # print('test, below is the TP value:', TP)
    # print('test, below is the num_lesions value:', num_lesions)
FP_per_case = FP / num_patients
return sensitivity, FP_per_case, thresholds, num_lesions
def ap_from_lesion_evaluations(y_list, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate precision (lesion-level)
precision = TP / (TP + FP)
precision = np.append(precision, [0])
# calculate recall (lesion-level)
FN = num_lesions - TP
recall = TP / (TP + FN)
recall = np.append(recall, recall[-1:])
    # calculate average precision (lesion-level)
AP = np.trapz(y=precision, x=recall)
return AP, precision, recall, thresholds
# Compute full FROC
def froc(
y_det: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
y_true: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
subject_list: Optional[Iterable[Hashable]] = None,
min_overlap=0.10,
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
case_confidence: str = 'max',
multiple_lesion_candidates_selection_criteria='overlap',
allow_unmatched_candidates_with_minimal_overlap=True,
flat: Optional[bool] = None,
num_parallel_calls: int = 8,
verbose: int = 0,
) -> Dict[str, Any]:
"""
FROC evaluation pipeline
(written 19 January 2022 by Joeran Bosma)
Usage:
For normal usage of the FROC evaluation pipeline, use the function `froc` with parameters
`y_det`, `y_true` and (optional) `subject_list`. Please note that this function is written
for binary 3D FROC analysis.
- `y_det`: iterable of all detection_map volumes to evaluate. Alternatively, y_det may contain
filenames ending in .nii.gz/.mha/.mhd/.npy/.npz, which will be loaded on-the-fly.
Provide an array of shape `(num_samples, D, H, W)`, where D, H, W are the spatial
dimensions (depth, height and width).
    - `y_true`: iterable of all ground truth labels. Alternatively, y_true may contain filenames
ending in .nii.gz/.mha/.mhd/.npy/.npz, which should contain binary labels and
will be loaded on-the-fly. Provide an array of the same shape as `y_det`. Use
`1` to encode ground truth lesion, and `0` to encode background.
Additional settings:
For more control over the FROC evaluation pipeline, use:
- `min_overlap`: defines the minimal required Intersection over Union (IoU) or Dice similarity
coefficient (DSC) between a lesion candidate and ground truth lesion, to be
counted as a true positive detection.
"""
# Initialize Lists
roc_true = {}
roc_pred = {}
y_list = []
num_lesions = 0
if subject_list is None:
# generate indices to keep track of each case during multiprocessing
subject_list = itertools.count()
if flat is None:
flat = True
with ThreadPoolExecutor(max_workers=num_parallel_calls) as pool:
# define the functions that need to be processed: compute_pred_vector, with each individual
# detection_map prediction, ground truth label and parameters
future_to_args = {
pool.submit(evaluate_case, y_pred, y_true, min_overlap=min_overlap, overlap_func=overlap_func,
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
for (y_pred, y_true, idx) in zip(y_det, y_true, subject_list)
}
# process the cases in parallel
iterator = concurrent.futures.as_completed(future_to_args)
if verbose:
total: Optional[int] = None
if isinstance(subject_list, Sized):
total = len(subject_list)
iterator = tqdm(iterator, desc='Computing FROC', total=total)
for future in iterator:
try:
res = future.result()
except Exception as e:
print(f"Exception: {e}")
else:
# unpack results
y_list_pat, num_lesions_gt = res
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
# print("test 3,", y_list_pat)
# aggregate results
idx = future_to_args[future]
# print("test2, indx", idx)
                # test: everything was read in
roc_true[idx] = np.max([a[0] for a in y_list_pat])
# print("test2, roc_true",roc_true)
if case_confidence == 'max':
# take highest lesion confidence as case-level confidence
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
elif case_confidence == 'bayesian':
# if a_i is the probability the i-th lesion is csPCa, then the case-level
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
else:
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
# accumulate outputs
y_list += y_list_pat
num_lesions += num_lesions_gt
# print("test2,heeft y-list ook leasie pred:",y_list)
# calculate statistics
num_patients = len(roc_true)
# get lesion-level results
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
y_list=y_list, num_patients=num_patients
)
# calculate recall, precision and average precision
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
    # calculate case-level AUROC
    # note: iterate over the collected case identifiers rather than `subject_list`,
    # which may be a partially consumed `itertools.count()` iterator at this point
    case_ids = list(roc_true.keys())
    fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in case_ids],
                            y_score=[roc_pred[s] for s in case_ids],
                            pos_label=1)
    auc_score = auc(fpr, tpr)
    if flat:
        # flatten roc_true and roc_pred into lists ordered by case identifier
        roc_true_flat = [roc_true[s] for s in case_ids]
        roc_pred_flat = [roc_pred[s] for s in case_ids]
metrics = {
"FP_per_case": FP_per_case,
"sensitivity": sensitivity,
"thresholds": thresholds,
"num_lesions": num_lesions,
"num_patients": num_patients,
"roc_true": (roc_true_flat if flat else roc_true),
"roc_pred": (roc_pred_flat if flat else roc_pred),
"AP": AP,
"precision": precision,
"recall": recall,
# patient-level predictions
'auroc': auc_score,
'tpr': tpr,
'fpr': fpr,
}
return metrics
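# Example (an illustrative sketch): the metrics returned by `froc` can be used to
# plot the FROC curve, e.g. with matplotlib (not imported in this module):
#   metrics = froc(y_det=y_det, y_true=y_true)
#   plt.plot(metrics["FP_per_case"], metrics["sensitivity"])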
def froc_for_folder(
y_det_dir: Union[Path, str],
y_true_dir: Optional[Union[Path, str]] = None,
subject_list: Optional[List[str]] = None,
min_overlap: float = 0.10,
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
case_confidence: str = 'max',
flat: Optional[bool] = None,
num_parallel_calls: int = 8,
verbose: int = 1
) -> Dict[str, Any]:
"""
Perform FROC evaluation for all samples found in y_det_dir, or the samples specified in the subject_list
Input:
- y_det_dir: path to folder containing the detection maps
    - y_true_dir: (optional) allows labels to be stored in a different folder than the detection maps
- min_overlap: minimum overlap threshold
- overlap_func: intersection over union (IoU), Dice similarity coefficient (DSC), or custom function
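    Example (an illustrative sketch; the directory names are hypothetical):
        metrics = froc_for_folder(y_det_dir="./detection_maps", y_true_dir="./labels")
        print(metrics["AP"], metrics["auroc"])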
"""
if y_true_dir is None:
y_true_dir = y_det_dir
y_det = []
y_true = []
if subject_list:
# collect the detection maps and labels for each case specified in the subject list
for subject_id in subject_list:
# construct paths to detection maps and labels
for postfix in [
"_detection_map.nii.gz", "_detection_map.npy", "_detection_map.npz",
".nii.gz", ".npy", ".npz", "_pred.nii.gz",
]:
detection_path = os.path.join(y_det_dir, f"{subject_id}{postfix}")
if os.path.exists(detection_path):
break
for postfix in [
"_label.nii.gz", "label.npy", "label.npz", "_seg.nii.gz",
]:
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
if os.path.exists(label_path):
break
if not os.path.exists(label_path):
assert y_true_dir != y_det_dir, f"Could not find label for {subject_id}!"
for postfix in [
".nii.gz", ".npy", ".npz",
]:
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
if os.path.exists(label_path):
break
# collect file paths
y_det += [detection_path]
y_true += [label_path]
else:
# collect all detection maps found in detection_map_dir
file_list = sorted(os.listdir(y_det_dir))
subject_list = []
if verbose >= 1:
print(f"Found {len(file_list)} files in the input directory, collecting detection_mapes with " +
"_detection_map.nii.gz and labels with _label.nii.gz..")
# collect filenames of detection_map predictions and labels
for fn in file_list:
if '_detection_map' in fn:
y_det += [os.path.join(y_det_dir, fn)]
y_true += [os.path.join(y_true_dir, fn.replace('_detection_map', '_label'))]
subject_list += [fn]
# ensure files exist
    for detection_path in y_det:
        assert os.path.exists(detection_path), f"Could not find detection map at {detection_path}!"
    for label_path in y_true:
        assert os.path.exists(label_path), f"Could not find label at {label_path}!"
if verbose >= 1:
print(f"Found prediction and label for {len(y_det)} cases. Here are some examples:")
print(subject_list[0:5])
# perform FROC evaluation with compiled file lists
return froc(y_det=y_det, y_true=y_true, subject_list=subject_list,
min_overlap=min_overlap, overlap_func=overlap_func, case_confidence=case_confidence,
flat=flat, num_parallel_calls=num_parallel_calls, verbose=verbose)

83
code/FROC/image_utils.py Executable file
View File

@@ -0,0 +1,83 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import SimpleITK as sitk
from pathlib import Path
from typing import Union
try:
import numpy.typing as npt
except ImportError:
pass
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
assert isinstance(image, np.ndarray)
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
"Target size doesn't fit image size"
rank = len(img_size) # image dimensions
# placeholders for new shape
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
to_padding = [[0, 0] for _ in range(rank)]
slicer = [slice(None)] * rank
# for each dimension, determine process (cropping or padding)
for i in range(rank):
if image.shape[i] < img_size[i]:
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
else:
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
from_indices[i][1] = from_indices[i][0] + img_size[i]
# create slicer object to crop/leave each dimension
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
# pad cropped image to extend missing dimension
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
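# Example (illustrative): zero-pad a 50x50x50 volume up to 64x64x64, or center-crop
# a 70x70x70 volume down to 64x64x64; extra kwargs are passed through to np.pad:
#   resized = resize_image_with_crop_or_pad(volume, img_size=(64, 64, 64), mode='constant')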
def read_image(path: Union[Path, str]):
"""Read image, given a filepath"""
if isinstance(path, Path):
path = path.as_posix()
else:
assert isinstance(path, str), f"Unexpected path type: {type(path)}. Please provide a Path or str."
if '.npy' in path:
return np.load(path)
    elif '.nii' in path or '.mha' in path or '.mhd' in path:
return sitk.GetArrayFromImage(sitk.ReadImage(path))
elif '.npz' in path:
        return np.load(path)['softmax'].astype('float32')[1]  # nnU-Net .npz format: take the foreground (lesion) softmax channel
else:
raise ValueError(f"Unexpected file path. Supported file formats: .nii(.gz), .mha, .npy and .npz. Got: {path}.")
def read_prediction(path: Union[Path, str]) -> "npt.NDArray[np.float32]":
"""Read prediction, given a filepath"""
# read prediction and ensure correct dtype
pred: "npt.NDArray[np.float32]" = np.array(read_image(path), dtype=np.float32)
return pred
def read_label(path: Union[Path, str]) -> "npt.NDArray[np.int32]":
"""Read label, given a filepath"""
# read label and ensure correct dtype
lbl: "npt.NDArray[np.int32]" = np.array(read_image(path), dtype=np.int32)
return lbl
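# Example (illustrative; the file names are hypothetical):
#   det = read_prediction("case1_detection_map.nii.gz")  # float32 detection map
#   lbl = read_label("case1_label.nii.gz")               # int32 binary label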