commit before migration to habrok
This commit is contained in:
parent
9468dadfa3
commit
ad595bb25e
|
@ -139,7 +139,6 @@ dmypy.json
|
|||
cython_debug/
|
||||
|
||||
/build/
|
||||
/code/
|
||||
/old_code/
|
||||
/data/
|
||||
/job_scripts/
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
print('verkeerde code')
|
||||
from .batchgenerator import *
|
||||
from .callbacks import *
|
||||
from .helpers import *
|
||||
from .preprocessing_function import *
|
||||
from .unet import *
|
|
@ -0,0 +1,64 @@
|
|||
import math
|
||||
from typing import Callable, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
from tensorflow.keras.utils import Sequence
|
||||
|
||||
class BatchGenerator(Sequence):
|
||||
def __init__(self,
|
||||
images: Dict[str, List[np.ndarray]],
|
||||
segmentations: List[np.ndarray],
|
||||
sequences: List[str],
|
||||
shape: Tuple[int],
|
||||
indexes: List[int] = None,
|
||||
batch_size: int = 5,
|
||||
shuffle: bool = False,
|
||||
augmentation_function: Optional[Callable\
|
||||
[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]] = None):
|
||||
|
||||
self.images = images
|
||||
self.segmentations = segmentations
|
||||
self.sequences = sequences
|
||||
self.indexes = indexes
|
||||
self.batch_size = batch_size
|
||||
self.shuffle = shuffle
|
||||
self.augmentation = augmentation_function
|
||||
|
||||
self.num_rows = len(images)
|
||||
if self.indexes == None:
|
||||
self.indexes = list(range(self.num_rows))
|
||||
|
||||
if type(indexes) == int:
|
||||
self.indexes = list(range(self.indexes))
|
||||
|
||||
if self.batch_size == None:
|
||||
self.batch_size = len(self.indexes)
|
||||
|
||||
# Stack image index as index 0, stack sequences as index -1
|
||||
self._images = np.stack(
|
||||
[np.stack(self.images[k], axis=0) for k in sequences], axis=-1)
|
||||
self._segmentations = np.stack(segmentations, axis=0)[..., np.newaxis]
|
||||
|
||||
def __len__(self):
|
||||
return math.ceil(len(self.indexes) / self.batch_size)
|
||||
|
||||
def __getitem__(self, batch_idx):
|
||||
# Get image / segmentation indexes for current batch
|
||||
indexes = self.indexes[batch_idx*self.batch_size:(1+batch_idx)*self.batch_size]
|
||||
|
||||
# Get matching images and segmentations
|
||||
images = self._images[indexes]
|
||||
segmentations = self._segmentations[indexes]
|
||||
|
||||
# Apply data augmentation if a function is provided
|
||||
if self.augmentation:
|
||||
for img_idx in range(len(indexes)):
|
||||
images[img_idx], segmentations[img_idx] = self.augmentation(
|
||||
images[img_idx], segmentations[img_idx])
|
||||
return images, segmentations
|
||||
|
||||
def on_epoch_end(self):
|
||||
super().on_epoch_end()
|
||||
|
||||
if self.shuffle:
|
||||
np.random.shuffle(self.indexes)
|
|
@ -0,0 +1,96 @@
|
|||
import numpy as np
|
||||
import SimpleITK as sitk
|
||||
from helpers import *
|
||||
import tensorflow.keras.backend as K
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras.callbacks import Callback
|
||||
from sklearn.metrics import roc_auc_score, roc_curve
|
||||
|
||||
def dice_coef(y_true, y_pred):
|
||||
y_true_f = K.flatten(y_true)
|
||||
y_pred_f = K.round(K.flatten(y_pred))
|
||||
intersection = K.sum(y_true_f * y_pred_f)
|
||||
return (2. * intersection + K.epsilon()) / (
|
||||
K.sum(y_true_f) + K.sum(y_pred_f) + K.epsilon())
|
||||
|
||||
|
||||
# def auc_value(y_true, y_pred):
|
||||
# # print_("start ROC_AUC")
|
||||
# # y_true_old = self.validation_set[1].squeeze()
|
||||
# # y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze())
|
||||
|
||||
# y_true_auc = K.flatten(y_true)
|
||||
# # print_(f"The shape of y_true = {np.shape(y_true)}" )
|
||||
|
||||
# y_pred_auc = K.round(K.flatten(y_pred))
|
||||
# # print_(f"The shape of y_pred = {np.shape(y_pred)}" )
|
||||
|
||||
# # fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
|
||||
# auc = roc_auc_score(y_true_auc, y_pred_auc)
|
||||
# print_('AUC:', auc)
|
||||
# print_('AUC shape:', np.shape(auc))
|
||||
# # print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}')
|
||||
# return auc
|
||||
|
||||
|
||||
class IntermediateImages(Callback):
|
||||
def __init__(self, validation_set, prefix, sequences,
|
||||
num_images=10):
|
||||
self.prefix = prefix
|
||||
self.num_images = num_images
|
||||
self.validation_set = (
|
||||
validation_set[0][:num_images, ...],
|
||||
validation_set[1][:num_images, ...]
|
||||
)
|
||||
|
||||
# Export scan crops and targets once
|
||||
# they don't change during training so we export them only once
|
||||
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
|
||||
for s_idx, s in enumerate(sequences):
|
||||
img_s = sitk.GetImageFromArray(
|
||||
self.validation_set[0][i][..., s_idx].squeeze().T)
|
||||
sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz")
|
||||
seg_s = sitk.GetImageFromArray(
|
||||
self.validation_set[1][i].squeeze().T)
|
||||
sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz")
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
# Predict on the validation_set
|
||||
predictions = self.model.predict(self.validation_set, batch_size=1)
|
||||
|
||||
# print_("start ROC_AUC")
|
||||
# y_true_old = self.validation_set[1].squeeze()
|
||||
# y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze())
|
||||
|
||||
# y_true = np.array(y_true_old).flatten()
|
||||
# print_(f"The shape of y_true = {np.shape(y_true)}" )
|
||||
|
||||
# y_pred = np.array(y_pred_old).flatten()
|
||||
# print_(f"The shape of y_pred = {np.shape(y_pred)}" )
|
||||
|
||||
# fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
|
||||
# auc = roc_auc_score(y_true, y_pred)
|
||||
# print_('AUC:', auc)
|
||||
# print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}')
|
||||
|
||||
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
|
||||
prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T)
|
||||
prd_bin_s = sitk.GetImageFromArray(
|
||||
np.around(predictions[i]).astype(np.float32).squeeze().T)
|
||||
sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz")
|
||||
sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz")
|
||||
|
||||
# class RocCallback(Callback):
|
||||
# def __init__(self,validation_data):
|
||||
# # self.x = training_data[0]
|
||||
# # self.y = training_data[1]
|
||||
# self.x_val = validation_data[0]
|
||||
# self.y_val = validation_data[1]
|
||||
|
||||
# def on_epoch_end(self, epoch, logs={}):
|
||||
# # y_pred_train = self.model.predict_proba(self.x)
|
||||
# # roc_train = roc_auc_score(self.y, y_pred_train)
|
||||
# y_pred_val = self.model.predict(self.x_val, batch_size=1)
|
||||
# roc_val = roc_auc_score(self.y_val, y_pred_val)
|
||||
# print('\rroc-auc_train: %s - roc-auc_val: %s' % (str(round(roc_val,4))),end=100*' '+'\n')
|
||||
# return
|
|
@ -0,0 +1,239 @@
|
|||
import os
|
||||
from os import path
|
||||
|
||||
import numpy as np
|
||||
import SimpleITK as sitk
|
||||
from scipy import ndimage
|
||||
import yaml
|
||||
|
||||
def print_(*args, **kwargs):
|
||||
print(*args, **kwargs, flush=True)
|
||||
|
||||
def print_config(config, depth=0):
|
||||
for k in config:
|
||||
print(end=(" "*depth*2) + f"- {k}:")
|
||||
if type(config[k]) == dict:
|
||||
print()
|
||||
print_config(config[k], depth+1)
|
||||
else:
|
||||
print(f"\t{config[k]}")
|
||||
print_(end="")
|
||||
|
||||
def prepare_project_dir(project_dir):
|
||||
"""
|
||||
Prepares the work directory for a project
|
||||
TODO: Create text file containing configuration
|
||||
"""
|
||||
def _makedir(n):
|
||||
os.makedirs(path.join(project_dir, n), exist_ok=True)
|
||||
|
||||
_makedir("models")
|
||||
_makedir("logs")
|
||||
_makedir("data")
|
||||
_makedir("docs")
|
||||
_makedir("samples")
|
||||
_makedir("output")
|
||||
|
||||
|
||||
|
||||
|
||||
def fake_loss(*args, **kwargs):
|
||||
return 0.
|
||||
|
||||
def center_crop(
|
||||
image: sitk.Image, shape: list, offset: list = None
|
||||
) -> sitk.Image:
|
||||
"""Extracts a region of interest from the center of an SITK image.
|
||||
|
||||
Parameters:
|
||||
image: Input image (SITK).
|
||||
shape: The shape of the
|
||||
|
||||
Returns: Cropped image (SITK)
|
||||
"""
|
||||
|
||||
size = image.GetSize()
|
||||
|
||||
# Determine the centroid
|
||||
centroid = [sz / 2 for sz in size]
|
||||
|
||||
# Determine the origin for the bounding box by subtracting half the
|
||||
# shape of the bounding box from each dimension of the centroid.
|
||||
box_start = [int(c - sh / 2) for c, sh in zip(centroid, shape)]
|
||||
|
||||
if offset:
|
||||
box_start = [b - o for b, o in zip(box_start, offset)]
|
||||
|
||||
# Extract the region of provided shape starting from the previously
|
||||
# determined starting pixel.
|
||||
region_extractor = sitk.RegionOfInterestImageFilter()
|
||||
region_extractor.SetSize(shape)
|
||||
region_extractor.SetIndex(box_start)
|
||||
cropped_image = region_extractor.Execute(image)
|
||||
|
||||
return cropped_image
|
||||
|
||||
def augment(img, seg,
|
||||
noise_chance = 0.3,
|
||||
noise_mult_max = 0.001,
|
||||
rotate_chance = 0.2,
|
||||
rotate_max_angle = 30,
|
||||
):
|
||||
#noise_mult_max was initially 0.1
|
||||
if np.random.uniform() < noise_chance:
|
||||
img = augment_noise(img, np.random.uniform(0., noise_mult_max))
|
||||
|
||||
if np.random.uniform() < rotate_chance:
|
||||
img, seg = augment_rotate(img, seg,
|
||||
angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle))
|
||||
img, seg = augment_tilt(img, seg,
|
||||
angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle))
|
||||
|
||||
return img, seg
|
||||
|
||||
def augment_noise(img, multiplier):
|
||||
noise = np.random.standard_normal(img.shape) * multiplier
|
||||
return img + noise
|
||||
|
||||
def augment_rotate(img, seg, angle):
|
||||
img = ndimage.rotate(img, angle, reshape=False, cval=0)
|
||||
seg = ndimage.rotate(seg, angle, reshape=False, order=0)
|
||||
return img, seg
|
||||
|
||||
def augment_tilt(img, seg, angle):
|
||||
img = ndimage.rotate(img, angle, reshape=False, axes=(2,1), cval=0)
|
||||
seg = ndimage.rotate(seg, angle, reshape=False, axes=(2,1), order=0)
|
||||
return img, seg
|
||||
|
||||
def resample(
|
||||
image: sitk.Image, min_shape: list, method=sitk.sitkLinear,
|
||||
new_spacing: list=[1, 1, 3.6]
|
||||
) -> sitk.Image:
|
||||
"""Resamples an image to given target spacing and shape.
|
||||
|
||||
Parameters:
|
||||
image: Input image (SITK).
|
||||
shape: Minimum output shape for the underlying array.
|
||||
method: SimpleITK interpolator to use for resampling.
|
||||
(e.g. sitk.sitkNearestNeighbor, sitk.sitkLinear)
|
||||
new_spacing: The new spacing to resample to.
|
||||
|
||||
Returns:
|
||||
int: Resampled image
|
||||
|
||||
"""
|
||||
|
||||
# Extract size and spacing from the image
|
||||
size = image.GetSize()
|
||||
spacing = image.GetSpacing()
|
||||
|
||||
# Determine how much larger the image will become with the new spacing
|
||||
factor = [sp / new_sp for sp, new_sp in zip(spacing, new_spacing)]
|
||||
|
||||
# Determine the outcome size of the image for each dimension
|
||||
get_size = lambda size, factor, min_shape: max(int(size * factor), min_shape)
|
||||
new_size = [get_size(sz, f, sh) for sz, f, sh in zip(size, factor, min_shape)]
|
||||
|
||||
# Resample the image
|
||||
resampler = sitk.ResampleImageFilter()
|
||||
resampler.SetReferenceImage(image)
|
||||
resampler.SetOutputSpacing(new_spacing)
|
||||
resampler.SetSize(new_size)
|
||||
resampler.SetInterpolator(method)
|
||||
resampled_image = resampler.Execute(image)
|
||||
|
||||
return resampled_image
|
||||
|
||||
# Start training
|
||||
def get_generator(
|
||||
images: dict,
|
||||
segmentations: list,
|
||||
sequences: list,
|
||||
shape: tuple,
|
||||
indexes: list = None,
|
||||
batch_size: int = 5,
|
||||
shuffle: bool = False,
|
||||
augmentation = True
|
||||
):
|
||||
"""
|
||||
Returns a (training) generator for use with model.fit().
|
||||
|
||||
Parameters:
|
||||
input_modalities: List of modalty names to include.
|
||||
output_modalities: Names of the target modalities.
|
||||
batch_size: Number of images per batch (default: all).
|
||||
indexes: Only use the specified image indexes.
|
||||
shuffle: Shuffle the lists of indexes once at the beginning.
|
||||
augmentation: Apply augmentation or not (bool).
|
||||
"""
|
||||
|
||||
num_rows = len(images)
|
||||
|
||||
if indexes == None:
|
||||
indexes = list(range(num_rows))
|
||||
|
||||
if type(indexes) == int:
|
||||
indexes = list(range(indexes))
|
||||
|
||||
if batch_size == None:
|
||||
batch_size = len(indexes)
|
||||
|
||||
idx = 0
|
||||
|
||||
# Prepare empty batch placeholder with named inputs and outputs
|
||||
input_batch = np.zeros((batch_size,) + shape + (len(images),))
|
||||
output_batch = np.zeros((batch_size,) + shape + (1,))
|
||||
|
||||
# Loop infinitely to keep generating batches
|
||||
while True:
|
||||
# Prepare each observation in a batch
|
||||
for batch_idx in range(batch_size):
|
||||
# Shuffle the order of images if all indexes have been seen
|
||||
if idx == 0 and shuffle:
|
||||
np.random.shuffle(indexes)
|
||||
current_index = indexes[idx]
|
||||
|
||||
# Insert the augmented images into the input batch
|
||||
img_crop = [images[s][current_index] for s in sequences]
|
||||
img_crop = np.stack(img_crop, axis=-1)
|
||||
seg_crop = segmentations[current_index][..., np.newaxis]
|
||||
if augmentation:
|
||||
img_crop, seg_crop = augment(img_crop, seg_crop)
|
||||
input_batch[batch_idx] = img_crop
|
||||
output_batch[batch_idx] = seg_crop
|
||||
|
||||
# Increase the current index and modulo by the number of rows
|
||||
# so that we stay within bounds
|
||||
idx = (idx + 1) % len(indexes)
|
||||
|
||||
yield input_batch, output_batch
|
||||
|
||||
def resample_to_reference(image, ref_img,
|
||||
interpolator=sitk.sitkNearestNeighbor,
|
||||
default_pixel_value=0):
|
||||
resampled_img = sitk.Resample(image, ref_img,
|
||||
sitk.Transform(),
|
||||
interpolator, default_pixel_value,
|
||||
ref_img.GetPixelID())
|
||||
return resampled_img
|
||||
|
||||
def dump_dict_to_yaml(
|
||||
data: dict,
|
||||
target_dir: str,
|
||||
filename: str = "settings") -> None:
|
||||
""" Writes the given dictionary as a yaml to the target directory.
|
||||
|
||||
Parameters:
|
||||
`data (dict)`: dictionary of data.
|
||||
`target_dir (str)`: directory where the yaml will be saved.
|
||||
`` filename (str)`: name of the file without extension
|
||||
"""
|
||||
print("\nParameters")
|
||||
for pair in data.items():
|
||||
print(f"\t{pair}")
|
||||
print()
|
||||
|
||||
path = f"{target_dir}/{filename}.yml"
|
||||
print_(f"Wrote yaml to: {path}")
|
||||
with open(path, 'w') as outfile:
|
||||
yaml.dump(data, outfile, default_flow_style=False)
|
|
@ -0,0 +1,46 @@
|
|||
import numpy as np
|
||||
import SimpleITK as sitk
|
||||
|
||||
from helpers import *
|
||||
|
||||
def preprocess(imgs: dict, seg: sitk.Image,
|
||||
shape: tuple, spacing: tuple, to_numpy=True) -> tuple:
|
||||
|
||||
# Resample all of the images to the desired voxel spacing
|
||||
img_r = {k: resample(imgs[k],
|
||||
min_shape=(s+1 for s in shape),
|
||||
new_spacing=spacing) for k in imgs}
|
||||
seg_r = resample(seg,
|
||||
min_shape=(s+1 for s in shape),
|
||||
new_spacing=spacing,
|
||||
method=sitk.sitkNearestNeighbor)
|
||||
|
||||
# Center crop one of the input images
|
||||
ref_seq = [k for k in img_r.keys()][0]
|
||||
ref_img = center_crop(img_r[ref_seq], shape=shape)
|
||||
|
||||
# Then crop the remaining series / segmentation to match the input crop by
|
||||
# transforming them on the physical space of the cropped series.
|
||||
img_crop = {k: resample_to_reference(
|
||||
image=img_r[k],
|
||||
ref_img=ref_img,
|
||||
interpolator=sitk.sitkLinear)
|
||||
for k in img_r}
|
||||
seg_crop = resample_to_reference(
|
||||
image=seg_r,
|
||||
ref_img=ref_img,
|
||||
interpolator=sitk.sitkNearestNeighbor)
|
||||
|
||||
# Return sitk.Image instead of numpy np.ndarray.
|
||||
if not to_numpy:
|
||||
return img_crop, seg_crop
|
||||
|
||||
img_n = {k: sitk.GetArrayFromImage(img_crop[k]).T for k in img_crop}
|
||||
seg_n = sitk.GetArrayFromImage(seg_crop).T
|
||||
seg_n = np.clip(seg_n, 0., 1.)
|
||||
|
||||
# Z-score normalize all images
|
||||
# to do: 2*std
|
||||
for seq in img_n:
|
||||
img_n[seq] = (img_n[seq] - np.mean(img_n[seq])) / ( 2* np.std(img_n[seq]))
|
||||
return img_n, seg_n
|
|
@ -0,0 +1,114 @@
|
|||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras import Input, Model
|
||||
from tensorflow.keras.layers import Conv3D, Activation, Dense, concatenate, add
|
||||
from tensorflow.keras.layers import Conv3DTranspose, LeakyReLU
|
||||
from tensorflow.keras import regularizers
|
||||
from tensorflow.keras.layers import GlobalAveragePooling3D, Reshape, Dense, multiply, Permute
|
||||
|
||||
def squeeze_excite_block(input, ratio=8):
|
||||
''' Create a channel-wise squeeze-excite block
|
||||
Args:
|
||||
input: input tensor
|
||||
filters: number of output filters
|
||||
Returns: a keras tensor
|
||||
References
|
||||
- [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507)
|
||||
'''
|
||||
init = input
|
||||
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
|
||||
filters = init.shape[channel_axis]
|
||||
se_shape = (1, 1, 1, filters)
|
||||
|
||||
se = GlobalAveragePooling3D()(init)
|
||||
se = Reshape(se_shape)(se)
|
||||
se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
|
||||
se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)
|
||||
|
||||
if K.image_data_format() == 'channels_first':
|
||||
se = Permute((4, 1, 2, 3))(se)
|
||||
|
||||
x = multiply([init, se])
|
||||
return x
|
||||
|
||||
def build_dual_attention_unet(
|
||||
input_shape,
|
||||
l2_regularization = 0.0001,
|
||||
):
|
||||
|
||||
def conv_layer(x, kernel_size, out_filters, strides=(1,1,1)):
|
||||
x = Conv3D(out_filters, kernel_size,
|
||||
strides = strides,
|
||||
padding = 'same',
|
||||
kernel_regularizer = regularizers.l2(l2_regularization),
|
||||
kernel_initializer = 'he_normal',
|
||||
use_bias = False
|
||||
)(x)
|
||||
return x
|
||||
|
||||
def conv_block(input, out_filters, strides=(1,1,1), with_residual=False, with_se=False, activation='relu'):
|
||||
# Strided convolution to convsample
|
||||
x = conv_layer(input, (3,3,3), out_filters, strides)
|
||||
x = Activation('relu')(x)
|
||||
|
||||
# Unstrided convolution
|
||||
x = conv_layer(x, (3,3,3), out_filters)
|
||||
|
||||
# Add a squeeze-excite block
|
||||
if with_se:
|
||||
se = squeeze_excite_block(x)
|
||||
x = add([x, se])
|
||||
|
||||
# Add a residual connection using a 1x1x1 convolution with strides
|
||||
if with_residual:
|
||||
residual = conv_layer(input, (1,1,1), out_filters, strides)
|
||||
x = add([x, residual])
|
||||
|
||||
if activation == 'leaky':
|
||||
x = LeakyReLU(alpha=.1)(x)
|
||||
else:
|
||||
x = Activation('relu')(x)
|
||||
|
||||
# Activate whatever comes out of this
|
||||
return x
|
||||
|
||||
# If we already have only one input, no need to combine anything
|
||||
inputs = Input(input_shape)
|
||||
|
||||
# Downsampling
|
||||
conv1 = conv_block(inputs, 16)
|
||||
conv2 = conv_block(conv1, 32, strides=(2,2,1), with_residual=True, with_se=True) #72x72x18
|
||||
conv3 = conv_block(conv2, 64, strides=(2,2,1), with_residual=True, with_se=True) #36x36x18
|
||||
conv4 = conv_block(conv3, 128, strides=(2,2,2), with_residual=True, with_se=True) #18x18x9
|
||||
conv5 = conv_block(conv4, 256, strides=(2,2,2), with_residual=True, with_se=True) #9x9x9
|
||||
|
||||
# First upsampling sequence
|
||||
up1_1 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(conv5) #18x18x9
|
||||
up1_2 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(up1_1) #36x36x18
|
||||
up1_3 = Conv3DTranspose(128, (3,3,3), strides=(2,2,1), padding='same')(up1_2) #72x72x18
|
||||
bridge1 = concatenate([conv4, up1_1]) #18x18x9 (128+128=256)
|
||||
dec_conv_1 = conv_block(bridge1, 128, with_residual=True, with_se=True, activation='leaky') #18x18x9
|
||||
|
||||
# Second upsampling sequence
|
||||
up2_1 = Conv3DTranspose(64, (3,3,3), strides=(2,2,2), padding='same')(dec_conv_1) # 36x36x18
|
||||
up2_2 = Conv3DTranspose(64, (3,3,3), strides=(2,2,1), padding='same')(up2_1) # 72x72x18
|
||||
bridge2 = concatenate([conv3, up1_2, up2_1]) # 36x36x18 (64+128+64=256)
|
||||
dec_conv_2 = conv_block(bridge2, 64, with_residual=True, with_se=True, activation='leaky')
|
||||
|
||||
# Final upsampling sequence
|
||||
up3_1 = Conv3DTranspose(32, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_2) # 72x72x18
|
||||
bridge3 = concatenate([conv2, up1_3, up2_2, up3_1]) # 72x72x18 (32+128+64+32=256)
|
||||
dec_conv_3 = conv_block(bridge3, 32, with_residual=True, with_se=True, activation='leaky')
|
||||
|
||||
# Last upsampling to make heatmap
|
||||
up4_1 = Conv3DTranspose(16, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_3) # 72x72x18
|
||||
dec_conv_4 = conv_block(up4_1, 16, with_residual=False, with_se=True, activation='leaky') #144x144x18 (16)
|
||||
|
||||
# Reduce to a single output channel with a 1x1x1 convolution
|
||||
single_channel = Conv3D(1, (1, 1, 1))(dec_conv_4)
|
||||
|
||||
# Apply sigmoid activation to get binary prediction per voxel
|
||||
act = Activation('sigmoid')(single_channel)
|
||||
|
||||
# Model definition
|
||||
model = Model(inputs=inputs, outputs=act)
|
||||
return model
|
|
@ -0,0 +1,6 @@
|
|||
from .FROC import image_utils
|
||||
from .FROC import froc
|
||||
from .FROC import data_utils
|
||||
from .FROC import cal_froc_from_np
|
||||
from .FROC import blob_preprocess
|
||||
from .FROC import analysis_utils
|
|
@ -0,0 +1,313 @@
|
|||
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
from sklearn.metrics import auc
|
||||
from scipy import ndimage
|
||||
|
||||
try:
|
||||
import numpy.typing as npt
|
||||
except ImportError: # pragma: no cover
|
||||
pass
|
||||
|
||||
|
||||
# Parse Detection Maps to Individual Lesions + Likelihoods
|
||||
def parse_detection_map(detection_map):
|
||||
# Label All Non-Connected Components in Detection Map
|
||||
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
|
||||
|
||||
confidences = []
|
||||
if num_blobs > 0:
|
||||
# For Each Lesion Detection
|
||||
for tumor_index in range(1, num_blobs+1):
|
||||
|
||||
# Extract Mask of Current Lesion
|
||||
# hard_blob = np.zeros_like(blobs_index)
|
||||
# hard_blob[blobs_index == tumor] = 1
|
||||
# TODO: replace above with the following? Is faster I think.
|
||||
# hard_blob = (blobs_index == tumor).astype(int)
|
||||
|
||||
# Extract Max Predicted Likelihood for Lesion Detection
|
||||
# max_prob = np.max(hard_blob) # <- this is always 1
|
||||
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
|
||||
|
||||
# Store Predicted Likelihood per Lesion Detection
|
||||
max_prob = detection_map[blobs_index == tumor_index].max()
|
||||
confidences.append((tumor_index, max_prob))
|
||||
return confidences, blobs_index
|
||||
|
||||
|
||||
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
|
||||
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
|
||||
epsilon = 1e-8
|
||||
dsc_num = np.sum(predictions[labels == 1]) * 2.0
|
||||
dsc_denom = np.sum(predictions) + np.sum(labels)
|
||||
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
|
||||
|
||||
|
||||
# Calculate Intersection over Union (IoU) for N-D Arrays
|
||||
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
|
||||
epsilon = 1e-8
|
||||
iou_num = np.sum(predictions[labels == 1])
|
||||
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
|
||||
return float((iou_num + epsilon) / (iou_denom + epsilon))
|
||||
|
||||
|
||||
# Calculate Operating Points for Curve
|
||||
def calculate_operating_points(y, x, op_match=None, verbose=1):
|
||||
"""
|
||||
Input:
|
||||
- y: (monotonically increasing) performance metric, such as the True Positive Rate
|
||||
- x: (monotonically increasing) performance metric, such as the False Positive Rate
|
||||
- op_match: dictionary that specifies the target operating point: {
|
||||
'x': target x value, 'y': target y value
|
||||
}
|
||||
|
||||
Returns:
|
||||
- dictionary with operating point(s): {
|
||||
'op_closest_xy_y': y_op, # y value at operating point that matches both x and y of target operating point
|
||||
'op_closest_xy_x': x_op, # x value at operating point that matches both x and y of target operating point
|
||||
...
|
||||
}
|
||||
"""
|
||||
# TODO: currently, a lower sensitivity is preferrred over a higher sensitivity if that means the operating point is matched better.
|
||||
# Would be better to go for the best sensitivity/specificity, if that can be done without hurting the other performance metric.
|
||||
# In practice, this should not be an issue, as we have many points then.
|
||||
y = np.array(y)
|
||||
x = np.array(x)
|
||||
operating_points = {}
|
||||
|
||||
if not np.all(np.diff(y) >= 0) and verbose:
|
||||
print("Warning: y performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
|
||||
if not np.all(np.diff(x) >= 0) and verbose:
|
||||
print("Warning: x performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
|
||||
|
||||
# Grab Index of Intersection -> Compute y/TPR and x/FPR @ Index -> Store
|
||||
op_best_roc_idx = np.argmin(np.abs(y - (1 - x)))
|
||||
op_best_roc_y = y[op_best_roc_idx]
|
||||
op_best_roc_x = x[op_best_roc_idx]
|
||||
operating_points.update(dict(
|
||||
op_best_roc_idx=op_best_roc_idx,
|
||||
op_best_roc_y=op_best_roc_y,
|
||||
op_best_roc_x=op_best_roc_x
|
||||
))
|
||||
|
||||
if op_match is not None:
|
||||
# calculate operating point closest to target operating point
|
||||
abs_deficit_x, abs_deficit_y = None, None
|
||||
optional_x_keys = ['x', 'fpr', 'FPR']
|
||||
optional_y_keys = ['y', 'tpr', 'TPR', 'sensitivity', 'sens']
|
||||
|
||||
# if the target x value is specified, calculate the difference between target and oberved value
|
||||
for key in optional_x_keys:
|
||||
if key in op_match:
|
||||
op_match_x = op_match[key]
|
||||
abs_deficit_x = np.abs(x - op_match_x)
|
||||
break
|
||||
|
||||
# if the target y value is specified, calculate the difference between target and oberved value
|
||||
for key in optional_y_keys:
|
||||
if key in op_match:
|
||||
op_match_y = op_match[key]
|
||||
abs_deficit_y = np.abs(y - op_match_y)
|
||||
break
|
||||
|
||||
# if both target x and y values are specified, calculate the difference between the target pair and observed pair
|
||||
# at the best match, store the observed x and y values
|
||||
if abs_deficit_x is not None and abs_deficit_y is not None:
|
||||
# determine the index of the the closest point to the target pair
|
||||
abs_deficit = abs_deficit_x + abs_deficit_y
|
||||
op_closest_xy_idx = np.argmin(abs_deficit)
|
||||
op_closest_xy_y = y[op_closest_xy_idx]
|
||||
op_closest_xy_x = x[op_closest_xy_idx]
|
||||
|
||||
# store
|
||||
operating_points.update(dict(
|
||||
op_closest_xy_idx=op_closest_xy_idx,
|
||||
op_closest_xy_y=op_closest_xy_y,
|
||||
op_closest_xy_x=op_closest_xy_x
|
||||
))
|
||||
|
||||
# same for matching x only
|
||||
if abs_deficit_x is not None:
|
||||
# determine the index of the the closest point to the target value
|
||||
op_closest_x_idx = np.argmin(abs_deficit_x)
|
||||
op_closest_x_y = y[op_closest_x_idx]
|
||||
op_closest_x_x = x[op_closest_x_idx]
|
||||
|
||||
# store
|
||||
operating_points.update(dict(
|
||||
op_closest_x_idx=op_closest_x_idx,
|
||||
op_closest_x_y=op_closest_x_y,
|
||||
op_closest_x_x=op_closest_x_x
|
||||
))
|
||||
|
||||
# same for matching y only
|
||||
if abs_deficit_y is not None:
|
||||
# determine the index of the the closest point to the target value
|
||||
op_closest_y_idx = np.argmin(abs_deficit_y)
|
||||
op_closest_y_y = y[op_closest_y_idx]
|
||||
op_closest_y_x = x[op_closest_y_idx]
|
||||
|
||||
# store
|
||||
operating_points.update(dict(
|
||||
op_closest_y_idx=op_closest_y_idx,
|
||||
op_closest_y_y=op_closest_y_y,
|
||||
op_closest_y_x=op_closest_y_x
|
||||
))
|
||||
|
||||
return operating_points
|
||||
|
||||
|
||||
# Calculate Statistics for Multiple Curves
|
||||
def calculate_statistics(metrics, op_match=None, x_start=0., x_end=1., verbose=1):
|
||||
"""
|
||||
Calculate statistics, such as the area under the curve, for multiple (independent) curves.
|
||||
To calculate shared statistics, the curves must be translated to a shared x domain. To
|
||||
achieve this with virtually no loss of the step-like nature of curves like ROC and FROC,
|
||||
the shared x values are derived from the input, and offset with ± 1e-7.
|
||||
|
||||
Input:
|
||||
- metrics should be a list of tuples with the y & x coordinates for each run:
|
||||
[([y1, y2, y3, ...], [x1, x2, x3]), # run 1
|
||||
([y1, y2, y3, ...], [x1, x2, x3]), # run 2
|
||||
]
|
||||
- op_match: {
|
||||
'y': value of y metric (e.g., TPR/sensitivity) to match,
|
||||
'x': value of x metric (e.g., FPR/false positive rate) to match,
|
||||
}
|
||||
|
||||
Note: mean and 95% CI are calculated as function of the shared x.
|
||||
"""
|
||||
# construct the array of shared x values
|
||||
eps = 1e-10
|
||||
x_shared = np.array([xi for _, x in metrics for xi in x], dtype=np.float64) # collect list of all possible x-values
|
||||
x_shared = np.ravel(x_shared) # flatten list, if necessary
|
||||
x_shared = np.append(x_shared, [x_start, x_end]) # add x_start and x_end to ensure correct pAUC calculation
|
||||
x_shared = np.concatenate((x_shared+eps, x_shared-eps))
|
||||
x_shared = np.unique(x_shared) # only keep unique x values
|
||||
x_shared.sort() # sort in ascending order (inplace)
|
||||
|
||||
# validate x_start and x_end
|
||||
assert x_start < x_end, f"x_start must be smaller than x_end! Got x_start={x_start} and x_end={x_end}."
|
||||
|
||||
# convert the per-model y (e.g., TPR) vs x (e.g., FPR) to a shared domain
|
||||
y_shared_all = np.zeros(shape=(len(metrics), len(x_shared)), dtype=np.float32)
|
||||
auroc_all = []
|
||||
individually_matched_operating_points = []
|
||||
for i, (y, x) in enumerate(metrics):
|
||||
# if necessary, unpack x and y
|
||||
if len(y) == 1:
|
||||
y = y[0]
|
||||
if len(x) == 1:
|
||||
x = x[0]
|
||||
|
||||
# interpolate the y values to the shared x values
|
||||
y_shared_domain = np.interp(x_shared, x, y)
|
||||
y_shared_all[i] = y_shared_domain
|
||||
|
||||
# calculate AUROC for macro stats
|
||||
mask = (x_shared >= x_start) & (x_shared <= x_end)
|
||||
auc_score = auc(x_shared[mask], y_shared_domain[mask])
|
||||
auroc_all += [auc_score]
|
||||
|
||||
# match operating point for each run individually
|
||||
operating_points = calculate_operating_points(y=y, x=x, op_match=op_match)
|
||||
individually_matched_operating_points += [operating_points]
|
||||
|
||||
# calculate statistics in shared domain
|
||||
y_shared_mean = np.mean(y_shared_all, axis=0)
|
||||
y_shared_std = np.std(y_shared_all, axis=0)
|
||||
y_shared_CI_lower = np.percentile(y_shared_all, 2.5, axis=0)
|
||||
y_shared_CI_higher = np.percentile(y_shared_all, 97.5, axis=0)
|
||||
auroc_mean = np.mean(auroc_all)
|
||||
auroc_std = np.std(auroc_all)
|
||||
|
||||
# calculate operating points in shared domain
|
||||
operating_points = calculate_operating_points(y=y_shared_mean, x=x_shared,
|
||||
op_match=op_match, verbose=verbose)
|
||||
|
||||
# collect results
|
||||
results = {
|
||||
# overview statistics
|
||||
'auroc_mean': auroc_mean,
|
||||
'auroc_std': auroc_std,
|
||||
'auroc_all': auroc_all,
|
||||
|
||||
# for plotting
|
||||
'x_shared': x_shared,
|
||||
'y_shared_all': y_shared_all,
|
||||
'y_shared_mean': y_shared_mean,
|
||||
'y_shared_std': y_shared_std,
|
||||
'y_shared_CI_lower': y_shared_CI_lower,
|
||||
'y_shared_CI_higher': y_shared_CI_higher,
|
||||
|
||||
# individually matched operating point
|
||||
'individually_matched_operating_points': individually_matched_operating_points,
|
||||
}
|
||||
results.update(operating_points)
|
||||
|
||||
# calculate standard deviation of each metric (op_closest_xy_y, etc.) between individual runs
|
||||
individually_matched_operating_points_std = {
|
||||
f"{key}_std": np.std([
|
||||
operating_point_info[key]
|
||||
for operating_point_info in individually_matched_operating_points
|
||||
])
|
||||
for key in individually_matched_operating_points[0].keys()
|
||||
}
|
||||
results.update(individually_matched_operating_points_std)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# Calculate (partial) Area Under Curve (pAUC) using (x,y) coordinates from the given curve
|
||||
def calculate_pAUC_from_graph(x, y, pAUC_start: float = 0.0, pAUC_end: float = 1.0, full: bool = False):
|
||||
"""
|
||||
Input:
|
||||
For a single curve:
|
||||
- x: x values of a curve (e.g., the False Positive Rate points). [x1, x2, .., xn]
|
||||
- y: y values of a curve (e.g., the True Positive Rate points). [y1, y2, .., yn]
|
||||
|
||||
For multiple curves:
|
||||
- list of x curves, for example the x values observed across multiple runs. [[x1m1, x2m1, .., xnm1], [x1m2, x2m2, ...., xnm2], ..]
|
||||
- list of y curves, for example the y values observed across multiple runs. [[y1m1, y2m1, .., ynm1], [y1m2, y2m2, ...., ynm2], ..]
|
||||
|
||||
- pAUC_start: lower bound of x (e.g., FPR) to compute pAUC
|
||||
- pAUC_end: higher bound of x (e.g., FPR) to compute pAUC
|
||||
|
||||
Returns:
|
||||
- if (full==False): List of pAUC values for each set of ([x1, ..], [y1, ..]) coordinates
|
||||
- if (full==True): Metrics as returned by `calculate_statistics` [see there]
|
||||
|
||||
Note: function is not specific to the FROC curve
|
||||
"""
|
||||
|
||||
if not isinstance(x[0], (list, np.ndarray)) or not isinstance(y[0], (list, np.ndarray)):
|
||||
# Have a single set of (x,y) coordinates
|
||||
|
||||
assert not isinstance(x[0], (list, np.ndarray)) and not isinstance(y[0], (list, np.ndarray)), \
|
||||
"Either provide multiple sequences of (x,y) coordinates, or a single sequence. Obtained a mix of both now. "
|
||||
|
||||
# Pack coordinates in format expected by `calculate_statistics`
|
||||
coordinates_joined = [(y, x)]
|
||||
else:
|
||||
# Have multiple sets of (x,y) coordinates
|
||||
# Pack coordinates in format expected by `calculate_statistics`
|
||||
coordinates_joined = list(zip(y, x))
|
||||
|
||||
# Calculate AUC in Given Ranges
|
||||
results = calculate_statistics(metrics=coordinates_joined, x_start=pAUC_start, x_end=pAUC_end)
|
||||
if full:
|
||||
return results
|
||||
return results['auroc_all']
|
|
@ -0,0 +1,148 @@
|
|||
import numpy as np
|
||||
from scipy import ndimage
|
||||
from typing import List, Tuple, Optional, Union
|
||||
|
||||
"""
|
||||
Extract lesion candidates from a softmax prediction
|
||||
Authors: anindox8, matinhz, joeranbosma
|
||||
"""
|
||||
|
||||
|
||||
# Preprocess Softmax Volume (Clipping, Max Confidence)
|
||||
def preprocess_softmax_static(softmax: np.ndarray,
|
||||
threshold: float = 0.10,
|
||||
min_voxels_detection: int = 10,
|
||||
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||
# Load and Preprocess Softmax Image
|
||||
all_hard_blobs = np.zeros_like(softmax)
|
||||
confidences = []
|
||||
clipped_softmax = softmax.copy()
|
||||
clipped_softmax[softmax < threshold] = 0
|
||||
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
|
||||
|
||||
if num_blobs > 0: # For Each Prediction
|
||||
for tumor in range(1, num_blobs+1):
|
||||
# determine mask for current lesion
|
||||
hard_mask = np.zeros_like(blobs_index)
|
||||
hard_mask[blobs_index == tumor] = 1
|
||||
|
||||
if np.count_nonzero(hard_mask) <= min_voxels_detection:
|
||||
# remove tiny detection of <= 0.009 cm^3
|
||||
blobs_index[hard_mask.astype(bool)] = 0
|
||||
continue
|
||||
|
||||
# add sufficiently sized detection
|
||||
hard_blob = hard_mask * clipped_softmax
|
||||
max_prob = np.max(hard_blob)
|
||||
if max_prob_round_decimals is not None:
|
||||
max_prob = np.round(max_prob, max_prob_round_decimals)
|
||||
hard_blob[hard_blob > 0] = max_prob
|
||||
all_hard_blobs += hard_blob
|
||||
confidences.append((tumor, max_prob))
|
||||
return all_hard_blobs, confidences, blobs_index
|
||||
|
||||
|
||||
def preprocess_softmax_dynamic(softmax: np.ndarray,
|
||||
min_voxels_detection: int = 10,
|
||||
num_lesions_to_extract: int = 5,
|
||||
dynamic_threshold_factor: float = 2.5,
|
||||
max_prob_round_decimals: Optional[int] = None,
|
||||
remove_adjacent_lesion_candidates: bool = True,
|
||||
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||
"""
|
||||
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
|
||||
Author: Joeran Bosma
|
||||
"""
|
||||
working_softmax = softmax.copy()
|
||||
dynamic_hard_blobs = np.zeros_like(softmax)
|
||||
confidences: List[Tuple[int, float]] = []
|
||||
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
|
||||
|
||||
while len(confidences) < num_lesions_to_extract:
|
||||
tumor_index = 1 + len(confidences)
|
||||
|
||||
# determine max. softmax
|
||||
max_prob = np.max(working_softmax)
|
||||
|
||||
if max_prob < max_prob_failsafe_stopping_threshold:
|
||||
break
|
||||
|
||||
# set dynamic threshold to half the max
|
||||
threshold = max_prob / dynamic_threshold_factor
|
||||
|
||||
# extract blobs for dynamix threshold
|
||||
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
|
||||
min_voxels_detection=min_voxels_detection,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
|
||||
# select blob with max. confidence
|
||||
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
|
||||
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
|
||||
# thus removed in preprocess_softmax_static.
|
||||
max_prob = np.max(all_hard_blobs)
|
||||
mask_current_lesion = (all_hard_blobs == max_prob)
|
||||
|
||||
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
|
||||
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
|
||||
mask_current_lesion = (mask_current_lesion_indexed == 1)
|
||||
|
||||
# create mask with its confidence
|
||||
hard_blob = (all_hard_blobs * mask_current_lesion)
|
||||
|
||||
# Detect whether the extractted mask is a ring/hollow sphere
|
||||
# around an existing lesion candidate. For confident lesions,
|
||||
# the surroundings of the prediction are still quite confident,
|
||||
# and can become a second 'detection'. For an # example, please
|
||||
# see extracted lesion candidates nr. 4 and 5 at:
|
||||
# https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
|
||||
# Detection method: grow currently extracted lesions by one voxel,
|
||||
# and check if they overlap with the current extracted lesion.
|
||||
extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
|
||||
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
|
||||
|
||||
# Check if lesion candidate should be retained
|
||||
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
|
||||
# store extracted lesion
|
||||
dynamic_hard_blobs += hard_blob
|
||||
confidences += [(tumor_index, max_prob)]
|
||||
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
|
||||
|
||||
# remove extracted lesion from working-softmax
|
||||
working_softmax = (working_softmax * (~mask_current_lesion))
|
||||
|
||||
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
|
||||
|
||||
|
||||
def preprocess_softmax(softmax: np.ndarray,
|
||||
threshold: Union[str, float] = 0.10,
|
||||
min_voxels_detection: int = 10,
|
||||
num_lesions_to_extract: int = 5,
|
||||
dynamic_threshold_factor: float = 2.5,
|
||||
max_prob_round_decimals: Optional[int] = None,
|
||||
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||
"""
|
||||
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
|
||||
"""
|
||||
if threshold == 'dynamic':
|
||||
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
|
||||
dynamic_threshold_factor=dynamic_threshold_factor,
|
||||
num_lesions_to_extract=num_lesions_to_extract,
|
||||
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
elif threshold == 'dynamic-fast':
|
||||
# determine max. softmax and set a per-case 'static' threshold based on that
|
||||
max_prob = np.max(softmax)
|
||||
threshold = float(max_prob / dynamic_threshold_factor)
|
||||
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
|
||||
min_voxels_detection=min_voxels_detection,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
else:
|
||||
threshold = float(threshold) # convert threshold to float, if it wasn't already
|
||||
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
|
||||
min_voxels_detection=min_voxels_detection,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
|
||||
|
||||
|
||||
|
||||
return all_hard_blobs, confidences, indexed_pred
|
|
@ -0,0 +1,643 @@
|
|||
import argparse
|
||||
import multiprocessing
|
||||
import numpy as np
|
||||
import SimpleITK as sitk
|
||||
import time
|
||||
import os
|
||||
import itertools
|
||||
import concurrent.futures
|
||||
|
||||
from typing import List
|
||||
import sys
|
||||
sys.path.append('./../code')
|
||||
from utils_quintin import *
|
||||
|
||||
# from fastMRI_PCa.visualization.visualize import write_array2nifti_t2
|
||||
from tensorflow.keras.models import load_model
|
||||
from functools import partial
|
||||
from focal_loss import BinaryFocalLoss
|
||||
from scipy import ndimage
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from sklearn.metrics import roc_curve, auc
|
||||
|
||||
# from fastMRI_PCa.visualization import save_slice_3d
|
||||
# from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np
|
||||
# from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask
|
||||
# from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s
|
||||
# from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric
|
||||
|
||||
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
|
||||
|
||||
try:
|
||||
import numpy.typing as npt
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
############################### PREPROCESSING BLOBS #########################
|
||||
"""
|
||||
Extract lesion candidates from a softmax prediction
|
||||
Authors: anindox8, matinhz, joeranbosma
|
||||
"""
|
||||
|
||||
|
||||
# Preprocess Softmax Volume (Clipping, Max Confidence)
|
||||
def preprocess_softmax_static(softmax: np.ndarray,
|
||||
threshold: float = 0.10,
|
||||
min_voxels_detection: int = 10,
|
||||
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||
# Load and Preprocess Softmax Image
|
||||
all_hard_blobs = np.zeros_like(softmax)
|
||||
confidences = []
|
||||
clipped_softmax = softmax.copy()
|
||||
clipped_softmax[softmax < threshold] = 0
|
||||
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
|
||||
|
||||
if num_blobs > 0: # For Each Prediction
|
||||
for tumor in range(1, num_blobs+1):
|
||||
# determine mask for current lesion
|
||||
hard_mask = np.zeros_like(blobs_index)
|
||||
hard_mask[blobs_index == tumor] = 1
|
||||
|
||||
if np.count_nonzero(hard_mask) <= min_voxels_detection:
|
||||
# remove tiny detection of <= 0.009 cm^3
|
||||
blobs_index[hard_mask.astype(bool)] = 0
|
||||
continue
|
||||
|
||||
# add sufficiently sized detection
|
||||
hard_blob = hard_mask * clipped_softmax
|
||||
max_prob = np.max(hard_blob)
|
||||
if max_prob_round_decimals is not None:
|
||||
max_prob = np.round(max_prob, max_prob_round_decimals)
|
||||
hard_blob[hard_blob > 0] = max_prob
|
||||
all_hard_blobs += hard_blob
|
||||
confidences.append((tumor, max_prob))
|
||||
return all_hard_blobs, confidences, blobs_index
|
||||
|
||||
|
||||
def preprocess_softmax_dynamic(softmax: np.ndarray,
|
||||
min_voxels_detection: int = 10,
|
||||
num_lesions_to_extract: int = 5,
|
||||
dynamic_threshold_factor: float = 2.5,
|
||||
max_prob_round_decimals: Optional[int] = None,
|
||||
remove_adjacent_lesion_candidates: bool = True,
|
||||
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||
"""
|
||||
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
|
||||
Author: Joeran Bosma
|
||||
"""
|
||||
working_softmax = softmax.copy()
|
||||
dynamic_hard_blobs = np.zeros_like(softmax)
|
||||
confidences: List[Tuple[int, float]] = []
|
||||
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
|
||||
|
||||
while len(confidences) < num_lesions_to_extract:
|
||||
tumor_index = 1 + len(confidences)
|
||||
|
||||
# determine max. softmax
|
||||
max_prob = np.max(working_softmax)
|
||||
|
||||
if max_prob < max_prob_failsafe_stopping_threshold:
|
||||
break
|
||||
|
||||
# set dynamic threshold to half the max
|
||||
threshold = max_prob / dynamic_threshold_factor
|
||||
|
||||
# extract blobs for dynamix threshold
|
||||
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
|
||||
min_voxels_detection=min_voxels_detection,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
|
||||
# select blob with max. confidence
|
||||
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
|
||||
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
|
||||
# thus removed in preprocess_softmax_static.
|
||||
max_prob = np.max(all_hard_blobs)
|
||||
mask_current_lesion = (all_hard_blobs == max_prob)
|
||||
|
||||
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
|
||||
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
|
||||
mask_current_lesion = (mask_current_lesion_indexed == 1)
|
||||
|
||||
# create mask with its confidence
|
||||
hard_blob = (all_hard_blobs * mask_current_lesion)
|
||||
|
||||
# Detect whether the extractted mask is a ring/hollow sphere
|
||||
# around an existing lesion candidate. For confident lesions,
|
||||
# the surroundings of the prediction are still quite confident,
|
||||
# and can become a second 'detection'. For an # example, please
|
||||
# see extracted lesion candidates nr. 4 and 5 at:
|
||||
# https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
|
||||
# Detection method: grow currently extracted lesions by one voxel,
|
||||
# and check if they overlap with the current extracted lesion.
|
||||
extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
|
||||
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
|
||||
|
||||
# Check if lesion candidate should be retained
|
||||
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
|
||||
# store extracted lesion
|
||||
dynamic_hard_blobs += hard_blob
|
||||
confidences += [(tumor_index, max_prob)]
|
||||
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
|
||||
|
||||
# remove extracted lesion from working-softmax
|
||||
working_softmax = (working_softmax * (~mask_current_lesion))
|
||||
|
||||
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
|
||||
|
||||
|
||||
def preprocess_softmax(softmax: np.ndarray,
|
||||
threshold: Union[str, float] = 0.10,
|
||||
min_voxels_detection: int = 10,
|
||||
num_lesions_to_extract: int = 5,
|
||||
dynamic_threshold_factor: float = 2.5,
|
||||
max_prob_round_decimals: Optional[int] = None,
|
||||
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||
"""
|
||||
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
|
||||
"""
|
||||
if threshold == 'dynamic':
|
||||
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
|
||||
dynamic_threshold_factor=dynamic_threshold_factor,
|
||||
num_lesions_to_extract=num_lesions_to_extract,
|
||||
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
elif threshold == 'dynamic-fast':
|
||||
# determine max. softmax and set a per-case 'static' threshold based on that
|
||||
max_prob = np.max(softmax)
|
||||
threshold = float(max_prob / dynamic_threshold_factor)
|
||||
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
|
||||
min_voxels_detection=min_voxels_detection,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
else:
|
||||
threshold = float(threshold) # convert threshold to float, if it wasn't already
|
||||
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
|
||||
min_voxels_detection=min_voxels_detection,
|
||||
max_prob_round_decimals=max_prob_round_decimals)
|
||||
|
||||
return all_hard_blobs, confidences, indexed_pred
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
# Evaluate all cases
|
||||
def evaluate(
|
||||
y_true: np.ndarray,
|
||||
y_pred: np.ndarray,
|
||||
min_overlap=0.10,
|
||||
overlap_func: str = 'DSC',
|
||||
case_confidence: str = 'max',
|
||||
multiple_lesion_candidates_selection_criteria='overlap',
|
||||
allow_unmatched_candidates_with_minimal_overlap=True,
|
||||
flat: Optional[bool] = None
|
||||
) -> Dict[str, Any]:
|
||||
|
||||
# Make list out of numpy array so that it can be mapped in parallel with multiple CPUs.
|
||||
y_true_list = y_true #[y_true[mri_idx] for mri_idx in range(y_true.shape[0])]
|
||||
y_pred_list = y_pred #[y_pred[mri_idx] for mri_idx in range(y_pred.shape[0])]
|
||||
|
||||
roc_true, roc_pred = {}, {}
|
||||
y_list = []
|
||||
num_lesions = 0
|
||||
|
||||
subject_idxs = list(range(len(y_true_list)))
|
||||
N_CPUS = 2
|
||||
with ThreadPoolExecutor(max_workers=N_CPUS) as pool:
|
||||
# define the functions that need to be processed: compute_pred_vector, with each individual
|
||||
# detection_map prediction, ground truth label and parameters
|
||||
future_to_args = {
|
||||
pool.submit(evaluate_case,
|
||||
y_true,
|
||||
y_pred,
|
||||
min_overlap=min_overlap,
|
||||
overlap_func=overlap_func,
|
||||
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
|
||||
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
|
||||
for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs)
|
||||
}
|
||||
|
||||
# process the cases in parallel
|
||||
iterator = concurrent.futures.as_completed(future_to_args)
|
||||
iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list))
|
||||
for future in iterator:
|
||||
try:
|
||||
res = future.result()
|
||||
except Exception as e:
|
||||
print(f"Exception: {e}")
|
||||
else:
|
||||
# unpack results
|
||||
y_list_pat, num_lesions_gt = res
|
||||
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
|
||||
|
||||
# aggregate results
|
||||
idx = future_to_args[future]
|
||||
roc_true[idx] = np.max([a[0] for a in y_list_pat])
|
||||
if case_confidence == 'max':
|
||||
# take highest lesion confidence as case-level confidence
|
||||
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
|
||||
elif case_confidence == 'bayesian':
|
||||
# if a_i is the probability the i-th lesion is csPCa, then the case-level
|
||||
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
|
||||
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
|
||||
else:
|
||||
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
|
||||
|
||||
# accumulate outputs
|
||||
y_list += y_list_pat
|
||||
num_lesions += num_lesions_gt
|
||||
|
||||
# calculate statistics
|
||||
num_patients = len(roc_true)
|
||||
|
||||
# get lesion-level results
|
||||
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
|
||||
y_list=y_list, num_patients=num_patients
|
||||
)
|
||||
|
||||
# calculate recall, precision and average precision
|
||||
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
|
||||
|
||||
# calculate case-level AUROC
|
||||
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs],
|
||||
y_score=[roc_pred[s] for s in subject_idxs],
|
||||
pos_label=1)
|
||||
auc_score = auc(fpr, tpr)
|
||||
|
||||
flat = True
|
||||
if flat:
|
||||
# flatten roc_true and roc_pred
|
||||
roc_true_flat = [roc_true[s] for s in subject_idxs]
|
||||
roc_pred_flat = [roc_pred[s] for s in subject_idxs]
|
||||
|
||||
metrics = {
|
||||
"FP_per_case": convert_np_to_list(FP_per_case),
|
||||
"sensitivity": convert_np_to_list(sensitivity),
|
||||
"thresholds": convert_np_to_list(thresholds),
|
||||
"num_lesions": int(num_lesions),
|
||||
"num_patients": int(num_patients),
|
||||
"roc_true": convert_np_to_list(roc_true_flat if flat else roc_true),
|
||||
"roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred),
|
||||
"AP": float(AP),
|
||||
"precision": convert_np_to_list(precision),
|
||||
"recall": convert_np_to_list(recall),
|
||||
|
||||
# patient-level predictions
|
||||
'auroc': float(auc_score),
|
||||
'tpr': convert_np_to_list(tpr),
|
||||
'fpr': convert_np_to_list(fpr),
|
||||
}
|
||||
|
||||
return metrics
|
||||
|
||||
|
||||
def convert_np_to_list(flat_numpy_arr):
|
||||
ans = []
|
||||
for elem in flat_numpy_arr:
|
||||
ans.append(float(elem))
|
||||
return ans
|
||||
|
||||
|
||||
# Calculate FROC metrics (FP rate, sensitivity)
|
||||
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
|
||||
# calculate counts
|
||||
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||
y_list=y_list, thresholds=thresholds
|
||||
)
|
||||
|
||||
# calculate FROC metrics from counts
|
||||
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
|
||||
FP_per_case = FP / num_patients
|
||||
|
||||
return sensitivity, FP_per_case, thresholds, num_lesions
|
||||
|
||||
|
||||
def ap_from_lesion_evaluations(y_list, thresholds=None):
|
||||
# calculate counts
|
||||
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||
y_list=y_list, thresholds=thresholds
|
||||
)
|
||||
|
||||
# calculate precision (lesion-level)
|
||||
precision = TP / (TP + FP)
|
||||
precision = np.append(precision, [0])
|
||||
|
||||
# calculate recall (lesion-level)
|
||||
FN = num_lesions - TP
|
||||
recall = TP / (TP + FN)
|
||||
recall = np.append(recall, recall[-1:])
|
||||
|
||||
# calculate average precission (lesion-level)
|
||||
AP = np.trapz(y=precision, x=recall)
|
||||
|
||||
return AP, precision, recall, thresholds
|
||||
|
||||
|
||||
# Calculate macro metrics (true positives (TP), false positives (FP))
|
||||
def counts_from_lesion_evaluations(
|
||||
y_list: List[Tuple[int, float, float]],
|
||||
thresholds: "Optional[npt.NDArray[np.float64]]" = None
|
||||
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
|
||||
"""
|
||||
Calculate true positives (TP) and false positives (FP) as function of threshold,
|
||||
based on the case evaluations from `evaluate_case`.
|
||||
"""
|
||||
# sort predictions
|
||||
y_list.sort()
|
||||
|
||||
# collect targets and predictions
|
||||
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
|
||||
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
|
||||
|
||||
# calculate number of lesions
|
||||
num_lesions = y_true.sum()
|
||||
|
||||
if thresholds is None:
|
||||
# collect thresholds for FROC analysis
|
||||
thresholds = np.unique(y_pred)
|
||||
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
|
||||
|
||||
# for >10,000 thresholds: resample to 10,000 unique thresholds, while also
|
||||
# keeping all thresholds higher than 0.8 and the first 20 thresholds
|
||||
if len(thresholds) > 10_000:
|
||||
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
|
||||
st = [thresholds[i] for i in rng]
|
||||
low_thresholds = thresholds[-20:]
|
||||
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
|
||||
|
||||
# define placeholders
|
||||
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||
|
||||
# for each threshold: count FPs and calculate the sensitivity
|
||||
for i, th in enumerate(thresholds):
|
||||
if th > 0:
|
||||
y_pred_thresholded = (y_pred >= th).astype(int)
|
||||
tp = np.sum(y_true*y_pred_thresholded)
|
||||
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
|
||||
|
||||
# update FROC with new point
|
||||
FP[i] = fp
|
||||
TP[i] = tp
|
||||
else:
|
||||
# extend FROC curve to infinity
|
||||
# TP[i] = TP[-2] #note: aangepast stefan 11-04-2022
|
||||
TP[i] = 1
|
||||
FP[i] = np.inf
|
||||
|
||||
return TP, FP, thresholds, num_lesions
|
||||
|
||||
|
||||
# Compute base prediction metrics TP/FP/FN with associated model confidences
|
||||
def evaluate_case(
|
||||
y_true: np.ndarray,
|
||||
y_pred: np.ndarray,
|
||||
min_overlap: float = 0.10,
|
||||
overlap_func: str = 'DSC',
|
||||
multiple_lesion_candidates_selection_criteria: str = 'overlap',
|
||||
allow_unmatched_candidates_with_minimal_overlap: bool = True
|
||||
) -> Tuple[List[Tuple[int, float, float]], int]:
|
||||
"""
|
||||
Gather the list of lesion candidates, and classify in TP/FP/FN.
|
||||
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
|
||||
ground truth mask, use 'overlap' or 'confidence' to choose
|
||||
which lesion is matched against the ground truth mask.
|
||||
|
||||
Returns:
|
||||
- a list of tuples with:
|
||||
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
|
||||
- number of ground truth lesions
|
||||
"""
|
||||
y_list: List[Tuple[int, float, float]] = []
|
||||
|
||||
if overlap_func == 'IoU':
|
||||
overlap_func = calculate_iou
|
||||
elif overlap_func == 'DSC':
|
||||
overlap_func = calculate_dsc
|
||||
else:
|
||||
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
|
||||
|
||||
# convert dtype to float32
|
||||
y_true = y_true.astype('int32')
|
||||
y_pred = y_pred.astype('float32')
|
||||
|
||||
if y_pred.shape[0] < y_true.shape[0]:
|
||||
print("Warning: padding prediction to match label!")
|
||||
y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape)
|
||||
|
||||
|
||||
confidences, indexed_pred = parse_detection_map(y_pred)
|
||||
|
||||
lesion_candidates_best_overlap: Dict[str, float] = {}
|
||||
|
||||
if y_true.any():
|
||||
# for each malignant scan
|
||||
labeled_gt, num_gt_lesions = ndimage.label(y_true, np.ones((3, 3, 3)))
|
||||
|
||||
for lesiong_id in range(1, num_gt_lesions+1):
|
||||
# for each lesion in ground-truth (GT) label
|
||||
gt_lesion_mask = (labeled_gt == lesiong_id)
|
||||
|
||||
# collect indices of lesion candidates that have any overlap with the current GT lesion
|
||||
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
|
||||
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
|
||||
|
||||
# collect lesion candidates for current GT lesion
|
||||
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
|
||||
for lesion_candidate_id, lesion_confidence in confidences:
|
||||
if lesion_candidate_id in overlapping_lesion_candidate_indices:
|
||||
# calculate overlap between lesion candidate and GT mask
|
||||
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
|
||||
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
|
||||
|
||||
# keep track of the highest overlap a lesion candidate has with any GT lesion
|
||||
lesion_candidates_best_overlap[lesion_candidate_id] = max(
|
||||
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||
)
|
||||
|
||||
# store lesion candidate info for current GT mask
|
||||
lesion_candidates_for_target_gt.append({
|
||||
'id': lesion_candidate_id,
|
||||
'confidence': lesion_confidence,
|
||||
'overlap': overlap_score,
|
||||
})
|
||||
print(lesion_candidates_for_target_gt)
|
||||
|
||||
if len(lesion_candidates_for_target_gt) == 0:
|
||||
# no lesion candidate matched with GT mask. Add FN.
|
||||
y_list.append((1, 0., 0.))
|
||||
elif len(lesion_candidates_for_target_gt) == 1:
|
||||
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
|
||||
candidate_info = lesion_candidates_for_target_gt[0]
|
||||
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||
|
||||
if candidate_info['overlap'] > min_overlap:
|
||||
# overlap between lesion candidate and GT mask is sufficient, add TP
|
||||
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
|
||||
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||
else:
|
||||
# overlap between lesion candidate and GT mask is insufficient, add FN
|
||||
y_list.append((1, 0., 0.))
|
||||
else:
|
||||
# multiple predictions for current GT lesion
|
||||
# sort lesion candidates based on overlap or confidence
|
||||
key = multiple_lesion_candidates_selection_criteria
|
||||
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
|
||||
|
||||
gt_lesion_matched = False
|
||||
for candidate_info in lesion_candidates_for_target_gt:
|
||||
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||
|
||||
if candidate_info['overlap'] > min_overlap:
|
||||
indexed_pred[lesion_pred_mask] = 0
|
||||
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||
gt_lesion_matched = True
|
||||
break
|
||||
|
||||
if not gt_lesion_matched:
|
||||
# ground truth lesion not matched to a lesion candidate. Add FN.
|
||||
y_list.append((1, 0., 0.))
|
||||
|
||||
# Remaining lesions are FPs
|
||||
remaining_lesions = set(np.unique(indexed_pred))
|
||||
remaining_lesions -= {0} # remove index 0, if present
|
||||
for lesion_candidate_id, lesion_confidence in confidences:
|
||||
if lesion_candidate_id in remaining_lesions:
|
||||
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
|
||||
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
|
||||
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
|
||||
# In this operation mode, this lesion candidate is not considered as a false positive.
|
||||
pass
|
||||
else:
|
||||
y_list.append((0, lesion_confidence, 0.)) # add FP
|
||||
|
||||
else:
|
||||
# for benign case, all predictions are FPs
|
||||
num_gt_lesions = 0
|
||||
if len(confidences) > 0:
|
||||
for _, lesion_confidence in confidences:
|
||||
y_list.append((0, lesion_confidence, 0.))
|
||||
else:
|
||||
y_list.append((0, 0., 0.)) # avoid empty list
|
||||
|
||||
return y_list, num_gt_lesions
|
||||
|
||||
|
||||
# Calculate Intersection over Union (IoU) for N-D Arrays
|
||||
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
|
||||
epsilon = 1e-8
|
||||
iou_num = np.sum(predictions[labels == 1])
|
||||
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
|
||||
return float((iou_num + epsilon) / (iou_denom + epsilon))
|
||||
|
||||
|
||||
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
|
||||
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
|
||||
epsilon = 1e-8
|
||||
dsc_num = np.sum(predictions[labels == 1]) * 2.0
|
||||
dsc_denom = np.sum(predictions) + np.sum(labels)
|
||||
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
|
||||
|
||||
|
||||
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
|
||||
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
|
||||
assert isinstance(image, np.ndarray)
|
||||
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
|
||||
"Target size doesn't fit image size"
|
||||
|
||||
rank = len(img_size) # image dimensions
|
||||
|
||||
# placeholders for new shape
|
||||
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
|
||||
to_padding = [[0, 0] for _ in range(rank)]
|
||||
slicer = [slice(None)] * rank
|
||||
|
||||
# for each dimension, determine process (cropping or padding)
|
||||
for i in range(rank):
|
||||
if image.shape[i] < img_size[i]:
|
||||
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
|
||||
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
|
||||
else:
|
||||
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
|
||||
from_indices[i][1] = from_indices[i][0] + img_size[i]
|
||||
|
||||
# create slicer object to crop/leave each dimension
|
||||
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
|
||||
|
||||
# pad cropped image to extend missing dimension
|
||||
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
|
||||
|
||||
|
||||
# Parse Detection Maps to Individual Lesions + Likelihoods
|
||||
def parse_detection_map(detection_map):
|
||||
# Label All Non-Connected Components in Detection Map
|
||||
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
|
||||
|
||||
confidences = []
|
||||
if num_blobs > 0:
|
||||
# For Each Lesion Detection
|
||||
for tumor_index in range(1, num_blobs+1):
|
||||
|
||||
# Extract Mask of Current Lesion
|
||||
# hard_blob = np.zeros_like(blobs_index)
|
||||
# hard_blob[blobs_index == tumor] = 1
|
||||
# TODO: replace above with the following? Is faster I think.
|
||||
# hard_blob = (blobs_index == tumor).astype(int)
|
||||
|
||||
# Extract Max Predicted Likelihood for Lesion Detection
|
||||
# max_prob = np.max(hard_blob) # <- this is always 1
|
||||
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
|
||||
|
||||
# Store Predicted Likelihood per Lesion Detection
|
||||
max_prob = detection_map[blobs_index == tumor_index].max()
|
||||
confidences.append((tumor_index, max_prob))
|
||||
return confidences, blobs_index
|
||||
|
||||
############################### CONSTANTS ####################################
|
||||
# Quitin code removed
|
||||
############################### CONSTANTS ####################################
|
||||
|
||||
def move_dims(arr):
|
||||
# UMCG numpy dimensions convention: dims = (batch, width, heigth, depth)
|
||||
# Joeran numpy dimensions convention: dims = (batch, depth, heigth, width)
|
||||
arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently.
|
||||
arr = np.moveaxis(arr, 3, 2)
|
||||
return arr
|
||||
|
||||
############################### CONSTANTS ####################################
|
||||
|
||||
|
||||
|
||||
|
||||
# # Reconstruction model directory
|
||||
# RECON_DIR = "11_new_recon_multiprocess"
|
||||
# T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5"
|
||||
|
||||
# # Path to file with train, validation and test indexes.
|
||||
# IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml"
|
||||
|
||||
# # Diagnostic models
|
||||
# diag_models = [r"models/14_long_diag/best-direct-fold0.h5",
|
||||
# r"models/15_long_diag_early_stopping/best-direct-fold0.h5"]
|
||||
|
||||
|
||||
# # x_true_val = np.load("temp/x_true_val.npy")
|
||||
# print("loading numpy arrays")
|
||||
# y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy"))
|
||||
# y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy"))
|
||||
# y_true_val = move_dims(y_true_val)
|
||||
# y_pred_val = move_dims(y_pred_val)
|
||||
|
||||
# # Preprocess the blobs individually
|
||||
# for mri_idx in range(y_pred_val.shape[0]):
|
||||
# print(f"preprocessing mri idx {mri_idx}")
|
||||
# y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0]
|
||||
|
||||
# print(f"Start FROC evaluation.")
|
||||
# metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val)
|
||||
|
||||
# dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True)
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
|
||||
|
||||
def save_metrics(metrics, file_path=None):
|
||||
# convert dtypes to stock Python
|
||||
save_metrics = sterilize(metrics)
|
||||
|
||||
# save metrics using safe file write
|
||||
file_path_tmp = file_path + '.tmp'
|
||||
with open(file_path_tmp, 'w') as fp:
|
||||
json.dump(save_metrics, fp, indent=4)
|
||||
os.rename(file_path_tmp, file_path)
|
||||
|
||||
|
||||
def sterilize(obj):
|
||||
if isinstance(obj, dict):
|
||||
return {k: sterilize(v) for k, v in obj.items()}
|
||||
elif isinstance(obj, (list, tuple, np.ndarray)):
|
||||
return [sterilize(v) for v in obj]
|
||||
elif isinstance(obj, (str, int, bool, float)):
|
||||
return obj
|
||||
else:
|
||||
return obj.__repr__()
|
|
@ -0,0 +1,505 @@
|
|||
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from scipy import ndimage
|
||||
from sklearn.metrics import roc_curve, auc
|
||||
import concurrent.futures
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
import itertools
|
||||
|
||||
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
|
||||
try:
|
||||
import numpy.typing as npt
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
from image_utils import (
|
||||
resize_image_with_crop_or_pad, read_label, read_prediction
|
||||
)
|
||||
from analysis_utils import (
|
||||
parse_detection_map, calculate_iou, calculate_dsc
|
||||
)
|
||||
|
||||
|
||||
# Compute base prediction metrics TP/FP/FN with associated model confidences
|
||||
def evaluate_case(
|
||||
detection_map: "Union[npt.NDArray[np.float32], str]",
|
||||
label: "Union[npt.NDArray[np.int32], str]",
|
||||
min_overlap: float = 0.10,
|
||||
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
|
||||
multiple_lesion_candidates_selection_criteria: str = 'overlap',
|
||||
allow_unmatched_candidates_with_minimal_overlap: bool = True
|
||||
) -> Tuple[List[Tuple[int, float, float]], int]:
|
||||
"""
|
||||
Gather the list of lesion candidates, and classify in TP/FP/FN.
|
||||
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
|
||||
ground truth mask, use 'overlap' or 'confidence' to choose
|
||||
which lesion is matched against the ground truth mask.
|
||||
|
||||
Returns:
|
||||
- a list of tuples with:
|
||||
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
|
||||
- number of ground truth lesions
|
||||
"""
|
||||
y_list: List[Tuple[int, float, float]] = []
|
||||
if isinstance(label, str):
|
||||
label = read_label(label)
|
||||
if isinstance(detection_map, str):
|
||||
detection_map = read_prediction(detection_map)
|
||||
if overlap_func == 'IoU':
|
||||
overlap_func = calculate_iou
|
||||
elif overlap_func == 'DSC':
|
||||
overlap_func = calculate_dsc
|
||||
else:
|
||||
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
|
||||
|
||||
# convert dtype to float32
|
||||
label = label.astype('int32')
|
||||
detection_map = detection_map.astype('float32')
|
||||
|
||||
if detection_map.shape[0] < label.shape[0]:
|
||||
print("Warning: padding prediction to match label!")
|
||||
detection_map = resize_image_with_crop_or_pad(detection_map, label.shape)
|
||||
|
||||
confidences, indexed_pred = parse_detection_map(detection_map)
|
||||
|
||||
lesion_candidates_best_overlap: Dict[str, float] = {}
|
||||
|
||||
# note to stefan: check wether the if statements are correct and that the append goes correct
|
||||
if label.any():
|
||||
# for each malignant scan
|
||||
labeled_gt, num_gt_lesions = ndimage.label(label, np.ones((3, 3, 3)))
|
||||
# print("test3, werkt if label.any", num_gt_lesions) WE
|
||||
for lesiong_id in range(1, num_gt_lesions+1):
|
||||
# for each lesion in ground-truth (GT) label
|
||||
gt_lesion_mask = (labeled_gt == lesiong_id)
|
||||
|
||||
# collect indices of lesion candidates that have any overlap with the current GT lesion
|
||||
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
|
||||
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
|
||||
|
||||
# collect lesion candidates for current GT lesion
|
||||
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
|
||||
for lesion_candidate_id, lesion_confidence in confidences:
|
||||
if lesion_candidate_id in overlapping_lesion_candidate_indices:
|
||||
# calculate overlap between lesion candidate and GT mask
|
||||
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
|
||||
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
|
||||
|
||||
# keep track of the highest overlap a lesion candidate has with any GT lesion
|
||||
lesion_candidates_best_overlap[lesion_candidate_id] = max(
|
||||
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||
)
|
||||
|
||||
# store lesion candidate info for current GT mask
|
||||
lesion_candidates_for_target_gt.append({
|
||||
'id': lesion_candidate_id,
|
||||
'confidence': lesion_confidence,
|
||||
'overlap': overlap_score,
|
||||
})
|
||||
print("test 4, lesion_candidates_for_target_gt:",lesion_candidates_for_target_gt)
|
||||
# Min overlap wordt niet behaald: +- 0.001
|
||||
if len(lesion_candidates_for_target_gt) == 0:
|
||||
# no lesion candidate matched with GT mask. Add FN.
|
||||
y_list.append((1, 0., 0.))
|
||||
elif len(lesion_candidates_for_target_gt) == 1:
|
||||
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
|
||||
candidate_info = lesion_candidates_for_target_gt[0]
|
||||
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||
|
||||
if candidate_info['overlap'] > min_overlap:
|
||||
# overlap between lesion candidate and GT mask is sufficient, add TP
|
||||
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
|
||||
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||
else:
|
||||
# overlap between lesion candidate and GT mask is insufficient, add FN
|
||||
y_list.append((1, 0., 0.))
|
||||
else:
|
||||
# multiple predictions for current GT lesion
|
||||
# sort lesion candidates based on overlap or confidence
|
||||
key = multiple_lesion_candidates_selection_criteria
|
||||
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
|
||||
|
||||
gt_lesion_matched = False
|
||||
for candidate_info in lesion_candidates_for_target_gt:
|
||||
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||
|
||||
if candidate_info['overlap'] > min_overlap:
|
||||
indexed_pred[lesion_pred_mask] = 0
|
||||
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||
gt_lesion_matched = True
|
||||
break
|
||||
|
||||
if not gt_lesion_matched:
|
||||
# ground truth lesion not matched to a lesion candidate. Add FN.
|
||||
y_list.append((1, 0., 0.))
|
||||
|
||||
# Remaining lesions are FPs
|
||||
remaining_lesions = set(np.unique(indexed_pred))
|
||||
remaining_lesions -= {0} # remove index 0, if present
|
||||
for lesion_candidate_id, lesion_confidence in confidences:
|
||||
if lesion_candidate_id in remaining_lesions:
|
||||
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
|
||||
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
|
||||
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
|
||||
# In this operation mode, this lesion candidate is not considered as a false positive.
|
||||
pass
|
||||
else:
|
||||
y_list.append((0, lesion_confidence, 0.)) # add FP
|
||||
# print("test 4, gaat alles hiernaartoe?") == JA
|
||||
# print("test 3, hoe ziet y_list eruit na labels",y_list)
|
||||
|
||||
else:
|
||||
# for benign case, all predictions are FPs
|
||||
num_gt_lesions = 0
|
||||
if len(confidences) > 0:
|
||||
for _, lesion_confidence in confidences:
|
||||
y_list.append((0, lesion_confidence, 0.))
|
||||
else:
|
||||
y_list.append((0, 0., 0.)) # avoid empty list
|
||||
|
||||
return y_list, num_gt_lesions
|
||||
|
||||
|
||||
# Calculate macro metrics (true positives (TP), false positives (FP))
|
||||
def counts_from_lesion_evaluations(
|
||||
y_list: List[Tuple[int, float, float]],
|
||||
thresholds: "Optional[npt.NDArray[np.float64]]" = None
|
||||
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
|
||||
"""
|
||||
Calculate true positives (TP) and false positives (FP) as function of threshold,
|
||||
based on the case evaluations from `evaluate_case`.
|
||||
"""
|
||||
# sort predictions
|
||||
# print("test 4, zitten er predicties bij leasions voor sort?",y_list)
|
||||
y_list.sort()
|
||||
# print("test 4, zitten er predicties bij leasions na sort?",y_list,'len y_lsit:',len(y_list))
|
||||
# collect targets and predictions
|
||||
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
|
||||
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
|
||||
# print("test,zijn de laatste y-pred hoog?", y_pred)
|
||||
# calculate number of lesions
|
||||
num_lesions = y_true.sum()
|
||||
|
||||
if thresholds is None:
|
||||
# collect thresholds for FROC analysis
|
||||
thresholds = np.unique(y_pred)
|
||||
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
|
||||
|
||||
# for >10,000 thresholds: resample to 10,000 unique thresholds, while also
|
||||
# keeping all thresholds higher than 0.8 and the first 20 thresholds
|
||||
if len(thresholds) > 10_000:
|
||||
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
|
||||
st = [thresholds[i] for i in rng]
|
||||
low_thresholds = thresholds[-20:]
|
||||
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
|
||||
|
||||
# define placeholders
|
||||
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||
|
||||
# for each threshold: count FPs and calculate the sensitivity
|
||||
for i, th in enumerate(thresholds):
|
||||
if th > 0:
|
||||
y_pred_thresholded = (y_pred >= th).astype(int)
|
||||
tp = np.sum(y_true*y_pred_thresholded)
|
||||
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
|
||||
# print("test, is y_pred_thresholded altijd 0?",y_pred_thresholded)
|
||||
# update FROC with new point
|
||||
FP[i] = fp
|
||||
TP[i] = tp
|
||||
else:
|
||||
# extend FROC curve to infinity
|
||||
TP[i] = TP[-2]
|
||||
FP[i] = np.inf
|
||||
|
||||
|
||||
# print("test if tp werkt",TP)
|
||||
# print("test if fp werkt",FP)
|
||||
return TP, FP, thresholds, num_lesions
|
||||
|
||||
|
||||
# Calculate FROC metrics (FP rate, sensitivity)
|
||||
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
|
||||
# calculate counts
|
||||
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||
y_list=y_list, thresholds=thresholds
|
||||
)
|
||||
|
||||
# calculate FROC metrics from counts
|
||||
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
|
||||
# print('test,Hieronder staat de tp waarde:',TP)
|
||||
# print('test,Hieronder staat de num_lesions waarde:',num_lesions)
|
||||
|
||||
FP_per_case = FP / num_patients
|
||||
|
||||
return sensitivity, FP_per_case, thresholds, num_lesions
|
||||
|
||||
|
||||
def ap_from_lesion_evaluations(y_list, thresholds=None):
|
||||
# calculate counts
|
||||
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||
y_list=y_list, thresholds=thresholds
|
||||
)
|
||||
|
||||
# calculate precision (lesion-level)
|
||||
precision = TP / (TP + FP)
|
||||
precision = np.append(precision, [0])
|
||||
|
||||
# calculate recall (lesion-level)
|
||||
FN = num_lesions - TP
|
||||
recall = TP / (TP + FN)
|
||||
recall = np.append(recall, recall[-1:])
|
||||
|
||||
# calculate average precission (lesion-level)
|
||||
AP = np.trapz(y=precision, x=recall)
|
||||
|
||||
return AP, precision, recall, thresholds
|
||||
|
||||
|
||||
# Compute full FROC
|
||||
def froc(
|
||||
y_det: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
|
||||
y_true: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
|
||||
subject_list: Optional[Iterable[Hashable]] = None,
|
||||
min_overlap=0.10,
|
||||
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
|
||||
case_confidence: str = 'max',
|
||||
multiple_lesion_candidates_selection_criteria='overlap',
|
||||
allow_unmatched_candidates_with_minimal_overlap=True,
|
||||
flat: Optional[bool] = None,
|
||||
num_parallel_calls: int = 8,
|
||||
verbose: int = 0,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
FROC evaluation pipeline
|
||||
(written 19 January 2022 by Joeran Bosma)
|
||||
|
||||
Usage:
|
||||
For normal usage of the FROC evaluation pipeline, use the function `froc` with parameters
|
||||
`y_det`, `y_true` and (optional) `subject_list`. Please note that this function is written
|
||||
for binary 3D FROC analysis.
|
||||
|
||||
- `y_det`: iterable of all detection_map volumes to evaluate. Alternatively, y_det may contain
|
||||
filenames ending in .nii.gz/.mha/.mhd/.npy/.npz, which will be loaded on-the-fly.
|
||||
Provide an array of shape `(num_samples, D, H, W)`, where D, H, W are the spatial
|
||||
dimensions (depth, height and width).
|
||||
- `y_true`: iterable of all ground truth labels. Alternatively, y_det may contain filenames
|
||||
ending in .nii.gz/.mha/.mhd/.npy/.npz, which should contain binary labels and
|
||||
will be loaded on-the-fly. Provide an array of the same shape as `y_det`. Use
|
||||
`1` to encode ground truth lesion, and `0` to encode background.
|
||||
|
||||
Additional settings:
|
||||
For more control over the FROC evaluation pipeline, use:
|
||||
- `min_overlap`: defines the minimal required Intersection over Union (IoU) or Dice similarity
|
||||
coefficient (DSC) between a lesion candidate and ground truth lesion, to be
|
||||
counted as a true positive detection.
|
||||
|
||||
"""
|
||||
# Initialize Lists
|
||||
roc_true = {}
|
||||
roc_pred = {}
|
||||
y_list = []
|
||||
num_lesions = 0
|
||||
|
||||
if subject_list is None:
|
||||
# generate indices to keep track of each case during multiprocessing
|
||||
subject_list = itertools.count()
|
||||
if flat is None:
|
||||
flat = True
|
||||
|
||||
with ThreadPoolExecutor(max_workers=num_parallel_calls) as pool:
|
||||
# define the functions that need to be processed: compute_pred_vector, with each individual
|
||||
# detection_map prediction, ground truth label and parameters
|
||||
future_to_args = {
|
||||
pool.submit(evaluate_case, y_pred, y_true, min_overlap=min_overlap, overlap_func=overlap_func,
|
||||
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
|
||||
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
|
||||
for (y_pred, y_true, idx) in zip(y_det, y_true, subject_list)
|
||||
}
|
||||
|
||||
# process the cases in parallel
|
||||
iterator = concurrent.futures.as_completed(future_to_args)
|
||||
if verbose:
|
||||
total: Optional[int] = None
|
||||
if isinstance(subject_list, Sized):
|
||||
total = len(subject_list)
|
||||
iterator = tqdm(iterator, desc='Computing FROC', total=total)
|
||||
for future in iterator:
|
||||
try:
|
||||
res = future.result()
|
||||
except Exception as e:
|
||||
print(f"Exception: {e}")
|
||||
else:
|
||||
# unpack results
|
||||
y_list_pat, num_lesions_gt = res
|
||||
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
|
||||
# print("test 3,", y_list_pat)
|
||||
# aggregate results
|
||||
idx = future_to_args[future]
|
||||
# print("test2, indx", idx)
|
||||
# test: allemaal ingelezen
|
||||
roc_true[idx] = np.max([a[0] for a in y_list_pat])
|
||||
# print("test2, roc_true",roc_true)
|
||||
if case_confidence == 'max':
|
||||
# take highest lesion confidence as case-level confidence
|
||||
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
|
||||
elif case_confidence == 'bayesian':
|
||||
# if a_i is the probability the i-th lesion is csPCa, then the case-level
|
||||
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
|
||||
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
|
||||
else:
|
||||
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
|
||||
|
||||
# accumulate outputs
|
||||
y_list += y_list_pat
|
||||
num_lesions += num_lesions_gt
|
||||
|
||||
# print("test2,heeft y-list ook leasie pred:",y_list)
|
||||
# calculate statistics
|
||||
num_patients = len(roc_true)
|
||||
|
||||
# get lesion-level results
|
||||
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
|
||||
y_list=y_list, num_patients=num_patients
|
||||
)
|
||||
|
||||
# calculate recall, precision and average precision
|
||||
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
|
||||
|
||||
# calculate case-level AUROC
|
||||
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_list],
|
||||
y_score=[roc_pred[s] for s in subject_list],
|
||||
pos_label=1)
|
||||
auc_score = auc(fpr, tpr)
|
||||
|
||||
if flat:
|
||||
# flatten roc_true and roc_pred
|
||||
roc_true_flat = [roc_true[s] for s in subject_list]
|
||||
roc_pred_flat = [roc_pred[s] for s in subject_list]
|
||||
|
||||
metrics = {
|
||||
"FP_per_case": FP_per_case,
|
||||
"sensitivity": sensitivity,
|
||||
"thresholds": thresholds,
|
||||
"num_lesions": num_lesions,
|
||||
"num_patients": num_patients,
|
||||
"roc_true": (roc_true_flat if flat else roc_true),
|
||||
"roc_pred": (roc_pred_flat if flat else roc_pred),
|
||||
"AP": AP,
|
||||
"precision": precision,
|
||||
"recall": recall,
|
||||
|
||||
# patient-level predictions
|
||||
'auroc': auc_score,
|
||||
'tpr': tpr,
|
||||
'fpr': fpr,
|
||||
}
|
||||
|
||||
return metrics
|
||||
|
||||
|
||||
def froc_for_folder(
|
||||
y_det_dir: Union[Path, str],
|
||||
y_true_dir: Optional[Union[Path, str]] = None,
|
||||
subject_list: Optional[List[str]] = None,
|
||||
min_overlap: float = 0.10,
|
||||
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
|
||||
case_confidence: str = 'max',
|
||||
flat: Optional[bool] = None,
|
||||
num_parallel_calls: int = 8,
|
||||
verbose: int = 1
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Perform FROC evaluation for all samples found in y_det_dir, or the samples specified in the subject_list
|
||||
|
||||
Input:
|
||||
- y_det_dir: path to folder containing the detection maps
|
||||
- y_true_dir: (optioinal) allow labels to be stored in a different folder
|
||||
- min_overlap: minimum overlap threshold
|
||||
- overlap_func: intersection over union (IoU), Dice similarity coefficient (DSC), or custom function
|
||||
"""
|
||||
if y_true_dir is None:
|
||||
y_true_dir = y_det_dir
|
||||
|
||||
y_det = []
|
||||
y_true = []
|
||||
if subject_list:
|
||||
# collect the detection maps and labels for each case specified in the subject list
|
||||
for subject_id in subject_list:
|
||||
# construct paths to detection maps and labels
|
||||
# print(np.type(subject_id))
|
||||
# print(subject_list)
|
||||
for postfix in [
|
||||
"_detection_map.nii.gz", "_detection_map.npy", "_detection_map.npz",
|
||||
".nii.gz", ".npy", ".npz", "_pred.nii.gz",
|
||||
]:
|
||||
detection_path = os.path.join(y_det_dir, f"{subject_id}{postfix}")
|
||||
if os.path.exists(detection_path):
|
||||
break
|
||||
|
||||
for postfix in [
|
||||
"_label.nii.gz", "label.npy", "label.npz", "_seg.nii.gz",
|
||||
]:
|
||||
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
|
||||
if os.path.exists(label_path):
|
||||
break
|
||||
if not os.path.exists(label_path):
|
||||
assert y_true_dir != y_det_dir, f"Could not find label for {subject_id}!"
|
||||
for postfix in [
|
||||
".nii.gz", ".npy", ".npz",
|
||||
]:
|
||||
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
|
||||
if os.path.exists(label_path):
|
||||
break
|
||||
|
||||
# collect file paths
|
||||
y_det += [detection_path]
|
||||
y_true += [label_path]
|
||||
else:
|
||||
# collect all detection maps found in detection_map_dir
|
||||
file_list = sorted(os.listdir(y_det_dir))
|
||||
subject_list = []
|
||||
if verbose >= 1:
|
||||
print(f"Found {len(file_list)} files in the input directory, collecting detection_mapes with " +
|
||||
"_detection_map.nii.gz and labels with _label.nii.gz..")
|
||||
|
||||
# collect filenames of detection_map predictions and labels
|
||||
for fn in file_list:
|
||||
if '_detection_map' in fn:
|
||||
y_det += [os.path.join(y_det_dir, fn)]
|
||||
y_true += [os.path.join(y_true_dir, fn.replace('_detection_map', '_label'))]
|
||||
subject_list += [fn]
|
||||
|
||||
# ensure files exist
|
||||
for detection_path in y_det:
|
||||
assert os.path.exists(detection_path), f"Could not find detection map for {subject_id} at {detection_path}!"
|
||||
for label_path in y_true:
|
||||
assert os.path.exists(label_path), f"Could not find label for {subject_id} at {label_path}!"
|
||||
|
||||
if verbose >= 1:
|
||||
print(f"Found prediction and label for {len(y_det)} cases. Here are some examples:")
|
||||
print(subject_list[0:5])
|
||||
|
||||
# perform FROC evaluation with compiled file lists
|
||||
return froc(y_det=y_det, y_true=y_true, subject_list=subject_list,
|
||||
min_overlap=min_overlap, overlap_func=overlap_func, case_confidence=case_confidence,
|
||||
flat=flat, num_parallel_calls=num_parallel_calls, verbose=verbose)
|
|
@ -0,0 +1,83 @@
|
|||
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
import SimpleITK as sitk
|
||||
from pathlib import Path
|
||||
|
||||
from typing import Union
|
||||
try:
|
||||
import numpy.typing as npt
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
|
||||
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
|
||||
assert isinstance(image, np.ndarray)
|
||||
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
|
||||
"Target size doesn't fit image size"
|
||||
|
||||
rank = len(img_size) # image dimensions
|
||||
|
||||
# placeholders for new shape
|
||||
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
|
||||
to_padding = [[0, 0] for _ in range(rank)]
|
||||
slicer = [slice(None)] * rank
|
||||
|
||||
# for each dimension, determine process (cropping or padding)
|
||||
for i in range(rank):
|
||||
if image.shape[i] < img_size[i]:
|
||||
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
|
||||
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
|
||||
else:
|
||||
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
|
||||
from_indices[i][1] = from_indices[i][0] + img_size[i]
|
||||
|
||||
# create slicer object to crop/leave each dimension
|
||||
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
|
||||
|
||||
# pad cropped image to extend missing dimension
|
||||
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
|
||||
|
||||
|
||||
def read_image(path: Union[Path, str]):
|
||||
"""Read image, given a filepath"""
|
||||
if isinstance(path, Path):
|
||||
path = path.as_posix()
|
||||
else:
|
||||
assert isinstance(path, str), f"Unexpected path type: {type(path)}. Please provide a Path or str."
|
||||
|
||||
if '.npy' in path:
|
||||
return np.load(path)
|
||||
elif '.nii' in path or '.mha' in path or 'mhd' in path:
|
||||
return sitk.GetArrayFromImage(sitk.ReadImage(path))
|
||||
elif '.npz' in path:
|
||||
return np.load(path)['softmax'].astype('float32')[1] # nnUnet format
|
||||
else:
|
||||
raise ValueError(f"Unexpected file path. Supported file formats: .nii(.gz), .mha, .npy and .npz. Got: {path}.")
|
||||
|
||||
|
||||
def read_prediction(path: Union[Path, str]) -> "npt.NDArray[np.float32]":
|
||||
"""Read prediction, given a filepath"""
|
||||
# read prediction and ensure correct dtype
|
||||
pred: "npt.NDArray[np.float32]" = np.array(read_image(path), dtype=np.float32)
|
||||
return pred
|
||||
|
||||
|
||||
def read_label(path: Union[Path, str]) -> "npt.NDArray[np.int32]":
|
||||
"""Read label, given a filepath"""
|
||||
# read label and ensure correct dtype
|
||||
lbl: "npt.NDArray[np.int32]" = np.array(read_image(path), dtype=np.int32)
|
||||
return lbl
|
|
@ -0,0 +1,139 @@
|
|||
# http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/05_Results_Visualization.html
|
||||
|
||||
|
||||
from cgitb import grey
|
||||
import SimpleITK as sitk
|
||||
|
||||
def make_isotropic(image, interpolator=sitk.sitkLinear, spacing=None):
|
||||
"""
|
||||
Many file formats (e.g. jpg, png,...) expect the pixels to be isotropic, same
|
||||
spacing for all axes. Saving non-isotropic data in these formats will result in
|
||||
distorted images. This function makes an image isotropic via resampling, if needed.
|
||||
Args:
|
||||
image (SimpleITK.Image): Input image.
|
||||
interpolator: By default the function uses a linear interpolator. For
|
||||
label images one should use the sitkNearestNeighbor interpolator
|
||||
so as not to introduce non-existant labels.
|
||||
spacing (float): Desired spacing. If none given then use the smallest spacing from
|
||||
the original image.
|
||||
Returns:
|
||||
SimpleITK.Image with isotropic spacing which occupies the same region in space as
|
||||
the input image.
|
||||
"""
|
||||
original_spacing = image.GetSpacing()
|
||||
# Image is already isotropic, just return a copy.
|
||||
if all(spc == original_spacing[0] for spc in original_spacing):
|
||||
return sitk.Image(image)
|
||||
# Make image isotropic via resampling.
|
||||
original_size = image.GetSize()
|
||||
if spacing is None:
|
||||
spacing = min(original_spacing)
|
||||
new_spacing = [spacing] * image.GetDimension()
|
||||
new_size = [
|
||||
int(round(osz * ospc / spacing))
|
||||
for osz, ospc in zip(original_size, original_spacing)
|
||||
]
|
||||
return sitk.Resample(
|
||||
image,
|
||||
new_size,
|
||||
sitk.Transform(),
|
||||
interpolator,
|
||||
image.GetOrigin(),
|
||||
new_spacing,
|
||||
image.GetDirection(),
|
||||
0,
|
||||
image.GetPixelID(),
|
||||
)
|
||||
|
||||
def mask_image_multiply(mask, image):
|
||||
components_per_pixel = image.GetNumberOfComponentsPerPixel()
|
||||
if components_per_pixel == 1:
|
||||
return mask * image
|
||||
else:
|
||||
return sitk.Compose(
|
||||
[
|
||||
mask * sitk.VectorIndexSelectionCast(image, channel)
|
||||
for channel in range(components_per_pixel)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def alpha_blend(image1, image2, alpha=0.5, mask1=None, mask2=None):
|
||||
"""
|
||||
Alaph blend two images, pixels can be scalars or vectors.
|
||||
The alpha blending factor can be either a scalar or an image whose
|
||||
pixel type is sitkFloat32 and values are in [0,1].
|
||||
The region that is alpha blended is controled by the given masks.
|
||||
"""
|
||||
|
||||
if not mask1:
|
||||
mask1 = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + 1.0
|
||||
mask1.CopyInformation(image1)
|
||||
else:
|
||||
mask1 = sitk.Cast(mask1, sitk.sitkFloat32)
|
||||
if not mask2:
|
||||
mask2 = sitk.Image(image2.GetSize(), sitk.sitkFloat32) + 1
|
||||
mask2.CopyInformation(image2)
|
||||
else:
|
||||
mask2 = sitk.Cast(mask2, sitk.sitkFloat32)
|
||||
# if we received a scalar, convert it to an image
|
||||
if type(alpha) != sitk.SimpleITK.Image:
|
||||
alpha = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + alpha
|
||||
alpha.CopyInformation(image1)
|
||||
components_per_pixel = image1.GetNumberOfComponentsPerPixel()
|
||||
if components_per_pixel > 1:
|
||||
img1 = sitk.Cast(image1, sitk.sitkVectorFloat32)
|
||||
img2 = sitk.Cast(image2, sitk.sitkVectorFloat32)
|
||||
else:
|
||||
img1 = sitk.Cast(image1, sitk.sitkFloat32)
|
||||
img2 = sitk.Cast(image2, sitk.sitkFloat32)
|
||||
|
||||
intersection_mask = mask1 * mask2
|
||||
|
||||
intersection_image = mask_image_multiply(
|
||||
alpha * intersection_mask, img1
|
||||
) + mask_image_multiply((1 - alpha) * intersection_mask, img2)
|
||||
return (
|
||||
intersection_image
|
||||
+ mask_image_multiply(mask2 - intersection_mask, img2)
|
||||
+ mask_image_multiply(mask1 - intersection_mask, img1)
|
||||
)
|
||||
|
||||
|
||||
#Dictionary with functions mapping a scalar image to a three component vector image
|
||||
image_mappings = {'grey': lambda x: sitk.Compose([x]*3),
|
||||
'jet' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Jet),
|
||||
'hot' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Hot),
|
||||
'winter' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Winter)
|
||||
}
|
||||
|
||||
|
||||
|
||||
image = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_pred.nii')
|
||||
segmentation = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_seg.nii')
|
||||
|
||||
#Make the images isotropic so that when we save a slice along any axis
|
||||
#it will be fine (most image formats assume isotropic pixel sizes).
|
||||
#Segmentation is interpolated with nearest neighbor so we don't introduce new
|
||||
#labels.
|
||||
image = make_isotropic(image, interpolator = sitk.sitkLinear)
|
||||
segmentation = make_isotropic(segmentation, interpolator = sitk.sitkNearestNeighbor)
|
||||
|
||||
#Convert image to sitkUInt8 after rescaling, color image formats only work for [0,255]
|
||||
image_255 = sitk.Cast(sitk.RescaleIntensity(image, 0, 255), sitk.sitkUInt8)
|
||||
segmentation_255 = sitk.Cast(sitk.RescaleIntensity(segmentation, 0, 255), sitk.sitkUInt8)
|
||||
|
||||
|
||||
colormap = 'hot'
|
||||
vec_image = image_mappings['hot'](image_255)
|
||||
vec_image_label = image_mappings['winter'](image_255)
|
||||
|
||||
# vec_segmentation = sitk.ScalarToRGBColormap(segmentation, sitk.ScalarToRGBColormapImageFilter.Winter)
|
||||
# vec_segmentation_2 = sitk.LabelToRGB(segmentation_255,colormap=[255,0,0])
|
||||
|
||||
vec_combined = sitk.Cast(alpha_blend(image1=vec_image, image2=vec_image_label, alpha=0.5, mask2=segmentation==1), sitk.sitkVectorUInt8)
|
||||
|
||||
|
||||
sitk.WriteImage(vec_image, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image.nii.gz')
|
||||
sitk.WriteImage(vec_image_label, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image_label.nii.gz')
|
||||
sitk.WriteImage(vec_combined, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_com.nii.gz')
|
|
@ -0,0 +1,59 @@
|
|||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import glob
|
||||
import argparse
|
||||
import os
|
||||
|
||||
#create parser
|
||||
def parse_input_args():
|
||||
parser = argparse.ArgumentParser(description='Parse arguments for training a Reconstruction model')
|
||||
|
||||
parser.add_argument('train_out_dir',
|
||||
type=str,
|
||||
help='Directory name in train_output dir of the desired experiment folder. There should be a .csv file in this directory with train statistics.')
|
||||
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
args = parse_input_args()
|
||||
|
||||
print(f"Plotting {args}")
|
||||
|
||||
# find csv file
|
||||
# csv = glob.glob(f"train_output/{args.train_out_dir}/*.csv")[0]
|
||||
folder_input = args.train_out_dir
|
||||
|
||||
# load csv file
|
||||
df = pd.read_csv(f'{folder_input}')
|
||||
|
||||
# read csv file
|
||||
for metric in df:
|
||||
if not metric == 'epoch':
|
||||
# if metric == 'loss' or metric == 'val_loss':
|
||||
plt.plot(df['epoch'], df[metric], label=metric)
|
||||
# plt.ylim(ymin=0,ymax=0.01)
|
||||
|
||||
|
||||
|
||||
folder, csvfile = os.path.split(args.train_out_dir)
|
||||
root, experiment = os.path.split(os.path.split(folder)[0])
|
||||
|
||||
plt.title(experiment)
|
||||
plt.xlabel('Epoch')
|
||||
plt.ylabel('Loss')
|
||||
plt.grid()
|
||||
plt.legend()
|
||||
plt.savefig(f"{folder}/{experiment}.png")
|
||||
plt.clf()
|
||||
plt.close()
|
||||
print(folder+".png")
|
||||
print(f"\nsaved figure to {folder}")
|
||||
|
||||
# van yaml inladen 'loss'
|
||||
# vanuit utils > dict to yaml
|
||||
# csv viewer extension
|
||||
# course .venv/bin/activate
|
||||
# loop over alles en if metric then 1-metric
|
||||
# kleur codering color=[]
|
||||
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
#X:\datasets\radboud_lesions_2022
|
||||
#/data/pca-rad/experiments/fix-radboud-data/export/pat0777-2016.nii.gz
|
||||
|
||||
#'experiments/fix-radboud-data/export'
|
||||
#'datasets/radboud_lesions_2022'
|
||||
|
||||
########### Read in the file ##############
|
||||
# with open('./../data/Nijmegen paths/t2.txt', 'r') as file :
|
||||
# filedata = file.read()
|
||||
|
||||
# filedata = filedata.replace('/diffusie_cro/b-1400','/t2_tse_tra')
|
||||
# import numpy as np
|
||||
# print(np.shape(filedata))
|
||||
|
||||
############# check for missing files ############
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
serie = 'b2000'
|
||||
file1 = f'./../data/Nijmegen paths/{serie}.txt'
|
||||
file2 = f'./../data/Test paths/{serie}.txt'
|
||||
file3 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
|
||||
# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus
|
||||
with open(file3) as fp:
|
||||
data = fp.readlines()
|
||||
|
||||
UMCG = [i for i, s in enumerate(data) if '-U-' in s]
|
||||
Martini = [i for i, s in enumerate(data) if '-M-' in s]
|
||||
patient_id = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(data[index]))))[-9:-6] for index in Martini]
|
||||
patient_id = [item.replace("/", "") for item in patient_id]
|
||||
patient_id = [item.replace("y", "") for item in patient_id]
|
||||
patient_id.sort()
|
||||
Martini_data = [data[index] for index in Martini]
|
||||
|
||||
Martini_Diffusie = [i for i, s in enumerate(Martini_data) if not 'Diffusie' in s]
|
||||
|
||||
print(patient_id)
|
||||
print(np.shape(Martini_data))
|
||||
print(len(Martini_Diffusie))
|
||||
print([Martini_data[index] for index in Martini_Diffusie])
|
||||
quit()
|
||||
############# merge radboud and umcg files ############
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
serie = 'b2000'
|
||||
file1 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
|
||||
# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus
|
||||
with open(file1) as fp:
|
||||
data = fp.readlines()
|
||||
# Reading data from file2
|
||||
with open(file2) as fp:
|
||||
data2 = fp.read()
|
||||
|
||||
# Merging 2 files
|
||||
# To add the data of file2
|
||||
# from next line
|
||||
data += "\n"
|
||||
data += data2
|
||||
|
||||
with open (f'{serie}.txt', 'w') as fp:
|
||||
fp.write(data)
|
||||
|
||||
quit()
|
||||
############ find more martini data ##############
|
||||
import numpy as np
|
||||
|
||||
high_b_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
|
||||
adc_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_adc.txt'
|
||||
t2_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_t2w.txt'
|
||||
seg_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/seg_new.txt'
|
||||
|
||||
with open(high_b_data, 'r') as file :
|
||||
high_b_data = file.readlines()
|
||||
with open(adc_data, 'r') as file :
|
||||
adc_data = file.readlines()
|
||||
with open(t2_data, 'r') as file :
|
||||
t2_data = file.readlines()
|
||||
with open(seg_data, 'r') as file :
|
||||
seg_data = file.readlines()
|
||||
|
||||
|
||||
indices = [i for i, s in enumerate(high_b_data) if 'Diffusie_b0-50-400-800-2000' in s]
|
||||
|
||||
b_2000 = [high_b_data[index] for index in indices]
|
||||
b_50 = [item.replace("b-2000", "b-50") for item in b_2000]
|
||||
b_400 = [item.replace("b-2000", "b-400") for item in b_2000]
|
||||
b_800 = [item.replace("b-2000", "b-800") for item in b_2000]
|
||||
adc = [adc_data[index] for index in indices]
|
||||
t2 = [t2_data[index] for index in indices]
|
||||
seg = [seg_data[index] for index in indices]
|
||||
|
||||
with open('b50.txt', 'w') as f:
|
||||
f.writelines(b_50)
|
||||
with open('b400.txt', 'w') as f:
|
||||
f.writelines(b_400)
|
||||
with open('b800.txt', 'w') as f:
|
||||
f.writelines(b_800)
|
||||
with open('b2000.txt', 'w') as f:
|
||||
f.writelines(b_2000)
|
||||
with open('adc.txt', 'w') as f:
|
||||
f.writelines(adc)
|
||||
with open('t2.txt', 'w') as f:
|
||||
f.writelines(t2)
|
||||
with open('seg.txt', 'w') as f:
|
||||
f.writelines(seg)
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
def dump_dict_to_yaml(
|
||||
data: dict,
|
||||
target_dir: str,
|
||||
filename: str = "settings") -> None:
|
||||
""" Writes the given dictionary as a yaml to the target directory.
|
||||
|
||||
Parameters:
|
||||
`data (dict)`: dictionary of data.
|
||||
`target_dir (str)`: directory where the yaml will be saved.
|
||||
`` filename (str)`: name of the file without extension
|
||||
"""
|
||||
print("\nParameters")
|
||||
for pair in data.items():
|
||||
print(f"\t{pair}")
|
||||
print()
|
||||
|
||||
path = f"{target_dir}/{filename}.yml"
|
||||
print_p(f"Wrote yaml to: {path}")
|
||||
with open(path, 'w') as outfile:
|
||||
yaml.dump(data, outfile, default_flow_style=False)
|
|
@ -0,0 +1,75 @@
|
|||
# A script with returns the prediction with labels overlay and stores them in a new folder named "report"
|
||||
|
||||
#modules
|
||||
from preprocessing_function import preprocess
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from helpers import *
|
||||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
|
||||
|
||||
#model load
|
||||
Model_path = '..'
|
||||
reconstructed_model = tf.keras.models.load_model(Model_path)
|
||||
reconstructed_model.compile(loss='',
|
||||
optimizer='',
|
||||
metrics=[''])
|
||||
|
||||
####image load
|
||||
|
||||
# arg.series = b-50 b-400 etc.
|
||||
# DATA_DIR = data dir
|
||||
# IMAGE_SHAPE =
|
||||
# TARGET_SPACING =
|
||||
|
||||
images, image_paths = {s: [] for s in args.series}, {}
|
||||
segmentations = []
|
||||
print_(f"> Loading images into RAM...")
|
||||
|
||||
# Read the image paths from the data directory.
|
||||
# Texts files are expected to have the name "[series_name].txt"
|
||||
for s in args.series:
|
||||
with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f:
|
||||
image_paths[s] = [l.strip() for l in f.readlines()]
|
||||
with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f:
|
||||
seg_paths = [l.strip() for l in f.readlines()]
|
||||
num_images = len(seg_paths)
|
||||
|
||||
# Read and preprocess each of the paths for each series, and the segmentations.
|
||||
for img_idx in tqdm(range(num_images)): #[:40] for less images
|
||||
img_s = {s: sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32)
|
||||
for s in args.series}
|
||||
seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32)
|
||||
img_n, seg_n = preprocess(img_s, seg_s,
|
||||
shape=IMAGE_SHAPE, spacing=TARGET_SPACING)
|
||||
for seq in img_n:
|
||||
images[seq].append(img_n[seq])
|
||||
segmentations.append(seg_n)
|
||||
|
||||
self._images = np.stack(
|
||||
[np.stack(self.images[k], axis=0) for k in sequences], axis=-1)
|
||||
|
||||
#image predict
|
||||
predictions = reconstructed_model.predict(images[idx])
|
||||
|
||||
#heatmap + input image
|
||||
|
||||
#save in folder output
|
||||
|
||||
# Export scan crops and targets once
|
||||
# they don't change during training so we export them only once
|
||||
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
|
||||
for s_idx, s in enumerate(sequences):
|
||||
img_s = sitk.GetImageFromArray(
|
||||
self.validation_set[0][i][..., s_idx].squeeze().T)
|
||||
sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz")
|
||||
seg_s = sitk.GetImageFromArray(
|
||||
self.validation_set[1][i].squeeze().T)
|
||||
sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz")
|
||||
|
||||
prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T)
|
||||
prd_bin_s = sitk.GetImageFromArray(
|
||||
np.around(predictions[i]).astype(np.float32).squeeze().T)
|
||||
sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz")
|
||||
sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz")
|
|
@ -0,0 +1,82 @@
|
|||
import yaml
|
||||
from typing import List
|
||||
from collections import deque
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import SimpleITK as sitk
|
||||
|
||||
def print_p(text, end="\n"):
|
||||
""" Print function for on Peregrine. It needs a flush before printing. """
|
||||
print(text, flush=True, end=end)
|
||||
|
||||
def list_from_file(path: str) -> List:
|
||||
""" Returns a list of all items on each line of the text file referenced in
|
||||
the given path
|
||||
|
||||
Parameters:
|
||||
`path (str)`: path to the text file
|
||||
"""
|
||||
print_p(path)
|
||||
return [line.strip() for line in open(path, "r")]
|
||||
|
||||
|
||||
def dump_dict_to_yaml(
|
||||
data: dict,
|
||||
target_dir: str,
|
||||
filename: str = "settings",
|
||||
verbose: bool = True) -> None:
|
||||
""" Writes the given dictionary as a yaml to the target directory.
|
||||
|
||||
Parameters:
|
||||
`data (dict)`: dictionary of data.
|
||||
`target_dir (str)`: directory where the yaml will be saved.
|
||||
`` filename (str)`: name of the file without extension
|
||||
"""
|
||||
if verbose:
|
||||
print("\nParameters")
|
||||
for pair in data.items():
|
||||
print(f"\t{pair}")
|
||||
print()
|
||||
|
||||
path = f"{target_dir}/{filename}.yml"
|
||||
print_p(f"Wrote yaml to: {path}")
|
||||
with open(path, 'w') as outfile:
|
||||
yaml.dump(data, outfile, default_flow_style=False)
|
||||
|
||||
|
||||
def read_yaml_to_dict(path: str) -> dict:
|
||||
with open(path, 'r') as stream:
|
||||
try:
|
||||
parsed_yaml=yaml.safe_load(stream)
|
||||
# print(parsed_yaml)
|
||||
return parsed_yaml
|
||||
except yaml.YAMLError as exc:
|
||||
print(exc)
|
||||
|
||||
|
||||
def create_dirs_if_not_exists(dirs: List[str]) -> None:
|
||||
""" Creates the list of supplied directories if they don't exist yet.
|
||||
|
||||
Parameters:
|
||||
`dirs (List[str]) or str`: list of strings representing the directories that
|
||||
need to be created
|
||||
"""
|
||||
|
||||
if isinstance(dirs, str):
|
||||
dirs = [dirs]
|
||||
|
||||
for folder in dirs:
|
||||
Path(folder).mkdir(parents=True, exist_ok=True)
|
||||
print_p(f"Created directory {folder} or it already existed.")
|
||||
|
||||
|
||||
def save_np_to_nifti(data_np: np.ndarray,
|
||||
target_dir: str,
|
||||
fname: str,
|
||||
target_space: List[float] = [0.5, 0.5, 3.0]):
|
||||
|
||||
img_s = sitk.GetImageFromArray(data_np.T)
|
||||
img_s.SetSpacing(target_space)
|
||||
path = f"{target_dir}/{fname}.nii.gz"
|
||||
sitk.WriteImage(img_s, path)
|
||||
print_p(f"Saved numpy array to nifti: {path}")
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,91 @@
|
|||
import glob
|
||||
import csv
|
||||
from sqlite3 import Error
|
||||
import sqlite3
|
||||
|
||||
|
||||
def create_connection(db_file):
|
||||
""" create a database connection to the SQLite database
|
||||
specified by the db_file
|
||||
:param db_file: database file
|
||||
:return: Connection object or None
|
||||
"""
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_file)
|
||||
except Error as e:
|
||||
print(e)
|
||||
|
||||
return conn
|
||||
|
||||
## get pat_id, birthdate, PSA, PIRADS,
|
||||
conn = create_connection('./db/master.db')
|
||||
cur = conn.cursor()
|
||||
query_pirads = f"""SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
|
||||
INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
|
||||
INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
|
||||
WHERE pirads != "NA" AND (pirads > 3);"""
|
||||
result_pirads = cur.execute(query_pirads).fetchall() #list of tuples
|
||||
dates = [f"{date[1][:4]}-{date[1][4:6]}-{date[1][6:8]}" for date in result_pirads]
|
||||
pirads_paths = [path.join(x[0],y,x[2]) for x,y in zip(result_pirads,dates) ]
|
||||
|
||||
query_gleason = f"""SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
|
||||
INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
|
||||
INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
|
||||
WHERE gleason_1 != "NA" AND (gleason_1 > 3 OR gleason_2 > 3);"""
|
||||
result_gleason = cur.execute(query_gleason).fetchall() #list of tuples
|
||||
dates = [f"{date[1][:4]}-{date[1][4:6]}-{date[1][6:8]}" for date in result_gleason]
|
||||
gleason_paths = [path.join(x[0],y,x[2]) for x,y in zip(result_gleason,dates) ]
|
||||
|
||||
print("Looking for visit folders containing niftis in ./nifti")
|
||||
niftis = glob("./nifti/**/*.nii.gz", recursive=True)
|
||||
folders = list(set([path.dirname(f) for f in niftis]))
|
||||
|
||||
print("Found", len(folders), "folders")
|
||||
|
||||
|
||||
# df_list = []
|
||||
# ROOT_DIR = '../../../datasets/anonymized_mri/only_nii_directory/*/*'
|
||||
# x = glob.glob(ROOT_DIR)
|
||||
|
||||
# for patient_dir in sorted(x):
|
||||
# T2 = []
|
||||
# adc = []
|
||||
# highb = []
|
||||
# patient_id = []
|
||||
# age_year = []
|
||||
|
||||
# patient_id = patient_dir.split('/')[-2]
|
||||
# age_year = patient_dir.split('/')[-1]
|
||||
# for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*/*')):
|
||||
# if ('T2' in path or 't2' in path) and ('tra' in path or 'TRA' in path) and not ('seg' in path):
|
||||
# T2 = path
|
||||
# for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*')):
|
||||
# if 'manual_adc' in path:
|
||||
# manual_adc = path
|
||||
# else:
|
||||
# if 'ADC' in path and 'nii.gz' in path:
|
||||
# adc = path
|
||||
# print(path)
|
||||
|
||||
# if 'manual_b-1400' in path:
|
||||
# highb1400 = path
|
||||
# else:
|
||||
# if 'b-2000' in path and 'nii.gz' in path:
|
||||
# highb = path
|
||||
# print(path)
|
||||
|
||||
# if manual_adc:
|
||||
# adc = manual_adc
|
||||
# if highb1400:
|
||||
# highb = highb1400
|
||||
|
||||
# df_list.append({'patient_id':patient_id,'age_year':age_year,'t2':T2,'adc':adc,'high_b':highb})
|
||||
# if not T2 or not adc or not highb:
|
||||
# input(f'{patient_id} {age_year} {T2} {adc} {highb}')
|
||||
|
||||
# with open('csv_file.csv', 'w') as csvfile:
|
||||
# writer = csv.DictWriter(csvfile, fieldnames=['patient_id','age_year','t2','adc','high_b'],delimiter=';')
|
||||
# writer.writeheader()
|
||||
# for data in df_list:
|
||||
# writer.writerow(data)
|
|
@ -0,0 +1,48 @@
|
|||
import glob
|
||||
import csv
|
||||
|
||||
df_list = []
|
||||
ROOT_DIR = '../../../datasets/anonymized_mri/only_nii_directory/*/*'
|
||||
x = glob.glob(ROOT_DIR)
|
||||
|
||||
for patient_dir in sorted(x):
|
||||
T2 = []
|
||||
adc = []
|
||||
highb = []
|
||||
patient_id = []
|
||||
age_year = []
|
||||
|
||||
patient_id = patient_dir.split('/')[-2]
|
||||
age_year = patient_dir.split('/')[-1]
|
||||
for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*/*')):
|
||||
if ('T2' in path or 't2' in path) and ('tra' in path or 'TRA' in path) and not ('seg' in path):
|
||||
T2 = path
|
||||
for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*')):
|
||||
if 'manual_adc' in path:
|
||||
manual_adc = path
|
||||
else:
|
||||
if 'ADC' in path and 'nii.gz' in path:
|
||||
adc = path
|
||||
print(path)
|
||||
|
||||
if 'manual_b-1400' in path:
|
||||
highb1400 = path
|
||||
else:
|
||||
if 'b-2000' in path and 'nii.gz' in path:
|
||||
highb = path
|
||||
print(path)
|
||||
|
||||
if manual_adc:
|
||||
adc = manual_adc
|
||||
if highb1400:
|
||||
highb = highb1400
|
||||
|
||||
df_list.append({'patient_id':patient_id,'age_year':age_year,'t2':T2,'adc':adc,'high_b':highb})
|
||||
if not T2 or not adc or not highb:
|
||||
input(f'{patient_id} {age_year} {T2} {adc} {highb}')
|
||||
|
||||
with open('csv_file.csv', 'w') as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=['patient_id','age_year','t2','adc','high_b'],delimiter=';')
|
||||
writer.writeheader()
|
||||
for data in df_list:
|
||||
writer.writerow(data)
|
Loading…
Reference in New Issue