commit before migration to habrok

This commit is contained in:
Stefan 2023-03-29 13:05:32 +02:00
parent 9468dadfa3
commit ad595bb25e
24 changed files with 4098 additions and 1 deletions

1
.gitignore vendored
View File

@ -139,7 +139,6 @@ dmypy.json
cython_debug/
/build/
/code/
/old_code/
/data/
/job_scripts/

6
code/DWI_exp/__init__.py Executable file
View File

@ -0,0 +1,6 @@
print('verkeerde code')
from .batchgenerator import *
from .callbacks import *
from .helpers import *
from .preprocessing_function import *
from .unet import *

64
code/DWI_exp/batchgenerator.py Executable file
View File

@ -0,0 +1,64 @@
import math
from typing import Callable, Dict, List, Optional, Tuple
import numpy as np
from tensorflow.keras.utils import Sequence
class BatchGenerator(Sequence):
def __init__(self,
images: Dict[str, List[np.ndarray]],
segmentations: List[np.ndarray],
sequences: List[str],
shape: Tuple[int],
indexes: List[int] = None,
batch_size: int = 5,
shuffle: bool = False,
augmentation_function: Optional[Callable\
[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]] = None):
self.images = images
self.segmentations = segmentations
self.sequences = sequences
self.indexes = indexes
self.batch_size = batch_size
self.shuffle = shuffle
self.augmentation = augmentation_function
self.num_rows = len(images)
if self.indexes == None:
self.indexes = list(range(self.num_rows))
if type(indexes) == int:
self.indexes = list(range(self.indexes))
if self.batch_size == None:
self.batch_size = len(self.indexes)
# Stack image index as index 0, stack sequences as index -1
self._images = np.stack(
[np.stack(self.images[k], axis=0) for k in sequences], axis=-1)
self._segmentations = np.stack(segmentations, axis=0)[..., np.newaxis]
def __len__(self):
return math.ceil(len(self.indexes) / self.batch_size)
def __getitem__(self, batch_idx):
# Get image / segmentation indexes for current batch
indexes = self.indexes[batch_idx*self.batch_size:(1+batch_idx)*self.batch_size]
# Get matching images and segmentations
images = self._images[indexes]
segmentations = self._segmentations[indexes]
# Apply data augmentation if a function is provided
if self.augmentation:
for img_idx in range(len(indexes)):
images[img_idx], segmentations[img_idx] = self.augmentation(
images[img_idx], segmentations[img_idx])
return images, segmentations
def on_epoch_end(self):
super().on_epoch_end()
if self.shuffle:
np.random.shuffle(self.indexes)

96
code/DWI_exp/callbacks.py Executable file
View File

@ -0,0 +1,96 @@
import numpy as np
import SimpleITK as sitk
from helpers import *
import tensorflow.keras.backend as K
import tensorflow as tf
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import roc_auc_score, roc_curve
def dice_coef(y_true, y_pred):
y_true_f = K.flatten(y_true)
y_pred_f = K.round(K.flatten(y_pred))
intersection = K.sum(y_true_f * y_pred_f)
return (2. * intersection + K.epsilon()) / (
K.sum(y_true_f) + K.sum(y_pred_f) + K.epsilon())
# def auc_value(y_true, y_pred):
# # print_("start ROC_AUC")
# # y_true_old = self.validation_set[1].squeeze()
# # y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze())
# y_true_auc = K.flatten(y_true)
# # print_(f"The shape of y_true = {np.shape(y_true)}" )
# y_pred_auc = K.round(K.flatten(y_pred))
# # print_(f"The shape of y_pred = {np.shape(y_pred)}" )
# # fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
# auc = roc_auc_score(y_true_auc, y_pred_auc)
# print_('AUC:', auc)
# print_('AUC shape:', np.shape(auc))
# # print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}')
# return auc
class IntermediateImages(Callback):
def __init__(self, validation_set, prefix, sequences,
num_images=10):
self.prefix = prefix
self.num_images = num_images
self.validation_set = (
validation_set[0][:num_images, ...],
validation_set[1][:num_images, ...]
)
# Export scan crops and targets once
# they don't change during training so we export them only once
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
for s_idx, s in enumerate(sequences):
img_s = sitk.GetImageFromArray(
self.validation_set[0][i][..., s_idx].squeeze().T)
sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz")
seg_s = sitk.GetImageFromArray(
self.validation_set[1][i].squeeze().T)
sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz")
def on_epoch_end(self, epoch, logs={}):
# Predict on the validation_set
predictions = self.model.predict(self.validation_set, batch_size=1)
# print_("start ROC_AUC")
# y_true_old = self.validation_set[1].squeeze()
# y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze())
# y_true = np.array(y_true_old).flatten()
# print_(f"The shape of y_true = {np.shape(y_true)}" )
# y_pred = np.array(y_pred_old).flatten()
# print_(f"The shape of y_pred = {np.shape(y_pred)}" )
# fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
# auc = roc_auc_score(y_true, y_pred)
# print_('AUC:', auc)
# print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}')
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T)
prd_bin_s = sitk.GetImageFromArray(
np.around(predictions[i]).astype(np.float32).squeeze().T)
sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz")
sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz")
# class RocCallback(Callback):
# def __init__(self,validation_data):
# # self.x = training_data[0]
# # self.y = training_data[1]
# self.x_val = validation_data[0]
# self.y_val = validation_data[1]
# def on_epoch_end(self, epoch, logs={}):
# # y_pred_train = self.model.predict_proba(self.x)
# # roc_train = roc_auc_score(self.y, y_pred_train)
# y_pred_val = self.model.predict(self.x_val, batch_size=1)
# roc_val = roc_auc_score(self.y_val, y_pred_val)
# print('\rroc-auc_train: %s - roc-auc_val: %s' % (str(round(roc_val,4))),end=100*' '+'\n')
# return

239
code/DWI_exp/helpers.py Executable file
View File

@ -0,0 +1,239 @@
import os
from os import path
import numpy as np
import SimpleITK as sitk
from scipy import ndimage
import yaml
def print_(*args, **kwargs):
print(*args, **kwargs, flush=True)
def print_config(config, depth=0):
for k in config:
print(end=(" "*depth*2) + f"- {k}:")
if type(config[k]) == dict:
print()
print_config(config[k], depth+1)
else:
print(f"\t{config[k]}")
print_(end="")
def prepare_project_dir(project_dir):
"""
Prepares the work directory for a project
TODO: Create text file containing configuration
"""
def _makedir(n):
os.makedirs(path.join(project_dir, n), exist_ok=True)
_makedir("models")
_makedir("logs")
_makedir("data")
_makedir("docs")
_makedir("samples")
_makedir("output")
def fake_loss(*args, **kwargs):
return 0.
def center_crop(
image: sitk.Image, shape: list, offset: list = None
) -> sitk.Image:
"""Extracts a region of interest from the center of an SITK image.
Parameters:
image: Input image (SITK).
shape: The shape of the
Returns: Cropped image (SITK)
"""
size = image.GetSize()
# Determine the centroid
centroid = [sz / 2 for sz in size]
# Determine the origin for the bounding box by subtracting half the
# shape of the bounding box from each dimension of the centroid.
box_start = [int(c - sh / 2) for c, sh in zip(centroid, shape)]
if offset:
box_start = [b - o for b, o in zip(box_start, offset)]
# Extract the region of provided shape starting from the previously
# determined starting pixel.
region_extractor = sitk.RegionOfInterestImageFilter()
region_extractor.SetSize(shape)
region_extractor.SetIndex(box_start)
cropped_image = region_extractor.Execute(image)
return cropped_image
def augment(img, seg,
noise_chance = 0.3,
noise_mult_max = 0.001,
rotate_chance = 0.2,
rotate_max_angle = 30,
):
#noise_mult_max was initially 0.1
if np.random.uniform() < noise_chance:
img = augment_noise(img, np.random.uniform(0., noise_mult_max))
if np.random.uniform() < rotate_chance:
img, seg = augment_rotate(img, seg,
angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle))
img, seg = augment_tilt(img, seg,
angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle))
return img, seg
def augment_noise(img, multiplier):
noise = np.random.standard_normal(img.shape) * multiplier
return img + noise
def augment_rotate(img, seg, angle):
img = ndimage.rotate(img, angle, reshape=False, cval=0)
seg = ndimage.rotate(seg, angle, reshape=False, order=0)
return img, seg
def augment_tilt(img, seg, angle):
img = ndimage.rotate(img, angle, reshape=False, axes=(2,1), cval=0)
seg = ndimage.rotate(seg, angle, reshape=False, axes=(2,1), order=0)
return img, seg
def resample(
image: sitk.Image, min_shape: list, method=sitk.sitkLinear,
new_spacing: list=[1, 1, 3.6]
) -> sitk.Image:
"""Resamples an image to given target spacing and shape.
Parameters:
image: Input image (SITK).
shape: Minimum output shape for the underlying array.
method: SimpleITK interpolator to use for resampling.
(e.g. sitk.sitkNearestNeighbor, sitk.sitkLinear)
new_spacing: The new spacing to resample to.
Returns:
int: Resampled image
"""
# Extract size and spacing from the image
size = image.GetSize()
spacing = image.GetSpacing()
# Determine how much larger the image will become with the new spacing
factor = [sp / new_sp for sp, new_sp in zip(spacing, new_spacing)]
# Determine the outcome size of the image for each dimension
get_size = lambda size, factor, min_shape: max(int(size * factor), min_shape)
new_size = [get_size(sz, f, sh) for sz, f, sh in zip(size, factor, min_shape)]
# Resample the image
resampler = sitk.ResampleImageFilter()
resampler.SetReferenceImage(image)
resampler.SetOutputSpacing(new_spacing)
resampler.SetSize(new_size)
resampler.SetInterpolator(method)
resampled_image = resampler.Execute(image)
return resampled_image
# Start training
def get_generator(
images: dict,
segmentations: list,
sequences: list,
shape: tuple,
indexes: list = None,
batch_size: int = 5,
shuffle: bool = False,
augmentation = True
):
"""
Returns a (training) generator for use with model.fit().
Parameters:
input_modalities: List of modalty names to include.
output_modalities: Names of the target modalities.
batch_size: Number of images per batch (default: all).
indexes: Only use the specified image indexes.
shuffle: Shuffle the lists of indexes once at the beginning.
augmentation: Apply augmentation or not (bool).
"""
num_rows = len(images)
if indexes == None:
indexes = list(range(num_rows))
if type(indexes) == int:
indexes = list(range(indexes))
if batch_size == None:
batch_size = len(indexes)
idx = 0
# Prepare empty batch placeholder with named inputs and outputs
input_batch = np.zeros((batch_size,) + shape + (len(images),))
output_batch = np.zeros((batch_size,) + shape + (1,))
# Loop infinitely to keep generating batches
while True:
# Prepare each observation in a batch
for batch_idx in range(batch_size):
# Shuffle the order of images if all indexes have been seen
if idx == 0 and shuffle:
np.random.shuffle(indexes)
current_index = indexes[idx]
# Insert the augmented images into the input batch
img_crop = [images[s][current_index] for s in sequences]
img_crop = np.stack(img_crop, axis=-1)
seg_crop = segmentations[current_index][..., np.newaxis]
if augmentation:
img_crop, seg_crop = augment(img_crop, seg_crop)
input_batch[batch_idx] = img_crop
output_batch[batch_idx] = seg_crop
# Increase the current index and modulo by the number of rows
# so that we stay within bounds
idx = (idx + 1) % len(indexes)
yield input_batch, output_batch
def resample_to_reference(image, ref_img,
interpolator=sitk.sitkNearestNeighbor,
default_pixel_value=0):
resampled_img = sitk.Resample(image, ref_img,
sitk.Transform(),
interpolator, default_pixel_value,
ref_img.GetPixelID())
return resampled_img
def dump_dict_to_yaml(
data: dict,
target_dir: str,
filename: str = "settings") -> None:
""" Writes the given dictionary as a yaml to the target directory.
Parameters:
`data (dict)`: dictionary of data.
`target_dir (str)`: directory where the yaml will be saved.
`` filename (str)`: name of the file without extension
"""
print("\nParameters")
for pair in data.items():
print(f"\t{pair}")
print()
path = f"{target_dir}/{filename}.yml"
print_(f"Wrote yaml to: {path}")
with open(path, 'w') as outfile:
yaml.dump(data, outfile, default_flow_style=False)

View File

@ -0,0 +1,46 @@
import numpy as np
import SimpleITK as sitk
from helpers import *
def preprocess(imgs: dict, seg: sitk.Image,
shape: tuple, spacing: tuple, to_numpy=True) -> tuple:
# Resample all of the images to the desired voxel spacing
img_r = {k: resample(imgs[k],
min_shape=(s+1 for s in shape),
new_spacing=spacing) for k in imgs}
seg_r = resample(seg,
min_shape=(s+1 for s in shape),
new_spacing=spacing,
method=sitk.sitkNearestNeighbor)
# Center crop one of the input images
ref_seq = [k for k in img_r.keys()][0]
ref_img = center_crop(img_r[ref_seq], shape=shape)
# Then crop the remaining series / segmentation to match the input crop by
# transforming them on the physical space of the cropped series.
img_crop = {k: resample_to_reference(
image=img_r[k],
ref_img=ref_img,
interpolator=sitk.sitkLinear)
for k in img_r}
seg_crop = resample_to_reference(
image=seg_r,
ref_img=ref_img,
interpolator=sitk.sitkNearestNeighbor)
# Return sitk.Image instead of numpy np.ndarray.
if not to_numpy:
return img_crop, seg_crop
img_n = {k: sitk.GetArrayFromImage(img_crop[k]).T for k in img_crop}
seg_n = sitk.GetArrayFromImage(seg_crop).T
seg_n = np.clip(seg_n, 0., 1.)
# Z-score normalize all images
# to do: 2*std
for seq in img_n:
img_n[seq] = (img_n[seq] - np.mean(img_n[seq])) / ( 2* np.std(img_n[seq]))
return img_n, seg_n

114
code/DWI_exp/unet.py Executable file
View File

@ -0,0 +1,114 @@
from tensorflow.keras import backend as K
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv3D, Activation, Dense, concatenate, add
from tensorflow.keras.layers import Conv3DTranspose, LeakyReLU
from tensorflow.keras import regularizers
from tensorflow.keras.layers import GlobalAveragePooling3D, Reshape, Dense, multiply, Permute
def squeeze_excite_block(input, ratio=8):
''' Create a channel-wise squeeze-excite block
Args:
input: input tensor
filters: number of output filters
Returns: a keras tensor
References
- [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507)
'''
init = input
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
filters = init.shape[channel_axis]
se_shape = (1, 1, 1, filters)
se = GlobalAveragePooling3D()(init)
se = Reshape(se_shape)(se)
se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)
if K.image_data_format() == 'channels_first':
se = Permute((4, 1, 2, 3))(se)
x = multiply([init, se])
return x
def build_dual_attention_unet(
input_shape,
l2_regularization = 0.0001,
):
def conv_layer(x, kernel_size, out_filters, strides=(1,1,1)):
x = Conv3D(out_filters, kernel_size,
strides = strides,
padding = 'same',
kernel_regularizer = regularizers.l2(l2_regularization),
kernel_initializer = 'he_normal',
use_bias = False
)(x)
return x
def conv_block(input, out_filters, strides=(1,1,1), with_residual=False, with_se=False, activation='relu'):
# Strided convolution to convsample
x = conv_layer(input, (3,3,3), out_filters, strides)
x = Activation('relu')(x)
# Unstrided convolution
x = conv_layer(x, (3,3,3), out_filters)
# Add a squeeze-excite block
if with_se:
se = squeeze_excite_block(x)
x = add([x, se])
# Add a residual connection using a 1x1x1 convolution with strides
if with_residual:
residual = conv_layer(input, (1,1,1), out_filters, strides)
x = add([x, residual])
if activation == 'leaky':
x = LeakyReLU(alpha=.1)(x)
else:
x = Activation('relu')(x)
# Activate whatever comes out of this
return x
# If we already have only one input, no need to combine anything
inputs = Input(input_shape)
# Downsampling
conv1 = conv_block(inputs, 16)
conv2 = conv_block(conv1, 32, strides=(2,2,1), with_residual=True, with_se=True) #72x72x18
conv3 = conv_block(conv2, 64, strides=(2,2,1), with_residual=True, with_se=True) #36x36x18
conv4 = conv_block(conv3, 128, strides=(2,2,2), with_residual=True, with_se=True) #18x18x9
conv5 = conv_block(conv4, 256, strides=(2,2,2), with_residual=True, with_se=True) #9x9x9
# First upsampling sequence
up1_1 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(conv5) #18x18x9
up1_2 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(up1_1) #36x36x18
up1_3 = Conv3DTranspose(128, (3,3,3), strides=(2,2,1), padding='same')(up1_2) #72x72x18
bridge1 = concatenate([conv4, up1_1]) #18x18x9 (128+128=256)
dec_conv_1 = conv_block(bridge1, 128, with_residual=True, with_se=True, activation='leaky') #18x18x9
# Second upsampling sequence
up2_1 = Conv3DTranspose(64, (3,3,3), strides=(2,2,2), padding='same')(dec_conv_1) # 36x36x18
up2_2 = Conv3DTranspose(64, (3,3,3), strides=(2,2,1), padding='same')(up2_1) # 72x72x18
bridge2 = concatenate([conv3, up1_2, up2_1]) # 36x36x18 (64+128+64=256)
dec_conv_2 = conv_block(bridge2, 64, with_residual=True, with_se=True, activation='leaky')
# Final upsampling sequence
up3_1 = Conv3DTranspose(32, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_2) # 72x72x18
bridge3 = concatenate([conv2, up1_3, up2_2, up3_1]) # 72x72x18 (32+128+64+32=256)
dec_conv_3 = conv_block(bridge3, 32, with_residual=True, with_se=True, activation='leaky')
# Last upsampling to make heatmap
up4_1 = Conv3DTranspose(16, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_3) # 72x72x18
dec_conv_4 = conv_block(up4_1, 16, with_residual=False, with_se=True, activation='leaky') #144x144x18 (16)
# Reduce to a single output channel with a 1x1x1 convolution
single_channel = Conv3D(1, (1, 1, 1))(dec_conv_4)
# Apply sigmoid activation to get binary prediction per voxel
act = Activation('sigmoid')(single_channel)
# Model definition
model = Model(inputs=inputs, outputs=act)
return model

6
code/FROC/__init__.py Executable file
View File

@ -0,0 +1,6 @@
from .FROC import image_utils
from .FROC import froc
from .FROC import data_utils
from .FROC import cal_froc_from_np
from .FROC import blob_preprocess
from .FROC import analysis_utils

313
code/FROC/analysis_utils.py Executable file
View File

@ -0,0 +1,313 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from sklearn.metrics import auc
from scipy import ndimage
try:
import numpy.typing as npt
except ImportError: # pragma: no cover
pass
# Parse Detection Maps to Individual Lesions + Likelihoods
def parse_detection_map(detection_map):
# Label All Non-Connected Components in Detection Map
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
confidences = []
if num_blobs > 0:
# For Each Lesion Detection
for tumor_index in range(1, num_blobs+1):
# Extract Mask of Current Lesion
# hard_blob = np.zeros_like(blobs_index)
# hard_blob[blobs_index == tumor] = 1
# TODO: replace above with the following? Is faster I think.
# hard_blob = (blobs_index == tumor).astype(int)
# Extract Max Predicted Likelihood for Lesion Detection
# max_prob = np.max(hard_blob) # <- this is always 1
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
# Store Predicted Likelihood per Lesion Detection
max_prob = detection_map[blobs_index == tumor_index].max()
confidences.append((tumor_index, max_prob))
return confidences, blobs_index
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
dsc_num = np.sum(predictions[labels == 1]) * 2.0
dsc_denom = np.sum(predictions) + np.sum(labels)
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
# Calculate Intersection over Union (IoU) for N-D Arrays
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
iou_num = np.sum(predictions[labels == 1])
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
return float((iou_num + epsilon) / (iou_denom + epsilon))
# Calculate Operating Points for Curve
def calculate_operating_points(y, x, op_match=None, verbose=1):
"""
Input:
- y: (monotonically increasing) performance metric, such as the True Positive Rate
- x: (monotonically increasing) performance metric, such as the False Positive Rate
- op_match: dictionary that specifies the target operating point: {
'x': target x value, 'y': target y value
}
Returns:
- dictionary with operating point(s): {
'op_closest_xy_y': y_op, # y value at operating point that matches both x and y of target operating point
'op_closest_xy_x': x_op, # x value at operating point that matches both x and y of target operating point
...
}
"""
# TODO: currently, a lower sensitivity is preferrred over a higher sensitivity if that means the operating point is matched better.
# Would be better to go for the best sensitivity/specificity, if that can be done without hurting the other performance metric.
# In practice, this should not be an issue, as we have many points then.
y = np.array(y)
x = np.array(x)
operating_points = {}
if not np.all(np.diff(y) >= 0) and verbose:
print("Warning: y performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
if not np.all(np.diff(x) >= 0) and verbose:
print("Warning: x performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
# Grab Index of Intersection -> Compute y/TPR and x/FPR @ Index -> Store
op_best_roc_idx = np.argmin(np.abs(y - (1 - x)))
op_best_roc_y = y[op_best_roc_idx]
op_best_roc_x = x[op_best_roc_idx]
operating_points.update(dict(
op_best_roc_idx=op_best_roc_idx,
op_best_roc_y=op_best_roc_y,
op_best_roc_x=op_best_roc_x
))
if op_match is not None:
# calculate operating point closest to target operating point
abs_deficit_x, abs_deficit_y = None, None
optional_x_keys = ['x', 'fpr', 'FPR']
optional_y_keys = ['y', 'tpr', 'TPR', 'sensitivity', 'sens']
# if the target x value is specified, calculate the difference between target and oberved value
for key in optional_x_keys:
if key in op_match:
op_match_x = op_match[key]
abs_deficit_x = np.abs(x - op_match_x)
break
# if the target y value is specified, calculate the difference between target and oberved value
for key in optional_y_keys:
if key in op_match:
op_match_y = op_match[key]
abs_deficit_y = np.abs(y - op_match_y)
break
# if both target x and y values are specified, calculate the difference between the target pair and observed pair
# at the best match, store the observed x and y values
if abs_deficit_x is not None and abs_deficit_y is not None:
# determine the index of the the closest point to the target pair
abs_deficit = abs_deficit_x + abs_deficit_y
op_closest_xy_idx = np.argmin(abs_deficit)
op_closest_xy_y = y[op_closest_xy_idx]
op_closest_xy_x = x[op_closest_xy_idx]
# store
operating_points.update(dict(
op_closest_xy_idx=op_closest_xy_idx,
op_closest_xy_y=op_closest_xy_y,
op_closest_xy_x=op_closest_xy_x
))
# same for matching x only
if abs_deficit_x is not None:
# determine the index of the the closest point to the target value
op_closest_x_idx = np.argmin(abs_deficit_x)
op_closest_x_y = y[op_closest_x_idx]
op_closest_x_x = x[op_closest_x_idx]
# store
operating_points.update(dict(
op_closest_x_idx=op_closest_x_idx,
op_closest_x_y=op_closest_x_y,
op_closest_x_x=op_closest_x_x
))
# same for matching y only
if abs_deficit_y is not None:
# determine the index of the the closest point to the target value
op_closest_y_idx = np.argmin(abs_deficit_y)
op_closest_y_y = y[op_closest_y_idx]
op_closest_y_x = x[op_closest_y_idx]
# store
operating_points.update(dict(
op_closest_y_idx=op_closest_y_idx,
op_closest_y_y=op_closest_y_y,
op_closest_y_x=op_closest_y_x
))
return operating_points
# Calculate Statistics for Multiple Curves
def calculate_statistics(metrics, op_match=None, x_start=0., x_end=1., verbose=1):
"""
Calculate statistics, such as the area under the curve, for multiple (independent) curves.
To calculate shared statistics, the curves must be translated to a shared x domain. To
achieve this with virtually no loss of the step-like nature of curves like ROC and FROC,
the shared x values are derived from the input, and offset with ± 1e-7.
Input:
- metrics should be a list of tuples with the y & x coordinates for each run:
[([y1, y2, y3, ...], [x1, x2, x3]), # run 1
([y1, y2, y3, ...], [x1, x2, x3]), # run 2
]
- op_match: {
'y': value of y metric (e.g., TPR/sensitivity) to match,
'x': value of x metric (e.g., FPR/false positive rate) to match,
}
Note: mean and 95% CI are calculated as function of the shared x.
"""
# construct the array of shared x values
eps = 1e-10
x_shared = np.array([xi for _, x in metrics for xi in x], dtype=np.float64) # collect list of all possible x-values
x_shared = np.ravel(x_shared) # flatten list, if necessary
x_shared = np.append(x_shared, [x_start, x_end]) # add x_start and x_end to ensure correct pAUC calculation
x_shared = np.concatenate((x_shared+eps, x_shared-eps))
x_shared = np.unique(x_shared) # only keep unique x values
x_shared.sort() # sort in ascending order (inplace)
# validate x_start and x_end
assert x_start < x_end, f"x_start must be smaller than x_end! Got x_start={x_start} and x_end={x_end}."
# convert the per-model y (e.g., TPR) vs x (e.g., FPR) to a shared domain
y_shared_all = np.zeros(shape=(len(metrics), len(x_shared)), dtype=np.float32)
auroc_all = []
individually_matched_operating_points = []
for i, (y, x) in enumerate(metrics):
# if necessary, unpack x and y
if len(y) == 1:
y = y[0]
if len(x) == 1:
x = x[0]
# interpolate the y values to the shared x values
y_shared_domain = np.interp(x_shared, x, y)
y_shared_all[i] = y_shared_domain
# calculate AUROC for macro stats
mask = (x_shared >= x_start) & (x_shared <= x_end)
auc_score = auc(x_shared[mask], y_shared_domain[mask])
auroc_all += [auc_score]
# match operating point for each run individually
operating_points = calculate_operating_points(y=y, x=x, op_match=op_match)
individually_matched_operating_points += [operating_points]
# calculate statistics in shared domain
y_shared_mean = np.mean(y_shared_all, axis=0)
y_shared_std = np.std(y_shared_all, axis=0)
y_shared_CI_lower = np.percentile(y_shared_all, 2.5, axis=0)
y_shared_CI_higher = np.percentile(y_shared_all, 97.5, axis=0)
auroc_mean = np.mean(auroc_all)
auroc_std = np.std(auroc_all)
# calculate operating points in shared domain
operating_points = calculate_operating_points(y=y_shared_mean, x=x_shared,
op_match=op_match, verbose=verbose)
# collect results
results = {
# overview statistics
'auroc_mean': auroc_mean,
'auroc_std': auroc_std,
'auroc_all': auroc_all,
# for plotting
'x_shared': x_shared,
'y_shared_all': y_shared_all,
'y_shared_mean': y_shared_mean,
'y_shared_std': y_shared_std,
'y_shared_CI_lower': y_shared_CI_lower,
'y_shared_CI_higher': y_shared_CI_higher,
# individually matched operating point
'individually_matched_operating_points': individually_matched_operating_points,
}
results.update(operating_points)
# calculate standard deviation of each metric (op_closest_xy_y, etc.) between individual runs
individually_matched_operating_points_std = {
f"{key}_std": np.std([
operating_point_info[key]
for operating_point_info in individually_matched_operating_points
])
for key in individually_matched_operating_points[0].keys()
}
results.update(individually_matched_operating_points_std)
return results
# Calculate (partial) Area Under Curve (pAUC) using (x,y) coordinates from the given curve
def calculate_pAUC_from_graph(x, y, pAUC_start: float = 0.0, pAUC_end: float = 1.0, full: bool = False):
"""
Input:
For a single curve:
- x: x values of a curve (e.g., the False Positive Rate points). [x1, x2, .., xn]
- y: y values of a curve (e.g., the True Positive Rate points). [y1, y2, .., yn]
For multiple curves:
- list of x curves, for example the x values observed across multiple runs. [[x1m1, x2m1, .., xnm1], [x1m2, x2m2, ...., xnm2], ..]
- list of y curves, for example the y values observed across multiple runs. [[y1m1, y2m1, .., ynm1], [y1m2, y2m2, ...., ynm2], ..]
- pAUC_start: lower bound of x (e.g., FPR) to compute pAUC
- pAUC_end: higher bound of x (e.g., FPR) to compute pAUC
Returns:
- if (full==False): List of pAUC values for each set of ([x1, ..], [y1, ..]) coordinates
- if (full==True): Metrics as returned by `calculate_statistics` [see there]
Note: function is not specific to the FROC curve
"""
if not isinstance(x[0], (list, np.ndarray)) or not isinstance(y[0], (list, np.ndarray)):
# Have a single set of (x,y) coordinates
assert not isinstance(x[0], (list, np.ndarray)) and not isinstance(y[0], (list, np.ndarray)), \
"Either provide multiple sequences of (x,y) coordinates, or a single sequence. Obtained a mix of both now. "
# Pack coordinates in format expected by `calculate_statistics`
coordinates_joined = [(y, x)]
else:
# Have multiple sets of (x,y) coordinates
# Pack coordinates in format expected by `calculate_statistics`
coordinates_joined = list(zip(y, x))
# Calculate AUC in Given Ranges
results = calculate_statistics(metrics=coordinates_joined, x_start=pAUC_start, x_end=pAUC_end)
if full:
return results
return results['auroc_all']

148
code/FROC/blob_preprocess.py Executable file
View File

@ -0,0 +1,148 @@
import numpy as np
from scipy import ndimage
from typing import List, Tuple, Optional, Union
"""
Extract lesion candidates from a softmax prediction
Authors: anindox8, matinhz, joeranbosma
"""
# Preprocess Softmax Volume (Clipping, Max Confidence)
def preprocess_softmax_static(softmax: np.ndarray,
threshold: float = 0.10,
min_voxels_detection: int = 10,
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
# Load and Preprocess Softmax Image
all_hard_blobs = np.zeros_like(softmax)
confidences = []
clipped_softmax = softmax.copy()
clipped_softmax[softmax < threshold] = 0
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
if num_blobs > 0: # For Each Prediction
for tumor in range(1, num_blobs+1):
# determine mask for current lesion
hard_mask = np.zeros_like(blobs_index)
hard_mask[blobs_index == tumor] = 1
if np.count_nonzero(hard_mask) <= min_voxels_detection:
# remove tiny detection of <= 0.009 cm^3
blobs_index[hard_mask.astype(bool)] = 0
continue
# add sufficiently sized detection
hard_blob = hard_mask * clipped_softmax
max_prob = np.max(hard_blob)
if max_prob_round_decimals is not None:
max_prob = np.round(max_prob, max_prob_round_decimals)
hard_blob[hard_blob > 0] = max_prob
all_hard_blobs += hard_blob
confidences.append((tumor, max_prob))
return all_hard_blobs, confidences, blobs_index
def preprocess_softmax_dynamic(softmax: np.ndarray,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True,
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
Author: Joeran Bosma
"""
working_softmax = softmax.copy()
dynamic_hard_blobs = np.zeros_like(softmax)
confidences: List[Tuple[int, float]] = []
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
while len(confidences) < num_lesions_to_extract:
tumor_index = 1 + len(confidences)
# determine max. softmax
max_prob = np.max(working_softmax)
if max_prob < max_prob_failsafe_stopping_threshold:
break
# set dynamic threshold to half the max
threshold = max_prob / dynamic_threshold_factor
# extract blobs for dynamix threshold
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
# select blob with max. confidence
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
# thus removed in preprocess_softmax_static.
max_prob = np.max(all_hard_blobs)
mask_current_lesion = (all_hard_blobs == max_prob)
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
mask_current_lesion = (mask_current_lesion_indexed == 1)
# create mask with its confidence
hard_blob = (all_hard_blobs * mask_current_lesion)
# Detect whether the extractted mask is a ring/hollow sphere
# around an existing lesion candidate. For confident lesions,
# the surroundings of the prediction are still quite confident,
# and can become a second 'detection'. For an # example, please
# see extracted lesion candidates nr. 4 and 5 at:
# https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
# Detection method: grow currently extracted lesions by one voxel,
# and check if they overlap with the current extracted lesion.
extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
# Check if lesion candidate should be retained
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
# store extracted lesion
dynamic_hard_blobs += hard_blob
confidences += [(tumor_index, max_prob)]
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
# remove extracted lesion from working-softmax
working_softmax = (working_softmax * (~mask_current_lesion))
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
def preprocess_softmax(softmax: np.ndarray,
threshold: Union[str, float] = 0.10,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
"""
if threshold == 'dynamic':
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
dynamic_threshold_factor=dynamic_threshold_factor,
num_lesions_to_extract=num_lesions_to_extract,
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
max_prob_round_decimals=max_prob_round_decimals)
elif threshold == 'dynamic-fast':
# determine max. softmax and set a per-case 'static' threshold based on that
max_prob = np.max(softmax)
threshold = float(max_prob / dynamic_threshold_factor)
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
else:
threshold = float(threshold) # convert threshold to float, if it wasn't already
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
return all_hard_blobs, confidences, indexed_pred

643
code/FROC/cal_froc_from_np.py Executable file
View File

@ -0,0 +1,643 @@
import argparse
import multiprocessing
import numpy as np
import SimpleITK as sitk
import time
import os
import itertools
import concurrent.futures
from typing import List
import sys
sys.path.append('./../code')
from utils_quintin import *
# from fastMRI_PCa.visualization.visualize import write_array2nifti_t2
from tensorflow.keras.models import load_model
from functools import partial
from focal_loss import BinaryFocalLoss
from scipy import ndimage
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from sklearn.metrics import roc_curve, auc
# from fastMRI_PCa.visualization import save_slice_3d
# from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np
# from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask
# from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s
# from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
try:
import numpy.typing as npt
except ImportError:
pass
############################### PREPROCESSING BLOBS #########################
"""
Extract lesion candidates from a softmax prediction
Authors: anindox8, matinhz, joeranbosma
"""
# Preprocess Softmax Volume (Clipping, Max Confidence)
def preprocess_softmax_static(softmax: np.ndarray,
threshold: float = 0.10,
min_voxels_detection: int = 10,
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
# Load and Preprocess Softmax Image
all_hard_blobs = np.zeros_like(softmax)
confidences = []
clipped_softmax = softmax.copy()
clipped_softmax[softmax < threshold] = 0
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
if num_blobs > 0: # For Each Prediction
for tumor in range(1, num_blobs+1):
# determine mask for current lesion
hard_mask = np.zeros_like(blobs_index)
hard_mask[blobs_index == tumor] = 1
if np.count_nonzero(hard_mask) <= min_voxels_detection:
# remove tiny detection of <= 0.009 cm^3
blobs_index[hard_mask.astype(bool)] = 0
continue
# add sufficiently sized detection
hard_blob = hard_mask * clipped_softmax
max_prob = np.max(hard_blob)
if max_prob_round_decimals is not None:
max_prob = np.round(max_prob, max_prob_round_decimals)
hard_blob[hard_blob > 0] = max_prob
all_hard_blobs += hard_blob
confidences.append((tumor, max_prob))
return all_hard_blobs, confidences, blobs_index
def preprocess_softmax_dynamic(softmax: np.ndarray,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True,
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
Author: Joeran Bosma
"""
working_softmax = softmax.copy()
dynamic_hard_blobs = np.zeros_like(softmax)
confidences: List[Tuple[int, float]] = []
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
while len(confidences) < num_lesions_to_extract:
tumor_index = 1 + len(confidences)
# determine max. softmax
max_prob = np.max(working_softmax)
if max_prob < max_prob_failsafe_stopping_threshold:
break
# set dynamic threshold to half the max
threshold = max_prob / dynamic_threshold_factor
# extract blobs for dynamix threshold
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
# select blob with max. confidence
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
# thus removed in preprocess_softmax_static.
max_prob = np.max(all_hard_blobs)
mask_current_lesion = (all_hard_blobs == max_prob)
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
mask_current_lesion = (mask_current_lesion_indexed == 1)
# create mask with its confidence
hard_blob = (all_hard_blobs * mask_current_lesion)
# Detect whether the extractted mask is a ring/hollow sphere
# around an existing lesion candidate. For confident lesions,
# the surroundings of the prediction are still quite confident,
# and can become a second 'detection'. For an # example, please
# see extracted lesion candidates nr. 4 and 5 at:
# https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
# Detection method: grow currently extracted lesions by one voxel,
# and check if they overlap with the current extracted lesion.
extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
# Check if lesion candidate should be retained
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
# store extracted lesion
dynamic_hard_blobs += hard_blob
confidences += [(tumor_index, max_prob)]
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
# remove extracted lesion from working-softmax
working_softmax = (working_softmax * (~mask_current_lesion))
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
def preprocess_softmax(softmax: np.ndarray,
threshold: Union[str, float] = 0.10,
min_voxels_detection: int = 10,
num_lesions_to_extract: int = 5,
dynamic_threshold_factor: float = 2.5,
max_prob_round_decimals: Optional[int] = None,
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
"""
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
"""
if threshold == 'dynamic':
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
dynamic_threshold_factor=dynamic_threshold_factor,
num_lesions_to_extract=num_lesions_to_extract,
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
max_prob_round_decimals=max_prob_round_decimals)
elif threshold == 'dynamic-fast':
# determine max. softmax and set a per-case 'static' threshold based on that
max_prob = np.max(softmax)
threshold = float(max_prob / dynamic_threshold_factor)
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
else:
threshold = float(threshold) # convert threshold to float, if it wasn't already
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
min_voxels_detection=min_voxels_detection,
max_prob_round_decimals=max_prob_round_decimals)
return all_hard_blobs, confidences, indexed_pred
################################################################################
# Evaluate all cases
def evaluate(
y_true: np.ndarray,
y_pred: np.ndarray,
min_overlap=0.10,
overlap_func: str = 'DSC',
case_confidence: str = 'max',
multiple_lesion_candidates_selection_criteria='overlap',
allow_unmatched_candidates_with_minimal_overlap=True,
flat: Optional[bool] = None
) -> Dict[str, Any]:
# Make list out of numpy array so that it can be mapped in parallel with multiple CPUs.
y_true_list = y_true #[y_true[mri_idx] for mri_idx in range(y_true.shape[0])]
y_pred_list = y_pred #[y_pred[mri_idx] for mri_idx in range(y_pred.shape[0])]
roc_true, roc_pred = {}, {}
y_list = []
num_lesions = 0
subject_idxs = list(range(len(y_true_list)))
N_CPUS = 2
with ThreadPoolExecutor(max_workers=N_CPUS) as pool:
# define the functions that need to be processed: compute_pred_vector, with each individual
# detection_map prediction, ground truth label and parameters
future_to_args = {
pool.submit(evaluate_case,
y_true,
y_pred,
min_overlap=min_overlap,
overlap_func=overlap_func,
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs)
}
# process the cases in parallel
iterator = concurrent.futures.as_completed(future_to_args)
iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list))
for future in iterator:
try:
res = future.result()
except Exception as e:
print(f"Exception: {e}")
else:
# unpack results
y_list_pat, num_lesions_gt = res
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
# aggregate results
idx = future_to_args[future]
roc_true[idx] = np.max([a[0] for a in y_list_pat])
if case_confidence == 'max':
# take highest lesion confidence as case-level confidence
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
elif case_confidence == 'bayesian':
# if a_i is the probability the i-th lesion is csPCa, then the case-level
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
else:
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
# accumulate outputs
y_list += y_list_pat
num_lesions += num_lesions_gt
# calculate statistics
num_patients = len(roc_true)
# get lesion-level results
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
y_list=y_list, num_patients=num_patients
)
# calculate recall, precision and average precision
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
# calculate case-level AUROC
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs],
y_score=[roc_pred[s] for s in subject_idxs],
pos_label=1)
auc_score = auc(fpr, tpr)
flat = True
if flat:
# flatten roc_true and roc_pred
roc_true_flat = [roc_true[s] for s in subject_idxs]
roc_pred_flat = [roc_pred[s] for s in subject_idxs]
metrics = {
"FP_per_case": convert_np_to_list(FP_per_case),
"sensitivity": convert_np_to_list(sensitivity),
"thresholds": convert_np_to_list(thresholds),
"num_lesions": int(num_lesions),
"num_patients": int(num_patients),
"roc_true": convert_np_to_list(roc_true_flat if flat else roc_true),
"roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred),
"AP": float(AP),
"precision": convert_np_to_list(precision),
"recall": convert_np_to_list(recall),
# patient-level predictions
'auroc': float(auc_score),
'tpr': convert_np_to_list(tpr),
'fpr': convert_np_to_list(fpr),
}
return metrics
def convert_np_to_list(flat_numpy_arr):
ans = []
for elem in flat_numpy_arr:
ans.append(float(elem))
return ans
# Calculate FROC metrics (FP rate, sensitivity)
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate FROC metrics from counts
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
FP_per_case = FP / num_patients
return sensitivity, FP_per_case, thresholds, num_lesions
def ap_from_lesion_evaluations(y_list, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate precision (lesion-level)
precision = TP / (TP + FP)
precision = np.append(precision, [0])
# calculate recall (lesion-level)
FN = num_lesions - TP
recall = TP / (TP + FN)
recall = np.append(recall, recall[-1:])
# calculate average precission (lesion-level)
AP = np.trapz(y=precision, x=recall)
return AP, precision, recall, thresholds
# Calculate macro metrics (true positives (TP), false positives (FP))
def counts_from_lesion_evaluations(
y_list: List[Tuple[int, float, float]],
thresholds: "Optional[npt.NDArray[np.float64]]" = None
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
"""
Calculate true positives (TP) and false positives (FP) as function of threshold,
based on the case evaluations from `evaluate_case`.
"""
# sort predictions
y_list.sort()
# collect targets and predictions
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
# calculate number of lesions
num_lesions = y_true.sum()
if thresholds is None:
# collect thresholds for FROC analysis
thresholds = np.unique(y_pred)
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
# for >10,000 thresholds: resample to 10,000 unique thresholds, while also
# keeping all thresholds higher than 0.8 and the first 20 thresholds
if len(thresholds) > 10_000:
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
st = [thresholds[i] for i in rng]
low_thresholds = thresholds[-20:]
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
# define placeholders
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
# for each threshold: count FPs and calculate the sensitivity
for i, th in enumerate(thresholds):
if th > 0:
y_pred_thresholded = (y_pred >= th).astype(int)
tp = np.sum(y_true*y_pred_thresholded)
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
# update FROC with new point
FP[i] = fp
TP[i] = tp
else:
# extend FROC curve to infinity
# TP[i] = TP[-2] #note: aangepast stefan 11-04-2022
TP[i] = 1
FP[i] = np.inf
return TP, FP, thresholds, num_lesions
# Compute base prediction metrics TP/FP/FN with associated model confidences
def evaluate_case(
y_true: np.ndarray,
y_pred: np.ndarray,
min_overlap: float = 0.10,
overlap_func: str = 'DSC',
multiple_lesion_candidates_selection_criteria: str = 'overlap',
allow_unmatched_candidates_with_minimal_overlap: bool = True
) -> Tuple[List[Tuple[int, float, float]], int]:
"""
Gather the list of lesion candidates, and classify in TP/FP/FN.
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
ground truth mask, use 'overlap' or 'confidence' to choose
which lesion is matched against the ground truth mask.
Returns:
- a list of tuples with:
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
- number of ground truth lesions
"""
y_list: List[Tuple[int, float, float]] = []
if overlap_func == 'IoU':
overlap_func = calculate_iou
elif overlap_func == 'DSC':
overlap_func = calculate_dsc
else:
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
# convert dtype to float32
y_true = y_true.astype('int32')
y_pred = y_pred.astype('float32')
if y_pred.shape[0] < y_true.shape[0]:
print("Warning: padding prediction to match label!")
y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape)
confidences, indexed_pred = parse_detection_map(y_pred)
lesion_candidates_best_overlap: Dict[str, float] = {}
if y_true.any():
# for each malignant scan
labeled_gt, num_gt_lesions = ndimage.label(y_true, np.ones((3, 3, 3)))
for lesiong_id in range(1, num_gt_lesions+1):
# for each lesion in ground-truth (GT) label
gt_lesion_mask = (labeled_gt == lesiong_id)
# collect indices of lesion candidates that have any overlap with the current GT lesion
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
# collect lesion candidates for current GT lesion
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in overlapping_lesion_candidate_indices:
# calculate overlap between lesion candidate and GT mask
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
# keep track of the highest overlap a lesion candidate has with any GT lesion
lesion_candidates_best_overlap[lesion_candidate_id] = max(
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
)
# store lesion candidate info for current GT mask
lesion_candidates_for_target_gt.append({
'id': lesion_candidate_id,
'confidence': lesion_confidence,
'overlap': overlap_score,
})
print(lesion_candidates_for_target_gt)
if len(lesion_candidates_for_target_gt) == 0:
# no lesion candidate matched with GT mask. Add FN.
y_list.append((1, 0., 0.))
elif len(lesion_candidates_for_target_gt) == 1:
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
candidate_info = lesion_candidates_for_target_gt[0]
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
# overlap between lesion candidate and GT mask is sufficient, add TP
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
else:
# overlap between lesion candidate and GT mask is insufficient, add FN
y_list.append((1, 0., 0.))
else:
# multiple predictions for current GT lesion
# sort lesion candidates based on overlap or confidence
key = multiple_lesion_candidates_selection_criteria
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
gt_lesion_matched = False
for candidate_info in lesion_candidates_for_target_gt:
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
indexed_pred[lesion_pred_mask] = 0
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
gt_lesion_matched = True
break
if not gt_lesion_matched:
# ground truth lesion not matched to a lesion candidate. Add FN.
y_list.append((1, 0., 0.))
# Remaining lesions are FPs
remaining_lesions = set(np.unique(indexed_pred))
remaining_lesions -= {0} # remove index 0, if present
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in remaining_lesions:
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
# In this operation mode, this lesion candidate is not considered as a false positive.
pass
else:
y_list.append((0, lesion_confidence, 0.)) # add FP
else:
# for benign case, all predictions are FPs
num_gt_lesions = 0
if len(confidences) > 0:
for _, lesion_confidence in confidences:
y_list.append((0, lesion_confidence, 0.))
else:
y_list.append((0, 0., 0.)) # avoid empty list
return y_list, num_gt_lesions
# Calculate Intersection over Union (IoU) for N-D Arrays
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
iou_num = np.sum(predictions[labels == 1])
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
return float((iou_num + epsilon) / (iou_denom + epsilon))
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
epsilon = 1e-8
dsc_num = np.sum(predictions[labels == 1]) * 2.0
dsc_denom = np.sum(predictions) + np.sum(labels)
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
assert isinstance(image, np.ndarray)
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
"Target size doesn't fit image size"
rank = len(img_size) # image dimensions
# placeholders for new shape
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
to_padding = [[0, 0] for _ in range(rank)]
slicer = [slice(None)] * rank
# for each dimension, determine process (cropping or padding)
for i in range(rank):
if image.shape[i] < img_size[i]:
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
else:
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
from_indices[i][1] = from_indices[i][0] + img_size[i]
# create slicer object to crop/leave each dimension
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
# pad cropped image to extend missing dimension
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
# Parse Detection Maps to Individual Lesions + Likelihoods
def parse_detection_map(detection_map):
# Label All Non-Connected Components in Detection Map
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
confidences = []
if num_blobs > 0:
# For Each Lesion Detection
for tumor_index in range(1, num_blobs+1):
# Extract Mask of Current Lesion
# hard_blob = np.zeros_like(blobs_index)
# hard_blob[blobs_index == tumor] = 1
# TODO: replace above with the following? Is faster I think.
# hard_blob = (blobs_index == tumor).astype(int)
# Extract Max Predicted Likelihood for Lesion Detection
# max_prob = np.max(hard_blob) # <- this is always 1
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
# Store Predicted Likelihood per Lesion Detection
max_prob = detection_map[blobs_index == tumor_index].max()
confidences.append((tumor_index, max_prob))
return confidences, blobs_index
############################### CONSTANTS ####################################
# Quitin code removed
############################### CONSTANTS ####################################
def move_dims(arr):
# UMCG numpy dimensions convention: dims = (batch, width, heigth, depth)
# Joeran numpy dimensions convention: dims = (batch, depth, heigth, width)
arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently.
arr = np.moveaxis(arr, 3, 2)
return arr
############################### CONSTANTS ####################################
# # Reconstruction model directory
# RECON_DIR = "11_new_recon_multiprocess"
# T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5"
# # Path to file with train, validation and test indexes.
# IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml"
# # Diagnostic models
# diag_models = [r"models/14_long_diag/best-direct-fold0.h5",
# r"models/15_long_diag_early_stopping/best-direct-fold0.h5"]
# # x_true_val = np.load("temp/x_true_val.npy")
# print("loading numpy arrays")
# y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy"))
# y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy"))
# y_true_val = move_dims(y_true_val)
# y_pred_val = move_dims(y_pred_val)
# # Preprocess the blobs individually
# for mri_idx in range(y_pred_val.shape[0]):
# print(f"preprocessing mri idx {mri_idx}")
# y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0]
# print(f"Start FROC evaluation.")
# metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val)
# dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True)

39
code/FROC/data_utils.py Executable file
View File

@ -0,0 +1,39 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import json
def save_metrics(metrics, file_path=None):
# convert dtypes to stock Python
save_metrics = sterilize(metrics)
# save metrics using safe file write
file_path_tmp = file_path + '.tmp'
with open(file_path_tmp, 'w') as fp:
json.dump(save_metrics, fp, indent=4)
os.rename(file_path_tmp, file_path)
def sterilize(obj):
if isinstance(obj, dict):
return {k: sterilize(v) for k, v in obj.items()}
elif isinstance(obj, (list, tuple, np.ndarray)):
return [sterilize(v) for v in obj]
elif isinstance(obj, (str, int, bool, float)):
return obj
else:
return obj.__repr__()

505
code/FROC/froc.py Executable file
View File

@ -0,0 +1,505 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from tqdm import tqdm
from scipy import ndimage
from sklearn.metrics import roc_curve, auc
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
import itertools
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
try:
import numpy.typing as npt
except ImportError:
pass
from image_utils import (
resize_image_with_crop_or_pad, read_label, read_prediction
)
from analysis_utils import (
parse_detection_map, calculate_iou, calculate_dsc
)
# Compute base prediction metrics TP/FP/FN with associated model confidences
def evaluate_case(
detection_map: "Union[npt.NDArray[np.float32], str]",
label: "Union[npt.NDArray[np.int32], str]",
min_overlap: float = 0.10,
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
multiple_lesion_candidates_selection_criteria: str = 'overlap',
allow_unmatched_candidates_with_minimal_overlap: bool = True
) -> Tuple[List[Tuple[int, float, float]], int]:
"""
Gather the list of lesion candidates, and classify in TP/FP/FN.
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
ground truth mask, use 'overlap' or 'confidence' to choose
which lesion is matched against the ground truth mask.
Returns:
- a list of tuples with:
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
- number of ground truth lesions
"""
y_list: List[Tuple[int, float, float]] = []
if isinstance(label, str):
label = read_label(label)
if isinstance(detection_map, str):
detection_map = read_prediction(detection_map)
if overlap_func == 'IoU':
overlap_func = calculate_iou
elif overlap_func == 'DSC':
overlap_func = calculate_dsc
else:
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
# convert dtype to float32
label = label.astype('int32')
detection_map = detection_map.astype('float32')
if detection_map.shape[0] < label.shape[0]:
print("Warning: padding prediction to match label!")
detection_map = resize_image_with_crop_or_pad(detection_map, label.shape)
confidences, indexed_pred = parse_detection_map(detection_map)
lesion_candidates_best_overlap: Dict[str, float] = {}
# note to stefan: check wether the if statements are correct and that the append goes correct
if label.any():
# for each malignant scan
labeled_gt, num_gt_lesions = ndimage.label(label, np.ones((3, 3, 3)))
# print("test3, werkt if label.any", num_gt_lesions) WE
for lesiong_id in range(1, num_gt_lesions+1):
# for each lesion in ground-truth (GT) label
gt_lesion_mask = (labeled_gt == lesiong_id)
# collect indices of lesion candidates that have any overlap with the current GT lesion
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
# collect lesion candidates for current GT lesion
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in overlapping_lesion_candidate_indices:
# calculate overlap between lesion candidate and GT mask
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
# keep track of the highest overlap a lesion candidate has with any GT lesion
lesion_candidates_best_overlap[lesion_candidate_id] = max(
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
)
# store lesion candidate info for current GT mask
lesion_candidates_for_target_gt.append({
'id': lesion_candidate_id,
'confidence': lesion_confidence,
'overlap': overlap_score,
})
print("test 4, lesion_candidates_for_target_gt:",lesion_candidates_for_target_gt)
# Min overlap wordt niet behaald: +- 0.001
if len(lesion_candidates_for_target_gt) == 0:
# no lesion candidate matched with GT mask. Add FN.
y_list.append((1, 0., 0.))
elif len(lesion_candidates_for_target_gt) == 1:
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
candidate_info = lesion_candidates_for_target_gt[0]
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
# overlap between lesion candidate and GT mask is sufficient, add TP
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
else:
# overlap between lesion candidate and GT mask is insufficient, add FN
y_list.append((1, 0., 0.))
else:
# multiple predictions for current GT lesion
# sort lesion candidates based on overlap or confidence
key = multiple_lesion_candidates_selection_criteria
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
gt_lesion_matched = False
for candidate_info in lesion_candidates_for_target_gt:
lesion_pred_mask = (indexed_pred == candidate_info['id'])
if candidate_info['overlap'] > min_overlap:
indexed_pred[lesion_pred_mask] = 0
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
gt_lesion_matched = True
break
if not gt_lesion_matched:
# ground truth lesion not matched to a lesion candidate. Add FN.
y_list.append((1, 0., 0.))
# Remaining lesions are FPs
remaining_lesions = set(np.unique(indexed_pred))
remaining_lesions -= {0} # remove index 0, if present
for lesion_candidate_id, lesion_confidence in confidences:
if lesion_candidate_id in remaining_lesions:
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
# In this operation mode, this lesion candidate is not considered as a false positive.
pass
else:
y_list.append((0, lesion_confidence, 0.)) # add FP
# print("test 4, gaat alles hiernaartoe?") == JA
# print("test 3, hoe ziet y_list eruit na labels",y_list)
else:
# for benign case, all predictions are FPs
num_gt_lesions = 0
if len(confidences) > 0:
for _, lesion_confidence in confidences:
y_list.append((0, lesion_confidence, 0.))
else:
y_list.append((0, 0., 0.)) # avoid empty list
return y_list, num_gt_lesions
# Calculate macro metrics (true positives (TP), false positives (FP))
def counts_from_lesion_evaluations(
y_list: List[Tuple[int, float, float]],
thresholds: "Optional[npt.NDArray[np.float64]]" = None
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
"""
Calculate true positives (TP) and false positives (FP) as function of threshold,
based on the case evaluations from `evaluate_case`.
"""
# sort predictions
# print("test 4, zitten er predicties bij leasions voor sort?",y_list)
y_list.sort()
# print("test 4, zitten er predicties bij leasions na sort?",y_list,'len y_lsit:',len(y_list))
# collect targets and predictions
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
# print("test,zijn de laatste y-pred hoog?", y_pred)
# calculate number of lesions
num_lesions = y_true.sum()
if thresholds is None:
# collect thresholds for FROC analysis
thresholds = np.unique(y_pred)
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
# for >10,000 thresholds: resample to 10,000 unique thresholds, while also
# keeping all thresholds higher than 0.8 and the first 20 thresholds
if len(thresholds) > 10_000:
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
st = [thresholds[i] for i in rng]
low_thresholds = thresholds[-20:]
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
# define placeholders
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
# for each threshold: count FPs and calculate the sensitivity
for i, th in enumerate(thresholds):
if th > 0:
y_pred_thresholded = (y_pred >= th).astype(int)
tp = np.sum(y_true*y_pred_thresholded)
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
# print("test, is y_pred_thresholded altijd 0?",y_pred_thresholded)
# update FROC with new point
FP[i] = fp
TP[i] = tp
else:
# extend FROC curve to infinity
TP[i] = TP[-2]
FP[i] = np.inf
# print("test if tp werkt",TP)
# print("test if fp werkt",FP)
return TP, FP, thresholds, num_lesions
# Calculate FROC metrics (FP rate, sensitivity)
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate FROC metrics from counts
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
# print('test,Hieronder staat de tp waarde:',TP)
# print('test,Hieronder staat de num_lesions waarde:',num_lesions)
FP_per_case = FP / num_patients
return sensitivity, FP_per_case, thresholds, num_lesions
def ap_from_lesion_evaluations(y_list, thresholds=None):
# calculate counts
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
y_list=y_list, thresholds=thresholds
)
# calculate precision (lesion-level)
precision = TP / (TP + FP)
precision = np.append(precision, [0])
# calculate recall (lesion-level)
FN = num_lesions - TP
recall = TP / (TP + FN)
recall = np.append(recall, recall[-1:])
# calculate average precission (lesion-level)
AP = np.trapz(y=precision, x=recall)
return AP, precision, recall, thresholds
# Compute full FROC
def froc(
y_det: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
y_true: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
subject_list: Optional[Iterable[Hashable]] = None,
min_overlap=0.10,
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
case_confidence: str = 'max',
multiple_lesion_candidates_selection_criteria='overlap',
allow_unmatched_candidates_with_minimal_overlap=True,
flat: Optional[bool] = None,
num_parallel_calls: int = 8,
verbose: int = 0,
) -> Dict[str, Any]:
"""
FROC evaluation pipeline
(written 19 January 2022 by Joeran Bosma)
Usage:
For normal usage of the FROC evaluation pipeline, use the function `froc` with parameters
`y_det`, `y_true` and (optional) `subject_list`. Please note that this function is written
for binary 3D FROC analysis.
- `y_det`: iterable of all detection_map volumes to evaluate. Alternatively, y_det may contain
filenames ending in .nii.gz/.mha/.mhd/.npy/.npz, which will be loaded on-the-fly.
Provide an array of shape `(num_samples, D, H, W)`, where D, H, W are the spatial
dimensions (depth, height and width).
- `y_true`: iterable of all ground truth labels. Alternatively, y_det may contain filenames
ending in .nii.gz/.mha/.mhd/.npy/.npz, which should contain binary labels and
will be loaded on-the-fly. Provide an array of the same shape as `y_det`. Use
`1` to encode ground truth lesion, and `0` to encode background.
Additional settings:
For more control over the FROC evaluation pipeline, use:
- `min_overlap`: defines the minimal required Intersection over Union (IoU) or Dice similarity
coefficient (DSC) between a lesion candidate and ground truth lesion, to be
counted as a true positive detection.
"""
# Initialize Lists
roc_true = {}
roc_pred = {}
y_list = []
num_lesions = 0
if subject_list is None:
# generate indices to keep track of each case during multiprocessing
subject_list = itertools.count()
if flat is None:
flat = True
with ThreadPoolExecutor(max_workers=num_parallel_calls) as pool:
# define the functions that need to be processed: compute_pred_vector, with each individual
# detection_map prediction, ground truth label and parameters
future_to_args = {
pool.submit(evaluate_case, y_pred, y_true, min_overlap=min_overlap, overlap_func=overlap_func,
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
for (y_pred, y_true, idx) in zip(y_det, y_true, subject_list)
}
# process the cases in parallel
iterator = concurrent.futures.as_completed(future_to_args)
if verbose:
total: Optional[int] = None
if isinstance(subject_list, Sized):
total = len(subject_list)
iterator = tqdm(iterator, desc='Computing FROC', total=total)
for future in iterator:
try:
res = future.result()
except Exception as e:
print(f"Exception: {e}")
else:
# unpack results
y_list_pat, num_lesions_gt = res
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
# print("test 3,", y_list_pat)
# aggregate results
idx = future_to_args[future]
# print("test2, indx", idx)
# test: allemaal ingelezen
roc_true[idx] = np.max([a[0] for a in y_list_pat])
# print("test2, roc_true",roc_true)
if case_confidence == 'max':
# take highest lesion confidence as case-level confidence
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
elif case_confidence == 'bayesian':
# if a_i is the probability the i-th lesion is csPCa, then the case-level
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
else:
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
# accumulate outputs
y_list += y_list_pat
num_lesions += num_lesions_gt
# print("test2,heeft y-list ook leasie pred:",y_list)
# calculate statistics
num_patients = len(roc_true)
# get lesion-level results
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
y_list=y_list, num_patients=num_patients
)
# calculate recall, precision and average precision
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
# calculate case-level AUROC
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_list],
y_score=[roc_pred[s] for s in subject_list],
pos_label=1)
auc_score = auc(fpr, tpr)
if flat:
# flatten roc_true and roc_pred
roc_true_flat = [roc_true[s] for s in subject_list]
roc_pred_flat = [roc_pred[s] for s in subject_list]
metrics = {
"FP_per_case": FP_per_case,
"sensitivity": sensitivity,
"thresholds": thresholds,
"num_lesions": num_lesions,
"num_patients": num_patients,
"roc_true": (roc_true_flat if flat else roc_true),
"roc_pred": (roc_pred_flat if flat else roc_pred),
"AP": AP,
"precision": precision,
"recall": recall,
# patient-level predictions
'auroc': auc_score,
'tpr': tpr,
'fpr': fpr,
}
return metrics
def froc_for_folder(
y_det_dir: Union[Path, str],
y_true_dir: Optional[Union[Path, str]] = None,
subject_list: Optional[List[str]] = None,
min_overlap: float = 0.10,
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
case_confidence: str = 'max',
flat: Optional[bool] = None,
num_parallel_calls: int = 8,
verbose: int = 1
) -> Dict[str, Any]:
"""
Perform FROC evaluation for all samples found in y_det_dir, or the samples specified in the subject_list
Input:
- y_det_dir: path to folder containing the detection maps
- y_true_dir: (optioinal) allow labels to be stored in a different folder
- min_overlap: minimum overlap threshold
- overlap_func: intersection over union (IoU), Dice similarity coefficient (DSC), or custom function
"""
if y_true_dir is None:
y_true_dir = y_det_dir
y_det = []
y_true = []
if subject_list:
# collect the detection maps and labels for each case specified in the subject list
for subject_id in subject_list:
# construct paths to detection maps and labels
# print(np.type(subject_id))
# print(subject_list)
for postfix in [
"_detection_map.nii.gz", "_detection_map.npy", "_detection_map.npz",
".nii.gz", ".npy", ".npz", "_pred.nii.gz",
]:
detection_path = os.path.join(y_det_dir, f"{subject_id}{postfix}")
if os.path.exists(detection_path):
break
for postfix in [
"_label.nii.gz", "label.npy", "label.npz", "_seg.nii.gz",
]:
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
if os.path.exists(label_path):
break
if not os.path.exists(label_path):
assert y_true_dir != y_det_dir, f"Could not find label for {subject_id}!"
for postfix in [
".nii.gz", ".npy", ".npz",
]:
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
if os.path.exists(label_path):
break
# collect file paths
y_det += [detection_path]
y_true += [label_path]
else:
# collect all detection maps found in detection_map_dir
file_list = sorted(os.listdir(y_det_dir))
subject_list = []
if verbose >= 1:
print(f"Found {len(file_list)} files in the input directory, collecting detection_mapes with " +
"_detection_map.nii.gz and labels with _label.nii.gz..")
# collect filenames of detection_map predictions and labels
for fn in file_list:
if '_detection_map' in fn:
y_det += [os.path.join(y_det_dir, fn)]
y_true += [os.path.join(y_true_dir, fn.replace('_detection_map', '_label'))]
subject_list += [fn]
# ensure files exist
for detection_path in y_det:
assert os.path.exists(detection_path), f"Could not find detection map for {subject_id} at {detection_path}!"
for label_path in y_true:
assert os.path.exists(label_path), f"Could not find label for {subject_id} at {label_path}!"
if verbose >= 1:
print(f"Found prediction and label for {len(y_det)} cases. Here are some examples:")
print(subject_list[0:5])
# perform FROC evaluation with compiled file lists
return froc(y_det=y_det, y_true=y_true, subject_list=subject_list,
min_overlap=min_overlap, overlap_func=overlap_func, case_confidence=case_confidence,
flat=flat, num_parallel_calls=num_parallel_calls, verbose=verbose)

83
code/FROC/image_utils.py Executable file
View File

@ -0,0 +1,83 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import SimpleITK as sitk
from pathlib import Path
from typing import Union
try:
import numpy.typing as npt
except ImportError:
pass
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
assert isinstance(image, np.ndarray)
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
"Target size doesn't fit image size"
rank = len(img_size) # image dimensions
# placeholders for new shape
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
to_padding = [[0, 0] for _ in range(rank)]
slicer = [slice(None)] * rank
# for each dimension, determine process (cropping or padding)
for i in range(rank):
if image.shape[i] < img_size[i]:
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
else:
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
from_indices[i][1] = from_indices[i][0] + img_size[i]
# create slicer object to crop/leave each dimension
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
# pad cropped image to extend missing dimension
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
def read_image(path: Union[Path, str]):
"""Read image, given a filepath"""
if isinstance(path, Path):
path = path.as_posix()
else:
assert isinstance(path, str), f"Unexpected path type: {type(path)}. Please provide a Path or str."
if '.npy' in path:
return np.load(path)
elif '.nii' in path or '.mha' in path or 'mhd' in path:
return sitk.GetArrayFromImage(sitk.ReadImage(path))
elif '.npz' in path:
return np.load(path)['softmax'].astype('float32')[1] # nnUnet format
else:
raise ValueError(f"Unexpected file path. Supported file formats: .nii(.gz), .mha, .npy and .npz. Got: {path}.")
def read_prediction(path: Union[Path, str]) -> "npt.NDArray[np.float32]":
"""Read prediction, given a filepath"""
# read prediction and ensure correct dtype
pred: "npt.NDArray[np.float32]" = np.array(read_image(path), dtype=np.float32)
return pred
def read_label(path: Union[Path, str]) -> "npt.NDArray[np.int32]":
"""Read label, given a filepath"""
# read label and ensure correct dtype
lbl: "npt.NDArray[np.int32]" = np.array(read_image(path), dtype=np.int32)
return lbl

139
code/Make_overlay.py Executable file
View File

@ -0,0 +1,139 @@
# http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/05_Results_Visualization.html
from cgitb import grey
import SimpleITK as sitk
def make_isotropic(image, interpolator=sitk.sitkLinear, spacing=None):
"""
Many file formats (e.g. jpg, png,...) expect the pixels to be isotropic, same
spacing for all axes. Saving non-isotropic data in these formats will result in
distorted images. This function makes an image isotropic via resampling, if needed.
Args:
image (SimpleITK.Image): Input image.
interpolator: By default the function uses a linear interpolator. For
label images one should use the sitkNearestNeighbor interpolator
so as not to introduce non-existant labels.
spacing (float): Desired spacing. If none given then use the smallest spacing from
the original image.
Returns:
SimpleITK.Image with isotropic spacing which occupies the same region in space as
the input image.
"""
original_spacing = image.GetSpacing()
# Image is already isotropic, just return a copy.
if all(spc == original_spacing[0] for spc in original_spacing):
return sitk.Image(image)
# Make image isotropic via resampling.
original_size = image.GetSize()
if spacing is None:
spacing = min(original_spacing)
new_spacing = [spacing] * image.GetDimension()
new_size = [
int(round(osz * ospc / spacing))
for osz, ospc in zip(original_size, original_spacing)
]
return sitk.Resample(
image,
new_size,
sitk.Transform(),
interpolator,
image.GetOrigin(),
new_spacing,
image.GetDirection(),
0,
image.GetPixelID(),
)
def mask_image_multiply(mask, image):
components_per_pixel = image.GetNumberOfComponentsPerPixel()
if components_per_pixel == 1:
return mask * image
else:
return sitk.Compose(
[
mask * sitk.VectorIndexSelectionCast(image, channel)
for channel in range(components_per_pixel)
]
)
def alpha_blend(image1, image2, alpha=0.5, mask1=None, mask2=None):
"""
Alaph blend two images, pixels can be scalars or vectors.
The alpha blending factor can be either a scalar or an image whose
pixel type is sitkFloat32 and values are in [0,1].
The region that is alpha blended is controled by the given masks.
"""
if not mask1:
mask1 = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + 1.0
mask1.CopyInformation(image1)
else:
mask1 = sitk.Cast(mask1, sitk.sitkFloat32)
if not mask2:
mask2 = sitk.Image(image2.GetSize(), sitk.sitkFloat32) + 1
mask2.CopyInformation(image2)
else:
mask2 = sitk.Cast(mask2, sitk.sitkFloat32)
# if we received a scalar, convert it to an image
if type(alpha) != sitk.SimpleITK.Image:
alpha = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + alpha
alpha.CopyInformation(image1)
components_per_pixel = image1.GetNumberOfComponentsPerPixel()
if components_per_pixel > 1:
img1 = sitk.Cast(image1, sitk.sitkVectorFloat32)
img2 = sitk.Cast(image2, sitk.sitkVectorFloat32)
else:
img1 = sitk.Cast(image1, sitk.sitkFloat32)
img2 = sitk.Cast(image2, sitk.sitkFloat32)
intersection_mask = mask1 * mask2
intersection_image = mask_image_multiply(
alpha * intersection_mask, img1
) + mask_image_multiply((1 - alpha) * intersection_mask, img2)
return (
intersection_image
+ mask_image_multiply(mask2 - intersection_mask, img2)
+ mask_image_multiply(mask1 - intersection_mask, img1)
)
#Dictionary with functions mapping a scalar image to a three component vector image
image_mappings = {'grey': lambda x: sitk.Compose([x]*3),
'jet' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Jet),
'hot' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Hot),
'winter' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Winter)
}
image = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_pred.nii')
segmentation = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_seg.nii')
#Make the images isotropic so that when we save a slice along any axis
#it will be fine (most image formats assume isotropic pixel sizes).
#Segmentation is interpolated with nearest neighbor so we don't introduce new
#labels.
image = make_isotropic(image, interpolator = sitk.sitkLinear)
segmentation = make_isotropic(segmentation, interpolator = sitk.sitkNearestNeighbor)
#Convert image to sitkUInt8 after rescaling, color image formats only work for [0,255]
image_255 = sitk.Cast(sitk.RescaleIntensity(image, 0, 255), sitk.sitkUInt8)
segmentation_255 = sitk.Cast(sitk.RescaleIntensity(segmentation, 0, 255), sitk.sitkUInt8)
colormap = 'hot'
vec_image = image_mappings['hot'](image_255)
vec_image_label = image_mappings['winter'](image_255)
# vec_segmentation = sitk.ScalarToRGBColormap(segmentation, sitk.ScalarToRGBColormapImageFilter.Winter)
# vec_segmentation_2 = sitk.LabelToRGB(segmentation_255,colormap=[255,0,0])
vec_combined = sitk.Cast(alpha_blend(image1=vec_image, image2=vec_image_label, alpha=0.5, mask2=segmentation==1), sitk.sitkVectorUInt8)
sitk.WriteImage(vec_image, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image.nii.gz')
sitk.WriteImage(vec_image_label, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image_label.nii.gz')
sitk.WriteImage(vec_combined, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_com.nii.gz')

0
code/__init__.py Executable file
View File

59
code/csv2graph.py Executable file
View File

@ -0,0 +1,59 @@
import matplotlib.pyplot as plt
import pandas as pd
import glob
import argparse
import os
#create parser
def parse_input_args():
parser = argparse.ArgumentParser(description='Parse arguments for training a Reconstruction model')
parser.add_argument('train_out_dir',
type=str,
help='Directory name in train_output dir of the desired experiment folder. There should be a .csv file in this directory with train statistics.')
args = parser.parse_args()
return args
args = parse_input_args()
print(f"Plotting {args}")
# find csv file
# csv = glob.glob(f"train_output/{args.train_out_dir}/*.csv")[0]
folder_input = args.train_out_dir
# load csv file
df = pd.read_csv(f'{folder_input}')
# read csv file
for metric in df:
if not metric == 'epoch':
# if metric == 'loss' or metric == 'val_loss':
plt.plot(df['epoch'], df[metric], label=metric)
# plt.ylim(ymin=0,ymax=0.01)
folder, csvfile = os.path.split(args.train_out_dir)
root, experiment = os.path.split(os.path.split(folder)[0])
plt.title(experiment)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.savefig(f"{folder}/{experiment}.png")
plt.clf()
plt.close()
print(folder+".png")
print(f"\nsaved figure to {folder}")
# van yaml inladen 'loss'
# vanuit utils > dict to yaml
# csv viewer extension
# course .venv/bin/activate
# loop over alles en if metric then 1-metric
# kleur codering color=[]

107
code/data_path_rename.py Executable file
View File

@ -0,0 +1,107 @@
#X:\datasets\radboud_lesions_2022
#/data/pca-rad/experiments/fix-radboud-data/export/pat0777-2016.nii.gz
#'experiments/fix-radboud-data/export'
#'datasets/radboud_lesions_2022'
########### Read in the file ##############
# with open('./../data/Nijmegen paths/t2.txt', 'r') as file :
# filedata = file.read()
# filedata = filedata.replace('/diffusie_cro/b-1400','/t2_tse_tra')
# import numpy as np
# print(np.shape(filedata))
############# check for missing files ############
import numpy as np
import os
serie = 'b2000'
file1 = f'./../data/Nijmegen paths/{serie}.txt'
file2 = f'./../data/Test paths/{serie}.txt'
file3 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus
with open(file3) as fp:
data = fp.readlines()
UMCG = [i for i, s in enumerate(data) if '-U-' in s]
Martini = [i for i, s in enumerate(data) if '-M-' in s]
patient_id = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(data[index]))))[-9:-6] for index in Martini]
patient_id = [item.replace("/", "") for item in patient_id]
patient_id = [item.replace("y", "") for item in patient_id]
patient_id.sort()
Martini_data = [data[index] for index in Martini]
Martini_Diffusie = [i for i, s in enumerate(Martini_data) if not 'Diffusie' in s]
print(patient_id)
print(np.shape(Martini_data))
print(len(Martini_Diffusie))
print([Martini_data[index] for index in Martini_Diffusie])
quit()
############# merge radboud and umcg files ############
import numpy as np
import os
serie = 'b2000'
file1 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus
with open(file1) as fp:
data = fp.readlines()
# Reading data from file2
with open(file2) as fp:
data2 = fp.read()
# Merging 2 files
# To add the data of file2
# from next line
data += "\n"
data += data2
with open (f'{serie}.txt', 'w') as fp:
fp.write(data)
quit()
############ find more martini data ##############
import numpy as np
high_b_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
adc_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_adc.txt'
t2_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_t2w.txt'
seg_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/seg_new.txt'
with open(high_b_data, 'r') as file :
high_b_data = file.readlines()
with open(adc_data, 'r') as file :
adc_data = file.readlines()
with open(t2_data, 'r') as file :
t2_data = file.readlines()
with open(seg_data, 'r') as file :
seg_data = file.readlines()
indices = [i for i, s in enumerate(high_b_data) if 'Diffusie_b0-50-400-800-2000' in s]
b_2000 = [high_b_data[index] for index in indices]
b_50 = [item.replace("b-2000", "b-50") for item in b_2000]
b_400 = [item.replace("b-2000", "b-400") for item in b_2000]
b_800 = [item.replace("b-2000", "b-800") for item in b_2000]
adc = [adc_data[index] for index in indices]
t2 = [t2_data[index] for index in indices]
seg = [seg_data[index] for index in indices]
with open('b50.txt', 'w') as f:
f.writelines(b_50)
with open('b400.txt', 'w') as f:
f.writelines(b_400)
with open('b800.txt', 'w') as f:
f.writelines(b_800)
with open('b2000.txt', 'w') as f:
f.writelines(b_2000)
with open('adc.txt', 'w') as f:
f.writelines(adc)
with open('t2.txt', 'w') as f:
f.writelines(t2)
with open('seg.txt', 'w') as f:
f.writelines(seg)

20
code/dump_params.py Executable file
View File

@ -0,0 +1,20 @@
def dump_dict_to_yaml(
data: dict,
target_dir: str,
filename: str = "settings") -> None:
""" Writes the given dictionary as a yaml to the target directory.
Parameters:
`data (dict)`: dictionary of data.
`target_dir (str)`: directory where the yaml will be saved.
`` filename (str)`: name of the file without extension
"""
print("\nParameters")
for pair in data.items():
print(f"\t{pair}")
print()
path = f"{target_dir}/{filename}.yml"
print_p(f"Wrote yaml to: {path}")
with open(path, 'w') as outfile:
yaml.dump(data, outfile, default_flow_style=False)

75
code/pred_label_images.py Executable file
View File

@ -0,0 +1,75 @@
# A script with returns the prediction with labels overlay and stores them in a new folder named "report"
#modules
from preprocessing_function import preprocess
import numpy as np
from tqdm import tqdm
from helpers import *
import tensorflow as tf
from tensorflow import keras
#model load
Model_path = '..'
reconstructed_model = tf.keras.models.load_model(Model_path)
reconstructed_model.compile(loss='',
optimizer='',
metrics=[''])
####image load
# arg.series = b-50 b-400 etc.
# DATA_DIR = data dir
# IMAGE_SHAPE =
# TARGET_SPACING =
images, image_paths = {s: [] for s in args.series}, {}
segmentations = []
print_(f"> Loading images into RAM...")
# Read the image paths from the data directory.
# Texts files are expected to have the name "[series_name].txt"
for s in args.series:
with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f:
image_paths[s] = [l.strip() for l in f.readlines()]
with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f:
seg_paths = [l.strip() for l in f.readlines()]
num_images = len(seg_paths)
# Read and preprocess each of the paths for each series, and the segmentations.
for img_idx in tqdm(range(num_images)): #[:40] for less images
img_s = {s: sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32)
for s in args.series}
seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32)
img_n, seg_n = preprocess(img_s, seg_s,
shape=IMAGE_SHAPE, spacing=TARGET_SPACING)
for seq in img_n:
images[seq].append(img_n[seq])
segmentations.append(seg_n)
self._images = np.stack(
[np.stack(self.images[k], axis=0) for k in sequences], axis=-1)
#image predict
predictions = reconstructed_model.predict(images[idx])
#heatmap + input image
#save in folder output
# Export scan crops and targets once
# they don't change during training so we export them only once
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
for s_idx, s in enumerate(sequences):
img_s = sitk.GetImageFromArray(
self.validation_set[0][i][..., s_idx].squeeze().T)
sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz")
seg_s = sitk.GetImageFromArray(
self.validation_set[1][i].squeeze().T)
sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz")
prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T)
prd_bin_s = sitk.GetImageFromArray(
np.around(predictions[i]).astype(np.float32).squeeze().T)
sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz")
sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz")

82
code/utils_quintin.py Executable file
View File

@ -0,0 +1,82 @@
import yaml
from typing import List
from collections import deque
from pathlib import Path
import numpy as np
import SimpleITK as sitk
def print_p(text, end="\n"):
""" Print function for on Peregrine. It needs a flush before printing. """
print(text, flush=True, end=end)
def list_from_file(path: str) -> List:
""" Returns a list of all items on each line of the text file referenced in
the given path
Parameters:
`path (str)`: path to the text file
"""
print_p(path)
return [line.strip() for line in open(path, "r")]
def dump_dict_to_yaml(
data: dict,
target_dir: str,
filename: str = "settings",
verbose: bool = True) -> None:
""" Writes the given dictionary as a yaml to the target directory.
Parameters:
`data (dict)`: dictionary of data.
`target_dir (str)`: directory where the yaml will be saved.
`` filename (str)`: name of the file without extension
"""
if verbose:
print("\nParameters")
for pair in data.items():
print(f"\t{pair}")
print()
path = f"{target_dir}/{filename}.yml"
print_p(f"Wrote yaml to: {path}")
with open(path, 'w') as outfile:
yaml.dump(data, outfile, default_flow_style=False)
def read_yaml_to_dict(path: str) -> dict:
with open(path, 'r') as stream:
try:
parsed_yaml=yaml.safe_load(stream)
# print(parsed_yaml)
return parsed_yaml
except yaml.YAMLError as exc:
print(exc)
def create_dirs_if_not_exists(dirs: List[str]) -> None:
""" Creates the list of supplied directories if they don't exist yet.
Parameters:
`dirs (List[str]) or str`: list of strings representing the directories that
need to be created
"""
if isinstance(dirs, str):
dirs = [dirs]
for folder in dirs:
Path(folder).mkdir(parents=True, exist_ok=True)
print_p(f"Created directory {folder} or it already existed.")
def save_np_to_nifti(data_np: np.ndarray,
target_dir: str,
fname: str,
target_space: List[float] = [0.5, 0.5, 3.0]):
img_s = sitk.GetImageFromArray(data_np.T)
img_s.SetSpacing(target_space)
path = f"{target_dir}/{fname}.nii.gz"
sitk.WriteImage(img_s, path)
print_p(f"Saved numpy array to nifti: {path}")

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,91 @@
import glob
import csv
from sqlite3 import Error
import sqlite3
def create_connection(db_file):
""" create a database connection to the SQLite database
specified by the db_file
:param db_file: database file
:return: Connection object or None
"""
conn = None
try:
conn = sqlite3.connect(db_file)
except Error as e:
print(e)
return conn
## get pat_id, birthdate, PSA, PIRADS,
conn = create_connection('./db/master.db')
cur = conn.cursor()
query_pirads = f"""SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
WHERE pirads != "NA" AND (pirads > 3);"""
result_pirads = cur.execute(query_pirads).fetchall() #list of tuples
dates = [f"{date[1][:4]}-{date[1][4:6]}-{date[1][6:8]}" for date in result_pirads]
pirads_paths = [path.join(x[0],y,x[2]) for x,y in zip(result_pirads,dates) ]
query_gleason = f"""SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
WHERE gleason_1 != "NA" AND (gleason_1 > 3 OR gleason_2 > 3);"""
result_gleason = cur.execute(query_gleason).fetchall() #list of tuples
dates = [f"{date[1][:4]}-{date[1][4:6]}-{date[1][6:8]}" for date in result_gleason]
gleason_paths = [path.join(x[0],y,x[2]) for x,y in zip(result_gleason,dates) ]
print("Looking for visit folders containing niftis in ./nifti")
niftis = glob("./nifti/**/*.nii.gz", recursive=True)
folders = list(set([path.dirname(f) for f in niftis]))
print("Found", len(folders), "folders")
# df_list = []
# ROOT_DIR = '../../../datasets/anonymized_mri/only_nii_directory/*/*'
# x = glob.glob(ROOT_DIR)
# for patient_dir in sorted(x):
# T2 = []
# adc = []
# highb = []
# patient_id = []
# age_year = []
# patient_id = patient_dir.split('/')[-2]
# age_year = patient_dir.split('/')[-1]
# for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*/*')):
# if ('T2' in path or 't2' in path) and ('tra' in path or 'TRA' in path) and not ('seg' in path):
# T2 = path
# for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*')):
# if 'manual_adc' in path:
# manual_adc = path
# else:
# if 'ADC' in path and 'nii.gz' in path:
# adc = path
# print(path)
# if 'manual_b-1400' in path:
# highb1400 = path
# else:
# if 'b-2000' in path and 'nii.gz' in path:
# highb = path
# print(path)
# if manual_adc:
# adc = manual_adc
# if highb1400:
# highb = highb1400
# df_list.append({'patient_id':patient_id,'age_year':age_year,'t2':T2,'adc':adc,'high_b':highb})
# if not T2 or not adc or not highb:
# input(f'{patient_id} {age_year} {T2} {adc} {highb}')
# with open('csv_file.csv', 'w') as csvfile:
# writer = csv.DictWriter(csvfile, fieldnames=['patient_id','age_year','t2','adc','high_b'],delimiter=';')
# writer.writeheader()
# for data in df_list:
# writer.writerow(data)

View File

@ -0,0 +1,48 @@
import glob
import csv
df_list = []
ROOT_DIR = '../../../datasets/anonymized_mri/only_nii_directory/*/*'
x = glob.glob(ROOT_DIR)
for patient_dir in sorted(x):
T2 = []
adc = []
highb = []
patient_id = []
age_year = []
patient_id = patient_dir.split('/')[-2]
age_year = patient_dir.split('/')[-1]
for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*/*')):
if ('T2' in path or 't2' in path) and ('tra' in path or 'TRA' in path) and not ('seg' in path):
T2 = path
for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*')):
if 'manual_adc' in path:
manual_adc = path
else:
if 'ADC' in path and 'nii.gz' in path:
adc = path
print(path)
if 'manual_b-1400' in path:
highb1400 = path
else:
if 'b-2000' in path and 'nii.gz' in path:
highb = path
print(path)
if manual_adc:
adc = manual_adc
if highb1400:
highb = highb1400
df_list.append({'patient_id':patient_id,'age_year':age_year,'t2':T2,'adc':adc,'high_b':highb})
if not T2 or not adc or not highb:
input(f'{patient_id} {age_year} {T2} {adc} {highb}')
with open('csv_file.csv', 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=['patient_id','age_year','t2','adc','high_b'],delimiter=';')
writer.writeheader()
for data in df_list:
writer.writerow(data)