From 11591d57525d9699de6a0f9af7cf06133c5bc7f3 Mon Sep 17 00:00:00 2001 From: Stefan Date: Fri, 18 Mar 2022 12:02:03 +0100 Subject: [PATCH] initialization of git --- .gitignore | 152 +++++ README.md | 3 + setup.py | 24 + src/sfransen/DWI_exp/__init__.py | 6 + src/sfransen/DWI_exp/batchgenerator.py | 64 ++ src/sfransen/DWI_exp/callbacks.py | 96 +++ src/sfransen/DWI_exp/helpers.py | 239 +++++++ .../DWI_exp/preprocessing_function.py | 46 ++ src/sfransen/DWI_exp/unet.py | 114 ++++ src/sfransen/FROC/__init__.py | 6 + src/sfransen/FROC/analysis_utils.py | 313 +++++++++ src/sfransen/FROC/blob_preprocess.py | 148 ++++ src/sfransen/FROC/cal_froc_from_np.py | 642 ++++++++++++++++++ src/sfransen/FROC/data_utils.py | 39 ++ src/sfransen/FROC/froc.py | 505 ++++++++++++++ src/sfransen/FROC/image_utils.py | 83 +++ src/sfransen/Make_overlay.py | 139 ++++ src/sfransen/__init__.py | 0 src/sfransen/csv2graph.py | 59 ++ src/sfransen/data_path_rename.py | 107 +++ src/sfransen/dump_params.py | 20 + src/sfransen/pred_label_images.py | 75 ++ src/sfransen/utils_quintin.py | 82 +++ 23 files changed, 2962 insertions(+) create mode 100755 .gitignore create mode 100755 README.md create mode 100755 setup.py create mode 100755 src/sfransen/DWI_exp/__init__.py create mode 100755 src/sfransen/DWI_exp/batchgenerator.py create mode 100755 src/sfransen/DWI_exp/callbacks.py create mode 100755 src/sfransen/DWI_exp/helpers.py create mode 100755 src/sfransen/DWI_exp/preprocessing_function.py create mode 100755 src/sfransen/DWI_exp/unet.py create mode 100755 src/sfransen/FROC/__init__.py create mode 100755 src/sfransen/FROC/analysis_utils.py create mode 100755 src/sfransen/FROC/blob_preprocess.py create mode 100755 src/sfransen/FROC/cal_froc_from_np.py create mode 100755 src/sfransen/FROC/data_utils.py create mode 100755 src/sfransen/FROC/froc.py create mode 100755 src/sfransen/FROC/image_utils.py create mode 100755 src/sfransen/Make_overlay.py create mode 100755 src/sfransen/__init__.py create mode 100755 src/sfransen/csv2graph.py create mode 100755 src/sfransen/data_path_rename.py create mode 100755 src/sfransen/dump_params.py create mode 100755 src/sfransen/pred_label_images.py create mode 100755 src/sfransen/utils_quintin.py diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..dc5042b --- /dev/null +++ b/.gitignore @@ -0,0 +1,152 @@ +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +/build/ +/code/ +/old_code/ +/data/ +/job_scripts/ +/scripts/ +/temp/ +/slurms/ +*.out +sqliteDB/ +*.db +/train_output/ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100755 index 0000000..22ea947 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# sfransen + +A library for fast MRI for prostate cancer. Create undersampled data, train reconstruction models, validate and visualize results. \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..0d8020d --- /dev/null +++ b/setup.py @@ -0,0 +1,24 @@ +import setuptools + +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( + name="sfransen", + version="0.0.1", + author="S.J. Fransen (rad)", + author_email="s.j.fransen@umcg.nl", + description="later", + long_description=long_description, + long_description_content_type="text/markdown", + url="", + package_dir={'': 'src'}, + packages=setuptools.find_packages(where='src'), + classifiers=[ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + ], + scripts=[ + ], + python_requires='>=3.6', +) diff --git a/src/sfransen/DWI_exp/__init__.py b/src/sfransen/DWI_exp/__init__.py new file mode 100755 index 0000000..c0f8c62 --- /dev/null +++ b/src/sfransen/DWI_exp/__init__.py @@ -0,0 +1,6 @@ +print("de juiste init file") +from .batchgenerator import * +from .callbacks import * +from .helpers import * +from .preprocessing_function import * +from .unet import * diff --git a/src/sfransen/DWI_exp/batchgenerator.py b/src/sfransen/DWI_exp/batchgenerator.py new file mode 100755 index 0000000..33c9a8c --- /dev/null +++ b/src/sfransen/DWI_exp/batchgenerator.py @@ -0,0 +1,64 @@ +import math +from typing import Callable, Dict, List, Optional, Tuple + +import numpy as np +from tensorflow.keras.utils import Sequence + +class BatchGenerator(Sequence): + def __init__(self, + images: Dict[str, List[np.ndarray]], + segmentations: List[np.ndarray], + sequences: List[str], + shape: Tuple[int], + indexes: List[int] = None, + batch_size: int = 5, + shuffle: bool = False, + augmentation_function: Optional[Callable\ + [[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]] = None): + + self.images = images + self.segmentations = segmentations + self.sequences = sequences + self.indexes = indexes + self.batch_size = batch_size + self.shuffle = shuffle + self.augmentation = augmentation_function + + self.num_rows = len(images) + if self.indexes == None: + self.indexes = list(range(self.num_rows)) + + if type(indexes) == int: + self.indexes = list(range(self.indexes)) + + if self.batch_size == None: + self.batch_size = len(self.indexes) + + # Stack image index as index 0, stack sequences as index -1 + self._images = np.stack( + [np.stack(self.images[k], axis=0) for k in sequences], axis=-1) + self._segmentations = np.stack(segmentations, axis=0)[..., np.newaxis] + + def __len__(self): + return math.ceil(len(self.indexes) / self.batch_size) + + def __getitem__(self, batch_idx): + # Get image / segmentation indexes for current batch + indexes = self.indexes[batch_idx*self.batch_size:(1+batch_idx)*self.batch_size] + + # Get matching images and segmentations + images = self._images[indexes] + segmentations = self._segmentations[indexes] + + # Apply data augmentation if a function is provided + if self.augmentation: + for img_idx in range(len(indexes)): + images[img_idx], segmentations[img_idx] = self.augmentation( + images[img_idx], segmentations[img_idx]) + return images, segmentations + + def on_epoch_end(self): + super().on_epoch_end() + + if self.shuffle: + np.random.shuffle(self.indexes) diff --git a/src/sfransen/DWI_exp/callbacks.py b/src/sfransen/DWI_exp/callbacks.py new file mode 100755 index 0000000..d49f885 --- /dev/null +++ b/src/sfransen/DWI_exp/callbacks.py @@ -0,0 +1,96 @@ +import numpy as np +import SimpleITK as sitk +from helpers import * +import tensorflow.keras.backend as K +import tensorflow as tf +from tensorflow.keras.callbacks import Callback +from sklearn.metrics import roc_auc_score, roc_curve + +def dice_coef(y_true, y_pred): + y_true_f = K.flatten(y_true) + y_pred_f = K.round(K.flatten(y_pred)) + intersection = K.sum(y_true_f * y_pred_f) + return (2. * intersection + K.epsilon()) / ( + K.sum(y_true_f) + K.sum(y_pred_f) + K.epsilon()) + + +# def auc_value(y_true, y_pred): +# # print_("start ROC_AUC") +# # y_true_old = self.validation_set[1].squeeze() +# # y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze()) + +# y_true_auc = K.flatten(y_true) +# # print_(f"The shape of y_true = {np.shape(y_true)}" ) + +# y_pred_auc = K.round(K.flatten(y_pred)) +# # print_(f"The shape of y_pred = {np.shape(y_pred)}" ) + +# # fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1) +# auc = roc_auc_score(y_true_auc, y_pred_auc) +# print_('AUC:', auc) +# print_('AUC shape:', np.shape(auc)) +# # print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}') +# return auc + + +class IntermediateImages(Callback): + def __init__(self, validation_set, prefix, sequences, + num_images=10): + self.prefix = prefix + self.num_images = num_images + self.validation_set = ( + validation_set[0][:num_images, ...], + validation_set[1][:num_images, ...] + ) + + # Export scan crops and targets once + # they don't change during training so we export them only once + for i in range(min(self.num_images, self.validation_set[0].shape[0])): + for s_idx, s in enumerate(sequences): + img_s = sitk.GetImageFromArray( + self.validation_set[0][i][..., s_idx].squeeze().T) + sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz") + seg_s = sitk.GetImageFromArray( + self.validation_set[1][i].squeeze().T) + sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz") + + def on_epoch_end(self, epoch, logs={}): + # Predict on the validation_set + predictions = self.model.predict(self.validation_set, batch_size=1) + + # print_("start ROC_AUC") + # y_true_old = self.validation_set[1].squeeze() + # y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze()) + + # y_true = np.array(y_true_old).flatten() + # print_(f"The shape of y_true = {np.shape(y_true)}" ) + + # y_pred = np.array(y_pred_old).flatten() + # print_(f"The shape of y_pred = {np.shape(y_pred)}" ) + + # fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1) + # auc = roc_auc_score(y_true, y_pred) + # print_('AUC:', auc) + # print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}') + + for i in range(min(self.num_images, self.validation_set[0].shape[0])): + prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T) + prd_bin_s = sitk.GetImageFromArray( + np.around(predictions[i]).astype(np.float32).squeeze().T) + sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz") + sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz") + +# class RocCallback(Callback): +# def __init__(self,validation_data): +# # self.x = training_data[0] +# # self.y = training_data[1] +# self.x_val = validation_data[0] +# self.y_val = validation_data[1] + +# def on_epoch_end(self, epoch, logs={}): +# # y_pred_train = self.model.predict_proba(self.x) +# # roc_train = roc_auc_score(self.y, y_pred_train) +# y_pred_val = self.model.predict(self.x_val, batch_size=1) +# roc_val = roc_auc_score(self.y_val, y_pred_val) +# print('\rroc-auc_train: %s - roc-auc_val: %s' % (str(round(roc_val,4))),end=100*' '+'\n') +# return \ No newline at end of file diff --git a/src/sfransen/DWI_exp/helpers.py b/src/sfransen/DWI_exp/helpers.py new file mode 100755 index 0000000..18c39c7 --- /dev/null +++ b/src/sfransen/DWI_exp/helpers.py @@ -0,0 +1,239 @@ +import os +from os import path + +import numpy as np +import SimpleITK as sitk +from scipy import ndimage +import yaml + +def print_(*args, **kwargs): + print(*args, **kwargs, flush=True) + +def print_config(config, depth=0): + for k in config: + print(end=(" "*depth*2) + f"- {k}:") + if type(config[k]) == dict: + print() + print_config(config[k], depth+1) + else: + print(f"\t{config[k]}") + print_(end="") + +def prepare_project_dir(project_dir): + """ + Prepares the work directory for a project + TODO: Create text file containing configuration + """ + def _makedir(n): + os.makedirs(path.join(project_dir, n), exist_ok=True) + + _makedir("models") + _makedir("logs") + _makedir("data") + _makedir("docs") + _makedir("samples") + _makedir("output") + + + + +def fake_loss(*args, **kwargs): + return 0. + +def center_crop( + image: sitk.Image, shape: list, offset: list = None + ) -> sitk.Image: + """Extracts a region of interest from the center of an SITK image. + + Parameters: + image: Input image (SITK). + shape: The shape of the + + Returns: Cropped image (SITK) + """ + + size = image.GetSize() + + # Determine the centroid + centroid = [sz / 2 for sz in size] + + # Determine the origin for the bounding box by subtracting half the + # shape of the bounding box from each dimension of the centroid. + box_start = [int(c - sh / 2) for c, sh in zip(centroid, shape)] + + if offset: + box_start = [b - o for b, o in zip(box_start, offset)] + + # Extract the region of provided shape starting from the previously + # determined starting pixel. + region_extractor = sitk.RegionOfInterestImageFilter() + region_extractor.SetSize(shape) + region_extractor.SetIndex(box_start) + cropped_image = region_extractor.Execute(image) + + return cropped_image + +def augment(img, seg, + noise_chance = 0.3, + noise_mult_max = 0.001, + rotate_chance = 0.2, + rotate_max_angle = 30, + ): + #noise_mult_max was initially 0.1 + if np.random.uniform() < noise_chance: + img = augment_noise(img, np.random.uniform(0., noise_mult_max)) + + if np.random.uniform() < rotate_chance: + img, seg = augment_rotate(img, seg, + angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle)) + img, seg = augment_tilt(img, seg, + angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle)) + + return img, seg + +def augment_noise(img, multiplier): + noise = np.random.standard_normal(img.shape) * multiplier + return img + noise + +def augment_rotate(img, seg, angle): + img = ndimage.rotate(img, angle, reshape=False, cval=0) + seg = ndimage.rotate(seg, angle, reshape=False, order=0) + return img, seg + +def augment_tilt(img, seg, angle): + img = ndimage.rotate(img, angle, reshape=False, axes=(2,1), cval=0) + seg = ndimage.rotate(seg, angle, reshape=False, axes=(2,1), order=0) + return img, seg + +def resample( + image: sitk.Image, min_shape: list, method=sitk.sitkLinear, + new_spacing: list=[1, 1, 3.6] + ) -> sitk.Image: + """Resamples an image to given target spacing and shape. + + Parameters: + image: Input image (SITK). + shape: Minimum output shape for the underlying array. + method: SimpleITK interpolator to use for resampling. + (e.g. sitk.sitkNearestNeighbor, sitk.sitkLinear) + new_spacing: The new spacing to resample to. + + Returns: + int: Resampled image + + """ + + # Extract size and spacing from the image + size = image.GetSize() + spacing = image.GetSpacing() + + # Determine how much larger the image will become with the new spacing + factor = [sp / new_sp for sp, new_sp in zip(spacing, new_spacing)] + + # Determine the outcome size of the image for each dimension + get_size = lambda size, factor, min_shape: max(int(size * factor), min_shape) + new_size = [get_size(sz, f, sh) for sz, f, sh in zip(size, factor, min_shape)] + + # Resample the image + resampler = sitk.ResampleImageFilter() + resampler.SetReferenceImage(image) + resampler.SetOutputSpacing(new_spacing) + resampler.SetSize(new_size) + resampler.SetInterpolator(method) + resampled_image = resampler.Execute(image) + + return resampled_image + +# Start training +def get_generator( + images: dict, + segmentations: list, + sequences: list, + shape: tuple, + indexes: list = None, + batch_size: int = 5, + shuffle: bool = False, + augmentation = True + ): + """ + Returns a (training) generator for use with model.fit(). + + Parameters: + input_modalities: List of modalty names to include. + output_modalities: Names of the target modalities. + batch_size: Number of images per batch (default: all). + indexes: Only use the specified image indexes. + shuffle: Shuffle the lists of indexes once at the beginning. + augmentation: Apply augmentation or not (bool). + """ + + num_rows = len(images) + + if indexes == None: + indexes = list(range(num_rows)) + + if type(indexes) == int: + indexes = list(range(indexes)) + + if batch_size == None: + batch_size = len(indexes) + + idx = 0 + + # Prepare empty batch placeholder with named inputs and outputs + input_batch = np.zeros((batch_size,) + shape + (len(images),)) + output_batch = np.zeros((batch_size,) + shape + (1,)) + + # Loop infinitely to keep generating batches + while True: + # Prepare each observation in a batch + for batch_idx in range(batch_size): + # Shuffle the order of images if all indexes have been seen + if idx == 0 and shuffle: + np.random.shuffle(indexes) + current_index = indexes[idx] + + # Insert the augmented images into the input batch + img_crop = [images[s][current_index] for s in sequences] + img_crop = np.stack(img_crop, axis=-1) + seg_crop = segmentations[current_index][..., np.newaxis] + if augmentation: + img_crop, seg_crop = augment(img_crop, seg_crop) + input_batch[batch_idx] = img_crop + output_batch[batch_idx] = seg_crop + + # Increase the current index and modulo by the number of rows + # so that we stay within bounds + idx = (idx + 1) % len(indexes) + + yield input_batch, output_batch + +def resample_to_reference(image, ref_img, + interpolator=sitk.sitkNearestNeighbor, + default_pixel_value=0): + resampled_img = sitk.Resample(image, ref_img, + sitk.Transform(), + interpolator, default_pixel_value, + ref_img.GetPixelID()) + return resampled_img + +def dump_dict_to_yaml( + data: dict, + target_dir: str, + filename: str = "settings") -> None: + """ Writes the given dictionary as a yaml to the target directory. + + Parameters: + `data (dict)`: dictionary of data. + `target_dir (str)`: directory where the yaml will be saved. + `` filename (str)`: name of the file without extension + """ + print("\nParameters") + for pair in data.items(): + print(f"\t{pair}") + print() + + path = f"{target_dir}/{filename}.yml" + print_(f"Wrote yaml to: {path}") + with open(path, 'w') as outfile: + yaml.dump(data, outfile, default_flow_style=False) \ No newline at end of file diff --git a/src/sfransen/DWI_exp/preprocessing_function.py b/src/sfransen/DWI_exp/preprocessing_function.py new file mode 100755 index 0000000..aba1008 --- /dev/null +++ b/src/sfransen/DWI_exp/preprocessing_function.py @@ -0,0 +1,46 @@ +import numpy as np +import SimpleITK as sitk + +from .helpers import * + +def preprocess(imgs: dict, seg: sitk.Image, + shape: tuple, spacing: tuple, to_numpy=True) -> tuple: + + # Resample all of the images to the desired voxel spacing + img_r = {k: resample(imgs[k], + min_shape=(s+1 for s in shape), + new_spacing=spacing) for k in imgs} + seg_r = resample(seg, + min_shape=(s+1 for s in shape), + new_spacing=spacing, + method=sitk.sitkNearestNeighbor) + + # Center crop one of the input images + ref_seq = [k for k in img_r.keys()][0] + ref_img = center_crop(img_r[ref_seq], shape=shape) + + # Then crop the remaining series / segmentation to match the input crop by + # transforming them on the physical space of the cropped series. + img_crop = {k: resample_to_reference( + image=img_r[k], + ref_img=ref_img, + interpolator=sitk.sitkLinear) + for k in img_r} + seg_crop = resample_to_reference( + image=seg_r, + ref_img=ref_img, + interpolator=sitk.sitkNearestNeighbor) + + # Return sitk.Image instead of numpy np.ndarray. + if not to_numpy: + return img_crop, seg_crop + + img_n = {k: sitk.GetArrayFromImage(img_crop[k]).T for k in img_crop} + seg_n = sitk.GetArrayFromImage(seg_crop).T + seg_n = np.clip(seg_n, 0., 1.) + + # Z-score normalize all images + # to do: 2*std + for seq in img_n: + img_n[seq] = (img_n[seq] - np.mean(img_n[seq])) / ( 2* np.std(img_n[seq])) + return img_n, seg_n diff --git a/src/sfransen/DWI_exp/unet.py b/src/sfransen/DWI_exp/unet.py new file mode 100755 index 0000000..999ec53 --- /dev/null +++ b/src/sfransen/DWI_exp/unet.py @@ -0,0 +1,114 @@ +from tensorflow.keras import backend as K +from tensorflow.keras import Input, Model +from tensorflow.keras.layers import Conv3D, Activation, Dense, concatenate, add +from tensorflow.keras.layers import Conv3DTranspose, LeakyReLU +from tensorflow.keras import regularizers +from tensorflow.keras.layers import GlobalAveragePooling3D, Reshape, Dense, multiply, Permute + +def squeeze_excite_block(input, ratio=8): + ''' Create a channel-wise squeeze-excite block + Args: + input: input tensor + filters: number of output filters + Returns: a keras tensor + References + - [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507) + ''' + init = input + channel_axis = 1 if K.image_data_format() == "channels_first" else -1 + filters = init.shape[channel_axis] + se_shape = (1, 1, 1, filters) + + se = GlobalAveragePooling3D()(init) + se = Reshape(se_shape)(se) + se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se) + se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se) + + if K.image_data_format() == 'channels_first': + se = Permute((4, 1, 2, 3))(se) + + x = multiply([init, se]) + return x + +def build_dual_attention_unet( + input_shape, + l2_regularization = 0.0001, + ): + + def conv_layer(x, kernel_size, out_filters, strides=(1,1,1)): + x = Conv3D(out_filters, kernel_size, + strides = strides, + padding = 'same', + kernel_regularizer = regularizers.l2(l2_regularization), + kernel_initializer = 'he_normal', + use_bias = False + )(x) + return x + + def conv_block(input, out_filters, strides=(1,1,1), with_residual=False, with_se=False, activation='relu'): + # Strided convolution to convsample + x = conv_layer(input, (3,3,3), out_filters, strides) + x = Activation('relu')(x) + + # Unstrided convolution + x = conv_layer(x, (3,3,3), out_filters) + + # Add a squeeze-excite block + if with_se: + se = squeeze_excite_block(x) + x = add([x, se]) + + # Add a residual connection using a 1x1x1 convolution with strides + if with_residual: + residual = conv_layer(input, (1,1,1), out_filters, strides) + x = add([x, residual]) + + if activation == 'leaky': + x = LeakyReLU(alpha=.1)(x) + else: + x = Activation('relu')(x) + + # Activate whatever comes out of this + return x + + # If we already have only one input, no need to combine anything + inputs = Input(input_shape) + + # Downsampling + conv1 = conv_block(inputs, 16) + conv2 = conv_block(conv1, 32, strides=(2,2,1), with_residual=True, with_se=True) #72x72x18 + conv3 = conv_block(conv2, 64, strides=(2,2,1), with_residual=True, with_se=True) #36x36x18 + conv4 = conv_block(conv3, 128, strides=(2,2,2), with_residual=True, with_se=True) #18x18x9 + conv5 = conv_block(conv4, 256, strides=(2,2,2), with_residual=True, with_se=True) #9x9x9 + + # First upsampling sequence + up1_1 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(conv5) #18x18x9 + up1_2 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(up1_1) #36x36x18 + up1_3 = Conv3DTranspose(128, (3,3,3), strides=(2,2,1), padding='same')(up1_2) #72x72x18 + bridge1 = concatenate([conv4, up1_1]) #18x18x9 (128+128=256) + dec_conv_1 = conv_block(bridge1, 128, with_residual=True, with_se=True, activation='leaky') #18x18x9 + + # Second upsampling sequence + up2_1 = Conv3DTranspose(64, (3,3,3), strides=(2,2,2), padding='same')(dec_conv_1) # 36x36x18 + up2_2 = Conv3DTranspose(64, (3,3,3), strides=(2,2,1), padding='same')(up2_1) # 72x72x18 + bridge2 = concatenate([conv3, up1_2, up2_1]) # 36x36x18 (64+128+64=256) + dec_conv_2 = conv_block(bridge2, 64, with_residual=True, with_se=True, activation='leaky') + + # Final upsampling sequence + up3_1 = Conv3DTranspose(32, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_2) # 72x72x18 + bridge3 = concatenate([conv2, up1_3, up2_2, up3_1]) # 72x72x18 (32+128+64+32=256) + dec_conv_3 = conv_block(bridge3, 32, with_residual=True, with_se=True, activation='leaky') + + # Last upsampling to make heatmap + up4_1 = Conv3DTranspose(16, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_3) # 72x72x18 + dec_conv_4 = conv_block(up4_1, 16, with_residual=False, with_se=True, activation='leaky') #144x144x18 (16) + + # Reduce to a single output channel with a 1x1x1 convolution + single_channel = Conv3D(1, (1, 1, 1))(dec_conv_4) + + # Apply sigmoid activation to get binary prediction per voxel + act = Activation('sigmoid')(single_channel) + + # Model definition + model = Model(inputs=inputs, outputs=act) + return model \ No newline at end of file diff --git a/src/sfransen/FROC/__init__.py b/src/sfransen/FROC/__init__.py new file mode 100755 index 0000000..377a94b --- /dev/null +++ b/src/sfransen/FROC/__init__.py @@ -0,0 +1,6 @@ +from .image_utils import * +from .froc import * +from .data_utils import * +from .cal_froc_from_np import * +from .blob_preprocess import * +from .analysis_utils import * diff --git a/src/sfransen/FROC/analysis_utils.py b/src/sfransen/FROC/analysis_utils.py new file mode 100755 index 0000000..218b3a5 --- /dev/null +++ b/src/sfransen/FROC/analysis_utils.py @@ -0,0 +1,313 @@ +# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from sklearn.metrics import auc +from scipy import ndimage + +try: + import numpy.typing as npt +except ImportError: # pragma: no cover + pass + + +# Parse Detection Maps to Individual Lesions + Likelihoods +def parse_detection_map(detection_map): + # Label All Non-Connected Components in Detection Map + blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3))) + + confidences = [] + if num_blobs > 0: + # For Each Lesion Detection + for tumor_index in range(1, num_blobs+1): + + # Extract Mask of Current Lesion + # hard_blob = np.zeros_like(blobs_index) + # hard_blob[blobs_index == tumor] = 1 + # TODO: replace above with the following? Is faster I think. + # hard_blob = (blobs_index == tumor).astype(int) + + # Extract Max Predicted Likelihood for Lesion Detection + # max_prob = np.max(hard_blob) # <- this is always 1 + # hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used + + # Store Predicted Likelihood per Lesion Detection + max_prob = detection_map[blobs_index == tumor_index].max() + confidences.append((tumor_index, max_prob)) + return confidences, blobs_index + + +# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays +def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float: + epsilon = 1e-8 + dsc_num = np.sum(predictions[labels == 1]) * 2.0 + dsc_denom = np.sum(predictions) + np.sum(labels) + return float((dsc_num + epsilon) / (dsc_denom + epsilon)) + + +# Calculate Intersection over Union (IoU) for N-D Arrays +def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float: + epsilon = 1e-8 + iou_num = np.sum(predictions[labels == 1]) + iou_denom = np.sum(predictions) + np.sum(labels) - iou_num + return float((iou_num + epsilon) / (iou_denom + epsilon)) + + +# Calculate Operating Points for Curve +def calculate_operating_points(y, x, op_match=None, verbose=1): + """ + Input: + - y: (monotonically increasing) performance metric, such as the True Positive Rate + - x: (monotonically increasing) performance metric, such as the False Positive Rate + - op_match: dictionary that specifies the target operating point: { + 'x': target x value, 'y': target y value + } + + Returns: + - dictionary with operating point(s): { + 'op_closest_xy_y': y_op, # y value at operating point that matches both x and y of target operating point + 'op_closest_xy_x': x_op, # x value at operating point that matches both x and y of target operating point + ... + } + """ + # TODO: currently, a lower sensitivity is preferrred over a higher sensitivity if that means the operating point is matched better. + # Would be better to go for the best sensitivity/specificity, if that can be done without hurting the other performance metric. + # In practice, this should not be an issue, as we have many points then. + y = np.array(y) + x = np.array(x) + operating_points = {} + + if not np.all(np.diff(y) >= 0) and verbose: + print("Warning: y performance metric is not monotonically increasing, this could lead to unexpected behaviour!") + if not np.all(np.diff(x) >= 0) and verbose: + print("Warning: x performance metric is not monotonically increasing, this could lead to unexpected behaviour!") + + # Grab Index of Intersection -> Compute y/TPR and x/FPR @ Index -> Store + op_best_roc_idx = np.argmin(np.abs(y - (1 - x))) + op_best_roc_y = y[op_best_roc_idx] + op_best_roc_x = x[op_best_roc_idx] + operating_points.update(dict( + op_best_roc_idx=op_best_roc_idx, + op_best_roc_y=op_best_roc_y, + op_best_roc_x=op_best_roc_x + )) + + if op_match is not None: + # calculate operating point closest to target operating point + abs_deficit_x, abs_deficit_y = None, None + optional_x_keys = ['x', 'fpr', 'FPR'] + optional_y_keys = ['y', 'tpr', 'TPR', 'sensitivity', 'sens'] + + # if the target x value is specified, calculate the difference between target and oberved value + for key in optional_x_keys: + if key in op_match: + op_match_x = op_match[key] + abs_deficit_x = np.abs(x - op_match_x) + break + + # if the target y value is specified, calculate the difference between target and oberved value + for key in optional_y_keys: + if key in op_match: + op_match_y = op_match[key] + abs_deficit_y = np.abs(y - op_match_y) + break + + # if both target x and y values are specified, calculate the difference between the target pair and observed pair + # at the best match, store the observed x and y values + if abs_deficit_x is not None and abs_deficit_y is not None: + # determine the index of the the closest point to the target pair + abs_deficit = abs_deficit_x + abs_deficit_y + op_closest_xy_idx = np.argmin(abs_deficit) + op_closest_xy_y = y[op_closest_xy_idx] + op_closest_xy_x = x[op_closest_xy_idx] + + # store + operating_points.update(dict( + op_closest_xy_idx=op_closest_xy_idx, + op_closest_xy_y=op_closest_xy_y, + op_closest_xy_x=op_closest_xy_x + )) + + # same for matching x only + if abs_deficit_x is not None: + # determine the index of the the closest point to the target value + op_closest_x_idx = np.argmin(abs_deficit_x) + op_closest_x_y = y[op_closest_x_idx] + op_closest_x_x = x[op_closest_x_idx] + + # store + operating_points.update(dict( + op_closest_x_idx=op_closest_x_idx, + op_closest_x_y=op_closest_x_y, + op_closest_x_x=op_closest_x_x + )) + + # same for matching y only + if abs_deficit_y is not None: + # determine the index of the the closest point to the target value + op_closest_y_idx = np.argmin(abs_deficit_y) + op_closest_y_y = y[op_closest_y_idx] + op_closest_y_x = x[op_closest_y_idx] + + # store + operating_points.update(dict( + op_closest_y_idx=op_closest_y_idx, + op_closest_y_y=op_closest_y_y, + op_closest_y_x=op_closest_y_x + )) + + return operating_points + + +# Calculate Statistics for Multiple Curves +def calculate_statistics(metrics, op_match=None, x_start=0., x_end=1., verbose=1): + """ + Calculate statistics, such as the area under the curve, for multiple (independent) curves. + To calculate shared statistics, the curves must be translated to a shared x domain. To + achieve this with virtually no loss of the step-like nature of curves like ROC and FROC, + the shared x values are derived from the input, and offset with ± 1e-7. + + Input: + - metrics should be a list of tuples with the y & x coordinates for each run: + [([y1, y2, y3, ...], [x1, x2, x3]), # run 1 + ([y1, y2, y3, ...], [x1, x2, x3]), # run 2 + ] + - op_match: { + 'y': value of y metric (e.g., TPR/sensitivity) to match, + 'x': value of x metric (e.g., FPR/false positive rate) to match, + } + + Note: mean and 95% CI are calculated as function of the shared x. + """ + # construct the array of shared x values + eps = 1e-10 + x_shared = np.array([xi for _, x in metrics for xi in x], dtype=np.float64) # collect list of all possible x-values + x_shared = np.ravel(x_shared) # flatten list, if necessary + x_shared = np.append(x_shared, [x_start, x_end]) # add x_start and x_end to ensure correct pAUC calculation + x_shared = np.concatenate((x_shared+eps, x_shared-eps)) + x_shared = np.unique(x_shared) # only keep unique x values + x_shared.sort() # sort in ascending order (inplace) + + # validate x_start and x_end + assert x_start < x_end, f"x_start must be smaller than x_end! Got x_start={x_start} and x_end={x_end}." + + # convert the per-model y (e.g., TPR) vs x (e.g., FPR) to a shared domain + y_shared_all = np.zeros(shape=(len(metrics), len(x_shared)), dtype=np.float32) + auroc_all = [] + individually_matched_operating_points = [] + for i, (y, x) in enumerate(metrics): + # if necessary, unpack x and y + if len(y) == 1: + y = y[0] + if len(x) == 1: + x = x[0] + + # interpolate the y values to the shared x values + y_shared_domain = np.interp(x_shared, x, y) + y_shared_all[i] = y_shared_domain + + # calculate AUROC for macro stats + mask = (x_shared >= x_start) & (x_shared <= x_end) + auc_score = auc(x_shared[mask], y_shared_domain[mask]) + auroc_all += [auc_score] + + # match operating point for each run individually + operating_points = calculate_operating_points(y=y, x=x, op_match=op_match) + individually_matched_operating_points += [operating_points] + + # calculate statistics in shared domain + y_shared_mean = np.mean(y_shared_all, axis=0) + y_shared_std = np.std(y_shared_all, axis=0) + y_shared_CI_lower = np.percentile(y_shared_all, 2.5, axis=0) + y_shared_CI_higher = np.percentile(y_shared_all, 97.5, axis=0) + auroc_mean = np.mean(auroc_all) + auroc_std = np.std(auroc_all) + + # calculate operating points in shared domain + operating_points = calculate_operating_points(y=y_shared_mean, x=x_shared, + op_match=op_match, verbose=verbose) + + # collect results + results = { + # overview statistics + 'auroc_mean': auroc_mean, + 'auroc_std': auroc_std, + 'auroc_all': auroc_all, + + # for plotting + 'x_shared': x_shared, + 'y_shared_all': y_shared_all, + 'y_shared_mean': y_shared_mean, + 'y_shared_std': y_shared_std, + 'y_shared_CI_lower': y_shared_CI_lower, + 'y_shared_CI_higher': y_shared_CI_higher, + + # individually matched operating point + 'individually_matched_operating_points': individually_matched_operating_points, + } + results.update(operating_points) + + # calculate standard deviation of each metric (op_closest_xy_y, etc.) between individual runs + individually_matched_operating_points_std = { + f"{key}_std": np.std([ + operating_point_info[key] + for operating_point_info in individually_matched_operating_points + ]) + for key in individually_matched_operating_points[0].keys() + } + results.update(individually_matched_operating_points_std) + + return results + + +# Calculate (partial) Area Under Curve (pAUC) using (x,y) coordinates from the given curve +def calculate_pAUC_from_graph(x, y, pAUC_start: float = 0.0, pAUC_end: float = 1.0, full: bool = False): + """ + Input: + For a single curve: + - x: x values of a curve (e.g., the False Positive Rate points). [x1, x2, .., xn] + - y: y values of a curve (e.g., the True Positive Rate points). [y1, y2, .., yn] + + For multiple curves: + - list of x curves, for example the x values observed across multiple runs. [[x1m1, x2m1, .., xnm1], [x1m2, x2m2, ...., xnm2], ..] + - list of y curves, for example the y values observed across multiple runs. [[y1m1, y2m1, .., ynm1], [y1m2, y2m2, ...., ynm2], ..] + + - pAUC_start: lower bound of x (e.g., FPR) to compute pAUC + - pAUC_end: higher bound of x (e.g., FPR) to compute pAUC + + Returns: + - if (full==False): List of pAUC values for each set of ([x1, ..], [y1, ..]) coordinates + - if (full==True): Metrics as returned by `calculate_statistics` [see there] + + Note: function is not specific to the FROC curve + """ + + if not isinstance(x[0], (list, np.ndarray)) or not isinstance(y[0], (list, np.ndarray)): + # Have a single set of (x,y) coordinates + + assert not isinstance(x[0], (list, np.ndarray)) and not isinstance(y[0], (list, np.ndarray)), \ + "Either provide multiple sequences of (x,y) coordinates, or a single sequence. Obtained a mix of both now. " + + # Pack coordinates in format expected by `calculate_statistics` + coordinates_joined = [(y, x)] + else: + # Have multiple sets of (x,y) coordinates + # Pack coordinates in format expected by `calculate_statistics` + coordinates_joined = list(zip(y, x)) + + # Calculate AUC in Given Ranges + results = calculate_statistics(metrics=coordinates_joined, x_start=pAUC_start, x_end=pAUC_end) + if full: + return results + return results['auroc_all'] \ No newline at end of file diff --git a/src/sfransen/FROC/blob_preprocess.py b/src/sfransen/FROC/blob_preprocess.py new file mode 100755 index 0000000..d635dbb --- /dev/null +++ b/src/sfransen/FROC/blob_preprocess.py @@ -0,0 +1,148 @@ +import numpy as np +from scipy import ndimage +from typing import List, Tuple, Optional, Union + +""" +Extract lesion candidates from a softmax prediction +Authors: anindox8, matinhz, joeranbosma +""" + + +# Preprocess Softmax Volume (Clipping, Max Confidence) +def preprocess_softmax_static(softmax: np.ndarray, + threshold: float = 0.10, + min_voxels_detection: int = 10, + max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: + # Load and Preprocess Softmax Image + all_hard_blobs = np.zeros_like(softmax) + confidences = [] + clipped_softmax = softmax.copy() + clipped_softmax[softmax < threshold] = 0 + blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3))) + + if num_blobs > 0: # For Each Prediction + for tumor in range(1, num_blobs+1): + # determine mask for current lesion + hard_mask = np.zeros_like(blobs_index) + hard_mask[blobs_index == tumor] = 1 + + if np.count_nonzero(hard_mask) <= min_voxels_detection: + # remove tiny detection of <= 0.009 cm^3 + blobs_index[hard_mask.astype(bool)] = 0 + continue + + # add sufficiently sized detection + hard_blob = hard_mask * clipped_softmax + max_prob = np.max(hard_blob) + if max_prob_round_decimals is not None: + max_prob = np.round(max_prob, max_prob_round_decimals) + hard_blob[hard_blob > 0] = max_prob + all_hard_blobs += hard_blob + confidences.append((tumor, max_prob)) + return all_hard_blobs, confidences, blobs_index + + +def preprocess_softmax_dynamic(softmax: np.ndarray, + min_voxels_detection: int = 10, + num_lesions_to_extract: int = 5, + dynamic_threshold_factor: float = 2.5, + max_prob_round_decimals: Optional[int] = None, + remove_adjacent_lesion_candidates: bool = True, + max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: + """ + Generate detection proposals using a dynamic threshold to determine the location and size of lesions. + Author: Joeran Bosma + """ + working_softmax = softmax.copy() + dynamic_hard_blobs = np.zeros_like(softmax) + confidences: List[Tuple[int, float]] = [] + dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int) + + while len(confidences) < num_lesions_to_extract: + tumor_index = 1 + len(confidences) + + # determine max. softmax + max_prob = np.max(working_softmax) + + if max_prob < max_prob_failsafe_stopping_threshold: + break + + # set dynamic threshold to half the max + threshold = max_prob / dynamic_threshold_factor + + # extract blobs for dynamix threshold + all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold, + min_voxels_detection=min_voxels_detection, + max_prob_round_decimals=max_prob_round_decimals) + + # select blob with max. confidence + # note: the max_prob is re-computed in the (unlikely) case that the max. prob + # was inside a 'lesion candidate' of less than min_voxels_detection, which is + # thus removed in preprocess_softmax_static. + max_prob = np.max(all_hard_blobs) + mask_current_lesion = (all_hard_blobs == max_prob) + + # ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob) + mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3))) + mask_current_lesion = (mask_current_lesion_indexed == 1) + + # create mask with its confidence + hard_blob = (all_hard_blobs * mask_current_lesion) + + # Detect whether the extractted mask is a ring/hollow sphere + # around an existing lesion candidate. For confident lesions, + # the surroundings of the prediction are still quite confident, + # and can become a second 'detection'. For an # example, please + # see extracted lesion candidates nr. 4 and 5 at: + # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49 + # Detection method: grow currently extracted lesions by one voxel, + # and check if they overlap with the current extracted lesion. + extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0) + current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any() + + # Check if lesion candidate should be retained + if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap): + # store extracted lesion + dynamic_hard_blobs += hard_blob + confidences += [(tumor_index, max_prob)] + dynamic_indexed_blobs += (mask_current_lesion * tumor_index) + + # remove extracted lesion from working-softmax + working_softmax = (working_softmax * (~mask_current_lesion)) + + return dynamic_hard_blobs, confidences, dynamic_indexed_blobs + + +def preprocess_softmax(softmax: np.ndarray, + threshold: Union[str, float] = 0.10, + min_voxels_detection: int = 10, + num_lesions_to_extract: int = 5, + dynamic_threshold_factor: float = 2.5, + max_prob_round_decimals: Optional[int] = None, + remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: + """ + Generate detection proposals using a dynamic or static threshold to determine the size of lesions. + """ + if threshold == 'dynamic': + all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection, + dynamic_threshold_factor=dynamic_threshold_factor, + num_lesions_to_extract=num_lesions_to_extract, + remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates, + max_prob_round_decimals=max_prob_round_decimals) + elif threshold == 'dynamic-fast': + # determine max. softmax and set a per-case 'static' threshold based on that + max_prob = np.max(softmax) + threshold = float(max_prob / dynamic_threshold_factor) + all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold, + min_voxels_detection=min_voxels_detection, + max_prob_round_decimals=max_prob_round_decimals) + else: + threshold = float(threshold) # convert threshold to float, if it wasn't already + all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold, + min_voxels_detection=min_voxels_detection, + max_prob_round_decimals=max_prob_round_decimals) + + + + + return all_hard_blobs, confidences, indexed_pred \ No newline at end of file diff --git a/src/sfransen/FROC/cal_froc_from_np.py b/src/sfransen/FROC/cal_froc_from_np.py new file mode 100755 index 0000000..47d28fb --- /dev/null +++ b/src/sfransen/FROC/cal_froc_from_np.py @@ -0,0 +1,642 @@ +import argparse +import multiprocessing +import numpy as np +import SimpleITK as sitk +import time +import os +import itertools +import concurrent.futures + +from typing import List +import sys +sys.path.append('./../code') +from utils_quintin import * + +# from fastMRI_PCa.visualization.visualize import write_array2nifti_t2 +from tensorflow.keras.models import load_model +from functools import partial +from focal_loss import BinaryFocalLoss +from scipy import ndimage +from pathlib import Path +from tqdm import tqdm +from concurrent.futures import ThreadPoolExecutor +from sklearn.metrics import roc_curve, auc + +# from fastMRI_PCa.visualization import save_slice_3d +# from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np +# from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask +# from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s +# from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric + +from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized + +try: + import numpy.typing as npt +except ImportError: + pass + + + +############################### PREPROCESSING BLOBS ######################### +""" +Extract lesion candidates from a softmax prediction +Authors: anindox8, matinhz, joeranbosma +""" + + +# Preprocess Softmax Volume (Clipping, Max Confidence) +def preprocess_softmax_static(softmax: np.ndarray, + threshold: float = 0.10, + min_voxels_detection: int = 10, + max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: + # Load and Preprocess Softmax Image + all_hard_blobs = np.zeros_like(softmax) + confidences = [] + clipped_softmax = softmax.copy() + clipped_softmax[softmax < threshold] = 0 + blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3))) + + if num_blobs > 0: # For Each Prediction + for tumor in range(1, num_blobs+1): + # determine mask for current lesion + hard_mask = np.zeros_like(blobs_index) + hard_mask[blobs_index == tumor] = 1 + + if np.count_nonzero(hard_mask) <= min_voxels_detection: + # remove tiny detection of <= 0.009 cm^3 + blobs_index[hard_mask.astype(bool)] = 0 + continue + + # add sufficiently sized detection + hard_blob = hard_mask * clipped_softmax + max_prob = np.max(hard_blob) + if max_prob_round_decimals is not None: + max_prob = np.round(max_prob, max_prob_round_decimals) + hard_blob[hard_blob > 0] = max_prob + all_hard_blobs += hard_blob + confidences.append((tumor, max_prob)) + return all_hard_blobs, confidences, blobs_index + + +def preprocess_softmax_dynamic(softmax: np.ndarray, + min_voxels_detection: int = 10, + num_lesions_to_extract: int = 5, + dynamic_threshold_factor: float = 2.5, + max_prob_round_decimals: Optional[int] = None, + remove_adjacent_lesion_candidates: bool = True, + max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: + """ + Generate detection proposals using a dynamic threshold to determine the location and size of lesions. + Author: Joeran Bosma + """ + working_softmax = softmax.copy() + dynamic_hard_blobs = np.zeros_like(softmax) + confidences: List[Tuple[int, float]] = [] + dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int) + + while len(confidences) < num_lesions_to_extract: + tumor_index = 1 + len(confidences) + + # determine max. softmax + max_prob = np.max(working_softmax) + + if max_prob < max_prob_failsafe_stopping_threshold: + break + + # set dynamic threshold to half the max + threshold = max_prob / dynamic_threshold_factor + + # extract blobs for dynamix threshold + all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold, + min_voxels_detection=min_voxels_detection, + max_prob_round_decimals=max_prob_round_decimals) + + # select blob with max. confidence + # note: the max_prob is re-computed in the (unlikely) case that the max. prob + # was inside a 'lesion candidate' of less than min_voxels_detection, which is + # thus removed in preprocess_softmax_static. + max_prob = np.max(all_hard_blobs) + mask_current_lesion = (all_hard_blobs == max_prob) + + # ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob) + mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3))) + mask_current_lesion = (mask_current_lesion_indexed == 1) + + # create mask with its confidence + hard_blob = (all_hard_blobs * mask_current_lesion) + + # Detect whether the extractted mask is a ring/hollow sphere + # around an existing lesion candidate. For confident lesions, + # the surroundings of the prediction are still quite confident, + # and can become a second 'detection'. For an # example, please + # see extracted lesion candidates nr. 4 and 5 at: + # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49 + # Detection method: grow currently extracted lesions by one voxel, + # and check if they overlap with the current extracted lesion. + extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0) + current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any() + + # Check if lesion candidate should be retained + if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap): + # store extracted lesion + dynamic_hard_blobs += hard_blob + confidences += [(tumor_index, max_prob)] + dynamic_indexed_blobs += (mask_current_lesion * tumor_index) + + # remove extracted lesion from working-softmax + working_softmax = (working_softmax * (~mask_current_lesion)) + + return dynamic_hard_blobs, confidences, dynamic_indexed_blobs + + +def preprocess_softmax(softmax: np.ndarray, + threshold: Union[str, float] = 0.10, + min_voxels_detection: int = 10, + num_lesions_to_extract: int = 5, + dynamic_threshold_factor: float = 2.5, + max_prob_round_decimals: Optional[int] = None, + remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]: + """ + Generate detection proposals using a dynamic or static threshold to determine the size of lesions. + """ + if threshold == 'dynamic': + all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection, + dynamic_threshold_factor=dynamic_threshold_factor, + num_lesions_to_extract=num_lesions_to_extract, + remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates, + max_prob_round_decimals=max_prob_round_decimals) + elif threshold == 'dynamic-fast': + # determine max. softmax and set a per-case 'static' threshold based on that + max_prob = np.max(softmax) + threshold = float(max_prob / dynamic_threshold_factor) + all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold, + min_voxels_detection=min_voxels_detection, + max_prob_round_decimals=max_prob_round_decimals) + else: + threshold = float(threshold) # convert threshold to float, if it wasn't already + all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold, + min_voxels_detection=min_voxels_detection, + max_prob_round_decimals=max_prob_round_decimals) + + return all_hard_blobs, confidences, indexed_pred + + +################################################################################ + + +# Evaluate all cases +def evaluate( + y_true: np.ndarray, + y_pred: np.ndarray, + min_overlap=0.10, + overlap_func: str = 'DSC', + case_confidence: str = 'max', + multiple_lesion_candidates_selection_criteria='overlap', + allow_unmatched_candidates_with_minimal_overlap=True, + flat: Optional[bool] = None + ) -> Dict[str, Any]: + + # Make list out of numpy array so that it can be mapped in parallel with multiple CPUs. + y_true_list = y_true #[y_true[mri_idx] for mri_idx in range(y_true.shape[0])] + y_pred_list = y_pred #[y_pred[mri_idx] for mri_idx in range(y_pred.shape[0])] + + roc_true, roc_pred = {}, {} + y_list = [] + num_lesions = 0 + + subject_idxs = list(range(len(y_true_list))) + N_CPUS = 2 + with ThreadPoolExecutor(max_workers=N_CPUS) as pool: + # define the functions that need to be processed: compute_pred_vector, with each individual + # detection_map prediction, ground truth label and parameters + future_to_args = { + pool.submit(evaluate_case, + y_true, + y_pred, + min_overlap=min_overlap, + overlap_func=overlap_func, + multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria, + allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx + for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs) + } + + # process the cases in parallel + iterator = concurrent.futures.as_completed(future_to_args) + iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list)) + for future in iterator: + try: + res = future.result() + except Exception as e: + print(f"Exception: {e}") + else: + # unpack results + y_list_pat, num_lesions_gt = res + # note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels] + + # aggregate results + idx = future_to_args[future] + roc_true[idx] = np.max([a[0] for a in y_list_pat]) + if case_confidence == 'max': + # take highest lesion confidence as case-level confidence + roc_pred[idx] = np.max([a[1] for a in y_list_pat]) + elif case_confidence == 'bayesian': + # if a_i is the probability the i-th lesion is csPCa, then the case-level + # probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i} + roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat]) + else: + raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.") + + # accumulate outputs + y_list += y_list_pat + num_lesions += num_lesions_gt + + # calculate statistics + num_patients = len(roc_true) + + # get lesion-level results + sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations( + y_list=y_list, num_patients=num_patients + ) + + # calculate recall, precision and average precision + AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds) + + # calculate case-level AUROC + fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs], + y_score=[roc_pred[s] for s in subject_idxs], + pos_label=1) + auc_score = auc(fpr, tpr) + + flat = True + if flat: + # flatten roc_true and roc_pred + roc_true_flat = [roc_true[s] for s in subject_idxs] + roc_pred_flat = [roc_pred[s] for s in subject_idxs] + + metrics = { + "FP_per_case": convert_np_to_list(FP_per_case), + "sensitivity": convert_np_to_list(sensitivity), + "thresholds": convert_np_to_list(thresholds), + "num_lesions": int(num_lesions), + "num_patients": int(num_patients), + "roc_true": convert_np_to_list(roc_true_flat if flat else roc_true), + "roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred), + "AP": float(AP), + "precision": convert_np_to_list(precision), + "recall": convert_np_to_list(recall), + + # patient-level predictions + 'auroc': float(auc_score), + 'tpr': convert_np_to_list(tpr), + 'fpr': convert_np_to_list(fpr), + } + + return metrics + + +def convert_np_to_list(flat_numpy_arr): + ans = [] + for elem in flat_numpy_arr: + ans.append(float(elem)) + return ans + + +# Calculate FROC metrics (FP rate, sensitivity) +def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None): + # calculate counts + TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations( + y_list=y_list, thresholds=thresholds + ) + + # calculate FROC metrics from counts + sensitivity = TP / num_lesions if num_lesions > 0 else np.nan + FP_per_case = FP / num_patients + + return sensitivity, FP_per_case, thresholds, num_lesions + + +def ap_from_lesion_evaluations(y_list, thresholds=None): + # calculate counts + TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations( + y_list=y_list, thresholds=thresholds + ) + + # calculate precision (lesion-level) + precision = TP / (TP + FP) + precision = np.append(precision, [0]) + + # calculate recall (lesion-level) + FN = num_lesions - TP + recall = TP / (TP + FN) + recall = np.append(recall, recall[-1:]) + + # calculate average precission (lesion-level) + AP = np.trapz(y=precision, x=recall) + + return AP, precision, recall, thresholds + + +# Calculate macro metrics (true positives (TP), false positives (FP)) +def counts_from_lesion_evaluations( + y_list: List[Tuple[int, float, float]], + thresholds: "Optional[npt.NDArray[np.float64]]" = None +) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]": + """ + Calculate true positives (TP) and false positives (FP) as function of threshold, + based on the case evaluations from `evaluate_case`. + """ + # sort predictions + y_list.sort() + + # collect targets and predictions + y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list]) + y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list]) + + # calculate number of lesions + num_lesions = y_true.sum() + + if thresholds is None: + # collect thresholds for FROC analysis + thresholds = np.unique(y_pred) + thresholds[::-1].sort() # sort thresholds in descending order (inplace) + + # for >10,000 thresholds: resample to 10,000 unique thresholds, while also + # keeping all thresholds higher than 0.8 and the first 20 thresholds + if len(thresholds) > 10_000: + rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32) + st = [thresholds[i] for i in rng] + low_thresholds = thresholds[-20:] + thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds]) + + # define placeholders + FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32) + TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32) + + # for each threshold: count FPs and calculate the sensitivity + for i, th in enumerate(thresholds): + if th > 0: + y_pred_thresholded = (y_pred >= th).astype(int) + tp = np.sum(y_true*y_pred_thresholded) + fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded) + + # update FROC with new point + FP[i] = fp + TP[i] = tp + else: + # extend FROC curve to infinity + TP[i] = TP[-2] + FP[i] = np.inf + + return TP, FP, thresholds, num_lesions + + +# Compute base prediction metrics TP/FP/FN with associated model confidences +def evaluate_case( + y_true: np.ndarray, + y_pred: np.ndarray, + min_overlap: float = 0.10, + overlap_func: str = 'DSC', + multiple_lesion_candidates_selection_criteria: str = 'overlap', + allow_unmatched_candidates_with_minimal_overlap: bool = True + ) -> Tuple[List[Tuple[int, float, float]], int]: + """ + Gather the list of lesion candidates, and classify in TP/FP/FN. + - multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same + ground truth mask, use 'overlap' or 'confidence' to choose + which lesion is matched against the ground truth mask. + + Returns: + - a list of tuples with: + (is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label) + - number of ground truth lesions + """ + y_list: List[Tuple[int, float, float]] = [] + + if overlap_func == 'IoU': + overlap_func = calculate_iou + elif overlap_func == 'DSC': + overlap_func = calculate_dsc + else: + raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'") + + # convert dtype to float32 + y_true = y_true.astype('int32') + y_pred = y_pred.astype('float32') + + if y_pred.shape[0] < y_true.shape[0]: + print("Warning: padding prediction to match label!") + y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape) + + + confidences, indexed_pred = parse_detection_map(y_pred) + + lesion_candidates_best_overlap: Dict[str, float] = {} + + if y_true.any(): + # for each malignant scan + labeled_gt, num_gt_lesions = ndimage.label(y_true, np.ones((3, 3, 3))) + + for lesiong_id in range(1, num_gt_lesions+1): + # for each lesion in ground-truth (GT) label + gt_lesion_mask = (labeled_gt == lesiong_id) + + # collect indices of lesion candidates that have any overlap with the current GT lesion + overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask])) + overlapping_lesion_candidate_indices -= {0} # remove index 0, if present + + # collect lesion candidates for current GT lesion + lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = [] + for lesion_candidate_id, lesion_confidence in confidences: + if lesion_candidate_id in overlapping_lesion_candidate_indices: + # calculate overlap between lesion candidate and GT mask + lesion_pred_mask = (indexed_pred == lesion_candidate_id) + overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask) + + # keep track of the highest overlap a lesion candidate has with any GT lesion + lesion_candidates_best_overlap[lesion_candidate_id] = max( + overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0) + ) + + # store lesion candidate info for current GT mask + lesion_candidates_for_target_gt.append({ + 'id': lesion_candidate_id, + 'confidence': lesion_confidence, + 'overlap': overlap_score, + }) + print(lesion_candidates_for_target_gt) + + if len(lesion_candidates_for_target_gt) == 0: + # no lesion candidate matched with GT mask. Add FN. + y_list.append((1, 0., 0.)) + elif len(lesion_candidates_for_target_gt) == 1: + # single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise. + candidate_info = lesion_candidates_for_target_gt[0] + lesion_pred_mask = (indexed_pred == candidate_info['id']) + + if candidate_info['overlap'] > min_overlap: + # overlap between lesion candidate and GT mask is sufficient, add TP + indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment + y_list.append((1, candidate_info['confidence'], candidate_info['overlap'])) + else: + # overlap between lesion candidate and GT mask is insufficient, add FN + y_list.append((1, 0., 0.)) + else: + # multiple predictions for current GT lesion + # sort lesion candidates based on overlap or confidence + key = multiple_lesion_candidates_selection_criteria + lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True) + + gt_lesion_matched = False + for candidate_info in lesion_candidates_for_target_gt: + lesion_pred_mask = (indexed_pred == candidate_info['id']) + + if candidate_info['overlap'] > min_overlap: + indexed_pred[lesion_pred_mask] = 0 + y_list.append((1, candidate_info['confidence'], candidate_info['overlap'])) + gt_lesion_matched = True + break + + if not gt_lesion_matched: + # ground truth lesion not matched to a lesion candidate. Add FN. + y_list.append((1, 0., 0.)) + + # Remaining lesions are FPs + remaining_lesions = set(np.unique(indexed_pred)) + remaining_lesions -= {0} # remove index 0, if present + for lesion_candidate_id, lesion_confidence in confidences: + if lesion_candidate_id in remaining_lesions: + overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0) + if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap: + # The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap + # with a GT lesion. The GT lesion is, however, matched to another lesion candidate. + # In this operation mode, this lesion candidate is not considered as a false positive. + pass + else: + y_list.append((0, lesion_confidence, 0.)) # add FP + + else: + # for benign case, all predictions are FPs + num_gt_lesions = 0 + if len(confidences) > 0: + for _, lesion_confidence in confidences: + y_list.append((0, lesion_confidence, 0.)) + else: + y_list.append((0, 0., 0.)) # avoid empty list + + return y_list, num_gt_lesions + + +# Calculate Intersection over Union (IoU) for N-D Arrays +def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float: + epsilon = 1e-8 + iou_num = np.sum(predictions[labels == 1]) + iou_denom = np.sum(predictions) + np.sum(labels) - iou_num + return float((iou_num + epsilon) / (iou_denom + epsilon)) + + +# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays +def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float: + epsilon = 1e-8 + dsc_num = np.sum(predictions[labels == 1]) * 2.0 + dsc_denom = np.sum(predictions) + np.sum(labels) + return float((dsc_num + epsilon) / (dsc_denom + epsilon)) + + +# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK] +def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs): + assert isinstance(image, np.ndarray) + assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \ + "Target size doesn't fit image size" + + rank = len(img_size) # image dimensions + + # placeholders for new shape + from_indices = [[0, image.shape[dim]] for dim in range(rank)] + to_padding = [[0, 0] for _ in range(rank)] + slicer = [slice(None)] * rank + + # for each dimension, determine process (cropping or padding) + for i in range(rank): + if image.shape[i] < img_size[i]: + to_padding[i][0] = (img_size[i] - image.shape[i]) // 2 + to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0] + else: + from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.)) + from_indices[i][1] = from_indices[i][0] + img_size[i] + + # create slicer object to crop/leave each dimension + slicer[i] = slice(from_indices[i][0], from_indices[i][1]) + + # pad cropped image to extend missing dimension + return np.pad(image[tuple(slicer)], to_padding, **kwargs) + + +# Parse Detection Maps to Individual Lesions + Likelihoods +def parse_detection_map(detection_map): + # Label All Non-Connected Components in Detection Map + blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3))) + + confidences = [] + if num_blobs > 0: + # For Each Lesion Detection + for tumor_index in range(1, num_blobs+1): + + # Extract Mask of Current Lesion + # hard_blob = np.zeros_like(blobs_index) + # hard_blob[blobs_index == tumor] = 1 + # TODO: replace above with the following? Is faster I think. + # hard_blob = (blobs_index == tumor).astype(int) + + # Extract Max Predicted Likelihood for Lesion Detection + # max_prob = np.max(hard_blob) # <- this is always 1 + # hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used + + # Store Predicted Likelihood per Lesion Detection + max_prob = detection_map[blobs_index == tumor_index].max() + confidences.append((tumor_index, max_prob)) + return confidences, blobs_index + +############################### CONSTANTS #################################### +# Quitin code removed +############################### CONSTANTS #################################### + +def move_dims(arr): + # UMCG numpy dimensions convention: dims = (batch, width, heigth, depth) + # Joeran numpy dimensions convention: dims = (batch, depth, heigth, width) + arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently. + arr = np.moveaxis(arr, 3, 2) + return arr + +############################### CONSTANTS #################################### + + + + +# # Reconstruction model directory +# RECON_DIR = "11_new_recon_multiprocess" +# T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5" + +# # Path to file with train, validation and test indexes. +# IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml" + +# # Diagnostic models +# diag_models = [r"models/14_long_diag/best-direct-fold0.h5", +# r"models/15_long_diag_early_stopping/best-direct-fold0.h5"] + + +# # x_true_val = np.load("temp/x_true_val.npy") +# print("loading numpy arrays") +# y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy")) +# y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy")) +# y_true_val = move_dims(y_true_val) +# y_pred_val = move_dims(y_pred_val) + +# # Preprocess the blobs individually +# for mri_idx in range(y_pred_val.shape[0]): +# print(f"preprocessing mri idx {mri_idx}") +# y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0] + +# print(f"Start FROC evaluation.") +# metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val) + +# dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True) \ No newline at end of file diff --git a/src/sfransen/FROC/data_utils.py b/src/sfransen/FROC/data_utils.py new file mode 100755 index 0000000..688c0f5 --- /dev/null +++ b/src/sfransen/FROC/data_utils.py @@ -0,0 +1,39 @@ +# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import numpy as np +import json + + +def save_metrics(metrics, file_path=None): + # convert dtypes to stock Python + save_metrics = sterilize(metrics) + + # save metrics using safe file write + file_path_tmp = file_path + '.tmp' + with open(file_path_tmp, 'w') as fp: + json.dump(save_metrics, fp, indent=4) + os.rename(file_path_tmp, file_path) + + +def sterilize(obj): + if isinstance(obj, dict): + return {k: sterilize(v) for k, v in obj.items()} + elif isinstance(obj, (list, tuple, np.ndarray)): + return [sterilize(v) for v in obj] + elif isinstance(obj, (str, int, bool, float)): + return obj + else: + return obj.__repr__() \ No newline at end of file diff --git a/src/sfransen/FROC/froc.py b/src/sfransen/FROC/froc.py new file mode 100755 index 0000000..16c8297 --- /dev/null +++ b/src/sfransen/FROC/froc.py @@ -0,0 +1,505 @@ +# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function +import os +import numpy as np +from tqdm import tqdm +from scipy import ndimage +from sklearn.metrics import roc_curve, auc +import concurrent.futures +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +import itertools + +from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized +try: + import numpy.typing as npt +except ImportError: + pass + +from image_utils import ( + resize_image_with_crop_or_pad, read_label, read_prediction +) +from analysis_utils import ( + parse_detection_map, calculate_iou, calculate_dsc +) + + +# Compute base prediction metrics TP/FP/FN with associated model confidences +def evaluate_case( + detection_map: "Union[npt.NDArray[np.float32], str]", + label: "Union[npt.NDArray[np.int32], str]", + min_overlap: float = 0.10, + overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU', + multiple_lesion_candidates_selection_criteria: str = 'overlap', + allow_unmatched_candidates_with_minimal_overlap: bool = True +) -> Tuple[List[Tuple[int, float, float]], int]: + """ + Gather the list of lesion candidates, and classify in TP/FP/FN. + - multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same + ground truth mask, use 'overlap' or 'confidence' to choose + which lesion is matched against the ground truth mask. + + Returns: + - a list of tuples with: + (is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label) + - number of ground truth lesions + """ + y_list: List[Tuple[int, float, float]] = [] + if isinstance(label, str): + label = read_label(label) + if isinstance(detection_map, str): + detection_map = read_prediction(detection_map) + if overlap_func == 'IoU': + overlap_func = calculate_iou + elif overlap_func == 'DSC': + overlap_func = calculate_dsc + else: + raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'") + + # convert dtype to float32 + label = label.astype('int32') + detection_map = detection_map.astype('float32') + + if detection_map.shape[0] < label.shape[0]: + print("Warning: padding prediction to match label!") + detection_map = resize_image_with_crop_or_pad(detection_map, label.shape) + + confidences, indexed_pred = parse_detection_map(detection_map) + + lesion_candidates_best_overlap: Dict[str, float] = {} + + # note to stefan: check wether the if statements are correct and that the append goes correct + if label.any(): + # for each malignant scan + labeled_gt, num_gt_lesions = ndimage.label(label, np.ones((3, 3, 3))) + # print("test3, werkt if label.any", num_gt_lesions) WE + for lesiong_id in range(1, num_gt_lesions+1): + # for each lesion in ground-truth (GT) label + gt_lesion_mask = (labeled_gt == lesiong_id) + + # collect indices of lesion candidates that have any overlap with the current GT lesion + overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask])) + overlapping_lesion_candidate_indices -= {0} # remove index 0, if present + + # collect lesion candidates for current GT lesion + lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = [] + for lesion_candidate_id, lesion_confidence in confidences: + if lesion_candidate_id in overlapping_lesion_candidate_indices: + # calculate overlap between lesion candidate and GT mask + lesion_pred_mask = (indexed_pred == lesion_candidate_id) + overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask) + + # keep track of the highest overlap a lesion candidate has with any GT lesion + lesion_candidates_best_overlap[lesion_candidate_id] = max( + overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0) + ) + + # store lesion candidate info for current GT mask + lesion_candidates_for_target_gt.append({ + 'id': lesion_candidate_id, + 'confidence': lesion_confidence, + 'overlap': overlap_score, + }) + print("test 4, lesion_candidates_for_target_gt:",lesion_candidates_for_target_gt) + # Min overlap wordt niet behaald: +- 0.001 + if len(lesion_candidates_for_target_gt) == 0: + # no lesion candidate matched with GT mask. Add FN. + y_list.append((1, 0., 0.)) + elif len(lesion_candidates_for_target_gt) == 1: + # single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise. + candidate_info = lesion_candidates_for_target_gt[0] + lesion_pred_mask = (indexed_pred == candidate_info['id']) + + if candidate_info['overlap'] > min_overlap: + # overlap between lesion candidate and GT mask is sufficient, add TP + indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment + y_list.append((1, candidate_info['confidence'], candidate_info['overlap'])) + else: + # overlap between lesion candidate and GT mask is insufficient, add FN + y_list.append((1, 0., 0.)) + else: + # multiple predictions for current GT lesion + # sort lesion candidates based on overlap or confidence + key = multiple_lesion_candidates_selection_criteria + lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True) + + gt_lesion_matched = False + for candidate_info in lesion_candidates_for_target_gt: + lesion_pred_mask = (indexed_pred == candidate_info['id']) + + if candidate_info['overlap'] > min_overlap: + indexed_pred[lesion_pred_mask] = 0 + y_list.append((1, candidate_info['confidence'], candidate_info['overlap'])) + gt_lesion_matched = True + break + + if not gt_lesion_matched: + # ground truth lesion not matched to a lesion candidate. Add FN. + y_list.append((1, 0., 0.)) + + # Remaining lesions are FPs + remaining_lesions = set(np.unique(indexed_pred)) + remaining_lesions -= {0} # remove index 0, if present + for lesion_candidate_id, lesion_confidence in confidences: + if lesion_candidate_id in remaining_lesions: + overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0) + if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap: + # The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap + # with a GT lesion. The GT lesion is, however, matched to another lesion candidate. + # In this operation mode, this lesion candidate is not considered as a false positive. + pass + else: + y_list.append((0, lesion_confidence, 0.)) # add FP + # print("test 4, gaat alles hiernaartoe?") == JA + # print("test 3, hoe ziet y_list eruit na labels",y_list) + + else: + # for benign case, all predictions are FPs + num_gt_lesions = 0 + if len(confidences) > 0: + for _, lesion_confidence in confidences: + y_list.append((0, lesion_confidence, 0.)) + else: + y_list.append((0, 0., 0.)) # avoid empty list + + return y_list, num_gt_lesions + + +# Calculate macro metrics (true positives (TP), false positives (FP)) +def counts_from_lesion_evaluations( + y_list: List[Tuple[int, float, float]], + thresholds: "Optional[npt.NDArray[np.float64]]" = None +) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]": + """ + Calculate true positives (TP) and false positives (FP) as function of threshold, + based on the case evaluations from `evaluate_case`. + """ + # sort predictions + # print("test 4, zitten er predicties bij leasions voor sort?",y_list) + y_list.sort() + # print("test 4, zitten er predicties bij leasions na sort?",y_list,'len y_lsit:',len(y_list)) + # collect targets and predictions + y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list]) + y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list]) + # print("test,zijn de laatste y-pred hoog?", y_pred) + # calculate number of lesions + num_lesions = y_true.sum() + + if thresholds is None: + # collect thresholds for FROC analysis + thresholds = np.unique(y_pred) + thresholds[::-1].sort() # sort thresholds in descending order (inplace) + + # for >10,000 thresholds: resample to 10,000 unique thresholds, while also + # keeping all thresholds higher than 0.8 and the first 20 thresholds + if len(thresholds) > 10_000: + rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32) + st = [thresholds[i] for i in rng] + low_thresholds = thresholds[-20:] + thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds]) + + # define placeholders + FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32) + TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32) + + # for each threshold: count FPs and calculate the sensitivity + for i, th in enumerate(thresholds): + if th > 0: + y_pred_thresholded = (y_pred >= th).astype(int) + tp = np.sum(y_true*y_pred_thresholded) + fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded) + # print("test, is y_pred_thresholded altijd 0?",y_pred_thresholded) + # update FROC with new point + FP[i] = fp + TP[i] = tp + else: + # extend FROC curve to infinity + TP[i] = TP[-2] + FP[i] = np.inf + + + # print("test if tp werkt",TP) + # print("test if fp werkt",FP) + return TP, FP, thresholds, num_lesions + + +# Calculate FROC metrics (FP rate, sensitivity) +def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None): + # calculate counts + TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations( + y_list=y_list, thresholds=thresholds + ) + + # calculate FROC metrics from counts + sensitivity = TP / num_lesions if num_lesions > 0 else np.nan + # print('test,Hieronder staat de tp waarde:',TP) + # print('test,Hieronder staat de num_lesions waarde:',num_lesions) + + FP_per_case = FP / num_patients + + return sensitivity, FP_per_case, thresholds, num_lesions + + +def ap_from_lesion_evaluations(y_list, thresholds=None): + # calculate counts + TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations( + y_list=y_list, thresholds=thresholds + ) + + # calculate precision (lesion-level) + precision = TP / (TP + FP) + precision = np.append(precision, [0]) + + # calculate recall (lesion-level) + FN = num_lesions - TP + recall = TP / (TP + FN) + recall = np.append(recall, recall[-1:]) + + # calculate average precission (lesion-level) + AP = np.trapz(y=precision, x=recall) + + return AP, precision, recall, thresholds + + +# Compute full FROC +def froc( + y_det: "Iterable[Union[npt.NDArray[np.float64], str, Path]]", + y_true: "Iterable[Union[npt.NDArray[np.float64], str, Path]]", + subject_list: Optional[Iterable[Hashable]] = None, + min_overlap=0.10, + overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU', + case_confidence: str = 'max', + multiple_lesion_candidates_selection_criteria='overlap', + allow_unmatched_candidates_with_minimal_overlap=True, + flat: Optional[bool] = None, + num_parallel_calls: int = 8, + verbose: int = 0, +) -> Dict[str, Any]: + """ + FROC evaluation pipeline + (written 19 January 2022 by Joeran Bosma) + + Usage: + For normal usage of the FROC evaluation pipeline, use the function `froc` with parameters + `y_det`, `y_true` and (optional) `subject_list`. Please note that this function is written + for binary 3D FROC analysis. + + - `y_det`: iterable of all detection_map volumes to evaluate. Alternatively, y_det may contain + filenames ending in .nii.gz/.mha/.mhd/.npy/.npz, which will be loaded on-the-fly. + Provide an array of shape `(num_samples, D, H, W)`, where D, H, W are the spatial + dimensions (depth, height and width). + - `y_true`: iterable of all ground truth labels. Alternatively, y_det may contain filenames + ending in .nii.gz/.mha/.mhd/.npy/.npz, which should contain binary labels and + will be loaded on-the-fly. Provide an array of the same shape as `y_det`. Use + `1` to encode ground truth lesion, and `0` to encode background. + + Additional settings: + For more control over the FROC evaluation pipeline, use: + - `min_overlap`: defines the minimal required Intersection over Union (IoU) or Dice similarity + coefficient (DSC) between a lesion candidate and ground truth lesion, to be + counted as a true positive detection. + + """ + # Initialize Lists + roc_true = {} + roc_pred = {} + y_list = [] + num_lesions = 0 + + if subject_list is None: + # generate indices to keep track of each case during multiprocessing + subject_list = itertools.count() + if flat is None: + flat = True + + with ThreadPoolExecutor(max_workers=num_parallel_calls) as pool: + # define the functions that need to be processed: compute_pred_vector, with each individual + # detection_map prediction, ground truth label and parameters + future_to_args = { + pool.submit(evaluate_case, y_pred, y_true, min_overlap=min_overlap, overlap_func=overlap_func, + multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria, + allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx + for (y_pred, y_true, idx) in zip(y_det, y_true, subject_list) + } + + # process the cases in parallel + iterator = concurrent.futures.as_completed(future_to_args) + if verbose: + total: Optional[int] = None + if isinstance(subject_list, Sized): + total = len(subject_list) + iterator = tqdm(iterator, desc='Computing FROC', total=total) + for future in iterator: + try: + res = future.result() + except Exception as e: + print(f"Exception: {e}") + else: + # unpack results + y_list_pat, num_lesions_gt = res + # note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels] + # print("test 3,", y_list_pat) + # aggregate results + idx = future_to_args[future] + # print("test2, indx", idx) + # test: allemaal ingelezen + roc_true[idx] = np.max([a[0] for a in y_list_pat]) + # print("test2, roc_true",roc_true) + if case_confidence == 'max': + # take highest lesion confidence as case-level confidence + roc_pred[idx] = np.max([a[1] for a in y_list_pat]) + elif case_confidence == 'bayesian': + # if a_i is the probability the i-th lesion is csPCa, then the case-level + # probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i} + roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat]) + else: + raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.") + + # accumulate outputs + y_list += y_list_pat + num_lesions += num_lesions_gt + + # print("test2,heeft y-list ook leasie pred:",y_list) + # calculate statistics + num_patients = len(roc_true) + + # get lesion-level results + sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations( + y_list=y_list, num_patients=num_patients + ) + + # calculate recall, precision and average precision + AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds) + + # calculate case-level AUROC + fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_list], + y_score=[roc_pred[s] for s in subject_list], + pos_label=1) + auc_score = auc(fpr, tpr) + + if flat: + # flatten roc_true and roc_pred + roc_true_flat = [roc_true[s] for s in subject_list] + roc_pred_flat = [roc_pred[s] for s in subject_list] + + metrics = { + "FP_per_case": FP_per_case, + "sensitivity": sensitivity, + "thresholds": thresholds, + "num_lesions": num_lesions, + "num_patients": num_patients, + "roc_true": (roc_true_flat if flat else roc_true), + "roc_pred": (roc_pred_flat if flat else roc_pred), + "AP": AP, + "precision": precision, + "recall": recall, + + # patient-level predictions + 'auroc': auc_score, + 'tpr': tpr, + 'fpr': fpr, + } + + return metrics + + +def froc_for_folder( + y_det_dir: Union[Path, str], + y_true_dir: Optional[Union[Path, str]] = None, + subject_list: Optional[List[str]] = None, + min_overlap: float = 0.10, + overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU', + case_confidence: str = 'max', + flat: Optional[bool] = None, + num_parallel_calls: int = 8, + verbose: int = 1 +) -> Dict[str, Any]: + """ + Perform FROC evaluation for all samples found in y_det_dir, or the samples specified in the subject_list + + Input: + - y_det_dir: path to folder containing the detection maps + - y_true_dir: (optioinal) allow labels to be stored in a different folder + - min_overlap: minimum overlap threshold + - overlap_func: intersection over union (IoU), Dice similarity coefficient (DSC), or custom function + """ + if y_true_dir is None: + y_true_dir = y_det_dir + + y_det = [] + y_true = [] + if subject_list: + # collect the detection maps and labels for each case specified in the subject list + for subject_id in subject_list: + # construct paths to detection maps and labels + # print(np.type(subject_id)) + # print(subject_list) + for postfix in [ + "_detection_map.nii.gz", "_detection_map.npy", "_detection_map.npz", + ".nii.gz", ".npy", ".npz", "_pred.nii.gz", + ]: + detection_path = os.path.join(y_det_dir, f"{subject_id}{postfix}") + if os.path.exists(detection_path): + break + + for postfix in [ + "_label.nii.gz", "label.npy", "label.npz", "_seg.nii.gz", + ]: + label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}") + if os.path.exists(label_path): + break + if not os.path.exists(label_path): + assert y_true_dir != y_det_dir, f"Could not find label for {subject_id}!" + for postfix in [ + ".nii.gz", ".npy", ".npz", + ]: + label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}") + if os.path.exists(label_path): + break + + # collect file paths + y_det += [detection_path] + y_true += [label_path] + else: + # collect all detection maps found in detection_map_dir + file_list = sorted(os.listdir(y_det_dir)) + subject_list = [] + if verbose >= 1: + print(f"Found {len(file_list)} files in the input directory, collecting detection_mapes with " + + "_detection_map.nii.gz and labels with _label.nii.gz..") + + # collect filenames of detection_map predictions and labels + for fn in file_list: + if '_detection_map' in fn: + y_det += [os.path.join(y_det_dir, fn)] + y_true += [os.path.join(y_true_dir, fn.replace('_detection_map', '_label'))] + subject_list += [fn] + + # ensure files exist + for detection_path in y_det: + assert os.path.exists(detection_path), f"Could not find detection map for {subject_id} at {detection_path}!" + for label_path in y_true: + assert os.path.exists(label_path), f"Could not find label for {subject_id} at {label_path}!" + + if verbose >= 1: + print(f"Found prediction and label for {len(y_det)} cases. Here are some examples:") + print(subject_list[0:5]) + + # perform FROC evaluation with compiled file lists + return froc(y_det=y_det, y_true=y_true, subject_list=subject_list, + min_overlap=min_overlap, overlap_func=overlap_func, case_confidence=case_confidence, + flat=flat, num_parallel_calls=num_parallel_calls, verbose=verbose) \ No newline at end of file diff --git a/src/sfransen/FROC/image_utils.py b/src/sfransen/FROC/image_utils.py new file mode 100755 index 0000000..514a466 --- /dev/null +++ b/src/sfransen/FROC/image_utils.py @@ -0,0 +1,83 @@ +# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import SimpleITK as sitk +from pathlib import Path + +from typing import Union +try: + import numpy.typing as npt +except ImportError: + pass + + +# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK] +def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs): + assert isinstance(image, np.ndarray) + assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \ + "Target size doesn't fit image size" + + rank = len(img_size) # image dimensions + + # placeholders for new shape + from_indices = [[0, image.shape[dim]] for dim in range(rank)] + to_padding = [[0, 0] for _ in range(rank)] + slicer = [slice(None)] * rank + + # for each dimension, determine process (cropping or padding) + for i in range(rank): + if image.shape[i] < img_size[i]: + to_padding[i][0] = (img_size[i] - image.shape[i]) // 2 + to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0] + else: + from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.)) + from_indices[i][1] = from_indices[i][0] + img_size[i] + + # create slicer object to crop/leave each dimension + slicer[i] = slice(from_indices[i][0], from_indices[i][1]) + + # pad cropped image to extend missing dimension + return np.pad(image[tuple(slicer)], to_padding, **kwargs) + + +def read_image(path: Union[Path, str]): + """Read image, given a filepath""" + if isinstance(path, Path): + path = path.as_posix() + else: + assert isinstance(path, str), f"Unexpected path type: {type(path)}. Please provide a Path or str." + + if '.npy' in path: + return np.load(path) + elif '.nii' in path or '.mha' in path or 'mhd' in path: + return sitk.GetArrayFromImage(sitk.ReadImage(path)) + elif '.npz' in path: + return np.load(path)['softmax'].astype('float32')[1] # nnUnet format + else: + raise ValueError(f"Unexpected file path. Supported file formats: .nii(.gz), .mha, .npy and .npz. Got: {path}.") + + +def read_prediction(path: Union[Path, str]) -> "npt.NDArray[np.float32]": + """Read prediction, given a filepath""" + # read prediction and ensure correct dtype + pred: "npt.NDArray[np.float32]" = np.array(read_image(path), dtype=np.float32) + return pred + + +def read_label(path: Union[Path, str]) -> "npt.NDArray[np.int32]": + """Read label, given a filepath""" + # read label and ensure correct dtype + lbl: "npt.NDArray[np.int32]" = np.array(read_image(path), dtype=np.int32) + return lbl \ No newline at end of file diff --git a/src/sfransen/Make_overlay.py b/src/sfransen/Make_overlay.py new file mode 100755 index 0000000..0b50e2a --- /dev/null +++ b/src/sfransen/Make_overlay.py @@ -0,0 +1,139 @@ +# http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/05_Results_Visualization.html + + +from cgitb import grey +import SimpleITK as sitk + +def make_isotropic(image, interpolator=sitk.sitkLinear, spacing=None): + """ + Many file formats (e.g. jpg, png,...) expect the pixels to be isotropic, same + spacing for all axes. Saving non-isotropic data in these formats will result in + distorted images. This function makes an image isotropic via resampling, if needed. + Args: + image (SimpleITK.Image): Input image. + interpolator: By default the function uses a linear interpolator. For + label images one should use the sitkNearestNeighbor interpolator + so as not to introduce non-existant labels. + spacing (float): Desired spacing. If none given then use the smallest spacing from + the original image. + Returns: + SimpleITK.Image with isotropic spacing which occupies the same region in space as + the input image. + """ + original_spacing = image.GetSpacing() + # Image is already isotropic, just return a copy. + if all(spc == original_spacing[0] for spc in original_spacing): + return sitk.Image(image) + # Make image isotropic via resampling. + original_size = image.GetSize() + if spacing is None: + spacing = min(original_spacing) + new_spacing = [spacing] * image.GetDimension() + new_size = [ + int(round(osz * ospc / spacing)) + for osz, ospc in zip(original_size, original_spacing) + ] + return sitk.Resample( + image, + new_size, + sitk.Transform(), + interpolator, + image.GetOrigin(), + new_spacing, + image.GetDirection(), + 0, + image.GetPixelID(), + ) + +def mask_image_multiply(mask, image): + components_per_pixel = image.GetNumberOfComponentsPerPixel() + if components_per_pixel == 1: + return mask * image + else: + return sitk.Compose( + [ + mask * sitk.VectorIndexSelectionCast(image, channel) + for channel in range(components_per_pixel) + ] + ) + + +def alpha_blend(image1, image2, alpha=0.5, mask1=None, mask2=None): + """ + Alaph blend two images, pixels can be scalars or vectors. + The alpha blending factor can be either a scalar or an image whose + pixel type is sitkFloat32 and values are in [0,1]. + The region that is alpha blended is controled by the given masks. + """ + + if not mask1: + mask1 = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + 1.0 + mask1.CopyInformation(image1) + else: + mask1 = sitk.Cast(mask1, sitk.sitkFloat32) + if not mask2: + mask2 = sitk.Image(image2.GetSize(), sitk.sitkFloat32) + 1 + mask2.CopyInformation(image2) + else: + mask2 = sitk.Cast(mask2, sitk.sitkFloat32) + # if we received a scalar, convert it to an image + if type(alpha) != sitk.SimpleITK.Image: + alpha = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + alpha + alpha.CopyInformation(image1) + components_per_pixel = image1.GetNumberOfComponentsPerPixel() + if components_per_pixel > 1: + img1 = sitk.Cast(image1, sitk.sitkVectorFloat32) + img2 = sitk.Cast(image2, sitk.sitkVectorFloat32) + else: + img1 = sitk.Cast(image1, sitk.sitkFloat32) + img2 = sitk.Cast(image2, sitk.sitkFloat32) + + intersection_mask = mask1 * mask2 + + intersection_image = mask_image_multiply( + alpha * intersection_mask, img1 + ) + mask_image_multiply((1 - alpha) * intersection_mask, img2) + return ( + intersection_image + + mask_image_multiply(mask2 - intersection_mask, img2) + + mask_image_multiply(mask1 - intersection_mask, img1) + ) + + +#Dictionary with functions mapping a scalar image to a three component vector image +image_mappings = {'grey': lambda x: sitk.Compose([x]*3), + 'jet' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Jet), + 'hot' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Hot), + 'winter' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Winter) + } + + + +image = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_pred.nii') +segmentation = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_seg.nii') + +#Make the images isotropic so that when we save a slice along any axis +#it will be fine (most image formats assume isotropic pixel sizes). +#Segmentation is interpolated with nearest neighbor so we don't introduce new +#labels. +image = make_isotropic(image, interpolator = sitk.sitkLinear) +segmentation = make_isotropic(segmentation, interpolator = sitk.sitkNearestNeighbor) + +#Convert image to sitkUInt8 after rescaling, color image formats only work for [0,255] +image_255 = sitk.Cast(sitk.RescaleIntensity(image, 0, 255), sitk.sitkUInt8) +segmentation_255 = sitk.Cast(sitk.RescaleIntensity(segmentation, 0, 255), sitk.sitkUInt8) + + +colormap = 'hot' +vec_image = image_mappings['hot'](image_255) +vec_image_label = image_mappings['winter'](image_255) + +# vec_segmentation = sitk.ScalarToRGBColormap(segmentation, sitk.ScalarToRGBColormapImageFilter.Winter) +# vec_segmentation_2 = sitk.LabelToRGB(segmentation_255,colormap=[255,0,0]) + +vec_combined = sitk.Cast(alpha_blend(image1=vec_image, image2=vec_image_label, alpha=0.5, mask2=segmentation==1), sitk.sitkVectorUInt8) + + +sitk.WriteImage(vec_image, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image.nii.gz') +sitk.WriteImage(vec_image_label, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image_label.nii.gz') +sitk.WriteImage(vec_combined, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_com.nii.gz') diff --git a/src/sfransen/__init__.py b/src/sfransen/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/src/sfransen/csv2graph.py b/src/sfransen/csv2graph.py new file mode 100755 index 0000000..cfc3ded --- /dev/null +++ b/src/sfransen/csv2graph.py @@ -0,0 +1,59 @@ +import matplotlib.pyplot as plt +import pandas as pd +import glob +import argparse +import os + +#create parser +def parse_input_args(): + parser = argparse.ArgumentParser(description='Parse arguments for training a Reconstruction model') + + parser.add_argument('train_out_dir', + type=str, + help='Directory name in train_output dir of the desired experiment folder. There should be a .csv file in this directory with train statistics.') + + args = parser.parse_args() + return args + +args = parse_input_args() + +print(f"Plotting {args}") + +# find csv file +# csv = glob.glob(f"train_output/{args.train_out_dir}/*.csv")[0] +folder_input = args.train_out_dir + +# load csv file +df = pd.read_csv(f'{folder_input}') + +# read csv file +for metric in df: + # if not metric == 'epoch': + if metric == 'loss' or metric == 'val_loss': + plt.plot(df['epoch'], df[metric], label=metric) + plt.ylim(ymin=0,ymax=0.01) + + + +folder, csvfile = os.path.split(args.train_out_dir) +root, experiment = os.path.split(os.path.split(folder)[0]) + +plt.title(experiment) +plt.xlabel('Epoch') +plt.ylabel('Loss') +plt.grid() +plt.legend() +plt.savefig(f"{folder}/{experiment}.png") +plt.clf() +plt.close() +print(folder+".png") +print(f"\nsaved figure to {folder}") + +# van yaml inladen 'loss' +# vanuit utils > dict to yaml +# csv viewer extension +# course .venv/bin/activate +# loop over alles en if metric then 1-metric +# kleur codering color=[] + + diff --git a/src/sfransen/data_path_rename.py b/src/sfransen/data_path_rename.py new file mode 100755 index 0000000..773c5cd --- /dev/null +++ b/src/sfransen/data_path_rename.py @@ -0,0 +1,107 @@ +#X:\datasets\radboud_lesions_2022 +#/data/pca-rad/experiments/fix-radboud-data/export/pat0777-2016.nii.gz + +#'experiments/fix-radboud-data/export' +#'datasets/radboud_lesions_2022' + +########### Read in the file ############## +# with open('./../data/Nijmegen paths/t2.txt', 'r') as file : +# filedata = file.read() + +# filedata = filedata.replace('/diffusie_cro/b-1400','/t2_tse_tra') +# import numpy as np +# print(np.shape(filedata)) + +############# check for missing files ############ +import numpy as np +import os + +serie = 'b2000' +file1 = f'./../data/Nijmegen paths/{serie}.txt' +file2 = f'./../data/Test paths/{serie}.txt' +file3 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt' +# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus +with open(file3) as fp: + data = fp.readlines() + +UMCG = [i for i, s in enumerate(data) if '-U-' in s] +Martini = [i for i, s in enumerate(data) if '-M-' in s] +patient_id = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(data[index]))))[-9:-6] for index in Martini] +patient_id = [item.replace("/", "") for item in patient_id] +patient_id = [item.replace("y", "") for item in patient_id] +patient_id.sort() +Martini_data = [data[index] for index in Martini] + +Martini_Diffusie = [i for i, s in enumerate(Martini_data) if not 'Diffusie' in s] + +print(patient_id) +print(np.shape(Martini_data)) +print(len(Martini_Diffusie)) +print([Martini_data[index] for index in Martini_Diffusie]) +quit() +############# merge radboud and umcg files ############ +import numpy as np +import os + +serie = 'b2000' +file1 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt' +# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus +with open(file1) as fp: + data = fp.readlines() +# Reading data from file2 +with open(file2) as fp: + data2 = fp.read() + +# Merging 2 files +# To add the data of file2 +# from next line +data += "\n" +data += data2 + +with open (f'{serie}.txt', 'w') as fp: + fp.write(data) + +quit() +############ find more martini data ############## +import numpy as np + +high_b_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt' +adc_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_adc.txt' +t2_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_t2w.txt' +seg_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/seg_new.txt' + +with open(high_b_data, 'r') as file : + high_b_data = file.readlines() +with open(adc_data, 'r') as file : + adc_data = file.readlines() +with open(t2_data, 'r') as file : + t2_data = file.readlines() +with open(seg_data, 'r') as file : + seg_data = file.readlines() + + +indices = [i for i, s in enumerate(high_b_data) if 'Diffusie_b0-50-400-800-2000' in s] + +b_2000 = [high_b_data[index] for index in indices] +b_50 = [item.replace("b-2000", "b-50") for item in b_2000] +b_400 = [item.replace("b-2000", "b-400") for item in b_2000] +b_800 = [item.replace("b-2000", "b-800") for item in b_2000] +adc = [adc_data[index] for index in indices] +t2 = [t2_data[index] for index in indices] +seg = [seg_data[index] for index in indices] + +with open('b50.txt', 'w') as f: + f.writelines(b_50) +with open('b400.txt', 'w') as f: + f.writelines(b_400) +with open('b800.txt', 'w') as f: + f.writelines(b_800) +with open('b2000.txt', 'w') as f: + f.writelines(b_2000) +with open('adc.txt', 'w') as f: + f.writelines(adc) +with open('t2.txt', 'w') as f: + f.writelines(t2) +with open('seg.txt', 'w') as f: + f.writelines(seg) + diff --git a/src/sfransen/dump_params.py b/src/sfransen/dump_params.py new file mode 100755 index 0000000..bb58a38 --- /dev/null +++ b/src/sfransen/dump_params.py @@ -0,0 +1,20 @@ +def dump_dict_to_yaml( + data: dict, + target_dir: str, + filename: str = "settings") -> None: + """ Writes the given dictionary as a yaml to the target directory. + + Parameters: + `data (dict)`: dictionary of data. + `target_dir (str)`: directory where the yaml will be saved. + `` filename (str)`: name of the file without extension + """ + print("\nParameters") + for pair in data.items(): + print(f"\t{pair}") + print() + + path = f"{target_dir}/{filename}.yml" + print_p(f"Wrote yaml to: {path}") + with open(path, 'w') as outfile: + yaml.dump(data, outfile, default_flow_style=False) \ No newline at end of file diff --git a/src/sfransen/pred_label_images.py b/src/sfransen/pred_label_images.py new file mode 100755 index 0000000..4aac92f --- /dev/null +++ b/src/sfransen/pred_label_images.py @@ -0,0 +1,75 @@ +# A script with returns the prediction with labels overlay and stores them in a new folder named "report" + +#modules +from preprocessing_function import preprocess +import numpy as np +from tqdm import tqdm +from helpers import * +import tensorflow as tf +from tensorflow import keras + + +#model load +Model_path = '..' +reconstructed_model = tf.keras.models.load_model(Model_path) +reconstructed_model.compile(loss='', + optimizer='', + metrics=['']) + +####image load + +# arg.series = b-50 b-400 etc. +# DATA_DIR = data dir +# IMAGE_SHAPE = +# TARGET_SPACING = + +images, image_paths = {s: [] for s in args.series}, {} +segmentations = [] +print_(f"> Loading images into RAM...") + +# Read the image paths from the data directory. +# Texts files are expected to have the name "[series_name].txt" +for s in args.series: + with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f: + image_paths[s] = [l.strip() for l in f.readlines()] +with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f: + seg_paths = [l.strip() for l in f.readlines()] +num_images = len(seg_paths) + +# Read and preprocess each of the paths for each series, and the segmentations. +for img_idx in tqdm(range(num_images)): #[:40] for less images + img_s = {s: sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32) + for s in args.series} + seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32) + img_n, seg_n = preprocess(img_s, seg_s, + shape=IMAGE_SHAPE, spacing=TARGET_SPACING) + for seq in img_n: + images[seq].append(img_n[seq]) + segmentations.append(seg_n) + +self._images = np.stack( + [np.stack(self.images[k], axis=0) for k in sequences], axis=-1) + +#image predict +predictions = reconstructed_model.predict(images[idx]) + +#heatmap + input image + +#save in folder output + +# Export scan crops and targets once +# they don't change during training so we export them only once +for i in range(min(self.num_images, self.validation_set[0].shape[0])): + for s_idx, s in enumerate(sequences): + img_s = sitk.GetImageFromArray( + self.validation_set[0][i][..., s_idx].squeeze().T) + sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz") + seg_s = sitk.GetImageFromArray( + self.validation_set[1][i].squeeze().T) + sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz") + +prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T) +prd_bin_s = sitk.GetImageFromArray( + np.around(predictions[i]).astype(np.float32).squeeze().T) +sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz") +sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz") \ No newline at end of file diff --git a/src/sfransen/utils_quintin.py b/src/sfransen/utils_quintin.py new file mode 100755 index 0000000..13064d6 --- /dev/null +++ b/src/sfransen/utils_quintin.py @@ -0,0 +1,82 @@ +import yaml +from typing import List +from collections import deque +from pathlib import Path +import numpy as np +import SimpleITK as sitk + +def print_p(text, end="\n"): + """ Print function for on Peregrine. It needs a flush before printing. """ + print(text, flush=True, end=end) + +def list_from_file(path: str) -> List: + """ Returns a list of all items on each line of the text file referenced in + the given path + + Parameters: + `path (str)`: path to the text file + """ + print_p(path) + return [line.strip() for line in open(path, "r")] + + +def dump_dict_to_yaml( + data: dict, + target_dir: str, + filename: str = "settings", + verbose: bool = True) -> None: + """ Writes the given dictionary as a yaml to the target directory. + + Parameters: + `data (dict)`: dictionary of data. + `target_dir (str)`: directory where the yaml will be saved. + `` filename (str)`: name of the file without extension + """ + if verbose: + print("\nParameters") + for pair in data.items(): + print(f"\t{pair}") + print() + + path = f"{target_dir}/{filename}.yml" + print_p(f"Wrote yaml to: {path}") + with open(path, 'w') as outfile: + yaml.dump(data, outfile, default_flow_style=False) + + +def read_yaml_to_dict(path: str) -> dict: + with open(path, 'r') as stream: + try: + parsed_yaml=yaml.safe_load(stream) + # print(parsed_yaml) + return parsed_yaml + except yaml.YAMLError as exc: + print(exc) + + +def create_dirs_if_not_exists(dirs: List[str]) -> None: + """ Creates the list of supplied directories if they don't exist yet. + + Parameters: + `dirs (List[str]) or str`: list of strings representing the directories that + need to be created + """ + + if isinstance(dirs, str): + dirs = [dirs] + + for folder in dirs: + Path(folder).mkdir(parents=True, exist_ok=True) + print_p(f"Created directory {folder} or it already existed.") + + +def save_np_to_nifti(data_np: np.ndarray, + target_dir: str, + fname: str, + target_space: List[float] = [0.5, 0.5, 3.0]): + + img_s = sitk.GetImageFromArray(data_np.T) + img_s.SetSpacing(target_space) + path = f"{target_dir}/{fname}.nii.gz" + sitk.WriteImage(img_s, path) + print_p(f"Saved numpy array to nifti: {path}") \ No newline at end of file