initialization of git (commit 11591d5752)
.gitignore (new executable file, vendored; 152 lines added)
@@ -0,0 +1,152 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

/build/
/code/
/old_code/
/data/
/job_scripts/
/scripts/
/temp/
/slurms/
*.out
sqliteDB/
*.db
/train_output/
README.md (new executable file; 3 lines added)
@@ -0,0 +1,3 @@
# sfransen

A library for fast MRI for prostate cancer: create undersampled data, train reconstruction models, and validate and visualize the results.
setup.py (new executable file; 24 lines added)
@@ -0,0 +1,24 @@
import setuptools

with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name="sfransen",
    version="0.0.1",
    author="S.J. Fransen (rad)",
    author_email="s.j.fransen@umcg.nl",
    description="later",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="",
    package_dir={'': 'src'},
    packages=setuptools.find_packages(where='src'),
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    scripts=[
    ],
    python_requires='>=3.6',
)
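A minimal usage sketch (not part of the commit), assuming the package is installed in editable mode with `pip install -e .` from the repository root and that the `src/` layout exposes a top-level `sfransen` package:

# After an editable install, the subpackages become importable.
import sfransen
from sfransen.DWI_exp import build_dual_attention_unet
print(sfransen.__name__)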
src/sfransen/DWI_exp/__init__.py (new executable file; 6 lines added)
@@ -0,0 +1,6 @@
print("the correct init file")
from .batchgenerator import *
from .callbacks import *
from .helpers import *
from .preprocessing_function import *
from .unet import *
src/sfransen/DWI_exp/batchgenerator.py (new executable file; 64 lines added)
@@ -0,0 +1,64 @@
import math
from typing import Callable, Dict, List, Optional, Tuple

import numpy as np
from tensorflow.keras.utils import Sequence


class BatchGenerator(Sequence):
    def __init__(self,
                 images: Dict[str, List[np.ndarray]],
                 segmentations: List[np.ndarray],
                 sequences: List[str],
                 shape: Tuple[int, ...],
                 indexes: Optional[List[int]] = None,
                 batch_size: int = 5,
                 shuffle: bool = False,
                 augmentation_function: Optional[Callable[
                     [np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]] = None):

        self.images = images
        self.segmentations = segmentations
        self.sequences = sequences
        self.indexes = indexes
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentation = augmentation_function

        self.num_rows = len(images)
        if self.indexes is None:
            self.indexes = list(range(self.num_rows))

        if isinstance(indexes, int):
            self.indexes = list(range(self.indexes))

        if self.batch_size is None:
            self.batch_size = len(self.indexes)

        # Stack image index as index 0, stack sequences as index -1
        self._images = np.stack(
            [np.stack(self.images[k], axis=0) for k in sequences], axis=-1)
        self._segmentations = np.stack(segmentations, axis=0)[..., np.newaxis]

    def __len__(self):
        return math.ceil(len(self.indexes) / self.batch_size)

    def __getitem__(self, batch_idx):
        # Get image / segmentation indexes for current batch
        indexes = self.indexes[batch_idx*self.batch_size:(1+batch_idx)*self.batch_size]

        # Get matching images and segmentations
        images = self._images[indexes]
        segmentations = self._segmentations[indexes]

        # Apply data augmentation if a function is provided
        if self.augmentation:
            for img_idx in range(len(indexes)):
                images[img_idx], segmentations[img_idx] = self.augmentation(
                    images[img_idx], segmentations[img_idx])
        return images, segmentations

    def on_epoch_end(self):
        super().on_epoch_end()

        if self.shuffle:
            np.random.shuffle(self.indexes)
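An illustrative sketch (not part of the commit) of feeding this generator to Keras; the two-sequence dataset below is random dummy data, purely to show the expected shapes, and `indexes=n` is passed explicitly to enumerate all cases:

import numpy as np
from sfransen.DWI_exp.batchgenerator import BatchGenerator

n, shape = 20, (64, 64, 8)
images = {"t2": [np.random.rand(*shape) for _ in range(n)],
          "adc": [np.random.rand(*shape) for _ in range(n)]}
segmentations = [np.random.randint(0, 2, shape).astype(float) for _ in range(n)]

gen = BatchGenerator(images, segmentations, sequences=["t2", "adc"],
                     shape=shape, indexes=n, batch_size=4, shuffle=True)
x, y = gen[0]
print(x.shape, y.shape)  # (4, 64, 64, 8, 2) (4, 64, 64, 8, 1)
# A compiled Keras model could then be trained with model.fit(gen, epochs=...).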
src/sfransen/DWI_exp/callbacks.py (new executable file; 96 lines added)
@@ -0,0 +1,96 @@
import numpy as np
import SimpleITK as sitk
from .helpers import *
import tensorflow.keras.backend as K
import tensorflow as tf
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import roc_auc_score, roc_curve


def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.round(K.flatten(y_pred))
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + K.epsilon()) / (
        K.sum(y_true_f) + K.sum(y_pred_f) + K.epsilon())


# def auc_value(y_true, y_pred):
#     # print_("start ROC_AUC")
#     # y_true_old = self.validation_set[1].squeeze()
#     # y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze())
#
#     y_true_auc = K.flatten(y_true)
#     # print_(f"The shape of y_true = {np.shape(y_true)}")
#
#     y_pred_auc = K.round(K.flatten(y_pred))
#     # print_(f"The shape of y_pred = {np.shape(y_pred)}")
#
#     # fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
#     auc = roc_auc_score(y_true_auc, y_pred_auc)
#     print_('AUC:', auc)
#     print_('AUC shape:', np.shape(auc))
#     # print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}')
#     return auc


class IntermediateImages(Callback):
    def __init__(self, validation_set, prefix, sequences,
                 num_images=10):
        self.prefix = prefix
        self.num_images = num_images
        self.validation_set = (
            validation_set[0][:num_images, ...],
            validation_set[1][:num_images, ...]
        )

        # Export scan crops and targets once;
        # they don't change during training, so we export them only once.
        for i in range(min(self.num_images, self.validation_set[0].shape[0])):
            for s_idx, s in enumerate(sequences):
                img_s = sitk.GetImageFromArray(
                    self.validation_set[0][i][..., s_idx].squeeze().T)
                sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz")
            seg_s = sitk.GetImageFromArray(
                self.validation_set[1][i].squeeze().T)
            sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz")

    def on_epoch_end(self, epoch, logs={}):
        # Predict on the validation images
        predictions = self.model.predict(self.validation_set[0], batch_size=1)

        # print_("start ROC_AUC")
        # y_true_old = self.validation_set[1].squeeze()
        # y_pred_old = np.around(self.model.predict(self.validation_set[0],batch_size=1).squeeze())
        #
        # y_true = np.array(y_true_old).flatten()
        # print_(f"The shape of y_true = {np.shape(y_true)}")
        #
        # y_pred = np.array(y_pred_old).flatten()
        # print_(f"The shape of y_pred = {np.shape(y_pred)}")
        #
        # fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
        # auc = roc_auc_score(y_true, y_pred)
        # print_('AUC:', auc)
        # print_(f'ROC: fpr={fpr} , tpr={tpr}, thresholds={thresholds}')

        for i in range(min(self.num_images, self.validation_set[0].shape[0])):
            prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T)
            prd_bin_s = sitk.GetImageFromArray(
                np.around(predictions[i]).astype(np.float32).squeeze().T)
            sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz")
            sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz")


# class RocCallback(Callback):
#     def __init__(self, validation_data):
#         # self.x = training_data[0]
#         # self.y = training_data[1]
#         self.x_val = validation_data[0]
#         self.y_val = validation_data[1]
#
#     def on_epoch_end(self, epoch, logs={}):
#         # y_pred_train = self.model.predict_proba(self.x)
#         # roc_train = roc_auc_score(self.y, y_pred_train)
#         y_pred_val = self.model.predict(self.x_val, batch_size=1)
#         roc_val = roc_auc_score(self.y_val, y_pred_val)
#         print('\rroc-auc_train: %s - roc-auc_val: %s' % (str(round(roc_val,4))), end=100*' '+'\n')
#         return
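An illustrative sketch (not part of the commit): the callback writes the first validation crops as NIfTI files when it is constructed, and writes predictions after every epoch when passed to model.fit(..., callbacks=[...]). The arrays, output prefix, and sequence names below are hypothetical dummy data:

import numpy as np
from sfransen.DWI_exp.callbacks import IntermediateImages

val_x = np.random.rand(3, 64, 64, 8, 2).astype(np.float32)
val_y = np.random.randint(0, 2, (3, 64, 64, 8, 1)).astype(np.float32)
cb = IntermediateImages(validation_set=(val_x, val_y),
                        prefix="val", sequences=["t2", "adc"], num_images=3)
# model.fit(train_gen, validation_data=(val_x, val_y), callbacks=[cb], epochs=...)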
src/sfransen/DWI_exp/helpers.py (new executable file; 239 lines added)
@@ -0,0 +1,239 @@
import os
from os import path

import numpy as np
import SimpleITK as sitk
from scipy import ndimage
import yaml


def print_(*args, **kwargs):
    print(*args, **kwargs, flush=True)


def print_config(config, depth=0):
    for k in config:
        print(end=(" "*depth*2) + f"- {k}:")
        if isinstance(config[k], dict):
            print()
            print_config(config[k], depth+1)
        else:
            print(f"\t{config[k]}")
    print_(end="")


def prepare_project_dir(project_dir):
    """
    Prepares the work directory for a project.
    TODO: Create text file containing configuration
    """
    def _makedir(n):
        os.makedirs(path.join(project_dir, n), exist_ok=True)

    _makedir("models")
    _makedir("logs")
    _makedir("data")
    _makedir("docs")
    _makedir("samples")
    _makedir("output")


def fake_loss(*args, **kwargs):
    return 0.


def center_crop(
        image: sitk.Image, shape: list, offset: list = None) -> sitk.Image:
    """Extracts a region of interest from the center of an SITK image.

    Parameters:
    image: Input image (SITK).
    shape: The shape of the region to extract.

    Returns: Cropped image (SITK)
    """
    size = image.GetSize()

    # Determine the centroid
    centroid = [sz / 2 for sz in size]

    # Determine the origin for the bounding box by subtracting half the
    # shape of the bounding box from each dimension of the centroid.
    box_start = [int(c - sh / 2) for c, sh in zip(centroid, shape)]

    if offset:
        box_start = [b - o for b, o in zip(box_start, offset)]

    # Extract the region of provided shape starting from the previously
    # determined starting pixel.
    region_extractor = sitk.RegionOfInterestImageFilter()
    region_extractor.SetSize(shape)
    region_extractor.SetIndex(box_start)
    cropped_image = region_extractor.Execute(image)

    return cropped_image


def augment(img, seg,
            noise_chance=0.3,
            noise_mult_max=0.001,
            rotate_chance=0.2,
            rotate_max_angle=30,
            ):
    # noise_mult_max was initially 0.1
    if np.random.uniform() < noise_chance:
        img = augment_noise(img, np.random.uniform(0., noise_mult_max))

    if np.random.uniform() < rotate_chance:
        img, seg = augment_rotate(img, seg,
            angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle))
        img, seg = augment_tilt(img, seg,
            angle=np.random.uniform(0-rotate_max_angle, rotate_max_angle))

    return img, seg


def augment_noise(img, multiplier):
    noise = np.random.standard_normal(img.shape) * multiplier
    return img + noise


def augment_rotate(img, seg, angle):
    img = ndimage.rotate(img, angle, reshape=False, cval=0)
    seg = ndimage.rotate(seg, angle, reshape=False, order=0)
    return img, seg


def augment_tilt(img, seg, angle):
    img = ndimage.rotate(img, angle, reshape=False, axes=(2, 1), cval=0)
    seg = ndimage.rotate(seg, angle, reshape=False, axes=(2, 1), order=0)
    return img, seg


def resample(
        image: sitk.Image, min_shape: list, method=sitk.sitkLinear,
        new_spacing: list = [1, 1, 3.6]) -> sitk.Image:
    """Resamples an image to given target spacing and shape.

    Parameters:
    image: Input image (SITK).
    min_shape: Minimum output shape for the underlying array.
    method: SimpleITK interpolator to use for resampling.
        (e.g. sitk.sitkNearestNeighbor, sitk.sitkLinear)
    new_spacing: The new spacing to resample to.

    Returns: Resampled image (SITK)
    """
    # Extract size and spacing from the image
    size = image.GetSize()
    spacing = image.GetSpacing()

    # Determine how much larger the image will become with the new spacing
    factor = [sp / new_sp for sp, new_sp in zip(spacing, new_spacing)]

    # Determine the outcome size of the image for each dimension
    get_size = lambda size, factor, min_shape: max(int(size * factor), min_shape)
    new_size = [get_size(sz, f, sh) for sz, f, sh in zip(size, factor, min_shape)]

    # Resample the image
    resampler = sitk.ResampleImageFilter()
    resampler.SetReferenceImage(image)
    resampler.SetOutputSpacing(new_spacing)
    resampler.SetSize(new_size)
    resampler.SetInterpolator(method)
    resampled_image = resampler.Execute(image)

    return resampled_image


# Start training
def get_generator(
        images: dict,
        segmentations: list,
        sequences: list,
        shape: tuple,
        indexes: list = None,
        batch_size: int = 5,
        shuffle: bool = False,
        augmentation=True):
    """
    Returns a (training) generator for use with model.fit().

    Parameters:
    images: Dict of lists of image crops, one list per sequence.
    segmentations: List of segmentation crops.
    sequences: Names of the input sequences (keys into `images`).
    shape: Shape of a single image crop.
    batch_size: Number of images per batch (default: all).
    indexes: Only use the specified image indexes.
    shuffle: Shuffle the lists of indexes once at the beginning.
    augmentation: Apply augmentation or not (bool).
    """
    num_rows = len(images)

    if indexes is None:
        indexes = list(range(num_rows))

    if isinstance(indexes, int):
        indexes = list(range(indexes))

    if batch_size is None:
        batch_size = len(indexes)

    idx = 0

    # Prepare empty batch placeholder with named inputs and outputs
    input_batch = np.zeros((batch_size,) + shape + (len(images),))
    output_batch = np.zeros((batch_size,) + shape + (1,))

    # Loop infinitely to keep generating batches
    while True:
        # Prepare each observation in a batch
        for batch_idx in range(batch_size):
            # Shuffle the order of images if all indexes have been seen
            if idx == 0 and shuffle:
                np.random.shuffle(indexes)
            current_index = indexes[idx]

            # Insert the augmented images into the input batch
            img_crop = [images[s][current_index] for s in sequences]
            img_crop = np.stack(img_crop, axis=-1)
            seg_crop = segmentations[current_index][..., np.newaxis]
            if augmentation:
                img_crop, seg_crop = augment(img_crop, seg_crop)
            input_batch[batch_idx] = img_crop
            output_batch[batch_idx] = seg_crop

            # Increase the current index and modulo by the number of rows
            # so that we stay within bounds
            idx = (idx + 1) % len(indexes)

        yield input_batch, output_batch


def resample_to_reference(image, ref_img,
                          interpolator=sitk.sitkNearestNeighbor,
                          default_pixel_value=0):
    resampled_img = sitk.Resample(image, ref_img,
                                  sitk.Transform(),
                                  interpolator, default_pixel_value,
                                  ref_img.GetPixelID())
    return resampled_img


def dump_dict_to_yaml(
        data: dict,
        target_dir: str,
        filename: str = "settings") -> None:
    """Writes the given dictionary as a yaml to the target directory.

    Parameters:
    `data (dict)`: dictionary of data.
    `target_dir (str)`: directory where the yaml will be saved.
    `filename (str)`: name of the file without extension.
    """
    print("\nParameters")
    for pair in data.items():
        print(f"\t{pair}")
    print()

    path = f"{target_dir}/{filename}.yml"
    print_(f"Wrote yaml to: {path}")
    with open(path, 'w') as outfile:
        yaml.dump(data, outfile, default_flow_style=False)
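An illustrative sketch (not part of the commit) of the random augmentation helper on one dummy crop; the crop shape below is an assumption and the noise/rotation parameters are the defaults defined above:

import numpy as np
from sfransen.DWI_exp.helpers import augment

img = np.random.rand(144, 144, 18, 2)   # crop with two MRI sequences stacked last
seg = np.random.randint(0, 2, (144, 144, 18, 1)).astype(float)
img_aug, seg_aug = augment(img, seg)
print(img_aug.shape, seg_aug.shape)      # shapes are preserved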
src/sfransen/DWI_exp/preprocessing_function.py (new executable file; 46 lines added)
@@ -0,0 +1,46 @@
import numpy as np
import SimpleITK as sitk

from .helpers import *


def preprocess(imgs: dict, seg: sitk.Image,
               shape: tuple, spacing: tuple, to_numpy=True) -> tuple:

    # Resample all of the images to the desired voxel spacing
    img_r = {k: resample(imgs[k],
                         min_shape=[s + 1 for s in shape],
                         new_spacing=spacing) for k in imgs}
    seg_r = resample(seg,
                     min_shape=[s + 1 for s in shape],
                     new_spacing=spacing,
                     method=sitk.sitkNearestNeighbor)

    # Center crop one of the input images
    ref_seq = list(img_r.keys())[0]
    ref_img = center_crop(img_r[ref_seq], shape=shape)

    # Then crop the remaining series / segmentation to match the input crop by
    # transforming them on the physical space of the cropped series.
    img_crop = {k: resample_to_reference(
                    image=img_r[k],
                    ref_img=ref_img,
                    interpolator=sitk.sitkLinear)
                for k in img_r}
    seg_crop = resample_to_reference(
        image=seg_r,
        ref_img=ref_img,
        interpolator=sitk.sitkNearestNeighbor)

    # Return sitk.Image instead of numpy np.ndarray.
    if not to_numpy:
        return img_crop, seg_crop

    img_n = {k: sitk.GetArrayFromImage(img_crop[k]).T for k in img_crop}
    seg_n = sitk.GetArrayFromImage(seg_crop).T
    seg_n = np.clip(seg_n, 0., 1.)

    # Z-score normalize all images
    # to do: 2*std
    for seq in img_n:
        img_n[seq] = (img_n[seq] - np.mean(img_n[seq])) / (2 * np.std(img_n[seq]))
    return img_n, seg_n
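An illustrative sketch (not part of the commit): preprocessing one dummy case (two series plus a segmentation) into normalized numpy crops; the array sizes, spacings, and sequence names are assumptions chosen so the resample/crop steps fit:

import numpy as np
import SimpleITK as sitk
from sfransen.DWI_exp.preprocessing_function import preprocess

t2 = sitk.GetImageFromArray(np.random.rand(24, 256, 256))
t2.SetSpacing((0.5, 0.5, 3.0))
adc = sitk.GetImageFromArray(np.random.rand(24, 256, 256))
adc.SetSpacing((0.5, 0.5, 3.0))
seg = sitk.GetImageFromArray(np.random.randint(0, 2, (24, 256, 256)).astype(np.uint8))
seg.SetSpacing((0.5, 0.5, 3.0))

img_n, seg_n = preprocess({"t2": t2, "adc": adc}, seg,
                          shape=(144, 144, 18), spacing=(1, 1, 3.6))
print(img_n["t2"].shape, seg_n.shape)  # (144, 144, 18) (144, 144, 18)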
src/sfransen/DWI_exp/unet.py (new executable file; 114 lines added)
@@ -0,0 +1,114 @@
from tensorflow.keras import backend as K
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv3D, Activation, Dense, concatenate, add
from tensorflow.keras.layers import Conv3DTranspose, LeakyReLU
from tensorflow.keras import regularizers
from tensorflow.keras.layers import GlobalAveragePooling3D, Reshape, Dense, multiply, Permute


def squeeze_excite_block(input, ratio=8):
    '''Create a channel-wise squeeze-excite block

    Args:
        input: input tensor
        filters: number of output filters
    Returns: a keras tensor
    References
    -   [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507)
    '''
    init = input
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    filters = init.shape[channel_axis]
    se_shape = (1, 1, 1, filters)

    se = GlobalAveragePooling3D()(init)
    se = Reshape(se_shape)(se)
    se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
    se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)

    if K.image_data_format() == 'channels_first':
        se = Permute((4, 1, 2, 3))(se)

    x = multiply([init, se])
    return x


def build_dual_attention_unet(
        input_shape,
        l2_regularization=0.0001,
        ):

    def conv_layer(x, kernel_size, out_filters, strides=(1, 1, 1)):
        x = Conv3D(out_filters, kernel_size,
                   strides=strides,
                   padding='same',
                   kernel_regularizer=regularizers.l2(l2_regularization),
                   kernel_initializer='he_normal',
                   use_bias=False
                   )(x)
        return x

    def conv_block(input, out_filters, strides=(1, 1, 1), with_residual=False, with_se=False, activation='relu'):
        # Strided convolution to downsample
        x = conv_layer(input, (3, 3, 3), out_filters, strides)
        x = Activation('relu')(x)

        # Unstrided convolution
        x = conv_layer(x, (3, 3, 3), out_filters)

        # Add a squeeze-excite block
        if with_se:
            se = squeeze_excite_block(x)
            x = add([x, se])

        # Add a residual connection using a 1x1x1 convolution with strides
        if with_residual:
            residual = conv_layer(input, (1, 1, 1), out_filters, strides)
            x = add([x, residual])

        # Activate whatever comes out of this
        if activation == 'leaky':
            x = LeakyReLU(alpha=.1)(x)
        else:
            x = Activation('relu')(x)

        return x

    # If we already have only one input, no need to combine anything
    inputs = Input(input_shape)

    # Downsampling
    conv1 = conv_block(inputs, 16)
    conv2 = conv_block(conv1, 32, strides=(2,2,1), with_residual=True, with_se=True)   #72x72x18
    conv3 = conv_block(conv2, 64, strides=(2,2,1), with_residual=True, with_se=True)   #36x36x18
    conv4 = conv_block(conv3, 128, strides=(2,2,2), with_residual=True, with_se=True)  #18x18x9
    conv5 = conv_block(conv4, 256, strides=(2,2,2), with_residual=True, with_se=True)  #9x9x9

    # First upsampling sequence
    up1_1 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(conv5)  #18x18x9
    up1_2 = Conv3DTranspose(128, (3,3,3), strides=(2,2,2), padding='same')(up1_1)  #36x36x18
    up1_3 = Conv3DTranspose(128, (3,3,3), strides=(2,2,1), padding='same')(up1_2)  #72x72x18
    bridge1 = concatenate([conv4, up1_1])  #18x18x9 (128+128=256)
    dec_conv_1 = conv_block(bridge1, 128, with_residual=True, with_se=True, activation='leaky')  #18x18x9

    # Second upsampling sequence
    up2_1 = Conv3DTranspose(64, (3,3,3), strides=(2,2,2), padding='same')(dec_conv_1)  # 36x36x18
    up2_2 = Conv3DTranspose(64, (3,3,3), strides=(2,2,1), padding='same')(up2_1)       # 72x72x18
    bridge2 = concatenate([conv3, up1_2, up2_1])  # 36x36x18 (64+128+64=256)
    dec_conv_2 = conv_block(bridge2, 64, with_residual=True, with_se=True, activation='leaky')

    # Final upsampling sequence
    up3_1 = Conv3DTranspose(32, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_2)  # 72x72x18
    bridge3 = concatenate([conv2, up1_3, up2_2, up3_1])  # 72x72x18 (32+128+64+32=256)
    dec_conv_3 = conv_block(bridge3, 32, with_residual=True, with_se=True, activation='leaky')

    # Last upsampling to make heatmap
    up4_1 = Conv3DTranspose(16, (3,3,3), strides=(2,2,1), padding='same')(dec_conv_3)  # 72x72x18
    dec_conv_4 = conv_block(up4_1, 16, with_residual=False, with_se=True, activation='leaky')  #144x144x18 (16)

    # Reduce to a single output channel with a 1x1x1 convolution
    single_channel = Conv3D(1, (1, 1, 1))(dec_conv_4)

    # Apply sigmoid activation to get binary prediction per voxel
    act = Activation('sigmoid')(single_channel)

    # Model definition
    model = Model(inputs=inputs, outputs=act)
    return model
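An illustrative sketch (not part of the commit) of building and compiling the network. The input shape (two sequences of 144x144x16 voxels), optimizer, and loss are assumptions; note that the depth dimension should be divisible by 4 so the transposed convolutions line up with the skip connections:

from tensorflow.keras.optimizers import Adam
from sfransen.DWI_exp.unet import build_dual_attention_unet
from sfransen.DWI_exp.callbacks import dice_coef

model = build_dual_attention_unet(input_shape=(144, 144, 16, 2))
model.compile(optimizer=Adam(1e-4), loss="binary_crossentropy", metrics=[dice_coef])
model.summary()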
src/sfransen/FROC/__init__.py (new executable file; 6 lines added)
@@ -0,0 +1,6 @@
from .image_utils import *
from .froc import *
from .data_utils import *
from .cal_froc_from_np import *
from .blob_preprocess import *
from .analysis_utils import *
src/sfransen/FROC/analysis_utils.py (new executable file; 313 lines added)
@@ -0,0 +1,313 @@
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from sklearn.metrics import auc
from scipy import ndimage

try:
    import numpy.typing as npt
except ImportError:  # pragma: no cover
    pass


# Parse Detection Maps to Individual Lesions + Likelihoods
def parse_detection_map(detection_map):
    # Label All Non-Connected Components in Detection Map
    blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))

    confidences = []
    if num_blobs > 0:
        # For Each Lesion Detection
        for tumor_index in range(1, num_blobs+1):

            # Extract Mask of Current Lesion
            # hard_blob = np.zeros_like(blobs_index)
            # hard_blob[blobs_index == tumor] = 1
            # TODO: replace above with the following? Is faster I think.
            # hard_blob = (blobs_index == tumor).astype(int)

            # Extract Max Predicted Likelihood for Lesion Detection
            # max_prob = np.max(hard_blob)  # <- this is always 1
            # hard_blob[hard_blob > 0] = max_prob  # <- this line does nothing, as hard_blob is not used

            # Store Predicted Likelihood per Lesion Detection
            max_prob = detection_map[blobs_index == tumor_index].max()
            confidences.append((tumor_index, max_prob))
    return confidences, blobs_index


# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
    epsilon = 1e-8
    dsc_num = np.sum(predictions[labels == 1]) * 2.0
    dsc_denom = np.sum(predictions) + np.sum(labels)
    return float((dsc_num + epsilon) / (dsc_denom + epsilon))


# Calculate Intersection over Union (IoU) for N-D Arrays
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
    epsilon = 1e-8
    iou_num = np.sum(predictions[labels == 1])
    iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
    return float((iou_num + epsilon) / (iou_denom + epsilon))


# Calculate Operating Points for Curve
def calculate_operating_points(y, x, op_match=None, verbose=1):
    """
    Input:
    - y: (monotonically increasing) performance metric, such as the True Positive Rate
    - x: (monotonically increasing) performance metric, such as the False Positive Rate
    - op_match: dictionary that specifies the target operating point: {
        'x': target x value, 'y': target y value
      }

    Returns:
    - dictionary with operating point(s): {
        'op_closest_xy_y': y_op,  # y value at operating point that matches both x and y of target operating point
        'op_closest_xy_x': x_op,  # x value at operating point that matches both x and y of target operating point
        ...
      }
    """
    # TODO: currently, a lower sensitivity is preferred over a higher sensitivity if that means the operating point is matched better.
    # Would be better to go for the best sensitivity/specificity, if that can be done without hurting the other performance metric.
    # In practice, this should not be an issue, as we have many points then.
    y = np.array(y)
    x = np.array(x)
    operating_points = {}

    if not np.all(np.diff(y) >= 0) and verbose:
        print("Warning: y performance metric is not monotonically increasing, this could lead to unexpected behaviour!")
    if not np.all(np.diff(x) >= 0) and verbose:
        print("Warning: x performance metric is not monotonically increasing, this could lead to unexpected behaviour!")

    # Grab Index of Intersection -> Compute y/TPR and x/FPR @ Index -> Store
    op_best_roc_idx = np.argmin(np.abs(y - (1 - x)))
    op_best_roc_y = y[op_best_roc_idx]
    op_best_roc_x = x[op_best_roc_idx]
    operating_points.update(dict(
        op_best_roc_idx=op_best_roc_idx,
        op_best_roc_y=op_best_roc_y,
        op_best_roc_x=op_best_roc_x
    ))

    if op_match is not None:
        # calculate operating point closest to target operating point
        abs_deficit_x, abs_deficit_y = None, None
        optional_x_keys = ['x', 'fpr', 'FPR']
        optional_y_keys = ['y', 'tpr', 'TPR', 'sensitivity', 'sens']

        # if the target x value is specified, calculate the difference between target and observed value
        for key in optional_x_keys:
            if key in op_match:
                op_match_x = op_match[key]
                abs_deficit_x = np.abs(x - op_match_x)
                break

        # if the target y value is specified, calculate the difference between target and observed value
        for key in optional_y_keys:
            if key in op_match:
                op_match_y = op_match[key]
                abs_deficit_y = np.abs(y - op_match_y)
                break

        # if both target x and y values are specified, calculate the difference between the target pair and observed pair
        # at the best match, store the observed x and y values
        if abs_deficit_x is not None and abs_deficit_y is not None:
            # determine the index of the closest point to the target pair
            abs_deficit = abs_deficit_x + abs_deficit_y
            op_closest_xy_idx = np.argmin(abs_deficit)
            op_closest_xy_y = y[op_closest_xy_idx]
            op_closest_xy_x = x[op_closest_xy_idx]

            # store
            operating_points.update(dict(
                op_closest_xy_idx=op_closest_xy_idx,
                op_closest_xy_y=op_closest_xy_y,
                op_closest_xy_x=op_closest_xy_x
            ))

        # same for matching x only
        if abs_deficit_x is not None:
            # determine the index of the closest point to the target value
            op_closest_x_idx = np.argmin(abs_deficit_x)
            op_closest_x_y = y[op_closest_x_idx]
            op_closest_x_x = x[op_closest_x_idx]

            # store
            operating_points.update(dict(
                op_closest_x_idx=op_closest_x_idx,
                op_closest_x_y=op_closest_x_y,
                op_closest_x_x=op_closest_x_x
            ))

        # same for matching y only
        if abs_deficit_y is not None:
            # determine the index of the closest point to the target value
            op_closest_y_idx = np.argmin(abs_deficit_y)
            op_closest_y_y = y[op_closest_y_idx]
            op_closest_y_x = x[op_closest_y_idx]

            # store
            operating_points.update(dict(
                op_closest_y_idx=op_closest_y_idx,
                op_closest_y_y=op_closest_y_y,
                op_closest_y_x=op_closest_y_x
            ))

    return operating_points


# Calculate Statistics for Multiple Curves
def calculate_statistics(metrics, op_match=None, x_start=0., x_end=1., verbose=1):
    """
    Calculate statistics, such as the area under the curve, for multiple (independent) curves.
    To calculate shared statistics, the curves must be translated to a shared x domain. To
    achieve this with virtually no loss of the step-like nature of curves like ROC and FROC,
    the shared x values are derived from the input and offset by a tiny ± epsilon.

    Input:
    - metrics should be a list of tuples with the y & x coordinates for each run:
      [([y1, y2, y3, ...], [x1, x2, x3]),  # run 1
       ([y1, y2, y3, ...], [x1, x2, x3]),  # run 2
      ]
    - op_match: {
        'y': value of y metric (e.g., TPR/sensitivity) to match,
        'x': value of x metric (e.g., FPR/false positive rate) to match,
      }

    Note: mean and 95% CI are calculated as function of the shared x.
    """
    # construct the array of shared x values
    eps = 1e-10
    x_shared = np.array([xi for _, x in metrics for xi in x], dtype=np.float64)  # collect list of all possible x-values
    x_shared = np.ravel(x_shared)  # flatten list, if necessary
    x_shared = np.append(x_shared, [x_start, x_end])  # add x_start and x_end to ensure correct pAUC calculation
    x_shared = np.concatenate((x_shared+eps, x_shared-eps))
    x_shared = np.unique(x_shared)  # only keep unique x values
    x_shared.sort()  # sort in ascending order (inplace)

    # validate x_start and x_end
    assert x_start < x_end, f"x_start must be smaller than x_end! Got x_start={x_start} and x_end={x_end}."

    # convert the per-model y (e.g., TPR) vs x (e.g., FPR) to a shared domain
    y_shared_all = np.zeros(shape=(len(metrics), len(x_shared)), dtype=np.float32)
    auroc_all = []
    individually_matched_operating_points = []
    for i, (y, x) in enumerate(metrics):
        # if necessary, unpack x and y
        if len(y) == 1:
            y = y[0]
        if len(x) == 1:
            x = x[0]

        # interpolate the y values to the shared x values
        y_shared_domain = np.interp(x_shared, x, y)
        y_shared_all[i] = y_shared_domain

        # calculate AUROC for macro stats
        mask = (x_shared >= x_start) & (x_shared <= x_end)
        auc_score = auc(x_shared[mask], y_shared_domain[mask])
        auroc_all += [auc_score]

        # match operating point for each run individually
        operating_points = calculate_operating_points(y=y, x=x, op_match=op_match)
        individually_matched_operating_points += [operating_points]

    # calculate statistics in shared domain
    y_shared_mean = np.mean(y_shared_all, axis=0)
    y_shared_std = np.std(y_shared_all, axis=0)
    y_shared_CI_lower = np.percentile(y_shared_all, 2.5, axis=0)
    y_shared_CI_higher = np.percentile(y_shared_all, 97.5, axis=0)
    auroc_mean = np.mean(auroc_all)
    auroc_std = np.std(auroc_all)

    # calculate operating points in shared domain
    operating_points = calculate_operating_points(y=y_shared_mean, x=x_shared,
                                                  op_match=op_match, verbose=verbose)

    # collect results
    results = {
        # overview statistics
        'auroc_mean': auroc_mean,
        'auroc_std': auroc_std,
        'auroc_all': auroc_all,

        # for plotting
        'x_shared': x_shared,
        'y_shared_all': y_shared_all,
        'y_shared_mean': y_shared_mean,
        'y_shared_std': y_shared_std,
        'y_shared_CI_lower': y_shared_CI_lower,
        'y_shared_CI_higher': y_shared_CI_higher,

        # individually matched operating point
        'individually_matched_operating_points': individually_matched_operating_points,
    }
    results.update(operating_points)

    # calculate standard deviation of each metric (op_closest_xy_y, etc.) between individual runs
    individually_matched_operating_points_std = {
        f"{key}_std": np.std([
            operating_point_info[key]
            for operating_point_info in individually_matched_operating_points
        ])
        for key in individually_matched_operating_points[0].keys()
    }
    results.update(individually_matched_operating_points_std)

    return results


# Calculate (partial) Area Under Curve (pAUC) using (x,y) coordinates from the given curve
def calculate_pAUC_from_graph(x, y, pAUC_start: float = 0.0, pAUC_end: float = 1.0, full: bool = False):
    """
    Input:
    For a single curve:
    - x: x values of a curve (e.g., the False Positive Rate points). [x1, x2, .., xn]
    - y: y values of a curve (e.g., the True Positive Rate points). [y1, y2, .., yn]

    For multiple curves:
    - list of x curves, for example the x values observed across multiple runs. [[x1m1, x2m1, .., xnm1], [x1m2, x2m2, ...., xnm2], ..]
    - list of y curves, for example the y values observed across multiple runs. [[y1m1, y2m1, .., ynm1], [y1m2, y2m2, ...., ynm2], ..]

    - pAUC_start: lower bound of x (e.g., FPR) to compute pAUC
    - pAUC_end: higher bound of x (e.g., FPR) to compute pAUC

    Returns:
    - if (full==False): List of pAUC values for each set of ([x1, ..], [y1, ..]) coordinates
    - if (full==True): Metrics as returned by `calculate_statistics` [see there]

    Note: function is not specific to the FROC curve
    """
    if not isinstance(x[0], (list, np.ndarray)) or not isinstance(y[0], (list, np.ndarray)):
        # Have a single set of (x,y) coordinates

        assert not isinstance(x[0], (list, np.ndarray)) and not isinstance(y[0], (list, np.ndarray)), \
            "Either provide multiple sequences of (x,y) coordinates, or a single sequence. Obtained a mix of both now."

        # Pack coordinates in format expected by `calculate_statistics`
        coordinates_joined = [(y, x)]
    else:
        # Have multiple sets of (x,y) coordinates
        # Pack coordinates in format expected by `calculate_statistics`
        coordinates_joined = list(zip(y, x))

    # Calculate AUC in Given Ranges
    results = calculate_statistics(metrics=coordinates_joined, x_start=pAUC_start, x_end=pAUC_end)
    if full:
        return results
    return results['auroc_all']
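An illustrative sketch (not part of the commit): computing a partial AUC over the FPR range [0, 0.25] for two hypothetical ROC curves, one pAUC value per run:

from sfransen.FROC.analysis_utils import calculate_pAUC_from_graph

fpr_runs = [[0.0, 0.1, 0.5, 1.0], [0.0, 0.2, 0.6, 1.0]]
tpr_runs = [[0.0, 0.6, 0.9, 1.0], [0.0, 0.5, 0.8, 1.0]]
pAUCs = calculate_pAUC_from_graph(fpr_runs, tpr_runs, pAUC_start=0.0, pAUC_end=0.25)
print(pAUCs)  # e.g. [pAUC_run1, pAUC_run2]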
src/sfransen/FROC/blob_preprocess.py (new executable file; 148 lines added)
@@ -0,0 +1,148 @@
import numpy as np
from scipy import ndimage
from typing import List, Tuple, Optional, Union

"""
Extract lesion candidates from a softmax prediction
Authors: anindox8, matinhz, joeranbosma
"""


# Preprocess Softmax Volume (Clipping, Max Confidence)
def preprocess_softmax_static(softmax: np.ndarray,
                              threshold: float = 0.10,
                              min_voxels_detection: int = 10,
                              max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
    # Load and Preprocess Softmax Image
    all_hard_blobs = np.zeros_like(softmax)
    confidences = []
    clipped_softmax = softmax.copy()
    clipped_softmax[softmax < threshold] = 0
    blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))

    if num_blobs > 0:  # For Each Prediction
        for tumor in range(1, num_blobs+1):
            # determine mask for current lesion
            hard_mask = np.zeros_like(blobs_index)
            hard_mask[blobs_index == tumor] = 1

            if np.count_nonzero(hard_mask) <= min_voxels_detection:
                # remove tiny detection of <= 0.009 cm^3
                blobs_index[hard_mask.astype(bool)] = 0
                continue

            # add sufficiently sized detection
            hard_blob = hard_mask * clipped_softmax
            max_prob = np.max(hard_blob)
            if max_prob_round_decimals is not None:
                max_prob = np.round(max_prob, max_prob_round_decimals)
            hard_blob[hard_blob > 0] = max_prob
            all_hard_blobs += hard_blob
            confidences.append((tumor, max_prob))
    return all_hard_blobs, confidences, blobs_index


def preprocess_softmax_dynamic(softmax: np.ndarray,
                               min_voxels_detection: int = 10,
                               num_lesions_to_extract: int = 5,
                               dynamic_threshold_factor: float = 2.5,
                               max_prob_round_decimals: Optional[int] = None,
                               remove_adjacent_lesion_candidates: bool = True,
                               max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
    """
    Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
    Author: Joeran Bosma
    """
    working_softmax = softmax.copy()
    dynamic_hard_blobs = np.zeros_like(softmax)
    confidences: List[Tuple[int, float]] = []
    dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)

    while len(confidences) < num_lesions_to_extract:
        tumor_index = 1 + len(confidences)

        # determine max. softmax
        max_prob = np.max(working_softmax)

        if max_prob < max_prob_failsafe_stopping_threshold:
            break

        # set dynamic threshold to half the max
        threshold = max_prob / dynamic_threshold_factor

        # extract blobs for dynamic threshold
        all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
                                                         min_voxels_detection=min_voxels_detection,
                                                         max_prob_round_decimals=max_prob_round_decimals)

        # select blob with max. confidence
        # note: the max_prob is re-computed in the (unlikely) case that the max. prob
        # was inside a 'lesion candidate' of less than min_voxels_detection, which is
        # thus removed in preprocess_softmax_static.
        max_prob = np.max(all_hard_blobs)
        mask_current_lesion = (all_hard_blobs == max_prob)

        # ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
        mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
        mask_current_lesion = (mask_current_lesion_indexed == 1)

        # create mask with its confidence
        hard_blob = (all_hard_blobs * mask_current_lesion)

        # Detect whether the extracted mask is a ring/hollow sphere
        # around an existing lesion candidate. For confident lesions,
        # the surroundings of the prediction are still quite confident,
        # and can become a second 'detection'. For an example, please
        # see extracted lesion candidates nr. 4 and 5 at:
        # https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
        # Detection method: grow currently extracted lesions by one voxel,
        # and check if they overlap with the current extracted lesion.
        extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
        current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()

        # Check if lesion candidate should be retained
        if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
            # store extracted lesion
            dynamic_hard_blobs += hard_blob
            confidences += [(tumor_index, max_prob)]
            dynamic_indexed_blobs += (mask_current_lesion * tumor_index)

        # remove extracted lesion from working-softmax
        working_softmax = (working_softmax * (~mask_current_lesion))

    return dynamic_hard_blobs, confidences, dynamic_indexed_blobs


def preprocess_softmax(softmax: np.ndarray,
                       threshold: Union[str, float] = 0.10,
                       min_voxels_detection: int = 10,
                       num_lesions_to_extract: int = 5,
                       dynamic_threshold_factor: float = 2.5,
                       max_prob_round_decimals: Optional[int] = None,
                       remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
    """
    Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
    """
    if threshold == 'dynamic':
        all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
                                                                               dynamic_threshold_factor=dynamic_threshold_factor,
                                                                               num_lesions_to_extract=num_lesions_to_extract,
                                                                               remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
                                                                               max_prob_round_decimals=max_prob_round_decimals)
    elif threshold == 'dynamic-fast':
        # determine max. softmax and set a per-case 'static' threshold based on that
        max_prob = np.max(softmax)
        threshold = float(max_prob / dynamic_threshold_factor)
        all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
                                                                              min_voxels_detection=min_voxels_detection,
                                                                              max_prob_round_decimals=max_prob_round_decimals)
    else:
        threshold = float(threshold)  # convert threshold to float, if it wasn't already
        all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
                                                                              min_voxels_detection=min_voxels_detection,
                                                                              max_prob_round_decimals=max_prob_round_decimals)

    return all_hard_blobs, confidences, indexed_pred
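An illustrative sketch (not part of the commit): extracting lesion candidates from a dummy softmax volume with the default static threshold; the volume shape and the synthetic blob are assumptions made to keep the example self-contained:

import numpy as np
from sfransen.FROC.blob_preprocess import preprocess_softmax

softmax = np.zeros((18, 144, 144), dtype=np.float32)
softmax[8:12, 60:70, 60:70] = 0.8  # synthetic lesion-like blob
blobs, confidences, indexed = preprocess_softmax(softmax)
print(confidences)  # one (lesion_index, peak_confidence) pair for the blob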
642
src/sfransen/FROC/cal_froc_from_np.py
Executable file
642
src/sfransen/FROC/cal_froc_from_np.py
Executable file
@ -0,0 +1,642 @@
|
|||||||
|
import argparse
|
||||||
|
import multiprocessing
|
||||||
|
import numpy as np
|
||||||
|
import SimpleITK as sitk
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import itertools
|
||||||
|
import concurrent.futures
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
import sys
|
||||||
|
sys.path.append('./../code')
|
||||||
|
from utils_quintin import *
|
||||||
|
|
||||||
|
# from fastMRI_PCa.visualization.visualize import write_array2nifti_t2
|
||||||
|
from tensorflow.keras.models import load_model
|
||||||
|
from functools import partial
|
||||||
|
from focal_loss import BinaryFocalLoss
|
||||||
|
from scipy import ndimage
|
||||||
|
from pathlib import Path
|
||||||
|
from tqdm import tqdm
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from sklearn.metrics import roc_curve, auc
|
||||||
|
|
||||||
|
# from fastMRI_PCa.visualization import save_slice_3d
|
||||||
|
# from fastMRI_PCa.utils import read_yaml_to_dict, list_from_file, print_stats_np
|
||||||
|
# from fastMRI_PCa.utils import print_p, get_rand_exp_decay_mask
|
||||||
|
# from fastMRI_PCa.data import resample, center_crop, normalize, undersample, binarize_s
|
||||||
|
# from fastMRI_PCa.models import ssim_loss, ssim_metric, psnr_metric
|
||||||
|
|
||||||
|
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
|
||||||
|
|
||||||
|
try:
|
||||||
|
import numpy.typing as npt
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
############################### PREPROCESSING BLOBS #########################
|
||||||
|
"""
|
||||||
|
Extract lesion candidates from a softmax prediction
|
||||||
|
Authors: anindox8, matinhz, joeranbosma
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Preprocess Softmax Volume (Clipping, Max Confidence)
|
||||||
|
def preprocess_softmax_static(softmax: np.ndarray,
|
||||||
|
threshold: float = 0.10,
|
||||||
|
min_voxels_detection: int = 10,
|
||||||
|
max_prob_round_decimals: Optional[int] = 4) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||||
|
# Load and Preprocess Softmax Image
|
||||||
|
all_hard_blobs = np.zeros_like(softmax)
|
||||||
|
confidences = []
|
||||||
|
clipped_softmax = softmax.copy()
|
||||||
|
clipped_softmax[softmax < threshold] = 0
|
||||||
|
blobs_index, num_blobs = ndimage.label(clipped_softmax, np.ones((3, 3, 3)))
|
||||||
|
|
||||||
|
if num_blobs > 0: # For Each Prediction
|
||||||
|
for tumor in range(1, num_blobs+1):
|
||||||
|
# determine mask for current lesion
|
||||||
|
hard_mask = np.zeros_like(blobs_index)
|
||||||
|
hard_mask[blobs_index == tumor] = 1
|
||||||
|
|
||||||
|
if np.count_nonzero(hard_mask) <= min_voxels_detection:
|
||||||
|
# remove tiny detection of <= 0.009 cm^3
|
||||||
|
blobs_index[hard_mask.astype(bool)] = 0
|
||||||
|
continue
|
||||||
|
|
||||||
|
# add sufficiently sized detection
|
||||||
|
hard_blob = hard_mask * clipped_softmax
|
||||||
|
max_prob = np.max(hard_blob)
|
||||||
|
if max_prob_round_decimals is not None:
|
||||||
|
max_prob = np.round(max_prob, max_prob_round_decimals)
|
||||||
|
hard_blob[hard_blob > 0] = max_prob
|
||||||
|
all_hard_blobs += hard_blob
|
||||||
|
confidences.append((tumor, max_prob))
|
||||||
|
return all_hard_blobs, confidences, blobs_index
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_softmax_dynamic(softmax: np.ndarray,
|
||||||
|
min_voxels_detection: int = 10,
|
||||||
|
num_lesions_to_extract: int = 5,
|
||||||
|
dynamic_threshold_factor: float = 2.5,
|
||||||
|
max_prob_round_decimals: Optional[int] = None,
|
||||||
|
remove_adjacent_lesion_candidates: bool = True,
|
||||||
|
max_prob_failsafe_stopping_threshold: float = 0.01) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||||
|
"""
|
||||||
|
Generate detection proposals using a dynamic threshold to determine the location and size of lesions.
|
||||||
|
Author: Joeran Bosma
|
||||||
|
"""
|
||||||
|
working_softmax = softmax.copy()
|
||||||
|
dynamic_hard_blobs = np.zeros_like(softmax)
|
||||||
|
confidences: List[Tuple[int, float]] = []
|
||||||
|
dynamic_indexed_blobs = np.zeros_like(softmax, dtype=int)
|
||||||
|
|
||||||
|
while len(confidences) < num_lesions_to_extract:
|
||||||
|
tumor_index = 1 + len(confidences)
|
||||||
|
|
||||||
|
# determine max. softmax
|
||||||
|
max_prob = np.max(working_softmax)
|
||||||
|
|
||||||
|
if max_prob < max_prob_failsafe_stopping_threshold:
|
||||||
|
break
|
||||||
|
|
||||||
|
# set dynamic threshold relative to the current maximum (max. prob / dynamic_threshold_factor)
|
||||||
|
threshold = max_prob / dynamic_threshold_factor
|
||||||
|
|
||||||
|
# extract blobs for dynamic threshold
|
||||||
|
all_hard_blobs, _, _ = preprocess_softmax_static(working_softmax, threshold=threshold,
|
||||||
|
min_voxels_detection=min_voxels_detection,
|
||||||
|
max_prob_round_decimals=max_prob_round_decimals)
|
||||||
|
|
||||||
|
# select blob with max. confidence
|
||||||
|
# note: the max_prob is re-computed in the (unlikely) case that the max. prob
|
||||||
|
# was inside a 'lesion candidate' of less than min_voxels_detection, which is
|
||||||
|
# thus removed in preprocess_softmax_static.
|
||||||
|
max_prob = np.max(all_hard_blobs)
|
||||||
|
mask_current_lesion = (all_hard_blobs == max_prob)
|
||||||
|
|
||||||
|
# ensure that mask is only a single lesion candidate (this assumption fails when multiple lesions have the same max. prob)
|
||||||
|
mask_current_lesion_indexed, _ = ndimage.label(mask_current_lesion, np.ones((3, 3, 3)))
|
||||||
|
mask_current_lesion = (mask_current_lesion_indexed == 1)
|
||||||
|
|
||||||
|
# create mask with its confidence
|
||||||
|
hard_blob = (all_hard_blobs * mask_current_lesion)
|
||||||
|
|
||||||
|
# Detect whether the extracted mask is a ring/hollow sphere
|
||||||
|
# around an existing lesion candidate. For confident lesions,
|
||||||
|
# the surroundings of the prediction are still quite confident,
|
||||||
|
# and can become a second 'detection'. For an example, please
|
||||||
|
# see extracted lesion candidates nr. 4 and 5 at:
|
||||||
|
# https://repos.diagnijmegen.nl/trac/ticket/9299#comment:49
|
||||||
|
# Detection method: grow currently extracted lesions by one voxel,
|
||||||
|
# and check if they overlap with the current extracted lesion.
|
||||||
|
extracted_lesions_grown = ndimage.morphology.binary_dilation(dynamic_hard_blobs > 0)
|
||||||
|
current_lesion_has_overlap = (mask_current_lesion & extracted_lesions_grown).any()
|
||||||
|
|
||||||
|
# Check if lesion candidate should be retained
|
||||||
|
if (not remove_adjacent_lesion_candidates) or (not current_lesion_has_overlap):
|
||||||
|
# store extracted lesion
|
||||||
|
dynamic_hard_blobs += hard_blob
|
||||||
|
confidences += [(tumor_index, max_prob)]
|
||||||
|
dynamic_indexed_blobs += (mask_current_lesion * tumor_index)
|
||||||
|
|
||||||
|
# remove extracted lesion from working-softmax
|
||||||
|
working_softmax = (working_softmax * (~mask_current_lesion))
|
||||||
|
|
||||||
|
return dynamic_hard_blobs, confidences, dynamic_indexed_blobs
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_softmax(softmax: np.ndarray,
|
||||||
|
threshold: Union[str, float] = 0.10,
|
||||||
|
min_voxels_detection: int = 10,
|
||||||
|
num_lesions_to_extract: int = 5,
|
||||||
|
dynamic_threshold_factor: float = 2.5,
|
||||||
|
max_prob_round_decimals: Optional[int] = None,
|
||||||
|
remove_adjacent_lesion_candidates: bool = True) -> Tuple[np.ndarray, List[Tuple[int, float]], np.ndarray]:
|
||||||
|
"""
|
||||||
|
Generate detection proposals using a dynamic or static threshold to determine the size of lesions.
|
||||||
|
"""
|
||||||
|
if threshold == 'dynamic':
|
||||||
|
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_dynamic(softmax, min_voxels_detection=min_voxels_detection,
|
||||||
|
dynamic_threshold_factor=dynamic_threshold_factor,
|
||||||
|
num_lesions_to_extract=num_lesions_to_extract,
|
||||||
|
remove_adjacent_lesion_candidates=remove_adjacent_lesion_candidates,
|
||||||
|
max_prob_round_decimals=max_prob_round_decimals)
|
||||||
|
elif threshold == 'dynamic-fast':
|
||||||
|
# determine max. softmax and set a per-case 'static' threshold based on that
|
||||||
|
max_prob = np.max(softmax)
|
||||||
|
threshold = float(max_prob / dynamic_threshold_factor)
|
||||||
|
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
|
||||||
|
min_voxels_detection=min_voxels_detection,
|
||||||
|
max_prob_round_decimals=max_prob_round_decimals)
|
||||||
|
else:
|
||||||
|
threshold = float(threshold) # convert threshold to float, if it wasn't already
|
||||||
|
all_hard_blobs, confidences, indexed_pred = preprocess_softmax_static(softmax, threshold=threshold,
|
||||||
|
min_voxels_detection=min_voxels_detection,
|
||||||
|
max_prob_round_decimals=max_prob_round_decimals)
|
||||||
|
|
||||||
|
return all_hard_blobs, confidences, indexed_pred
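# Illustrative usage sketch (not part of the original code): the dispatcher accepts a
# fixed threshold, 'dynamic-fast' (one per-case threshold) or 'dynamic' (iterative
# extraction). The synthetic volume below is an assumption for illustration only.
def _example_preprocess_softmax():
    import numpy as np
    softmax = np.zeros((8, 32, 32), dtype=np.float32)
    softmax[3:6, 5:12, 5:12] = 0.9  # plant one strong lesion candidate
    for mode in (0.10, 'dynamic-fast', 'dynamic'):
        _, confidences, _ = preprocess_softmax(softmax, threshold=mode)
        print(mode, confidences)  # each mode should recover the single planted lesion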
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
# Evaluate all cases
|
||||||
|
def evaluate(
|
||||||
|
y_true: np.ndarray,
|
||||||
|
y_pred: np.ndarray,
|
||||||
|
min_overlap=0.10,
|
||||||
|
overlap_func: str = 'DSC',
|
||||||
|
case_confidence: str = 'max',
|
||||||
|
multiple_lesion_candidates_selection_criteria='overlap',
|
||||||
|
allow_unmatched_candidates_with_minimal_overlap=True,
|
||||||
|
flat: Optional[bool] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
|
||||||
|
# Make list out of numpy array so that it can be mapped in parallel with multiple CPUs.
|
||||||
|
y_true_list = y_true #[y_true[mri_idx] for mri_idx in range(y_true.shape[0])]
|
||||||
|
y_pred_list = y_pred #[y_pred[mri_idx] for mri_idx in range(y_pred.shape[0])]
|
||||||
|
|
||||||
|
roc_true, roc_pred = {}, {}
|
||||||
|
y_list = []
|
||||||
|
num_lesions = 0
|
||||||
|
|
||||||
|
subject_idxs = list(range(len(y_true_list)))
|
||||||
|
N_CPUS = 2
|
||||||
|
with ThreadPoolExecutor(max_workers=N_CPUS) as pool:
|
||||||
|
# define the jobs that need to be processed: evaluate_case, with each individual
|
||||||
|
# detection_map prediction, ground truth label and parameters
|
||||||
|
future_to_args = {
|
||||||
|
pool.submit(evaluate_case,
|
||||||
|
y_true,
|
||||||
|
y_pred,
|
||||||
|
min_overlap=min_overlap,
|
||||||
|
overlap_func=overlap_func,
|
||||||
|
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
|
||||||
|
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
|
||||||
|
for (y_true, y_pred, idx) in zip(y_true_list, y_pred_list, subject_idxs)
|
||||||
|
}
|
||||||
|
|
||||||
|
# process the cases in parallel
|
||||||
|
iterator = concurrent.futures.as_completed(future_to_args)
|
||||||
|
iterator = tqdm(iterator, desc='Computing FROC', total=len(y_true_list))
|
||||||
|
for future in iterator:
|
||||||
|
try:
|
||||||
|
res = future.result()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Exception: {e}")
|
||||||
|
else:
|
||||||
|
# unpack results
|
||||||
|
y_list_pat, num_lesions_gt = res
|
||||||
|
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
|
||||||
|
|
||||||
|
# aggregate results
|
||||||
|
idx = future_to_args[future]
|
||||||
|
roc_true[idx] = np.max([a[0] for a in y_list_pat])
|
||||||
|
if case_confidence == 'max':
|
||||||
|
# take highest lesion confidence as case-level confidence
|
||||||
|
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
|
||||||
|
elif case_confidence == 'bayesian':
|
||||||
|
# if a_i is the probability the i-th lesion is csPCa, then the case-level
|
||||||
|
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
|
||||||
|
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
|
||||||
|
|
||||||
|
# accumulate outputs
|
||||||
|
y_list += y_list_pat
|
||||||
|
num_lesions += num_lesions_gt
|
||||||
|
|
||||||
|
# calculate statistics
|
||||||
|
num_patients = len(roc_true)
|
||||||
|
|
||||||
|
# get lesion-level results
|
||||||
|
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
|
||||||
|
y_list=y_list, num_patients=num_patients
|
||||||
|
)
|
||||||
|
|
||||||
|
# calculate recall, precision and average precision
|
||||||
|
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
|
||||||
|
|
||||||
|
# calculate case-level AUROC
|
||||||
|
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_idxs],
|
||||||
|
y_score=[roc_pred[s] for s in subject_idxs],
|
||||||
|
pos_label=1)
|
||||||
|
auc_score = auc(fpr, tpr)
|
||||||
|
|
||||||
|
if flat is None:
    flat = True
|
||||||
|
if flat:
|
||||||
|
# flatten roc_true and roc_pred
|
||||||
|
roc_true_flat = [roc_true[s] for s in subject_idxs]
|
||||||
|
roc_pred_flat = [roc_pred[s] for s in subject_idxs]
|
||||||
|
|
||||||
|
metrics = {
|
||||||
|
"FP_per_case": convert_np_to_list(FP_per_case),
|
||||||
|
"sensitivity": convert_np_to_list(sensitivity),
|
||||||
|
"thresholds": convert_np_to_list(thresholds),
|
||||||
|
"num_lesions": int(num_lesions),
|
||||||
|
"num_patients": int(num_patients),
|
||||||
|
"roc_true": convert_np_to_list(roc_true_flat if flat else roc_true),
|
||||||
|
"roc_pred": convert_np_to_list(roc_pred_flat if flat else roc_pred),
|
||||||
|
"AP": float(AP),
|
||||||
|
"precision": convert_np_to_list(precision),
|
||||||
|
"recall": convert_np_to_list(recall),
|
||||||
|
|
||||||
|
# patient-level predictions
|
||||||
|
'auroc': float(auc_score),
|
||||||
|
'tpr': convert_np_to_list(tpr),
|
||||||
|
'fpr': convert_np_to_list(fpr),
|
||||||
|
}
|
||||||
|
|
||||||
|
return metrics
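# Illustrative usage sketch (not part of the original code): running `evaluate` on a
# tiny synthetic batch of two cases (one with a lesion, one benign). Shapes and values
# are assumptions for illustration; real detection maps come from `preprocess_softmax`.
def _example_evaluate():
    import numpy as np
    y_true = np.zeros((2, 8, 32, 32), dtype=np.int32)
    y_pred = np.zeros((2, 8, 32, 32), dtype=np.float32)
    y_true[0, 2:5, 10:16, 10:16] = 1       # case 0: one ground-truth lesion
    y_pred[0, 2:5, 10:16, 10:16] = 0.8     # case 0: matching candidate -> true positive
    y_pred[1, 5:7, 20:25, 20:25] = 0.3     # case 1: candidate without a lesion -> false positive
    metrics = evaluate(y_true=y_true, y_pred=y_pred, min_overlap=0.10, overlap_func='DSC')
    print(metrics['num_lesions'], metrics['num_patients'], metrics['auroc'])  # 1 2 1.0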
|
||||||
|
|
||||||
|
|
||||||
|
def convert_np_to_list(flat_numpy_arr):
|
||||||
|
ans = []
|
||||||
|
for elem in flat_numpy_arr:
|
||||||
|
ans.append(float(elem))
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate FROC metrics (FP rate, sensitivity)
|
||||||
|
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
|
||||||
|
# calculate counts
|
||||||
|
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||||
|
y_list=y_list, thresholds=thresholds
|
||||||
|
)
|
||||||
|
|
||||||
|
# calculate FROC metrics from counts
|
||||||
|
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
|
||||||
|
FP_per_case = FP / num_patients
|
||||||
|
|
||||||
|
return sensitivity, FP_per_case, thresholds, num_lesions
|
||||||
|
|
||||||
|
|
||||||
|
def ap_from_lesion_evaluations(y_list, thresholds=None):
|
||||||
|
# calculate counts
|
||||||
|
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||||
|
y_list=y_list, thresholds=thresholds
|
||||||
|
)
|
||||||
|
|
||||||
|
# calculate precision (lesion-level)
|
||||||
|
precision = TP / (TP + FP)
|
||||||
|
precision = np.append(precision, [0])
|
||||||
|
|
||||||
|
# calculate recall (lesion-level)
|
||||||
|
FN = num_lesions - TP
|
||||||
|
recall = TP / (TP + FN)
|
||||||
|
recall = np.append(recall, recall[-1:])
|
||||||
|
|
||||||
|
# calculate average precision (lesion-level)
|
||||||
|
AP = np.trapz(y=precision, x=recall)
|
||||||
|
|
||||||
|
return AP, precision, recall, thresholds
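# Illustrative sketch (not part of the original code): average precision from a toy
# lesion list. Each entry is (is_lesion, confidence, overlap); the values below are
# assumptions chosen so that a TP, an FP and a missed lesion all occur.
def _example_ap_from_lesion_evaluations():
    y_list = [
        (1, 0.9, 0.60),  # detected lesion (high confidence)
        (0, 0.6, 0.00),  # false positive
        (1, 0.4, 0.30),  # detected lesion (low confidence)
        (1, 0.0, 0.00),  # missed lesion
    ]
    AP, precision, recall, thresholds = ap_from_lesion_evaluations(y_list)
    print(AP)  # roughly 0.19 for this toy input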
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate macro metrics (true positives (TP), false positives (FP))
|
||||||
|
def counts_from_lesion_evaluations(
|
||||||
|
y_list: List[Tuple[int, float, float]],
|
||||||
|
thresholds: "Optional[npt.NDArray[np.float64]]" = None
|
||||||
|
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
|
||||||
|
"""
|
||||||
|
Calculate true positives (TP) and false positives (FP) as function of threshold,
|
||||||
|
based on the case evaluations from `evaluate_case`.
|
||||||
|
"""
|
||||||
|
# sort predictions
|
||||||
|
y_list.sort()
|
||||||
|
|
||||||
|
# collect targets and predictions
|
||||||
|
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
|
||||||
|
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
|
||||||
|
|
||||||
|
# calculate number of lesions
|
||||||
|
num_lesions = y_true.sum()
|
||||||
|
|
||||||
|
if thresholds is None:
|
||||||
|
# collect thresholds for FROC analysis
|
||||||
|
thresholds = np.unique(y_pred)
|
||||||
|
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
|
||||||
|
|
||||||
|
# for >10,000 thresholds: resample to 10,000 unique thresholds, while also
|
||||||
|
# keeping all thresholds higher than 0.8 and the 20 lowest thresholds
|
||||||
|
if len(thresholds) > 10_000:
|
||||||
|
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
|
||||||
|
st = [thresholds[i] for i in rng]
|
||||||
|
low_thresholds = thresholds[-20:]
|
||||||
|
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
|
||||||
|
|
||||||
|
# define placeholders
|
||||||
|
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||||
|
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||||
|
|
||||||
|
# for each threshold: count FPs and calculate the sensitivity
|
||||||
|
for i, th in enumerate(thresholds):
|
||||||
|
if th > 0:
|
||||||
|
y_pred_thresholded = (y_pred >= th).astype(int)
|
||||||
|
tp = np.sum(y_true*y_pred_thresholded)
|
||||||
|
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
|
||||||
|
|
||||||
|
# update FROC with new point
|
||||||
|
FP[i] = fp
|
||||||
|
TP[i] = tp
|
||||||
|
else:
|
||||||
|
# extend FROC curve to infinity
|
||||||
|
TP[i] = TP[-2]
|
||||||
|
FP[i] = np.inf
|
||||||
|
|
||||||
|
return TP, FP, thresholds, num_lesions
|
||||||
|
|
||||||
|
|
||||||
|
# Compute base prediction metrics TP/FP/FN with associated model confidences
|
||||||
|
def evaluate_case(
|
||||||
|
y_true: np.ndarray,
|
||||||
|
y_pred: np.ndarray,
|
||||||
|
min_overlap: float = 0.10,
|
||||||
|
overlap_func: str = 'DSC',
|
||||||
|
multiple_lesion_candidates_selection_criteria: str = 'overlap',
|
||||||
|
allow_unmatched_candidates_with_minimal_overlap: bool = True
|
||||||
|
) -> Tuple[List[Tuple[int, float, float]], int]:
|
||||||
|
"""
|
||||||
|
Gather the list of lesion candidates, and classify in TP/FP/FN.
|
||||||
|
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
|
||||||
|
ground truth mask, use 'overlap' or 'confidence' to choose
|
||||||
|
which lesion is matched against the ground truth mask.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- a list of tuples with:
|
||||||
|
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
|
||||||
|
- number of ground truth lesions
|
||||||
|
"""
|
||||||
|
y_list: List[Tuple[int, float, float]] = []
|
||||||
|
|
||||||
|
if overlap_func == 'IoU':
|
||||||
|
overlap_func = calculate_iou
|
||||||
|
elif overlap_func == 'DSC':
|
||||||
|
overlap_func = calculate_dsc
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
|
||||||
|
|
||||||
|
# ensure correct dtypes (label: int32, prediction: float32)
|
||||||
|
y_true = y_true.astype('int32')
|
||||||
|
y_pred = y_pred.astype('float32')
|
||||||
|
|
||||||
|
if y_pred.shape[0] < y_true.shape[0]:
|
||||||
|
print("Warning: padding prediction to match label!")
|
||||||
|
y_pred = resize_image_with_crop_or_pad(y_pred, y_true.shape)
|
||||||
|
|
||||||
|
|
||||||
|
confidences, indexed_pred = parse_detection_map(y_pred)
|
||||||
|
|
||||||
|
lesion_candidates_best_overlap: Dict[str, float] = {}
|
||||||
|
|
||||||
|
if y_true.any():
|
||||||
|
# for each malignant scan
|
||||||
|
labeled_gt, num_gt_lesions = ndimage.label(y_true, np.ones((3, 3, 3)))
|
||||||
|
|
||||||
|
for gt_lesion_id in range(1, num_gt_lesions+1):
|
||||||
|
# for each lesion in ground-truth (GT) label
|
||||||
|
gt_lesion_mask = (labeled_gt == gt_lesion_id)
|
||||||
|
|
||||||
|
# collect indices of lesion candidates that have any overlap with the current GT lesion
|
||||||
|
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
|
||||||
|
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
|
||||||
|
|
||||||
|
# collect lesion candidates for current GT lesion
|
||||||
|
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
|
||||||
|
for lesion_candidate_id, lesion_confidence in confidences:
|
||||||
|
if lesion_candidate_id in overlapping_lesion_candidate_indices:
|
||||||
|
# calculate overlap between lesion candidate and GT mask
|
||||||
|
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
|
||||||
|
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
|
||||||
|
|
||||||
|
# keep track of the highest overlap a lesion candidate has with any GT lesion
|
||||||
|
lesion_candidates_best_overlap[lesion_candidate_id] = max(
|
||||||
|
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
# store lesion candidate info for current GT mask
|
||||||
|
lesion_candidates_for_target_gt.append({
|
||||||
|
'id': lesion_candidate_id,
|
||||||
|
'confidence': lesion_confidence,
|
||||||
|
'overlap': overlap_score,
|
||||||
|
})
|
||||||
|
print(lesion_candidates_for_target_gt)
|
||||||
|
|
||||||
|
if len(lesion_candidates_for_target_gt) == 0:
|
||||||
|
# no lesion candidate matched with GT mask. Add FN.
|
||||||
|
y_list.append((1, 0., 0.))
|
||||||
|
elif len(lesion_candidates_for_target_gt) == 1:
|
||||||
|
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
|
||||||
|
candidate_info = lesion_candidates_for_target_gt[0]
|
||||||
|
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||||
|
|
||||||
|
if candidate_info['overlap'] > min_overlap:
|
||||||
|
# overlap between lesion candidate and GT mask is sufficient, add TP
|
||||||
|
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
|
||||||
|
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||||
|
else:
|
||||||
|
# overlap between lesion candidate and GT mask is insufficient, add FN
|
||||||
|
y_list.append((1, 0., 0.))
|
||||||
|
else:
|
||||||
|
# multiple predictions for current GT lesion
|
||||||
|
# sort lesion candidates based on overlap or confidence
|
||||||
|
key = multiple_lesion_candidates_selection_criteria
|
||||||
|
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
|
||||||
|
|
||||||
|
gt_lesion_matched = False
|
||||||
|
for candidate_info in lesion_candidates_for_target_gt:
|
||||||
|
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||||
|
|
||||||
|
if candidate_info['overlap'] > min_overlap:
|
||||||
|
indexed_pred[lesion_pred_mask] = 0
|
||||||
|
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||||
|
gt_lesion_matched = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not gt_lesion_matched:
|
||||||
|
# ground truth lesion not matched to a lesion candidate. Add FN.
|
||||||
|
y_list.append((1, 0., 0.))
|
||||||
|
|
||||||
|
# Remaining lesions are FPs
|
||||||
|
remaining_lesions = set(np.unique(indexed_pred))
|
||||||
|
remaining_lesions -= {0} # remove index 0, if present
|
||||||
|
for lesion_candidate_id, lesion_confidence in confidences:
|
||||||
|
if lesion_candidate_id in remaining_lesions:
|
||||||
|
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||||
|
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
|
||||||
|
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
|
||||||
|
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
|
||||||
|
# In this operation mode, this lesion candidate is not considered as a false positive.
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
y_list.append((0, lesion_confidence, 0.)) # add FP
|
||||||
|
|
||||||
|
else:
|
||||||
|
# for benign case, all predictions are FPs
|
||||||
|
num_gt_lesions = 0
|
||||||
|
if len(confidences) > 0:
|
||||||
|
for _, lesion_confidence in confidences:
|
||||||
|
y_list.append((0, lesion_confidence, 0.))
|
||||||
|
else:
|
||||||
|
y_list.append((0, 0., 0.)) # avoid empty list
|
||||||
|
|
||||||
|
return y_list, num_gt_lesions
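# Illustrative sketch (not part of the original code): classifying the candidates of a
# single synthetic case into TP/FP/FN. The volumes below are assumptions for
# illustration only.
def _example_evaluate_case():
    import numpy as np
    label = np.zeros((8, 32, 32), dtype=np.int32)
    detection_map = np.zeros((8, 32, 32), dtype=np.float32)
    label[2:5, 10:16, 10:16] = 1
    detection_map[2:5, 10:16, 10:16] = 0.75   # candidate matching the lesion -> TP
    detection_map[6:8, 25:30, 25:30] = 0.20   # spurious candidate -> FP
    y_list, num_gt_lesions = evaluate_case(label, detection_map, min_overlap=0.10, overlap_func='DSC')
    print(y_list, num_gt_lesions)  # e.g. [(1, 0.75, ~1.0), (0, 0.2, 0.0)] and 1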
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate Intersection over Union (IoU) for N-D Arrays
|
||||||
|
def calculate_iou(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
|
||||||
|
epsilon = 1e-8
|
||||||
|
iou_num = np.sum(predictions[labels == 1])
|
||||||
|
iou_denom = np.sum(predictions) + np.sum(labels) - iou_num
|
||||||
|
return float((iou_num + epsilon) / (iou_denom + epsilon))
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate Dice Similarity Coefficient (DSC) for N-D Arrays
|
||||||
|
def calculate_dsc(predictions: "npt.NDArray[np.float32]", labels: "npt.NDArray[np.int32]") -> float:
|
||||||
|
epsilon = 1e-8
|
||||||
|
dsc_num = np.sum(predictions[labels == 1]) * 2.0
|
||||||
|
dsc_denom = np.sum(predictions) + np.sum(labels)
|
||||||
|
return float((dsc_num + epsilon) / (dsc_denom + epsilon))
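# Illustrative sketch (not part of the original code): IoU and DSC on two half-overlapping
# binary masks; the shapes and values are assumptions for illustration only.
def _example_overlap_metrics():
    import numpy as np
    pred = np.zeros((4, 4, 4), dtype=np.float32)
    gt = np.zeros((4, 4, 4), dtype=np.int32)
    pred[0:2] = 1  # 32 predicted voxels
    gt[1:3] = 1    # 32 ground-truth voxels, 16 of which overlap with the prediction
    print(calculate_iou(pred, gt))  # 16 / 48   = ~0.33
    print(calculate_dsc(pred, gt))  # 2*16 / 64 = ~0.50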
|
||||||
|
|
||||||
|
|
||||||
|
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
|
||||||
|
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
|
||||||
|
assert isinstance(image, np.ndarray)
|
||||||
|
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
|
||||||
|
"Target size doesn't fit image size"
|
||||||
|
|
||||||
|
rank = len(img_size) # image dimensions
|
||||||
|
|
||||||
|
# placeholders for new shape
|
||||||
|
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
|
||||||
|
to_padding = [[0, 0] for _ in range(rank)]
|
||||||
|
slicer = [slice(None)] * rank
|
||||||
|
|
||||||
|
# for each dimension, determine process (cropping or padding)
|
||||||
|
for i in range(rank):
|
||||||
|
if image.shape[i] < img_size[i]:
|
||||||
|
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
|
||||||
|
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
|
||||||
|
else:
|
||||||
|
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
|
||||||
|
from_indices[i][1] = from_indices[i][0] + img_size[i]
|
||||||
|
|
||||||
|
# create slicer object to crop/leave each dimension
|
||||||
|
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
|
||||||
|
|
||||||
|
# pad cropped image to extend missing dimension
|
||||||
|
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
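# Illustrative sketch (not part of the original code): padding the depth axis while
# center-cropping the in-plane axes. The input shape and target size are assumptions.
def _example_resize_image_with_crop_or_pad():
    import numpy as np
    image = np.random.rand(10, 80, 80)
    out = resize_image_with_crop_or_pad(image, img_size=(16, 64, 64), mode='constant')
    print(out.shape)  # (16, 64, 64): depth padded 10 -> 16, in-plane cropped 80 -> 64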
|
||||||
|
|
||||||
|
|
||||||
|
# Parse Detection Maps to Individual Lesions + Likelihoods
|
||||||
|
def parse_detection_map(detection_map):
|
||||||
|
# Label All Non-Connected Components in Detection Map
|
||||||
|
blobs_index, num_blobs = ndimage.label(detection_map, np.ones((3, 3, 3)))
|
||||||
|
|
||||||
|
confidences = []
|
||||||
|
if num_blobs > 0:
|
||||||
|
# For Each Lesion Detection
|
||||||
|
for tumor_index in range(1, num_blobs+1):
|
||||||
|
|
||||||
|
# Extract Mask of Current Lesion
|
||||||
|
# hard_blob = np.zeros_like(blobs_index)
|
||||||
|
# hard_blob[blobs_index == tumor] = 1
|
||||||
|
# TODO: replace the above with the following, which should be faster:
|
||||||
|
# hard_blob = (blobs_index == tumor).astype(int)
|
||||||
|
|
||||||
|
# Extract Max Predicted Likelihood for Lesion Detection
|
||||||
|
# max_prob = np.max(hard_blob) # <- this is always 1
|
||||||
|
# hard_blob[hard_blob > 0] = max_prob # <- this line does nothing, as hard_blob is not used
|
||||||
|
|
||||||
|
# Store Predicted Likelihood per Lesion Detection
|
||||||
|
max_prob = detection_map[blobs_index == tumor_index].max()
|
||||||
|
confidences.append((tumor_index, max_prob))
|
||||||
|
return confidences, blobs_index
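# Illustrative sketch (not part of the original code): parsing a detection map with two
# disconnected candidates. The volume below is an assumption for illustration only.
def _example_parse_detection_map():
    import numpy as np
    detection_map = np.zeros((6, 20, 20), dtype=np.float32)
    detection_map[1:3, 2:6, 2:6] = 0.9       # lesion candidate 1
    detection_map[4:6, 12:18, 12:18] = 0.4   # lesion candidate 2
    confidences, indexed = parse_detection_map(detection_map)
    print(confidences, indexed.max())  # e.g. [(1, 0.9), (2, 0.4)] and 2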
|
||||||
|
|
||||||
|
############################### CONSTANTS ####################################
|
||||||
|
# Quitin code removed
|
||||||
|
############################### CONSTANTS ####################################
|
||||||
|
|
||||||
|
def move_dims(arr):
|
||||||
|
# UMCG numpy dimensions convention: dims = (batch, width, height, depth)
|
||||||
|
# Joeran numpy dimensions convention: dims = (batch, depth, height, width)
|
||||||
|
arr = np.moveaxis(arr, 3, 1) # Joeran has his numpy arrays ordered differently.
|
||||||
|
arr = np.moveaxis(arr, 3, 2)
|
||||||
|
return arr
|
||||||
|
|
||||||
|
############################### CONSTANTS ####################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# # Reconstruction model directory
|
||||||
|
# RECON_DIR = "11_new_recon_multiprocess"
|
||||||
|
# T2_MODEL_PATH = f"models/prelims/{RECON_DIR}/best-direct-fold0.h5"
|
||||||
|
|
||||||
|
# # Path to file with train, validation and test indexes.
|
||||||
|
# IDX_YAML_PATH = r"data/path_lists/pirads_4plus/train_val_test_idxs.yml"
|
||||||
|
|
||||||
|
# # Diagnostic models
|
||||||
|
# diag_models = [r"models/14_long_diag/best-direct-fold0.h5",
|
||||||
|
# r"models/15_long_diag_early_stopping/best-direct-fold0.h5"]
|
||||||
|
|
||||||
|
|
||||||
|
# # x_true_val = np.load("temp/x_true_val.npy")
|
||||||
|
# print("loading numpy arrays")
|
||||||
|
# y_true_val = np.squeeze(np.load("temp/y_true_val_label.npy"))
|
||||||
|
# y_pred_val = np.squeeze(np.load("temp/y_pred_val_detection_map.npy"))
|
||||||
|
# y_true_val = move_dims(y_true_val)
|
||||||
|
# y_pred_val = move_dims(y_pred_val)
|
||||||
|
|
||||||
|
# # Preprocess the blobs individually
|
||||||
|
# for mri_idx in range(y_pred_val.shape[0]):
|
||||||
|
# print(f"preprocessing mri idx {mri_idx}")
|
||||||
|
# y_pred_val[mri_idx] = preprocess_softmax(y_pred_val[mri_idx], threshold="dynamic")[0]
|
||||||
|
|
||||||
|
# print(f"Start FROC evaluation.")
|
||||||
|
# metrics = evaluate(y_true=y_true_val, y_pred=y_pred_val)
|
||||||
|
|
||||||
|
# dump_dict_to_yaml(metrics, 'temp', "froc_metrics", verbose=True)
|
39
src/sfransen/FROC/data_utils.py
Executable file
39
src/sfransen/FROC/data_utils.py
Executable file
@ -0,0 +1,39 @@
|
|||||||
|
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def save_metrics(metrics, file_path=None):
|
||||||
|
# convert dtypes to stock Python
|
||||||
|
save_metrics = sterilize(metrics)
|
||||||
|
|
||||||
|
# save metrics using safe file write
|
||||||
|
file_path_tmp = file_path + '.tmp'
|
||||||
|
with open(file_path_tmp, 'w') as fp:
|
||||||
|
json.dump(save_metrics, fp, indent=4)
|
||||||
|
os.rename(file_path_tmp, file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def sterilize(obj):
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return {k: sterilize(v) for k, v in obj.items()}
|
||||||
|
elif isinstance(obj, (list, tuple, np.ndarray)):
|
||||||
|
return [sterilize(v) for v in obj]
|
||||||
|
elif isinstance(obj, (str, int, bool, float)):
|
||||||
|
return obj
|
||||||
|
else:
|
||||||
|
return obj.__repr__()
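# Illustrative sketch (not part of the original code): `sterilize` converts numpy
# containers into JSON-friendly Python objects before `save_metrics` writes them via a
# temporary file. The metrics dict and the output file name below are assumptions.
def _example_save_metrics():
    import numpy as np
    metrics = {"sensitivity": np.array([0.4, 0.8, 1.0]), "num_lesions": 12, "AP": 0.73}
    print(sterilize(metrics))  # {'sensitivity': [0.4, 0.8, 1.0], 'num_lesions': 12, 'AP': 0.73}
    save_metrics(metrics, file_path="froc_metrics_example.json")  # hypothetical file name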
|
505
src/sfransen/FROC/froc.py
Executable file
505
src/sfransen/FROC/froc.py
Executable file
@ -0,0 +1,505 @@
|
|||||||
|
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
from scipy import ndimage
|
||||||
|
from sklearn.metrics import roc_curve, auc
|
||||||
|
import concurrent.futures
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from pathlib import Path
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from typing import List, Tuple, Dict, Any, Union, Optional, Callable, Iterable, Hashable, Sized
|
||||||
|
try:
|
||||||
|
import numpy.typing as npt
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
from image_utils import (
|
||||||
|
resize_image_with_crop_or_pad, read_label, read_prediction
|
||||||
|
)
|
||||||
|
from analysis_utils import (
|
||||||
|
parse_detection_map, calculate_iou, calculate_dsc
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Compute base prediction metrics TP/FP/FN with associated model confidences
|
||||||
|
def evaluate_case(
|
||||||
|
detection_map: "Union[npt.NDArray[np.float32], str]",
|
||||||
|
label: "Union[npt.NDArray[np.int32], str]",
|
||||||
|
min_overlap: float = 0.10,
|
||||||
|
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
|
||||||
|
multiple_lesion_candidates_selection_criteria: str = 'overlap',
|
||||||
|
allow_unmatched_candidates_with_minimal_overlap: bool = True
|
||||||
|
) -> Tuple[List[Tuple[int, float, float]], int]:
|
||||||
|
"""
|
||||||
|
Gather the list of lesion candidates, and classify in TP/FP/FN.
|
||||||
|
- multiple_lesion_candidates_selection_criteria: when multiple lesion candidates have overlap with the same
|
||||||
|
ground truth mask, use 'overlap' or 'confidence' to choose
|
||||||
|
which lesion is matched against the ground truth mask.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- a list of tuples with:
|
||||||
|
(is_lesion, prediction confidence, Dice similarity coefficient, number of voxels in label)
|
||||||
|
- number of ground truth lesions
|
||||||
|
"""
|
||||||
|
y_list: List[Tuple[int, float, float]] = []
|
||||||
|
if isinstance(label, str):
|
||||||
|
label = read_label(label)
|
||||||
|
if isinstance(detection_map, str):
|
||||||
|
detection_map = read_prediction(detection_map)
|
||||||
|
if overlap_func == 'IoU':
|
||||||
|
overlap_func = calculate_iou
|
||||||
|
elif overlap_func == 'DSC':
|
||||||
|
overlap_func = calculate_dsc
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Overlap function with name {overlap_func} not recognized. Supported are 'IoU' and 'DSC'")
|
||||||
|
|
||||||
|
# ensure correct dtypes (label: int32, detection map: float32)
|
||||||
|
label = label.astype('int32')
|
||||||
|
detection_map = detection_map.astype('float32')
|
||||||
|
|
||||||
|
if detection_map.shape[0] < label.shape[0]:
|
||||||
|
print("Warning: padding prediction to match label!")
|
||||||
|
detection_map = resize_image_with_crop_or_pad(detection_map, label.shape)
|
||||||
|
|
||||||
|
confidences, indexed_pred = parse_detection_map(detection_map)
|
||||||
|
|
||||||
|
lesion_candidates_best_overlap: Dict[str, float] = {}
|
||||||
|
|
||||||
|
# note to stefan: check whether the if statements are correct and that the append is performed correctly
|
||||||
|
if label.any():
|
||||||
|
# for each malignant scan
|
||||||
|
labeled_gt, num_gt_lesions = ndimage.label(label, np.ones((3, 3, 3)))
|
||||||
|
# print("test3, werkt if label.any", num_gt_lesions) WE
|
||||||
|
for gt_lesion_id in range(1, num_gt_lesions+1):
|
||||||
|
# for each lesion in ground-truth (GT) label
|
||||||
|
gt_lesion_mask = (labeled_gt == gt_lesion_id)
|
||||||
|
|
||||||
|
# collect indices of lesion candidates that have any overlap with the current GT lesion
|
||||||
|
overlapping_lesion_candidate_indices = set(np.unique(indexed_pred[gt_lesion_mask]))
|
||||||
|
overlapping_lesion_candidate_indices -= {0} # remove index 0, if present
|
||||||
|
|
||||||
|
# collect lesion candidates for current GT lesion
|
||||||
|
lesion_candidates_for_target_gt: List[Dict[str, Union[int, float]]] = []
|
||||||
|
for lesion_candidate_id, lesion_confidence in confidences:
|
||||||
|
if lesion_candidate_id in overlapping_lesion_candidate_indices:
|
||||||
|
# calculate overlap between lesion candidate and GT mask
|
||||||
|
lesion_pred_mask = (indexed_pred == lesion_candidate_id)
|
||||||
|
overlap_score = overlap_func(lesion_pred_mask, gt_lesion_mask)
|
||||||
|
|
||||||
|
# keep track of the highest overlap a lesion candidate has with any GT lesion
|
||||||
|
lesion_candidates_best_overlap[lesion_candidate_id] = max(
|
||||||
|
overlap_score, lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
# store lesion candidate info for current GT mask
|
||||||
|
lesion_candidates_for_target_gt.append({
|
||||||
|
'id': lesion_candidate_id,
|
||||||
|
'confidence': lesion_confidence,
|
||||||
|
'overlap': overlap_score,
|
||||||
|
})
|
||||||
|
print("test 4, lesion_candidates_for_target_gt:",lesion_candidates_for_target_gt)
|
||||||
|
# Min. overlap is not reached: approx. 0.001
|
||||||
|
if len(lesion_candidates_for_target_gt) == 0:
|
||||||
|
# no lesion candidate matched with GT mask. Add FN.
|
||||||
|
y_list.append((1, 0., 0.))
|
||||||
|
elif len(lesion_candidates_for_target_gt) == 1:
|
||||||
|
# single lesion candidate overlapped with GT mask. Add TP if overlap is sufficient, or FN otherwise.
|
||||||
|
candidate_info = lesion_candidates_for_target_gt[0]
|
||||||
|
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||||
|
|
||||||
|
if candidate_info['overlap'] > min_overlap:
|
||||||
|
# overlap between lesion candidate and GT mask is sufficient, add TP
|
||||||
|
indexed_pred[lesion_pred_mask] = 0 # remove lesion candidate after assignment
|
||||||
|
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||||
|
else:
|
||||||
|
# overlap between lesion candidate and GT mask is insufficient, add FN
|
||||||
|
y_list.append((1, 0., 0.))
|
||||||
|
else:
|
||||||
|
# multiple predictions for current GT lesion
|
||||||
|
# sort lesion candidates based on overlap or confidence
|
||||||
|
key = multiple_lesion_candidates_selection_criteria
|
||||||
|
lesion_candidates_for_target_gt = sorted(lesion_candidates_for_target_gt, key=lambda x: x[key], reverse=True)
|
||||||
|
|
||||||
|
gt_lesion_matched = False
|
||||||
|
for candidate_info in lesion_candidates_for_target_gt:
|
||||||
|
lesion_pred_mask = (indexed_pred == candidate_info['id'])
|
||||||
|
|
||||||
|
if candidate_info['overlap'] > min_overlap:
|
||||||
|
indexed_pred[lesion_pred_mask] = 0
|
||||||
|
y_list.append((1, candidate_info['confidence'], candidate_info['overlap']))
|
||||||
|
gt_lesion_matched = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not gt_lesion_matched:
|
||||||
|
# ground truth lesion not matched to a lesion candidate. Add FN.
|
||||||
|
y_list.append((1, 0., 0.))
|
||||||
|
|
||||||
|
# Remaining lesions are FPs
|
||||||
|
remaining_lesions = set(np.unique(indexed_pred))
|
||||||
|
remaining_lesions -= {0} # remove index 0, if present
|
||||||
|
for lesion_candidate_id, lesion_confidence in confidences:
|
||||||
|
if lesion_candidate_id in remaining_lesions:
|
||||||
|
overlap_score = lesion_candidates_best_overlap.get(lesion_candidate_id, 0)
|
||||||
|
if allow_unmatched_candidates_with_minimal_overlap and overlap_score > min_overlap:
|
||||||
|
# The lesion candidate was not matched to a GT lesion, but did have overlap > min_overlap
|
||||||
|
# with a GT lesion. The GT lesion is, however, matched to another lesion candidate.
|
||||||
|
# In this operation mode, this lesion candidate is not considered as a false positive.
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
y_list.append((0, lesion_confidence, 0.)) # add FP
|
||||||
|
# print("test 4, gaat alles hiernaartoe?") == JA
|
||||||
|
# print("test 3, hoe ziet y_list eruit na labels",y_list)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# for benign case, all predictions are FPs
|
||||||
|
num_gt_lesions = 0
|
||||||
|
if len(confidences) > 0:
|
||||||
|
for _, lesion_confidence in confidences:
|
||||||
|
y_list.append((0, lesion_confidence, 0.))
|
||||||
|
else:
|
||||||
|
y_list.append((0, 0., 0.)) # avoid empty list
|
||||||
|
|
||||||
|
return y_list, num_gt_lesions
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate macro metrics (true positives (TP), false positives (FP))
|
||||||
|
def counts_from_lesion_evaluations(
|
||||||
|
y_list: List[Tuple[int, float, float]],
|
||||||
|
thresholds: "Optional[npt.NDArray[np.float64]]" = None
|
||||||
|
) -> "Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float64], int]":
|
||||||
|
"""
|
||||||
|
Calculate true positives (TP) and false positives (FP) as function of threshold,
|
||||||
|
based on the case evaluations from `evaluate_case`.
|
||||||
|
"""
|
||||||
|
# sort predictions
|
||||||
|
# print("test 4, zitten er predicties bij leasions voor sort?",y_list)
|
||||||
|
y_list.sort()
|
||||||
|
# print("test 4, zitten er predicties bij leasions na sort?",y_list,'len y_lsit:',len(y_list))
|
||||||
|
# collect targets and predictions
|
||||||
|
y_true: "npt.NDArray[np.float64]" = np.array([target for target, *_ in y_list])
|
||||||
|
y_pred: "npt.NDArray[np.float64]" = np.array([pred for _, pred, *_ in y_list])
|
||||||
|
# print("test,zijn de laatste y-pred hoog?", y_pred)
|
||||||
|
# calculate number of lesions
|
||||||
|
num_lesions = y_true.sum()
|
||||||
|
|
||||||
|
if thresholds is None:
|
||||||
|
# collect thresholds for FROC analysis
|
||||||
|
thresholds = np.unique(y_pred)
|
||||||
|
thresholds[::-1].sort() # sort thresholds in descending order (inplace)
|
||||||
|
|
||||||
|
# for >10,000 thresholds: resample to 10,000 unique thresholds, while also
|
||||||
|
# keeping all thresholds higher than 0.8 and the 20 lowest thresholds
|
||||||
|
if len(thresholds) > 10_000:
|
||||||
|
rng = np.arange(1, len(thresholds), len(thresholds)/10_000, dtype=np.int32)
|
||||||
|
st = [thresholds[i] for i in rng]
|
||||||
|
low_thresholds = thresholds[-20:]
|
||||||
|
thresholds = np.array([t for t in thresholds if t > 0.8 or t in st or t in low_thresholds])
|
||||||
|
|
||||||
|
# define placeholders
|
||||||
|
FP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||||
|
TP: "npt.NDArray[np.float32]" = np.zeros_like(thresholds, dtype=np.float32)
|
||||||
|
|
||||||
|
# for each threshold: count FPs and calculate the sensitivity
|
||||||
|
for i, th in enumerate(thresholds):
|
||||||
|
if th > 0:
|
||||||
|
y_pred_thresholded = (y_pred >= th).astype(int)
|
||||||
|
tp = np.sum(y_true*y_pred_thresholded)
|
||||||
|
fp = np.sum(y_pred_thresholded - y_true*y_pred_thresholded)
|
||||||
|
# print("test, is y_pred_thresholded altijd 0?",y_pred_thresholded)
|
||||||
|
# update FROC with new point
|
||||||
|
FP[i] = fp
|
||||||
|
TP[i] = tp
|
||||||
|
else:
|
||||||
|
# extend FROC curve to infinity
|
||||||
|
TP[i] = TP[-2]
|
||||||
|
FP[i] = np.inf
|
||||||
|
|
||||||
|
|
||||||
|
# print("test if tp werkt",TP)
|
||||||
|
# print("test if fp werkt",FP)
|
||||||
|
return TP, FP, thresholds, num_lesions
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate FROC metrics (FP rate, sensitivity)
|
||||||
|
def froc_from_lesion_evaluations(y_list, num_patients, thresholds=None):
|
||||||
|
# calculate counts
|
||||||
|
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||||
|
y_list=y_list, thresholds=thresholds
|
||||||
|
)
|
||||||
|
|
||||||
|
# calculate FROC metrics from counts
|
||||||
|
sensitivity = TP / num_lesions if num_lesions > 0 else np.nan
|
||||||
|
# print('test, the TP value is shown below:', TP)
|
||||||
|
# print('test, the num_lesions value is shown below:', num_lesions)
|
||||||
|
|
||||||
|
FP_per_case = FP / num_patients
|
||||||
|
|
||||||
|
return sensitivity, FP_per_case, thresholds, num_lesions
|
||||||
|
|
||||||
|
|
||||||
|
def ap_from_lesion_evaluations(y_list, thresholds=None):
|
||||||
|
# calculate counts
|
||||||
|
TP, FP, thresholds, num_lesions = counts_from_lesion_evaluations(
|
||||||
|
y_list=y_list, thresholds=thresholds
|
||||||
|
)
|
||||||
|
|
||||||
|
# calculate precision (lesion-level)
|
||||||
|
precision = TP / (TP + FP)
|
||||||
|
precision = np.append(precision, [0])
|
||||||
|
|
||||||
|
# calculate recall (lesion-level)
|
||||||
|
FN = num_lesions - TP
|
||||||
|
recall = TP / (TP + FN)
|
||||||
|
recall = np.append(recall, recall[-1:])
|
||||||
|
|
||||||
|
# calculate average precision (lesion-level)
|
||||||
|
AP = np.trapz(y=precision, x=recall)
|
||||||
|
|
||||||
|
return AP, precision, recall, thresholds
|
||||||
|
|
||||||
|
|
||||||
|
# Compute full FROC
|
||||||
|
def froc(
|
||||||
|
y_det: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
|
||||||
|
y_true: "Iterable[Union[npt.NDArray[np.float64], str, Path]]",
|
||||||
|
subject_list: Optional[Iterable[Hashable]] = None,
|
||||||
|
min_overlap=0.10,
|
||||||
|
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
|
||||||
|
case_confidence: str = 'max',
|
||||||
|
multiple_lesion_candidates_selection_criteria='overlap',
|
||||||
|
allow_unmatched_candidates_with_minimal_overlap=True,
|
||||||
|
flat: Optional[bool] = None,
|
||||||
|
num_parallel_calls: int = 8,
|
||||||
|
verbose: int = 0,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
FROC evaluation pipeline
|
||||||
|
(written 19 January 2022 by Joeran Bosma)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
For normal usage of the FROC evaluation pipeline, use the function `froc` with parameters
|
||||||
|
`y_det`, `y_true` and (optional) `subject_list`. Please note that this function is written
|
||||||
|
for binary 3D FROC analysis.
|
||||||
|
|
||||||
|
- `y_det`: iterable of all detection_map volumes to evaluate. Alternatively, y_det may contain
|
||||||
|
filenames ending in .nii.gz/.mha/.mhd/.npy/.npz, which will be loaded on-the-fly.
|
||||||
|
Provide an array of shape `(num_samples, D, H, W)`, where D, H, W are the spatial
|
||||||
|
dimensions (depth, height and width).
|
||||||
|
- `y_true`: iterable of all ground truth labels. Alternatively, y_true may contain filenames
|
||||||
|
ending in .nii.gz/.mha/.mhd/.npy/.npz, which should contain binary labels and
|
||||||
|
will be loaded on-the-fly. Provide an array of the same shape as `y_det`. Use
|
||||||
|
`1` to encode ground truth lesion, and `0` to encode background.
|
||||||
|
|
||||||
|
Additional settings:
|
||||||
|
For more control over the FROC evaluation pipeline, use:
|
||||||
|
- `min_overlap`: defines the minimal required Intersection over Union (IoU) or Dice similarity
|
||||||
|
coefficient (DSC) between a lesion candidate and ground truth lesion, to be
|
||||||
|
counted as a true positive detection.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Initialize Lists
|
||||||
|
roc_true = {}
|
||||||
|
roc_pred = {}
|
||||||
|
y_list = []
|
||||||
|
num_lesions = 0
|
||||||
|
|
||||||
|
if subject_list is None:
|
||||||
|
# generate indices to keep track of each case during multiprocessing
|
||||||
|
subject_list = itertools.count()
|
||||||
|
if flat is None:
|
||||||
|
flat = True
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=num_parallel_calls) as pool:
|
||||||
|
# define the jobs that need to be processed: evaluate_case, with each individual
|
||||||
|
# detection_map prediction, ground truth label and parameters
|
||||||
|
future_to_args = {
|
||||||
|
pool.submit(evaluate_case, y_pred, y_true, min_overlap=min_overlap, overlap_func=overlap_func,
|
||||||
|
multiple_lesion_candidates_selection_criteria=multiple_lesion_candidates_selection_criteria,
|
||||||
|
allow_unmatched_candidates_with_minimal_overlap=allow_unmatched_candidates_with_minimal_overlap): idx
|
||||||
|
for (y_pred, y_true, idx) in zip(y_det, y_true, subject_list)
|
||||||
|
}
|
||||||
|
|
||||||
|
# process the cases in parallel
|
||||||
|
iterator = concurrent.futures.as_completed(future_to_args)
|
||||||
|
if verbose:
|
||||||
|
total: Optional[int] = None
|
||||||
|
if isinstance(subject_list, Sized):
|
||||||
|
total = len(subject_list)
|
||||||
|
iterator = tqdm(iterator, desc='Computing FROC', total=total)
|
||||||
|
for future in iterator:
|
||||||
|
try:
|
||||||
|
res = future.result()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Exception: {e}")
|
||||||
|
else:
|
||||||
|
# unpack results
|
||||||
|
y_list_pat, num_lesions_gt = res
|
||||||
|
# note: y_list_pat contains: is_lesion, confidence[, Dice, gt num voxels]
|
||||||
|
# print("test 3,", y_list_pat)
|
||||||
|
# aggregate results
|
||||||
|
idx = future_to_args[future]
|
||||||
|
# print("test2, indx", idx)
|
||||||
|
# test: everything read in
|
||||||
|
roc_true[idx] = np.max([a[0] for a in y_list_pat])
|
||||||
|
# print("test2, roc_true",roc_true)
|
||||||
|
if case_confidence == 'max':
|
||||||
|
# take highest lesion confidence as case-level confidence
|
||||||
|
roc_pred[idx] = np.max([a[1] for a in y_list_pat])
|
||||||
|
elif case_confidence == 'bayesian':
|
||||||
|
# if a_i is the probability the i-th lesion is csPCa, then the case-level
|
||||||
|
# probability to have any csPCa lesion is 1 - Π_i{ 1 - a_i}
|
||||||
|
roc_pred[idx] = 1 - np.prod([(1-a[1]) for a in y_list_pat])
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Patient confidence calculation method not recognised. Got: {case_confidence}.")
|
||||||
|
|
||||||
|
# accumulate outputs
|
||||||
|
y_list += y_list_pat
|
||||||
|
num_lesions += num_lesions_gt
|
||||||
|
|
||||||
|
# print("test2,heeft y-list ook leasie pred:",y_list)
|
||||||
|
# calculate statistics
|
||||||
|
num_patients = len(roc_true)
|
||||||
|
|
||||||
|
# get lesion-level results
|
||||||
|
sensitivity, FP_per_case, thresholds, num_lesions = froc_from_lesion_evaluations(
|
||||||
|
y_list=y_list, num_patients=num_patients
|
||||||
|
)
|
||||||
|
|
||||||
|
# calculate recall, precision and average precision
|
||||||
|
AP, precision, recall, _ = ap_from_lesion_evaluations(y_list, thresholds=thresholds)
|
||||||
|
|
||||||
|
# calculate case-level AUROC
|
||||||
|
fpr, tpr, _ = roc_curve(y_true=[roc_true[s] for s in subject_list],
|
||||||
|
y_score=[roc_pred[s] for s in subject_list],
|
||||||
|
pos_label=1)
|
||||||
|
auc_score = auc(fpr, tpr)
|
||||||
|
|
||||||
|
if flat:
|
||||||
|
# flatten roc_true and roc_pred
|
||||||
|
roc_true_flat = [roc_true[s] for s in subject_list]
|
||||||
|
roc_pred_flat = [roc_pred[s] for s in subject_list]
|
||||||
|
|
||||||
|
metrics = {
|
||||||
|
"FP_per_case": FP_per_case,
|
||||||
|
"sensitivity": sensitivity,
|
||||||
|
"thresholds": thresholds,
|
||||||
|
"num_lesions": num_lesions,
|
||||||
|
"num_patients": num_patients,
|
||||||
|
"roc_true": (roc_true_flat if flat else roc_true),
|
||||||
|
"roc_pred": (roc_pred_flat if flat else roc_pred),
|
||||||
|
"AP": AP,
|
||||||
|
"precision": precision,
|
||||||
|
"recall": recall,
|
||||||
|
|
||||||
|
# patient-level predictions
|
||||||
|
'auroc': auc_score,
|
||||||
|
'tpr': tpr,
|
||||||
|
'fpr': fpr,
|
||||||
|
}
|
||||||
|
|
||||||
|
return metrics
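# Illustrative usage sketch (not part of the original code): the FROC pipeline accepts
# in-memory volumes (as below) or file paths. Shapes, values and the subject ids are
# assumptions for illustration only.
def _example_froc():
    import numpy as np
    y_true = np.zeros((2, 8, 32, 32), dtype=np.int32)
    y_det = np.zeros((2, 8, 32, 32), dtype=np.float32)
    y_true[0, 2:5, 10:16, 10:16] = 1     # case 0: one ground-truth lesion
    y_det[0, 2:5, 10:16, 10:16] = 0.8    # case 0: matching candidate -> true positive
    y_det[1, 5:7, 20:25, 20:25] = 0.3    # case 1: candidate without a lesion -> false positive
    metrics = froc(y_det=y_det, y_true=y_true, subject_list=[0, 1],
                   min_overlap=0.10, overlap_func='DSC', verbose=0)
    print(metrics['num_patients'], metrics['auroc'])  # 2 1.0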
|
||||||
|
|
||||||
|
|
||||||
|
def froc_for_folder(
|
||||||
|
y_det_dir: Union[Path, str],
|
||||||
|
y_true_dir: Optional[Union[Path, str]] = None,
|
||||||
|
subject_list: Optional[List[str]] = None,
|
||||||
|
min_overlap: float = 0.10,
|
||||||
|
overlap_func: "Union[str, Callable[[npt.NDArray[np.float32], npt.NDArray[np.int32]], float]]" = 'IoU',
|
||||||
|
case_confidence: str = 'max',
|
||||||
|
flat: Optional[bool] = None,
|
||||||
|
num_parallel_calls: int = 8,
|
||||||
|
verbose: int = 1
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Perform FROC evaluation for all samples found in y_det_dir, or the samples specified in the subject_list
|
||||||
|
|
||||||
|
Input:
|
||||||
|
- y_det_dir: path to folder containing the detection maps
|
||||||
|
- y_true_dir: (optional) allow labels to be stored in a different folder
|
||||||
|
- min_overlap: minimum overlap threshold
|
||||||
|
- overlap_func: intersection over union (IoU), Dice similarity coefficient (DSC), or custom function
|
||||||
|
"""
|
||||||
|
if y_true_dir is None:
|
||||||
|
y_true_dir = y_det_dir
|
||||||
|
|
||||||
|
y_det = []
|
||||||
|
y_true = []
|
||||||
|
if subject_list:
|
||||||
|
# collect the detection maps and labels for each case specified in the subject list
|
||||||
|
for subject_id in subject_list:
|
||||||
|
# construct paths to detection maps and labels
|
||||||
|
# print(np.type(subject_id))
|
||||||
|
# print(subject_list)
|
||||||
|
for postfix in [
|
||||||
|
"_detection_map.nii.gz", "_detection_map.npy", "_detection_map.npz",
|
||||||
|
".nii.gz", ".npy", ".npz", "_pred.nii.gz",
|
||||||
|
]:
|
||||||
|
detection_path = os.path.join(y_det_dir, f"{subject_id}{postfix}")
|
||||||
|
if os.path.exists(detection_path):
|
||||||
|
break
|
||||||
|
|
||||||
|
for postfix in [
|
||||||
|
"_label.nii.gz", "label.npy", "label.npz", "_seg.nii.gz",
|
||||||
|
]:
|
||||||
|
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
|
||||||
|
if os.path.exists(label_path):
|
||||||
|
break
|
||||||
|
if not os.path.exists(label_path):
|
||||||
|
assert y_true_dir != y_det_dir, f"Could not find label for {subject_id}!"
|
||||||
|
for postfix in [
|
||||||
|
".nii.gz", ".npy", ".npz",
|
||||||
|
]:
|
||||||
|
label_path = os.path.join(y_true_dir, f"{subject_id}{postfix}")
|
||||||
|
if os.path.exists(label_path):
|
||||||
|
break
|
||||||
|
|
||||||
|
# collect file paths
|
||||||
|
y_det += [detection_path]
|
||||||
|
y_true += [label_path]
|
||||||
|
else:
|
||||||
|
# collect all detection maps found in detection_map_dir
|
||||||
|
file_list = sorted(os.listdir(y_det_dir))
|
||||||
|
subject_list = []
|
||||||
|
if verbose >= 1:
|
||||||
|
print(f"Found {len(file_list)} files in the input directory, collecting detection_mapes with " +
|
||||||
|
"_detection_map.nii.gz and labels with _label.nii.gz..")
|
||||||
|
|
||||||
|
# collect filenames of detection_map predictions and labels
|
||||||
|
for fn in file_list:
|
||||||
|
if '_detection_map' in fn:
|
||||||
|
y_det += [os.path.join(y_det_dir, fn)]
|
||||||
|
y_true += [os.path.join(y_true_dir, fn.replace('_detection_map', '_label'))]
|
||||||
|
subject_list += [fn]
|
||||||
|
|
||||||
|
# ensure files exist
|
||||||
|
for detection_path in y_det:
|
||||||
|
assert os.path.exists(detection_path), f"Could not find detection map for {subject_id} at {detection_path}!"
|
||||||
|
for label_path in y_true:
|
||||||
|
assert os.path.exists(label_path), f"Could not find label for {subject_id} at {label_path}!"
|
||||||
|
|
||||||
|
if verbose >= 1:
|
||||||
|
print(f"Found prediction and label for {len(y_det)} cases. Here are some examples:")
|
||||||
|
print(subject_list[0:5])
|
||||||
|
|
||||||
|
# perform FROC evaluation with compiled file lists
|
||||||
|
return froc(y_det=y_det, y_true=y_true, subject_list=subject_list,
|
||||||
|
min_overlap=min_overlap, overlap_func=overlap_func, case_confidence=case_confidence,
|
||||||
|
flat=flat, num_parallel_calls=num_parallel_calls, verbose=verbose)
|
83
src/sfransen/FROC/image_utils.py
Executable file
83
src/sfransen/FROC/image_utils.py
Executable file
@ -0,0 +1,83 @@
|
|||||||
|
# Copyright 2022 Diagnostic Image Analysis Group, Radboudumc, Nijmegen, The Netherlands
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import SimpleITK as sitk
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from typing import Union
|
||||||
|
try:
|
||||||
|
import numpy.typing as npt
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Resize images (scans/predictions/labels) by cropping and/or padding [Ref: https://github.com/DLTK/DLTK]
|
||||||
|
def resize_image_with_crop_or_pad(image, img_size=(64, 64, 64), **kwargs):
|
||||||
|
assert isinstance(image, np.ndarray)
|
||||||
|
assert (image.ndim - 1 == len(img_size) or image.ndim == len(img_size)), \
|
||||||
|
"Target size doesn't fit image size"
|
||||||
|
|
||||||
|
rank = len(img_size) # image dimensions
|
||||||
|
|
||||||
|
# placeholders for new shape
|
||||||
|
from_indices = [[0, image.shape[dim]] for dim in range(rank)]
|
||||||
|
to_padding = [[0, 0] for _ in range(rank)]
|
||||||
|
slicer = [slice(None)] * rank
|
||||||
|
|
||||||
|
# for each dimension, determine process (cropping or padding)
|
||||||
|
for i in range(rank):
|
||||||
|
if image.shape[i] < img_size[i]:
|
||||||
|
to_padding[i][0] = (img_size[i] - image.shape[i]) // 2
|
||||||
|
to_padding[i][1] = img_size[i] - image.shape[i] - to_padding[i][0]
|
||||||
|
else:
|
||||||
|
from_indices[i][0] = int(np.floor((image.shape[i] - img_size[i]) / 2.))
|
||||||
|
from_indices[i][1] = from_indices[i][0] + img_size[i]
|
||||||
|
|
||||||
|
# create slicer object to crop/leave each dimension
|
||||||
|
slicer[i] = slice(from_indices[i][0], from_indices[i][1])
|
||||||
|
|
||||||
|
# pad cropped image to extend missing dimension
|
||||||
|
return np.pad(image[tuple(slicer)], to_padding, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def read_image(path: Union[Path, str]):
|
||||||
|
"""Read image, given a filepath"""
|
||||||
|
if isinstance(path, Path):
|
||||||
|
path = path.as_posix()
|
||||||
|
else:
|
||||||
|
assert isinstance(path, str), f"Unexpected path type: {type(path)}. Please provide a Path or str."
|
||||||
|
|
||||||
|
if '.npy' in path:
|
||||||
|
return np.load(path)
|
||||||
|
elif '.nii' in path or '.mha' in path or '.mhd' in path:
|
||||||
|
return sitk.GetArrayFromImage(sitk.ReadImage(path))
|
||||||
|
elif '.npz' in path:
|
||||||
|
return np.load(path)['softmax'].astype('float32')[1] # nnUnet format
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unexpected file path. Supported file formats: .nii(.gz), .mha, .npy and .npz. Got: {path}.")
|
||||||
|
|
||||||
|
|
||||||
|
def read_prediction(path: Union[Path, str]) -> "npt.NDArray[np.float32]":
|
||||||
|
"""Read prediction, given a filepath"""
|
||||||
|
# read prediction and ensure correct dtype
|
||||||
|
pred: "npt.NDArray[np.float32]" = np.array(read_image(path), dtype=np.float32)
|
||||||
|
return pred
|
||||||
|
|
||||||
|
|
||||||
|
def read_label(path: Union[Path, str]) -> "npt.NDArray[np.int32]":
|
||||||
|
"""Read label, given a filepath"""
|
||||||
|
# read label and ensure correct dtype
|
||||||
|
lbl: "npt.NDArray[np.int32]" = np.array(read_image(path), dtype=np.int32)
|
||||||
|
return lbl
|
139
src/sfransen/Make_overlay.py
Executable file
139
src/sfransen/Make_overlay.py
Executable file
@ -0,0 +1,139 @@
|
|||||||
|
# http://insightsoftwareconsortium.github.io/SimpleITK-Notebooks/Python_html/05_Results_Visualization.html
|
||||||
|
|
||||||
|
|
||||||
|
from cgitb import grey
|
||||||
|
import SimpleITK as sitk
|
||||||
|
|
||||||
|
def make_isotropic(image, interpolator=sitk.sitkLinear, spacing=None):
|
||||||
|
"""
|
||||||
|
Many file formats (e.g. jpg, png,...) expect the pixels to be isotropic, same
|
||||||
|
spacing for all axes. Saving non-isotropic data in these formats will result in
|
||||||
|
distorted images. This function makes an image isotropic via resampling, if needed.
|
||||||
|
Args:
|
||||||
|
image (SimpleITK.Image): Input image.
|
||||||
|
interpolator: By default the function uses a linear interpolator. For
|
||||||
|
label images one should use the sitkNearestNeighbor interpolator
|
||||||
|
so as not to introduce non-existent labels.
|
||||||
|
spacing (float): Desired spacing. If none given then use the smallest spacing from
|
||||||
|
the original image.
|
||||||
|
Returns:
|
||||||
|
SimpleITK.Image with isotropic spacing which occupies the same region in space as
|
||||||
|
the input image.
|
||||||
|
"""
|
||||||
|
original_spacing = image.GetSpacing()
|
||||||
|
# Image is already isotropic, just return a copy.
|
||||||
|
if all(spc == original_spacing[0] for spc in original_spacing):
|
||||||
|
return sitk.Image(image)
|
||||||
|
# Make image isotropic via resampling.
|
||||||
|
original_size = image.GetSize()
|
||||||
|
if spacing is None:
|
||||||
|
spacing = min(original_spacing)
|
||||||
|
new_spacing = [spacing] * image.GetDimension()
|
||||||
|
new_size = [
|
||||||
|
int(round(osz * ospc / spacing))
|
||||||
|
for osz, ospc in zip(original_size, original_spacing)
|
||||||
|
]
|
||||||
|
return sitk.Resample(
|
||||||
|
image,
|
||||||
|
new_size,
|
||||||
|
sitk.Transform(),
|
||||||
|
interpolator,
|
||||||
|
image.GetOrigin(),
|
||||||
|
new_spacing,
|
||||||
|
image.GetDirection(),
|
||||||
|
0,
|
||||||
|
image.GetPixelID(),
|
||||||
|
)
|
||||||
|
|
||||||
|
def mask_image_multiply(mask, image):
|
||||||
|
components_per_pixel = image.GetNumberOfComponentsPerPixel()
|
||||||
|
if components_per_pixel == 1:
|
||||||
|
return mask * image
|
||||||
|
else:
|
||||||
|
return sitk.Compose(
|
||||||
|
[
|
||||||
|
mask * sitk.VectorIndexSelectionCast(image, channel)
|
||||||
|
for channel in range(components_per_pixel)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def alpha_blend(image1, image2, alpha=0.5, mask1=None, mask2=None):
|
||||||
|
"""
|
||||||
|
Alaph blend two images, pixels can be scalars or vectors.
|
||||||
|
The alpha blending factor can be either a scalar or an image whose
|
||||||
|
pixel type is sitkFloat32 and values are in [0,1].
|
||||||
|
The region that is alpha blended is controled by the given masks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not mask1:
|
||||||
|
mask1 = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + 1.0
|
||||||
|
mask1.CopyInformation(image1)
|
||||||
|
else:
|
||||||
|
mask1 = sitk.Cast(mask1, sitk.sitkFloat32)
|
||||||
|
if not mask2:
|
||||||
|
mask2 = sitk.Image(image2.GetSize(), sitk.sitkFloat32) + 1
|
||||||
|
mask2.CopyInformation(image2)
|
||||||
|
else:
|
||||||
|
mask2 = sitk.Cast(mask2, sitk.sitkFloat32)
|
||||||
|
# if we received a scalar, convert it to an image
|
||||||
|
if type(alpha) != sitk.SimpleITK.Image:
|
||||||
|
alpha = sitk.Image(image1.GetSize(), sitk.sitkFloat32) + alpha
|
||||||
|
alpha.CopyInformation(image1)
|
||||||
|
components_per_pixel = image1.GetNumberOfComponentsPerPixel()
|
||||||
|
if components_per_pixel > 1:
|
||||||
|
img1 = sitk.Cast(image1, sitk.sitkVectorFloat32)
|
||||||
|
img2 = sitk.Cast(image2, sitk.sitkVectorFloat32)
|
||||||
|
else:
|
||||||
|
img1 = sitk.Cast(image1, sitk.sitkFloat32)
|
||||||
|
img2 = sitk.Cast(image2, sitk.sitkFloat32)
|
||||||
|
|
||||||
|
intersection_mask = mask1 * mask2
|
||||||
|
|
||||||
|
intersection_image = mask_image_multiply(
|
||||||
|
alpha * intersection_mask, img1
|
||||||
|
) + mask_image_multiply((1 - alpha) * intersection_mask, img2)
|
||||||
|
return (
|
||||||
|
intersection_image
|
||||||
|
+ mask_image_multiply(mask2 - intersection_mask, img2)
|
||||||
|
+ mask_image_multiply(mask1 - intersection_mask, img1)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
#Dictionary with functions mapping a scalar image to a three component vector image
|
||||||
|
image_mappings = {'grey': lambda x: sitk.Compose([x]*3),
|
||||||
|
'jet' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Jet),
|
||||||
|
'hot' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Hot),
|
||||||
|
'winter' : lambda x: sitk.ScalarToRGBColormap(x, sitk.ScalarToRGBColormapImageFilter.Winter)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_pred.nii')
|
||||||
|
segmentation = sitk.ReadImage('../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_seg.nii')
|
||||||
|
|
||||||
|
#Make the images isotropic so that when we save a slice along any axis
|
||||||
|
#it will be fine (most image formats assume isotropic pixel sizes).
|
||||||
|
#Segmentation is interpolated with nearest neighbor so we don't introduce new
|
||||||
|
#labels.
|
||||||
|
image = make_isotropic(image, interpolator = sitk.sitkLinear)
|
||||||
|
segmentation = make_isotropic(segmentation, interpolator = sitk.sitkNearestNeighbor)
|
||||||
|
|
||||||
|
#Convert image to sitkUInt8 after rescaling, color image formats only work for [0,255]
|
||||||
|
image_255 = sitk.Cast(sitk.RescaleIntensity(image, 0, 255), sitk.sitkUInt8)
|
||||||
|
segmentation_255 = sitk.Cast(sitk.RescaleIntensity(segmentation, 0, 255), sitk.sitkUInt8)
|
||||||
|
|
||||||
|
|
||||||
|
colormap = 'hot'
|
||||||
|
vec_image = image_mappings['hot'](image_255)
|
||||||
|
vec_image_label = image_mappings['winter'](image_255)
|
||||||
|
|
||||||
|
# vec_segmentation = sitk.ScalarToRGBColormap(segmentation, sitk.ScalarToRGBColormapImageFilter.Winter)
|
||||||
|
# vec_segmentation_2 = sitk.LabelToRGB(segmentation_255,colormap=[255,0,0])
|
||||||
|
|
||||||
|
vec_combined = sitk.Cast(alpha_blend(image1=vec_image, image2=vec_image_label, alpha=0.5, mask2=segmentation==1), sitk.sitkVectorUInt8)
|
||||||
|
|
||||||
|
|
||||||
|
sitk.WriteImage(vec_image, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image.nii.gz')
|
||||||
|
sitk.WriteImage(vec_image_label, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_image_label.nii.gz')
|
||||||
|
sitk.WriteImage(vec_combined, '../train_output/train_b50_b400_b800/output/b-800_b-400_b-50_000_vec_com.nii.gz')
|
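Because the overlay has been made isotropic, a single slice can also be written straight to a standard 2-D format such as PNG, which is exactly the use case the make_isotropic docstring describes; a small sketch (the slice index and output filename are illustrative, everything else is defined above):

# Take the middle axial slice of the RGB overlay and save it as a PNG.
z = vec_combined.GetSize()[2] // 2
sitk.WriteImage(vec_combined[:, :, z], '../train_output/train_b50_b400_b800/output/overlay_middle_slice.png')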
0
src/sfransen/__init__.py
Executable file
59
src/sfransen/csv2graph.py
Executable file
@ -0,0 +1,59 @@
import matplotlib.pyplot as plt
import pandas as pd
import glob
import argparse
import os


# create parser
def parse_input_args():
    parser = argparse.ArgumentParser(description='Parse arguments for training a Reconstruction model')

    parser.add_argument('train_out_dir',
                        type=str,
                        help='Directory name in train_output dir of the desired experiment folder. There should be a .csv file in this directory with train statistics.')

    args = parser.parse_args()
    return args


args = parse_input_args()

print(f"Plotting {args}")

# find csv file
# csv = glob.glob(f"train_output/{args.train_out_dir}/*.csv")[0]
folder_input = args.train_out_dir

# load csv file
df = pd.read_csv(f'{folder_input}')

# plot the loss curves from the csv file
for metric in df:
    # if not metric == 'epoch':
    if metric == 'loss' or metric == 'val_loss':
        plt.plot(df['epoch'], df[metric], label=metric)
        plt.ylim(ymin=0, ymax=0.01)


folder, csvfile = os.path.split(args.train_out_dir)
root, experiment = os.path.split(os.path.split(folder)[0])

plt.title(experiment)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.legend()
plt.savefig(f"{folder}/{experiment}.png")
plt.clf()
plt.close()
print(folder + ".png")
print(f"\nsaved figure to {folder}")

# TODO: load 'loss' from the yaml
# TODO: reuse dict-to-yaml from utils
# TODO: csv viewer extension
# TODO: source .venv/bin/activate
# TODO: loop over everything and, if it is a metric, plot 1 - metric
# TODO: colour coding via color=[]
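A hedged example of invoking this script from the repository root (the experiment folder and CSV filename are illustrative); note that, despite the argument's name, the value is the path to the statistics CSV itself, since it is passed directly to pd.read_csv:

python src/sfransen/csv2graph.py train_output/example_experiment/output/train_stats.csv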
107
src/sfransen/data_path_rename.py
Executable file
@ -0,0 +1,107 @@
# X:\datasets\radboud_lesions_2022
# /data/pca-rad/experiments/fix-radboud-data/export/pat0777-2016.nii.gz

# 'experiments/fix-radboud-data/export'
# 'datasets/radboud_lesions_2022'

########### Read in the file ##############
# with open('./../data/Nijmegen paths/t2.txt', 'r') as file:
#     filedata = file.read()

# filedata = filedata.replace('/diffusie_cro/b-1400', '/t2_tse_tra')
# import numpy as np
# print(np.shape(filedata))

############# check for missing files ############
import numpy as np
import os

serie = 'b2000'
file1 = f'./../data/Nijmegen paths/{serie}.txt'
file2 = f'./../data/Test paths/{serie}.txt'
file3 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus
with open(file3) as fp:
    data = fp.readlines()

UMCG = [i for i, s in enumerate(data) if '-U-' in s]
Martini = [i for i, s in enumerate(data) if '-M-' in s]
patient_id = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(data[index]))))[-9:-6] for index in Martini]
patient_id = [item.replace("/", "") for item in patient_id]
patient_id = [item.replace("y", "") for item in patient_id]
patient_id.sort()
Martini_data = [data[index] for index in Martini]

Martini_Diffusie = [i for i, s in enumerate(Martini_data) if not 'Diffusie' in s]

print(patient_id)
print(np.shape(Martini_data))
print(len(Martini_Diffusie))
print([Martini_data[index] for index in Martini_Diffusie])
quit()

############# merge radboud and umcg files ############
import numpy as np
import os

serie = 'b2000'
file1 = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
# X:\qvlohuizen\fastMRI_PCa\data\path_lists\pirads_4plus
with open(file1) as fp:
    data = fp.read()  # read as a single string so the contents of file2 can be appended below
# Reading data from file2
with open(file2) as fp:
    data2 = fp.read()

# Merging the 2 files: append the data of file2 starting on the next line.
data += "\n"
data += data2

with open(f'{serie}.txt', 'w') as fp:
    fp.write(data)

quit()

############ find more martini data ##############
import numpy as np

high_b_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_dwi.txt'
adc_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_adc.txt'
t2_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/current_t2w.txt'
seg_data = './../../qvlohuizen/fastMRI_PCa/data/path_lists/pirads_4plus/seg_new.txt'

with open(high_b_data, 'r') as file:
    high_b_data = file.readlines()
with open(adc_data, 'r') as file:
    adc_data = file.readlines()
with open(t2_data, 'r') as file:
    t2_data = file.readlines()
with open(seg_data, 'r') as file:
    seg_data = file.readlines()


indices = [i for i, s in enumerate(high_b_data) if 'Diffusie_b0-50-400-800-2000' in s]

b_2000 = [high_b_data[index] for index in indices]
b_50 = [item.replace("b-2000", "b-50") for item in b_2000]
b_400 = [item.replace("b-2000", "b-400") for item in b_2000]
b_800 = [item.replace("b-2000", "b-800") for item in b_2000]
adc = [adc_data[index] for index in indices]
t2 = [t2_data[index] for index in indices]
seg = [seg_data[index] for index in indices]

with open('b50.txt', 'w') as f:
    f.writelines(b_50)
with open('b400.txt', 'w') as f:
    f.writelines(b_400)
with open('b800.txt', 'w') as f:
    f.writelines(b_800)
with open('b2000.txt', 'w') as f:
    f.writelines(b_2000)
with open('adc.txt', 'w') as f:
    f.writelines(adc)
with open('t2.txt', 'w') as f:
    f.writelines(t2)
with open('seg.txt', 'w') as f:
    f.writelines(seg)
20
src/sfransen/dump_params.py
Executable file
@ -0,0 +1,20 @@
import yaml

from sfransen.utils_quintin import print_p  # assumed import path; print_p is defined in utils_quintin.py in this package


def dump_dict_to_yaml(
        data: dict,
        target_dir: str,
        filename: str = "settings") -> None:
    """ Writes the given dictionary as a yaml to the target directory.

    Parameters:
    `data (dict)`: dictionary of data.
    `target_dir (str)`: directory where the yaml will be saved.
    `filename (str)`: name of the file without extension.
    """
    print("\nParameters")
    for pair in data.items():
        print(f"\t{pair}")
    print()

    path = f"{target_dir}/{filename}.yml"
    print_p(f"Wrote yaml to: {path}")
    with open(path, 'w') as outfile:
        yaml.dump(data, outfile, default_flow_style=False)
75
src/sfransen/pred_label_images.py
Executable file
@ -0,0 +1,75 @@
# A script which returns the predictions with label overlays and stores them in a new folder named "report"

# modules
from preprocessing_function import preprocess
import numpy as np
from tqdm import tqdm
from helpers import *
import tensorflow as tf
from tensorflow import keras
# explicit imports for names used below (they may also be provided via helpers)
import SimpleITK as sitk
from os import path

# NOTE: args, DATA_DIR, IMAGE_SHAPE, TARGET_SPACING, sequences, idx, prefix and the
# self.* attributes below are placeholders that still need to be wired up.

# model load
Model_path = '..'
reconstructed_model = tf.keras.models.load_model(Model_path)
reconstructed_model.compile(loss='',
                            optimizer='',
                            metrics=[''])

#### image load

# args.series = b-50 b-400 etc.
# DATA_DIR = data dir
# IMAGE_SHAPE =
# TARGET_SPACING =

images, image_paths = {s: [] for s in args.series}, {}
segmentations = []
print_(f"> Loading images into RAM...")

# Read the image paths from the data directory.
# Text files are expected to have the name "[series_name].txt"
for s in args.series:
    with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f:
        image_paths[s] = [l.strip() for l in f.readlines()]
with open(path.join(DATA_DIR, "seg.txt"), 'r') as f:
    seg_paths = [l.strip() for l in f.readlines()]
num_images = len(seg_paths)

# Read and preprocess each of the paths for each series, and the segmentations.
for img_idx in tqdm(range(num_images)):  # use [:40] for fewer images
    img_s = {s: sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32)
             for s in args.series}
    seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32)
    img_n, seg_n = preprocess(img_s, seg_s,
                              shape=IMAGE_SHAPE, spacing=TARGET_SPACING)
    for seq in img_n:
        images[seq].append(img_n[seq])
    segmentations.append(seg_n)

self._images = np.stack(
    [np.stack(self.images[k], axis=0) for k in sequences], axis=-1)

# image predict
predictions = reconstructed_model.predict(images[idx])

# heatmap + input image

# save in folder output

# Export scan crops and targets once;
# they don't change during training, so we export them only once.
for i in range(min(self.num_images, self.validation_set[0].shape[0])):
    for s_idx, s in enumerate(sequences):
        img_s = sitk.GetImageFromArray(
            self.validation_set[0][i][..., s_idx].squeeze().T)
        sitk.WriteImage(img_s, f"{prefix}_{i:03d}_{s}.nii.gz")
    seg_s = sitk.GetImageFromArray(
        self.validation_set[1][i].squeeze().T)
    sitk.WriteImage(seg_s, f"{prefix}_{i:03d}_seg.nii.gz")

    prd_s = sitk.GetImageFromArray(predictions[i].squeeze().T)
    prd_bin_s = sitk.GetImageFromArray(
        np.around(predictions[i]).astype(np.float32).squeeze().T)
    sitk.WriteImage(prd_s, f"{self.prefix}_{i:03d}_pred.nii.gz")
    sitk.WriteImage(prd_bin_s, f"{self.prefix}_{i:03d}_pred_bin.nii.gz")
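A minimal sketch of the "heatmap + input image" step that is still marked as a placeholder above, assuming a 2-D input slice and a matching 2-D prediction slice have already been extracted (the names input_slice and pred_slice, the slice extraction itself, and the report/ output folder are illustrative):

import os
import matplotlib.pyplot as plt

os.makedirs("report", exist_ok=True)            # folder named "report", as described at the top of the script
plt.imshow(input_slice, cmap="gray")            # input image as greyscale background
plt.imshow(pred_slice, cmap="hot", alpha=0.4)   # prediction heatmap blended on top
plt.axis("off")
plt.savefig("report/case_000_overlay.png", bbox_inches="tight")
plt.close()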
82
src/sfransen/utils_quintin.py
Executable file
@ -0,0 +1,82 @@
import yaml
from typing import List
from collections import deque
from pathlib import Path
import numpy as np
import SimpleITK as sitk


def print_p(text, end="\n"):
    """ Print function for use on Peregrine. It needs a flush before printing. """
    print(text, flush=True, end=end)


def list_from_file(path: str) -> List:
    """ Returns a list of all items on each line of the text file referenced in
    the given path.

    Parameters:
    `path (str)`: path to the text file
    """
    print_p(path)
    return [line.strip() for line in open(path, "r")]


def dump_dict_to_yaml(
        data: dict,
        target_dir: str,
        filename: str = "settings",
        verbose: bool = True) -> None:
    """ Writes the given dictionary as a yaml to the target directory.

    Parameters:
    `data (dict)`: dictionary of data.
    `target_dir (str)`: directory where the yaml will be saved.
    `filename (str)`: name of the file without extension.
    """
    if verbose:
        print("\nParameters")
        for pair in data.items():
            print(f"\t{pair}")
        print()

    path = f"{target_dir}/{filename}.yml"
    print_p(f"Wrote yaml to: {path}")
    with open(path, 'w') as outfile:
        yaml.dump(data, outfile, default_flow_style=False)


def read_yaml_to_dict(path: str) -> dict:
    with open(path, 'r') as stream:
        try:
            parsed_yaml = yaml.safe_load(stream)
            # print(parsed_yaml)
            return parsed_yaml
        except yaml.YAMLError as exc:
            print(exc)


def create_dirs_if_not_exists(dirs: List[str]) -> None:
    """ Creates the list of supplied directories if they don't exist yet.

    Parameters:
    `dirs (List[str] or str)`: list of strings representing the directories that
        need to be created
    """

    if isinstance(dirs, str):
        dirs = [dirs]

    for folder in dirs:
        Path(folder).mkdir(parents=True, exist_ok=True)
        print_p(f"Created directory {folder} or it already existed.")


def save_np_to_nifti(data_np: np.ndarray,
                     target_dir: str,
                     fname: str,
                     target_space: List[float] = [0.5, 0.5, 3.0]):
    """ Saves a numpy array as a nifti image with the given voxel spacing. """
    img_s = sitk.GetImageFromArray(data_np.T)
    img_s.SetSpacing(target_space)
    path = f"{target_dir}/{fname}.nii.gz"
    sitk.WriteImage(img_s, path)
    print_p(f"Saved numpy array to nifti: {path}")
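A quick usage sketch of the yaml helpers defined above (the settings values and the ./output directory are illustrative):

# Write a settings dict next to the experiment output and read it back.
create_dirs_if_not_exists("./output")                  # hypothetical target directory
settings = {"epochs": 100, "batch_size": 8}            # illustrative values
dump_dict_to_yaml(settings, "./output", filename="settings")
restored = read_yaml_to_dict("./output/settings.yml")
assert restored == settings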