diff --git a/scripts/1.U-net_chris.py b/scripts/1.U-net_chris.py index 0bff473..9ba4c20 100755 --- a/scripts/1.U-net_chris.py +++ b/scripts/1.U-net_chris.py @@ -29,6 +29,8 @@ from sfransen.DWI_exp.batchgenerator import BatchGenerator from sfransen.DWI_exp.unet import build_dual_attention_unet from focal_loss import BinaryFocalLoss +# from umcglib.utils import set_gpu +# set_gpu(gpu_idx=1) parser = argparse.ArgumentParser( description='Train a U-Net model for segmentation/detection tasks.' + @@ -125,7 +127,7 @@ with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f: num_images = len(seg_paths) # Read and preprocess each of the paths for each series, and the segmentations. -for img_idx in tqdm(range(num_images)): #[:40]): #for less images +for img_idx in tqdm(range(num_images)): #[:20]): #for less images img_s = {s: sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32) for s in args.series} seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32) diff --git a/scripts/11.visualize_multiple_frocs.py b/scripts/11.visualize_multiple_frocs.py index 09111e4..6b4cd54 100755 --- a/scripts/11.visualize_multiple_frocs.py +++ b/scripts/11.visualize_multiple_frocs.py @@ -21,12 +21,13 @@ parser.add_argument('-yaml_metric', args = parser.parse_args() if args.comparison: - colors = ['r','r','b','b','g','g','y','y'] - plot_type = ['-','--','-','--','-','--','-','--'] + colors = ['b','c','r','m','g','g','y','y'] + plot_type = ['-','-','-','--','-','--','-','--'] else: - colors = ['r','b','k','g','c','y'] + colors = ['g','b','r','g','k','c','y'] plot_type = ['-','-','-','-','-','-'] + yaml_metric = args.yaml_metric experiments = args.experiment print(experiments) @@ -39,44 +40,53 @@ sensitivity = [] fig = plt.figure(1) ax = fig.add_subplot(111) -False_possitives_mean = np.linspace(0, 5, 200) +False_possitives_mean = np.linspace(0, 2.5, 200) for idx in range(len(args.experiment)): paufroc = [] - for fold in range(4): + experiment_path = [] + auroc = [] + paufroc = [] + False_possitives = [] + sensitivity = [] + for fold in range(5): # print('fold:',fold) - fold = fold + 1 experiment_metrics = {} experiment_path = f'./../train_output/{experiments[idx]}_{fold}/froc_metrics_{yaml_metric}.yml' experiment_metrics = read_yaml_to_dict(experiment_path) - pfroc = partial_auc(experiment_metrics["sensitivity"],experiment_metrics["FP_per_case"],low=0.1, high=5) + pfroc = partial_auc(experiment_metrics["sensitivity"],experiment_metrics["FP_per_case"],low=0.1, high=2.5) paufroc.append(round(pfroc,2)) False_possitives.append(experiment_metrics["FP_per_case"]) sensitivity_ = np.interp(False_possitives_mean,experiment_metrics["FP_per_case"],experiment_metrics["sensitivity"]) sensitivity.append(sensitivity_) + print(f'pfROC van {experiments[idx]}: {paufroc}') # calculate mean and std sensitivity_mean = np.squeeze(np.mean(sensitivity,axis=0)) sensitivity_std = np.multiply(np.squeeze(np.std(sensitivity,axis=0)),2) plt.plot(False_possitives_mean, sensitivity_mean,color=colors[idx],linestyle=plot_type[idx]) - plt.fill_between(False_possitives_mean, np.subtract(sensitivity_mean,sensitivity_std), np.add(sensitivity_mean,sensitivity_std),alpha=0.15,color=colors[idx],) + plt.fill_between(False_possitives_mean, np.subtract(sensitivity_mean,sensitivity_std), np.add(sensitivity_mean,sensitivity_std),alpha=0.10,color=colors[idx],) ax.set(xscale="log") ax.axes.xaxis.set_minor_locator(tkr.LogLocator(base=10, subs='all')) ax.axes.xaxis.set_minor_formatter(tkr.NullFormatter()) ax.axes.xaxis.set_major_formatter(tkr.ScalarFormatter()) - 
ax.axes.grid(True, which="both", ls="--", c='#d3d3d3') - ax.axes.set_xlim(left=0.1, right=5) - ax.axes.xaxis.set_major_locator(tkr.FixedLocator([0.1,0.5,1,5])) + ax.axes.get_xaxis() + ax.axes.get_yaxis() + ax.axes.set_xlim(left=0.1, right=2.5) + ax.axes.xaxis.set_major_locator(tkr.FixedLocator([0.1,0.5,1,2.5])) fpr = [] tpr = [] fpr_mean = np.linspace(0, 1, 200) for idx in range(len(args.experiment)): + aufroc = [] + experiment_path = [] auroc = [] - for fold in range(4): - fold= fold + 1 + fpr = [] + tpr = [] + for fold in range(5): print('fold:',fold) experiment_metrics = {} experiment_path = f'./../train_output/{experiments[idx]}_{fold}/froc_metrics_{yaml_metric}.yml' @@ -92,7 +102,7 @@ for idx in range(len(args.experiment)): plt.figure(2) plt.plot(fpr_mean, tpr_mean,color=colors[idx],linestyle=plot_type[idx]) - plt.fill_between(fpr_mean, np.subtract(tpr_mean,tpr_std), np.add(tpr_mean,tpr_std),alpha=0.15,color=colors[idx],) + plt.fill_between(fpr_mean, np.subtract(tpr_mean,tpr_std), np.add(tpr_mean,tpr_std),alpha=0.10,color=colors[idx],) print(auroc) experiments = [exp.replace('train_10h_', '') for exp in experiments] @@ -104,11 +114,13 @@ concat_func = lambda x,y: x + " (" + str(y) + ")" experiments_paufroc = list(map(concat_func,experiments,paufroc)) # list the map function plt.figure(1) -plt.title('fROC curve') -plt.xlabel('False positive per case') -plt.ylabel('Sensitivity') +plt.title('fROC curve', fontsize=20) +plt.xlabel('False positive lesions', fontsize=18) +plt.ylabel('Sensitivity', fontsize=18) # plt.legend(experiments_paufroc,loc='lower right') -plt.legend(['calculated with b50-400','calculated with b50-800'],loc='lower right') +# plt.legend(['$T2_{tra}$ $ADC_{b50-b400}$ $b1400_{b50-b400}$','$T2_{tra}$ $ADC_{b50-b800}$ $b1400_{b50-b800}$','$T2_{tra}$ $ADC_{b50-b400-b800}$ $b1400_{b50-b400-b800}$'],loc='lower right') +# plt.legend(['$T2_{tra}$ $ADC_{b50-b400-b800}$ $b1400_{b50-b400-b800}$','$T2_{tra}$ b50 b400 b800'],loc='lower right') +plt.legend(["All b-values","Omitting b800","Omitting b400"],loc='lower right',fontsize=16) # plt.xlim([0,50]) plt.grid() @@ -120,11 +132,14 @@ concat_func = lambda x,y: x + " (" + str(y) + ")" experiments_auroc = list(map(concat_func,experiments,auroc)) # list the map function plt.figure(2) -plt.title('ROC curve') +plt.title('ROC curve',fontsize=20) # plt.legend(experiments_auroc,loc='lower right') -plt.legend(['calculated with b50-400','calculated with b50-800'],loc='lower right') -plt.xlabel('False positive rate') -plt.ylabel('True positive rate') +# plt.legend(['$T2_{tra}$ $ADC_{b50-b400-b800}$ $b1400_{b50-b400-b800}$','$T2_{tra}$ b50 b400 b800'],loc='lower right') +# plt.legend(['$T2_{tra}$ $ADC_{b50-b400}$ $b1400_{b50-b400}$','$T2_{tra}$ $ADC_{b50-b800}$ $b1400_{b50-b800}$','$T2_{tra}$ $ADC_{b50-b400-b800}$ $b1400_{b50-b400-b800}$'],loc='lower right') +plt.legend(["All b-values","Omitting b800","Omitting b400"],loc='lower right',fontsize=16) + +plt.xlabel('False positive rate',fontsize=18) +plt.ylabel('True positive rate',fontsize=18) plt.ylim([0,1]) plt.xlim([0,1]) plt.grid() diff --git a/scripts/13.froc_umcglib.py b/scripts/13.froc_umcglib.py new file mode 100755 index 0000000..aea3486 --- /dev/null +++ b/scripts/13.froc_umcglib.py @@ -0,0 +1,192 @@ +from inspect import _ParameterKind +import SimpleITK as sitk +import tensorflow as tf +from tensorflow.keras.models import load_model +from focal_loss import BinaryFocalLoss +import numpy as np +import multiprocessing +from functools import partial +import os +from os import path +from tqdm 
import tqdm +import argparse + +from sfransen.utils_quintin import * +from sfransen.DWI_exp.helpers import * +from sfransen.DWI_exp.preprocessing_function import preprocess +from sfransen.DWI_exp.callbacks import dice_coef +#from sfransen.FROC.blob_preprocess import * +from sfransen.FROC.cal_froc_from_np import * +from sfransen.load_images import load_images_parrallel +from sfransen.DWI_exp.losses import weighted_binary_cross_entropy +from umcglib.froc import * +from umcglib.binarize import dynamic_threshold + +parser = argparse.ArgumentParser( + description='Calculate the froc metrics and store in froc_metrics.yml') +parser.add_argument('-experiment', + help='Title of experiment') +parser.add_argument('--series', '-s', + metavar='[series_name]', required=True, nargs='+', + help='List of series to include') +parser.add_argument('-fold', + default='', + help='Fold number to evaluate') +args = parser.parse_args() + +# if __name__ == '__main__': +# the guard above is needed to fix the multiprocessing fork problem (Windows vs Linux) + +######## CUDA ################ +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + +######## constants ############# +SERIES = args.series +series_ = '_'.join(args.series) +EXPERIMENT = args.experiment +# fold = args.fold +predictions_added = [] +segmentations_added = [] + +for fold in range(0,5): + + MODEL_PATH = f'./../train_output/{EXPERIMENT}_{series_}_{fold}/models/{EXPERIMENT}_{series_}_{fold}.h5' + YAML_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}' + IMAGE_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}' + + # MODEL_PATH = f'./../train_output/{EXPERIMENT}_{series_}/models/{EXPERIMENT}_{series_}.h5' + # YAML_DIR = f'./../train_output/{EXPERIMENT}_{series_}' + # IMAGE_DIR = f'./../train_output/{EXPERIMENT}_{series_}' + + DATA_DIR = "./../data/Nijmegen paths/" + TARGET_SPACING = (0.5, 0.5, 3) + INPUT_SHAPE = (192, 192, 24, len(SERIES)) + IMAGE_SHAPE = INPUT_SHAPE[:3] + + DATA_SPLIT_INDEX = read_yaml_to_dict(f'./../data/Nijmegen paths/train_val_test_idxs_{fold}.yml') + # DATA_SPLIT_INDEX = read_yaml_to_dict(f'./../data/Nijmegen paths/train_val_test_idxs.yml') + TEST_INDEX = DATA_SPLIT_INDEX['test_set0'] + + N_CPUS = 12 + + + ########## test with old method ############# + print_(f"> Loading images into RAM...") + + image_paths = {} + + for s in SERIES: + with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f: + image_paths[s] = [l.strip() for l in f.readlines()] + with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f: + seg_paths = [l.strip() for l in f.readlines()] + num_images = len(seg_paths) + + images = [] + images_list = [] + segmentations = [] + + # Read and preprocess each of the paths for each series, and the segmentations. + for img_idx in tqdm(range(len(TEST_INDEX))): #[:40]): #for fewer images + # print('images number',[TEST_INDEX[img_idx]]) + img_s = {f'{s}': sitk.ReadImage(image_paths[s][TEST_INDEX[img_idx]], sitk.sitkFloat32) for s in SERIES} + seg_s = sitk.ReadImage(seg_paths[TEST_INDEX[img_idx]], sitk.sitkFloat32) + img_n, seg_n = preprocess(img_s, seg_s, + shape=IMAGE_SHAPE, spacing=TARGET_SPACING) + for seq in img_n: + images.append(img_n[f'{seq}']) + images_list.append(images) + images = [] + segmentations.append(seg_n) + + images_list = np.transpose(images_list, (0, 2, 3, 4, 1)) + + print('>>>>> size image_list number 2:', np.shape(images_list), '. equal to: (5, 192, 192, 24, 3)?')
+ + ########### load model ################## + print(' >>>>>>> LOAD MODEL <<<<<<<<<') + + dependencies = { + 'dice_coef': dice_coef, + 'weighted_cross_entropy_fn':weighted_binary_cross_entropy + } + reconstructed_model = load_model(MODEL_PATH, custom_objects=dependencies) + # reconstructed_model.summary(line_length=120) + + # make predictions on all TEST_INDEX + print(' >>>>>>> START prediction <<<<<<<<<') + predictions_blur = reconstructed_model.predict(images_list, batch_size=1) + + ############# preprocess ################# + # preprocess predictions by removing the blur and making individual blobs + print('>>>>>>>> START preprocess') + + def move_dims(arr): + # UMCG numpy dimensions convention: dims = (batch, width, height, depth) + # Joeran numpy dimensions convention: dims = (batch, depth, height, width) + arr = np.moveaxis(arr, 3, 1) + arr = np.moveaxis(arr, 3, 2) + return arr + + # Joeran has his numpy arrays ordered differently. + predictions_blur = move_dims(np.squeeze(predictions_blur)) + segmentations = move_dims(np.squeeze(segmentations)) + # predictions = [preprocess_softmax(pred, threshold="dynamic")[0] for pred in predictions_blur] + predictions = predictions_blur + print("the size of predictions is:",np.shape(predictions)) + # Remove outer edges + zeros = np.zeros(np.shape(predictions)) + test = np.squeeze(predictions)[:,2:-2,2:190,2:190] + zeros[:,2:-2,2:190,2:190] = test + predictions = zeros + + # # perform FROC with Joeran's method + # metrics = evaluate(y_true=segmentations, y_pred=predictions) + # dump_dict_to_yaml(metrics, YAML_DIR, "froc_metrics_focal_10_test", verbose=True) + + ############# save image as example ################# + # save image number 6 + # img_s = sitk.GetImageFromArray(predictions_blur[6].squeeze()) + # sitk.WriteImage(img_s, f"{IMAGE_DIR}/predictions_blur_006_dyn_0.6.nii.gz") + + # img_s = sitk.GetImageFromArray(predictions[6].squeeze()) + # sitk.WriteImage(img_s, f"{IMAGE_DIR}/predictions_006_dyn_0.6.nii.gz") + + # img_s = sitk.GetImageFromArray(segmentations[6].squeeze()) + # sitk.WriteImage(img_s, f"{IMAGE_DIR}/segmentations_006_dyn_0.6.nii.gz") + + # img_s = sitk.GetImageFromArray(np.transpose(images_list[6,:,:,:,0].squeeze())) + # sitk.WriteImage(img_s, f"{IMAGE_DIR}/t2_006_dyn_0.6.nii.gz") + + if fold == 0: + segmentations_added = segmentations + predictions_added = predictions + else: + segmentations_added = np.append(segmentations_added,segmentations,axis=0) + predictions_added = np.append(predictions_added,predictions,axis=0) + +# FROC method from umcglib +stats = calculate_froc(y_true=segmentations_added, + y_pred=predictions_added, + preprocess_func=dynamic_threshold, + dynamic_threshold_factor=0.5, + minimum_confidence=0.1, + bootstrap = 1000, + min_overlap = 0.01, + overlap_function = 'dsc') + + +dump_stats_to_yaml(stats, YAML_DIR, "umcglib_stats_overlap_0.01", verbose=True) + +quit() +plot_multiple_froc( + sensitivities=[np.array(stats['sensitivity'])], + fp_per_patient=[np.array(stats["fp_per_patient"])], + ci_low=[np.array(stats['sens_95_boot_ci_low'])], + ci_high=[np.array(stats["sens_95_boot_ci_high"])], + model_names=["test only"], + title="testtest", + height=12, width=15, + save_as="froc_test_0.5_conf_0.1_overlap_0.1_dsc.png", + xlims=(0.1, 5) + ) \ No newline at end of file diff --git a/scripts/14.saliency.py b/scripts/14.saliency.py new file mode 100755 index 0000000..e51738d --- /dev/null +++ b/scripts/14.saliency.py @@ -0,0 +1,149 @@ +import argparse +from os import path +import SimpleITK as sitk
+import tensorflow as tf +from tensorflow.keras.models import load_model +import numpy as np +import os + +from sfransen.utils_quintin import * +from sfransen.DWI_exp import preprocess +from sfransen.DWI_exp.helpers import * +from sfransen.DWI_exp.callbacks import dice_coef +from sfransen.DWI_exp.losses import weighted_binary_cross_entropy + +from sfransen.FROC.blob_preprocess import * +from sfransen.FROC.cal_froc_from_np import * +from sfransen.load_images import load_images_parrallel +from sfransen.Saliency.base import * +from sfransen.Saliency.integrated_gradients import * + +parser = argparse.ArgumentParser( + description='Compute an Integrated Gradients saliency map for one test image and store it as .npy') +parser.add_argument('-experiment', + help='Title of experiment') +parser.add_argument('--series', '-s', + metavar='[series_name]', required=True, nargs='+', + help='List of series to include') +args = parser.parse_args() + +######## CUDA ################ +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + +######## constants ############# +SERIES = args.series +series_ = '_'.join(args.series) +EXPERIMENT = args.experiment +fold = 0 +# img_idx = 371 +img_idx = 634 + + +predictions_added = [] +segmentations_added = [] + +MODEL_PATH = f'./../train_output/{EXPERIMENT}_{series_}_{fold}/models/{EXPERIMENT}_{series_}_{fold}.h5' +YAML_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}' +IMAGE_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}' + +# MODEL_PATH = f'./../train_output/{EXPERIMENT}_{series_}/models/{EXPERIMENT}_{series_}.h5' +# YAML_DIR = f'./../train_output/{EXPERIMENT}_{series_}' +# IMAGE_DIR = f'./../train_output/{EXPERIMENT}_{series_}' + +DATA_DIR = "./../data/Nijmegen paths/" +TARGET_SPACING = (0.5, 0.5, 3) +INPUT_SHAPE = (192, 192, 24, len(SERIES)) +IMAGE_SHAPE = INPUT_SHAPE[:3] + +DATA_SPLIT_INDEX = read_yaml_to_dict(f'./../data/Nijmegen paths/train_val_test_idxs_{fold}.yml') +TEST_INDEX = DATA_SPLIT_INDEX['test_set0'] + +N_CPUS = 12 + + +print(f"> Loading images into RAM...") + +image_paths = {} + +for s in SERIES: + with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f: + image_paths[s] = [l.strip() for l in f.readlines()] +with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f: + seg_paths = [l.strip() for l in f.readlines()] +num_images = len(seg_paths) + +images = [] +images_list = [] +segmentations = [] + +# Read and preprocess each of the paths for each series, and the segmentations. + # print('images number',[TEST_INDEX[img_idx]]) +img_s = {f'{s}': sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32) for s in SERIES} +seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32) +img_n, seg_n = preprocess(img_s, seg_s, + shape=IMAGE_SHAPE, spacing=TARGET_SPACING) +for seq in img_n: + images.append(img_n[f'{seq}']) +images_list.append(images) +images = [] +segmentations.append(seg_n) + +images_list = np.transpose(images_list, (0, 2, 3, 4, 1)) + +print('>>>>> size image_list number 2:', np.shape(images_list), '. equal to: (5, 192, 192, 24, 3)?')
+ +########### load model ################## +print(' >>>>>>> LOAD MODEL <<<<<<<<<') + +dependencies = { + 'dice_coef': dice_coef, + 'weighted_cross_entropy_fn':weighted_binary_cross_entropy +} +reconstructed_model = load_model(MODEL_PATH, custom_objects=dependencies) + # reconstructed_model.summary(line_length=120) + +# make prediction on the selected test image +print(' >>>>>>> START prediction <<<<<<<<<') +predictions_blur = reconstructed_model.predict(images_list, batch_size=1) + + +############# preprocess ################# +# preprocess predictions by removing the blur and making individual blobs +print('>>>>>>>> START preprocess') + +def move_dims(arr): + # UMCG numpy dimensions convention: dims = (batch, width, height, depth) + # Joeran numpy dimensions convention: dims = (batch, depth, height, width) + arr = np.moveaxis(arr, 3, 1) + arr = np.moveaxis(arr, 3, 2) + return arr + +# Joeran has his numpy arrays ordered differently. + +predictions_blur = move_dims(np.squeeze(predictions_blur,axis=4)) +segmentations = move_dims(segmentations) +# predictions = [preprocess_softmax(pred, threshold="dynamic")[0] for pred in predictions_blur] +predictions = predictions_blur +print("the size of predictions is:",np.shape(predictions)) +# Remove outer edges +zeros = np.zeros(np.shape(predictions)) +test = predictions[:,2:-2,2:190,2:190] +zeros[:,2:-2,2:190,2:190] = test +predictions = zeros +print(np.shape(predictions)) +######### Build Saliency heatmap ############## +print(' >>>>>>> Build saliency map <<<<<<<<<') + +ig = IntegratedGradients(reconstructed_model) +saliency_map = [] +# images_list has batch size 1 here, so this loop runs once over the full batch. +for img_idx in range(len(images_list)): + # input_img = np.resize(images_list[img_idx],(1,192,192,24,len(SERIES))) + saliency_map.append(ig.get_mask(images_list).numpy()) + +print("size saliency map",np.shape(saliency_map)) +np.save(f'{YAML_DIR}/saliency_new23',saliency_map) +np.save(f'{YAML_DIR}/images_list_new23',images_list) +np.save(f'{YAML_DIR}/segmentations_new23',segmentations) +np.save(f'{YAML_DIR}/predictions_new23',predictions) + + diff --git a/scripts/15.plot_paper_graphs.py b/scripts/15.plot_paper_graphs.py new file mode 100755 index 0000000..e59a1ef --- /dev/null +++ b/scripts/15.plot_paper_graphs.py @@ -0,0 +1,34 @@ +from umcglib.froc import calculate_froc, plot_froc, plot_multiple_roc, partial_auc, plot_multiple_froc +from sfransen.utils_quintin import * +import numpy as np + +experiment_path1 = './train_output/calc_exp_t2_b1400calc_adccalc_4/umcglib_stats_overlap_0.01.yml' +experiment_path2 = './train_output/calc_exp_t2_b1400calc2_adccalc2_4/umcglib_stats_overlap_0.01.yml' +experiment_path3 = './train_output/calc_exp_t2_b1400calc3_adccalc3_4/umcglib_stats_overlap_0.01.yml' + +## !!!!!!!!!!!!!!!!!!
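+# NOTE: stats1/stats2 are deliberately cross-assigned below so the order matches model_names: adccalc2 = b50-b400 ("b50-400") and adccalc = b50-b800 ("b50-800"), as in the titles of 17.plot_paper_adc_diff.py.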
+stats2 = read_yaml_to_dict(experiment_path1) +stats1 = read_yaml_to_dict(experiment_path2) +stats3 = read_yaml_to_dict(experiment_path3) + +plot_multiple_froc( + sensitivities=[np.array(stats1['sensitivity']),np.array(stats2['sensitivity']),np.array(stats3['sensitivity'])], + fp_per_patient=[np.array(stats1["fp_per_patient"]),np.array(stats2["fp_per_patient"]),np.array(stats3["fp_per_patient"])], + ci_low=[np.array(stats1['sens_95_boot_ci_low']),np.array(stats2['sens_95_boot_ci_low']),np.array(stats3['sens_95_boot_ci_low'])], + ci_high=[np.array(stats1["sens_95_boot_ci_high"]),np.array(stats2["sens_95_boot_ci_high"]),np.array(stats3["sens_95_boot_ci_high"])], + model_names=["b50-400","b50-800","b50-400-800"], + title="fROC plot", + height=12, width=15, + xlims=(0.1, 5.0), + save_as="fROC_overlap_0.01.png") + +plot_multiple_roc( + tpr=[np.array(stats1['roc_tpr']),np.array(stats2['roc_tpr']),np.array(stats3['roc_tpr'])], + fpr=[np.array(stats1["roc_fpr"]),np.array(stats2["roc_fpr"]),np.array(stats3["roc_fpr"])], + ci_low=[np.array(stats1['sens_95_boot_ci_low_roc']),np.array(stats2['sens_95_boot_ci_low_roc']),np.array(stats3['sens_95_boot_ci_low_roc'])], + ci_high=[np.array(stats1["sens_95_boot_ci_high_roc"]),np.array(stats2["sens_95_boot_ci_high_roc"]),np.array(stats3["sens_95_boot_ci_high_roc"])], + model_names=["b50_400","b50_800","b50-400-800"], + title="ROC plot", + height=12, width=15, + save_as="ROC_overlap_0.01.png") + diff --git a/scripts/16.plot_paper_saliency.py b/scripts/16.plot_paper_saliency.py new file mode 100755 index 0000000..1fb6648 --- /dev/null +++ b/scripts/16.plot_paper_saliency.py @@ -0,0 +1,89 @@ +import argparse +from ast import Slice +from email.mime import image +import numpy as np +import matplotlib.pyplot as plt +import SimpleITK as sitk + +parser = argparse.ArgumentParser( + description='Plot the saliency maps of the three diffusion protocols for the paper figure') +parser.add_argument('-experiment', + help='Title of experiment') +parser.add_argument('--series', '-s', + metavar='[series_name]', required=True, nargs='+', + help='List of series to include') +args = parser.parse_args() + +########## constants ################# +SERIES = args.series +series_ = '_'.join(args.series) +EXPERIMENT = args.experiment +fold = 0 + +experiments = ['calc_exp_t2_b1400calc3_adccalc3_0','calc_exp_t2_b1400calc2_adccalc2_0','calc_exp_t2_b1400calc_adccalc_0'] +fig, axes = plt.subplots(3,len(SERIES)+1) + +for idx,experiment in enumerate(experiments): + print(idx) + SALIENCY_DIR = f'./../train_output/{experiment}/saliency_new.npy' #_new23 + IMAGES_DIR = f'./../train_output/{experiment}/images_list_new.npy' #_new23 + SEGMENTATION_DIR = f'./../train_output/{experiment}/segmentations_new.npy' #_new23 + predictions_DIR = f'./../train_output/{experiment}/predictions_new.npy' #_new23 + SLIDE = 10 #pat 371 + # SLIDE = 7 #pat 23 + + ########## load saliency map ############ + heatmap = np.load(SALIENCY_DIR) + heatmap = np.squeeze(heatmap) + + ######### load images and segmentations ########### + images_list = np.load(IMAGES_DIR) + images_list = np.squeeze(images_list) + segmentations = np.squeeze(np.load(SEGMENTATION_DIR)) + predictions = np.squeeze(np.load(predictions_DIR)) + + + ######## take average ########## + # len(heatmap) is smaller than the maximum number of images + # if len(heatmap) < 100: + # heatmap = np.mean(abs(heatmap),axis=0) + heatmap = abs(heatmap) + + print(np.shape(predictions)) + print(np.shape(segmentations)) + print(np.shape(images_list)) + + max_value = np.amax(heatmap[:,:,SLIDE,:]) + min_value = np.amin(heatmap[:,:,SLIDE,:]) + + TITLES = ['$T2_{tra}$','$DWI_{b1400}$','ADC','Prediction'] + titles = ['All b-values','Omitting b800','Omitting b400'] + for indx in range(len(SERIES)+1): + print(indx) + if indx == len(SERIES): + im = axes[idx,indx].imshow(predictions[SLIDE,:,:],cmap='gray') + print(np.amax(predictions[SLIDE,:,:])) + seg = segmentations[SLIDE,:,:] + axes[idx,indx].imshow(np.ma.masked_where(seg < 0.10, seg),alpha=0.5, vmin=np.amin(seg), vmax=np.amax(seg), cmap='bwr') + if idx == 0: + axes[idx,indx].set_title(TITLES[indx]) + axes[idx,indx].set_axis_off() + axes[idx,indx].set_axis_off() + else: + heatmap_i = np.transpose(np.squeeze(heatmap[:,:,SLIDE,indx])) + im = axes[idx,indx].imshow(np.transpose(images_list[:,:,SLIDE,indx]),cmap='gray') + axes[idx,indx].imshow(np.ma.masked_where(heatmap_i < 0.10, heatmap_i),vmin=min_value, vmax=max_value*0.5, alpha=0.25, cmap='jet') + if idx == 0: + axes[idx,indx].set_title(TITLES[indx]) + # axes[idx,indx].set_axis_off() + axes[idx,indx].set_yticks([]) + axes[idx,indx].set_xticks([]) + if indx == 0: + axes[idx,indx].set_ylabel(titles[idx]) + + + # cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.5, orientation='horizontal') + # cbar.set_ticks([min_value,max_value]) +# cbar.set_ticklabels(['less important', 'important']) +# fig.suptitle('Saliency map', fontsize=16) +plt.savefig(f'./../train_output/saliency_map_paper_pat23.png', dpi=300) \ No newline at end of file diff --git a/scripts/17.plot_paper_adc_diff.py b/scripts/17.plot_paper_adc_diff.py new file mode 100755 index 0000000..17624fa --- /dev/null +++ b/scripts/17.plot_paper_adc_diff.py @@ -0,0 +1,92 @@ +import os +from os import path +import SimpleITK as sitk +from sfransen.DWI_exp import preprocess +import numpy as np +import matplotlib.pyplot as plt + +SERIES = ['adccalc2','adccalc','adccalc3'] +TARGET_SPACING = (0.5, 0.5, 3) +INPUT_SHAPE = (192, 192, 24, len(SERIES)) +IMAGE_SHAPE = INPUT_SHAPE[:3] + +DATA_DIR = "./../data/Nijmegen paths/" +########## test with old method ############# +image_paths = {} +for s in SERIES: + with open(path.join(DATA_DIR, f"{s}.txt"), 'r') as f: + image_paths[s] = [l.strip() for l in f.readlines()] +with open(path.join(DATA_DIR, f"seg.txt"), 'r') as f: + seg_paths = [l.strip() for l in f.readlines()] +num_images = len(seg_paths) + +img_idx = 371 + +images = [] +images_list = [] +segmentations = [] + +# Read and preprocess each of the paths for each series, and the segmentations.
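+# One patient (img_idx) is read for all three ADC variants and resampled to TARGET_SPACING; after the squeeze/transpose below, images_list has shape (192, 192, 24, 3).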
+    # print('images number',[TEST_INDEX[img_idx]]) +img_s = {f'{s}': sitk.ReadImage(image_paths[s][img_idx], sitk.sitkFloat32) for s in SERIES} +seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32) +img_n, seg_n = preprocess(img_s, seg_s, + shape=IMAGE_SHAPE, spacing=TARGET_SPACING) +for seq in img_n: + images.append(img_n[f'{seq}']) +images_list.append(images) +images = [] +segmentations.append(seg_n) + +images_list = np.squeeze(np.transpose(images_list, (0, 2, 3, 4, 1))) + +print(np.shape(images_list)) +SLIDE = 10 +diff1 = images_list[:,:,SLIDE,0]-images_list[:,:,SLIDE,1] +diff2 = images_list[:,:,SLIDE,0]-images_list[:,:,SLIDE,2] +diff3 = images_list[:,:,SLIDE,1]-images_list[:,:,SLIDE,2] + +fig, axes = plt.subplots(2,len(SERIES)) +fig.suptitle('ADC map differences', fontsize=16) + +maxx_2 = np.amax(images_list[:,:,SLIDE,:]) +minn_2 = np.amin(images_list[:,:,SLIDE,:]) + +TITLES = ['$1) ADC_{b50-b400}$','$2) ADC_{b50-b800}$','$3) ADC_{b50-b400-b800}$'] +for indx in range(len(SERIES)): + print(indx) + im = axes[0,indx].imshow(np.transpose(images_list[:,:,SLIDE,indx]),cmap='gray',vmin=minn_2,vmax=maxx_2) + axes[0,indx].set_title(TITLES[indx]) + axes[0,indx].set_axis_off() + axes[0,indx].set_axis_off() + +maxx_2 = np.amax(images_list[:,:,SLIDE,:]) +minn_2 = np.amin(images_list[:,:,SLIDE,:]) +# print('>>>>>>',minn_2) +cbar_ax = fig.add_axes([0.85, 0.57, 0.02, 0.25]) +cbar2 = fig.colorbar(im, cax=cbar_ax, ticks=[minn_2+0.001, maxx_2-0.001]) +cbar2.ax.set_yticklabels([f'0', f'{1.3}']) + + +maxx = np.amax([np.amax(diff1), np.amax(diff2), np.amax(diff3)]) *0.99 +minn = np.amin([np.amin(diff1), np.amin(diff2), np.amin(diff3)]) + +im = axes[1,0].imshow(np.transpose(diff1),cmap='gray',vmax = maxx,vmin=minn) +axes[1,0].set_axis_off() +axes[1,0].set_axis_off() +axes[1,0].set_title('Protocol 1-2') +im = axes[1,1].imshow(np.transpose(diff2),cmap='gray',vmax = maxx,vmin=minn) +axes[1,1].set_axis_off() +axes[1,1].set_axis_off() +axes[1,1].set_title('Protocol 1-3') + +im = axes[1,2].imshow(np.transpose(diff3),cmap='gray',vmax = maxx,vmin=minn) +axes[1,2].set_axis_off() +axes[1,2].set_axis_off() +axes[1,2].set_title('Protocol 2-3') + +fig.subplots_adjust(right=0.8) +cbar_ax = fig.add_axes([0.85, 0.15, 0.02, 0.25]) +cbar = fig.colorbar(im, cax=cbar_ax, ticks=[minn, 0, maxx]) +cbar.ax.set_yticklabels([f'{round(minn,1)}','0', f'{round(maxx,1)}']) +plt.savefig(f'./../train_output/adc_diff.png', dpi=300) \ No newline at end of file diff --git a/scripts/18.build_db.py b/scripts/18.build_db.py new file mode 100755 index 0000000..4aeb7e3 --- /dev/null +++ b/scripts/18.build_db.py @@ -0,0 +1,254 @@ +import os +from matplotlib.pyplot import table +import glob +import SimpleITK as sitk +import pandas as pd +from sqlite3 import Error +import sqlite3 + +################################ README ###################################### +# TO DO - +# remove key headers +# based on exclusion criteria +# local +# ssd -> run locally +# apply_parallel (lists) + +def print_p(text, end="\n"): + """ Print function for Peregrine. It needs a flush before printing.
""" + print(text, flush=True, end=end) + +def create_connection(db_file): + """ create a database connection to the SQLite database + specified by the db_file + :param db_file: database file + :return: Connection object or None + """ + conn = None + try: + conn = sqlite3.connect(db_file) + except Error as e: + print_p(e) + + return conn + +KEY_HEADERS = [ + "0010|0020", + "0018|0089", + "0018|0093", + "0008|103e", + "0028|0010", + "0028|0011", + "0018|9087", + "0018|0024" # Sequence Name + ] + + +INCLUDE_TAGS = [ # Attributes + "0008|0005", # Specific Character Set + "0008|0008", # Image Type + "0008|0012", # Instance Creation Date + "0008|0013", # Instance Creation Time + "0008|0016", # SOP Class UID + "0008|0018", # SOP Instance UID + "0008|0020", # Study Date + "0008|0021", # Series Date + "0008|0022", # Acquisition Date + "0008|0023", # Content Date + "0008|0030", # Study Time + "0008|0031", # Series Time + "0008|0032", # Acquisition Time + "0008|0033", # Content Time + "0008|0050", # Accession Number + "0008|0060", # Modality + "0008|0070", # Manufacturer + "0008|1010", # Station Name + "0008|1030", # Study Description + "0008|103e", # Series Description + "0008|1040", # Institutional Department Name + "0008|1090", # Manufacturer's Model Name + "0010|0020", # Patient ID + "0010|0030", # Patient's Birth Date + "0010|0040", # Patient's Sex + "0010|1010", # Patient's Age + "0010|1020", # Patient's Size + "0010|1030", # Patient's Weight + "0010|21b0", # Additional Patient History + "0012|0062", # Patient Identity Removed + "0012|0063", # De-identification Method + "0018|0015", # Body Part Examined + "0018|0020", # Scanning Sequence + "0018|0021", # Sequence Variant + "0018|0022", # Scan Options + "0018|0023", # MR Acquisition Type + "0018|0050", # Slice Thickness + "0018|0080", # Repetition Time + "0018|0081", # Echo Time + "0018|0083", # Number of Averages + "0018|0084", # Imaging Frequency + "0018|0085", # Imaged Nucleus + "0018|0087", # Magnetic Field Strength + "0018|0088", # Spacing Between Slices + "0018|0089", # Number of Phase Encoding Steps IMPORTANT + "0018|0091", # Echo Train Length + "0018|0093", # Percent Sampling IMPORTANT + "0018|0094", # Percent Phase Field of View + "0018|1000", # Device Serial Number + "0018|1030", # Protocol Name IMPORTANT -> sequence type + "0018|1310", # Acquisition Matrix IMPORTANT + "0018|1312", # In-plane Phase Encoding Direction + "0018|1314", # Flip Angle + "0018|1315", # Variable Flip Angle Flag + "0018|5100", # Patient Position + "0018|9087", # Diffusion b-value IMPORTANT + "0020|000d", # Study Instance UID + "0020|000e", # Series Instance UID + "0020|0010", # Study ID + "0020|0032", # Image Position (Patient) + "0020|0037", # Image Orientation (Patient) + "0020|0052", # Frame of Reference UID + "0020|1041", # Slice Location + "0028|0002", # Samples per Pixel + "0028|0010", # Rows IMPORTANT + "0028|0011", # Columns IMPORTANT + "0028|0030", # Pixel Spacing + "0028|0100", # Bits Allocated + "0028|0101", # Bits Stored + "0028|0106", # Smallest Image Pixel Value + "0028|0107", # Largest Image Pixel Value + "0028|1050", # Window Center + "0028|1051", # Window Width + "0040|0244", # Performed Procedure Step Start Date + "0040|0254" # Performed Procedure Step Description + ] + + +################################################################################ + + +def get_dict_from_dicom(reader, verbose=False): + headers = {} + for header_name in INCLUDE_TAGS: + headers[header_name] = None + + for k in reader.GetMetaDataKeys(): + if k in INCLUDE_TAGS: 
+                v = reader.GetMetaData(k) +                headers[k] = f"{v}" +                if verbose: +                    print_p(f"({k}) = \"{v}\"") +    headers["path"] = "" +    return headers + + +def has_different_key_headers(current_header_dict: dict, prev_header_dict): +    """ Returns True if one of the key headers differs between the two +    dictionaries supplied as arguments, else False. + +    Parameters: +    `current_header_dict (dict)`: dict from dicom (Headers from DICOM) +    `prev_header_dict (dict)`: dict from dicom (Headers from DICOM) +    returns (bool): True if important headers are different, else False +    """ +    for header in KEY_HEADERS: +        try: +            if current_header_dict[header] != prev_header_dict.get(header, None): +                return True +        except: +            continue +    return False + + +def is_patient_in_database(conn, tablename, patient): +    # Get all results for this patient from the database +    cur = conn.cursor() +    query = f"SELECT [0010|0020] FROM {tablename} WHERE [0010|0020] like '%{patient}%';" +    result = cur.execute(query).fetchall() #list of tuples +    if len(result) == 0: +        return False +    return True + + +def fill_dicom_table_RUMC_UMCG( +    tablename: str, +    database: str, +    patients_dir_RUMC: str, +    patients_dir_UMCG: str = None,  # accepted for the call below, but currently unused: UMCG handling is commented out +    devmode = False): +    """ Fills the given table with headers/tags from DICOM files from UMCG and +    RUMC. The tags are cross referenced with an include list of tags. + +    Parameters: +    `tablename (string)`: table in sqlite that will be inserted into +    `database (string)`: relative project path to .db (database) file for sqlite. +    `patients_dir_RUMC (string)`: path where patient directories are stored (RUMC) +    `patients_dir_UMCG (string)`: path where patient directories are stored (UMCG) +    """ + +    # Connect with database +    db_path = f"{os.getcwd()}{database}" +    conn = create_connection(db_path) +    print_p(f"connection made: {db_path}") + +    # patients = os.listdir(patients_dir_UMCG) + os.listdir(patients_dir_RUMC) +    patients = os.listdir(patients_dir_RUMC) +    prev_headers = {} + +    with conn: +        # Drop all rows from table if it exists. +        if False: +            conn.execute(f"DELETE FROM {tablename};") +            print_p("done deleting all records from database") + +        # loop over all patients. (RUMC and UMCG) +        for p_idx, patient in enumerate(patients): + +            print_p(f"\nPatient {p_idx}: {patient}") + +            if is_patient_in_database(conn, tablename, patient): +                print_p(f"PATIENT IS ALREADY IN DATABASE {tablename}") +                continue + +            print_p(f"patient: {patient} is not in database") +            # Find all DICOM files +            glob_pattern = f"data/raw/*/{patient}/**/*.dcm" +            dicoms_paths = glob.glob(glob_pattern, recursive=True) +            rows = [] + +            # Loop over DICOM files +            for f_idx, dcm_path in enumerate(dicoms_paths): +                if f_idx > 10 and devmode: +                    continue +                print_p(f"f{f_idx}", end=' ') + +                try: +                    reader = sitk.ImageFileReader() +                    reader.SetFileName(dcm_path) +                    reader.LoadPrivateTagsOn() +                    reader.ReadImageInformation() + +                except: +                    print_p(f"Read Image Information EXCEPTION... Skipping: {dcm_path}") +                    continue + +                curr_headers = get_dict_from_dicom(reader, verbose=False) +                curr_headers['path'] = dcm_path +                if not has_different_key_headers(curr_headers, prev_headers): +                    continue +                prev_headers = curr_headers +                rows.append(curr_headers) + +            df = pd.DataFrame.from_dict(rows, orient='columns') +            print_p(f"\nwriting headers to sqlite database. {tablename} - num rows: {len(rows)}") +            df.to_sql(name=tablename, con=conn, if_exists='append') + +    print_p(f"\n--- DONE writing data to {tablename}---") + + +################################################################################ +print_p("start script") +fill_dicom_table_RUMC_UMCG( +    tablename = "dicom_headers_v2", +    database = r"dicoms_rumc.db", +    patients_dir_RUMC = r"data/raw/RUMC", +    patients_dir_UMCG = r"data/raw/UMCG", +    devmode=False) \ No newline at end of file diff --git a/scripts/19.clinical_variables.py b/scripts/19.clinical_variables.py new file mode 100755 index 0000000..0850b02 --- /dev/null +++ b/scripts/19.clinical_variables.py @@ -0,0 +1,134 @@ +from multiprocessing.sharedctypes import Value +import xml.etree.ElementTree as ET +import os +import pathlib +from umcglib.utils import apply_parallel +import numpy as np +from sfransen.utils_quintin import * + + +def parse_marklist(marklistpath): +    tree = ET.parse(marklistpath) +    root = tree.getroot() +    lesions = {} +    patient_element = (list(root.iter("markpatient")) + [None])[0] +    lesions['PSA'] = patient_element.find("PSA").text if (patient_element is not None and patient_element.find("PSA") is not None) else 0 +    number_of_lesions = [] +    current_max_PIRADS = 0 +    for mark in root.iter("mark"): +        PIRADS = mark.find("PIRADS").text if mark.find("PIRADS") is not None else 0 +        if int(PIRADS) > 0: +            number_of_lesions.append(PIRADS) +        if int(PIRADS) > int(current_max_PIRADS): +            current_max_PIRADS = PIRADS + +    # lesions_ = 1 if mark.find("PIRADS") is not None else 0 +    # number_of_lesions += number_of_lesions + lesions_ +    # if current_max_PIRADS == 0: +    #     if len(number_of_lesions) > 0: +    #         print(f'no PIRADS, but lesion(s) present {number_of_lesions}') + +    lesions['PIRADS'] = current_max_PIRADS +    lesions['number_of_lesions'] = len(number_of_lesions) +    return lesions + +def parse_age(path): +    tree = ET.parse(path) +    root = tree.getroot() +    age = root[6].text +    return age[1:-1] + +def mean_above_zero(df): +    df = df[df != 0] +    return np.mean(df) + +def std_above_zero(df): +    df = df[df != 0] +    return np.std(df) + +def median_above_zero(df): +    df = df[df != 0] +    return np.median(df) + +def interq(df): +    df = df[df != 0] +    q3, q1 = np.percentile(df, [75 ,25]) +    iqr = q3 - q1 +    return iqr + +# Get pat from X:/sfransen/data/Nijmegen paths/seg +# /data/pca-rad/datasets/radboud_lesions_2022/pat0617-2016.nii.gz +with open("./../data/Nijmegen paths/b50.txt", 'r') as f: +    b50_paths = [l.strip() for l in f.readlines()] + +marklistpaths = [] +info_ages = [] +pat_ids = [] +for b50_path in b50_paths: +    path_part = pathlib.Path(b50_path).parts +    pat_id = path_part[5] +    # print(pat_id) +    marklistpath = os.path.join(path_part[0],path_part[1],path_part[2],path_part[3],path_part[4],path_part[5],path_part[6],'markdatasetlist.xml') +    info_age = os.path.join(path_part[0],path_part[1],path_part[2],path_part[3],path_part[4],path_part[5],path_part[6],'t2_tse_sag','info.xml') +    marklistpaths.append(marklistpath) +    info_ages.append(info_age) +    pat_ids.append(pat_id) + +PSA_PIRADS = apply_parallel( +    list(marklistpaths), +    function = parse_marklist) + +AGE = apply_parallel( +    list(info_ages), +    function = parse_age, +) +PSA_PIRADS = np.stack(PSA_PIRADS) + +PSA = [(int(x['PSA']) if x['PSA'] is not None else 0) for x in PSA_PIRADS] +PIRADS = np.array([int(x['PIRADS']) for x in PSA_PIRADS]) +# number_of_lesions = [x['number_of_lesions'] for x in PSA_PIRADS] +# number_of_lesions = np.array([int(item) for sublist in number_of_lesions for item in sublist] )
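+# Per patient: number_of_lesions is the count of marks with PIRADS > 0, and PIRADS is the maximum score over those marks.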
+number_of_lesions = np.array([int(x['number_of_lesions']) for x in PSA_PIRADS]) + +AGE = np.array([(int(x) if x is not None else 0) for x in AGE]) +patients = len(pat_ids) +patients_age_median = median_above_zero(AGE) +patients_age_iqr = interq(AGE) +patients_psa_median = median_above_zero(np.array(PSA)) +patients_psa_iqr = interq(np.array(PSA)) + +csPCA_patients = np.sum(PIRADS>3) +PSA_csPCA_patients_median = median_above_zero(np.multiply(PIRADS>3,PSA)) +PSA_csPCA_patients_iqr = interq(np.multiply(PIRADS>3,PSA)) +AGE_csPCA_patients_median = median_above_zero(np.multiply(PIRADS>3,AGE)) +AGE_csPCA_patients_iqr = interq(np.multiply(PIRADS>3,AGE)) + +healthy_patients = patients - csPCA_patients +AGE_healthy_patients_median = median_above_zero(np.multiply(PIRADS<4,AGE)) +AGE_healthy_patients_iqr = interq(np.multiply(PIRADS<4,AGE)) +PSA_healthy_patients_median = median_above_zero(np.multiply(PIRADS<4,PSA)) +PSA_healthy_patients_iqr = interq(np.multiply(PIRADS<4,PSA)) + +PIRADS_0 = np.sum(PIRADS==0) +PIRADS_1 = np.sum(PIRADS==1) +PIRADS_2 = np.sum(PIRADS==2) +PIRADS_3 = np.sum(PIRADS==3) +PIRADS_4 = np.sum(PIRADS==4) +PIRADS_5 = np.sum(PIRADS==5) + +LESIONS_0 = np.sum(number_of_lesions==0) +LESIONS_1 = np.sum(number_of_lesions==1) +LESIONS_2 = np.sum(number_of_lesions==2) +LESIONS_3 = np.sum(number_of_lesions==3) +LESIONS_4 = np.sum(number_of_lesions==4) +LESIONS_5 = np.sum(number_of_lesions==5) +LESIONS_6 = np.sum(number_of_lesions>5) + + +print(f"patients, total:{patients}, median AGE:{patients_age_median} iqr:{patients_age_iqr}, median PSA:{patients_psa_median} iqr:{patients_psa_iqr}") +print(f"healthy patients: total:{healthy_patients}, median AGE:{AGE_healthy_patients_median} iqr {AGE_healthy_patients_iqr}, median PSA:{PSA_healthy_patients_median} , iqr PSA:{PSA_healthy_patients_iqr}") +print(f"csPCA patients: total:{csPCA_patients}, median AGE:{AGE_csPCA_patients_median} iqr {AGE_csPCA_patients_iqr} , median PSA:{PSA_csPCA_patients_median} , iqr PSA:{PSA_csPCA_patients_iqr}") +print(f"Patient PIRADS count: Patients 0: {PIRADS_0}, Patients 1:{PIRADS_1}, Patient 2: {PIRADS_2}, Patients 3:{PIRADS_3} , Patients 4:{PIRADS_4} , Patients 5:{PIRADS_5} ") +print(f"Lesion count: Lesions 0: {LESIONS_0}, Lesions 1:{LESIONS_1}, Lesions 2: {LESIONS_2}, Lesions 3:{LESIONS_3} , Lesions 4:{LESIONS_4} , Lesions 5:{LESIONS_5}, Lesions >5:{LESIONS_6} ") + diff --git a/scripts/4.frocs.py b/scripts/4.frocs.py index ab31a37..15a0360 100755 --- a/scripts/4.frocs.py +++ b/scripts/4.frocs.py @@ -18,7 +18,10 @@ from sfransen.DWI_exp.callbacks import dice_coef from sfransen.FROC.cal_froc_from_np import * from sfransen.load_images import load_images_parrallel from sfransen.DWI_exp.losses import weighted_binary_cross_entropy - +from umcglib.froc import calculate_froc, plot_froc, plot_multiple_froc, partial_auc +from umcglib.binarize import dynamic_threshold +from umcglib.utils import set_gpu +set_gpu(gpu_idx=1) parser = argparse.ArgumentParser( description='Calculate the froc metrics and store in froc_metrics.yml') @@ -119,7 +122,7 @@ images_list = [] segmentations = [] # Read and preprocess each of the paths for each series, and the segmentations. 
-for img_idx in tqdm(range(len(TEST_INDEX))): #[:40]): #for less images +for img_idx in tqdm(range(len(TEST_INDEX))): #for less images # print('images number',[TEST_INDEX[img_idx]]) img_s = {f'{s}': sitk.ReadImage(image_paths[s][TEST_INDEX[img_idx]], sitk.sitkFloat32) for s in SERIES} seg_s = sitk.ReadImage(seg_paths[TEST_INDEX[img_idx]], sitk.sitkFloat32) @@ -133,7 +136,7 @@ for img_idx in tqdm(range(len(TEST_INDEX))): #[:40]): #for less images images_list = np.transpose(images_list, (0, 2, 3, 4, 1)) -print('>>>>> size image_list nmr 2:', np.shape(images_list), '. equal to: (5, 192, 192, 24, 3)?') +# print('>>>>>DEBUG size image_list nmr 2:', np.shape(images_list), '. equal to: (5, 192, 192, 24, 3)?') ########### load module ################## print(' >>>>>>> LOAD MODEL <<<<<<<<<') @@ -171,11 +174,10 @@ test = np.squeeze(predictions)[:,2:-2,2:190,2:190] zeros[:,2:-2,2:190,2:190] = test predictions = zeros -# perform Froc +# perform Froc method joeran metrics = evaluate(y_true=segmentations, y_pred=predictions) dump_dict_to_yaml(metrics, YAML_DIR, "froc_metrics_focal_10_test", verbose=True) - ############## save image as example ################# # save image nmr 2 img_s = sitk.GetImageFromArray(predictions_blur[2].squeeze()) @@ -190,34 +192,6 @@ sitk.WriteImage(img_s, f"{IMAGE_DIR}/segmentations_002_old.nii.gz") img_s = sitk.GetImageFromArray(np.transpose(images_list[2,:,:,:,0].squeeze())) sitk.WriteImage(img_s, f"{IMAGE_DIR}/t2_002_old.nii.gz") -# save image nmr 3 -img_s = sitk.GetImageFromArray(predictions_blur[3].squeeze()) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/predictions_blur_003_old.nii.gz") -img_s = sitk.GetImageFromArray(predictions[3].squeeze()) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/predictions_003_old.nii.gz") -img_s = sitk.GetImageFromArray(segmentations[3].squeeze()) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/segmentations_003_old.nii.gz") -img_s = sitk.GetImageFromArray(np.transpose(images_list[3,:,:,:,0].squeeze())) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/t2_003_old.nii.gz") - -img_s = sitk.GetImageFromArray(np.transpose(images_list[3,:,:,:,1].squeeze())) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/highb_003_old.nii.gz") - -# save image nmr 3 -img_s = sitk.GetImageFromArray(predictions_blur[4].squeeze()) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/predictions_blur_004_old.nii.gz") - -img_s = sitk.GetImageFromArray(predictions[4].squeeze()) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/predictions_004_old.nii.gz") - -img_s = sitk.GetImageFromArray(segmentations[4].squeeze()) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/segmentations_004_old.nii.gz") - -img_s = sitk.GetImageFromArray(np.transpose(images_list[4,:,:,:,0].squeeze())) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/t2_004_old.nii.gz") - -img_s = sitk.GetImageFromArray(np.transpose(images_list[4,:,:,:,1].squeeze())) -sitk.WriteImage(img_s, f"{IMAGE_DIR}/highb_004_old.nii.gz") \ No newline at end of file diff --git a/scripts/7.Visualize_saliency.py b/scripts/7.Visualize_saliency.py index d68214c..ba37d6c 100755 --- a/scripts/7.Visualize_saliency.py +++ b/scripts/7.Visualize_saliency.py @@ -1,4 +1,5 @@ import argparse +from email.mime import image import numpy as np import matplotlib.pyplot as plt import SimpleITK as sitk @@ -16,9 +17,10 @@ args = parser.parse_args() SERIES = args.series series_ = '_'.join(args.series) EXPERIMENT = args.experiment -SALIENCY_DIR = f'./../train_output/{EXPERIMENT}_{series_}/saliency.npy' -IMAGES_DIR = f'./../train_output/{EXPERIMENT}_{series_}/images_list.npy' -SEGMENTATION_DIR = 
f'./../train_output/{EXPERIMENT}_{series_}/segmentations.npy' +fold = 0 +SALIENCY_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}/saliency.npy' +IMAGES_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}/images_list.npy' +SEGMENTATION_DIR = f'./../train_output/{EXPERIMENT}_{series_}_{fold}/segmentations.npy' SLIDE = 10 ########## load saliency map ############ @@ -44,8 +46,9 @@ min_value = np.amin(heatmap[:,:,SLIDE,:]) for indx in range(len(SERIES)): print(indx) - axes[0,indx].imshow(np.transpose(images_list[:,:,SLIDE,indx]),cmap='gray') - im = axes[1,indx].imshow(np.transpose(np.squeeze(heatmap[:,:,SLIDE,indx])),vmin=min_value, vmax=max_value) + heatmap_i = np.transpose(np.squeeze(heatmap[:,:,SLIDE,indx])) + im = axes[0,indx].imshow(np.transpose(images_list[:,:,SLIDE,indx]),cmap='gray') + axes[0,indx].imshow(np.ma.masked_where(heatmap_i < 0.10, heatmap_i),vmin=min_value, vmax=max_value*0.5, alpha=0.25, cmap='jet') axes[0,indx].set_title(SERIES[indx]) axes[0,indx].set_axis_off() axes[1,indx].set_axis_off() @@ -54,4 +57,4 @@ cbar = fig.colorbar(im, ax=axes.ravel().tolist(), shrink=0.5, orientation='horiz cbar.set_ticks([min_value,max_value]) cbar.set_ticklabels(['less important', 'important']) fig.suptitle('Saliency map', fontsize=16) -plt.savefig(f'./../train_output/{EXPERIMENT}_{series_}/saliency_map.png', dpi=300) \ No newline at end of file +plt.savefig(f'./../train_output/{EXPERIMENT}_{series_}_{fold}/saliency_map.png', dpi=300) \ No newline at end of file diff --git a/scripts/calc_adc_b1400.py b/scripts/calc_adc_b1400.py index c7edc63..05d79a1 100755 --- a/scripts/calc_adc_b1400.py +++ b/scripts/calc_adc_b1400.py @@ -4,15 +4,25 @@ from tqdm import tqdm import numpy as np import SimpleITK as sitk -def calc_adc(b50,b400): - mean_dwi = (50 + 400) / 2 - mean_si = np.divide(np.add(np.log(b50), np.log(b400)), 2) +# def calc_adc(b50,b400): +# mean_dwi = (50 + 400) / 2 +# mean_si = np.divide(np.add(np.log(b50), np.log(b400)), 2) - denominator = np.multiply((50 - mean_dwi), np.subtract(np.log(b50), mean_si)) + np.multiply((400 - mean_dwi), np.subtract(np.log(b400), mean_si)) - numerator = np.power((50 - mean_dwi), 2) + np.power((400 - mean_dwi), 2) +# denominator = np.multiply((50 - mean_dwi), np.subtract(np.log(b50), mean_si)) + np.multiply((400 - mean_dwi), np.subtract(np.log(b400), mean_si)) +# numerator = np.power((50 - mean_dwi), 2) + np.power((400 - mean_dwi), 2) +# adc_with_zeros = np.divide(denominator, numerator) * -1000000 +# adc = np.clip(adc_with_zeros,0,np.amax(adc_with_zeros)) +# return adc + +def calc_adc(b50, b400, b800): + "Calculates the adc based on b50, b400 and b800 DWI images/arrays." 
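+    # Least-squares slope of ln(S) versus b over the three b-values: +    #   slope = sum_i (b_i - mean_b) * (ln(S_i) - mean_lnS) / sum_i (b_i - mean_b)^2 +    # ADC = -slope; the factor -1000000 below stores the map in 10^-6 mm^2/s, and negative fits are clipped to zero. +    # (Note: the names 'denominator' and 'numerator' below are swapped relative to their roles in this ratio.)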
+ mean_dwi = (50 + 400 + 800) / 3 + mean_si = np.divide(np.add(np.add(np.log(b50), np.log(b400)), np.log(b800)), 3) + denominator = np.multiply((50 - mean_dwi), np.subtract(np.log(b50), mean_si)) + np.multiply((400 - mean_dwi), np.subtract(np.log(b400), mean_si)) + np.multiply((800 - mean_dwi), np.subtract(np.log(b800), mean_si)) + numerator = np.power((50 - mean_dwi), 2) + np.power((400 - mean_dwi), 2) + np.power((800 - mean_dwi), 2) adc_with_zeros = np.divide(denominator, numerator) * -1000000 adc = np.clip(adc_with_zeros,0,np.amax(adc_with_zeros)) - return adc + return adc def calc_high_b(b_value_high,b_value,b_image,ADC_map): high_b = np.multiply(b_image, np.exp(np.multiply(np.subtract(b_value,b_value_high), (np.divide(ADC_map,1000000))))) @@ -33,7 +43,7 @@ def append_new_line(file_name, text_to_append): -series = ['b50','b400'] +series = ['b50','b400','b800'] # series = ['t2'] DATA_DIR = "./../data/Nijmegen paths/" @@ -55,14 +65,14 @@ for img_idx in tqdm(range(num_images)): arr_b50 = sitk.GetArrayFromImage(img_b50) img_b400 = sitk.ReadImage(image_paths['b400'][img_idx], sitk.sitkFloat32) arr_b400 = sitk.GetArrayFromImage(img_b400) - # img_b800 = sitk.ReadImage(image_paths['b800'][img_idx], sitk.sitkFloat32) - # arr_b800 = sitk.GetArrayFromImage(img_b800) + img_b800 = sitk.ReadImage(image_paths['b800'][img_idx], sitk.sitkFloat32) + arr_b800 = sitk.GetArrayFromImage(img_b800) # img_t2 = sitk.ReadImage(image_paths['t2'][img_idx], sitk.sitkFloat32) # img_t2 = sitk.GetArrayFromImage(img_t2) # img_seg = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32) # img_seg = sitk.GetArrayFromImage(img_seg) - adc = calc_adc(arr_b50,arr_b400) + adc = calc_adc(arr_b50,arr_b400,arr_b800) high_b = calc_high_b(1400,50,arr_b50,adc) adc = sitk.GetImageFromArray(adc) @@ -71,8 +81,8 @@ for img_idx in tqdm(range(num_images)): # seg = sitk.GetImageFromArray(img_seg) patient_ID = os.path.split(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(image_paths['b50'][img_idx])))))[1] - path_adc = f'./../data/adc_calc_2/{patient_ID}.nii.gz' - path_high_b = f'./../data/b1400_calc_2/{patient_ID}.nii.gz' + path_adc = f'./../data/adc_calc_3/{patient_ID}.nii.gz' + path_high_b = f'./../data/b1400_calc_3/{patient_ID}.nii.gz' # patient_ID = os.path.split(os.path.dirname(os.path.dirname(os.path.dirname(image_paths['t2'][img_idx]))))[1] # path_t2 = f'./../data/t2_calc/{patient_ID}.nii.gz' # path_seg = f'./../data/seg_calc/{patient_ID}.nii.gz' @@ -84,8 +94,8 @@ for img_idx in tqdm(range(num_images)): # sitk.WriteImage(t2, path_t2) # sitk.WriteImage(seg, path_seg) - append_new_line('adccalc2.txt',path_adc) - append_new_line('b1400calc2.txt',path_high_b) + append_new_line('adccalc3.txt',path_adc) + append_new_line('b1400calc3.txt',path_high_b) # append_new_line('t2calc.txt',path_t2) # append_new_line('segcalc.txt',path_seg) diff --git a/scripts/heatmap_result_1.png b/scripts/heatmap_result_1.png new file mode 100644 index 0000000..098ca10 Binary files /dev/null and b/scripts/heatmap_result_1.png differ diff --git a/scripts/miccai_rss_recon_undersampled.py b/scripts/miccai_rss_recon_undersampled.py index 7b16244..2e389c8 100755 --- a/scripts/miccai_rss_recon_undersampled.py +++ b/scripts/miccai_rss_recon_undersampled.py @@ -28,7 +28,7 @@ def img_from_ksp( image = image[:,:,::-1] return image.astype(np.float32) -for file_idx in range(187,300): +for file_idx in range(264,300): image_recon = [] kspace = [] save_file = [] diff --git a/scripts/test.py b/scripts/test.py index 344dd7f..0a8ac82 100755 --- a/scripts/test.py 
+++ b/scripts/test.py @@ -1,94 +1,126 @@ -from multiprocessing import set_start_method -import numpy as np -from umcglib.utils import apply_parallel,print_stats_np -from os import listdir +import os +from os import path +import SimpleITK as sitk +from sfransen.DWI_exp import preprocess +import numpy as np import matplotlib.pyplot as plt +from typing import Dict, Union +import SimpleITK as sitk +import numpy as np -# mypath = f'/data/pca-rad/datasets/miccai_2022/K2S_MICCAI2022_GRP/train/data/TBrecon1/train/untarred' -# patient_id = 'pat00123' -# seg = np.load(f'{mypath}/{patient_id}/{patient_id}_seg.npy').astype(int) -# print(np.shape(seg)) -# print_stats_np(seg,'seg_sdgsd') +import umcglib.images as im -def load_seg(file,mypath): - patient_id = file - print(patient_id) - seg = np.load(f'{mypath}/{patient_id}/{patient_id}_seg.npy').astype(int) - background = np.sum(seg == 0) - femoral_cartilage = np.sum(seg == 1) - tibial_cartilage = np.sum(seg == 2) - patellar_cartilage = np.sum(seg == 3) - femur = np.sum(seg == 4) - tibia = np.sum(seg == 5) - patella = np.sum(seg == 6) - output = [background,femoral_cartilage,tibial_cartilage,patellar_cartilage,femur,tibia,patella] - # print('background',background) - return output +def calculate_adc(b_dict: Dict[int, Union[sitk.Image, np.ndarray]]): + "Calculates the adc based on b50, b400 and b800 DWI images." + b_values = [int(b) for b in b_dict] -def calculate_hist(segs): - background = np.sum(segs == 0) - femoral_cartilage = np.sum(segs == 1) - tibial_cartilage = np.sum(segs == 2) - patellar_cartilage = np.sum(segs == 3) - femur = np.sum(segs == 4) - tibia = np.sum(segs == 5) - patella = np.sum(segs == 6) - output = [background,femoral_cartilage,tibial_cartilage,patellar_cartilage,femur,tibia,patella] - # print('background',background) - return output + # Determine the type of inputs (Image or ndarray) + b_images = list(b_dict.values()) + sample = b_images[0] -if __name__ == '__main__': - path = f'/data/pca-rad/datasets/miccai_2022/K2S_MICCAI2022_GRP/train/data/TBrecon1/train/untarred' - files = [f for f in listdir(path)] - print('files length',len(files)) - # For multiprocessing - set_start_method("spawn") - print('spawn done') - # segs_list = apply_parallel(files, load_seg, 4, mypath = path) - segs_list = apply_parallel(files, load_seg, 12, mypath = path) - print('done loading 50') - # segs_list_100 = apply_parallel(files[50:100], load_seg, 4, mypath = path) - # print('done loading 100') - # segs_list_150 = apply_parallel(files[100:150], load_seg, 4, mypath = path) - # print('done loading 150') - # segs_list_200 = apply_parallel(files[150:200], load_seg, 4, mypath = path) - # print('done loading 200') - # segs_list_250 = apply_parallel(files[200:250], load_seg, 4, mypath = path) - # print('done loading 250') - # segs_list_300 = apply_parallel(files[250:300], load_seg, 4, mypath = path) - print('done loading 300') - # segs_list.extend(segs_list_100) - # segs_list.extend(segs_list_150) - # segs_list.extend(segs_list_200) - # segs_list.extend(segs_list_250) - # segs_list.extend(segs_list_300) - print('segs_list is done') + if type(sample) == sitk.Image: + # Extract numpy arrays, and call function again with numpy input + b_dict_n = {b: sitk.GetArrayFromImage(b_dict[b]).T for b in b_dict} + adc_n = calculate_adc(b_dict_n) - output = np.stack(segs_list, axis=0) - # print('load done',np.shape(segs_n)) + # Convert calculated map back to sitk + return im.to_sitk(adc_n, ref_img=sample) - # output = apply_parallel(segs_list,calculate_hist,4) - print('hist 
calc done',np.shape(output)) - # print(np.stack(output)[:,0]) + mean_b_value = sum(b_values) / len(b_values) + mean_si = sum(b_images) / len(b_values) - dict = { - 'background': np.stack(output)[:,0], - 'femoral_cartilage': np.stack(output)[:,1], - 'tibial_cartilage': np.stack(output)[:,2], - 'patellar_cartilage': np.stack(output)[:,3], - 'femur': np.stack(output)[:,4], - 'tibia': np.stack(output)[:,5], - 'patella': np.stack(output)[:,6] - } + denominator = np.zeros_like(sample) + numerator = 0 + for b in b_dict: + b_img = b_dict[b] + denominator += (b - mean_b_value) * (np.log(b_img) - mean_si) + numerator += (b - mean_b_value) ** 2 - np.save('../dict.npy',dict) + adc_with_zeros = denominator / numerator * -1000000 + adc = np.clip(adc_with_zeros, a_min=0, a_max=None) + return adc - for keys in dict: - plt.figure() - data = dict[f'{keys}'] - plt.hist(data) - plt.title(f"historgram of {keys}") - plt.savefig(f"../{keys}.png", dpi=300) - print('done ',keys) - +img_b0 = '../../datasets/anonymized_mri/only_nii_directory/108-M-108/2018/Diffusie_b0-50-400-800-2000/b-0/701.nii.gz' +img_b50 = '../../datasets/anonymized_mri/only_nii_directory/108-M-108/2018/Diffusie_b0-50-400-800-2000/b-50/701.nii.gz' +img_b400 = '../../datasets/anonymized_mri/only_nii_directory/108-M-108/2018/Diffusie_b0-50-400-800-2000/b-400/701.nii.gz' +img_b800 = '../../datasets/anonymized_mri/only_nii_directory/108-M-108/2018/Diffusie_b0-50-400-800-2000/b-800/701.nii.gz' +img_b2000 = '../../datasets/anonymized_mri/only_nii_directory/108-M-108/2018/Diffusie_b0-50-400-800-2000/b-2000/701.nii.gz' +adc_or = '../../datasets/anonymized_mri/only_nii_directory/108-M-108/2018/dADC/702_.nii.gz' + +img_b0 = sitk.ReadImage(img_b0, sitk.sitkFloat32) +img_b0 = sitk.GetArrayFromImage(img_b0) + +img_b50 = sitk.ReadImage(img_b50, sitk.sitkFloat32) +img_b50 = sitk.GetArrayFromImage(img_b50) + +img_b400 = sitk.ReadImage(img_b400, sitk.sitkFloat32) +img_b400 = sitk.GetArrayFromImage(img_b400) + +img_b800 = sitk.ReadImage(img_b800, sitk.sitkFloat32) +img_b800 = sitk.GetArrayFromImage(img_b800) + +img_b2000 = sitk.ReadImage(img_b2000, sitk.sitkFloat32) +img_b2000 = sitk.GetArrayFromImage(img_b2000) + +ADC_original_img = sitk.ReadImage(adc_or, sitk.sitkFloat32) +ADC_original = sitk.GetArrayFromImage(ADC_original_img) + +print(np.shape(ADC_original)) +print(np.shape(img_b800)) + +ADC_b0_b800 = calculate_adc({0:img_b0,800:img_b800}) +ADC_b50_b800 = calculate_adc({50:img_b50,800:img_b800}) +ADC_b50_b400_b800 = calculate_adc({50:img_b50,400:img_b400,800:img_b800}) +ADC_b0_b50_b400_b800 = calculate_adc({0:img_b0,50:img_b50,400:img_b400,800:img_b800}) + +ADC_b50_2000 = calculate_adc({0:img_b0,800:img_b800,2000:img_b2000}) +ADC_b0_b800_2000 = calculate_adc({0:img_b0,800:img_b800,2000:img_b2000}) +ADC_b50_b800_2000 = calculate_adc({50:img_b50,800:img_b800,2000:img_b2000}) +ADC_b50_b400_b800_2000 = calculate_adc({50:img_b50,400:img_b400,800:img_b800,2000:img_b2000}) +ADC_b0_b50_b400_b800_2000 = calculate_adc({0:img_b0,50:img_b50,400:img_b400,800:img_b800,2000:img_b2000}) + + +ADC_b50_b400_b800_2000 = sitk.GetImageFromArray(ADC_b50_b400_b800_2000) +ADC_b50_b400_b800_2000.CopyInformation(ADC_original_img) +sitk.WriteImage(ADC_b50_b400_b800_2000, f"../train_output/ADC_b50_b400_b800_2000.nii.gz") + +SLIDE = 10 +diffs = [] +diffs.append(ADC_b0_b800[SLIDE,:,:]-ADC_original[SLIDE,:,:]) +diffs.append(ADC_b50_b800[SLIDE,:,:]-ADC_original[SLIDE,:,:]) +diffs.append(ADC_b50_b400_b800[SLIDE,:,:]-ADC_original[SLIDE,:,:]) 
+diffs.append(ADC_b0_b50_b400_b800[SLIDE,:,:]-ADC_original[SLIDE,:,:]) + +diffs_2000 = [] +diffs_2000.append(ADC_b0_b800_2000[SLIDE,:,:]-ADC_original[SLIDE,:,:]) +diffs_2000.append(ADC_b50_b800_2000[SLIDE,:,:]-ADC_original[SLIDE,:,:]) +diffs_2000.append(ADC_b50_b400_b800_2000[SLIDE,:,:]-ADC_original[SLIDE,:,:]) +diffs_2000.append(ADC_b0_b50_b400_b800_2000[SLIDE,:,:]-ADC_original[SLIDE,:,:]) + + +TITLES = ['ADC_b0_b800','ADC_b50_b800','ADC_b50_b400_b800','ADC_b0_b50_b400_b800'] +TITLES_2000 = ['ADC_b0_b800_2000','ADC_b50_b800_2000','ADC_b50_b400_b800_2000','ADC_b0_b50_b400_b800_2000'] + +fig, axes = plt.subplots(len(TITLES),2) +fig.suptitle('ADC map differences', fontsize=16) + +x = abs(np.asarray(np.stack(diffs_2000))) +x[np.isneginf(x)] = 0 +x[np.isposinf(x)] = 0 + +vmin = np.nanmin(x) +vmax = np.nanmax(x) +print('vmax',vmax,' ',type(np.asarray(np.stack(diffs)))) +for indx in range(len(TITLES)): + print(indx) + axes[indx,0].imshow(abs(np.transpose(diffs[indx])),cmap='gray',vmin =vmin , vmax=vmax) + axes[indx,0].set_title(f'{TITLES[indx]}') + axes[indx,0].set_axis_off() + axes[indx,0].set_axis_off() + axes[indx,1].imshow(abs(np.transpose(diffs_2000[indx])),cmap='gray',vmin =vmin , vmax=vmax) + axes[indx,1].set_title(f'{TITLES_2000[indx]}') + axes[indx,1].set_axis_off() + axes[indx,1].set_axis_off() + +plt.savefig(f'./../train_output/adc_diff_joeran.png', dpi=300) diff --git a/scripts/test2.py b/scripts/test2.py new file mode 100755 index 0000000..91cdfb8 --- /dev/null +++ b/scripts/test2.py @@ -0,0 +1,64 @@ +from glob import glob +from os.path import normpath, basename + + +def get_paths(main_dir): + all_niftis = glob(main_dir, recursive=True) + all_niftis = [a for a in all_niftis if "fd" not in a.lower()] + all_niftis = [a for a in all_niftis if "seg" not in a.lower()] + t2s = [i for i in all_niftis if "t2" in i.lower() and "tra" in i.lower()] + + adcs = [i for i in all_niftis if "adc" in i.lower() and ("manual" not in i.lower())] + if not adcs: + adcs = [i for i in all_niftis if "manual_adc" in i.lower()] + + dwis = [i for i in all_niftis if ("diff" in i.lower() or "dwi" in i.lower()) + and ("b-2000" in i.lower() or "b-1400" in i.lower() or "b-1500" in i.lower())] + if not dwis: + dwis = [i for i in all_niftis if "b-1400" in i.lower()] + + return t2s, adcs, dwis + +# check is pat is available in umcg lesions 2022 clinsig: pat_available +paths = glob('../../datasets/umcg_lesions_2022_clinsig/*.nii.gz') +pat_available = [] +for path in paths: + pat_id = basename(normpath(path))[:-7] + pat_available.append(pat_id) + +# read patients included in R script +pat_id_dir = '../pat_ids.txt' +pat_ids_cs = [] +with open(pat_id_dir, 'r') as f: + pat_ids_cs = [l.strip() for l in f.readlines()] + +results = {} +paths_seg = [] +paths_t2 = [] +paths_dwi = [] +paths_adc = [] +for pat_id_cs in pat_ids_cs: + t2s,adcs,dwis = get_paths(f"../../datasets/anonymized_mri/only_nii_directory/{pat_id_cs}/**/*.nii.gz") + + pat_id = pat_id_cs.replace("/","-") + if pat_id in pat_available: + results[pat_id] = "p" + paths_seg.append(f'/data/pca-rad/datasets/umcg_lesions_2022_clinsig/{pat_id}.nii.gz') + paths_t2.append(t2s) + paths_adc.append(adcs) + paths_dwi.append(dwis) + + else: + results[pat_id] = "missing" + +with open('t2_test.txt','w') as output: + for item in paths_t2: + output.write("%s\n" % ''.join(item)) + +with open('dwi_test.txt','w') as output: + for item in paths_dwi: + output.write("%s\n" % ''.join(item)) + +with open('adc_test.txt','w') as output: + for item in paths_adc: + output.write("%s\n" % 
''.join(item)) \ No newline at end of file