175 lines
6.7 KiB
Python
Executable File
175 lines
6.7 KiB
Python
Executable File
import pandas
|
|
import numpy as np
|
|
import os
|
|
from sfransen.DWI_exp.helpers import *
|
|
from sfransen.utils_quintin import *
|
|
from os import path
|
|
import SimpleITK as sitk
|
|
import xml.etree.ElementTree as ET
|
|
import pathlib
|
|
|
|
def parse_marklist(marklistpath):
    """Extract PSA, PI-RADS scores and lesion zones from a mark-list XML file.

    Args:
        marklistpath: Filename or file object, passed straight to ``ET.parse``.

    Returns:
        A 4-tuple ``(PSA, current_max_PIRADS, number_of_lesions, locations)``:

        * ``PSA`` -- text of the ``PSA`` child of the first ``markpatient``
          element, or the integer ``0`` when either element is absent.
        * ``current_max_PIRADS`` -- raw text of the highest ``PIRADS`` value
          among the ``mark`` elements, or the integer ``0`` when no mark has
          a score above zero.
        * ``number_of_lesions`` -- list holding the raw ``PIRADS`` text of
          every mark whose score is above zero.
        * ``locations`` -- one entry per ``mark``: the text of its
          ``Zones/Zone/Type`` element, or ``''`` when that element is absent.

    Raises:
        ValueError: If a ``PIRADS`` element holds non-empty text that is not
            an integer.
    """
    root = ET.parse(marklistpath).getroot()

    # Only the first <markpatient> element is consulted for the PSA value.
    patient_element = next(root.iter("markpatient"), None)
    psa_element = patient_element.find("PSA") if patient_element is not None else None
    PSA = psa_element.text if psa_element is not None else 0

    number_of_lesions = []
    locations = []
    current_max_PIRADS = 0
    max_score = 0  # integer twin of current_max_PIRADS, avoids re-parsing the text

    for mark in root.iter("mark"):
        pirads_element = mark.find("PIRADS")
        pirads_text = pirads_element.text if pirads_element is not None else None
        # An empty <PIRADS/> element has text None (or only whitespace); treat
        # it like a missing element instead of crashing in int().
        score = int(pirads_text) if pirads_text and pirads_text.strip() else 0

        if score > 0:
            number_of_lesions.append(pirads_text)
        if score > max_score:
            max_score = score
            current_max_PIRADS = pirads_text

        # Every mark contributes a location entry, scored or not.
        zone_type = mark.find("Zones/Zone/Type")
        locations.append(zone_type.text if zone_type is not None else '')

    return PSA, current_max_PIRADS, number_of_lesions, locations
|
|
|
|
def parse_age(path):
    """Return the age field of an XML info file without its outer characters.

    The value is read from the seventh child (index 6) of the document root,
    and its first and last characters are dropped before returning.
    NOTE(review): presumably the stored text is wrapped in a pair of
    delimiter characters -- confirm against a real ``info.xml``.

    Args:
        path: Filename or file object, passed straight to ``ET.parse``.

    Returns:
        The text of the seventh root child with one character trimmed from
        each end.
    """
    seventh_child = ET.parse(path).getroot()[6]
    raw_age = seventh_child.text
    return raw_age[1:-1]
|
|
|
|
# ---------------------------------------------------------------------------
# Collect PSA, PI-RADS, lesion, age and zone information for a fixed sample of
# studies and dump each field to its own text file (one line per entry).
# ---------------------------------------------------------------------------
DATA_DIR = "./../data/Nijmegen paths/"
OUTPUT_DIR = "./random_images_2/"

# Row numbers (0-based) of the listing files that make up the sample.
SAMPLE_IDXS = [23, 161, 543, 734, 367, 85, 380, 231, 406, 435, 660, 327, 305, 7,
               479, 540, 558, 361, 167, 320, 666, 178, 700, 831, 707, 596, 715,
               823, 561, 782]

# Listing file stem -> suffix used in the exported image name.
# method 1: all bvalues, method 2: omitting b800
NAME_SUFFIXES = {
    't2': 't2',
    'adccalc2': 'adc_method2',
    'adccalc3': 'adc_method1',
    'b1400calc2': 'dwi_method2',
    'b1400calc3': 'dwi_method1',
}


def _write_lines(filename, values):
    """Write ``str(value)`` of every item in *values* to *filename*, one per line."""
    with open(filename, 'w') as f:
        f.writelines(f'{value}\n' for value in values)


# Patient id and study folder are the 6th and 7th '/'-separated components of
# each T2 path; read the listing once and slice both out of it.
with open(path.join(DATA_DIR, "t2.txt"), 'r') as f:
    t2_path_parts = [l.split('/') for l in f.readlines()]
pat_ids = [parts[5] for parts in t2_path_parts]
years = [parts[6] for parts in t2_path_parts]

PSA_list = []
current_max_PIRADS_list = []
number_of_lesions_list = []
age_list = []
pat_id_list = []
locations_list = []

# Dict iteration follows insertion order, so 't2' is processed first.
for img in NAME_SUFFIXES:
    with open(path.join(DATA_DIR, f"{img}.txt"), 'r') as f:
        image_paths = [l.strip() for l in f.readlines()]

    for idx in SAMPLE_IDXS:
        print(idx)
        # Only the commented-out export below uses read_img; the read is kept
        # so a missing or unreadable image still fails here.
        read_img = sitk.ReadImage(image_paths[idx],sitk.sitkFloat32)

        # NOTE(review): appended once per series, so pat_ids.txt ends up with
        # five times as many lines as the other output files -- confirm that
        # this is intended.
        pat_id_list.append(str(pat_ids[idx]))

        if img == 't2':
            # The clinical metadata is per study, not per series, so it is
            # parsed and recorded only during the T2 pass.
            study_dir = f'../../datasets/radboud_new/{pat_ids[idx]}/{years[idx]}'
            marklistpath = f'{study_dir}/markdatasetlist.xml'
            info_age = f'{study_dir}/t2_tse_tra/info.xml'
            PSA, current_max_PIRADS, number_of_lesions, locations = parse_marklist(marklistpath)
            age = parse_age(info_age)

            PSA_list.append(PSA)
            current_max_PIRADS_list.append(current_max_PIRADS)
            number_of_lesions_list.append(list(number_of_lesions))
            age_list.append(age)
            locations_list.append(locations)

        name = f'{pat_ids[idx]}-{NAME_SUFFIXES[img]}'

        # sitk.WriteImage(read_img,f'{OUTPUT_DIR}{name}.nii.gz')

# NOTE(review): the source had a second `for idx in [...]` header at this
# point whose body could not be recovered. Nothing below depends on idx, so
# the files are written once.
_write_lines('./PSA.txt', PSA_list)
_write_lines('./age.txt', age_list)
# Pauses for Enter while showing the collected lesion lists.
input(number_of_lesions_list)
_write_lines('./number_of_lesions.txt', number_of_lesions_list)
_write_lines('./current_max_PIRADS.txt', current_max_PIRADS_list)
_write_lines('./pat_ids.txt', pat_id_list)
_write_lines('./locations.txt', locations_list)
exit()
|
|
# Load every series plus its segmentation, run them through preprocess() and
# collect the results per sequence.
# NOTE(review): this section follows the exit() call earlier in the file, so
# it does not run unless that call is removed. tqdm, num_images, args,
# seg_paths, preprocess, IMAGE_SHAPE, TARGET_SPACING, images and segmentations
# are not defined in the visible part of the file -- presumably they come from
# the star imports or from removed code; confirm before re-enabling.
for img_idx in tqdm(range(num_images)):  # shorten the range to use fewer images
    img_s = {
        series_name: sitk.ReadImage(image_paths[series_name][img_idx], sitk.sitkFloat32)
        for series_name in args.series
    }
    seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32)

    img_n, seg_n = preprocess(
        img_s,
        seg_s,
        shape=IMAGE_SHAPE,
        spacing=TARGET_SPACING,
    )

    for seq in img_n:
        images[seq].append(img_n[seq])
    segmentations.append(seg_n)

# Train / validation split.
# An earlier version derived a 1:9 split from KFold (no real cross validation):
# kfold = KFold(10, shuffle=True, random_state=123)
# train_idxs, valid_idxs = list(kfold.split(segmentations))[0]
# train_idxs = list(train_idxs)
# valid_idxs = list(valid_idxs)

# The split is now read from a per-fold YAML file instead.
yml_paths = read_yaml_to_dict(
    f'./../data/Nijmegen paths/train_val_test_idxs_{args.fold}.yml'
)
print('test, train paths',yml_paths)
train_idxs = yml_paths['train_set0']
valid_idxs = yml_paths['val_set0']
|
|
|
|
|
|
# Restrict the PI-CAI path lists to studies that have a lesion Gleason score
# in the marksheet, and write the filtered lists to the working directory.
# NOTE(review): this section follows the exit() call earlier in the file, so
# it does not run unless that call is removed.
df = pandas.read_csv('./marksheet_with_gleason.csv')
gleason_idxs = [idx for idx, i in enumerate(df['lesion_GS'].values) if not pandas.isna(i)]
# Study keys have the form "<patient_id>_<study_id>".
values = [f"{df['patient_id'].values[i]}_{df['study_id'].values[i]}" for i in gleason_idxs]
print(values)
gleason_studies = set(values)  # constant-time membership tests in the filter below

# read picai paths
files = ['picai_seg_list','picai_adc_list','picai_hbv_list','picai_t2_list']
for file in files:
    with open(f"../../../../datasets/picai/{file}.txt") as f:
        image_paths = [l.strip() for l in f.readlines()]

    # The study key is the file name minus a fixed-length tail: 7 characters
    # for the segmentation list, 8 for the image lists.
    # NOTE(review): presumably '.nii.gz' versus a '_xxx.mha'-style suffix --
    # confirm against the listing files.
    # Compare with ==, not `is`: identity of equal strings is not guaranteed.
    suffix_len = 7 if file == 'picai_seg_list' else 8
    image_paths_gleason = [
        image_path for image_path in image_paths
        if os.path.basename(image_path[:-suffix_len]) in gleason_studies
    ]
    print(len(image_paths_gleason))

    with open(f'./{file}.txt', 'w') as f:
        for line in image_paths_gleason:
            f.write(line)
            f.write('\n')
|
|
|