push before migration

This commit is contained in:
Stefan 2023-03-28 14:48:28 +02:00
parent 49b18fe7f0
commit 9468dadfa3
195 changed files with 645 additions and 18 deletions

30
reader_study/PSA.txt Normal file
View File

@ -0,0 +1,30 @@
4
4
18
4
20
6
12
1
0
20
11
9
None
12
18
7
0
11
14
9
7
8
8
17
13
1
8
10
1
0

30
reader_study/age.txt Normal file
View File

@ -0,0 +1,30 @@
62
56
67
49
66
67
66
77
74
77
73
74
69
51
71
69
71
72
78
70
77
56
56
64
77
54
63
69
49
69

View File

@ -0,0 +1,30 @@
2
2
1
2
2
4
0
5
3
2
4
5
2
5
2
4
3
5
4
3
2
2
0
4
5
5
2
2
3
5

View File

@ -0,0 +1,30 @@
['PZ']
['TZ', 'PZ', 'TZ', 'PZ', '', 'PZ', 'PZ']
['TZ']
['']
['TZ']
['', '', 'PZ', 'CZ', 'CZ']
[]
['PZ']
['TZ', 'PZ', 'PZ', 'TZ', 'PZ', 'AS', 'AS']
['PZ']
['PZ']
['TZ', 'PZ', 'PZ']
['PZ']
['PZ']
['PZ', 'TZ']
['PZ']
['PZ']
['TZ', 'TZ', 'PZ', 'AS']
['CZ', 'PZ']
['TZ']
['TZ']
['PZ', 'PZ']
[]
['PZ']
['AS']
['PZ']
['PZ']
['PZ']
['PZ', 'PZ', 'PZ']
['CZ', 'PZ']

174
reader_study/make_data_paths.py Executable file
View File

@ -0,0 +1,174 @@
import pandas
import numpy as np
import os
from sfransen.DWI_exp.helpers import *
from sfransen.utils_quintin import *
from os import path
import SimpleITK as sitk
import xml.etree.ElementTree as ET
import pathlib
def parse_marklist(marklistpath):
    """Extract PSA, PIRADS scores and lesion zones from a mark-list XML file.

    Args:
        marklistpath: Path (or file object) of the XML file; it is handed
            straight to ``ET.parse``.

    Returns:
        A 4-tuple ``(PSA, current_max_PIRADS, number_of_lesions, locations)``:

        * ``PSA`` -- text of the ``PSA`` child of the first ``markpatient``
          element, or ``0`` when either element is absent. An empty ``PSA``
          element yields ``None``.
        * ``current_max_PIRADS`` -- text of the highest ``PIRADS`` score
          found, or ``0`` when no mark scores above zero.
        * ``number_of_lesions`` -- despite its name, a list holding the
          ``PIRADS`` text of every mark that scores above zero.
        * ``locations`` -- one entry per ``mark`` element: the text of its
          ``Zones/Zone/Type`` element, or ``''`` when that element is absent.

    Raises:
        ValueError: If a ``PIRADS`` element holds non-numeric text.
    """
    root = ET.parse(marklistpath).getroot()

    # Only the first <markpatient> element is consulted.
    patient_element = next(root.iter("markpatient"), None)
    # Elements are compared with `is not None` on purpose: an Element without
    # children is falsy, so a plain truth test would misfire.
    psa_element = patient_element.find("PSA") if patient_element is not None else None
    PSA = psa_element.text if psa_element is not None else 0

    number_of_lesions = []
    locations = []
    current_max_PIRADS = 0
    for mark in root.iter("mark"):
        pirads_element = mark.find("PIRADS")
        pirads_text = pirads_element.text if pirads_element is not None else None
        # A missing or empty <PIRADS> element counts as score 0; calling
        # int() on its None text would raise TypeError.
        pirads_score = int(pirads_text) if pirads_text and pirads_text.strip() else 0

        if pirads_score > 0:
            number_of_lesions.append(pirads_text)
            if pirads_score > int(current_max_PIRADS):
                current_max_PIRADS = pirads_text

        # Every mark contributes a location entry, scored or not.
        zone_type = mark.find("Zones/Zone/Type")
        locations.append(zone_type.text if zone_type is not None else '')

    return PSA, current_max_PIRADS, number_of_lesions, locations
def parse_age(path):
    """Return the age field of an info XML file without its outer characters.

    The value is taken from the seventh child (index 6) of the XML root and
    returned with its first and last character removed.
    NOTE(review): presumably the field is a DICOM-style age string such as
    ``'062Y'`` (giving ``'62'``) -- TODO confirm against a real ``info.xml``.

    Args:
        path: Path (or file object) of the XML file, passed to ``ET.parse``.

    Returns:
        The trimmed text of the seventh root child.
    """
    age_text = ET.parse(path).getroot()[6].text
    return age_text[1:-1]
# Directory holding one "<series>.txt" list of image paths per series.
DATA_DIR = "./../data/Nijmegen paths/"
# Destination prefix for exported images (only referenced by the disabled export).
OUTPUT_DIR = "./random_images_2/"

# Read the T2 path list once and derive both identifiers from every line:
# path component 5 is stored as the patient id and component 6 as `years`.
# NOTE(review): presumably component 6 is the study year/date folder -- confirm
# against the actual layout of the paths in t2.txt.
with open(path.join(DATA_DIR, "t2.txt"), 'r') as f:
    t2_path_parts = [l.split('/') for l in f]
pat_ids = [parts[5] for parts in t2_path_parts]
years = [parts[6] for parts in t2_path_parts]

# Per-case result lists, filled by the extraction loop further down.
PSA_list = []
current_max_PIRADS_list = []
number_of_lesions_list = []
age_list = []
pat_id_list = []
locations_list = []
# Hard-coded positions (0-based line numbers in the per-series path lists) of
# the cases to process.
selected_idxs = [23, 161, 543, 734, 367, 85, 380, 231, 406, 435, 660, 327, 305,
                 7, 479, 540, 558, 361, 167, 320, 666, 178, 700, 831, 707, 596,
                 715, 823, 561, 782]

# Series name -> suffix of the export file name.
# method 1: all bvalues, method 2: omitting b800
series_suffixes = (
    ('t2', 't2'),
    ('adccalc2', 'adc_method2'),
    ('adccalc3', 'adc_method1'),
    ('b1400calc2', 'dwi_method2'),
    ('b1400calc3', 'dwi_method1'),
)

for img, suffix in series_suffixes:
    with open(path.join(DATA_DIR, f"{img}.txt"), 'r') as f:
        image_paths = [l.strip() for l in f]

    for idx in selected_idxs:
        print(idx)
        # NOTE(review): the image is still loaded although the export below is
        # disabled, so a missing or unreadable file keeps failing loudly here.
        read_img = sitk.ReadImage(image_paths[idx], sitk.sitkFloat32)
        name = f'{pat_ids[idx]}-{suffix}'
        # sitk.WriteImage(read_img,f'{OUTPUT_DIR}{name}.nii.gz')

        if img != 't2':
            continue

        # Clinical metadata is per case, not per series: collect it during the
        # T2 pass only, so every list gets exactly one entry per case and the
        # output files stay line-aligned.
        marklistpath = f'../../datasets/radboud_new/{pat_ids[idx]}/{years[idx]}/markdatasetlist.xml'
        info_age = f'../../datasets/radboud_new/{pat_ids[idx]}/{years[idx]}/t2_tse_tra/info.xml'
        PSA, current_max_PIRADS, number_of_lesions, locations = parse_marklist(marklistpath)
        age = parse_age(info_age)

        pat_id_list.append(str(pat_ids[idx]))
        PSA_list.append(PSA)
        current_max_PIRADS_list.append(current_max_PIRADS)
        number_of_lesions_list.append(list(number_of_lesions))
        age_list.append(age)
        locations_list.append(locations)
def _write_lines(file_path, items):
    """Write ``str(item)`` for every entry of *items* to *file_path*, one per line."""
    with open(file_path, 'w') as f:
        f.writelines(f'{item}\n' for item in items)


# Each list is written exactly once; the files are independent of any case index.
_write_lines('./PSA.txt', PSA_list)
_write_lines('./age.txt', age_list)
# NOTE(review): interactive pause kept as-is -- it shows the lesion lists and
# blocks until Enter is pressed. Looks like a debugging leftover; confirm
# whether it can be replaced by print() for unattended runs.
input(number_of_lesions_list)
_write_lines('./number_of_lesions.txt', number_of_lesions_list)
_write_lines('./current_max_PIRADS.txt', current_max_PIRADS_list)
_write_lines('./pat_ids.txt', pat_id_list)
_write_lines('./locations.txt', locations_list)

# Stop here; nothing below this line is executed. SystemExit is raised
# directly because the exit() helper is only installed by the `site` module.
raise SystemExit
# NOTE(review): this section comes after the point where the script terminates,
# so it is never executed. It also relies on names (tqdm, num_images, args,
# seg_paths, preprocess, IMAGE_SHAPE, TARGET_SPACING, images, segmentations,
# read_yaml_to_dict) that this file does not define itself -- presumably they
# come from the star imports or are leftovers from another script; TODO confirm.

# Load all series and the segmentation of every case, then preprocess them.
for img_idx in tqdm(range(num_images)):  # [:20]): #for less images
    img_s = {}
    for series_name in args.series:
        img_s[series_name] = sitk.ReadImage(
            image_paths[series_name][img_idx], sitk.sitkFloat32)
    seg_s = sitk.ReadImage(seg_paths[img_idx], sitk.sitkFloat32)

    img_n, seg_n = preprocess(
        img_s, seg_s, shape=IMAGE_SHAPE, spacing=TARGET_SPACING)
    for seq in img_n:
        images[seq].append(img_n[seq])
    segmentations.append(seg_n)

# Train/validation split.
# The indices are read from a pre-computed YAML file for the selected fold.
# An earlier variant used KFold purely as a 1:9 splitter (no cross validation):
# kfold = KFold(10, shuffle=True, random_state=123)
# train_idxs, valid_idxs = list(kfold.split(segmentations))[0]
# train_idxs = list(train_idxs)
# valid_idxs = list(valid_idxs)
fold_file = f'./../data/Nijmegen paths/train_val_test_idxs_{args.fold}.yml'
yml_paths = read_yaml_to_dict(fold_file)
print('test, train paths',yml_paths)
train_idxs = yml_paths['train_set0']
valid_idxs = yml_paths['val_set0']
# Collect every marksheet row that has a Gleason score and key it as
# "<patient_id>_<study_id>".
df = pandas.read_csv('./marksheet_with_gleason.csv')
patient_ids = df['patient_id'].values
study_ids = df['study_id'].values
gleason_idxs = [idx for idx, i in enumerate(df['lesion_GS'].values) if not pandas.isna(i)]
values = [f"{patient_ids[i]}_{study_ids[i]}" for i in gleason_idxs]
print(values)
# Set copy for O(1) membership tests while filtering the path lists.
gleason_cases = set(values)

# read picai paths
files = ['picai_seg_list','picai_adc_list','picai_hbv_list','picai_t2_list']
for file in files:
    with open(f"../../../../datasets/picai/{file}.txt") as f:
        image_paths = [l.strip() for l in f]

    # Compare by value (`==`): `is` tests object identity and only matched the
    # literal here through string interning.
    # Segmentation paths lose their last 7 characters, all other lists 8, before
    # the base name is matched against the case keys.
    # NOTE(review): presumably '.nii.gz' versus a modality suffix plus extension
    # such as '_adc.mha' -- confirm against the path lists.
    suffix_len = 7 if file == 'picai_seg_list' else 8
    image_paths_gleason = [
        image_path for image_path in image_paths
        if os.path.basename(image_path[:-suffix_len]) in gleason_cases
    ]
    print(len(image_paths_gleason))

    with open(f'./{file}.txt', 'w') as f:
        f.writelines(f'{line}\n' for line in image_paths_gleason)

View File

@ -0,0 +1,30 @@
['2']
['2', '2', '2', '2', '2', '2', '2']
['1']
['2']
['2']
['4', '2', '2', '2']
[]
['5']
['2', '3', '2', '2', '2', '3', '2']
['2']
['4']
['4', '5', '2']
['2']
['5']
['2', '2']
['4']
['3']
['2', '2', '4', '5']
['4', '4']
['3']
['2']
['2', '2']
[]
['4']
['5']
['5']
['2']
['2']
['2', '3', '2']
['5', '4']

31
reader_study/pat_ids.txt Normal file
View File

@ -0,0 +1,31 @@
pat0744
pat0587
pat0607
pat0092
pat0600
pat1022
pat0355
pat0836
pat1011
pat0337
pat0693
pat0216
pat0687
pat0124
pat0779
pat0833
pat0197
pat0121
pat0597
pat0008
pat0182
pat0027
pat0263
pat0599
pat0643
pat0812
pat0883
pat0165
pat0117
pat0239

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
reader_study/random_images_2.zip Executable file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More