135 lines
5.3 KiB
Python
Executable File
135 lines
5.3 KiB
Python
Executable File
from multiprocessing.sharedctypes import Value
|
|
import xml.etree.ElementTree as ET
|
|
import os
|
|
import pathlib
|
|
from umcglib.utils import apply_parallel
|
|
import numpy as np
|
|
from sfransen.utils_quintin import *
|
|
|
|
|
|
def parse_marklist(marklistpath):
    """Summarise one mark list XML file.

    Args:
        marklistpath: File name or file object of the mark list; it is
            handed to ``ET.parse`` unchanged.

    Returns:
        A dict with three keys:

        * ``'PSA'`` - text of the ``<PSA>`` child of the first
          ``<markpatient>`` element (``None`` when that element is empty),
          or ``0`` when there is no ``<markpatient>``/``<PSA>`` element.
        * ``'PIRADS'`` - text of the highest-scoring ``<PIRADS>`` element
          found in any ``<mark>``, or ``0`` when no mark has a score
          above zero. On ties the first occurrence wins.
        * ``'number_of_lesions'`` - number of ``<mark>`` elements whose
          PIRADS score is above zero.

    Raises:
        ValueError: if a non-empty ``<PIRADS>`` text is not an integer.
    """
    root = ET.parse(marklistpath).getroot()

    # Compare against None explicitly: an Element without children is falsy.
    patient = next(root.iter("markpatient"), None)
    psa_element = patient.find("PSA") if patient is not None else None

    lesions = {}
    lesions['PSA'] = psa_element.text if psa_element is not None else 0

    scored_marks = 0
    highest_text = 0
    highest_score = 0
    for mark in root.iter("mark"):
        score_element = mark.find("PIRADS")
        score_text = score_element.text if score_element is not None else None
        # A missing or empty <PIRADS/> element carries no score; treat it like
        # an absent one instead of crashing in int(None).
        if score_text is None or not score_text.strip():
            continue

        score = int(score_text)
        if score <= 0:
            continue

        scored_marks += 1
        if score > highest_score:
            highest_score = score
            # Keep the raw text so callers receive the same type as before.
            highest_text = score_text

    lesions['PIRADS'] = highest_text
    lesions['number_of_lesions'] = scored_marks
    return lesions
|
|
|
|
def parse_age(path):
    """Read the age field from an ``info.xml`` file.

    The value is taken from the seventh child of the XML root and returned
    with its first and last character removed.
    NOTE(review): presumably the raw text looks like ``"065Y"`` so the
    result is ``"65"`` - confirm against a real info.xml.

    Args:
        path: File name or file object passed to ``ET.parse``.

    Returns:
        The trimmed age string, or ``None`` when the element has no text
        (the calling script already maps ``None`` to 0).

    Raises:
        IndexError: if the root has fewer than seven children.
    """
    root = ET.parse(path).getroot()
    raw_age = root[6].text
    if raw_age is None:
        # Empty element: report "unknown" instead of failing on None[1:-1].
        return None
    return raw_age[1:-1]
|
|
|
|
def mean_above_zero(df):
    """Return the mean of all entries of ``df`` that are not equal to zero.

    Only exact zeros are discarded (negative values are kept). The filtered
    data is printed to stdout before the mean is taken.
    """
    keep = df != 0
    nonzero = df[keep]
    print(nonzero)
    return np.mean(nonzero)
|
|
|
|
def std_above_zero(df):
    """Return the standard deviation (``np.std``) of the non-zero entries of ``df``."""
    keep = df != 0
    nonzero = df[keep]
    return np.std(nonzero)
|
|
|
|
def median_above_zero(df):
    """Return the median of the entries of ``df`` that are not equal to zero."""
    keep = df != 0
    nonzero = df[keep]
    return np.median(nonzero)
|
|
|
|
def interq(df):
    """Return the interquartile range (75th minus 25th percentile) of the
    non-zero entries of ``df``."""
    nonzero = df[df != 0]
    lower, upper = np.percentile(nonzero, [25, 75])
    return upper - lower
|
|
|
|
# Patient paths come from X:/sfransen/data/Nijmegen paths/seg
# e.g. /data/pca-rad/datasets/radboud_lesions_2022/pat0617-2016.nii.gz
with open("./../data/Nijmegen paths/b50.txt", 'r') as f:
    b50_paths = [line.strip() for line in f]

marklistpaths = []
info_ages = []
pat_ids = []
for b50_path in b50_paths:
    path_part = pathlib.Path(b50_path).parts
    # The sixth path component is used as the patient identifier.
    pat_id = path_part[5]

    # The first seven components form the directory that holds both the
    # mark list and the t2_tse_sag/info.xml file. Index 6 is accessed
    # explicitly so a too-short path still raises IndexError.
    study_dir = os.path.join(*path_part[:6], path_part[6])
    marklistpath = os.path.join(study_dir, 'markdatasetlist.xml')
    info_age = os.path.join(study_dir, 't2_tse_sag', 'info.xml')

    pat_ids.append(pat_id)
    marklistpaths.append(marklistpath)
    info_ages.append(info_age)
|
|
|
|
# Parse every mark list (PSA / PIRADS / lesion count) and every info.xml
# (age) through umcglib's apply_parallel.
PSA_PIRADS = apply_parallel(
    list(marklistpaths),
    function=parse_marklist)

AGE = apply_parallel(
    list(info_ages),
    function=parse_age,
)
PSA_PIRADS = np.stack(PSA_PIRADS)

# Convert the parsed text fields to numbers; a missing value becomes 0 and is
# later ignored by the *_above_zero helpers.
# PSA is parsed with float() because int() raises ValueError on a decimal
# string such as "4.5"; integer-only inputs give the same statistics as before.
PSA = [(float(x['PSA']) if x['PSA'] is not None else 0.0) for x in PSA_PIRADS]
PIRADS = np.array([int(x['PIRADS']) for x in PSA_PIRADS])
number_of_lesions = np.array([int(x['number_of_lesions']) for x in PSA_PIRADS])

AGE = np.array([(int(x) if x is not None else 0) for x in AGE])
|
|
# ---- whole cohort -------------------------------------------------------
# Zeros mark missing values and are ignored by median_above_zero / interq.
psa_values = np.array(PSA)

patients = len(pat_ids)
patients_age_median = median_above_zero(AGE)
patients_age_iqr = interq(AGE)
patients_psa_median = median_above_zero(psa_values)
patients_psa_iqr = interq(psa_values)

# ---- patients with a maximum PIRADS of 4 or 5 ---------------------------
# Multiplying by the boolean mask zeroes every patient outside the group, so
# the zero-skipping helpers only see the selected patients.
cs_mask = PIRADS > 3
csPCA_patients = np.sum(cs_mask)
PSA_csPCA_patients_median = median_above_zero(cs_mask * psa_values)
PSA_csPCA_patients_iqr = interq(cs_mask * psa_values)
AGE_csPCA_patients_median = median_above_zero(cs_mask * AGE)
AGE_csPCA_patients_iqr = interq(cs_mask * AGE)

# ---- patients with a maximum PIRADS below 4 -----------------------------
healthy_mask = PIRADS < 4
healthy_patients = patients - csPCA_patients
AGE_healthy_patients_median = median_above_zero(healthy_mask * AGE)
AGE_healthy_patients_iqr = interq(healthy_mask * AGE)
PSA_healthy_patients_median = median_above_zero(healthy_mask * psa_values)
PSA_healthy_patients_iqr = interq(healthy_mask * psa_values)

# ---- number of patients per maximum PIRADS score (0..5) -----------------
PIRADS_0, PIRADS_1, PIRADS_2, PIRADS_3, PIRADS_4, PIRADS_5 = (
    np.sum(PIRADS == score) for score in range(6)
)

# ---- number of patients per lesion count (0..5, then more than 5) -------
LESIONS_0, LESIONS_1, LESIONS_2, LESIONS_3, LESIONS_4, LESIONS_5 = (
    np.sum(number_of_lesions == count) for count in range(6)
)
LESIONS_6 = np.sum(number_of_lesions > 5)

# ---- report -------------------------------------------------------------
print(f"patients, total:{patients}, median AGE:{patients_age_median} iqr:{patients_age_iqr}, median PSA:{patients_psa_median} iqr:{patients_psa_iqr}")
print(f"healthy patients: total:{healthy_patients}, median AGE:{AGE_healthy_patients_median} iqr {AGE_healthy_patients_iqr}, median PSA:{PSA_healthy_patients_median} , iqr PSA:{PSA_healthy_patients_iqr}")
print(f"csPCA patients: total:{csPCA_patients}, median AGE:{AGE_csPCA_patients_median} iqr {AGE_csPCA_patients_iqr} , median PSA:{PSA_csPCA_patients_median} , iqr PSA:{PSA_csPCA_patients_iqr}")
print(f"Patient PIRADS count: Patients 0: {PIRADS_0}, Patients 1:{PIRADS_1}, Patient 2: {PIRADS_2}, Patients 3:{PIRADS_3} , Patients 4:{PIRADS_4} , Patients 5:{PIRADS_5} ")
print(f"Lesion count: Lesions 0: {LESIONS_0}, Lesions 1:{LESIONS_1}, Lesions 2: {LESIONS_2}, Lesions 3:{LESIONS_3} , Lesions 4:{LESIONS_4} , Lesions 5:{LESIONS_5}, Lesions >5:{LESIONS_6} ")
|
|
|