fast-mri/scripts/19.clinical_variables.py

135 lines
5.3 KiB
Python
Executable File

from multiprocessing.sharedctypes import Value
import xml.etree.ElementTree as ET
import os
import pathlib
from umcglib.utils import apply_parallel
import numpy as np
from sfransen.utils_quintin import *
def parse_marklist(marklistpath):
    """Extract PSA, highest PIRADS score and lesion count from a mark list XML.

    Args:
        marklistpath: Path (or readable file object) of the XML document; it
            is passed unchanged to ``ET.parse``.

    Returns:
        dict with three keys, in this order:

        * ``'PSA'``: text of the ``PSA`` child of the first ``markpatient``
          element (``None`` when that child is empty), or the integer ``0``
          when the element or its ``PSA`` child does not exist.
        * ``'PIRADS'``: raw text of the highest-scoring ``PIRADS`` element
          over all ``mark`` elements (first one wins on ties), or the integer
          ``0`` when no mark has a positive score.
        * ``'number_of_lesions'``: number of ``mark`` elements whose PIRADS
          score is greater than zero.

    Raises:
        ValueError: If a ``PIRADS`` element holds text that is not an integer.
    """
    root = ET.parse(marklistpath).getroot()
    lesions = {}

    # Only the first <markpatient> element in the document is consulted.
    patient_element = next(root.iter("markpatient"), None)
    psa_element = (
        patient_element.find("PSA") if patient_element is not None else None
    )
    lesions['PSA'] = psa_element.text if psa_element is not None else 0

    lesion_count = 0
    highest_score = 0
    highest_text = 0  # stays the integer 0 until a positive score is seen
    for mark in root.iter("mark"):
        pirads_element = mark.find("PIRADS")
        pirads_text = pirads_element.text if pirads_element is not None else None
        # A missing element and an empty one (<PIRADS/>, whose text is None)
        # both mean "no score"; int(None) would otherwise raise TypeError.
        if pirads_text is None or not pirads_text.strip():
            continue
        score = int(pirads_text)
        if score > 0:
            lesion_count += 1
        if score > highest_score:
            highest_score = score
            highest_text = pirads_text

    lesions['PIRADS'] = highest_text
    lesions['number_of_lesions'] = lesion_count
    return lesions
def parse_age(path):
    """Read the age field from an ``info.xml`` file.

    The value is taken from the seventh child of the document root, and the
    first and last characters of its text are dropped.
    NOTE(review): presumably the text looks like ``"069Y"`` so that the
    result is ``"69"`` -- confirm against real files.

    Args:
        path: Path (or readable file object) of the XML document; it is
            passed unchanged to ``ET.parse``.

    Returns:
        The trimmed age text as a string, or ``None`` when the root has fewer
        than seven children or the seventh child has no text.
    """
    root = ET.parse(path).getroot()

    # The script that consumes this value already maps None to 0, so a
    # missing field is reported as None rather than crashing the whole run.
    if len(root) <= 6:
        return None
    age_text = root[6].text
    if age_text is None:
        return None
    return age_text[1:-1]
def mean_above_zero(df):
    """Return the mean of the entries of *df* that are not equal to zero.

    Despite the name, the filter is ``!= 0``, so negative entries are kept.

    Args:
        df: Array-like supporting element-wise comparison and boolean-mask
            indexing (e.g. a NumPy array).

    Returns:
        ``np.mean`` of the non-zero entries.
    """
    nonzero_values = df[df != 0]
    return np.mean(nonzero_values)
def std_above_zero(df):
    """Return the standard deviation of the entries of *df* that differ from zero.

    The filter is ``!= 0``, so negative entries are kept. The result is
    whatever ``np.std`` returns for the remaining values.
    """
    keep = df != 0
    return np.std(df[keep])
def median_above_zero(df):
    """Return the median of the entries of *df* that differ from zero.

    The filter is ``!= 0``, so negative entries are kept. The result is
    whatever ``np.median`` returns for the remaining values.
    """
    keep = df != 0
    return np.median(df[keep])
def interq(df):
    """Return the interquartile range of the entries of *df* that differ from zero.

    Zero entries are removed first; the result is the 75th percentile minus
    the 25th percentile of what remains, as computed by ``np.percentile``.
    """
    nonzero_values = df[df != 0]
    lower_quartile, upper_quartile = np.percentile(nonzero_values, [25, 75])
    return upper_quartile - lower_quartile
# Patient list comes from X:/sfransen/data/Nijmegen paths/seg
# Example of a related dataset path:
# /data/pca-rad/datasets/radboud_lesions_2022/pat0617-2016.nii.gz
with open("./../data/Nijmegen paths/b50.txt", 'r') as f:
    # Skip blank lines: an empty path has no parts and would fail below.
    b50_paths = [line.strip() for line in f if line.strip()]

marklistpaths = []
info_ages = []
pat_ids = []
for b50_path in b50_paths:
    path_part = pathlib.Path(b50_path).parts
    if len(path_part) < 7:
        raise ValueError(
            f"expected at least 7 path components, got {len(path_part)}: {b50_path!r}"
        )
    # Component 5 is used as the patient identifier; the first seven
    # components together form the directory holding the XML files.
    pat_id = path_part[5]
    study_dir = pathlib.Path(*path_part[:7])
    marklistpath = str(study_dir / 'markdatasetlist.xml')
    info_age = str(study_dir / 't2_tse_sag' / 'info.xml')
    marklistpaths.append(marklistpath)
    info_ages.append(info_age)
    pat_ids.append(pat_id)
# Parse every patient's mark list and info file via apply_parallel.
PSA_PIRADS = apply_parallel(
    list(marklistpaths),
    function=parse_marklist,
)
AGE = apply_parallel(
    list(info_ages),
    function=parse_age,
)

# Convert the parsed text fields to numbers. A value of 0 marks "missing":
# the *_above_zero / interq helpers drop zeros before computing statistics.
# PSA is parsed with float() so decimal text such as "6.5" does not raise;
# integer text yields the same statistics as before.
PSA = [(float(x['PSA']) if x['PSA'] is not None else 0) for x in PSA_PIRADS]
PIRADS = np.array([int(x['PIRADS']) for x in PSA_PIRADS])
number_of_lesions = np.array([int(x['number_of_lesions']) for x in PSA_PIRADS])
# Both None and an empty string (age text too short to trim) count as missing.
AGE = np.array([(int(x) if x else 0) for x in AGE])
# ---- Whole cohort ---------------------------------------------------------
# Zero entries are ignored by median_above_zero / interq.
patients = len(pat_ids)
psa_values = np.array(PSA)
patients_age_median = median_above_zero(AGE)
patients_age_iqr = interq(AGE)
patients_psa_median = median_above_zero(psa_values)
patients_psa_iqr = interq(psa_values)

# ---- Split on highest PIRADS score: above 3 versus 3 or lower -------------
# Multiplying by a boolean mask zeroes the entries of the other group, and
# the helpers then discard those zeros.
cspca_mask = PIRADS > 3
healthy_mask = PIRADS < 4
psa_cspca = np.multiply(cspca_mask, PSA)
age_cspca = np.multiply(cspca_mask, AGE)
psa_healthy = np.multiply(healthy_mask, PSA)
age_healthy = np.multiply(healthy_mask, AGE)

csPCA_patients = np.sum(cspca_mask)
PSA_csPCA_patients_median = median_above_zero(psa_cspca)
PSA_csPCA_patients_iqr = interq(psa_cspca)
AGE_csPCA_patients_median = median_above_zero(age_cspca)
AGE_csPCA_patients_iqr = interq(age_cspca)

healthy_patients = patients - csPCA_patients
AGE_healthy_patients_median = median_above_zero(age_healthy)
AGE_healthy_patients_iqr = interq(age_healthy)
PSA_healthy_patients_median = median_above_zero(psa_healthy)
PSA_healthy_patients_iqr = interq(psa_healthy)

# ---- Patient counts per highest PIRADS score (0 through 5) ----------------
PIRADS_0, PIRADS_1, PIRADS_2, PIRADS_3, PIRADS_4, PIRADS_5 = (
    np.sum(PIRADS == score) for score in range(6)
)

# ---- Patient counts per number of lesions (0 through 5, then more than 5) -
LESIONS_0, LESIONS_1, LESIONS_2, LESIONS_3, LESIONS_4, LESIONS_5 = (
    np.sum(number_of_lesions == count) for count in range(6)
)
LESIONS_6 = np.sum(number_of_lesions > 5)

# ---- Report ---------------------------------------------------------------
print(f"patients, total:{patients}, median AGE:{patients_age_median} iqr:{patients_age_iqr}, median PSA:{patients_psa_median} iqr:{patients_psa_iqr}")
print(f"healthy patients: total:{healthy_patients}, median AGE:{AGE_healthy_patients_median} iqr {AGE_healthy_patients_iqr}, median PSA:{PSA_healthy_patients_median} , iqr PSA:{PSA_healthy_patients_iqr}")
print(f"csPCA patients: total:{csPCA_patients}, median AGE:{AGE_csPCA_patients_median} iqr {AGE_csPCA_patients_iqr} , median PSA:{PSA_csPCA_patients_median} , iqr PSA:{PSA_csPCA_patients_iqr}")
print(f"Patient PIRADS count: Patients 0: {PIRADS_0}, Patients 1:{PIRADS_1}, Patient 2: {PIRADS_2}, Patients 3:{PIRADS_3} , Patients 4:{PIRADS_4} , Patients 5:{PIRADS_5} ")
print(f"Lesion count: Lesions 0: {LESIONS_0}, Lesions 1:{LESIONS_1}, Lesions 2: {LESIONS_2}, Lesions 3:{LESIONS_3} , Lesions 4:{LESIONS_4} , Lesions 5:{LESIONS_5}, Lesions >5:{LESIONS_6} ")