"""Print cohort statistics (age, PSA, PIRADS, lesion counts) for a list of scans.

For every image path listed in ``B50_LIST_PATH`` the script locates the
``markdatasetlist.xml`` and ``t2_tse_sag/info.xml`` files of the same study
directory, parses them in parallel and prints summary statistics for the
whole cohort and for the PIRADS > 3 / PIRADS < 4 subgroups.
"""
from multiprocessing.sharedctypes import Value
import os
import pathlib
import xml.etree.ElementTree as ET

from umcglib.utils import apply_parallel
import numpy as np

from sfransen.utils_quintin import *

# Text file with one image path per line; resolved against the working directory.
B50_LIST_PATH = "./../data/Nijmegen paths/b50.txt"

# Number of leading path components that make up the study directory.
# NOTE(review): presumably <root>/.../<patient>/<study>; confirm against the
# layout of the paths stored in B50_LIST_PATH.
STUDY_DIR_DEPTH = 7

# Index of the path component used as patient id.
PATIENT_ID_INDEX = 5


def parse_marklist(marklistpath):
    """Extract PSA, highest PIRADS score and scored-lesion count from a mark list.

    Args:
        marklistpath: Path of the XML file to parse.

    Returns:
        dict with the keys
            ``'PSA'``: text of the ``<PSA>`` child of the first
                ``<markpatient>`` element (``None`` if that element is
                empty), or ``0`` when either element is absent.
            ``'PIRADS'``: text of the highest-scoring ``<PIRADS>`` element
                found under any ``<mark>``, or ``0`` when no mark scores
                above zero.
            ``'number_of_lesions'``: number of ``<mark>`` elements whose
                PIRADS score is above zero.

    Raises:
        ValueError: if a ``<PIRADS>`` element holds non-integer text.
    """
    root = ET.parse(marklistpath).getroot()

    patient_element = next(root.iter("markpatient"), None)
    psa_element = patient_element.find("PSA") if patient_element is not None else None

    scored_lesions = 0
    max_score = 0
    max_score_text = 0
    for mark in root.iter("mark"):
        # findtext() yields None for a missing element and "" for an empty
        # one; both mean "no score" (an empty element used to crash int()).
        pirads_text = mark.findtext("PIRADS")
        if pirads_text is None or not pirads_text.strip():
            continue

        score = int(pirads_text)
        if score > 0:
            scored_lesions += 1
        # Strict comparison: the first mark reaching the maximum is kept.
        if score > max_score:
            max_score = score
            max_score_text = pirads_text

    return {
        'PSA': psa_element.text if psa_element is not None else 0,
        'PIRADS': max_score_text,
        'number_of_lesions': scored_lesions,
    }


def parse_age(path):
    """Read the patient age from an info XML file.

    The value is the text of the seventh child of the root element with its
    first and last character removed.
    NOTE(review): presumably the raw text looks like "065Y"; if so, an age of
    100 or more would lose its leading digit here - confirm the format.

    Args:
        path: Path of the XML file to parse.

    Returns:
        The trimmed age string, or ``None`` when the element has no text.

    Raises:
        IndexError: if the root element has fewer than seven children.
    """
    root = ET.parse(path).getroot()
    raw_age = root[6].text
    if raw_age is None:
        # The caller maps None to 0 ("unknown"); slicing None would crash.
        return None
    return raw_age[1:-1]


def mean_above_zero(df):
    """Return the mean of the non-zero entries of *df*.

    Args:
        df: Array supporting boolean-mask indexing (e.g. a numpy array).
    """
    nonzero = df[df != 0]
    return np.mean(nonzero)


def std_above_zero(df):
    """Return the standard deviation of the non-zero entries of *df*.

    Args:
        df: Array supporting boolean-mask indexing (e.g. a numpy array).
    """
    nonzero = df[df != 0]
    return np.std(nonzero)


def median_above_zero(df):
    """Return the median of the non-zero entries of *df*.

    Args:
        df: Array supporting boolean-mask indexing (e.g. a numpy array).
    """
    nonzero = df[df != 0]
    return np.median(nonzero)


def interq(df):
    """Return the interquartile range (Q3 - Q1) of the non-zero entries of *df*.

    Args:
        df: Array supporting boolean-mask indexing (e.g. a numpy array).

    Returns:
        The interquartile range, or NaN when *df* has no non-zero entries.
    """
    nonzero = df[df != 0]
    if len(nonzero) == 0:
        # np.percentile cannot handle an empty selection; report NaN like
        # the median does for the same input.
        return np.nan
    q3, q1 = np.percentile(nonzero, [75, 25])
    return q3 - q1


# NOTE(review): everything below runs at import time. If apply_parallel
# starts worker processes, consider an `if __name__ == "__main__":` guard.

# Get pat from X:/sfransen/data/Nijmegen paths/seg
# /data/pca-rad/datasets/radboud_lesions_2022/pat0617-2016.nii.gz
with open(B50_LIST_PATH, 'r') as f:
    # Blank lines carry no path and would break the component lookup below.
    b50_paths = [line.strip() for line in f if line.strip()]

marklistpaths = []
info_ages = []
pat_ids = []
for b50_path in b50_paths:
    path_part = pathlib.Path(b50_path).parts
    if len(path_part) < STUDY_DIR_DEPTH:
        raise ValueError(
            f"expected at least {STUDY_DIR_DEPTH} path components in {b50_path!r}"
        )
    study_dir = os.path.join(*path_part[:STUDY_DIR_DEPTH])

    pat_ids.append(path_part[PATIENT_ID_INDEX])
    marklistpaths.append(os.path.join(study_dir, 'markdatasetlist.xml'))
    info_ages.append(os.path.join(study_dir, 't2_tse_sag', 'info.xml'))

# Materialise the results so they can be traversed several times.
PSA_PIRADS = list(apply_parallel(marklistpaths, function=parse_marklist))
AGE = apply_parallel(info_ages, function=parse_age)

# Zero stands for "missing" in every array and is ignored by the statistics.
# PSA is parsed as float so that decimal values such as "6.5" are accepted.
PSA = np.array(
    [float(x['PSA']) if x['PSA'] is not None else 0 for x in PSA_PIRADS],
    dtype=float,
)
PIRADS = np.array([int(x['PIRADS']) for x in PSA_PIRADS], dtype=int)
number_of_lesions = np.array(
    [int(x['number_of_lesions']) for x in PSA_PIRADS], dtype=int
)
AGE = np.array([int(x) if x is not None else 0 for x in AGE], dtype=int)

# Whole cohort.
patients = len(pat_ids)
patients_age_median = median_above_zero(AGE)
patients_age_iqr = interq(AGE)
patients_psa_median = median_above_zero(PSA)
patients_psa_iqr = interq(PSA)

# Subgroups are selected by boolean indexing, so a patient outside the group
# is never confused with a patient whose value is missing.
is_csPCA = PIRADS > 3
is_healthy = PIRADS < 4

csPCA_patients = np.sum(is_csPCA)
PSA_csPCA_patients_median = median_above_zero(PSA[is_csPCA])
PSA_csPCA_patients_iqr = interq(PSA[is_csPCA])
AGE_csPCA_patients_median = median_above_zero(AGE[is_csPCA])
AGE_csPCA_patients_iqr = interq(AGE[is_csPCA])

healthy_patients = patients - csPCA_patients
AGE_healthy_patients_median = median_above_zero(AGE[is_healthy])
AGE_healthy_patients_iqr = interq(AGE[is_healthy])
PSA_healthy_patients_median = median_above_zero(PSA[is_healthy])
PSA_healthy_patients_iqr = interq(PSA[is_healthy])

# Patients per maximum PIRADS score 0..5.
PIRADS_0, PIRADS_1, PIRADS_2, PIRADS_3, PIRADS_4, PIRADS_5 = (
    np.sum(PIRADS == score) for score in range(6)
)

# Patients per lesion count 0..5; LESIONS_6 collects everything above five.
LESIONS_0, LESIONS_1, LESIONS_2, LESIONS_3, LESIONS_4, LESIONS_5 = (
    np.sum(number_of_lesions == count) for count in range(6)
)
LESIONS_6 = np.sum(number_of_lesions > 5)

print(f"patients, total:{patients}, median AGE:{patients_age_median} iqr:{patients_age_iqr}, median PSA:{patients_psa_median} iqr:{patients_psa_iqr}")
print(f"healthy patients: total:{healthy_patients}, median AGE:{AGE_healthy_patients_median} iqr {AGE_healthy_patients_iqr}, median PSA:{PSA_healthy_patients_median} , iqr PSA:{PSA_healthy_patients_iqr}")
print(f"csPCA patients: total:{csPCA_patients}, median AGE:{AGE_csPCA_patients_median} iqr {AGE_csPCA_patients_iqr} , median PSA:{PSA_csPCA_patients_median} , iqr PSA:{PSA_csPCA_patients_iqr}")
print(f"Patient PIRADS count: Patients 0: {PIRADS_0}, Patients 1:{PIRADS_1}, Patient 2: {PIRADS_2}, Patients 3:{PIRADS_3} , Patients 4:{PIRADS_4} , Patients 5:{PIRADS_5} ")
print(f"Lesion count: Lesions 0: {LESIONS_0}, Lesions 1:{LESIONS_1}, Lesions 2: {LESIONS_2}, Lesions 3:{LESIONS_3} , Lesions 4:{LESIONS_4} , Lesions 5:{LESIONS_5}, Lesions >5:{LESIONS_6} ")