# fast-mri/uncertainty/scripts/select_data_umcg_new.py
#
# 92 lines
# 3.6 KiB
# Python
# Executable File

import csv
import glob
import sqlite3
from os import path
from sqlite3 import Error
def create_connection(db_file):
    """Open a connection to the SQLite database stored in ``db_file``.

    :param db_file: database file
    :return: Connection object, or None when ``sqlite3.connect`` raises
        ``sqlite3.Error`` (the error is printed)
    """
    try:
        return sqlite3.connect(db_file)
    except Error as e:
        # Best-effort contract: report the problem and let the caller
        # decide what to do with the None result.
        print(e)
        return None
# Select the biopsy-targeted ROIs whose pirads score is > 3, and those with
# gleason_1 > 3 or gleason_2 > 3, build a relative path
# <patientID>/<YYYY-MM-DD>/<roiID> for each of them, and list the folders
# under ./nifti that contain .nii.gz files.
DB_FILE = './db/master.db'


def _format_study_date(raw):
    """Reformat a date string by slicing it as ``YYYYMMDD`` -> ``YYYY-MM-DD``.

    Assumes visits.StudyDate is stored as an 8-character string -- TODO confirm.
    """
    return f"{raw[:4]}-{raw[4:6]}-{raw[6:8]}"


conn = create_connection(DB_FILE)
if conn is None:
    # create_connection() has already printed the sqlite3 error; stop here
    # instead of failing later with an AttributeError on None.
    raise SystemExit(f"Could not open database {DB_FILE}")

# SQL string literals use single quotes: SQLite parses a double-quoted token
# as an identifier first and only falls back to a string literal.
query_pirads = """SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
WHERE pirads != 'NA' AND (pirads > 3);"""
query_gleason = """SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
WHERE gleason_1 != 'NA' AND (gleason_1 > 3 OR gleason_2 > 3);"""

try:
    cur = conn.cursor()
    result_pirads = cur.execute(query_pirads).fetchall()  # list of tuples
    result_gleason = cur.execute(query_gleason).fetchall()  # list of tuples
finally:
    # Both result sets are fully fetched, so the connection can be released.
    conn.close()

dates = [_format_study_date(row[1]) for row in result_pirads]
pirads_paths = [path.join(row[0], day, row[2]) for row, day in zip(result_pirads, dates)]

dates = [_format_study_date(row[1]) for row in result_gleason]
gleason_paths = [path.join(row[0], day, row[2]) for row, day in zip(result_gleason, dates)]

print("Looking for visit folders containing niftis in ./nifti")
# glob is imported as a module, so the function must be called as glob.glob.
niftis = glob.glob("./nifti/**/*.nii.gz", recursive=True)
# De-duplicate the parent directories of all matched files.
folders = list({path.dirname(f) for f in niftis})
print("Found", len(folders), "folders")
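# NOTE: disabled legacy implementation, kept for reference only. It walked
# only_nii_directory per patient/visit, picked the T2, ADC and high-b NIfTI
# paths, and wrote them to csv_file.csv.
# df_list = []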
# ROOT_DIR = '../../../datasets/anonymized_mri/only_nii_directory/*/*'
# x = glob.glob(ROOT_DIR)
# for patient_dir in sorted(x):
# T2 = []
# adc = []
# highb = []
# patient_id = []
# age_year = []
# patient_id = patient_dir.split('/')[-2]
# age_year = patient_dir.split('/')[-1]
# for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*/*')):
# if ('T2' in path or 't2' in path) and ('tra' in path or 'TRA' in path) and not ('seg' in path):
# T2 = path
# for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*')):
# if 'manual_adc' in path:
# manual_adc = path
# else:
# if 'ADC' in path and 'nii.gz' in path:
# adc = path
# print(path)
# if 'manual_b-1400' in path:
# highb1400 = path
# else:
# if 'b-2000' in path and 'nii.gz' in path:
# highb = path
# print(path)
# if manual_adc:
# adc = manual_adc
# if highb1400:
# highb = highb1400
# df_list.append({'patient_id':patient_id,'age_year':age_year,'t2':T2,'adc':adc,'high_b':highb})
# if not T2 or not adc or not highb:
# input(f'{patient_id} {age_year} {T2} {adc} {highb}')
# with open('csv_file.csv', 'w') as csvfile:
# writer = csv.DictWriter(csvfile, fieldnames=['patient_id','age_year','t2','adc','high_b'],delimiter=';')
# writer.writeheader()
# for data in df_list:
# writer.writerow(data)