# fast-mri/uncertainty/scripts/select_data_umcg_new.py

import csv
import glob
import os
import sqlite3
from sqlite3 import Error


def create_connection(db_file):
    """Open the SQLite database stored at ``db_file``.

    Any sqlite3 error raised while connecting is printed rather than
    propagated, and ``None`` is returned in that case.

    :param db_file: database file
    :return: Connection object or None
    """
    try:
        return sqlite3.connect(db_file)
    except Error as exc:
        # Best-effort: report the problem and let the caller handle None.
        print(exc)
        return None

# Select biopsy targets from the master database. Both queries return
# (patientID, StudyDate, roiID) rows, which are turned into relative paths of
# the form "<patientID>/<YYYY-MM-DD>/<roiID>".
conn = create_connection('./db/master.db')
if conn is None:
    # create_connection() has already printed the sqlite3 error; stop with a
    # clear message instead of failing on None.cursor() below.
    raise SystemExit("Could not open database './db/master.db'")
cur = conn.cursor()

# Rows whose `pirads` column is not 'NA' and is greater than 3. 'NA' is
# single-quoted so SQLite always parses it as a string literal, never as an
# identifier.
query_pirads = """SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
         INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
         INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
         WHERE pirads != 'NA' AND (pirads > 3);"""
result_pirads = cur.execute(query_pirads).fetchall()  # list of tuples
# StudyDate is sliced as YYYYMMDD and rewritten as YYYY-MM-DD.
dates = [f"{row[1][:4]}-{row[1][4:6]}-{row[1][6:8]}" for row in result_pirads]
# NOTE(review): os.path.join needs str components -- assumes patientID and
# roiID are stored as text; confirm against the database schema.
pirads_paths = [
    os.path.join(row[0], day, row[2]) for row, day in zip(result_pirads, dates)
]

# Rows whose `gleason_1` column is not 'NA' and where either `gleason_1` or
# `gleason_2` is greater than 3.
query_gleason = """SELECT visits.patientID, visits.StudyDate, rois.roiID FROM biopsies
         INNER JOIN rois ON biopsies.target_visitID = rois.StudyInstanceUID AND biopsies.target_roi = rois.roiID
         INNER JOIN visits ON biopsies.target_visitID = visits.StudyInstanceUID
         WHERE gleason_1 != 'NA' AND (gleason_1 > 3 OR gleason_2 > 3);"""
result_gleason = cur.execute(query_gleason).fetchall()  # list of tuples
dates = [f"{row[1][:4]}-{row[1][4:6]}-{row[1][6:8]}" for row in result_gleason]
gleason_paths = [
    os.path.join(row[0], day, row[2]) for row, day in zip(result_gleason, dates)
]

print("Looking for visit folders containing niftis in ./nifti")
# `glob` is imported as a module, so the search function is glob.glob().
niftis = glob.glob("./nifti/**/*.nii.gz", recursive=True)
# Unique parent directories of every NIfTI file found (order is arbitrary).
folders = list({os.path.dirname(f) for f in niftis})

print("Found", len(folders), "folders")


# df_list = []
# ROOT_DIR = '../../../datasets/anonymized_mri/only_nii_directory/*/*'
# x = glob.glob(ROOT_DIR)

# for patient_dir in sorted(x):
#     T2 = []
#     adc = []
#     highb = []
#     patient_id = []
#     age_year = []

#     patient_id = patient_dir.split('/')[-2]
#     age_year = patient_dir.split('/')[-1]
#     for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*/*')):
#         if ('T2' in path or 't2' in path) and ('tra' in path or 'TRA' in path) and not ('seg' in path):
#             T2 = path
#     for path in sorted(glob.glob(f'../../../datasets/anonymized_mri/only_nii_directory/{patient_dir.split("/")[-2]}/{patient_dir.split("/")[-1]}/*')):
#         if 'manual_adc' in path:
#             manual_adc = path
#         else:
#             if 'ADC' in path and 'nii.gz' in path:
#                 adc = path
#                 print(path)

#         if 'manual_b-1400' in path:
#             highb1400 = path
#         else:
#             if 'b-2000' in path and 'nii.gz' in path:
#                 highb = path
#                 print(path)

#     if manual_adc:
#         adc = manual_adc
#     if highb1400:
#         highb = highb1400

#     df_list.append({'patient_id':patient_id,'age_year':age_year,'t2':T2,'adc':adc,'high_b':highb})
#     if not T2 or not adc or not highb:
#         input(f'{patient_id} {age_year} {T2} {adc} {highb}')

# with open('csv_file.csv', 'w') as csvfile:
#     writer = csv.DictWriter(csvfile, fieldnames=['patient_id','age_year','t2','adc','high_b'],delimiter=';')
#     writer.writeheader()
#     for data in df_list:
#         writer.writerow(data)