254 lines
9.0 KiB
Python
Executable File
254 lines
9.0 KiB
Python
Executable File
import os
|
|
from matplotlib.pyplot import table
|
|
import glob
|
|
import SimpleITK as sitk
|
|
import pandas as pd
|
|
from sqlite3 import Error
|
|
import sqlite3
|
|
|
|
################################ README ######################################
|
|
# TO DO -
|
|
# key headers weghalen
|
|
# op basis van exclusie
|
|
# lokaal
|
|
# ssd -> lokaal draaien
|
|
# apply parrallel (listen)
|
|
|
|
def print_p(text, end="\n"):
|
|
""" Print function for on Peregrine. It needs a flush before printing. """
|
|
print(text, flush=True, end=end)
|
|
|
|
def create_connection(db_file):
|
|
""" create a database connection to the SQLite database
|
|
specified by the db_file
|
|
:param db_file: database file
|
|
:return: Connection object or None
|
|
"""
|
|
conn = None
|
|
try:
|
|
conn = sqlite3.connect(db_file)
|
|
except Error as e:
|
|
print_p(e)
|
|
|
|
return conn
|
|
|
|
KEY_HEADERS = [
|
|
"0010|0020",
|
|
"0018|0089",
|
|
"0018|0093",
|
|
"0008|103e",
|
|
"0028|0010",
|
|
"0028|0011",
|
|
"0018|9087",
|
|
"0018|0024" # Sequence Name
|
|
]
|
|
|
|
|
|
INCLUDE_TAGS = [ # Attributes
|
|
"0008|0005", # Specific Character Set
|
|
"0008|0008", # Image Type
|
|
"0008|0012", # Instance Creation Date
|
|
"0008|0013", # Instance Creation Time
|
|
"0008|0016", # SOP Class UID
|
|
"0008|0018", # SOP Instance UID
|
|
"0008|0020", # Study Date
|
|
"0008|0021", # Series Date
|
|
"0008|0022", # Acquisition Date
|
|
"0008|0023", # Content Date
|
|
"0008|0030", # Study Time
|
|
"0008|0031", # Series Time
|
|
"0008|0032", # Acquisition Time
|
|
"0008|0033", # Content Time
|
|
"0008|0050", # Accession Number
|
|
"0008|0060", # Modality
|
|
"0008|0070", # Manufacturer
|
|
"0008|1010", # Station Name
|
|
"0008|1030", # Study Description
|
|
"0008|103e", # Series Description
|
|
"0008|1040", # Institutional Department Name
|
|
"0008|1090", # Manufacturer's Model Name
|
|
"0010|0020", # Patient ID
|
|
"0010|0030", # Patient's Birth Date
|
|
"0010|0040", # Patient's Sex
|
|
"0010|1010", # Patient's Age
|
|
"0010|1020", # Patient's Size
|
|
"0010|1030", # Patient's Weight
|
|
"0010|21b0", # Additional Patient History
|
|
"0012|0062", # Patient Identity Removed
|
|
"0012|0063", # De-identification Method
|
|
"0018|0015", # Body Part Examined
|
|
"0018|0020", # Scanning Sequence
|
|
"0018|0021", # Sequence Variant
|
|
"0018|0022", # Scan Options
|
|
"0018|0023", # MR Acquisition Type
|
|
"0018|0050", # Slice Thickness
|
|
"0018|0080", # Repetition Time
|
|
"0018|0081", # Echo Time
|
|
"0018|0083", # Number of Averages
|
|
"0018|0084", # Imaging Frequency
|
|
"0018|0085", # Imaged Nucleus
|
|
"0018|0087", # Magnetic Field Strength
|
|
"0018|0088", # Spacing Between Slices
|
|
"0018|0089", # Number of Phase Encoding Steps IMPORTANT
|
|
"0018|0091", # Echo Train Length
|
|
"0018|0093", # Percent Sampling IMPORTANT
|
|
"0018|0094", # Percent Phase Field of View
|
|
"0018|1000", # Device Serial Number
|
|
"0018|1030", # Protocol Name IMPORTANT -> sequence type
|
|
"0018|1310", # Acquisition Matrix IMPORTANT
|
|
"0018|1312", # In-plane Phase Encoding Direction
|
|
"0018|1314", # Flip Angle
|
|
"0018|1315", # Variable Flip Angle Flag
|
|
"0018|5100", # Patient Position
|
|
"0018|9087", # Diffusion b-value IMPORTANT
|
|
"0020|000d", # Study Instance UID
|
|
"0020|000e", # Series Instance UID
|
|
"0020|0010", # Study ID
|
|
"0020|0032", # Image Position (Patient)
|
|
"0020|0037", # Image Orientation (Patient)
|
|
"0020|0052", # Frame of Reference UID
|
|
"0020|1041", # Slice Location
|
|
"0028|0002", # Samples per Pixel
|
|
"0028|0010", # Rows IMPORTANT
|
|
"0028|0011", # Columns IMPORTANT
|
|
"0028|0030", # Pixel Spacing
|
|
"0028|0100", # Bits Allocated
|
|
"0028|0101", # Bits Stored
|
|
"0028|0106", # Smallest Image Pixel Value
|
|
"0028|0107", # Largest Image Pixel Value
|
|
"0028|1050", # Window Center
|
|
"0028|1051", # Window Width
|
|
"0040|0244", # Performed Procedure Step Start Date
|
|
"0040|0254" # Performed Procedure Step Description
|
|
]
|
|
|
|
|
|
################################################################################
|
|
|
|
|
|
def get_dict_from_dicom(reader, verbose=False):
|
|
headers = {}
|
|
for header_name in INCLUDE_TAGS:
|
|
headers[header_name] = None
|
|
|
|
for k in reader.GetMetaDataKeys():
|
|
if k in INCLUDE_TAGS:
|
|
v = reader.GetMetaData(k)
|
|
headers[k] = f"{v}"
|
|
if verbose:
|
|
print_p(f"({k}) = \"{v}\"")
|
|
headers["path"] = ""
|
|
return headers
|
|
|
|
|
|
def has_different_key_headers(current_header_dict: dict, prev_header_dict):
|
|
""" This function returns False if one of the key headers is different in
|
|
both dictionaries supplied as arguments.
|
|
|
|
Parameters:
|
|
`current_header_dict (dict)`: dict from dicom (Headers from DICOM)
|
|
`prev_header_dict (dict)`: dict from dicom (Headers from DICOM)
|
|
returns (bool): True if important headers are different, else False
|
|
"""
|
|
for header in KEY_HEADERS:
|
|
try:
|
|
if current_header_dict[header] != prev_header_dict.get(header, None):
|
|
return True
|
|
except:
|
|
continue
|
|
return False
|
|
|
|
|
|
def is_patient_in_database(conn, tablename, patient):
|
|
# Get all results from patient from database
|
|
cur = conn.cursor()
|
|
query = f"SELECT [0010|0020] FROM {tablename} WHERE [0010|0020] like '%{patient}%';"
|
|
result = cur.execute(query).fetchall() #list of tuples
|
|
if len(result) == 0:
|
|
return False
|
|
return True
|
|
|
|
|
|
def fill_dicom_table_RUMC_UMCG(
|
|
tablename: str,
|
|
database: str,
|
|
patients_dir_RUMC: str,
|
|
devmode = False):
|
|
""" Fills the given table with headers/tags from DICOM files from UMCG and
|
|
RUMC. The tags are cross referenced with an include list of tags.
|
|
|
|
Parameters:
|
|
`tablename (string)`: table in sqlite that will be inserted into
|
|
`database (string)`: relative project path to .db (database) file for sqlite.
|
|
`patients_dir_RUMC (string)`: path where patient directories are stored (RUMC)
|
|
`patients_dir_UMCG (string)`: path where patient directories are stored (UMCG)
|
|
"""
|
|
|
|
# Connect with database
|
|
db_path = f"{os.getcwd()}{database}"
|
|
conn = create_connection(db_path)
|
|
print_p(f"connection made: {db_path}")
|
|
|
|
# patients = os.listdir(patients_dir_UMCG) + os.listdir(patients_dir_RUMC)
|
|
patients = os.listdir(patients_dir_RUMC)
|
|
prev_headers = {}
|
|
|
|
with conn:
|
|
# Drop all rows from table if it exists.
|
|
if False:
|
|
conn.execute(f"DELETE FROM {tablename};")
|
|
print_p("done deleting all records from database")
|
|
|
|
# loop over all patients. (RUMC and UMCG)
|
|
for p_idx, patient in enumerate(patients):
|
|
|
|
print_p(f"\nPatient {p_idx}: {patient}")
|
|
|
|
if is_patient_in_database(conn, tablename, patient):
|
|
print_p(f"PATIENT IS ALREADY IN DATABASE {tablename}")
|
|
continue
|
|
|
|
print_p(f"patient: {patient} is not in database")
|
|
# Find all DICOM files
|
|
glob_pattern = f"data/raw/*/{patient}/**/*.dcm"
|
|
dicoms_paths = glob.glob(glob_pattern, recursive=True)
|
|
rows = []
|
|
|
|
# Loop over DICOM files
|
|
for f_idx, dcm_path in enumerate(dicoms_paths):
|
|
if f_idx > 10 and devmode:
|
|
continue
|
|
print_p(f"f{f_idx}", end=' ')
|
|
|
|
try:
|
|
reader = sitk.ImageFileReader()
|
|
reader.SetFileName(dcm_path)
|
|
reader.LoadPrivateTagsOn()
|
|
reader.ReadImageInformation()
|
|
|
|
except:
|
|
print_p(f"Read Image Information EXCEPTION... Skipping: {dcm_path}")
|
|
continue
|
|
|
|
curr_headers = get_dict_from_dicom(reader, verbose=False)
|
|
curr_headers['path'] = dcm_path
|
|
if not has_different_key_headers(curr_headers, prev_headers):
|
|
continue
|
|
prev_headers = curr_headers
|
|
rows.append(curr_headers)
|
|
|
|
df = pd.DataFrame.from_dict(rows, orient='columns')
|
|
print_p(f"\nwriting headers to sqlite database. {tablename} - num rows: {len(rows)}")
|
|
df.to_sql(name=tablename, con=conn, if_exists='append')
|
|
|
|
print_p(f"\n--- DONE writing data to {tablename}---")
|
|
|
|
|
|
################################################################################
|
|
print_p("start script")
|
|
fill_dicom_table_RUMC_UMCG(
|
|
tablename = "dicom_headers_v2",
|
|
database = r"dicoms_rumc.db",
|
|
patients_dir_RUMC = r"data/raw/RUMC",
|
|
patients_dir_UMCG = r"data/raw/UMCG",
|
|
devmode=False) |