113 lines
4.5 KiB
Python
113 lines
4.5 KiB
Python
from pathlib import Path
|
|
import pandas as pd
|
|
import shlex
|
|
import subprocess
|
|
from zipfile import ZipFile
|
|
import json
|
|
|
|
from django.conf import settings
|
|
from uuid import uuid4
|
|
|
|
def available_states(resource_dir=None):
    """Return the states Synthea can generate patients for, sorted by name.

    The states are the unique values of the ``STNAME`` column in
    ``geography/demographics.csv`` inside the Synthea resource folder.

    Args:
        resource_dir (str | Path, optional): Folder holding the Synthea
            resources. Defaults to `settings.SYNTHEA_RESOURCE_DIR`.

    Returns:
        List: Dicts with the keys ``id`` and ``name`` (both the state name),
            sorted case-insensitively on the name.
    """
    if resource_dir is None:
        resource_dir = settings.SYNTHEA_RESOURCE_DIR

    # Important: the state name for Synthea is case sensitive, so the value is
    # used untouched as the id.
    df = pd.read_csv(Path(resource_dir) / 'geography/demographics.csv', index_col=False)

    # Rows with an empty STNAME are read as NaN (a float); drop them so the
    # case-insensitive sort below cannot fail on a non-string value.
    names = df['STNAME'].dropna().unique()

    states = [{'id': name, 'name': name} for name in names]
    return sorted(states, key=lambda s: s['name'].lower())
|
|
|
|
def available_modules(module_dir=None):
    """Return the Synthea modules found in the module folder, sorted by name.

    Only regular files ending on ``.json`` directly inside the folder are
    loaded; sub folders are not descended into.

    Args:
        module_dir (str | Path, optional): Folder holding the module files.
            Defaults to `settings.SYNTHEA_MODULE_DIR`.

    Returns:
        List: Dicts with the keys ``id`` (file name without the ``.json``
            extension) and ``name`` (the ``name`` field of the module JSON),
            sorted case-insensitively on the name.
    """
    if module_dir is None:
        module_dir = settings.SYNTHEA_MODULE_DIR

    # Assumption here: Only .json files in the main folder are modules. The rest are submodules...
    modules = []
    for module in Path(module_dir).iterdir():
        if not (module.is_file() and module.suffix == '.json'):
            continue
        # Read as UTF-8 explicitly instead of relying on the platform default encoding.
        data = json.loads(module.read_text(encoding='utf-8'))
        # `stem` drops only the final extension; a plain str.replace would
        # also remove a '.json' that appears earlier in the file name.
        modules.append({'id': module.stem, 'name': data['name']})

    return sorted(modules, key=lambda m: m['name'].lower())
|
|
|
|
def run_synthea(state, population=50, gender=None, age=None, module=None):
    """Run the Synthea application and zip the exported patient data.

    This method expects Synthea to be installed on the `settings.SYNTHEA_BASE_DIR`
    location. The call blocks until the Synthea process has finished.

    The output is written to a unique folder in `settings.SYNTHEA_OUTPUT_DIR`.
    When the run succeeds, the files in the `settings.SYNTHEA_EXPORT_TYPE` sub
    folder are added to a zip file inside that same unique folder.

    Nothing is deleted afterwards, so cleanup of the output folder and the zip
    file needs to be done manually.

    Args:
        state (str, required): The state where to generate synthetic patient data for.
        population (int, optional): The amount of patients to generate. Defaults to 50.
        gender (str, optional): Either generate only male(m), only female(f), or None for both. Defaults to None.
        age (str, optional): This is the age range of the generated patients. Input is always like [min_age]-[max_age]. Defaults to None.
        module (str, optional): The module to use for generating patient data. When None, all modules are used. Defaults to None.

    Returns:
        (str, Path | None): The combined stdout/stderr log of the run and the
            location of the zip file. The zip file name holds the enabled
            options. The second item is None when the log does not contain
            'BUILD SUCCESSFUL'; no exception is raised for a failed run, the
            error output is available in the returned log.
    """
    # Add a unique dir to the output, so multiple Synthea processes can run parallel
    temp_id = uuid4().hex
    output_folder = settings.SYNTHEA_OUTPUT_DIR / temp_id

    synthea_cmd = [
        str(settings.SYNTHEA_BASE_DIR / 'run_synthea'),
        '--exporter.baseDirectory',
        str(output_folder),
    ]
    zip_file = 'Synthea_'

    if population:
        synthea_cmd.extend(['-p', str(population)])
        zip_file += f'population_{population}_'

    if gender:
        # The command gets the upper-cased value; the file name keeps the value as given.
        synthea_cmd.extend(['-g', gender.upper()])
        zip_file += f'gender_{gender}_'

    if age:
        synthea_cmd.extend(['-a', age])
        zip_file += f'age_{age}_'

    if module:
        synthea_cmd.extend(['-m', module])
        zip_file += f'module_{module}_'

    if state:
        # The state is passed as a bare argument after all option flags.
        synthea_cmd.append(state)
        zip_file += f'state_{state}'

    process_ok = False
    log_lines = []
    # stderr is merged into stdout: a separate stderr pipe that is never read
    # can fill up and block the child process, and merging also puts the Java
    # error output into the returned log.
    with subprocess.Popen(
        synthea_cmd,
        cwd=settings.SYNTHEA_BASE_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as process:
        for raw_line in process.stdout:
            # Replace undecodable bytes instead of aborting the whole run on them.
            line = raw_line.decode('utf8', errors='replace')
            log_lines.append(line)
            if not process_ok and 'BUILD SUCCESSFUL' in line:
                process_ok = True

    log = ''.join(log_lines)

    if not process_ok:
        return (log, None)

    zip_path = output_folder / f'{zip_file}_{temp_id}.zip'
    with ZipFile(zip_path, 'w') as export:
        for file in (output_folder / settings.SYNTHEA_EXPORT_TYPE).iterdir():
            # Store every exported file flat in the archive under its own file name.
            export.write(file, file.name)

    return (log, zip_path)