synthea_webservice/webservice/apps/synthea/lib/utils.py

113 lines
4.5 KiB
Python

from pathlib import Path
import pandas as pd
import shlex
import subprocess
from zipfile import ZipFile
import json
from django.conf import settings
from uuid import uuid4
def available_states():
    """Return the states known to Synthea, ordered by name.

    The names are the unique values of the ``STNAME`` column of
    'geography/demographics.csv' inside the `settings.SYNTHEA_RESOURCE_DIR` folder.

    Returns:
        List: One dict per state with the keys 'id' and 'name' (both hold the
        state name), sorted case-insensitively on the name.
    """
    demographics_csv = settings.SYNTHEA_RESOURCE_DIR / 'geography/demographics.csv'
    demographics = pd.read_csv(demographics_csv, index_col=False)

    # The state name is case sensitive for Synthea, so it is used unchanged as the id.
    entries = [
        {'id': state_name, 'name': state_name}
        for state_name in demographics.STNAME.unique()
    ]

    # Order on the lower-cased name only; the stored values keep their casing.
    entries.sort(key=lambda entry: entry['name'].lower())
    return entries
def available_modules():
    """Return the Synthea modules found in `settings.SYNTHEA_MODULE_DIR`, ordered by name.

    Only regular files ending on '.json' that live directly in that folder are
    loaded. The assumption is that .json files in the main folder are modules
    and everything in sub folders is a submodule.

    Returns:
        List: One dict per module, sorted case-insensitively on the module name.
        'id' is the file name without the '.json' suffix and 'name' is the
        value of the 'name' key inside the module file.

    Raises:
        json.JSONDecodeError: When a module file does not contain valid JSON.
        KeyError: When a module file has no top-level 'name' key.
    """
    modules = []
    for module in settings.SYNTHEA_MODULE_DIR.iterdir():
        if not module.is_file() or module.suffix != '.json':
            continue
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding.
        data = json.loads(module.read_text(encoding='utf-8'))
        # `stem` removes only the trailing suffix; str.replace() would also
        # strip any '.json' that occurs elsewhere in the file name.
        modules.append({'id': module.stem, 'name': data['name']})

    modules.sort(key=lambda m: m['name'].lower())
    return modules
def run_synthea(state, population = 50, gender = None, age = None, module = None):
    """Run the Synthea application and zip the generated export files.

    This method expects Synthea to be installed on the `settings.SYNTHEA_BASE_DIR` location.
    The output is written to a unique folder in `settings.SYNTHEA_OUTPUT_DIR`, so multiple
    Synthea processes can run in parallel. The files in the `settings.SYNTHEA_EXPORT_TYPE`
    sub folder of that unique folder are zipped into a zip file inside the same unique folder.
    The zip file will not be deleted afterwards, so cleanup needs to be done manually.

    This call blocks until the Synthea process has finished.

    Args:
        state (str, required): The state where to generate synthetic patient data for.
        population (int, optional): The amount of patients to generate. Defaults to 50.
        gender (str, optional): Either generate only male(m), only female(f), or None for both. Defaults to None.
        age (str, optional): This is the age range of the generated patients. Input is always like [min_age]-[max_age]. Defaults to None.
        module (str, optional): The module to use for generating patient data. When None, all modules are used. Defaults to None.

    Raises:
        Exception: When the output of the Synthea run does not contain 'BUILD SUCCESSFUL'.
            The exception message is the full process log (stdout and stderr combined),
            so it includes the Java error.

    Returns:
        (str, Path): The process log and the location of the zip file. The zip file
        has the enabled options in the file name.
    """
    # Add a unique dir to the output, so multiple Synthea processes can run parallel
    temp_id = uuid4().hex
    output_folder = settings.SYNTHEA_OUTPUT_DIR / temp_id

    synthea_cmd = [
        str(settings.SYNTHEA_BASE_DIR / 'run_synthea'),
        '--exporter.baseDirectory',
        str(output_folder),
    ]
    zip_file = 'Synthea_'

    if population:
        synthea_cmd += ['-p', str(population)]
        zip_file += f'population_{population}_'

    if gender:
        synthea_cmd += ['-g', gender.upper()]
        zip_file += f'gender_{gender}_'

    if age:
        synthea_cmd += ['-a', age]
        zip_file += f'age_{age}_'

    if module:
        synthea_cmd += ['-m', module]
        zip_file += f'module_{module}_'

    if state:
        # The state is passed as the positional argument, after all options.
        synthea_cmd.append(state)
        zip_file += f'state_{state}'

    process_ok = False
    log_lines = []
    # stderr is merged into stdout: a second pipe that is never read can fill up
    # and block the child process, and the error output belongs in the log anyway.
    with subprocess.Popen(
        synthea_cmd,
        cwd=settings.SYNTHEA_BASE_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as process:
        for raw_line in process.stdout:
            # Replace undecodable bytes instead of aborting the run on them.
            line = raw_line.decode('utf8', errors='replace')
            log_lines.append(line)
            if 'BUILD SUCCESSFUL' in line:
                process_ok = True
    log = ''.join(log_lines)

    if not process_ok:
        raise Exception(log)

    zip_path = output_folder / f'{zip_file}_{temp_id}.zip'
    with ZipFile(zip_path, 'w') as export:
        for file in (output_folder / settings.SYNTHEA_EXPORT_TYPE).iterdir():
            # Store every export file flat in the archive, under its own file name.
            export.write(file, file.name)
    return (log, zip_path)