synthea_webservice/webservice/apps/synthea/lib/utils.py

83 lines
2.8 KiB
Python
Raw Normal View History

2020-11-13 15:31:14 +01:00
from pathlib import Path
import pandas as pd
import shlex
import subprocess
from zipfile import ZipFile
import json
2020-11-16 16:30:41 +01:00
from django.conf import settings
from uuid import uuid4
2020-11-13 15:31:14 +01:00
2020-11-16 16:30:41 +01:00
def available_states():
    """Return the states Synthea can generate data for, sorted by name.

    The state names are the unique values of the ``STNAME`` column in the
    ``demographics.csv`` file found in ``settings.SYNTHEA_STATES_DIR``.

    Returns:
        A list of ``{'id': <state>, 'name': <state>}`` dicts, ordered
        case-insensitively on ``name``.
    """
    # The id is kept exactly as it appears in the CSV, because the state name
    # is case sensitive for Synthea.
    demographics = pd.read_csv(settings.SYNTHEA_STATES_DIR / 'demographics.csv', index_col=False)
    entries = [{'id': state_name, 'name': state_name} for state_name in demographics.STNAME.unique()]
    # Sort on name, ignoring case
    entries.sort(key=lambda entry: entry['name'].lower())
    return entries
def available_modules():
    """Return the Synthea modules found in ``settings.SYNTHEA_MODULE_DIR``.

    Assumption here: only ``.json`` files directly in the main folder are
    modules. Everything in sub folders is treated as a submodule and skipped.

    Returns:
        A list of ``{'id': <file name without extension>, 'name': <name>}``
        dicts, ordered case-insensitively on ``name``. ``name`` is the value
        of the ``name`` key in the module's JSON document.

    Raises:
        KeyError: when a module file has no ``name`` key.
        json.JSONDecodeError: when a module file is not valid JSON.
    """
    modules = []
    for module in settings.SYNTHEA_MODULE_DIR.iterdir():
        if not (module.is_file() and module.suffix == '.json'):
            continue
        # JSON is read explicitly as UTF-8 instead of relying on the platform default.
        data = json.loads(module.read_text(encoding='utf-8'))
        # ``stem`` strips only the final extension; a plain str.replace would
        # also remove any '.json' occurring elsewhere in the file name.
        modules.append({'id': module.stem, 'name': data['name']})
    modules.sort(key=lambda k: k['name'].lower())
    return modules
def run_synthea(state = None, population = None, gender = None, age = None, module = None):
    """Run Synthea with the given options and zip the exported files.

    Each call writes into its own unique sub folder of
    ``settings.SYNTHEA_OUTPUT_DIR``, so multiple Synthea processes can run in
    parallel without overwriting each other's output.

    Args:
        state: State name, passed to Synthea as the positional argument.
        population: Population size, passed with ``-p``.
        gender: Gender, upper-cased and passed with ``-g``.
        age: Age (range), passed with ``-a``.
        module: Module id, passed with ``-m``.

    Returns:
        A ``(log, zip_path)`` tuple: the decoded standard output of the
        Synthea run, and the ``Path`` of the zip file holding every entry of
        the ``settings.SYNTHEA_EXPORT_TYPE`` export folder.

    Raises:
        Exception: when the output does not contain ``BUILD SUCCESSFUL``. The
            message is the standard output followed by the standard error
            output (if any).
    """
    # Add a unique dir to the output, so multiple Synthea processes can run parallel
    temp_id = uuid4().hex
    output_folder = settings.SYNTHEA_OUTPUT_DIR / temp_id

    # All arguments are handed over as plain strings: path-like or numeric
    # entries in the argument list are not accepted on every platform/version.
    synthea_cmd = [
        str(settings.SYNTHEA_BASE_DIR / 'run_synthea'),
        '--exporter.baseDirectory',
        str(output_folder),
    ]

    # The zip file name records the options that were used for this run.
    zip_file = 'Synthea_'
    if population:
        synthea_cmd += ['-p', str(population)]
        zip_file += f'population_{population}_'
    if gender:
        synthea_cmd += ['-g', gender.upper()]
        zip_file += f'gender_{gender}_'
    if age:
        synthea_cmd += ['-a', str(age)]
        zip_file += f'age_{age}_'
    if module:
        synthea_cmd += ['-m', module]
        zip_file += f'module_{module}_'
    if state:
        synthea_cmd.append(state)
        zip_file += f'state_{state}'

    # subprocess.run drains stdout and stderr concurrently. Reading only stdout
    # while stderr is also piped can block the child forever once the stderr
    # pipe buffer is full.
    completed = subprocess.run(
        synthea_cmd,
        cwd=settings.SYNTHEA_BASE_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    log = completed.stdout.decode('utf8', errors='replace')

    # Success is detected by the build marker in the output.
    if 'BUILD SUCCESSFUL' not in log:
        error_output = completed.stderr.decode('utf8', errors='replace')
        # Include stderr, since that is usually where the failure reason ends up.
        raise Exception(log + error_output if error_output else log)

    zip_path = output_folder / f'{zip_file}_{temp_id}.zip'
    with ZipFile(zip_path, 'w') as export:
        for file in (output_folder / settings.SYNTHEA_EXPORT_TYPE).iterdir():
            # Store the files flat in the archive, without their folder structure.
            export.write(file, file.name)

    return (log, zip_path)