# 2020-11-13 15:31:14 +01:00
from pathlib import Path
import pandas as pd
import shlex
import subprocess
from zipfile import ZipFile
import json
# 2020-11-16 16:30:41 +01:00
from django . conf import settings
from uuid import uuid4
def available_states():
    """Return the states available to Synthea, sorted by name.

    The states are the unique, non-empty values of the ``STNAME`` column of
    ``geography/demographics.csv`` inside ``settings.SYNTHEA_RESOURCE_DIR``.

    Returns:
        list[dict]: One ``{'id': <state>, 'name': <state>}`` dict per state,
        sorted case-insensitively on the state name.
    """
    # The state name doubles as the id and is kept verbatim: the state name
    # for Synthea is case sensitive.
    demographics = pd.read_csv(
        settings.SYNTHEA_RESOURCE_DIR / 'geography/demographics.csv',
        index_col=False,
    )

    # Rows without a state name load as NaN; drop them so the case-insensitive
    # sort below only ever sees strings.
    state_names = demographics['STNAME'].dropna().unique().tolist()

    return [
        {'id': state, 'name': state}
        for state in sorted(state_names, key=lambda name: name.lower())
    ]
def available_modules():
    """Return the Synthea modules found in ``settings.SYNTHEA_MODULE_DIR``.

    Only regular files ending in ``.json`` that sit directly in the module
    folder are loaded. The assumption is that these are the modules and that
    anything in sub folders is a submodule.

    Raises:
        json.JSONDecodeError: When a module file does not contain valid JSON.
        KeyError: When a module file has no top-level ``name`` entry.

    Returns:
        list[dict]: One ``{'id': <file name without .json>, 'name': <module name>}``
        dict per module, sorted case-insensitively on the module name.
    """
    modules = []
    for module_file in settings.SYNTHEA_MODULE_DIR.iterdir():
        if not (module_file.is_file() and module_file.suffix == '.json'):
            continue

        # Read as UTF-8 explicitly so parsing does not depend on the platform
        # default encoding.
        data = json.loads(module_file.read_text(encoding='utf-8'))

        # ``stem`` removes only the trailing extension; a plain string replace
        # would also strip '.json' from the middle of a file name.
        modules.append({'id': module_file.stem, 'name': data['name']})

    return sorted(modules, key=lambda m: m['name'].lower())
# 2020-11-27 12:49:03 +01:00
def run_synthea(state, population=50, gender=None, age=None, module=None):
    """Run the Synthea application and zip the exported files.

    This function expects Synthea to be installed in ``settings.SYNTHEA_BASE_DIR``.
    The output is written to a unique folder in ``settings.SYNTHEA_OUTPUT_DIR``;
    the files in its ``settings.SYNTHEA_EXPORT_TYPE`` sub folder are then added
    to a zip file inside that unique folder.

    The log and the zip file location are returned for further processing.
    Nothing is deleted afterwards, so cleanup needs to be done manually.

    Args:
        state (str, required): The state to generate synthetic patient data for.
            Passed as the last, positional argument of the command.
        population (int, optional): The amount of patients to generate (``-p``).
            Defaults to 50.
        gender (str, optional): Generate only male (m), only female (f), or None
            for both. Passed upper-cased to ``-g``. Defaults to None.
        age (str, optional): The age range of the generated patients (``-a``).
            Input is always like [min_age]-[max_age]. Defaults to None.
        module (str, optional): The module to use for generating patient data
            (``-m``). When None, no module option is passed. Defaults to None.

    Raises:
        Exception: When the output never contains 'BUILD SUCCESSFUL'. The
            exception message is the complete process log (stdout and stderr).

    Returns:
        (str, Path): The process log and the path of the zip file. The enabled
        options are part of the zip file name.
    """
    # Add a unique dir to the output, so multiple Synthea processes can run parallel
    temp_id = uuid4().hex
    output_folder = settings.SYNTHEA_OUTPUT_DIR / temp_id

    # Plain strings keep the argument list portable across platforms/versions.
    synthea_cmd = [
        str(settings.SYNTHEA_BASE_DIR / 'run_synthea'),
        '--exporter.baseDirectory',
        str(output_folder),
    ]
    zip_file = 'Synthea_'

    if population:
        synthea_cmd += ['-p', str(population)]
        zip_file += f'population_{population}_'

    if gender:
        synthea_cmd += ['-g', gender.upper()]
        zip_file += f'gender_{gender}_'

    if age:
        synthea_cmd += ['-a', age]
        zip_file += f'age_{age}_'

    if module:
        synthea_cmd += ['-m', module]
        zip_file += f'module_{module}_'

    if state:
        synthea_cmd.append(state)
        zip_file += f'state_{state}'

    process_ok = False
    log_lines = []

    # stderr is merged into stdout: a separate stderr pipe that is never read
    # can fill up and block the child process, and the error output would be
    # missing from the log that is raised on failure.
    with subprocess.Popen(
        synthea_cmd,
        cwd=settings.SYNTHEA_BASE_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as process:
        for raw_line in process.stdout:
            # Never let a stray undecodable byte in the tool output abort the run.
            line = raw_line.decode('utf8', errors='replace')
            log_lines.append(line)
            if not process_ok:
                process_ok = 'BUILD SUCCESSFUL' in line

    log = ''.join(log_lines)

    if not process_ok:
        raise Exception(log)

    zip_path = output_folder / f'{zip_file}_{temp_id}.zip'
    with ZipFile(zip_path, 'w') as export:
        for file in (output_folder / settings.SYNTHEA_EXPORT_TYPE).iterdir():
            # Store every exported file flat, under its own file name.
            export.write(file, file.name)

    return (log, zip_path)