1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-13 22:51:37 +01:00
AMR/data-raw/_generate_python_wrapper.sh

303 lines
11 KiB
Bash
Raw Normal View History

2024-10-10 16:38:20 +02:00
#!/bin/bash
# ==================================================================== #
# TITLE: #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE CODE: #
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
# https://doi.org/10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
# Clean up: empty the generated Python package directory and recreate the
# directory layout that the rest of this script writes into.
rm -rf -- ../PythonPackage/AMR/*
# Every later step writes below this directory, so stop if it cannot be made
mkdir -p ../PythonPackage/AMR/AMR || exit 1

# Output Python files (all paths are relative to the data-raw directory)
setup_file="../PythonPackage/AMR/setup.py"
functions_file="../PythonPackage/AMR/AMR/functions.py"
datasets_file="../PythonPackage/AMR/AMR/datasets.py"
init_file="../PythonPackage/AMR/AMR/__init__.py"
# DESCRIPTION file of the R package, the source of the version number
description_file="../DESCRIPTION"

# Write the datasets Python module. When imported it points R at a library
# directory inside the Python environment, installs or updates the AMR R
# package there, and exposes the AMR data sets as pandas DataFrames.
# The delimiter is quoted so the shell copies the Python code verbatim
# (no parameter, command or backslash expansion).
cat <<'EOL' > "$datasets_file"
BLUE = '\033[94m'
GREEN = '\033[32m'
RESET = '\033[0m'

import os
import sys
from rpy2 import robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr, isinstalled
import pandas as pd
import importlib.metadata as metadata

# Get the path to the virtual environment. VIRTUAL_ENV is unset when no
# virtual environment is active; fall back to the prefix of the running
# interpreter so that os.path.join() below never receives None.
venv_path = os.getenv('VIRTUAL_ENV') or sys.prefix

# Define R library path within the venv
r_lib_path = os.path.join(venv_path, "R_libs")
# Ensure the R library path exists
os.makedirs(r_lib_path, exist_ok=True)
# Set the R library path in .libPaths
base = importr('base')
base._libPaths(r_lib_path)

# Check if the AMR package is installed in R
if not isinstalled('AMR'):
    utils = importr('utils')
    utils.install_packages('AMR', repos='https://msberends.r-universe.dev', quiet=True)

# Python package version of AMR
try:
    python_amr_version = metadata.version('AMR')
except metadata.PackageNotFoundError:
    python_amr_version = ''

# R package version of AMR
r_amr_version = robjects.r(f'as.character(packageVersion("AMR", lib.loc = "{r_lib_path}"))')[0]

# Compare R and Python package versions
if r_amr_version != python_amr_version:
    try:
        print(f"{BLUE}AMR:{RESET} Updating package version{RESET}", flush=True)
        utils = importr('utils')
        utils.install_packages('AMR', repos='https://msberends.r-universe.dev', quiet=True)
    except Exception as e:
        print(f"{BLUE}AMR:{RESET} Could not update: {e}{RESET}", flush=True)

print(f"{BLUE}AMR:{RESET} Setting up R environment and AMR datasets...", flush=True)

# Activate the automatic conversion between R and pandas DataFrames
pandas2ri.activate()

# example_isolates
example_isolates = pandas2ri.rpy2py(robjects.r('''
df <- AMR::example_isolates
df[] <- lapply(df, function(x) {
    if (inherits(x, c("Date", "POSIXt", "factor"))) {
        as.character(x)
    } else {
        x
    }
})
df <- df[, !sapply(df, is.list)]
df
'''))
example_isolates['date'] = pd.to_datetime(example_isolates['date'])

# microorganisms
microorganisms = pandas2ri.rpy2py(robjects.r('AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]'))
antibiotics = pandas2ri.rpy2py(robjects.r('AMR::antibiotics[, !sapply(AMR::antibiotics, is.list)]'))
clinical_breakpoints = pandas2ri.rpy2py(robjects.r('AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]'))

print(f"{BLUE}AMR:{RESET} {GREEN}Done.{RESET}", flush=True)
EOL
# Re-export the data sets from the package __init__ file. The target path is
# quoted so that it is never word-split or glob-expanded.
{
  echo "from .datasets import example_isolates"
  echo "from .datasets import microorganisms"
  echo "from .datasets import antibiotics"
  echo "from .datasets import clinical_breakpoints"
} >> "$init_file"
# Write header to the functions Python file, including the convert_to_python
# function that every generated wrapper passes its R result through.
# The delimiter is quoted so the shell copies the Python code verbatim.
cat <<'EOL' > "$functions_file"
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame
from rpy2.robjects import pandas2ri
import pandas as pd
import numpy as np

# Activate automatic conversion between R data frames and pandas data frames
pandas2ri.activate()

# Import the AMR R package
amr_r = importr('AMR')

def convert_to_python(r_output):
    # Check if it's a StrVector (R character vector)
    if isinstance(r_output, StrVector):
        return list(r_output)  # Convert to a Python list of strings

    # Check if it's a FactorVector (R factor)
    elif isinstance(r_output, FactorVector):
        return list(r_output)  # Convert to a list of integers (factor levels)

    # Check if it's an IntVector or FloatVector (numeric R vectors)
    elif isinstance(r_output, (IntVector, FloatVector)):
        return list(r_output)  # Convert to a Python list of integers or floats

    # Check if it's a pandas-compatible R data frame
    elif isinstance(r_output, pd.DataFrame):
        return r_output  # Return as pandas DataFrame (already converted by pandas2ri)
    elif isinstance(r_output, DataFrame):
        return pandas2ri.rpy2py(r_output)  # Return as pandas DataFrame

    # Check if the input is a NumPy array and has a string data type
    if isinstance(r_output, np.ndarray) and np.issubdtype(r_output.dtype, np.str_):
        return r_output.tolist()  # Convert to a regular Python list

    # Fall-back
    return r_output
EOL
# Directory where the .Rd files are stored (update path as needed)
rd_dir="../man"

#######################################
# Append Python wrappers for the functions listed in the \usage block of one
# .Rd file.
# Arguments:
#   $1 - .Rd file to read
#   $2 - Python file that receives the "def" blocks (appended)
#   $3 - Python __init__ file that receives the import lines (appended)
# Outputs:
#   Appends to the files given as $2 and $3; writes nothing to stdout.
#######################################
generate_python_wrappers() {
  local rd_path=$1
  local functions_out=$2
  local init_out=$3

  # The output paths are handed to awk with -v instead of being spliced into
  # the program text, so spaces or quotes in a path cannot alter the program.
  awk -v functions_file="$functions_out" -v init_file="$init_out" '
    BEGIN {
      usage_started = 0
    }

    # Detect the start of the \usage block
    /^\\usage\{/ {
      usage_started = 1
    }

    # Detect the end of the \usage block
    usage_started && /^\}/ {
      usage_started = 0
    }

    # Process lines within the \usage block that look like function calls
    usage_started && /^[a-zA-Z_]+/ {
      func_line = $0

      # Extract the function name (up to the first parenthesis)
      sub(/\(.*/, "", func_line)
      func_name = func_line
      func_name_py = func_name

      # Replace dots with underscores in Python function names
      gsub(/\./, "_", func_name_py)

      # Extract the arguments (inside the parentheses)
      sub(/^[^(]+\(/, "", $0)
      sub(/\).*/, "", $0)
      func_args = $0

      # Count the number of arguments
      arg_count = split(func_args, arg_array, ",")

      # Handle "..." arguments (convert them to *args, **kwargs in Python)
      gsub("\\.\\.\\.", "*args, **kwargs", func_args)

      # Remove default values from arguments
      gsub(/ = [^,]+/, "", func_args)

      # If no arguments, let the wrapper accept and forward anything
      if (arg_count == 0) {
        func_args = "*args, **kwargs"
      }

      # If more than 1 argument, replace the 2nd to nth arguments with *args, **kwargs
      if (arg_count > 1) {
        first_arg = arg_array[1]
        func_args = first_arg ", *args, **kwargs"
      }
      if (arg_array[1] == "...") {
        func_args = "*args, **kwargs"
      }

      # Skip usage lines without parentheses: there the argument text is
      # still the whole line and therefore identical to the name
      if (func_name_py == func_args) {
        next
      }

      # Skip functions matching the regex pattern
      if (func_name_py ~ /^(x |facet|scale|set|get|NA_|microorganisms|antibiotics|clinical_breakpoints|example_isolates)/) {
        next
      }

      # Replace TRUE/FALSE/NULL
      gsub("TRUE", "True", func_args)
      gsub("FALSE", "False", func_args)
      gsub("NULL", "None", func_args)

      # Write the Python function definition to the output file
      print "def " func_name_py "(" func_args "):" >> functions_file
      print " \"\"\"See our website of the R package for the manual: https://msberends.github.io/AMR/index.html\"\"\"" >> functions_file
      print " return convert_to_python(amr_r." func_name_py "(" func_args "))" >> functions_file
      print "from .functions import " func_name_py >> init_file
    }
  ' "$rd_path"
}

# Iterate through each .Rd file in the man directory
for rd_file in "$rd_dir"/*.Rd; do
  # An unmatched glob stays a literal pattern; skip it instead of giving a
  # non-existent file name to awk
  [[ -e "$rd_file" ]] || continue
  generate_python_wrappers "$rd_file" "$functions_file" "$init_file"
done
# Output completion message
echo "Python wrapper functions generated in $functions_file."
echo "Python wrapper functions listed in $init_file."

# Use the Python vignette as README of the Python package: copy it, then
# delete everything from the first line up to and including the
# "# Introduction" heading.
# NOTE: "sed -i" without a backup suffix is GNU sed syntax.
# Success is only reported when both steps worked.
if cp ../vignettes/AMR_for_Python.Rmd ../PythonPackage/AMR/README.md &&
  sed -i '1,/^# Introduction$/d' ../PythonPackage/AMR/README.md; then
  echo "README copied"
else
  echo "Could not create ../PythonPackage/AMR/README.md" >&2
fi
#######################################
# Print the value of the first "Version:" field of an R DESCRIPTION file.
# Arguments:
#   $1 - path to the DESCRIPTION file
# Outputs:
#   The version string on stdout; nothing when the field is absent.
#######################################
read_description_version() {
  # The file is attached with a redirection, so a missing or empty path is
  # reported by the shell and awk never falls back to reading the terminal.
  awk '/^Version:/ { print $2; exit }' < "$1"
}

# Extract the relevant fields from DESCRIPTION
version=$(read_description_version "$description_file")
# Write the setup.py file
# An empty version would produce a setup.py that cannot be built, so stop
# before writing it.
if [[ -z "${version:-}" ]]; then
  echo "No version found in $description_file; cannot write $setup_file." >&2
  exit 1
fi

# The delimiter is left unquoted on purpose: the shell has to expand $version.
cat <<EOL > "$setup_file"
from setuptools import setup, find_packages

setup(
    name='AMR',
    version='$version',
    packages=find_packages(),
    install_requires=[
        'rpy2',
        'numpy',
        'pandas',
    ],
    author='Dr. Matthijs Berends',
    author_email='m.s.berends@umcg.nl',
    description='A Python wrapper for the AMR R package',
    long_description=open('README.md').read(),
    long_description_content_type='text/markdown',
    url='https://github.com/msberends/AMR',
    project_urls={
        'Bug Tracker': 'https://github.com/msberends/AMR/issues',
    },
    license='GPL 2',
    classifiers=[
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
EOL

# Output completion message
echo "setup.py has been generated in $setup_file."

# Build the Python package from the generated sources.
# Stop when the directory cannot be entered, so that pip and the build never
# run from an unrelated working directory.
cd ../PythonPackage/AMR || exit 1
pip3 install build
python3 -m build
# python3 setup.py sdist bdist_wheel