1
0
mirror of https://github.com/msberends/AMR.git synced 2026-06-29 13:36:21 +02:00
Files
AMR/data-raw/_generate_python_wrapper.sh

474 lines
16 KiB
Bash

#!/bin/bash
# ==================================================================== #
# TITLE: #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE CODE: #
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
# https://doi.org/10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://amr-for-r.org #
# ==================================================================== #
# ---- Strict mode and working directory ---- #
# Stop at the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline, so that a broken generation step is
# never followed by a package build.
set -euo pipefail

# Every path used by this script is relative to the directory that holds the
# script itself. Change into that directory first so the "rm -rf" below can
# never act on whatever directory the caller happened to be in.
cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")"

# Refuse to delete anything unless the expected repository layout is present.
if [[ ! -f ../DESCRIPTION || ! -d ../man ]]; then
  echo "ERROR: ../DESCRIPTION or ../man not found next to $(pwd); aborting." >&2
  exit 1
fi

# Clean up
pkg_root="../PythonPackage/AMR"
# ${pkg_root:?} aborts instead of expanding to "/*" should the variable ever
# be empty.
rm -rf -- "${pkg_root:?}"/*
mkdir -p -- "$pkg_root/AMR"

# Output files
setup_file="$pkg_root/setup.py"
init_file="$pkg_root/AMR/__init__.py"
engine_file="$pkg_root/AMR/_engine.py"
datasets_file="$pkg_root/AMR/datasets.py"
functions_file="$pkg_root/AMR/functions.py"
beta_file="$pkg_root/AMR/beta.py"
description_file="../DESCRIPTION"
# ---- _engine.py: R environment setup and installation logic ---- #
#######################################
# Write the _engine.py module (R library setup and AMR installation logic).
# The heredoc delimiter is quoted, so the Python text is written verbatim
# with no shell expansion.
# Arguments:
#   $1 - path of the Python file to create or overwrite
# Returns:
#   exit status of the write
#######################################
write_engine_py() {
  local out_file=$1
  cat <<'EOL' > "$out_file"
import os
import sys
import importlib.metadata as metadata

# Get the path to the virtual environment
venv_path = sys.prefix
r_lib_path = os.path.join(venv_path, "R_libs")
os.makedirs(r_lib_path, exist_ok=True)

# Set environment variable before importing rpy2
os.environ['R_LIBS_SITE'] = r_lib_path

from rpy2 import robjects
from rpy2.robjects.vectors import StrVector
from rpy2.robjects.packages import importr, isinstalled

# Import base and utils once
base = importr('base')
utils = importr('utils')

# Silence R console output entirely
robjects.r('suppressMessages(suppressWarnings(sink(tempfile())))')
base._libPaths(r_lib_path)

_installed_source = None


def _r_version():
    """Return the currently installed AMR R package version, or None."""
    try:
        return str(robjects.r(
            f'as.character(packageVersion("AMR", lib.loc = "{r_lib_path}"))')[0])
    except Exception:
        return None


def _py_version():
    """Return the Python AMR package version from metadata, or empty string."""
    try:
        return str(metadata.version('AMR'))
    except metadata.PackageNotFoundError:
        return ''


def _install_cran():
    """Install AMR from CRAN into the isolated library."""
    print("AMR: Installing from CRAN...", flush=True)
    utils.install_packages(
        'AMR',
        repos='https://cloud.r-project.org',
        lib=r_lib_path,
        quiet=True
    )


def _install_github():
    """Install AMR development version from GitHub into the isolated library."""
    print("AMR: Installing development version from GitHub...", flush=True)
    utils.install_packages(
        StrVector(['remotes', 'desc']),
        repos='https://cloud.r-project.org',
        lib=r_lib_path,
        quiet=True
    )
    remotes = importr('remotes', lib_loc=r_lib_path)
    remotes.install_github('msberends/AMR', lib=r_lib_path, quiet=True)


def ensure_amr(source="cran"):
    """Ensure AMR is installed from the requested source. Idempotent per source."""
    global _installed_source
    if _installed_source == source:
        return
    install_fn = _install_github if source == "github" else _install_cran
    if not isinstalled('AMR', lib_loc=r_lib_path):
        install_fn()
    else:
        # Check for version mismatch and update if needed
        r_ver = _r_version()
        py_ver = _py_version()
        if r_ver != py_ver:
            try:
                install_fn()
            except Exception as e:
                print(f"AMR: Could not update ({e})", flush=True)
    print(f"AMR: R package version {_r_version()} ready.", flush=True)
    _installed_source = source


def restore_sink():
    """Restore R console output after setup is complete."""
    try:
        robjects.r('sink()')
    except Exception:
        pass
EOL
}

write_engine_py "$engine_file"
# ---- datasets.py: only dataset loading ---- #
#######################################
# Write the datasets.py module (cached loading of the AMR data sets).
# The heredoc delimiter is quoted, so the Python text is written verbatim
# with no shell expansion.
# Arguments:
#   $1 - path of the Python file to create or overwrite
# Returns:
#   exit status of the write
#######################################
write_datasets_py() {
  local out_file=$1
  cat <<'EOL' > "$out_file"
import pandas as pd
from rpy2 import robjects
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
from ._engine import ensure_amr, restore_sink

_cache = {}
_loaded_source = None


def _load_datasets(source="cran"):
    """Load all AMR datasets into the module cache."""
    global _loaded_source
    if _cache and _loaded_source == source:
        return
    if _cache and _loaded_source != source:
        _cache.clear()
    ensure_amr(source)
    with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
        _cache['example_isolates'] = _load_example_isolates()
        _cache['microorganisms'] = robjects.r(
            'AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]')
        _cache['antimicrobials'] = robjects.r(
            'AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]')
        _cache['clinical_breakpoints'] = robjects.r(
            'AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]')
    restore_sink()
    _loaded_source = source


def _load_example_isolates():
    df = robjects.r('''
    df <- AMR::example_isolates
    df[] <- lapply(df, function(x) {
        if (inherits(x, c("Date", "POSIXt", "factor"))) {
            as.character(x)
        } else {
            x
        }
    })
    df <- df[, !sapply(df, is.list)]
    df
    ''')
    df['date'] = pd.to_datetime(df['date'])
    return df


def get(name, source="cran"):
    """Retrieve a dataset by name, installing AMR if needed."""
    _load_datasets(source)
    return _cache[name]
EOL
}

write_datasets_py "$datasets_file"
# ---- __init__.py: lazy module, CRAN by default ---- #
#######################################
# Write the package __init__.py, which swaps the module class for one whose
# __getattr__ resolves data sets and wrapper functions on first access.
# The heredoc delimiter is quoted, so the Python text is written verbatim
# with no shell expansion.
# Arguments:
#   $1 - path of the Python file to create or overwrite
# Returns:
#   exit status of the write
#######################################
write_init_py() {
  local out_file=$1
  cat <<'EOL' > "$out_file"
import sys

_DATASETS = frozenset({
    'example_isolates', 'microorganisms',
    'antimicrobials', 'clinical_breakpoints'
})


class _AMRModule(type(sys.modules[__name__])):
    """Lazy-loading module: nothing runs until an attribute is accessed."""

    def __getattr__(self, name):
        if name in _DATASETS:
            from .datasets import get
            return get(name, source="cran")
        try:
            from . import functions
            return getattr(functions, name)
        except AttributeError:
            raise AttributeError(
                f"module 'AMR' has no attribute '{name}'")


sys.modules[__name__].__class__ = _AMRModule
EOL
}

write_init_py "$init_file"
# ---- beta.py: GitHub development version ---- #
#######################################
# Write the beta.py module, the lazy-loading counterpart of __init__.py that
# requests data sets with source="github".
# The heredoc delimiter is quoted, so the Python text is written verbatim
# with no shell expansion.
# Arguments:
#   $1 - path of the Python file to create or overwrite
# Returns:
#   exit status of the write
#######################################
write_beta_py() {
  local out_file=$1
  cat <<'EOL' > "$out_file"
import sys

_DATASETS = frozenset({
    'example_isolates', 'microorganisms',
    'antimicrobials', 'clinical_breakpoints'
})


class _BetaModule(type(sys.modules[__name__])):
    """Lazy-loading module: installs AMR from GitHub on first access."""

    def __getattr__(self, name):
        if name in _DATASETS:
            from .datasets import get
            return get(name, source="github")
        try:
            from . import functions
            return getattr(functions, name)
        except AttributeError:
            raise AttributeError(
                f"module 'AMR.beta' has no attribute '{name}'")


sys.modules[__name__].__class__ = _BetaModule
EOL
}

write_beta_py "$beta_file"
# ---- functions.py: R-to-Python wrapper functions ---- #
#######################################
# Write the fixed preamble of functions.py: imports, the input/output
# converters and the r_to_python decorator. The per-function wrappers are
# appended to the same file afterwards.
# The heredoc delimiter is quoted, so the Python text is written verbatim
# with no shell expansion.
# Arguments:
#   $1 - path of the Python file to create or overwrite
# Returns:
#   exit status of the write
#######################################
write_functions_py() {
  local out_file=$1
  cat <<'EOL' > "$out_file"
import functools
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
import pandas as pd
import numpy as np
from ._engine import ensure_amr

# Ensure AMR is available before importing it in R
ensure_amr("cran")
amr_r = importr('AMR')


def convert_to_r(value):
    """Convert Python lists/tuples to typed R vectors.
    rpy2's default_converter passes Python lists to R as R lists, not as
    character/numeric vectors. This causes element-wise type-check functions
    such as is.mic(), is.sir(), and is.disk() to return a logical vector
    rather than a single logical, breaking R's scalar && operator.
    This helper converts Python lists and tuples to the appropriate R vector
    type based on the element types, so R always receives a proper vector."""
    if isinstance(value, (list, tuple)):
        if len(value) == 0:
            return StrVector([])
        # bool must be checked before int because bool is a subclass of int
        if all(isinstance(v, bool) for v in value):
            return robjects.vectors.BoolVector(value)
        if all(isinstance(v, int) for v in value):
            return IntVector(value)
        if all(isinstance(v, float) for v in value):
            return FloatVector(value)
        if all(isinstance(v, str) for v in value):
            return StrVector(value)
        # Mixed types: coerce all to string
        return StrVector([str(v) for v in value])
    return value


def convert_to_python(r_output):
    # Check if it's a StrVector (R character vector)
    if isinstance(r_output, StrVector):
        return list(r_output)  # Convert to a Python list of strings
    # Check if it's a FactorVector (R factor)
    elif isinstance(r_output, FactorVector):
        return list(r_output)  # Convert to a list of integers (factor levels)
    # Check if it's an IntVector or FloatVector (numeric R vectors)
    elif isinstance(r_output, (IntVector, FloatVector)):
        return list(r_output)  # Convert to a Python list of integers or floats
    # Check if it's a pandas-compatible R data frame
    elif isinstance(r_output, (pd.DataFrame, DataFrame)):
        return r_output  # Return as pandas DataFrame (already converted by pandas2ri)
    # Check if the input is a NumPy array and has a string data type
    if isinstance(r_output, np.ndarray) and np.issubdtype(r_output.dtype, np.str_):
        return r_output.tolist()  # Convert to a regular Python list
    # Fall-back
    return r_output


def r_to_python(r_func):
    """Decorator that converts Python list/tuple inputs to typed R vectors,
    runs the rpy2 function under a localconverter, and converts the output
    to a Python type."""
    @functools.wraps(r_func)
    def wrapper(*args, **kwargs):
        args = tuple(convert_to_r(a) for a in args)
        kwargs = {k: convert_to_r(v) for k, v in kwargs.items()}
        with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
            return convert_to_python(r_func(*args, **kwargs))
    return wrapper
EOL
}

write_functions_py "$functions_file"
# ---- Generate wrapper functions from .Rd files ---- #
#######################################
# Scan the \usage block of one .Rd file and, for every line that looks like a
# function call, append a decorated Python wrapper to the functions file and
# a matching "from .functions import <name>" line to the init file.
# Arguments:
#   $1 - .Rd file to scan
#   $2 - Python file that receives the wrapper definitions (appended)
#   $3 - Python file that receives the import lines (appended)
# Returns:
#   exit status of awk
#######################################
generate_rd_wrappers() {
  local rd_path=$1 functions_out=$2 init_out=$3
  # The output paths are handed to awk with -v instead of being spliced into
  # the program text, so a quote character in a path cannot change the awk
  # program. Note that awk expands backslash escapes in -v values; the paths
  # used by this script contain none.
  awk -v functions_file="$functions_out" -v init_file="$init_out" '
    BEGIN {
      usage_started = 0
    }

    # Detect the start of the \usage block
    /^\\usage\{/ {
      usage_started = 1
    }

    # Detect the end of the \usage block
    usage_started && /^\}/ {
      usage_started = 0
    }

    # Process lines within the \usage block that look like function calls
    usage_started && /^[a-zA-Z_]+/ {
      # Extract the function name (everything before the first parenthesis)
      func_name = $0
      sub(/\(.*/, "", func_name)

      # Python identifiers cannot contain dots: as.mic becomes as_mic
      func_name_py = func_name
      gsub(/\./, "_", func_name_py)

      # Extract the arguments: text after the first "(" up to the first ")"
      sub(/^[^(]+\(/, "", $0)
      sub(/\).*/, "", $0)
      func_args = $0

      # Count the number of arguments
      arg_count = split(func_args, arg_array, ",")

      # Handle "..." arguments (convert them to *args, **kwargs in Python)
      gsub("\\.\\.\\.", "*args, **kwargs", func_args)

      # Remove default values from arguments
      gsub(/ = [^,]+/, "", func_args)

      # No arguments at all: let the wrapper accept anything
      if (arg_count == 0) {
        func_args = "*args, **kwargs"
      }

      # More than one argument: keep the first, collapse the rest
      if (arg_count > 1) {
        first_arg = arg_array[1]
        func_args = first_arg ", *args, **kwargs"
      }

      # The only or first argument is "...": accept anything
      if (arg_array[1] == "...") {
        func_args = "*args, **kwargs"
      }

      # Usage lines without parentheses (plain objects) leave the name and
      # the argument text identical; those are not functions, skip them
      if (func_name_py == func_args) {
        next
      }

      # Skip names with these prefixes (plot helpers, option getters and
      # setters, NA constants and the data sets)
      if (func_name_py ~ /^(x |facet|scale|set|get|NA_|microorganisms|antimicrobials|clinical_breakpoints|example_isolates)/) {
        next
      }

      # Replace TRUE/FALSE/NULL
      gsub("TRUE", "True", func_args)
      gsub("FALSE", "False", func_args)
      gsub("NULL", "None", func_args)

      # Write the decorated Python wrapper and register it in the init file
      print "@r_to_python" >> functions_file
      print "def " func_name_py "(" func_args "):" >> functions_file
      print "    \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> functions_file
      print "    return amr_r." func_name_py "(" func_args ")" >> functions_file
      print "from .functions import " func_name_py >> init_file
    }
  ' "$rd_path"
}

rd_dir="../man"
for rd_file in "$rd_dir"/*.Rd; do
  # When nothing matches, the glob is left as a literal string; skip it
  # rather than handing awk a file that does not exist.
  [[ -e "$rd_file" ]] || continue
  generate_rd_wrappers "$rd_file" "$functions_file" "$init_file"
done
echo "Python wrapper functions generated in $functions_file."
echo "Python wrapper functions listed in $init_file."
# ---- README ---- #
#######################################
# Create the package README from the Python vignette, dropping everything
# from the first line up to and including the "# Introduction" heading.
# The text is streamed through sed straight into the target instead of being
# copied and then edited with "sed -i", whose syntax differs between GNU and
# BSD sed.
# Arguments:
#   $1 - vignette (.Rmd) to read
#   $2 - README file to create or overwrite
# Outputs:
#   "README copied." on stdout on success, an error on stderr otherwise
# Returns:
#   0 on success, 1 if the vignette is unreadable or the write fails
#######################################
write_readme() {
  local vignette=$1 readme=$2
  # Check first, so a missing vignette does not leave an empty README behind.
  if [[ ! -r "$vignette" ]]; then
    echo "ERROR: cannot read vignette '$vignette'" >&2
    return 1
  fi
  sed '1,/^# Introduction$/d' "$vignette" > "$readme" || return 1
  echo "README copied."
}

write_readme ../vignettes/AMR_for_Python.Rmd ../PythonPackage/AMR/README.md
# ---- setup.py ---- #
#######################################
# Write setup.py for the generated Python package.
# The heredoc delimiter is unquoted on purpose: the version is expanded by
# the shell into the Python text.
# python_requires is '>=3.8' because the generated _engine.py imports
# importlib.metadata, which only exists from Python 3.8 onwards.
# README.md is read as UTF-8 explicitly so the result does not depend on the
# locale of the machine running setup.py.
# Arguments:
#   $1 - package version string (must not be empty)
#   $2 - path of the setup.py to create or overwrite
# Outputs:
#   "setup.py generated." on stdout on success, an error on stderr otherwise
# Returns:
#   0 on success, 1 if the version is empty or the write fails
#######################################
write_setup_py() {
  local pkg_version=$1 out_file=$2
  if [[ -z "$pkg_version" ]]; then
    echo "ERROR: empty package version; not writing $out_file" >&2
    return 1
  fi
  cat <<EOL > "$out_file" || return 1
from setuptools import setup, find_packages

setup(
    name='AMR',
    version='$pkg_version',
    packages=find_packages(),
    install_requires=[
        'rpy2',
        'numpy',
        'pandas',
    ],
    author='Matthijs Berends',
    author_email='m.s.berends@umcg.nl',
    description='A Python wrapper for the AMR R package',
    long_description=open('README.md', encoding='utf-8').read(),
    long_description_content_type='text/markdown',
    url='https://github.com/msberends/AMR',
    project_urls={
        'Bug Tracker': 'https://github.com/msberends/AMR/issues',
    },
    license='GPL 2',
    classifiers=[
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.8',
)
EOL
  echo "setup.py generated."
}

# The version is the second field of the "Version:" line in DESCRIPTION.
version=$(grep "^Version:" "$description_file" | awk '{print $2}')
write_setup_py "$version" "$setup_file"
# ---- Build ---- #
# Build from inside the generated package directory. Stop if it cannot be
# entered, so the build tooling never runs against the wrong directory.
cd ../PythonPackage/AMR || exit 1
# Install "build" through "python3 -m pip" so it lands in the same
# interpreter that runs "python3 -m build" on the next line.
python3 -m pip install build || exit 1
python3 -m build