#!/bin/bash

# ==================================================================== #
# TITLE:                                                               #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE CODE:                                                         #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# PLEASE CITE THIS SOFTWARE AS:                                        #
# Berends MS, Luz CF, Friedrich AW, et al. (2022).                     #
# AMR: An R Package for Working with Antimicrobial Resistance Data.    #
# Journal of Statistical Software, 104(3), 1-31.                       #
# https://doi.org/10.18637/jss.v104.i03                                #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://amr-for-r.org              #
# ==================================================================== #

# Generates the Python wrapper package for the AMR R package.
#
# Steps, in order:
#   1. wipe ../PythonPackage/AMR and recreate the package directory
#   2. write the static Python modules (_engine.py, datasets.py,
#      __init__.py, beta.py and the head of functions.py) from heredocs
#   3. append one Python wrapper per function found in the \usage block
#      of every ../man/*.Rd file
#   4. derive README.md from the Python vignette
#   5. write setup.py using the version found in ../DESCRIPTION
#   6. build the distribution with `python3 -m build`
#
# All paths are relative to the current working directory, so the script
# must be started from a directory that is a sibling of ../man,
# ../vignettes and ../PythonPackage.

# Clean up
rm -rf ../PythonPackage/AMR/*
mkdir -p ../PythonPackage/AMR/AMR

# Output files
setup_file="../PythonPackage/AMR/setup.py"
init_file="../PythonPackage/AMR/AMR/__init__.py"
engine_file="../PythonPackage/AMR/AMR/_engine.py"
datasets_file="../PythonPackage/AMR/AMR/datasets.py"
functions_file="../PythonPackage/AMR/AMR/functions.py"
beta_file="../PythonPackage/AMR/AMR/beta.py"
description_file="../DESCRIPTION"

# NOTE: the heredocs below use a quoted delimiter ('EOL') so the shell
# performs no expansion inside them; the Python text is written verbatim.

# ---- _engine.py: R environment setup and installation logic ---- #
cat <<'EOL' > "$engine_file"
import os
import sys
import importlib.metadata as metadata

# Get the path to the virtual environment
venv_path = sys.prefix
r_lib_path = os.path.join(venv_path, "R_libs")
os.makedirs(r_lib_path, exist_ok=True)

# Set environment variable before importing rpy2
os.environ['R_LIBS_SITE'] = r_lib_path

from rpy2 import robjects
from rpy2.robjects.vectors import StrVector
from rpy2.robjects.packages import importr, isinstalled

# Import base and utils once
base = importr('base')
utils = importr('utils')

# Silence R console output entirely
robjects.r('suppressMessages(suppressWarnings(sink(tempfile())))')

base._libPaths(r_lib_path)

_installed_source = None


def _r_version():
    """Return the currently installed AMR R package version, or None."""
    try:
        return str(robjects.r(
            f'as.character(packageVersion("AMR", lib.loc = "{r_lib_path}"))')[0])
    except Exception:
        return None


def _py_version():
    """Return the Python AMR package version from metadata, or empty string."""
    try:
        return str(metadata.version('AMR'))
    except metadata.PackageNotFoundError:
        return ''


def _install_cran():
    """Install AMR from CRAN into the isolated library."""
    print("AMR: Installing from CRAN...", flush=True)
    utils.install_packages(
        'AMR',
        repos='https://cloud.r-project.org',
        lib=r_lib_path,
        quiet=True
    )


def _install_github():
    """Install AMR development version from GitHub into the isolated library."""
    print("AMR: Installing development version from GitHub...", flush=True)
    utils.install_packages(
        StrVector(['remotes', 'desc']),
        repos='https://cloud.r-project.org',
        lib=r_lib_path,
        quiet=True
    )
    remotes = importr('remotes', lib_loc=r_lib_path)
    remotes.install_github('msberends/AMR', lib=r_lib_path, quiet=True)


def ensure_amr(source="cran"):
    """Ensure AMR is installed from the requested source. Idempotent per source."""
    global _installed_source
    if _installed_source == source:
        return

    install_fn = _install_github if source == "github" else _install_cran

    if not isinstalled('AMR', lib_loc=r_lib_path):
        install_fn()
    else:
        # Check for version mismatch and update if needed
        r_ver = _r_version()
        py_ver = _py_version()
        if r_ver != py_ver:
            try:
                install_fn()
            except Exception as e:
                print(f"AMR: Could not update ({e})", flush=True)

    print(f"AMR: R package version {_r_version()} ready.", flush=True)
    _installed_source = source


def restore_sink():
    """Restore R console output after setup is complete."""
    try:
        robjects.r('sink()')
    except Exception:
        pass
EOL

# ---- datasets.py: only dataset loading ---- #
cat <<'EOL' > "$datasets_file"
import pandas as pd
from rpy2 import robjects
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
from ._engine import ensure_amr, restore_sink

_cache = {}
_loaded_source = None


def _load_datasets(source="cran"):
    """Load all AMR datasets into the module cache."""
    global _loaded_source
    if _cache and _loaded_source == source:
        return
    if _cache and _loaded_source != source:
        _cache.clear()
    ensure_amr(source)
    with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
        _cache['example_isolates'] = _load_example_isolates()
        _cache['microorganisms'] = robjects.r(
            'AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]')
        _cache['antimicrobials'] = robjects.r(
            'AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]')
        _cache['clinical_breakpoints'] = robjects.r(
            'AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]')
    restore_sink()
    _loaded_source = source


def _load_example_isolates():
    df = robjects.r('''
    df <- AMR::example_isolates
    df[] <- lapply(df, function(x) {
        if (inherits(x, c("Date", "POSIXt", "factor"))) {
            as.character(x)
        } else {
            x
        }
    })
    df <- df[, !sapply(df, is.list)]
    df
    ''')
    df['date'] = pd.to_datetime(df['date'])
    return df


def get(name, source="cran"):
    """Retrieve a dataset by name, installing AMR if needed."""
    _load_datasets(source)
    return _cache[name]
EOL

# ---- __init__.py: lazy module, CRAN by default ---- #
cat <<'EOL' > "$init_file"
import sys

_DATASETS = frozenset({
    'example_isolates', 'microorganisms',
    'antimicrobials', 'clinical_breakpoints'
})


class _AMRModule(type(sys.modules[__name__])):
    """Lazy-loading module: nothing runs until an attribute is accessed."""

    def __getattr__(self, name):
        if name in _DATASETS:
            from .datasets import get
            return get(name, source="cran")
        try:
            from . import functions
            return getattr(functions, name)
        except AttributeError:
            raise AttributeError(
                f"module 'AMR' has no attribute '{name}'")


sys.modules[__name__].__class__ = _AMRModule
EOL

# ---- beta.py: GitHub development version ---- #
cat <<'EOL' > "$beta_file"
import sys

_DATASETS = frozenset({
    'example_isolates', 'microorganisms',
    'antimicrobials', 'clinical_breakpoints'
})


class _BetaModule(type(sys.modules[__name__])):
    """Lazy-loading module: installs AMR from GitHub on first access."""

    def __getattr__(self, name):
        if name in _DATASETS:
            from .datasets import get
            return get(name, source="github")
        try:
            from . import functions
            return getattr(functions, name)
        except AttributeError:
            raise AttributeError(
                f"module 'AMR.beta' has no attribute '{name}'")


sys.modules[__name__].__class__ = _BetaModule
EOL

# ---- functions.py: R-to-Python wrapper functions ---- #
# Only the static head of functions.py is written here; the per-function
# wrappers are appended by the awk program further down.
cat <<'EOL' > "$functions_file"
import functools
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
import pandas as pd
import numpy as np
from ._engine import ensure_amr

# Ensure AMR is available before importing it in R
ensure_amr("cran")
amr_r = importr('AMR')


def convert_to_r(value):
    """Convert Python lists/tuples to typed R vectors.

    rpy2's default_converter passes Python lists to R as R lists, not as
    character/numeric vectors. This causes element-wise type-check functions
    such as is.mic(), is.sir(), and is.disk() to return a logical vector
    rather than a single logical, breaking R's scalar && operator.

    This helper converts Python lists and tuples to the appropriate R vector
    type based on the element types, so R always receives a proper vector."""
    if isinstance(value, (list, tuple)):
        if len(value) == 0:
            return StrVector([])
        # bool must be checked before int because bool is a subclass of int
        if all(isinstance(v, bool) for v in value):
            return robjects.vectors.BoolVector(value)
        if all(isinstance(v, int) for v in value):
            return IntVector(value)
        if all(isinstance(v, float) for v in value):
            return FloatVector(value)
        if all(isinstance(v, str) for v in value):
            return StrVector(value)
        # Mixed types: coerce all to string
        return StrVector([str(v) for v in value])
    return value


def convert_to_python(r_output):
    # Check if it's a StrVector (R character vector)
    if isinstance(r_output, StrVector):
        return list(r_output)  # Convert to a Python list of strings

    # Check if it's a FactorVector (R factor)
    elif isinstance(r_output, FactorVector):
        return list(r_output)  # Convert to a list of integers (factor levels)

    # Check if it's an IntVector or FloatVector (numeric R vectors)
    elif isinstance(r_output, (IntVector, FloatVector)):
        return list(r_output)  # Convert to a Python list of integers or floats

    # Check if it's a pandas-compatible R data frame
    elif isinstance(r_output, (pd.DataFrame, DataFrame)):
        return r_output  # Return as pandas DataFrame (already converted by pandas2ri)

    # Check if the input is a NumPy array and has a string data type
    if isinstance(r_output, np.ndarray) and np.issubdtype(r_output.dtype, np.str_):
        return r_output.tolist()  # Convert to a regular Python list

    # Fall-back
    return r_output


def r_to_python(r_func):
    """Decorator that converts Python list/tuple inputs to typed R vectors,
    runs the rpy2 function under a localconverter, and converts the output
    to a Python type."""
    @functools.wraps(r_func)
    def wrapper(*args, **kwargs):
        args = tuple(convert_to_r(a) for a in args)
        kwargs = {k: convert_to_r(v) for k, v in kwargs.items()}
        with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
            return convert_to_python(r_func(*args, **kwargs))
    return wrapper

EOL

# ---- Generate wrapper functions from .Rd files ---- #
# The awk program is single-quoted for the shell. The output paths are
# spliced in by briefly closing the single quote ("'"$var"'"), so the awk
# comments below must not contain an apostrophe.
rd_dir="../man"
for rd_file in "$rd_dir"/*.Rd; do
    awk '
    BEGIN {
        usage_started = 0
    }

    # Detect the start of the \usage block
    /^\\usage\{/ {
        usage_started = 1
    }

    # Detect the end of the \usage block
    usage_started && /^\}/ {
        usage_started = 0
    }

    # Process lines within the \usage block that look like function calls
    usage_started && /^[a-zA-Z_]+/ {
        func_line = $0
        func_line_py = $0

        # Extract the function name (up to the first parenthesis)
        sub(/\(.*/, "", func_line)
        func_name = func_line
        func_name_py = func_name

        # Replace dots with underscores in Python function names
        gsub(/\./, "_", func_name_py)

        # Extract the arguments (inside the parentheses)
        sub(/^[^(]+\(/, "", $0)
        sub(/\).*/, "", $0)
        func_args = $0

        # Count the number of arguments
        arg_count = split(func_args, arg_array, ",")

        # Handle "..." arguments (convert them to *args, **kwargs in Python)
        gsub("\\.\\.\\.", "*args, **kwargs", func_args)

        # Remove default values from arguments
        gsub(/ = [^,]+/, "", func_args)

        # If no arguments, let the wrapper accept anything via *args, **kwargs
        if (arg_count == 0) {
            func_args = "*args, **kwargs"
        }

        # If more than 1 argument, replace the 2nd to nth arguments with *args, **kwargs
        if (arg_count > 1) {
            first_arg = arg_array[1]
            func_args = first_arg ", *args, **kwargs"
        }
        if (arg_array[1] == "...") {
            func_args = "*args, **kwargs"
        }

        # Skip functions where func_name_py is identical to func_args
        if (func_name_py == func_args) {
            next
        }

        # Skip functions matching the regex pattern
        if (func_name_py ~ /^(x |facet|scale|set|get|NA_|microorganisms|antimicrobials|clinical_breakpoints|example_isolates)/) {
            next
        }

        # Replace TRUE/FALSE/NULL
        gsub("TRUE", "True", func_args)
        gsub("FALSE", "False", func_args)
        gsub("NULL", "None", func_args)

        # Write the Python function definition to the output file, using decorator
        print "@r_to_python" >> "'"$functions_file"'"
        print "def " func_name_py "(" func_args "):" >> "'"$functions_file"'"
        print "    \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> "'"$functions_file"'"
        print "    return amr_r." func_name_py "(" func_args ")" >> "'"$functions_file"'"

        print "from .functions import " func_name_py >> "'"$init_file"'"
    }
    ' "$rd_file"
done

echo "Python wrapper functions generated in $functions_file."
echo "Python wrapper functions listed in $init_file."

# ---- README ---- #
cp ../vignettes/AMR_for_Python.Rmd ../PythonPackage/AMR/README.md
# Drop everything up to and including the "# Introduction" heading.
# NOTE: `sed -i` without a backup suffix is GNU sed syntax.
sed -i '1,/^# Introduction$/d' ../PythonPackage/AMR/README.md
echo "README copied."

# ---- setup.py ---- #
version=$(grep "^Version:" "$description_file" | awk '{print $2}')
# Unquoted delimiter on purpose: $version must be expanded by the shell.
cat <<EOL > "$setup_file"
from setuptools import setup, find_packages

setup(
    name='AMR',
    version='$version',
    packages=find_packages(),
    install_requires=[
        'rpy2',
        'numpy',
        'pandas',
    ],
    author='Matthijs Berends',
    author_email='m.s.berends@umcg.nl',
    description='A Python wrapper for the AMR R package',
    long_description=open('README.md').read(),
    long_description_content_type='text/markdown',
    url='https://github.com/msberends/AMR',
    project_urls={
        'Bug Tracker': 'https://github.com/msberends/AMR/issues',
    },
    license='GPL 2',
    classifiers=[
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)
EOL
echo "setup.py generated."

# ---- Build ---- #
cd ../PythonPackage/AMR
pip3 install build
python3 -m build