mirror of
https://github.com/msberends/AMR.git
synced 2026-06-29 11:36:20 +02:00
474 lines
16 KiB
Bash
474 lines
16 KiB
Bash
#!/bin/bash
|
|
|
|
# ==================================================================== #
|
|
# TITLE: #
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
|
# #
|
|
# SOURCE CODE: #
|
|
# https://github.com/msberends/AMR #
|
|
# #
|
|
# PLEASE CITE THIS SOFTWARE AS: #
|
|
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
|
|
# Journal of Statistical Software, 104(3), 1-31. #
|
|
# https://doi.org/10.18637/jss.v104.i03 #
|
|
# #
|
|
# Developed at the University of Groningen and the University Medical #
|
|
# Center Groningen in The Netherlands, in collaboration with many #
|
|
# colleagues from around the world, see our website. #
|
|
# #
|
|
# This R package is free software; you can freely use and distribute #
|
|
# it for both personal and commercial purposes under the terms of the #
|
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
|
# the Free Software Foundation. #
|
|
# We created this package for both routine data analysis and academic #
|
|
# research and it was publicly released in the hope that it will be #
|
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
|
# #
|
|
# Visit our website for the full manual and a complete tutorial about #
|
|
# how to conduct AMR data analysis: https://amr-for-r.org #
|
|
# ==================================================================== #
|
|
|
|
# Start from an empty package tree
pkg_root="../PythonPackage/AMR"
rm -rf "$pkg_root"/*
mkdir -p "$pkg_root/AMR"

# Files written by this script
setup_file="$pkg_root/setup.py"
init_file="$pkg_root/AMR/__init__.py"
engine_file="$pkg_root/AMR/_engine.py"
datasets_file="$pkg_root/AMR/datasets.py"
functions_file="$pkg_root/AMR/functions.py"
beta_file="$pkg_root/AMR/beta.py"

# Input: DESCRIPTION file of the R package (read for the version number)
description_file="../DESCRIPTION"
|
|
|
|
# ---- _engine.py: R environment setup and installation logic ---- #
|
|
|
|
cat <<'EOL' > "$engine_file"
|
|
import os
|
|
import sys
|
|
import importlib.metadata as metadata
|
|
|
|
# R packages are kept in a library directory inside the active environment
venv_path = sys.prefix
r_lib_path = os.path.join(venv_path, "R_libs")
os.makedirs(r_lib_path, exist_ok=True)

# R_LIBS_SITE has to be set before rpy2 is imported
os.environ['R_LIBS_SITE'] = r_lib_path

from rpy2 import robjects
from rpy2.robjects.vectors import StrVector
from rpy2.robjects.packages import importr, isinstalled

# base and utils are imported a single time and reused by the helpers below
base = importr('base')
utils = importr('utils')

# Divert R console output to a temporary file; restore_sink() undoes this
robjects.r('suppressMessages(suppressWarnings(sink(tempfile())))')
base._libPaths(r_lib_path)

# Source ("cran" or "github") that ensure_amr() handled last, None if none yet
_installed_source = None
|
|
|
|
def _r_version():
    """Return the version of the AMR R package in the isolated library.

    The library path is handed to R as a function argument instead of being
    pasted into R source text, so a path containing backslashes (Windows) or
    quotes cannot break the R parser.

    Returns:
        The version as a string, or None when it cannot be determined
        (for example because AMR is not installed in ``r_lib_path``).
    """
    try:
        version = utils.packageVersion('AMR', lib_loc=r_lib_path)
        return str(base.as_character(version)[0])
    except Exception:
        # Best effort: any failure means "version unknown"
        return None
|
|
|
|
def _py_version():
    """Look up the installed Python AMR distribution version.

    Returns:
        The version string from the package metadata, or '' when no
        distribution named 'AMR' is installed.
    """
    try:
        found = metadata.version('AMR')
    except metadata.PackageNotFoundError:
        return ''
    else:
        return str(found)
|
|
|
|
def _install_cran():
    """Install the AMR package from CRAN into the isolated R library."""
    print("AMR: Installing from CRAN...", flush=True)
    install_options = {
        'repos': 'https://cloud.r-project.org',
        'lib': r_lib_path,
        'quiet': True,
    }
    utils.install_packages('AMR', **install_options)
|
|
|
|
def _install_github():
    """Install the AMR development version from GitHub into the isolated R library.

    The R packages 'remotes' and 'desc' are installed from CRAN first;
    'remotes' then performs the GitHub installation.
    """
    print("AMR: Installing development version from GitHub...", flush=True)

    helper_packages = StrVector(['remotes', 'desc'])
    utils.install_packages(
        helper_packages,
        repos='https://cloud.r-project.org',
        lib=r_lib_path,
        quiet=True
    )

    remotes_pkg = importr('remotes', lib_loc=r_lib_path)
    remotes_pkg.install_github('msberends/AMR', lib=r_lib_path, quiet=True)
|
|
|
|
def ensure_amr(source="cran"):
    """Ensure the AMR R package is installed in the isolated library.

    Args:
        source: "github" installs the development version; any other value
            (default "cran") installs the CRAN release.

    A missing package is installed unconditionally and installation errors
    propagate to the caller. When the package is already present, it is only
    reinstalled if the Python package version is known and differs from the
    installed R version; a failed update is reported and the existing
    installation is kept. Calling again with the source that was handled last
    is a no-op.
    """
    global _installed_source

    if _installed_source == source:
        return

    install_fn = _install_github if source == "github" else _install_cran

    if not isinstalled('AMR', lib_loc=r_lib_path):
        install_fn()
    else:
        py_ver = _py_version()
        # An empty py_ver means the Python metadata is unavailable, so there
        # is no version to synchronise with. Comparing anyway would never
        # match and would trigger a reinstall in every session.
        if py_ver and _r_version() != py_ver:
            try:
                install_fn()
            except Exception as e:
                print(f"AMR: Could not update ({e})", flush=True)

    print(f"AMR: R package version {_r_version()} ready.", flush=True)
    _installed_source = source
|
|
|
|
def restore_sink():
    """Turn R console output back on by calling R's sink() once.

    Any error raised by the call is deliberately ignored.
    """
    try:
        robjects.r('sink()')
    except Exception:
        # Best effort only: nothing to do when the sink cannot be removed
        return
|
|
EOL
|
|
|
|
# ---- datasets.py: only dataset loading ---- #
|
|
|
|
cat <<'EOL' > "$datasets_file"
|
|
import pandas as pd
|
|
from rpy2 import robjects
|
|
from rpy2.robjects.conversion import localconverter
|
|
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
|
|
|
|
from ._engine import ensure_amr, restore_sink
|
|
|
|
# Dataset name -> converted dataset, filled by _load_datasets()
_cache = {}
# Source ("cran" or "github") the cached datasets were loaded from
_loaded_source = None


def _load_datasets(source="cran"):
    """Load all AMR datasets into the module cache.

    Args:
        source: installation source passed on to ensure_amr().

    Does nothing when the cache is already filled from the same source.
    Otherwise the datasets are loaded into a temporary dict and the cache is
    replaced only after every dataset loaded successfully, so a failure never
    leaves a half-filled cache behind. R console output is restored whether
    or not loading succeeds.
    """
    global _loaded_source

    if _cache and _loaded_source == source:
        return

    loaded = {}
    try:
        ensure_amr(source)

        with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
            loaded['example_isolates'] = _load_example_isolates()
            # List columns are dropped from each dataset before conversion
            loaded['microorganisms'] = robjects.r(
                'AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]')
            loaded['antimicrobials'] = robjects.r(
                'AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]')
            loaded['clinical_breakpoints'] = robjects.r(
                'AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]')
    finally:
        # Never leave R output silenced, not even after a failed load
        restore_sink()

    _cache.clear()
    _cache.update(loaded)
    _loaded_source = source
|
|
|
|
def _load_example_isolates():
    """Fetch AMR::example_isolates and return it with a parsed 'date' column.

    On the R side, Date, POSIXt and factor columns are converted to character
    and list columns are dropped. The 'date' column of the result is then
    parsed with pandas.to_datetime. Expects to run while a pandas conversion
    context is active, as it does when called from _load_datasets().
    """
    r_code = '''
    df <- AMR::example_isolates
    df[] <- lapply(df, function(x) {
        if (inherits(x, c("Date", "POSIXt", "factor"))) {
            as.character(x)
        } else {
            x
        }
    })
    df <- df[, !sapply(df, is.list)]
    df
    '''
    isolates = robjects.r(r_code)
    isolates['date'] = pd.to_datetime(isolates['date'])
    return isolates
|
|
|
|
def get(name, source="cran"):
    """Return the cached dataset called *name*, loading the datasets first.

    Args:
        name: key of the dataset in the module cache.
        source: installation source passed on to _load_datasets().

    Raises:
        KeyError: if *name* is not one of the cached datasets.
    """
    _load_datasets(source)
    dataset = _cache[name]
    return dataset
|
|
EOL
|
|
|
|
# ---- __init__.py: lazy module, CRAN by default ---- #
|
|
|
|
cat <<'EOL' > "$init_file"
|
|
import sys
|
|
|
|
# Names that are served from the dataset cache instead of functions.py
_DATASETS = frozenset({
    'example_isolates', 'microorganisms',
    'antimicrobials', 'clinical_breakpoints'
})


class _AMRModule(type(sys.modules[__name__])):
    """Lazy-loading module: nothing runs until an attribute is accessed."""

    def __getattr__(self, name):
        """Resolve attributes that are not defined on the module itself.

        Dataset names are loaded (from CRAN) through datasets.get(); every
        other name is looked up in the functions module.

        Raises:
            AttributeError: if *name* is a dunder name or is not provided by
                the functions module.
        """
        # Dunder lookups (such as __all__ or __path__, probed by the import
        # system and by introspection tools) are never AMR datasets or
        # wrappers; answer them without importing the R bridge.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(f"module 'AMR' has no attribute '{name}'")

        if name in _DATASETS:
            from .datasets import get
            return get(name, source="cran")

        # Import outside the try block: an error raised while importing the
        # functions module must not be reported as a missing attribute.
        from . import functions
        try:
            return getattr(functions, name)
        except AttributeError:
            raise AttributeError(
                f"module 'AMR' has no attribute '{name}'") from None


sys.modules[__name__].__class__ = _AMRModule
|
|
EOL
|
|
|
|
# ---- beta.py: GitHub development version ---- #
|
|
|
|
cat <<'EOL' > "$beta_file"
|
|
import sys
|
|
|
|
# Names that are served from the dataset cache instead of functions.py
_DATASETS = frozenset({
    'example_isolates', 'microorganisms',
    'antimicrobials', 'clinical_breakpoints'
})


class _BetaModule(type(sys.modules[__name__])):
    """Lazy-loading module: installs AMR from GitHub on first access."""

    def __getattr__(self, name):
        """Resolve attributes that are not defined on the module itself.

        Dataset names are loaded through datasets.get() with the GitHub
        source; every other name is looked up in the functions module.

        Raises:
            AttributeError: if *name* is a dunder name or is not provided by
                the functions module.
        """
        # Dunder lookups (such as __all__ or __path__, probed by the import
        # system and by introspection tools) are never AMR datasets or
        # wrappers; answer them without importing the R bridge.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(
                f"module 'AMR.beta' has no attribute '{name}'")

        if name in _DATASETS:
            from .datasets import get
            return get(name, source="github")

        # Import outside the try block: an error raised while importing the
        # functions module must not be reported as a missing attribute.
        from . import functions
        try:
            return getattr(functions, name)
        except AttributeError:
            raise AttributeError(
                f"module 'AMR.beta' has no attribute '{name}'") from None


sys.modules[__name__].__class__ = _BetaModule
|
|
EOL
|
|
|
|
# ---- functions.py: R-to-Python wrapper functions ---- #
|
|
|
|
cat <<'EOL' > "$functions_file"
|
|
import functools
|
|
import rpy2.robjects as robjects
|
|
from rpy2.robjects.packages import importr
|
|
from rpy2.robjects.vectors import StrVector, FactorVector, IntVector, FloatVector, DataFrame
|
|
from rpy2.robjects.conversion import localconverter
|
|
from rpy2.robjects import default_converter, numpy2ri, pandas2ri
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
from ._engine import ensure_amr
|
|
|
|
# Ensure AMR is available before importing it in R
|
|
ensure_amr("cran")
|
|
amr_r = importr('AMR')
|
|
|
|
def convert_to_r(value):
    """Convert Python lists/tuples to typed R vectors.

    rpy2's default_converter passes Python lists to R as R lists, not as
    character/numeric vectors. This causes element-wise type-check functions
    such as is.mic(), is.sir(), and is.disk() to return a logical vector
    rather than a single logical, breaking R's scalar && operator.

    This helper converts Python lists and tuples to the appropriate R vector
    type based on the element types, so R always receives a proper vector.

    Args:
        value: any Python object.

    Returns:
        For a list or tuple: a BoolVector (all bool), IntVector (all int),
        FloatVector (all float, or a mix of int and float), or StrVector
        (all str, empty input, or any other mix, coerced with str()).
        Every other value is returned unchanged.
    """
    if not isinstance(value, (list, tuple)):
        return value

    if len(value) == 0:
        return StrVector([])

    # bool must be checked before int because bool is a subclass of int
    if all(isinstance(v, bool) for v in value):
        return robjects.vectors.BoolVector(value)
    if all(isinstance(v, int) for v in value):
        return IntVector(value)

    # A mix of int and float is still numeric; promote to float instead of
    # falling through to string coercion. Bools are excluded so that a
    # bool/float mix keeps its previous (string) conversion.
    if all(isinstance(v, (int, float)) and not isinstance(v, bool) for v in value):
        return FloatVector([float(v) for v in value])

    if all(isinstance(v, str) for v in value):
        return StrVector(value)

    # Mixed types: coerce all to string
    return StrVector([str(v) for v in value])
|
|
|
|
def convert_to_python(r_output):
    """Turn an R result into a plain Python object where possible.

    Returns:
        A list for R character, factor, integer and float vectors and for
        NumPy arrays with a string dtype; data frames and every other object
        are returned unchanged.
    """
    # Character vectors, factors and numeric vectors all become plain lists
    if isinstance(r_output, (StrVector, FactorVector, IntVector, FloatVector)):
        return list(r_output)

    # Data frames (pandas or rpy2) are handed back as they are
    if isinstance(r_output, (pd.DataFrame, DataFrame)):
        return r_output

    # NumPy arrays holding strings become regular Python lists
    is_text_array = (
        isinstance(r_output, np.ndarray)
        and np.issubdtype(r_output.dtype, np.str_)
    )
    if is_text_array:
        return r_output.tolist()

    # Anything else is passed through untouched
    return r_output
|
|
|
|
def r_to_python(r_func):
    """Decorate an rpy2-calling function with input and output conversion.

    The wrapper converts every positional and keyword argument with
    convert_to_r(), calls *r_func* inside a localconverter that combines the
    default, NumPy and pandas converters, and passes the result through
    convert_to_python().
    """
    @functools.wraps(r_func)
    def wrapper(*args, **kwargs):
        r_args = tuple(map(convert_to_r, args))
        r_kwargs = {key: convert_to_r(val) for key, val in kwargs.items()}
        rules = default_converter + numpy2ri.converter + pandas2ri.converter
        with localconverter(rules):
            result = r_func(*r_args, **r_kwargs)
            return convert_to_python(result)

    return wrapper
|
|
EOL
|
|
|
|
# ---- Generate wrapper functions from .Rd files ---- #
|
|
|
|
rd_dir="../man"

# For every Rd file: read the \usage block and append one Python wrapper per
# documented function to functions.py, plus a matching import to __init__.py.
# Output paths are passed to awk with -v, so the awk program stays a single
# quoted string (for that reason it must not contain apostrophes).
for rd_file in "$rd_dir"/*.Rd; do
  awk -v functions_file="$functions_file" -v init_file="$init_file" '
    BEGIN {
      usage_started = 0
    }

    # Detect the start of the \usage block
    /^\\usage\{/ {
      usage_started = 1
    }

    # Detect the end of the \usage block
    usage_started && /^\}/ {
      usage_started = 0
    }

    # Process lines within the \usage block that look like function calls
    usage_started && /^[a-zA-Z_]+/ {
      func_line = $0

      # Extract the function name (up to the first parenthesis)
      sub(/\(.*/, "", func_line)
      func_name = func_line
      func_name_py = func_name

      # Replace dots with underscores in Python function names
      gsub(/\./, "_", func_name_py)

      # Extract the arguments (inside the parentheses)
      sub(/^[^(]+\(/, "", $0)
      sub(/\).*/, "", $0)
      func_args = $0

      # Count the number of arguments
      arg_count = split(func_args, arg_array, ",")

      # Handle "..." arguments (convert them to *args, **kwargs in Python)
      gsub("\\.\\.\\.", "*args, **kwargs", func_args)

      # Remove default values from arguments
      gsub(/ = [^,]+/, "", func_args)

      # No arguments on this line: accept anything and pass it through
      if (arg_count == 0) {
        func_args = "*args, **kwargs"
      }

      # func_args is the Python signature, call_args is what gets forwarded
      # to R. They only differ when the first argument carries a default.
      call_args = func_args

      # If more than 1 argument, replace the 2nd to nth arguments with *args, **kwargs
      if (arg_count > 1) {
        first_arg = arg_array[1]
        func_args = first_arg ", *args, **kwargs"

        # Forward the parameter itself, not its default: "x = NULL" must be
        # passed on as "x", otherwise the value given by the caller is lost.
        first_name = first_arg
        sub(/ *=.*/, "", first_name)
        call_args = first_name ", *args, **kwargs"
      }
      if (arg_array[1] == "...") {
        func_args = "*args, **kwargs"
        call_args = func_args
      }

      # Skip functions where func_name_py is identical to func_args
      if (func_name_py == func_args) {
        next
      }

      # Skip functions matching the regex pattern
      if (func_name_py ~ /^(x |facet|scale|set|get|NA_|microorganisms|antimicrobials|clinical_breakpoints|example_isolates)/) {
        next
      }

      # Replace TRUE/FALSE/NULL
      gsub("TRUE", "True", func_args)
      gsub("FALSE", "False", func_args)
      gsub("NULL", "None", func_args)
      gsub("TRUE", "True", call_args)
      gsub("FALSE", "False", call_args)
      gsub("NULL", "None", call_args)

      # Write the Python function definition to the output file, using decorator
      print "@r_to_python" >> functions_file
      print "def " func_name_py "(" func_args "):" >> functions_file
      print "    \"\"\"Please see our website of the R package for the full manual: https://amr-for-r.org\"\"\"" >> functions_file
      print "    return amr_r." func_name_py "(" call_args ")" >> functions_file

      print "from .functions import " func_name_py >> init_file
    }
  ' "$rd_file"
done

echo "Python wrapper functions generated in $functions_file."
echo "Python wrapper functions listed in $init_file."
|
|
|
|
# ---- README ---- #
|
|
|
|
# Build README.md from the vignette, dropping everything up to and including
# the "# Introduction" heading. sed writes straight to the target file, which
# avoids "sed -i": its syntax differs between GNU sed and BSD/macOS sed.
sed '1,/^# Introduction$/d' ../vignettes/AMR_for_Python.Rmd > ../PythonPackage/AMR/README.md
echo "README copied."
|
|
|
|
# ---- setup.py ---- #
|
|
|
|
# The Python package version is taken from the R package DESCRIPTION file
version=$(grep "^Version:" "$description_file" | awk '{print $2}')

# Unquoted heredoc: the shell expands $version while writing setup.py
cat <<EOL > "$setup_file"
from setuptools import setup, find_packages

# Read the README explicitly as UTF-8 and close the file afterwards
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='AMR',
    version='$version',
    packages=find_packages(),
    install_requires=[
        'rpy2',
        'numpy',
        'pandas',
    ],
    author='Matthijs Berends',
    author_email='m.s.berends@umcg.nl',
    description='A Python wrapper for the AMR R package',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/msberends/AMR',
    project_urls={
        'Bug Tracker': 'https://github.com/msberends/AMR/issues',
    },
    license='GPL 2',
    classifiers=[
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
    # _engine.py imports importlib.metadata, which requires Python 3.8 or newer
    python_requires='>=3.8',
)
EOL

echo "setup.py generated."
|
|
|
|
# ---- Build ---- #
|
|
|
|
# Stop if the package directory is missing, rather than installing and
# building from whatever directory the script happens to be in.
cd ../PythonPackage/AMR || exit 1
pip3 install build
python3 -m build
|