1
0
mirror of https://github.com/msberends/AMR.git synced 2025-04-19 22:33:49 +02:00
AMR/AMR/datasets.py
2025-03-28 10:41:00 +00:00

78 lines
2.4 KiB
Python

import os
import sys
import pandas as pd
import importlib.metadata as metadata
# Locate a per-environment R library directory under the interpreter prefix
# (the virtual environment when one is active) and create it if necessary.
venv_path = sys.prefix
r_lib_path = os.path.join(venv_path, "R_libs")
os.makedirs(r_lib_path, exist_ok=True)

# Set environment variable before importing rpy2. This ordering is deliberate,
# which is why the rpy2 imports below are not at the top of the file.
os.environ['R_LIBS_SITE'] = r_lib_path

from rpy2 import robjects  # noqa: E402
from rpy2.robjects import pandas2ri  # noqa: E402
from rpy2.robjects.packages import importr, isinstalled  # noqa: E402

# Import base and utils
base = importr('base')
utils = importr('utils')

# Suppress R warnings during setup (R's `warn = -1`).
base.options(warn=-1)

# Ensure library paths explicitly
base._libPaths(r_lib_path)

# Install the AMR R package into the local library when it is not there yet.
if not isinstalled('AMR', lib_loc=r_lib_path):
    print(f"AMR: Installing latest AMR R package to {r_lib_path}...", flush=True)
    # Pass `lib` explicitly so the install target is exactly the directory
    # checked by `isinstalled` above and named in the message.
    utils.install_packages(
        'AMR',
        lib=r_lib_path,
        repos='https://msberends.r-universe.dev',
        quiet=True,
    )
# # Retrieve Python AMR version
# try:
# python_amr_version = metadata.version('AMR')
# except metadata.PackageNotFoundError:
# python_amr_version = ''
#
# # Retrieve R AMR version
# r_amr_version = robjects.r(f'as.character(packageVersion("AMR", lib.loc = "{r_lib_path}"))')
# r_amr_version = str(r_amr_version[0])
#
# # Compare R and Python package versions
# if r_amr_version != python_amr_version:
# try:
# print(f"AMR: Updating AMR package in {r_lib_path}...", flush=True)
# utils.install_packages('AMR', repos='https://msberends.r-universe.dev', quiet=True)
# except Exception as e:
# print(f"AMR: Could not update: {e}", flush=True)
print("AMR: Setting up R environment and AMR datasets...", flush=True)

# Activate the automatic conversion between R and pandas DataFrames
pandas2ri.activate()

# Load the AMR data sets as module-level pandas objects. The R `warn` option is
# reset in `finally` so that a failure while loading does not leave warnings
# suppressed in the embedded R session.
try:
    # example_isolates: Date/POSIXt/factor columns are converted to character
    # on the R side and list columns are dropped before conversion. The R code
    # stays flush-left because it is the content of the string literal.
    example_isolates = pandas2ri.rpy2py(robjects.r('''
df <- AMR::example_isolates
df[] <- lapply(df, function(x) {
if (inherits(x, c("Date", "POSIXt", "factor"))) {
as.character(x)
} else {
x
}
})
df <- df[, !sapply(df, is.list)]
df
'''))
    # The date column arrives as text (see the conversion above); parse it
    # back into pandas datetimes.
    example_isolates['date'] = pd.to_datetime(example_isolates['date'])

    # microorganisms, antimicrobials, clinical_breakpoints: list columns are
    # dropped on the R side before conversion.
    microorganisms = pandas2ri.rpy2py(robjects.r('AMR::microorganisms[, !sapply(AMR::microorganisms, is.list)]'))
    antimicrobials = pandas2ri.rpy2py(robjects.r('AMR::antimicrobials[, !sapply(AMR::antimicrobials, is.list)]'))
    clinical_breakpoints = pandas2ri.rpy2py(robjects.r('AMR::clinical_breakpoints[, !sapply(AMR::clinical_breakpoints, is.list)]'))
finally:
    # Reset R's warning option to 0 whether or not loading succeeded.
    base.options(warn=0)

print("AMR: Done.", flush=True)