1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 15:21:58 +02:00

(v1.0.1.9004) Support dplyr 1.0.0

This commit is contained in:
2020-03-14 14:05:43 +01:00
parent 3760bcb11e
commit 219cff403f
62 changed files with 616 additions and 213 deletions

View File

@ -50,7 +50,7 @@ rm(data_json)
# add country data based on IP address and ipinfo.io API
unique_ip <- unique(data$ipaddress)
ip_tbl <- GET_df(unique_ip[1])
p <- progress_estimated(n = length(unique_ip) - 1, min_time = 0)
p <- AMR:::progress_estimated(n = length(unique_ip) - 1, min_time = 0)
for (i in 2:length(unique_ip)) {
p$tick()$print()
ip_tbl <- ip_tbl %>%

View File

@ -23,6 +23,7 @@
# Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life:
# https://opendata.eol.org/dataset/catalogue-of-life/
# https://doi.org/10.15468/rffz4x
# (download the resource file with a name like "Catalogue of Life yyyy-mm-dd")
# and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures
# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html
@ -32,7 +33,8 @@ library(dplyr)
library(AMR)
# unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then:
data_col <- data.table::fread("data-raw/taxon.tab")
# data_col <- data.table::fread("data-raw/taxon.tab")
data_col <- data.table::fread("data-raw/taxa.txt", quote = "")
# read the xlsx file from DSMZ (only around 2.5 MB):
data_dsmz <- readxl::read_xlsx("data-raw/DSMZ_bactnames.xlsx")
@ -66,7 +68,7 @@ data_col <- data_col %>%
subspecies = infraspecificEpithet,
rank = taxonRank,
ref = scientificNameAuthorship,
species_id = furtherInformationURL)
species_id = references)
data_col$source <- "CoL"
# clean data_dsmz
@ -761,6 +763,7 @@ new_families <- MOs %>%
class(MOs$mo) <- "character"
MOs <- rbind(MOs %>% filter(!(rank == "family" & fullname %in% new_families)),
AMR::microorganisms %>%
select(-snomed) %>%
filter(family == "Enterobacteriaceae" & rank == "family") %>%
rbind(., ., ., ., ., ., .) %>%
mutate(fullname = new_families,
@ -794,7 +797,9 @@ colnames(MOs)
MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View()
MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View()
# and the ones we lost:
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View()
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo
AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View()
# and these IDs have changed:
old_new <- MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
@ -805,24 +810,41 @@ old_new <- MOs %>%
View(old_new)
# to keep all the old IDs:
# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
# rbind(microorganisms %>%
# filter(mo %in% old_new$mo_old) %>%
# select(mo, fullname) %>%
# left_join(MOs %>%
# left_join(MOs %>%
# select(-mo), by = "fullname"))
# and these codes are now missing (which will throw a unit test error):
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
AMR::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) %>% View()
# this is how to fix it
microorganisms.codes <- AMR::microorganisms.codes %>%
left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>%
filter(!is.na(mo))
microorganisms.codes %>% filter(!mo %in% MOs$mo)
# and for microorganisms.translation:
microorganisms.translation <- AMR:::microorganisms.translation %>%
select(mo = mo_new) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new)
left_join(MOs %>%
mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
left_join(AMR::microorganisms %>%
transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
select(mo_old, mo_new),
by = c("mo" = "mo_old")) %>%
select(code, mo = mo_new) %>%