mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 13:21:50 +02:00
fix missing R breakpoints
This commit is contained in:
Binary file not shown.
Binary file not shown.
@ -1 +1 @@
|
||||
8c1fdbe23853d30840dc5d863bc761df
|
||||
4cb5e83062897061b17ddac6d5cd31d7
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -592,18 +592,61 @@ for (i in 2:6) {
|
||||
status = "accepted",
|
||||
source = "manually added"
|
||||
) %>%
|
||||
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank)) %>%
|
||||
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank))# %>%
|
||||
# get GBIF identifier where available
|
||||
left_join(current_gbif %>%
|
||||
select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
by = c("kingdom", "rank", i_name)
|
||||
) %>%
|
||||
mutate(source = ifelse(!is.na(gbif), "GBIF", source))
|
||||
# left_join(current_gbif %>%
|
||||
# select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
# by = c("kingdom", "rank", i_name)
|
||||
# ) %>%
|
||||
# mutate(source = ifelse(!is.na(gbif), "GBIF", source))
|
||||
message("n = ", nrow(to_add))
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(to_add)
|
||||
}
|
||||
|
||||
# FIX LATER: added missing entries after finding out that some taxonomic levels were still missing
|
||||
# this should not be needed - it was the only part that was required after last update
|
||||
# can now be removed? Check with next update!
|
||||
new_df <- AMR::microorganisms[0, ]
|
||||
for (tax in c("phylum", "class", "order", "family", "genus")) {
|
||||
print(tax)
|
||||
out <- AMR::microorganisms %>% pull(tax) %>% unique()
|
||||
missing <- vapply(FUN.VALUE = logical(1), out, function(x) length(which(AMR::microorganisms[[tax]] == x & AMR::microorganisms$rank == tax)) == 0)
|
||||
missing <- names(missing)[which(missing == TRUE & names(missing) != "" & names(missing) %unlike% "unknown")]
|
||||
out <- microorganisms %>%
|
||||
filter(.[[tax]] %in% missing) %>%
|
||||
distinct(.[[tax]], .keep_all = TRUE) %>%
|
||||
mutate_at(vars((which(colnames(.) == tax) + 1):subspecies), ~"") %>%
|
||||
mutate_at(vars(lpsn:gbif_renamed_to), ~NA_character_) %>%
|
||||
mutate(rank = tax,
|
||||
ref = NA_character_,
|
||||
status = "accepted",
|
||||
fullname = .[[tax]],
|
||||
source = "manually added",
|
||||
snomed = rep(list(character(0)), nrow(.)))
|
||||
new_df <- bind_rows(new_df, out)
|
||||
if (".[[tax]]" %in% colnames(new_df)) {
|
||||
new_df <- new_df %>% select(-`.[[tax]]`)
|
||||
}
|
||||
}
|
||||
new_df <- new_df %>%
|
||||
mutate(mo = as.character(mo))
|
||||
|
||||
new_mo <- new_df %>%
|
||||
filter(rank == "family") %>%
|
||||
mutate(
|
||||
mo_rank_new8 = abbreviate_mo(family, minlength = 8, prefix = "[FAM]_"),
|
||||
mo_rank_new9 = abbreviate_mo(family, minlength = 9, prefix = "[FAM]_"),
|
||||
mo_rank_new = mo_rank_new8,
|
||||
mo_duplicated = duplicated(mo_rank_new),
|
||||
mo_rank_new = ifelse(mo_duplicated, mo_rank_new9, mo_rank_new),
|
||||
mo_duplicated = duplicated(mo_rank_new)
|
||||
) %>%
|
||||
transmute(fullname, mo_rank_new = paste0(gsub("_.*", "_", as.character(mo)), mo_rank_new))
|
||||
any(new_mo$mo_rank_new %in% microorganisms$mo)
|
||||
new_df[which(new_df$fullname %in% new_mo$fullname), "mo"] <- new_mo$mo_rank_new
|
||||
|
||||
|
||||
# species (requires combination with genus)
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(taxonomy %>%
|
||||
@ -998,9 +1041,16 @@ taxonomy <- taxonomy %>%
|
||||
.before = 1
|
||||
) %>%
|
||||
select(!starts_with("mo_")) %>%
|
||||
arrange(fullname) %>%
|
||||
arrange(fullname)
|
||||
|
||||
# now check these - e.g. Nitrospira is the name of a genus AND its class
|
||||
taxonomy %>% filter(fullname %in% .[duplicated(fullname), "fullname", drop = TRUE])
|
||||
taxonomy <- taxonomy %>%
|
||||
distinct(fullname, .keep_all = TRUE)
|
||||
|
||||
# This must not exist:
|
||||
taxonomy %>% filter(mo %like% "__")
|
||||
|
||||
|
||||
# Remove unwanted taxonomic entries from Protozoa/Fungi --------------------
|
||||
|
||||
@ -1027,7 +1077,7 @@ message("\nCongratulations! The new taxonomic table will contain ", format(nrow(
|
||||
# we will use Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS)
|
||||
# as a source, which copies directly from the latest US SNOMED CT version
|
||||
# - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009
|
||||
# - check that current online version is higher than SNOMED_VERSION$current_version
|
||||
# - check that current online version is higher than TAXONOMY_VERSION$SNOMED
|
||||
# - if so, click on 'Download Value Set', choose 'TXT'
|
||||
snomed <- vroom("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
|
||||
select(1:2) %>%
|
||||
|
@ -37,6 +37,7 @@ library(AMR)
|
||||
|
||||
# Install the WHONET 2022 software on Windows (http://www.whonet.org/software.html),
|
||||
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
|
||||
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
|
||||
|
||||
# Load source data ----
|
||||
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
|
||||
@ -134,9 +135,8 @@ breakpoints_new <- breakpoints %>%
|
||||
ab = as.ab(WHONET_ABX_CODE),
|
||||
ref_tbl = REFERENCE_TABLE,
|
||||
disk_dose = POTENCY,
|
||||
# keep disks within 6-50 mm
|
||||
breakpoint_S = if_else(method == "DISK", S %>% pmax(6) %>% pmin(50), S),
|
||||
breakpoint_R = if_else(method == "DISK", R %>% pmax(6) %>% pmin(50), R),
|
||||
breakpoint_S = S,
|
||||
breakpoint_R = R,
|
||||
uti = SITE_OF_INFECTION %like% "(UTI|urinary|urine)") %>%
|
||||
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
|
||||
mutate(disk_dose = disk_dose %>%
|
||||
@ -177,6 +177,9 @@ breakpoints_new <- breakpoints_new %>%
|
||||
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
|
||||
breakpoint_R + 1,
|
||||
breakpoint_R))
|
||||
# fix missing R breakpoint where there is an S breakpoint
|
||||
breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
|
||||
|
||||
# check again
|
||||
breakpoints_new %>% filter(guideline == "EUCAST 2022", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
# compare with current version
|
||||
|
@ -1 +1 @@
|
||||
c7fbfa8e8b012a00c9e0de1476e28f99
|
||||
547b6b086e20bcfb918b3db6f55f84a5
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Reference in New Issue
Block a user