1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 13:21:50 +02:00

fix missing R breakpoints

This commit is contained in:
2022-10-29 14:15:23 +02:00
parent c2801ba7a1
commit 6ad7857d39
34 changed files with 959 additions and 865 deletions

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
8c1fdbe23853d30840dc5d863bc761df
4cb5e83062897061b17ddac6d5cd31d7

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -592,18 +592,61 @@ for (i in 2:6) {
status = "accepted",
source = "manually added"
) %>%
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank)) %>%
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank))# %>%
# get GBIF identifier where available
left_join(current_gbif %>%
select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
by = c("kingdom", "rank", i_name)
) %>%
mutate(source = ifelse(!is.na(gbif), "GBIF", source))
# left_join(current_gbif %>%
# select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
# by = c("kingdom", "rank", i_name)
# ) %>%
# mutate(source = ifelse(!is.na(gbif), "GBIF", source))
message("n = ", nrow(to_add))
taxonomy <- taxonomy %>%
bind_rows(to_add)
}
# TODO (revisit at the next taxonomy update): back-fill taxonomic levels that
# turned out to be still missing, i.e. names that occur in a higher-rank column
# (phylum, class, order, family, genus) but have no record of their own at that rank.
# This back-fill should not be needed in principle - it was the only part that was
# still required after the last update. Check at the next update whether it can be removed.
# Start from a zero-row copy of AMR::microorganisms so the columns line up when binding rows.
new_df <- AMR::microorganisms[0, ]
for (tax in c("phylum", "class", "order", "family", "genus")) {
print(tax)
# all distinct values found in the column of this rank
out <- AMR::microorganisms %>% pull(tax) %>% unique()
# TRUE for every value that has no row where both this column and `rank` match the current level
missing <- vapply(FUN.VALUE = logical(1), out, function(x) length(which(AMR::microorganisms[[tax]] == x & AMR::microorganisms$rank == tax)) == 0)
# keep the names of the flagged values, dropping empty names and names that match "unknown"
# (this relies on vapply() naming its result after the character input)
missing <- names(missing)[which(missing == TRUE & names(missing) != "" & names(missing) %unlike% "unknown")]
# build one new record per missing name, starting from an existing row that carries that name
# NOTE(review): `microorganisms` is read here without the `AMR::` prefix used above - confirm both are the same table
out <- microorganisms %>%
filter(.[[tax]] %in% missing) %>%
# one row per name; the unnamed expression adds a helper column called `.[[tax]]`, which is removed further down
distinct(.[[tax]], .keep_all = TRUE) %>%
# blank every column positioned after the current rank, up to and including `subspecies`
# NOTE(review): mutate_at() is superseded in dplyr by across() - consider migrating once verified
mutate_at(vars((which(colnames(.) == tax) + 1):subspecies), ~"") %>%
# clear the columns from `lpsn` through `gbif_renamed_to`
mutate_at(vars(lpsn:gbif_renamed_to), ~NA_character_) %>%
# the new record represents the taxonomic level itself
mutate(rank = tax,
ref = NA_character_,
status = "accepted",
fullname = .[[tax]],
source = "manually added",
snomed = rep(list(character(0)), nrow(.)))
new_df <- bind_rows(new_df, out)
# drop the helper column that distinct() introduced
if (".[[tax]]" %in% colnames(new_df)) {
new_df <- new_df %>% select(-`.[[tax]]`)
}
}
# Give the newly added family records their own `mo` code.
# Work on plain character codes so they can be overwritten below.
new_df <- new_df %>%
  mutate(mo = as.character(mo))
new_mo <- new_df %>%
  filter(rank == "family") %>%
  mutate(
    # two candidate codes per family, built with minlength 8 and 9
    mo_rank_new8 = abbreviate_mo(family, minlength = 8, prefix = "[FAM]_"),
    mo_rank_new9 = abbreviate_mo(family, minlength = 9, prefix = "[FAM]_"),
    # prefer the minlength-8 candidate; fall back to the minlength-9 one where it repeats an earlier code
    mo_rank_new = mo_rank_new8,
    mo_duplicated = duplicated(mo_rank_new),
    mo_rank_new = ifelse(mo_duplicated, mo_rank_new9, mo_rank_new),
    # recomputed after the fallback; only visible when this step is inspected
    # interactively, because transmute() below drops the column
    mo_duplicated = duplicated(mo_rank_new)
  ) %>%
  # keep everything up to and including the first underscore of the current
  # code and append the new family code to it
  transmute(fullname, mo_rank_new = paste0(gsub("_.*", "_", as.character(mo)), mo_rank_new))
# interactive check: shows whether any new code already exists in `microorganisms`
any(new_mo$mo_rank_new %in% microorganisms$mo)
# Write the codes back to exactly the rows `new_mo` was derived from (the
# family rows, in the same order). Selecting rows by `fullname %in% ...` and
# assigning positionally would misalign or fail as soon as a record of another
# rank shares its full name with one of these families.
new_df$mo[which(new_df$rank == "family")] <- new_mo$mo_rank_new
# species (requires combination with genus)
taxonomy <- taxonomy %>%
bind_rows(taxonomy %>%
@ -998,9 +1041,16 @@ taxonomy <- taxonomy %>%
.before = 1
) %>%
select(!starts_with("mo_")) %>%
arrange(fullname) %>%
arrange(fullname)
# Interactive check: list all records whose full name occurs more than once,
# e.g. Nitrospira is the name of a genus AND of its class
taxonomy %>% filter(fullname %in% .[duplicated(fullname), "fullname", drop = TRUE])
# keep a single record per full name; distinct() retains the first row it encounters,
# so which of the duplicates survives depends on the current row order
taxonomy <- taxonomy %>%
distinct(fullname, .keep_all = TRUE)
# This must not exist:
taxonomy %>% filter(mo %like% "__")
# Remove unwanted taxonomic entries from Protozoa/Fungi --------------------
@ -1027,7 +1077,7 @@ message("\nCongratulations! The new taxonomic table will contain ", format(nrow(
# we will use Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS)
# as a source, which copies directly from the latest US SNOMED CT version
# - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009
# - check that current online version is higher than SNOMED_VERSION$current_version
# - check that current online version is higher than TAXONOMY_VERSION$SNOMED
# - if so, click on 'Download Value Set', choose 'TXT'
snomed <- vroom("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
select(1:2) %>%

View File

@ -37,6 +37,7 @@ library(AMR)
# Install the WHONET 2022 software on Windows (http://www.whonet.org/software.html),
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
# Load source data ----
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
@ -134,9 +135,8 @@ breakpoints_new <- breakpoints %>%
ab = as.ab(WHONET_ABX_CODE),
ref_tbl = REFERENCE_TABLE,
disk_dose = POTENCY,
# keep disks within 6-50 mm
breakpoint_S = if_else(method == "DISK", S %>% pmax(6) %>% pmin(50), S),
breakpoint_R = if_else(method == "DISK", R %>% pmax(6) %>% pmin(50), R),
breakpoint_S = S,
breakpoint_R = R,
uti = SITE_OF_INFECTION %like% "(UTI|urinary|urine)") %>%
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
mutate(disk_dose = disk_dose %>%
@ -177,6 +177,9 @@ breakpoints_new <- breakpoints_new %>%
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
breakpoint_R + 1,
breakpoint_R))
# Where the R breakpoint is missing, reuse the S breakpoint of the same row
breakpoints_new <- breakpoints_new %>%
  mutate(breakpoint_R = coalesce(breakpoint_R, breakpoint_S))
# check again
breakpoints_new %>%
  filter(
    guideline == "EUCAST 2022",
    ab == "AMC",
    mo == "B_[ORD]_ENTRBCTR",
    method == "MIC"
  )
# compare with current version

View File

@ -1 +1 @@
c7fbfa8e8b012a00c9e0de1476e28f99
547b6b086e20bcfb918b3db6f55f84a5

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.