diff --git a/DESCRIPTION b/DESCRIPTION index 3545cce0..9d727576 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.5.0.9019 -Date: 2021-02-17 +Version: 1.5.0.9020 +Date: 2021-02-18 Title: Antimicrobial Resistance Data Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 9a3278f5..402fb27d 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.5.0.9019 -## Last updated: 17 February 2021 +# AMR 1.5.0.9020 +## Last updated: 18 February 2021 ### New * Support for EUCAST Clinical Breakpoints v11.0 (2021), effective in the `eucast_rules()` function and in `as.rsi()` to interpret MIC and disk diffusion values. This is now the default guideline in this package. @@ -57,6 +57,7 @@ * Updated colours of values R, S and I in tibble printing * Functions `print()` and `summary()` on a Principal Components Analysis object (`pca()`) now print additional group info if the original data was grouped using `dplyr::group_by()` * Improved speed and reliability of `guess_ab_col()`. As this also internally improves the reliability of `first_isolate()` and `mdro()`, this might have a slight impact on the results of those functions. 
+* Fix for `mo_name()` when used in other languages than English ### Other * Big documentation updates diff --git a/R/mo.R b/R/mo.R index 6ca15e83..62076bdf 100755 --- a/R/mo.R +++ b/R/mo.R @@ -451,528 +451,167 @@ exec_as.mo <- function(x, x_backup_untouched <- x x <- strip_whitespace(x, dyslexia_mode) + # translate 'unknown' names back to English + if (any(x %like% "unbekannt|onbekend|desconocid|sconosciut|iconnu|desconhecid", na.rm = TRUE)) { + trns <- subset(translations_file, pattern %like% "unknown" | affect_mo_name == TRUE) + lapply(seq_len(nrow(trns)), + function(i) x <<- gsub(pattern = trns$replacement[i], + replacement = trns$pattern[i], + x = x, + ignore.case = TRUE, + perl = TRUE)) + } + x_backup <- x # from here on case-insensitive x <- tolower(x) - - x_backup[grepl("^(fungus|fungi)$", x)] <- "F_FUNGUS" # will otherwise become the kingdom - - # remove spp and species - x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x, perl = TRUE) - x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x, perl = TRUE) - x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE) # when ending in SPE instead of SPP and preceded by 2-4 characters - x <- strip_whitespace(x, dyslexia_mode) - - x_backup_without_spp <- x - x_species <- paste(x, "species") - # translate to English for supported languages of mo_property - x <- gsub("(gruppe|groep|grupo|gruppo|groupe)", "group", x, perl = TRUE) - # no groups and complexes as ending - x <- gsub("(complex|group)$", "", x, perl = TRUE) - x <- gsub("((an)?aero+b)[a-z]*", "", x, perl = TRUE) - x <- gsub("^atyp[a-z]*", "", x, perl = TRUE) - x <- gsub("(vergroen)[a-z]*", "viridans", x, perl = TRUE) - x <- gsub("[a-z]*diff?erent[a-z]*", "", x, perl = TRUE) - x <- gsub("(hefe|gist|gisten|levadura|lievito|fermento|levure)[a-z]*", "yeast", x, perl = TRUE) - x <- gsub("(schimmels?|mofo|molde|stampo|moisissure|fungi)[a-z]*", "fungus", x, perl = TRUE) - x <- gsub("fungus[ph|f]rya", 
"fungiphrya", x, perl = TRUE) - # no contamination - x <- gsub("(contamination|kontamination|mengflora|contaminaci.n|contamina..o)", "", x, perl = TRUE) - # remove non-text in case of "E. coli" except dots and spaces - x <- trimws(gsub("[^.a-zA-Z0-9/ \\-]+", " ", x, perl = TRUE)) - # but make sure that dots are followed by a space - x <- gsub("[.] ?", ". ", x, perl = TRUE) - # replace minus by a space - x <- gsub("-+", " ", x, perl = TRUE) - # replace hemolytic by haemolytic - x <- gsub("ha?emoly", "haemoly", x, perl = TRUE) - # place minus back in streptococci - x <- gsub("(alpha|beta|gamma).?ha?emoly", "\\1-haemoly", x, perl = TRUE) - # remove genus as first word - x <- gsub("^genus ", "", x, perl = TRUE) - # remove 'uncertain'-like texts - x <- trimws(gsub("(uncertain|susp[ie]c[a-z]+|verdacht)", "", x, perl = TRUE)) - # allow characters that resemble others = dyslexia_mode ---- - if (dyslexia_mode == TRUE) { - x <- tolower(x) - x <- gsub("[iy]+", "[iy]+", x, perl = TRUE) - x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x, perl = TRUE) - x <- gsub("(ph|hp|f|v)+", "(ph|hp|f|v)+", x, perl = TRUE) - x <- gsub("(th|ht|t)+", "(th|ht|t)+", x, perl = TRUE) - x <- gsub("a+", "a+", x, perl = TRUE) - x <- gsub("u+", "u+", x, perl = TRUE) - # allow any ending of -um, -us, -ium, -icum, -ius, -icus, -ica, -ia and -a (needs perl for the negative backward lookup): - x <- gsub("(u\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+)(?![a-z])", - "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) - x <- gsub("(\\[iy\\]\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+a\\+)(?![a-z])", - "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) - x <- gsub("(\\[iy\\]\\+u\\+m)(?![a-z])", - "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) - x <- gsub("(\\[iy\\]\\+a\\+)(?![a-z])", - "([iy]*a+|[iy]+a*)", x, perl = TRUE) - x <- gsub("e+", "e+", x, perl = TRUE) - x <- gsub("o+", "o+", x, perl = TRUE) - x <- gsub("(.)\\1+", "\\1+", x, perl = TRUE) - # allow multiplication of all 
other consonants - x <- gsub("([bdgjlnrw]+)", "\\1+", x, perl = TRUE) - # allow ending in -en or -us - x <- gsub("e\\+n(?![a-z[])", "(e+n|u+(c|k|q|qu|s|z|x|ks)+)", x, perl = TRUE) - # if the input is longer than 10 characters, allow any forgotten consonant between all characters, as some might just have forgotten one... - # this will allow "Pasteurella damatis" to be correctly read as "Pasteurella dagmatis". - consonants <- paste(letters[!letters %in% c("a", "e", "i", "o", "u")], collapse = "") - x[nchar(x_backup_without_spp) > 10] <- gsub("[+]", paste0("+[", consonants, "]?"), x[nchar(x_backup_without_spp) > 10]) - # allow au and ou after all these regex implementations - x <- gsub("a+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE) - x <- gsub("o+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE) - } - x <- strip_whitespace(x, dyslexia_mode) - # make sure to remove regex overkill (will lead to errors) - x <- gsub("++", "+", x, fixed = TRUE) - x <- gsub("?+", "?", x, fixed = TRUE) - - x_trimmed <- x - x_trimmed_species <- paste(x_trimmed, "species") - x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, perl = TRUE) - # remove last part from "-" or "/" - x_trimmed_without_group <- gsub("(.*)[-/].*", "\\1", x_trimmed_without_group) - # replace space and dot by regex sign - x_withspaces <- gsub("[ .]+", ".* ", x, perl = TRUE) - x <- gsub("[ .]+", ".*", x, perl = TRUE) - # add start en stop regex - x <- paste0("^", x, "$") - - x_withspaces_start_only <- paste0("^", x_withspaces) - x_withspaces_end_only <- paste0(x_withspaces, "$") - x_withspaces_start_end <- paste0("^", x_withspaces, "$") - - if (isTRUE(debug)) { - cat(paste0(font_blue("x"), ' "', x, '"\n')) - cat(paste0(font_blue("x_species"), ' "', x_species, '"\n')) - cat(paste0(font_blue("x_withspaces_start_only"), ' "', x_withspaces_start_only, '"\n')) - 
cat(paste0(font_blue("x_withspaces_end_only"), ' "', x_withspaces_end_only, '"\n')) - cat(paste0(font_blue("x_withspaces_start_end"), ' "', x_withspaces_start_end, '"\n')) - cat(paste0(font_blue("x_backup"), ' "', x_backup, '"\n')) - cat(paste0(font_blue("x_backup_without_spp"), ' "', x_backup_without_spp, '"\n')) - cat(paste0(font_blue("x_trimmed"), ' "', x_trimmed, '"\n')) - cat(paste0(font_blue("x_trimmed_species"), ' "', x_trimmed_species, '"\n')) - cat(paste0(font_blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n')) - } - - if (initial_search == TRUE) { - progress <- progress_ticker(n = length(x), n_min = 25) # start if n >= 25 - on.exit(close(progress)) - } - - for (i in seq_len(length(x))) { - + + x_backup[x %like_case% "^(fungus|fungi)$"] <- "(unknown fungus)" # will otherwise become the kingdom + x_backup[x_backup_untouched == "Fungi"] <- "Fungi" # is literally the kingdom + + # Fill in fullnames and MO codes at once + known_names <- x_backup %in% MO_lookup$fullname + x[known_names] <- MO_lookup[match(x_backup[known_names], MO_lookup$fullname), property, drop = TRUE] + known_codes <- x_backup %in% MO_lookup$mo + x[known_codes] <- MO_lookup[match(x_backup[known_codes], MO_lookup$mo), property, drop = TRUE] + already_known <- known_names | known_codes + + # now only continue where the right taxonomic output is not already known + if (any(!already_known)) { + x_known <- x[already_known] + + # remove spp and species + x <- gsub(" +(spp.?|ssp.?|sp.? 
|ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x, perl = TRUE) + x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x, perl = TRUE) + x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE) # when ending in SPE instead of SPP and preceded by 2-4 characters + x <- strip_whitespace(x, dyslexia_mode) + + x_backup_without_spp <- x + x_species <- paste(x, "species") + # translate to English for supported languages of mo_property + x <- gsub("(gruppe|groep|grupo|gruppo|groupe)", "group", x, perl = TRUE) + # no groups and complexes as ending + x <- gsub("(complex|group)$", "", x, perl = TRUE) + x <- gsub("(^|[^a-z])((an)?aero+b)[a-z]*", "", x, perl = TRUE) + x <- gsub("^atyp[a-z]*", "", x, perl = TRUE) + x <- gsub("(vergroen)[a-z]*", "viridans", x, perl = TRUE) + x <- gsub("[a-z]*diff?erent[a-z]*", "", x, perl = TRUE) + x <- gsub("(hefe|gist|gisten|levadura|lievito|fermento|levure)[a-z]*", "yeast", x, perl = TRUE) + x <- gsub("(schimmels?|mofo|molde|stampo|moisissure|fungi)[a-z]*", "fungus", x, perl = TRUE) + x <- gsub("fungus[ph|f]rya", "fungiphrya", x, perl = TRUE) + # no contamination + x <- gsub("(contamination|kontamination|mengflora|contaminaci.n|contamina..o)", "", x, perl = TRUE) + # remove non-text in case of "E. coli" except dots and spaces + x <- trimws(gsub("[^.a-zA-Z0-9/ \\-]+", " ", x, perl = TRUE)) + # but make sure that dots are followed by a space + x <- gsub("[.] ?", ". 
", x, perl = TRUE) + # replace minus by a space + x <- gsub("-+", " ", x, perl = TRUE) + # replace hemolytic by haemolytic + x <- gsub("ha?emoly", "haemoly", x, perl = TRUE) + # place minus back in streptococci + x <- gsub("(alpha|beta|gamma).?ha?emoly", "\\1-haemoly", x, perl = TRUE) + # remove genus as first word + x <- gsub("^genus ", "", x, perl = TRUE) + # remove 'uncertain'-like texts + x <- trimws(gsub("(uncertain|susp[ie]c[a-z]+|verdacht)", "", x, perl = TRUE)) + # allow characters that resemble others = dyslexia_mode ---- + if (dyslexia_mode == TRUE) { + x <- tolower(x) + x <- gsub("[iy]+", "[iy]+", x, perl = TRUE) + x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x, perl = TRUE) + x <- gsub("(ph|hp|f|v)+", "(ph|hp|f|v)+", x, perl = TRUE) + x <- gsub("(th|ht|t)+", "(th|ht|t)+", x, perl = TRUE) + x <- gsub("a+", "a+", x, perl = TRUE) + x <- gsub("u+", "u+", x, perl = TRUE) + # allow any ending of -um, -us, -ium, -icum, -ius, -icus, -ica, -ia and -a (needs perl for the negative backward lookup): + x <- gsub("(u\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+)(?![a-z])", + "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) + x <- gsub("(\\[iy\\]\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+a\\+)(?![a-z])", + "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) + x <- gsub("(\\[iy\\]\\+u\\+m)(?![a-z])", + "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) + x <- gsub("(\\[iy\\]\\+a\\+)(?![a-z])", + "([iy]*a+|[iy]+a*)", x, perl = TRUE) + x <- gsub("e+", "e+", x, perl = TRUE) + x <- gsub("o+", "o+", x, perl = TRUE) + x <- gsub("(.)\\1+", "\\1+", x, perl = TRUE) + # allow multiplication of all other consonants + x <- gsub("([bdgjlnrw]+)", "\\1+", x, perl = TRUE) + # allow ending in -en or -us + x <- gsub("e\\+n(?![a-z[])", "(e+n|u+(c|k|q|qu|s|z|x|ks)+)", x, perl = TRUE) + # if the input is longer than 10 characters, allow any forgotten consonant between all characters, as some might just have forgotten one... 
+ # this will allow "Pasteurella damatis" to be correctly read as "Pasteurella dagmatis". + consonants <- paste(letters[!letters %in% c("a", "e", "i", "o", "u")], collapse = "") + x[nchar(x_backup_without_spp) > 10] <- gsub("[+]", paste0("+[", consonants, "]?"), x[nchar(x_backup_without_spp) > 10]) + # allow au and ou after all these regex implementations + x <- gsub("a+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE) + x <- gsub("o+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE) + } + x <- strip_whitespace(x, dyslexia_mode) + # make sure to remove regex overkill (will lead to errors) + x <- gsub("++", "+", x, fixed = TRUE) + x <- gsub("?+", "?", x, fixed = TRUE) + + x_trimmed <- x + x_trimmed_species <- paste(x_trimmed, "species") + x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, perl = TRUE) + # remove last part from "-" or "/" + x_trimmed_without_group <- gsub("(.*)[-/].*", "\\1", x_trimmed_without_group) + # replace space and dot by regex sign + x_withspaces <- gsub("[ .]+", ".* ", x, perl = TRUE) + x <- gsub("[ .]+", ".*", x, perl = TRUE) + # add start en stop regex + x <- paste0("^", x, "$") + + x_withspaces_start_only <- paste0("^", x_withspaces) + x_withspaces_end_only <- paste0(x_withspaces, "$") + x_withspaces_start_end <- paste0("^", x_withspaces, "$") + + if (isTRUE(debug)) { + cat(paste0(font_blue("x"), ' "', x, '"\n')) + cat(paste0(font_blue("x_species"), ' "', x_species, '"\n')) + cat(paste0(font_blue("x_withspaces_start_only"), ' "', x_withspaces_start_only, '"\n')) + cat(paste0(font_blue("x_withspaces_end_only"), ' "', x_withspaces_end_only, '"\n')) + cat(paste0(font_blue("x_withspaces_start_end"), ' "', x_withspaces_start_end, '"\n')) + cat(paste0(font_blue("x_backup"), ' "', x_backup, '"\n')) + cat(paste0(font_blue("x_backup_without_spp"), ' "', x_backup_without_spp, '"\n')) + cat(paste0(font_blue("x_trimmed"), ' 
"', x_trimmed, '"\n')) + cat(paste0(font_blue("x_trimmed_species"), ' "', x_trimmed_species, '"\n')) + cat(paste0(font_blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n')) + } + if (initial_search == TRUE) { - progress$tick() - } - - # valid MO code ---- - found <- lookup(mo == toupper(x_backup[i])) - if (!is.na(found)) { - x[i] <- found[1L] - next - } - - # valid fullname ---- - found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) - # added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets - if (!is.na(found)) { - x[i] <- found[1L] - next - } - - # old fullname ---- - found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), - column = NULL, # all columns - haystack = MO.old_lookup) - if (!all(is.na(found))) { - # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: - # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) - # mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999" - if (property == "ref") { - x[i] <- found["ref"] - } else { - x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) - } - pkg_env$mo_renamed_last_run <- found["fullname"] - was_renamed(name_old = found["fullname"], - name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), - ref_old = found["ref"], - ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), - mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) - next - } - - if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { - # empty and nonsense values, ignore without warning - x[i] <- lookup(mo == "UNKNOWN") - next - } - - # exact SNOMED code ---- - if (x_backup[i] %like% "^[0-9]+$") { - snomed_found <- 
unlist(lapply(reference_data_to_use$snomed, - function(s) if (x_backup[i] %in% s) { - TRUE - } else { - FALSE - })) - if (sum(snomed_found, na.rm = TRUE) > 0) { - found <- reference_data_to_use[snomed_found == TRUE, property][[1]] - if (!is.na(found)) { - x[i] <- found[1L] - next - } - } - } - - # very probable: is G. species ---- - found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", - tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) - if (!is.na(found)) { - x[i] <- found[1L] - next - } - - # WHONET and other common LIS codes ---- - found <- microorganisms.codes[which(microorganisms.codes$code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i]))), "mo", drop = TRUE][1L] - if (!is.na(found)) { - x[i] <- lookup(mo == found) - next - } - - # user-defined reference ---- - if (!is.null(reference_df)) { - if (x_backup[i] %in% reference_df[, 1]) { - # already checked integrity of reference_df, all MOs are valid - ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"][[1L]] - x[i] <- lookup(mo == ref_mo) - next - } - } - - # WHONET: xxx = no growth - if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) { - x[i] <- NA_character_ - next - } - - # check for very small input, but ignore the O antigens of E. 
coli - if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 - & !toupper(x_backup_without_spp[i]) %like_case% "O?(26|103|104|104|111|121|145|157)") { - # fewer than 3 chars and not looked for species, add as failure - x[i] <- lookup(mo == "UNKNOWN") - if (initial_search == TRUE) { - failures <- c(failures, x_backup[i]) - } - next - } - - if (x_backup_without_spp[i] %like_case% "(virus|viridae)") { - # there is no fullname like virus or viridae, so don't try to coerce it - x[i] <- NA_character_ - next - } - - # translate known trivial abbreviations to genus + species ---- - if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA") - | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|la-?mrsa|ca-?mrsa)( |$)") { - x[i] <- lookup(fullname == "Staphylococcus aureus", uncertainty = -1) - next - } - if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") - | x_backup_without_spp[i] %like_case% "(^| )(mrse|msse)( |$)") { - x[i] <- lookup(fullname == "Staphylococcus epidermidis", uncertainty = -1) - next - } - if (toupper(x_backup_without_spp[i]) == "VRE" - | x_backup_without_spp[i] %like_case% "(^| )vre " - | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { - x[i] <- lookup(genus == "Enterococcus", uncertainty = -1) - next - } - # support for: - # - AIEC (Adherent-Invasive E. coli) - # - ATEC (Atypical Entero-pathogenic E. coli) - # - DAEC (Diffusely Adhering E. coli) - # - EAEC (Entero-Aggresive E. coli) - # - EHEC (Entero-Haemorrhagic E. coli) - # - EIEC (Entero-Invasive E. coli) - # - EPEC (Entero-Pathogenic E. coli) - # - ETEC (Entero-Toxigenic E. coli) - # - NMEC (Neonatal Meningitis‐causing E. coli) - # - STEC (Shiga-toxin producing E. coli) - # - UPEC (Uropathogenic E. coli) - if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC") - # also support O-antigens of E. 
coli: O26, O103, O104, O111, O121, O145, O157 - | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { - x[i] <- lookup(fullname == "Escherichia coli", uncertainty = -1) - next - } - if (toupper(x_backup_without_spp[i]) == "MRPA" - | x_backup_without_spp[i] %like_case% "(^| )mrpa( |$)") { - # multi resistant P. aeruginosa - x[i] <- lookup(fullname == "Pseudomonas aeruginosa", uncertainty = -1) - next - } - if (toupper(x_backup_without_spp[i]) == "CRSM") { - # co-trim resistant S. maltophilia - x[i] <- lookup(fullname == "Stenotrophomonas maltophilia", uncertainty = -1) - next - } - if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") - | x_backup_without_spp[i] %like_case% "(^| )(pisp|prsp|visp|vrsp)( |$)") { - # peni I, peni R, vanco I, vanco R: S. pneumoniae - x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { - # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) - x[i] <- lookup(mo == toupper(gsub("g([abcdfghk])s", - "B_STRPT_GRP\\1", - x_backup_without_spp[i])), uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { - # Streptococci in different languages, like "estreptococos grupo B" - x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", - "B_STRPT_GRP\\2", - x_backup_without_spp[i])), uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { - # Streptococci in different languages, like "Group A Streptococci" - x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", - "B_STRPT_GRP\\1", - x_backup_without_spp[i])), uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "haemoly.*strep") { - # Haemolytic streptococci in different languages - x[i] <- lookup(mo == "B_STRPT_HAEM", uncertainty = -1) 
- next - } - # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) - if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" - | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" - | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { - # coerce S. coagulase negative - x[i] <- lookup(mo == "B_STPHY_CONS", uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" - | x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" - | x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { - # coerce S. coagulase positive - x[i] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) - next - } - # streptococcal groups: milleri and viridans - if (x_trimmed[i] %like_case% "strepto.* mil+er+i" - | x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" - | x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { - # Milleri Group Streptococcus (MGS) - x[i] <- lookup(mo == "B_STRPT_MILL", uncertainty = -1) - next - } - if (x_trimmed[i] %like_case% "strepto.* viridans" - | x_backup_without_spp[i] %like_case% "strepto.* viridans" - | x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { - # Viridans Group Streptococcus (VGS) - x[i] <- lookup(mo == "B_STRPT_VIRI", uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" - | x_backup_without_spp[i] %like_case% "negatie?[vf]" - | x_trimmed[i] %like_case% "gram[ -]?neg.*") { - # coerce Gram negatives - x[i] <- lookup(mo == "B_GRAMN", uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" - | x_backup_without_spp[i] %like_case% "positie?[vf]" - | x_trimmed[i] %like_case% "gram[ -]?pos.*") { - # coerce Gram positives - x[i] <- lookup(mo == "B_GRAMP", uncertainty = -1) - next - } - if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") { - # coerce mycobacteria in multiple languages - x[i] <- lookup(genus == "Mycobacterium", uncertainty = -1) - next - } - - if 
(x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { - if (x_backup_without_spp[i] %like_case% "salmonella group") { - # Salmonella Group A to Z, just return S. species for now - x[i] <- lookup(genus == "Salmonella", uncertainty = -1) - next - } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) & - !x_backup[i] %like% "t[iy](ph|f)[iy]") { - # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica - # except for S. typhi, S. paratyphi, S. typhimurium - x[i] <- lookup(fullname == "Salmonella enterica", uncertainty = -1) - uncertainties <- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = 1, - input = x_backup[i], - result_mo = lookup(fullname == "Salmonella enterica", "mo", uncertainty = -1)), - stringsAsFactors = FALSE) - next - } - } - - # trivial names known to the field: - if ("meningococcus" %like_case% x_trimmed[i]) { - # coerce Neisseria meningitidis - x[i] <- lookup(fullname == "Neisseria meningitidis", uncertainty = -1) - next - } - if ("gonococcus" %like_case% x_trimmed[i]) { - # coerce Neisseria gonorrhoeae - x[i] <- lookup(fullname == "Neisseria gonorrhoeae", uncertainty = -1) - next - } - if ("pneumococcus" %like_case% x_trimmed[i]) { - # coerce Streptococcus penumoniae - x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1) - next + progress <- progress_ticker(n = length(x[!already_known]), n_min = 25) # start if n >= 25 + on.exit(close(progress)) } - if (x_backup[i] %in% pkg_env$mo_failed) { - # previously failed already in this session ---- - # (at this point the latest reference_df has also be checked) - x[i] <- lookup(mo == "UNKNOWN") + for (i in which(!already_known)) { + if (initial_search == TRUE) { - failures <- c(failures, x_backup[i]) - } - next - } - - # NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS - check_per_prevalence <- function(data_to_check, - data.old_to_check, - a.x_backup, - b.x_trimmed, - c.x_trimmed_without_group, - 
d.x_withspaces_start_end, - e.x_withspaces_start_only, - f.x_withspaces_end_only, - g.x_backup_without_spp, - h.x_species, - i.x_trimmed_species) { - - # FIRST TRY FULLNAMES AND CODES ---- - # if only genus is available, return only genus - - if (all(!c(x[i], b.x_trimmed) %like_case% " ")) { - found <- lookup(fullname_lower %in% c(h.x_species, i.x_trimmed_species), - haystack = data_to_check) - if (!is.na(found)) { - x[i] <- found[1L] - return(x[i]) - } - if (nchar(g.x_backup_without_spp) >= 6) { - found <- lookup(fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), - haystack = data_to_check) - if (!is.na(found)) { - x[i] <- found[1L] - return(x[i]) - } - } - # rest of genus only is in allow_uncertain part. - } - - # allow no codes less than 4 characters long, was already checked for WHONET earlier - if (nchar(g.x_backup_without_spp) < 4) { - x[i] <- lookup(mo == "UNKNOWN") - if (initial_search == TRUE) { - failures <- c(failures, a.x_backup) - } - return(x[i]) - } - - # try probable: trimmed version of fullname ---- - found <- lookup(fullname_lower %in% tolower(g.x_backup_without_spp), - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } - - # try any match keeping spaces ---- - if (nchar(g.x_backup_without_spp) >= 6) { - found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } - } - - # try any match keeping spaces, not ending with $ ---- - found <- lookup(fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } - if (nchar(g.x_backup_without_spp) >= 6) { - found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } - } - - # try any match keeping spaces, not start with ^ ---- - found <- lookup(fullname_lower %like_case% paste0(" ", 
trimws(f.x_withspaces_end_only)), - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } - - # try a trimmed version - if (nchar(g.x_backup_without_spp) >= 6) { - found <- lookup(fullname_lower %like_case% b.x_trimmed | - fullname_lower %like_case% c.x_trimmed_without_group, - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } + progress$tick() } - - # try splitting of characters in the middle and then find ID ---- - # only when text length is 6 or lower - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus - if (nchar(g.x_backup_without_spp) <= 6) { - x_length <- nchar(g.x_backup_without_spp) - x_split <- paste0("^", - g.x_backup_without_spp %pm>% substr(1, x_length / 2), - ".* ", - g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length)) - found <- lookup(fullname_lower %like_case% x_split, - haystack = data_to_check) - if (!is.na(found)) { - return(found[1L]) - } - } - - # try fullname without start and without nchar limit of >= 6 ---- - # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH - found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, - haystack = data_to_check) + # valid MO code ---- + found <- lookup(mo == toupper(x_backup[i])) if (!is.na(found)) { - return(found[1L]) + x[i] <- found[1L] + next } - - # MISCELLANEOUS ---- - - # look for old taxonomic names ---- - found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + + # valid fullname ---- + found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. 
-]", "", tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) + # added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets + if (!is.na(found)) { + x[i] <- found[1L] + next + } + + # old fullname ---- + found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), column = NULL, # all columns - haystack = data.old_to_check) + haystack = MO.old_lookup) if (!all(is.na(found))) { # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) @@ -988,351 +627,461 @@ exec_as.mo <- function(x, ref_old = found["ref"], ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) - return(x[i]) + next } - - # check for uncertain results ---- - uncertain_fn <- function(a.x_backup, - b.x_trimmed, - d.x_withspaces_start_end, - e.x_withspaces_start_only, - f.x_withspaces_end_only, - g.x_backup_without_spp, - uncertain.reference_data_to_use) { - - if (uncertainty_level == 0) { - # do not allow uncertainties - return(NA_character_) - } - - # UNCERTAINTY LEVEL 1 ---- - if (uncertainty_level >= 1) { - now_checks_for_uncertainty_level <- 1 - - # (1) look again for old taxonomic names, now for G. species ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (1) look again for old taxonomic names, now for G. 
species\n")) - } - if (isTRUE(debug)) { - message("Running '", d.x_withspaces_start_end, "' and '", e.x_withspaces_start_only, "'") - } - found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end | - fullname_lower %like_case% e.x_withspaces_start_only, - column = NULL, # all columns - haystack = data.old_to_check) - if (!all(is.na(found)) & nchar(g.x_backup_without_spp) >= 6) { - if (property == "ref") { - # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: - # mo_ref("Chlamydia psittaci") = "Page, 1968" (with warning) - # mo_ref("Chlamydophila psittaci") = "Everett et al., 1999" - x <- found["ref"] - } else { - x <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) - } - was_renamed(name_old = found["fullname"], - name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), - ref_old = found["ref"], - ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), - mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) - pkg_env$mo_renamed_last_run <- found["fullname"] - uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)), - stringsAsFactors = FALSE) - return(x) - } - - # (2) Try with misspelled input ---- - # just rerun with dyslexia_mode = TRUE will used the extensive regex part above - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (2) Try with misspelled input\n")) - } - if (isTRUE(debug)) { - message("Running '", a.x_backup, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, 
actual_uncertainty = 1, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 1, actual_input = a.x_backup))) - } - if (!empty_result(found)) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) + + if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { + # empty and nonsense values, ignore without warning + x[i] <- lookup(mo == "UNKNOWN") + next + } + + # exact SNOMED code ---- + if (x_backup[i] %like_case% "^[0-9]+$") { + snomed_found <- unlist(lapply(reference_data_to_use$snomed, + function(s) if (x_backup[i] %in% s) { + TRUE + } else { + FALSE + })) + if (sum(snomed_found, na.rm = TRUE) > 0) { + found <- reference_data_to_use[snomed_found == TRUE, property][[1]] + if (!is.na(found)) { + x[i] <- found[1L] + next } } - - # UNCERTAINTY LEVEL 2 ---- - if (uncertainty_level >= 2) { - now_checks_for_uncertainty_level <- 2 - - # (3) look for genus only, part of name ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (3) look for genus only, part of name\n")) - } - if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like_case% " ") { - if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) { - if (isTRUE(debug)) { - message("Running '", paste(b.x_trimmed, "species"), "'") - } - # not when input is like Genustext, because then Neospora would lead to Actinokineospora - found <- lookup(fullname_lower %like_case% paste(b.x_trimmed, "species"), - haystack = uncertain.reference_data_to_use) - if (!is.na(found)) { - found_result <- found - found 
<- lookup(mo == found) - uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result), - stringsAsFactors = FALSE) - return(found) - } - } - } - - # (4) strip values between brackets ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (4) strip values between brackets\n")) - } - a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup, perl = TRUE) - a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped, perl = TRUE)) - if (isTRUE(debug)) { - message("Running '", a.x_backup_stripped, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - } - if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - - # (5) inverse input ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (5) inverse input\n")) - } - a.x_backup_inversed <- paste(rev(unlist(strsplit(a.x_backup, split = " "))), collapse = " ") - if (isTRUE(debug)) { - message("Running '", a.x_backup_inversed, "'") - } - - # first try without dyslexia mode - found <- 
suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_inversed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_inversed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - } - if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - - # (6) try to strip off half an element from end and check the remains ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (6) try to strip off half an element from end and check the remains\n")) - } - x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() - if (length(x_strip) > 1) { - for (i in seq_len(length(x_strip) - 1)) { - lastword <- x_strip[length(x_strip) - i + 1] - lastword_half <- substr(lastword, 1, as.integer(nchar(lastword) / 2)) - # remove last half of the second term - x_strip_collapsed <- paste(c(x_strip[seq_len(length(x_strip) - i)], lastword_half), collapse = " ") - if (nchar(x_strip_collapsed) >= 4 & nchar(lastword_half) > 2) { - if (isTRUE(debug)) { - message("Running '", x_strip_collapsed, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - if (empty_result(found)) { - # 
then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - } - if (!empty_result(found)) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - } - } - } - # (7) try to strip off one element from end and check the remains ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (7) try to strip off one element from end and check the remains\n")) - } - if (length(x_strip) > 1) { - for (i in seq_len(length(x_strip) - 1)) { - x_strip_collapsed <- paste(x_strip[seq_len(length(x_strip) - i)], collapse = " ") - if (nchar(x_strip_collapsed) >= 6) { - if (isTRUE(debug)) { - message("Running '", x_strip_collapsed, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - } - - if (!empty_result(found)) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - } - } - } - # (8) check for unknown yeasts/fungi ---- - if 
(isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (8) check for unknown yeasts/fungi\n")) - } - if (b.x_trimmed %like_case% "yeast") { - found <- "F_YEAST" - found_result <- found - found <- lookup(mo == found) - uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result), - stringsAsFactors = FALSE) - return(found) - } - if (b.x_trimmed %like_case% "(fungus|fungi)" & !b.x_trimmed %like_case% "fungiphrya") { - found <- "F_FUNGUS" - found_result <- found - found <- lookup(mo == found) - uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result), - stringsAsFactors = FALSE) - return(found) - } - # (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome) ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome)\n")) - } - x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() - if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { - for (i in 2:(length(x_strip))) { - x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") - if (isTRUE(debug)) { - message("Running '", x_strip_collapsed, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, 
allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) - } - if (!empty_result(found)) { - found_result <- found - # uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3) - if (x_strip_collapsed %like_case% " ") { - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - } - } - } + } + + # very probable: is G. species ---- + found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", + tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) + if (!is.na(found)) { + x[i] <- found[1L] + next + } + + # WHONET and other common LIS codes ---- + found <- microorganisms.codes[which(microorganisms.codes$code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i]))), "mo", drop = TRUE][1L] + if (!is.na(found)) { + x[i] <- lookup(mo == found) + next + } + + # user-defined reference ---- + if (!is.null(reference_df)) { + if (x_backup[i] %in% reference_df[, 1]) { + # already checked integrity of reference_df, all MOs are valid + ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"][[1L]] + x[i] <- lookup(mo == ref_mo) + next } - - # UNCERTAINTY LEVEL 3 ---- - if (uncertainty_level >= 3) { - now_checks_for_uncertainty_level <- 3 - - # (10) try to strip off one element from start and check the remains (any text size) ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (10) try to strip off one element from start and check the remains (any text size)\n")) - } - x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() - if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { - for (i in 2:(length(x_strip))) { - x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") - if (isTRUE(debug)) { - message("Running '", 
x_strip_collapsed, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) - } - if (!empty_result(found)) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - } - } - # (11) try to strip off one element from end and check the remains (any text size) ---- - # (this is in fact 7 but without nchar limit of >=6) - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (11) try to strip off one element from end and check the remains (any text size)\n")) - } - if (length(x_strip) > 1) { - for (i in seq_len(length(x_strip) - 1)) { - x_strip_collapsed <- paste(x_strip[seq_len(length(x_strip) - i)], collapse = " ") - if (isTRUE(debug)) { - message("Running '", x_strip_collapsed, "'") - } - # first try without dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) - if (empty_result(found)) { - # then with dyslexia mode - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, 
reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) - } - if (!empty_result(found)) { - found_result <- found - uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) - found <- lookup(mo == found) - return(found) - } - } - } - - # (12) part of a name (very unlikely match) ---- - if (isTRUE(debug)) { - cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (12) part of a name (very unlikely match)\n")) - } - if (isTRUE(debug)) { - message("Running '", f.x_withspaces_end_only, "'") + } + + # WHONET: xxx = no growth + if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) { + x[i] <- NA_character_ + next + } + + # check for very small input, but ignore the O antigens of E. coli + if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 + & !toupper(x_backup_without_spp[i]) %like_case% "O?(26|103|104|104|111|121|145|157)") { + # fewer than 3 chars and not looked for species, add as failure + x[i] <- lookup(mo == "UNKNOWN") + if (initial_search == TRUE) { + failures <- c(failures, x_backup[i]) + } + next + } + + if (x_backup_without_spp[i] %like_case% "(virus|viridae)") { + # there is no fullname like virus or viridae, so don't try to coerce it + x[i] <- NA_character_ + next + } + + # translate known trivial abbreviations to genus + species ---- + if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA") + | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|la-?mrsa|ca-?mrsa)( |$)") { + x[i] <- lookup(fullname == "Staphylococcus aureus", uncertainty = -1) + next + } + if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") + | x_backup_without_spp[i] %like_case% "(^| )(mrse|msse)( |$)") { + x[i] <- lookup(fullname == "Staphylococcus epidermidis", uncertainty = -1) + next + } + if (toupper(x_backup_without_spp[i]) == "VRE" + | 
x_backup_without_spp[i] %like_case% "(^| )vre " + | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { + x[i] <- lookup(genus == "Enterococcus", uncertainty = -1) + next + } + # support for: + # - AIEC (Adherent-Invasive E. coli) + # - ATEC (Atypical Entero-pathogenic E. coli) + # - DAEC (Diffusely Adhering E. coli) + # - EAEC (Entero-Aggresive E. coli) + # - EHEC (Entero-Haemorrhagic E. coli) + # - EIEC (Entero-Invasive E. coli) + # - EPEC (Entero-Pathogenic E. coli) + # - ETEC (Entero-Toxigenic E. coli) + # - NMEC (Neonatal Meningitis‐causing E. coli) + # - STEC (Shiga-toxin producing E. coli) + # - UPEC (Uropathogenic E. coli) + if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC") + # also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157 + | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { + x[i] <- lookup(fullname == "Escherichia coli", uncertainty = -1) + next + } + if (toupper(x_backup_without_spp[i]) == "MRPA" + | x_backup_without_spp[i] %like_case% "(^| )mrpa( |$)") { + # multi resistant P. aeruginosa + x[i] <- lookup(fullname == "Pseudomonas aeruginosa", uncertainty = -1) + next + } + if (toupper(x_backup_without_spp[i]) == "CRSM") { + # co-trim resistant S. maltophilia + x[i] <- lookup(fullname == "Stenotrophomonas maltophilia", uncertainty = -1) + next + } + if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") + | x_backup_without_spp[i] %like_case% "(^| )(pisp|prsp|visp|vrsp)( |$)") { + # peni I, peni R, vanco I, vanco R: S. 
pneumoniae + x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { + # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) + x[i] <- lookup(mo == toupper(gsub("g([abcdfghk])s", + "B_STRPT_GRP\\1", + x_backup_without_spp[i])), uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { + # Streptococci in different languages, like "estreptococos grupo B" + x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", + "B_STRPT_GRP\\2", + x_backup_without_spp[i])), uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { + # Streptococci in different languages, like "Group A Streptococci" + x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", + "B_STRPT_GRP\\1", + x_backup_without_spp[i])), uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "haemoly.*strep") { + # Haemolytic streptococci in different languages + x[i] <- lookup(mo == "B_STRPT_HAEM", uncertainty = -1) + next + } + # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) + if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" + | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" + | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { + # coerce S. coagulase negative + x[i] <- lookup(mo == "B_STPHY_CONS", uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" + | x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" + | x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { + # coerce S. 
coagulase positive + x[i] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) + next + } + # streptococcal groups: milleri and viridans + if (x_trimmed[i] %like_case% "strepto.* mil+er+i" + | x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" + | x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { + # Milleri Group Streptococcus (MGS) + x[i] <- lookup(mo == "B_STRPT_MILL", uncertainty = -1) + next + } + if (x_trimmed[i] %like_case% "strepto.* viridans" + | x_backup_without_spp[i] %like_case% "strepto.* viridans" + | x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { + # Viridans Group Streptococcus (VGS) + x[i] <- lookup(mo == "B_STRPT_VIRI", uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" + | x_backup_without_spp[i] %like_case% "negatie?[vf]" + | x_trimmed[i] %like_case% "gram[ -]?neg.*") { + # coerce Gram negatives + x[i] <- lookup(mo == "B_GRAMN", uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" + | x_backup_without_spp[i] %like_case% "positie?[vf]" + | x_trimmed[i] %like_case% "gram[ -]?pos.*") { + # coerce Gram positives + x[i] <- lookup(mo == "B_GRAMP", uncertainty = -1) + next + } + if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") { + # coerce mycobacteria in multiple languages + x[i] <- lookup(genus == "Mycobacterium", uncertainty = -1) + next + } + + if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { + if (x_backup_without_spp[i] %like_case% "salmonella group") { + # Salmonella Group A to Z, just return S. species for now + x[i] <- lookup(genus == "Salmonella", uncertainty = -1) + next + } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) & + !x_backup[i] %like% "t[iy](ph|f)[iy]") { + # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica + # except for S. typhi, S. paratyphi, S. 
typhimurium + x[i] <- lookup(fullname == "Salmonella enterica", uncertainty = -1) + uncertainties <- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = 1, + input = x_backup[i], + result_mo = lookup(fullname == "Salmonella enterica", "mo", uncertainty = -1)), + stringsAsFactors = FALSE) + next + } + } + + # trivial names known to the field: + if ("meningococcus" %like_case% x_trimmed[i]) { + # coerce Neisseria meningitidis + x[i] <- lookup(fullname == "Neisseria meningitidis", uncertainty = -1) + next + } + if ("gonococcus" %like_case% x_trimmed[i]) { + # coerce Neisseria gonorrhoeae + x[i] <- lookup(fullname == "Neisseria gonorrhoeae", uncertainty = -1) + next + } + if ("pneumococcus" %like_case% x_trimmed[i]) { + # coerce Streptococcus penumoniae + x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1) + next + } + + if (x_backup[i] %in% pkg_env$mo_failed) { + # previously failed already in this session ---- + # (at this point the latest reference_df has also been checked) + x[i] <- lookup(mo == "UNKNOWN") + if (initial_search == TRUE) { + failures <- c(failures, x_backup[i]) + } + next + } + + # NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS + check_per_prevalence <- function(data_to_check, + data.old_to_check, + a.x_backup, + b.x_trimmed, + c.x_trimmed_without_group, + d.x_withspaces_start_end, + e.x_withspaces_start_only, + f.x_withspaces_end_only, + g.x_backup_without_spp, + h.x_species, + i.x_trimmed_species) { + + # FIRST TRY FULLNAMES AND CODES ---- + # if only genus is available, return only genus + + if (all(!c(x[i], b.x_trimmed) %like_case% " ")) { + found <- lookup(fullname_lower %in% c(h.x_species, i.x_trimmed_species), + haystack = data_to_check) + if (!is.na(found)) { + x[i] <- found[1L] + return(x[i]) } if (nchar(g.x_backup_without_spp) >= 6) { - found <- lookup(fullname_lower %like_case% f.x_withspaces_end_only, column = "mo") + found <- lookup(fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), 
"[a-z]+"), + haystack = data_to_check) if (!is.na(found)) { - found_result <- lookup(mo == found) + x[i] <- found[1L] + return(x[i]) + } + } + # rest of genus only is in allow_uncertain part. + } + + # allow no codes less than 4 characters long, was already checked for WHONET earlier + if (nchar(g.x_backup_without_spp) < 4) { + x[i] <- lookup(mo == "UNKNOWN") + if (initial_search == TRUE) { + failures <- c(failures, a.x_backup) + } + return(x[i]) + } + + # try probable: trimmed version of fullname ---- + found <- lookup(fullname_lower %in% tolower(g.x_backup_without_spp), + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + + # try any match keeping spaces ---- + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + } + + # try any match keeping spaces, not ending with $ ---- + found <- lookup(fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + } + + # try any match keeping spaces, not start with ^ ---- + found <- lookup(fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + + # try a trimmed version + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% b.x_trimmed | + fullname_lower %like_case% c.x_trimmed_without_group, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + } + + + # try splitting of characters in the middle and then find ID ---- + # only when text length is 6 or lower + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. 
aureus + if (nchar(g.x_backup_without_spp) <= 6) { + x_length <- nchar(g.x_backup_without_spp) + x_split <- paste0("^", + g.x_backup_without_spp %pm>% substr(1, x_length / 2), + ".* ", + g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length)) + found <- lookup(fullname_lower %like_case% x_split, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + } + + # try fullname without start and without nchar limit of >= 6 ---- + # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + haystack = data_to_check) + if (!is.na(found)) { + return(found[1L]) + } + + # MISCELLANEOUS ---- + + # look for old taxonomic names ---- + found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, + column = NULL, # all columns + haystack = data.old_to_check) + if (!all(is.na(found))) { + # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: + # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) + # mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999" + if (property == "ref") { + x[i] <- found["ref"] + } else { + x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) + } + pkg_env$mo_renamed_last_run <- found["fullname"] + was_renamed(name_old = found["fullname"], + name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), + mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) + return(x[i]) + } + + # check for uncertain results ---- + uncertain_fn <- function(a.x_backup, + b.x_trimmed, + d.x_withspaces_start_end, + e.x_withspaces_start_only, + f.x_withspaces_end_only, + g.x_backup_without_spp, + uncertain.reference_data_to_use) { + + if (uncertainty_level == 0) { + # do not allow 
uncertainties + return(NA_character_) + } + + # UNCERTAINTY LEVEL 1 ---- + if (uncertainty_level >= 1) { + now_checks_for_uncertainty_level <- 1 + + # (1) look again for old taxonomic names, now for G. species ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (1) look again for old taxonomic names, now for G. species\n")) + } + if (isTRUE(debug)) { + message("Running '", d.x_withspaces_start_end, "' and '", e.x_withspaces_start_only, "'") + } + found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end | + fullname_lower %like_case% e.x_withspaces_start_only, + column = NULL, # all columns + haystack = data.old_to_check) + if (!all(is.na(found)) & nchar(g.x_backup_without_spp) >= 6) { + if (property == "ref") { + # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: + # mo_ref("Chlamydia psittaci") = "Page, 1968" (with warning) + # mo_ref("Chlamydophila psittaci") = "Everett et al., 1999" + x <- found["ref"] + } else { + x <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) + } + was_renamed(name_old = found["fullname"], + name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), + mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) + pkg_env$mo_renamed_last_run <- found["fullname"] + uncertainties <<- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)), + stringsAsFactors = FALSE) + return(x) + } + + # (2) Try with misspelled input ---- + # just rerun with dyslexia_mode = TRUE will used the extensive regex part above + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (2) 
Try with misspelled input\n")) + } + if (isTRUE(debug)) { + message("Running '", a.x_backup, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 1, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 1, actual_input = a.x_backup))) + } + if (!empty_result(found)) { + found_result <- found uncertainties <<- rbind(uncertainties, attr(found, which = "uncertainties", exact = TRUE), stringsAsFactors = FALSE) @@ -1340,110 +1089,377 @@ exec_as.mo <- function(x, return(found) } } + + # UNCERTAINTY LEVEL 2 ---- + if (uncertainty_level >= 2) { + now_checks_for_uncertainty_level <- 2 + + # (3) look for genus only, part of name ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (3) look for genus only, part of name\n")) + } + if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like_case% " ") { + if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) { + if (isTRUE(debug)) { + message("Running '", paste(b.x_trimmed, "species"), "'") + } + # not when input is like Genustext, because then Neospora would lead to Actinokineospora + found <- lookup(fullname_lower %like_case% paste(b.x_trimmed, "species"), + haystack = uncertain.reference_data_to_use) + if (!is.na(found)) { + found_result <- found + found <- lookup(mo == found) + uncertainties <<- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result), + stringsAsFactors = FALSE) + 
return(found) + } + } + } + + # (4) strip values between brackets ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (4) strip values between brackets\n")) + } + a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup, perl = TRUE) + a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped, perl = TRUE)) + if (isTRUE(debug)) { + message("Running '", a.x_backup_stripped, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + } + if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { + found_result <- found + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + + # (5) inverse input ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (5) inverse input\n")) + } + a.x_backup_inversed <- paste(rev(unlist(strsplit(a.x_backup, split = " "))), collapse = " ") + if (isTRUE(debug)) { + message("Running '", a.x_backup_inversed, "'") + } + + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_inversed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = 
a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_inversed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + } + if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { + found_result <- found + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + + # (6) try to strip off half an element from end and check the remains ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (6) try to strip off half an element from end and check the remains\n")) + } + x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() + if (length(x_strip) > 1) { + for (i in seq_len(length(x_strip) - 1)) { + lastword <- x_strip[length(x_strip) - i + 1] + lastword_half <- substr(lastword, 1, as.integer(nchar(lastword) / 2)) + # remove last half of the second term + x_strip_collapsed <- paste(c(x_strip[seq_len(length(x_strip) - i)], lastword_half), collapse = " ") + if (nchar(x_strip_collapsed) >= 4 & nchar(lastword_half) > 2) { + if (isTRUE(debug)) { + message("Running '", x_strip_collapsed, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, 
actual_uncertainty = 2, actual_input = a.x_backup))) + } + if (!empty_result(found)) { + found_result <- found + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + } + } + } + # (7) try to strip off one element from end and check the remains ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (7) try to strip off one element from end and check the remains\n")) + } + if (length(x_strip) > 1) { + for (i in seq_len(length(x_strip) - 1)) { + x_strip_collapsed <- paste(x_strip[seq_len(length(x_strip) - i)], collapse = " ") + if (nchar(x_strip_collapsed) >= 6) { + if (isTRUE(debug)) { + message("Running '", x_strip_collapsed, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + } + + if (!empty_result(found)) { + found_result <- found + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + } + } + } + # (8) check for unknown yeasts/fungi ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (8) check for unknown yeasts/fungi\n")) + } + if (b.x_trimmed %like_case% "yeast") { + found <- "F_YEAST" + found_result <- found + found <- lookup(mo == 
found) + uncertainties <<- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result), + stringsAsFactors = FALSE) + return(found) + } + if (b.x_trimmed %like_case% "(fungus|fungi)" & !b.x_trimmed %like_case% "fungiphrya") { + found <- "F_FUNGUS" + found_result <- found + found <- lookup(mo == found) + uncertainties <<- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result), + stringsAsFactors = FALSE) + return(found) + } + # (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome) ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome)\n")) + } + x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() + if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { + for (i in 2:(length(x_strip))) { + x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") + if (isTRUE(debug)) { + message("Running '", x_strip_collapsed, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) + } + if (!empty_result(found)) { + found_result <- found + # uncertainty level 2 only if searched part contains a 
space (otherwise it will be found with lvl 3) + if (x_strip_collapsed %like_case% " ") { + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + } + } + } + } + + # UNCERTAINTY LEVEL 3 ---- + if (uncertainty_level >= 3) { + now_checks_for_uncertainty_level <- 3 + + # (10) try to strip off one element from start and check the remains (any text size) ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (10) try to strip off one element from start and check the remains (any text size)\n")) + } + x_strip <- a.x_backup %pm>% strsplit("[ .]") %pm>% unlist() + if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) { + for (i in 2:(length(x_strip))) { + x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ") + if (isTRUE(debug)) { + message("Running '", x_strip_collapsed, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) + } + if (!empty_result(found)) { + found_result <- found + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + } + } + # (11) try to strip off one element from end and check the remains (any text size) ---- + # (this is in fact 7 but without nchar limit of >=6) 
+ if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (11) try to strip off one element from end and check the remains (any text size)\n")) + } + if (length(x_strip) > 1) { + for (i in seq_len(length(x_strip) - 1)) { + x_strip_collapsed <- paste(x_strip[seq_len(length(x_strip) - i)], collapse = " ") + if (isTRUE(debug)) { + message("Running '", x_strip_collapsed, "'") + } + # first try without dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) + if (empty_result(found)) { + # then with dyslexia mode + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 3, actual_input = a.x_backup))) + } + if (!empty_result(found)) { + found_result <- found + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + } + } + + # (12) part of a name (very unlikely match) ---- + if (isTRUE(debug)) { + cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (12) part of a name (very unlikely match)\n")) + } + if (isTRUE(debug)) { + message("Running '", f.x_withspaces_end_only, "'") + } + if (nchar(g.x_backup_without_spp) >= 6) { + found <- lookup(fullname_lower %like_case% f.x_withspaces_end_only, column = "mo") + if (!is.na(found)) { + found_result <- lookup(mo == found) + uncertainties <<- rbind(uncertainties, + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE) + found <- lookup(mo == found) + return(found) + } + } + } + + + # didn't found in uncertain results 
too + return(NA_character_) } - - - # didn't found in uncertain results too + + # uncertain results + x[i] <- uncertain_fn(a.x_backup = a.x_backup, + b.x_trimmed = b.x_trimmed, + d.x_withspaces_start_end = d.x_withspaces_start_end, + e.x_withspaces_start_only = e.x_withspaces_start_only, + f.x_withspaces_end_only = f.x_withspaces_end_only, + g.x_backup_without_spp = g.x_backup_without_spp, + uncertain.reference_data_to_use = MO_lookup) + if (!empty_result(x[i])) { + return(x[i]) + } + + # didn't found any return(NA_character_) } - - # uncertain results - x[i] <- uncertain_fn(a.x_backup = a.x_backup, - b.x_trimmed = b.x_trimmed, - d.x_withspaces_start_end = d.x_withspaces_start_end, - e.x_withspaces_start_only = e.x_withspaces_start_only, - f.x_withspaces_end_only = f.x_withspaces_end_only, - g.x_backup_without_spp = g.x_backup_without_spp, - uncertain.reference_data_to_use = MO_lookup) # MO_lookup[which(MO_lookup$prevalence %in% c(1, 2)), ]) + + # CHECK ALL IN ONE GO ---- + x[i] <- check_per_prevalence(data_to_check = MO_lookup, + data.old_to_check = MO.old_lookup, + a.x_backup = x_backup[i], + b.x_trimmed = x_trimmed[i], + c.x_trimmed_without_group = x_trimmed_without_group[i], + d.x_withspaces_start_end = x_withspaces_start_end[i], + e.x_withspaces_start_only = x_withspaces_start_only[i], + f.x_withspaces_end_only = x_withspaces_end_only[i], + g.x_backup_without_spp = x_backup_without_spp[i], + h.x_species = x_species[i], + i.x_trimmed_species = x_trimmed_species[i]) if (!empty_result(x[i])) { - return(x[i]) + next + } + + + # no results found: make them UNKNOWN ---- + x[i] <- lookup(mo == "UNKNOWN", uncertainty = -1) + if (initial_search == TRUE) { + failures <- c(failures, x_backup[i]) } - # x[i] <- uncertain_fn(a.x_backup = a.x_backup, - # b.x_trimmed = b.x_trimmed, - # d.x_withspaces_start_end = d.x_withspaces_start_end, - # e.x_withspaces_start_only = e.x_withspaces_start_only, - # f.x_withspaces_end_only = f.x_withspaces_end_only, - # g.x_backup_without_spp 
= g.x_backup_without_spp, - # uncertain.reference_data_to_use = MO_lookup[which(MO_lookup$prevalence == 3), ]) - # if (!empty_result(x[i])) { - # return(x[i]) - # } - - # didn't found any - return(NA_character_) } - - # CHECK ALL IN ONE GO ---- - x[i] <- check_per_prevalence(data_to_check = MO_lookup, - data.old_to_check = MO.old_lookup, - a.x_backup = x_backup[i], - b.x_trimmed = x_trimmed[i], - c.x_trimmed_without_group = x_trimmed_without_group[i], - d.x_withspaces_start_end = x_withspaces_start_end[i], - e.x_withspaces_start_only = x_withspaces_start_only[i], - f.x_withspaces_end_only = x_withspaces_end_only[i], - g.x_backup_without_spp = x_backup_without_spp[i], - h.x_species = x_species[i], - i.x_trimmed_species = x_trimmed_species[i]) - if (!empty_result(x[i])) { - next - } - - - # no results found: make them UNKNOWN ---- - x[i] <- lookup(mo == "UNKNOWN", uncertainty = -1) + if (initial_search == TRUE) { - failures <- c(failures, x_backup[i]) + close(progress) } - } - - if (initial_search == TRUE) { - close(progress) + + + # handling failures ---- + failures <- failures[!failures %in% c(NA, NULL, NaN)] + if (length(failures) > 0 & initial_search == TRUE) { + pkg_env$mo_failures <- sort(unique(failures)) + pkg_env$mo_failed <- c(pkg_env$mo_failed, pkg_env$mo_failures) + plural <- c("value", "it", "was") + if (pm_n_distinct(failures) > 1) { + plural <- c("values", "them", "were") + } + x_input_clean <- trimws2(x_input) + total_failures <- length(x_input_clean[as.character(x_input_clean) %in% as.character(failures) & !x_input %in% c(NA, NULL, NaN)]) + total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)]) + msg <- paste0(nr2char(pm_n_distinct(failures)), " unique ", plural[1], + " (covering ", percentage(total_failures / total_n), + ") could not be coerced and ", plural[3], " considered 'unknown'") + if (pm_n_distinct(failures) <= 10) { + msg <- paste0(msg, ": ", vector_and(failures, quotes = TRUE)) + } + msg <- paste0(msg, + ".\nUse `mo_failures()` to 
review ", plural[2], ". Edit the `allow_uncertain` argument if needed (see ?as.mo).\n", + "You can also use your own reference data with set_mo_source() or directly, e.g.:\n", + ' as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n', + ' mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n') + warning_(paste0("\n", msg), + add_fn = font_red, + call = FALSE, + immediate = TRUE) # thus will always be shown, even if >= warnings + } + # handling uncertainties ---- + if (NROW(uncertainties) > 0 & initial_search == TRUE) { + uncertainties <- as.list(pm_distinct(uncertainties, input, .keep_all = TRUE)) + pkg_env$mo_uncertainties <- uncertainties + + plural <- c("", "it", "was") + if (length(uncertainties$input) > 1) { + plural <- c("s", "them", "were") + } + msg <- paste0("Translation to ", nr2char(length(uncertainties$input)), " microorganism", plural[1], + " was guessed with uncertainty. 
Use mo_uncertainties() to review ", plural[2], ".") + message_(msg) + } + x[already_known] <- x_known } } - - # handling failures ---- - failures <- failures[!failures %in% c(NA, NULL, NaN)] - if (length(failures) > 0 & initial_search == TRUE) { - pkg_env$mo_failures <- sort(unique(failures)) - pkg_env$mo_failed <- c(pkg_env$mo_failed, pkg_env$mo_failures) - plural <- c("value", "it", "was") - if (pm_n_distinct(failures) > 1) { - plural <- c("values", "them", "were") - } - x_input_clean <- trimws2(x_input) - total_failures <- length(x_input_clean[as.character(x_input_clean) %in% as.character(failures) & !x_input %in% c(NA, NULL, NaN)]) - total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)]) - msg <- paste0(nr2char(pm_n_distinct(failures)), " unique ", plural[1], - " (covering ", percentage(total_failures / total_n), - ") could not be coerced and ", plural[3], " considered 'unknown'") - if (pm_n_distinct(failures) <= 10) { - msg <- paste0(msg, ": ", vector_and(failures, quotes = TRUE)) - } - msg <- paste0(msg, - ".\nUse mo_failures() to review ", plural[2], ". 
Edit the `allow_uncertain` argument if needed (see ?as.mo).\n", - "You can also use your own reference data with set_mo_source() or directly, e.g.:\n", - ' as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n', - ' mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n') - warning_(paste0("\n", msg), - add_fn = font_red, - call = FALSE, - immediate = TRUE) # thus will always be shown, even if >= warnings - } - # handling uncertainties ---- - if (NROW(uncertainties) > 0 & initial_search == TRUE) { - uncertainties <- as.list(pm_distinct(uncertainties, input, .keep_all = TRUE)) - pkg_env$mo_uncertainties <- uncertainties - - plural <- c("", "it", "was") - if (length(uncertainties$input) > 1) { - plural <- c("s", "them", "were") - } - msg <- paste0("Translation to ", nr2char(length(uncertainties$input)), " microorganism", plural[1], - " was guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".") - message_(msg) - } - + # Becker ---- if (Becker == TRUE | Becker == "all") { # warn when species found that are not in: diff --git a/R/mo_property.R b/R/mo_property.R index 12758d5a..b7e459aa 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -172,7 +172,10 @@ mo_name <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - translate_AMR(mo_validate(x = x, property = "fullname", language = language, ...), language = language, only_unknown = FALSE) + translate_AMR(mo_validate(x = x, property = "fullname", language = language, ...), + language = language, + only_unknown = FALSE, + affect_mo_name = TRUE) } #' @rdname mo_property @@ -214,7 +217,7 @@ mo_shortname <- function(x, language = get_locale(), ...) 
{ shortnames[is.na(x.mo)] <- NA_character_ load_mo_failures_uncertainties_renamed(metadata) - translate_AMR(shortnames, language = language, only_unknown = FALSE) + translate_AMR(shortnames, language = language, only_unknown = FALSE, affect_mo_name = TRUE) } #' @rdname mo_property diff --git a/R/sysdata.rda b/R/sysdata.rda index 9dccb9eb..f9f314f8 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/translate.R b/R/translate.R index 2ad6b02f..579fa8aa 100755 --- a/R/translate.R +++ b/R/translate.R @@ -123,7 +123,7 @@ coerce_language_setting <- function(lang) { } # translate strings based on inst/translations.tsv -translate_AMR <- function(from, language = get_locale(), only_unknown = FALSE) { +translate_AMR <- function(from, language = get_locale(), only_unknown = FALSE, affect_mo_name = FALSE) { if (is.null(language)) { return(from) @@ -146,10 +146,13 @@ translate_AMR <- function(from, language = get_locale(), only_unknown = FALSE) { if (only_unknown == TRUE) { df_trans <- subset(df_trans, pattern %like% "unknown") } + if (affect_mo_name == TRUE) { + df_trans <- subset(df_trans, affect_mo_name == TRUE) + } - # default case sensitive if value if 'ignore.case' is missing: + # default: case sensitive if the value of 'ignore.case' is missing: df_trans$ignore.case[is.na(df_trans$ignore.case)] <- FALSE - # default not using regular expressions (fixed = TRUE) if 'fixed' is missing: + # default: not using regular expressions (fixed = TRUE) if 'fixed' is missing: df_trans$fixed[is.na(df_trans$fixed)] <- TRUE # check if text to look for is in one of the patterns @@ -167,7 +170,8 @@ translate_AMR <- function(from, language = get_locale(), only_unknown = FALSE) { replacement = df_trans$replacement[i], x = from_unique_translated, ignore.case = df_trans$ignore.case[i], - fixed = df_trans$fixed[i])) + fixed = df_trans$fixed[i], + perl = !df_trans$fixed[i])) # force UTF-8 for diacritics from_unique_translated <- enc2utf8(from_unique_translated) diff --git 
a/README.md b/README.md index d4d1592f..3dd3d815 100755 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ -The latest built **source package** (`AMR_x.x.x.tar.gz`) can be found in folder [/data-raw/](data-raw). +The latest built **source package** (`AMR_latest.tar.gz`) can be found in folder [/data-raw/](data-raw). `AMR` is a free, open-source and independent R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. diff --git a/data-raw/AMR_1.5.0.9019.tar.gz b/data-raw/AMR_latest.tar.gz similarity index 59% rename from data-raw/AMR_1.5.0.9019.tar.gz rename to data-raw/AMR_latest.tar.gz index 1b685946..ee737731 100644 Binary files a/data-raw/AMR_1.5.0.9019.tar.gz and b/data-raw/AMR_latest.tar.gz differ diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index 7bacef5b..977fce7f 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -31,8 +31,9 @@ # Data retrieved from the Global Biodiversity Information Facility (GBIF): # https://doi.org/10.15468/rffz4x # -# And from the Leibniz Institute: German Collection of Microorganisms and Cell Cultures (DSMZ) -# (register first at https://bacdive.dsmz.de/api/pnu/registration/register/ and use API as done below) +# And from the List of Prokaryotic names with Standing in Nomenclature (LPSN) +# (register first) https://lpsn.dsmz.de/downloads +# download the latest CSV file. 
library(dplyr) library(AMR) diff --git a/data-raw/translations.tsv b/data-raw/translations.tsv index d6c9ada5..30380082 100644 --- a/data-raw/translations.tsv +++ b/data-raw/translations.tsv @@ -1,407 +1,412 @@ -lang pattern replacement fixed ignore.case -de Coagulase-negative Staphylococcus Koagulase-negative Staphylococcus FALSE FALSE -de Coagulase-positive Staphylococcus Koagulase-positive Staphylococcus FALSE FALSE -de Beta-haemolytic Streptococcus Beta-hämolytischer Streptococcus FALSE FALSE -de unknown Gram-negatives unbekannte Gramnegativen FALSE FALSE -de unknown Gram-positives unbekannte Grampositiven FALSE FALSE -de unknown name unbekannte Name FALSE FALSE -de unknown kingdom unbekanntes Reich FALSE FALSE -de unknown phylum unbekannter Stamm FALSE FALSE -de unknown class unbekannte Klasse FALSE FALSE -de unknown order unbekannte Ordnung FALSE FALSE -de unknown family unbekannte Familie FALSE FALSE -de unknown genus unbekannte Gattung FALSE FALSE -de unknown species unbekannte Art FALSE FALSE -de unknown subspecies unbekannte Unterart FALSE FALSE -de unknown rank unbekannter Rang FALSE FALSE -de CoNS KNS TRUE FALSE -de CoPS KPS TRUE FALSE -de Gram-negative Gramnegativ FALSE FALSE -de Gram-positive Grampositiv FALSE FALSE -de Bacteria Bakterien FALSE FALSE -de Fungi Pilze FALSE FALSE -de Yeasts Hefen FALSE FALSE -de Protozoa Protozoen FALSE FALSE -de biogroup Biogruppe FALSE FALSE -de biotype Biotyp FALSE FALSE -de vegetative vegetativ FALSE FALSE -de ([([ ]*?)group \\1Gruppe FALSE FALSE -de ([([ ]*?)Group \\1Gruppe FALSE FALSE -de no .*growth keine? .*wachstum FALSE TRUE -de no|not keine? 
FALSE TRUE - -nl Coagulase-negative Staphylococcus Coagulase-negatieve Staphylococcus FALSE FALSE -nl Coagulase-positive Staphylococcus Coagulase-positieve Staphylococcus FALSE FALSE -nl Beta-haemolytic Streptococcus Beta-hemolytische Streptococcus FALSE FALSE -nl unknown Gram-negatives onbekende Gram-negatieven FALSE FALSE -nl unknown Gram-positives onbekende Gram-positieven FALSE FALSE -nl unknown name onbekende naam FALSE FALSE -nl unknown kingdom onbekend koninkrijk FALSE FALSE -nl unknown phylum onbekend fylum FALSE FALSE -nl unknown class onbekende klasse FALSE FALSE -nl unknown order onbekende orde FALSE FALSE -nl unknown family onbekende familie FALSE FALSE -nl unknown genus onbekend geslacht FALSE FALSE -nl unknown species onbekende soort FALSE FALSE -nl unknown subspecies onbekende ondersoort FALSE FALSE -nl unknown rank onbekende rang FALSE FALSE -nl CoNS CNS TRUE FALSE -nl CoPS CPS TRUE FALSE -nl Gram-negative Gram-negatief FALSE FALSE -nl Gram-positive Gram-positief FALSE FALSE -nl Bacteria Bacteriën FALSE FALSE -nl Fungi Schimmels FALSE FALSE -nl Yeasts Gisten FALSE FALSE -nl Protozoa Protozoën FALSE FALSE -nl biogroup biogroep FALSE FALSE -nl vegetative vegetatief FALSE FALSE -nl ([([ ]*?)group \\1groep FALSE FALSE -nl ([([ ]*?)Group \\1Groep FALSE FALSE -nl antibiotic antibioticum FALSE FALSE -nl Antibiotic Antibioticum FALSE FALSE -nl Drug Middel FALSE FALSE -nl drug middel FALSE FALSE -nl no .*growth geen .*groei FALSE TRUE -nl no|not geen|niet FALSE TRUE - -es Coagulase-negative Staphylococcus Staphylococcus coagulasa negativo FALSE FALSE -es Coagulase-positive Staphylococcus Staphylococcus coagulasa positivo FALSE FALSE -es Beta-haemolytic Streptococcus Streptococcus Beta-hemolítico FALSE FALSE -es unknown Gram-negatives Gram negativos desconocidos FALSE FALSE -es unknown Gram-positives Gram positivos desconocidos FALSE FALSE -es unknown name nombre desconocido FALSE FALSE -es unknown kingdom reino desconocido FALSE FALSE -es unknown phylum filo 
desconocido FALSE FALSE -es unknown class clase desconocida FALSE FALSE -es unknown order orden desconocido FALSE FALSE -es unknown family familia desconocida FALSE FALSE -es unknown genus género desconocido FALSE FALSE -es unknown species especie desconocida FALSE FALSE -es unknown subspecies subespecie desconocida FALSE FALSE -es unknown rank rango desconocido FALSE FALSE -es CoNS SCN TRUE FALSE -es CoPS SCP TRUE FALSE -es Gram-negative Gram negativo FALSE FALSE -es Gram-positive Gram positivo FALSE FALSE -es Bacteria Bacterias FALSE FALSE -es Fungi Hongos FALSE FALSE -es Yeasts Levaduras FALSE FALSE -es Protozoa Protozoarios FALSE FALSE -es biogroup biogrupo FALSE FALSE -es biotype biotipo FALSE FALSE -es vegetative vegetativo FALSE FALSE -es ([([ ]*?)group \\1grupo FALSE FALSE -es ([([ ]*?)Group \\1Grupo FALSE FALSE -es no .*growth no .*crecimientonon FALSE TRUE -es no|not no|sin FALSE TRUE - -it Coagulase-negative Staphylococcus Staphylococcus negativo coagulasi FALSE FALSE -it Coagulase-positive Staphylococcus Staphylococcus positivo coagulasi FALSE FALSE -it Beta-haemolytic Streptococcus Streptococcus Beta-emolitico FALSE FALSE -it unknown Gram-negatives Gram negativi sconosciuti FALSE FALSE -it unknown Gram-positives Gram positivi sconosciuti FALSE FALSE -it unknown name nome sconosciuto FALSE FALSE -it unknown kingdom regno sconosciuto FALSE FALSE -it unknown phylum phylum sconosciuto FALSE FALSE -it unknown class classe sconosciuta FALSE FALSE -it unknown order ordine sconosciuto FALSE FALSE -it unknown family famiglia sconosciuta FALSE FALSE -it unknown genus genere sconosciuto FALSE FALSE -it unknown species specie sconosciute FALSE FALSE -it unknown subspecies sottospecie sconosciute FALSE FALSE -it unknown rank grado sconosciuto FALSE FALSE -it Gram-negative Gram negativo FALSE FALSE -it Gram-positive Gram positivo FALSE FALSE -it Bacteria Batteri FALSE FALSE -it Fungi Funghi FALSE FALSE -it Yeasts Lieviti FALSE FALSE -it Protozoa Protozoi FALSE FALSE 
-it biogroup biogruppo FALSE FALSE -it biotype biotipo FALSE FALSE -it vegetative vegetativo FALSE FALSE -it ([([ ]*?)group \\1gruppo FALSE FALSE -it ([([ ]*?)Group \\1Gruppo FALSE FALSE -it no .*growth sem .*crescimento FALSE TRUE -it no|not sem FALSE TRUE - -fr Coagulase-negative Staphylococcus Staphylococcus à coagulase négative FALSE FALSE -fr Coagulase-positive Staphylococcus Staphylococcus à coagulase positif FALSE FALSE -fr Beta-haemolytic Streptococcus Streptococcus Bêta-hémolytique FALSE FALSE -fr unknown Gram-negatives Gram négatifs inconnus FALSE FALSE -fr unknown Gram-positives Gram positifs inconnus FALSE FALSE -fr unknown name nom inconnu FALSE FALSE -fr unknown kingdom règme inconnu FALSE FALSE -fr unknown phylum embranchement inconnu FALSE FALSE -fr unknown class classe inconnue FALSE FALSE -fr unknown order ordre inconnu FALSE FALSE -fr unknown family famille inconnue FALSE FALSE -fr unknown genus genre inconnu FALSE FALSE -fr unknown species espèce inconnue FALSE FALSE -fr unknown subspecies sous-espèce inconnue FALSE FALSE -fr unknown rank rang inconnu FALSE FALSE -fr Gram-negative Gram négatif FALSE FALSE -fr Gram-positive Gram positif FALSE FALSE -fr Bacteria Bactéries FALSE FALSE -fr Fungi Champignons FALSE FALSE -fr Yeasts Levures FALSE FALSE -fr Protozoa Protozoaires FALSE FALSE -fr biogroup biogroupe FALSE FALSE -fr vegetative végétatif FALSE FALSE -fr ([([ ]*?)group \\1groupe FALSE FALSE -fr ([([ ]*?)Group \\1Groupe FALSE FALSE -fr no .*growth pas .*croissance FALSE TRUE -fr no|not non FALSE TRUE - -pt Coagulase-negative Staphylococcus Staphylococcus coagulase negativo FALSE FALSE -pt Coagulase-positive Staphylococcus Staphylococcus coagulase positivo FALSE FALSE -pt Beta-haemolytic Streptococcus Streptococcus Beta-hemolítico FALSE FALSE -pt unknown Gram-negatives Gram negativos desconhecidos FALSE FALSE -pt unknown Gram-positives Gram positivos desconhecidos FALSE FALSE -pt unknown name nome desconhecido FALSE FALSE -pt unknown kingdom 
reino desconhecido FALSE FALSE -pt unknown phylum filo desconhecido FALSE FALSE -pt unknown class classe desconhecida FALSE FALSE -pt unknown order ordem desconhecido FALSE FALSE -pt unknown family família desconhecida FALSE FALSE -pt unknown genus gênero desconhecido FALSE FALSE -pt unknown species espécies desconhecida FALSE FALSE -pt unknown subspecies subespécies desconhecida FALSE FALSE -pt unknown rank classificação desconhecido FALSE FALSE -pt Gram-negative Gram negativo FALSE FALSE -pt Gram-positive Gram positivo FALSE FALSE -pt Bacteria Bactérias FALSE FALSE -pt Fungi Fungos FALSE FALSE -pt Yeasts Leveduras FALSE FALSE -pt Protozoa Protozoários FALSE FALSE -pt biogroup biogrupo FALSE FALSE -pt biotype biótipo FALSE FALSE -pt vegetative vegetativo FALSE FALSE -pt ([([ ]*?)group \\1grupo FALSE FALSE -pt ([([ ]*?)Group \\1Grupo FALSE FALSE -pt no .*growth sem .*crescimento FALSE TRUE -pt no|not sem FALSE TRUE - -de clavulanic acid Clavulansäure FALSE TRUE - -nl 4-aminosalicylic acid 4-aminosalicylzuur -nl Adefovir dipivoxil Adefovir -nl Aldesulfone sodium Aldesulfon -nl Amikacin Amikacine -nl Amoxicillin Amoxicilline -nl Amoxicillin/beta-lactamase inhibitor Amoxicilline/enzymremmer -nl Amphotericin B Amfotericine B -nl Ampicillin Ampicilline -nl Ampicillin/beta-lactamase inhibitor Ampicilline/enzymremmer -nl Anidulafungin Anidulafungine -nl Azidocillin Azidocilline -nl Azithromycin Azitromycine -nl Azlocillin Azlocilline -nl Bacampicillin Bacampicilline -nl Bacitracin Bacitracine -nl Benzathine benzylpenicillin Benzylpenicillinebenzathine -nl Benzathine phenoxymethylpenicillin Fenoxymethylpenicillinebenzathine -nl Benzylpenicillin Benzylpenicilline -nl Calcium aminosalicylate Aminosalicylzuur -nl Capreomycin Capreomycine -nl Carbenicillin Carbenicilline -nl Carindacillin Carindacilline -nl Caspofungin Caspofungine -nl Ce(f|ph)acetrile Cefacetril FALSE -nl Ce(f|ph)alexin Cefalexine FALSE FALSE -nl Ce(f|ph)alotin Cefalotine FALSE -nl Ce(f|ph)amandole Cefamandol 
FALSE -nl Ce(f|ph)apirin Cefapirine FALSE -nl Ce(f|ph)azedone Cefazedon FALSE -nl Ce(f|ph)azolin Cefazoline FALSE -nl Ce(f|ph)alothin Cefalotine FALSE -nl Ce(f|ph)alexin Cefalexine FALSE -nl Ce(f|ph)epime Cefepim FALSE -nl Ce(f|ph)ixime Cefixim FALSE -nl Ce(f|ph)menoxime Cefmenoxim FALSE -nl Ce(f|ph)metazole Cefmetazol FALSE -nl Ce(f|ph)odizime Cefodizim FALSE -nl Ce(f|ph)onicid Cefonicide FALSE -nl Ce(f|ph)operazone Cefoperazon FALSE -nl Ce(f|ph)operazone/beta-lactamase inhibitor Cefoperazon/enzymremmer FALSE -nl Ce(f|ph)otaxime Cefotaxim FALSE -nl Ce(f|ph)oxitin Cefoxitine FALSE -nl Ce(f|ph)pirome Cefpirom FALSE -nl Ce(f|ph)podoxime Cefpodoxim FALSE -nl Ce(f|ph)radine Cefradine FALSE -nl Ce(f|ph)sulodin Cefsulodine FALSE -nl Ce(f|ph)tazidime Ceftazidim FALSE -nl Ce(f|ph)tezole Ceftezol FALSE -nl Ce(f|ph)tizoxime Ceftizoxim FALSE -nl Ce(f|ph)triaxone Ceftriaxon FALSE -nl Ce(f|ph)uroxime Cefuroxim FALSE -nl Ce(f|ph)uroxime/metronidazole Cefuroxim/andere antibacteriele middelen FALSE -nl Chloramphenicol Chlooramfenicol -nl Chlortetracycline Chloortetracycline -nl Cinoxacin Cinoxacine -nl Ciprofloxacin Ciprofloxacine -nl Clarithromycin Claritromycine -nl Clavulanic acid Clavulaanzuur -nl clavulanic acid clavulaanzuur -nl Clindamycin Clindamycine -nl Clometocillin Clometocilline -nl Clotrimazole Clotrimazol -nl Cloxacillin Cloxacilline -nl Colistin Colistine -nl Dapsone Dapson -nl Daptomycin Daptomycine -nl Dibekacin Dibekacine -nl Dicloxacillin Dicloxacilline -nl Dirithromycin Diritromycine -nl Econazole Econazol -nl Enoxacin Enoxacine -nl Epicillin Epicilline -nl Erythromycin Erytromycine -nl Ethambutol/isoniazid Ethambutol/isoniazide -nl Fleroxacin Fleroxacine -nl Flucloxacillin Flucloxacilline -nl Fluconazole Fluconazol -nl Flucytosine Fluorocytosine -nl Flurithromycin Fluritromycine -nl Fosfomycin Fosfomycine -nl Fusidic acid Fusidinezuur -nl Gatifloxacin Gatifloxacine -nl Gemifloxacin Gemifloxacine -nl Gentamicin Gentamicine -nl Grepafloxacin Grepafloxacine -nl 
Hachimycin Hachimycine -nl Hetacillin Hetacilline -nl Imipenem/cilastatin Imipenem/enzymremmer -nl Inosine pranobex Inosiplex -nl Isepamicin Isepamicine -nl Isoconazole Isoconazol -nl Isoniazid Isoniazide -nl Itraconazole Itraconazol -nl Josamycin Josamycine -nl Kanamycin Kanamycine -nl Ketoconazole Ketoconazol -nl Levofloxacin Levofloxacine -nl Lincomycin Lincomycine -nl Lomefloxacin Lomefloxacine -nl Lysozyme Lysozym -nl Mandelic acid Amandelzuur -nl Metampicillin Metampicilline -nl Meticillin Meticilline -nl Metisazone Metisazon -nl Metronidazole Metronidazol -nl Mezlocillin Mezlocilline -nl Micafungin Micafungine -nl Miconazole Miconazol -nl Midecamycin Midecamycine -nl Miocamycin Miocamycine -nl Moxifloxacin Moxifloxacine -nl Mupirocin Mupirocine -nl Nalidixic acid Nalidixinezuur -nl Neomycin Neomycine -nl Netilmicin Netilmicine -nl Nitrofurantoin Nitrofurantoine -nl Norfloxacin Norfloxacine -nl Novobiocin Novobiocine -nl Nystatin Nystatine -nl Ofloxacin Ofloxacine -nl Oleandomycin Oleandomycine -nl Ornidazole Ornidazol -nl Oxacillin Oxacilline -nl Oxolinic acid Oxolinezuur -nl Oxytetracycline Oxytetracycline -nl Pazufloxacin Pazufloxacine -nl Pefloxacin Pefloxacine -nl Penamecillin Penamecilline -nl Penicillin Penicilline -nl Pheneticillin Feneticilline -nl Phenoxymethylpenicillin Fenoxymethylpenicilline -nl Pipemidic acid Pipemidinezuur -nl Piperacillin Piperacilline -nl Piperacillin/beta-lactamase inhibitor Piperacilline/enzymremmer -nl Piromidic acid Piromidinezuur -nl Pivampicillin Pivampicilline -nl Polymyxin B Polymyxine B -nl Posaconazole Posaconazol -nl Pristinamycin Pristinamycine -nl Procaine benzylpenicillin Benzylpenicillineprocaine -nl Propicillin Propicilline -nl Prulifloxacin Prulifloxacine -nl Quinupristin/dalfopristin Quinupristine/dalfopristine -nl Ribostamycin Ribostamycine -nl Rifabutin Rifabutine -nl Rifampicin Rifampicine -nl Rifampicin/pyrazinamide/ethambutol/isoniazid Rifampicine/pyrazinamide/ethambutol/isoniazide -nl 
Rifampicin/pyrazinamide/isoniazid Rifampicine/pyrazinamide/isoniazide -nl Rifampicin/isoniazid Rifampicine/isoniazide -nl Rifamycin Rifamycine -nl Rifaximin Rifaximine -nl Rokitamycin Rokitamycine -nl Rosoxacin Rosoxacine -nl Roxithromycin Roxitromycine -nl Rufloxacin Rufloxacine -nl Sisomicin Sisomicine -nl Sodium aminosalicylate Aminosalicylzuur -nl Sparfloxacin Sparfloxacine -nl Spectinomycin Spectinomycine -nl Spiramycin Spiramycine -nl Spiramycin/metronidazole Spiramycine/metronidazol -nl Staphylococcus immunoglobulin Stafylokokkenimmunoglobuline -nl Streptoduocin Streptoduocine -nl Streptomycin Streptomycine -nl Streptomycin/isoniazid Streptomycine/isoniazide -nl Sulbenicillin Sulbenicilline -nl Sulfadiazine/tetroxoprim Sulfadiazine/tetroxoprim -nl Sulfadiazine/trimethoprim Sulfadiazine/trimethoprim -nl Sulfadimidine/trimethoprim Sulfadimidine/trimethoprim -nl Sulfafurazole Sulfafurazol -nl Sulfaisodimidine Sulfisomidine -nl Sulfalene Sulfaleen -nl Sulfamazone Sulfamazon -nl Sulfamerazine/trimethoprim Sulfamerazine/trimethoprim -nl Sulfamethizole Sulfamethizol -nl Sulfamethoxazole Sulfamethoxazol -nl Sulfamethoxazole/trimethoprim Sulfamethoxazol/trimethoprim -nl Sulfametoxydiazine Sulfamethoxydiazine -nl Sulfametrole/trimethoprim Sulfametrol/trimethoprim -nl Sulfamoxole Sulfamoxol -nl Sulfamoxole/trimethoprim Sulfamoxol/trimethoprim -nl Sulfaperin Sulfaperine -nl Sulfaphenazole Sulfafenazol -nl Sulfathiazole Sulfathiazol -nl Sulfathiourea Sulfathioureum -nl Sultamicillin Sultamicilline -nl Talampicillin Talampicilline -nl Teicoplanin Teicoplanine -nl Telithromycin Telitromycine -nl Temafloxacin Temafloxacine -nl Temocillin Temocilline -nl Tenofovir disoproxil Tenofovir -nl Terizidone Terizidon -nl Thiamphenicol Thiamfenicol -nl Thioacetazone/isoniazid Thioacetazon/isoniazide -nl Ticarcillin Ticarcilline -nl Ticarcillin/beta-lactamase inhibitor Ticarcilline/enzymremmer -nl Ticarcillin/clavulanic acid Ticarcilline/clavulaanzuur -nl Tinidazole Tinidazol -nl 
Tobramycin Tobramycine -nl Trimethoprim/sulfamethoxazole Cotrimoxazol -nl Troleandomycin Troleandomycine -nl Trovafloxacin Trovafloxacine -nl Vancomycin Vancomycine -nl Voriconazole Voriconazol -nl Aminoglycosides Aminoglycosiden TRUE FALSE -nl Amphenicols Amfenicolen TRUE FALSE -nl Antifungals/antimycotics Antifungica/antimycotica TRUE FALSE -nl Antimycobacterials Antimycobacteriele middelen TRUE FALSE -nl Beta-lactams/penicillins Beta-lactams/penicillines TRUE FALSE -nl Cephalosporins (1st gen.) Cefalosporines (1e gen.) TRUE FALSE -nl Cephalosporins (2nd gen.) Cefalosporines (2e gen.) TRUE FALSE -nl Cephalosporins (3rd gen.) Cefalosporines (3e gen.) TRUE FALSE -nl Cephalosporins (4th gen.) Cefalosporines (4e gen.) TRUE FALSE -nl Cephalosporins (5th gen.) Cefalosporines (5e gen.) TRUE FALSE -nl Cephalosporins (unclassified gen.) Cefalosporines (ongeclassificeerd) TRUE FALSE -nl Cephalosporins Cefalosporines TRUE FALSE -nl Glycopeptides Glycopeptiden TRUE FALSE -nl Macrolides/lincosamides Macroliden/lincosamiden TRUE FALSE -nl Other antibacterials Overige antibiotica TRUE FALSE -nl Polymyxins Polymyxines TRUE FALSE -nl Quinolones Quinolonen TRUE FALSE +lang pattern replacement fixed ignore.case affect_mo_name +de Coagulase-negative Staphylococcus Koagulase-negative Staphylococcus FALSE FALSE TRUE +de Coagulase-positive Staphylococcus Koagulase-positive Staphylococcus FALSE FALSE TRUE +de Beta-haemolytic Streptococcus Beta-hämolytischer Streptococcus FALSE FALSE TRUE +de unknown Gram-negatives unbekannte Gramnegativen FALSE FALSE TRUE +de unknown Gram-positives unbekannte Grampositiven FALSE FALSE TRUE +de unknown fungus unbekannter Pilze FALSE FALSE TRUE +de unknown yeast unbekannte Hefe FALSE FALSE TRUE +de unknown name unbekannte Name FALSE FALSE TRUE +de unknown kingdom unbekanntes Reich FALSE FALSE TRUE +de unknown phylum unbekannter Stamm FALSE FALSE TRUE +de unknown class unbekannte Klasse FALSE FALSE TRUE +de unknown order unbekannte Ordnung FALSE FALSE TRUE 
+de unknown family unbekannte Familie FALSE FALSE TRUE +de unknown genus unbekannte Gattung FALSE FALSE TRUE +de unknown species unbekannte Art FALSE FALSE TRUE +de unknown subspecies unbekannte Unterart FALSE FALSE TRUE +de unknown rank unbekannter Rang FALSE FALSE TRUE +de CoNS KNS TRUE FALSE TRUE +de CoPS KPS TRUE FALSE TRUE +de Gram-negative Gramnegativ FALSE FALSE FALSE +de Gram-positive Grampositiv FALSE FALSE FALSE +de ^Bacteria$ Bakterien FALSE FALSE FALSE +de ^Fungi$ Pilze FALSE FALSE FALSE +de ^Yeasts$ Hefen FALSE FALSE FALSE +de ^Protozoa$ Protozoen FALSE FALSE FALSE +de biogroup Biogruppe FALSE FALSE FALSE +de biotype Biotyp FALSE FALSE FALSE +de vegetative vegetativ FALSE FALSE FALSE +de ([([ ]*?)group \\1Gruppe FALSE FALSE FALSE +de ([([ ]*?)Group \\1Gruppe FALSE FALSE FALSE +de no .*growth keine? .*wachstum FALSE TRUE FALSE +de (^| )no|not keine? FALSE TRUE FALSE +nl Coagulase-negative Staphylococcus Coagulase-negatieve Staphylococcus FALSE FALSE TRUE +nl Coagulase-positive Staphylococcus Coagulase-positieve Staphylococcus FALSE FALSE TRUE +nl Beta-haemolytic Streptococcus Beta-hemolytische Streptococcus FALSE FALSE TRUE +nl unknown Gram-negatives onbekende Gram-negatieven FALSE FALSE TRUE +nl unknown Gram-positives onbekende Gram-positieven FALSE FALSE TRUE +nl unknown fungus onbekende schimmel FALSE FALSE TRUE +nl unknown yeast onbekende gist FALSE FALSE TRUE +nl unknown name onbekende naam FALSE FALSE TRUE +nl unknown kingdom onbekend koninkrijk FALSE FALSE TRUE +nl unknown phylum onbekend fylum FALSE FALSE TRUE +nl unknown class onbekende klasse FALSE FALSE TRUE +nl unknown order onbekende orde FALSE FALSE TRUE +nl unknown family onbekende familie FALSE FALSE TRUE +nl unknown genus onbekend geslacht FALSE FALSE TRUE +nl unknown species onbekende soort FALSE FALSE TRUE +nl unknown subspecies onbekende ondersoort FALSE FALSE TRUE +nl unknown rank onbekende rang FALSE FALSE TRUE +nl CoNS CNS TRUE FALSE TRUE +nl CoPS CPS TRUE FALSE TRUE +nl 
Gram-negative Gram-negatief FALSE FALSE FALSE +nl Gram-positive Gram-positief FALSE FALSE FALSE +nl ^Bacteria$ Bacteriën FALSE FALSE FALSE +nl ^Fungi$ Schimmels FALSE FALSE FALSE +nl ^Yeasts$ Gisten FALSE FALSE FALSE +nl ^Protozoa$ Protozoën FALSE FALSE FALSE +nl biogroup biogroep FALSE FALSE FALSE +nl vegetative vegetatief FALSE FALSE FALSE +nl ([([ ]*?)group \\1groep FALSE FALSE FALSE +nl ([([ ]*?)Group \\1Groep FALSE FALSE FALSE +nl antibiotic antibioticum FALSE FALSE FALSE +nl Antibiotic Antibioticum FALSE FALSE FALSE +nl Drug Middel FALSE FALSE FALSE +nl drug middel FALSE FALSE FALSE +nl no .*growth geen .*groei FALSE TRUE FALSE +nl no|not geen|niet FALSE TRUE FALSE +es Coagulase-negative Staphylococcus Staphylococcus coagulasa negativo FALSE FALSE TRUE +es Coagulase-positive Staphylococcus Staphylococcus coagulasa positivo FALSE FALSE TRUE +es Beta-haemolytic Streptococcus Streptococcus Beta-hemolítico FALSE FALSE TRUE +es unknown Gram-negatives Gram negativos desconocidos FALSE FALSE TRUE +es unknown Gram-positives Gram positivos desconocidos FALSE FALSE TRUE +es unknown fungus hongo desconocido FALSE FALSE TRUE +es unknown yeast levadura desconocida FALSE FALSE TRUE +es unknown name nombre desconocido FALSE FALSE TRUE +es unknown kingdom reino desconocido FALSE FALSE TRUE +es unknown phylum filo desconocido FALSE FALSE TRUE +es unknown class clase desconocida FALSE FALSE TRUE +es unknown order orden desconocido FALSE FALSE TRUE +es unknown family familia desconocida FALSE FALSE TRUE +es unknown genus género desconocido FALSE FALSE TRUE +es unknown species especie desconocida FALSE FALSE TRUE +es unknown subspecies subespecie desconocida FALSE FALSE TRUE +es unknown rank rango desconocido FALSE FALSE TRUE +es CoNS SCN TRUE FALSE TRUE +es CoPS SCP TRUE FALSE TRUE +es Gram-negative Gram negativo FALSE FALSE FALSE +es Gram-positive Gram positivo FALSE FALSE FALSE +es ^Bacteria$ Bacterias FALSE FALSE FALSE +es ^Fungi$ Hongos FALSE FALSE FALSE +es ^Yeasts$ 
Levaduras FALSE FALSE FALSE +es ^Protozoa$ Protozoarios FALSE FALSE FALSE +es biogroup biogrupo FALSE FALSE FALSE +es biotype biotipo FALSE FALSE FALSE +es vegetative vegetativo FALSE FALSE FALSE +es ([([ ]*?)group \\1grupo FALSE FALSE FALSE +es ([([ ]*?)Group \\1Grupo FALSE FALSE FALSE +es no .*growth no .*crecimiento FALSE TRUE FALSE +es no|not no|sin FALSE TRUE FALSE +it Coagulase-negative Staphylococcus Staphylococcus negativo coagulasi FALSE FALSE TRUE +it Coagulase-positive Staphylococcus Staphylococcus positivo coagulasi FALSE FALSE TRUE +it Beta-haemolytic Streptococcus Streptococcus Beta-emolitico FALSE FALSE TRUE +it unknown Gram-negatives Gram negativi sconosciuti FALSE FALSE TRUE +it unknown Gram-positives Gram positivi sconosciuti FALSE FALSE TRUE +it unknown fungus fungo sconosciuto FALSE FALSE TRUE +it unknown yeast lievito sconosciuto FALSE FALSE TRUE +it unknown name nome sconosciuto FALSE FALSE TRUE +it unknown kingdom regno sconosciuto FALSE FALSE TRUE +it unknown phylum phylum sconosciuto FALSE FALSE TRUE +it unknown class classe sconosciuta FALSE FALSE TRUE +it unknown order ordine sconosciuto FALSE FALSE TRUE +it unknown family famiglia sconosciuta FALSE FALSE TRUE +it unknown genus genere sconosciuto FALSE FALSE TRUE +it unknown species specie sconosciute FALSE FALSE TRUE +it unknown subspecies sottospecie sconosciute FALSE FALSE TRUE +it unknown rank grado sconosciuto FALSE FALSE TRUE +it Gram-negative Gram negativo FALSE FALSE FALSE +it Gram-positive Gram positivo FALSE FALSE FALSE +it ^Bacteria$ Batteri FALSE FALSE FALSE +it ^Fungi$ Funghi FALSE FALSE FALSE +it ^Yeasts$ Lieviti FALSE FALSE FALSE +it ^Protozoa$ Protozoi FALSE FALSE FALSE +it biogroup biogruppo FALSE FALSE FALSE +it biotype biotipo FALSE FALSE FALSE +it vegetative vegetativo FALSE FALSE FALSE +it ([([ ]*?)group \\1gruppo FALSE FALSE FALSE +it ([([ ]*?)Group \\1Gruppo FALSE FALSE FALSE +it no .*growth senza .*crescita FALSE TRUE FALSE +it no|not senza FALSE TRUE FALSE +fr
Coagulase-negative Staphylococcus Staphylococcus à coagulase négative FALSE FALSE TRUE +fr Coagulase-positive Staphylococcus Staphylococcus à coagulase positive FALSE FALSE TRUE +fr Beta-haemolytic Streptococcus Streptococcus Bêta-hémolytique FALSE FALSE TRUE +fr unknown Gram-negatives Gram négatifs inconnus FALSE FALSE TRUE +fr unknown Gram-positives Gram positifs inconnus FALSE FALSE TRUE +fr unknown fungus champignon inconnu FALSE FALSE TRUE +fr unknown yeast levure inconnue FALSE FALSE TRUE +fr unknown name nom inconnu FALSE FALSE TRUE +fr unknown kingdom règne inconnu FALSE FALSE TRUE +fr unknown phylum embranchement inconnu FALSE FALSE TRUE +fr unknown class classe inconnue FALSE FALSE TRUE +fr unknown order ordre inconnu FALSE FALSE TRUE +fr unknown family famille inconnue FALSE FALSE TRUE +fr unknown genus genre inconnu FALSE FALSE TRUE +fr unknown species espèce inconnue FALSE FALSE TRUE +fr unknown subspecies sous-espèce inconnue FALSE FALSE TRUE +fr unknown rank rang inconnu FALSE FALSE TRUE +fr Gram-negative Gram négatif FALSE FALSE FALSE +fr Gram-positive Gram positif FALSE FALSE FALSE +fr ^Bacteria$ Bactéries FALSE FALSE FALSE +fr ^Fungi$ Champignons FALSE FALSE FALSE +fr ^Yeasts$ Levures FALSE FALSE FALSE +fr ^Protozoa$ Protozoaires FALSE FALSE FALSE +fr biogroup biogroupe FALSE FALSE FALSE +fr vegetative végétatif FALSE FALSE FALSE +fr ([([ ]*?)group \\1groupe FALSE FALSE FALSE +fr ([([ ]*?)Group \\1Groupe FALSE FALSE FALSE +fr no .*growth pas .*croissance FALSE TRUE FALSE +fr no|not non FALSE TRUE FALSE +pt Coagulase-negative Staphylococcus Staphylococcus coagulase negativo FALSE FALSE TRUE +pt Coagulase-positive Staphylococcus Staphylococcus coagulase positivo FALSE FALSE TRUE +pt Beta-haemolytic Streptococcus Streptococcus Beta-hemolítico FALSE FALSE TRUE +pt unknown Gram-negatives Gram negativos desconhecidos FALSE FALSE TRUE +pt unknown Gram-positives Gram positivos desconhecidos FALSE FALSE TRUE +pt unknown fungus fungo desconhecido FALSE FALSE
TRUE +pt unknown yeast levedura desconhecida FALSE FALSE TRUE +pt unknown name nome desconhecido FALSE FALSE TRUE +pt unknown kingdom reino desconhecido FALSE FALSE TRUE +pt unknown phylum filo desconhecido FALSE FALSE TRUE +pt unknown class classe desconhecida FALSE FALSE TRUE +pt unknown order ordem desconhecido FALSE FALSE TRUE +pt unknown family família desconhecida FALSE FALSE TRUE +pt unknown genus gênero desconhecido FALSE FALSE TRUE +pt unknown species espécies desconhecida FALSE FALSE TRUE +pt unknown subspecies subespécies desconhecida FALSE FALSE TRUE +pt unknown rank classificação desconhecido FALSE FALSE TRUE +pt Gram-negative Gram negativo FALSE FALSE FALSE +pt Gram-positive Gram positivo FALSE FALSE FALSE +pt ^Bacteria$ Bactérias FALSE FALSE FALSE +pt ^Fungi$ Fungos FALSE FALSE FALSE +pt ^Yeasts$ Leveduras FALSE FALSE FALSE +pt ^Protozoa$ Protozoários FALSE FALSE FALSE +pt biogroup biogrupo FALSE FALSE FALSE +pt biotype biótipo FALSE FALSE FALSE +pt vegetative vegetativo FALSE FALSE FALSE +pt ([([ ]*?)group \\1grupo FALSE FALSE FALSE +pt ([([ ]*?)Group \\1Grupo FALSE FALSE FALSE +pt no .*growth sem .*crescimento FALSE TRUE FALSE +pt no|not sem FALSE TRUE FALSE +de clavulanic acid Clavulansäure FALSE TRUE FALSE +nl 4-aminosalicylic acid 4-aminosalicylzuur TRUE FALSE FALSE +nl Adefovir dipivoxil Adefovir TRUE FALSE FALSE +nl Aldesulfone sodium Aldesulfon TRUE FALSE FALSE +nl Amikacin Amikacine TRUE FALSE FALSE +nl Amoxicillin Amoxicilline TRUE FALSE FALSE +nl Amoxicillin/beta-lactamase inhibitor Amoxicilline/enzymremmer TRUE FALSE FALSE +nl Amphotericin B Amfotericine B TRUE FALSE FALSE +nl Ampicillin Ampicilline TRUE FALSE FALSE +nl Ampicillin/beta-lactamase inhibitor Ampicilline/enzymremmer TRUE FALSE FALSE +nl Anidulafungin Anidulafungine TRUE FALSE FALSE +nl Azidocillin Azidocilline TRUE FALSE FALSE +nl Azithromycin Azitromycine TRUE FALSE FALSE +nl Azlocillin Azlocilline TRUE FALSE FALSE +nl Bacampicillin Bacampicilline TRUE FALSE FALSE +nl 
Bacitracin Bacitracine TRUE FALSE FALSE +nl Benzathine benzylpenicillin Benzylpenicillinebenzathine TRUE FALSE FALSE +nl Benzathine phenoxymethylpenicillin Fenoxymethylpenicillinebenzathine TRUE FALSE FALSE +nl Benzylpenicillin Benzylpenicilline TRUE FALSE FALSE +nl Calcium aminosalicylate Aminosalicylzuur TRUE FALSE FALSE +nl Capreomycin Capreomycine TRUE FALSE FALSE +nl Carbenicillin Carbenicilline TRUE FALSE FALSE +nl Carindacillin Carindacilline TRUE FALSE FALSE +nl Caspofungin Caspofungine TRUE FALSE FALSE +nl Ce(f|ph)acetrile Cefacetril FALSE FALSE FALSE +nl Ce(f|ph)alexin Cefalexine FALSE FALSE FALSE +nl Ce(f|ph)alotin Cefalotine FALSE FALSE FALSE +nl Ce(f|ph)amandole Cefamandol FALSE FALSE FALSE +nl Ce(f|ph)apirin Cefapirine FALSE FALSE FALSE +nl Ce(f|ph)azedone Cefazedon FALSE FALSE FALSE +nl Ce(f|ph)azolin Cefazoline FALSE FALSE FALSE +nl Ce(f|ph)alothin Cefalotine FALSE FALSE FALSE +nl Ce(f|ph)alexin Cefalexine FALSE FALSE FALSE +nl Ce(f|ph)epime Cefepim FALSE FALSE FALSE +nl Ce(f|ph)ixime Cefixim FALSE FALSE FALSE +nl Ce(f|ph)menoxime Cefmenoxim FALSE FALSE FALSE +nl Ce(f|ph)metazole Cefmetazol FALSE FALSE FALSE +nl Ce(f|ph)odizime Cefodizim FALSE FALSE FALSE +nl Ce(f|ph)onicid Cefonicide FALSE FALSE FALSE +nl Ce(f|ph)operazone Cefoperazon FALSE FALSE FALSE +nl Ce(f|ph)operazone/beta-lactamase inhibitor Cefoperazon/enzymremmer FALSE FALSE FALSE +nl Ce(f|ph)otaxime Cefotaxim FALSE FALSE FALSE +nl Ce(f|ph)oxitin Cefoxitine FALSE FALSE FALSE +nl Ce(f|ph)pirome Cefpirom FALSE FALSE FALSE +nl Ce(f|ph)podoxime Cefpodoxim FALSE FALSE FALSE +nl Ce(f|ph)radine Cefradine FALSE FALSE FALSE +nl Ce(f|ph)sulodin Cefsulodine FALSE FALSE FALSE +nl Ce(f|ph)tazidime Ceftazidim FALSE FALSE FALSE +nl Ce(f|ph)tezole Ceftezol FALSE FALSE FALSE +nl Ce(f|ph)tizoxime Ceftizoxim FALSE FALSE FALSE +nl Ce(f|ph)triaxone Ceftriaxon FALSE FALSE FALSE +nl Ce(f|ph)uroxime Cefuroxim FALSE FALSE FALSE +nl Ce(f|ph)uroxime/metronidazole Cefuroxim/andere antibacteriele middelen FALSE FALSE 
FALSE +nl Chloramphenicol Chlooramfenicol TRUE FALSE FALSE +nl Chlortetracycline Chloortetracycline TRUE FALSE FALSE +nl Cinoxacin Cinoxacine TRUE FALSE FALSE +nl Ciprofloxacin Ciprofloxacine TRUE FALSE FALSE +nl Clarithromycin Claritromycine TRUE FALSE FALSE +nl Clavulanic acid Clavulaanzuur TRUE FALSE FALSE +nl clavulanic acid clavulaanzuur TRUE FALSE FALSE +nl Clindamycin Clindamycine TRUE FALSE FALSE +nl Clometocillin Clometocilline TRUE FALSE FALSE +nl Clotrimazole Clotrimazol TRUE FALSE FALSE +nl Cloxacillin Cloxacilline TRUE FALSE FALSE +nl Colistin Colistine TRUE FALSE FALSE +nl Dapsone Dapson TRUE FALSE FALSE +nl Daptomycin Daptomycine TRUE FALSE FALSE +nl Dibekacin Dibekacine TRUE FALSE FALSE +nl Dicloxacillin Dicloxacilline TRUE FALSE FALSE +nl Dirithromycin Diritromycine TRUE FALSE FALSE +nl Econazole Econazol TRUE FALSE FALSE +nl Enoxacin Enoxacine TRUE FALSE FALSE +nl Epicillin Epicilline TRUE FALSE FALSE +nl Erythromycin Erytromycine TRUE FALSE FALSE +nl Ethambutol/isoniazid Ethambutol/isoniazide TRUE FALSE FALSE +nl Fleroxacin Fleroxacine TRUE FALSE FALSE +nl Flucloxacillin Flucloxacilline TRUE FALSE FALSE +nl Fluconazole Fluconazol TRUE FALSE FALSE +nl Flucytosine Fluorocytosine TRUE FALSE FALSE +nl Flurithromycin Fluritromycine TRUE FALSE FALSE +nl Fosfomycin Fosfomycine TRUE FALSE FALSE +nl Fusidic acid Fusidinezuur TRUE FALSE FALSE +nl Gatifloxacin Gatifloxacine TRUE FALSE FALSE +nl Gemifloxacin Gemifloxacine TRUE FALSE FALSE +nl Gentamicin Gentamicine TRUE FALSE FALSE +nl Grepafloxacin Grepafloxacine TRUE FALSE FALSE +nl Hachimycin Hachimycine TRUE FALSE FALSE +nl Hetacillin Hetacilline TRUE FALSE FALSE +nl Imipenem/cilastatin Imipenem/enzymremmer TRUE FALSE FALSE +nl Inosine pranobex Inosiplex TRUE FALSE FALSE +nl Isepamicin Isepamicine TRUE FALSE FALSE +nl Isoconazole Isoconazol TRUE FALSE FALSE +nl Isoniazid Isoniazide TRUE FALSE FALSE +nl Itraconazole Itraconazol TRUE FALSE FALSE +nl Josamycin Josamycine TRUE FALSE FALSE +nl Kanamycin 
Kanamycine TRUE FALSE FALSE +nl Ketoconazole Ketoconazol TRUE FALSE FALSE +nl Levofloxacin Levofloxacine TRUE FALSE FALSE +nl Lincomycin Lincomycine TRUE FALSE FALSE +nl Lomefloxacin Lomefloxacine TRUE FALSE FALSE +nl Lysozyme Lysozym TRUE FALSE FALSE +nl Mandelic acid Amandelzuur TRUE FALSE FALSE +nl Metampicillin Metampicilline TRUE FALSE FALSE +nl Meticillin Meticilline TRUE FALSE FALSE +nl Metisazone Metisazon TRUE FALSE FALSE +nl Metronidazole Metronidazol TRUE FALSE FALSE +nl Mezlocillin Mezlocilline TRUE FALSE FALSE +nl Micafungin Micafungine TRUE FALSE FALSE +nl Miconazole Miconazol TRUE FALSE FALSE +nl Midecamycin Midecamycine TRUE FALSE FALSE +nl Miocamycin Miocamycine TRUE FALSE FALSE +nl Moxifloxacin Moxifloxacine TRUE FALSE FALSE +nl Mupirocin Mupirocine TRUE FALSE FALSE +nl Nalidixic acid Nalidixinezuur TRUE FALSE FALSE +nl Neomycin Neomycine TRUE FALSE FALSE +nl Netilmicin Netilmicine TRUE FALSE FALSE +nl Nitrofurantoin Nitrofurantoine TRUE FALSE FALSE +nl Norfloxacin Norfloxacine TRUE FALSE FALSE +nl Novobiocin Novobiocine TRUE FALSE FALSE +nl Nystatin Nystatine TRUE FALSE FALSE +nl Ofloxacin Ofloxacine TRUE FALSE FALSE +nl Oleandomycin Oleandomycine TRUE FALSE FALSE +nl Ornidazole Ornidazol TRUE FALSE FALSE +nl Oxacillin Oxacilline TRUE FALSE FALSE +nl Oxolinic acid Oxolinezuur TRUE FALSE FALSE +nl Oxytetracycline Oxytetracycline TRUE FALSE FALSE +nl Pazufloxacin Pazufloxacine TRUE FALSE FALSE +nl Pefloxacin Pefloxacine TRUE FALSE FALSE +nl Penamecillin Penamecilline TRUE FALSE FALSE +nl Penicillin Penicilline TRUE FALSE FALSE +nl Pheneticillin Feneticilline TRUE FALSE FALSE +nl Phenoxymethylpenicillin Fenoxymethylpenicilline TRUE FALSE FALSE +nl Pipemidic acid Pipemidinezuur TRUE FALSE FALSE +nl Piperacillin Piperacilline TRUE FALSE FALSE +nl Piperacillin/beta-lactamase inhibitor Piperacilline/enzymremmer TRUE FALSE FALSE +nl Piromidic acid Piromidinezuur TRUE FALSE FALSE +nl Pivampicillin Pivampicilline TRUE FALSE FALSE +nl Polymyxin B Polymyxine 
B TRUE FALSE FALSE +nl Posaconazole Posaconazol TRUE FALSE FALSE +nl Pristinamycin Pristinamycine TRUE FALSE FALSE +nl Procaine benzylpenicillin Benzylpenicillineprocaine TRUE FALSE FALSE +nl Propicillin Propicilline TRUE FALSE FALSE +nl Prulifloxacin Prulifloxacine TRUE FALSE FALSE +nl Quinupristin/dalfopristin Quinupristine/dalfopristine TRUE FALSE FALSE +nl Ribostamycin Ribostamycine TRUE FALSE FALSE +nl Rifabutin Rifabutine TRUE FALSE FALSE +nl Rifampicin Rifampicine TRUE FALSE FALSE +nl Rifampicin/pyrazinamide/ethambutol/isoniazid Rifampicine/pyrazinamide/ethambutol/isoniazide TRUE FALSE FALSE +nl Rifampicin/pyrazinamide/isoniazid Rifampicine/pyrazinamide/isoniazide TRUE FALSE FALSE +nl Rifampicin/isoniazid Rifampicine/isoniazide TRUE FALSE FALSE +nl Rifamycin Rifamycine TRUE FALSE FALSE +nl Rifaximin Rifaximine TRUE FALSE FALSE +nl Rokitamycin Rokitamycine TRUE FALSE FALSE +nl Rosoxacin Rosoxacine TRUE FALSE FALSE +nl Roxithromycin Roxitromycine TRUE FALSE FALSE +nl Rufloxacin Rufloxacine TRUE FALSE FALSE +nl Sisomicin Sisomicine TRUE FALSE FALSE +nl Sodium aminosalicylate Aminosalicylzuur TRUE FALSE FALSE +nl Sparfloxacin Sparfloxacine TRUE FALSE FALSE +nl Spectinomycin Spectinomycine TRUE FALSE FALSE +nl Spiramycin Spiramycine TRUE FALSE FALSE +nl Spiramycin/metronidazole Spiramycine/metronidazol TRUE FALSE FALSE +nl Staphylococcus immunoglobulin Stafylokokkenimmunoglobuline TRUE FALSE FALSE +nl Streptoduocin Streptoduocine TRUE FALSE FALSE +nl Streptomycin Streptomycine TRUE FALSE FALSE +nl Streptomycin/isoniazid Streptomycine/isoniazide TRUE FALSE FALSE +nl Sulbenicillin Sulbenicilline TRUE FALSE FALSE +nl Sulfadiazine/tetroxoprim Sulfadiazine/tetroxoprim TRUE FALSE FALSE +nl Sulfadiazine/trimethoprim Sulfadiazine/trimethoprim TRUE FALSE FALSE +nl Sulfadimidine/trimethoprim Sulfadimidine/trimethoprim TRUE FALSE FALSE +nl Sulfafurazole Sulfafurazol TRUE FALSE FALSE +nl Sulfaisodimidine Sulfisomidine TRUE FALSE FALSE +nl Sulfalene Sulfaleen TRUE FALSE FALSE 
+nl Sulfamazone Sulfamazon TRUE FALSE FALSE +nl Sulfamerazine/trimethoprim Sulfamerazine/trimethoprim TRUE FALSE FALSE +nl Sulfamethizole Sulfamethizol TRUE FALSE FALSE +nl Sulfamethoxazole Sulfamethoxazol TRUE FALSE FALSE +nl Sulfamethoxazole/trimethoprim Sulfamethoxazol/trimethoprim TRUE FALSE FALSE +nl Sulfametoxydiazine Sulfamethoxydiazine TRUE FALSE FALSE +nl Sulfametrole/trimethoprim Sulfametrol/trimethoprim TRUE FALSE FALSE +nl Sulfamoxole Sulfamoxol TRUE FALSE FALSE +nl Sulfamoxole/trimethoprim Sulfamoxol/trimethoprim TRUE FALSE FALSE +nl Sulfaperin Sulfaperine TRUE FALSE FALSE +nl Sulfaphenazole Sulfafenazol TRUE FALSE FALSE +nl Sulfathiazole Sulfathiazol TRUE FALSE FALSE +nl Sulfathiourea Sulfathioureum TRUE FALSE FALSE +nl Sultamicillin Sultamicilline TRUE FALSE FALSE +nl Talampicillin Talampicilline TRUE FALSE FALSE +nl Teicoplanin Teicoplanine TRUE FALSE FALSE +nl Telithromycin Telitromycine TRUE FALSE FALSE +nl Temafloxacin Temafloxacine TRUE FALSE FALSE +nl Temocillin Temocilline TRUE FALSE FALSE +nl Tenofovir disoproxil Tenofovir TRUE FALSE FALSE +nl Terizidone Terizidon TRUE FALSE FALSE +nl Thiamphenicol Thiamfenicol TRUE FALSE FALSE +nl Thioacetazone/isoniazid Thioacetazon/isoniazide TRUE FALSE FALSE +nl Ticarcillin Ticarcilline TRUE FALSE FALSE +nl Ticarcillin/beta-lactamase inhibitor Ticarcilline/enzymremmer TRUE FALSE FALSE +nl Ticarcillin/clavulanic acid Ticarcilline/clavulaanzuur TRUE FALSE FALSE +nl Tinidazole Tinidazol TRUE FALSE FALSE +nl Tobramycin Tobramycine TRUE FALSE FALSE +nl Trimethoprim/sulfamethoxazole Cotrimoxazol TRUE FALSE FALSE +nl Troleandomycin Troleandomycine TRUE FALSE FALSE +nl Trovafloxacin Trovafloxacine TRUE FALSE FALSE +nl Vancomycin Vancomycine TRUE FALSE FALSE +nl Voriconazole Voriconazol TRUE FALSE FALSE +nl Aminoglycosides Aminoglycosiden TRUE FALSE FALSE +nl Amphenicols Amfenicolen TRUE FALSE FALSE +nl Antifungals/antimycotics Antifungica/antimycotica TRUE FALSE FALSE +nl Antimycobacterials Antimycobacteriele 
middelen TRUE FALSE FALSE +nl Beta-lactams/penicillins Beta-lactams/penicillines TRUE FALSE FALSE +nl Cephalosporins (1st gen.) Cefalosporines (1e gen.) TRUE FALSE FALSE +nl Cephalosporins (2nd gen.) Cefalosporines (2e gen.) TRUE FALSE FALSE +nl Cephalosporins (3rd gen.) Cefalosporines (3e gen.) TRUE FALSE FALSE +nl Cephalosporins (4th gen.) Cefalosporines (4e gen.) TRUE FALSE FALSE +nl Cephalosporins (5th gen.) Cefalosporines (5e gen.) TRUE FALSE FALSE +nl Cephalosporins (unclassified gen.) Cefalosporines (ongeclassificeerd) TRUE FALSE FALSE +nl Cephalosporins Cefalosporines TRUE FALSE FALSE +nl Glycopeptides Glycopeptiden TRUE FALSE FALSE +nl Macrolides/lincosamides Macroliden/lincosamiden TRUE FALSE FALSE +nl Other antibacterials Overige antibiotica TRUE FALSE FALSE +nl Polymyxins Polymyxines TRUE FALSE FALSE +nl Quinolones Quinolonen TRUE FALSE FALSE diff --git a/docs/404.html b/docs/404.html index 43c4fad1..17888f86 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 6d5d1e4f..adc8ceba 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020 diff --git a/docs/articles/index.html b/docs/articles/index.html index e05b9bd2..422c81e8 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020 diff --git a/docs/authors.html b/docs/authors.html index 8d83429d..824b29ee 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020 diff --git a/docs/index.html b/docs/index.html index 7b6fc433..01de6418 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020 @@ -395,8 +395,9 @@ Since you are one of our users, we would like to know how you use the package an

R-code-checkCodeFactor Codecov

The latest and unpublished development version can be installed from GitHub using:

-install.packages("remotes") 
+install.packages("remotes") # if you haven't already
 remotes::install_github("msberends/AMR")
+

You can also download the latest build from our repository: https://github.com/msberends/AMR/raw/master/data-raw/AMR_latest.tar.gz

diff --git a/docs/news/index.html b/docs/news/index.html index bff725b8..becdfb44 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020
@@ -236,13 +236,13 @@ Source: NEWS.md -
-

-AMR 1.5.0.9019 Unreleased +
+

+AMR 1.5.0.9020 Unreleased

-
+

-Last updated: 17 February 2021 +Last updated: 18 February 2021

@@ -325,6 +325,7 @@
  • Functions print() and summary() on a Principal Components Analysis object (pca()) now print additional group info if the original data was grouped using dplyr::group_by()
  • Improved speed and reliability of guess_ab_col(). As this also internally improves the reliability of first_isolate() and mdro(), this might have a slight impact on the results of those functions.
  • +
  • Fix for mo_name() when used in languages other than English
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index c09de22a..ffde33f2 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -12,7 +12,7 @@ articles: datasets: datasets.html resistance_predict: resistance_predict.html welcome_to_AMR: welcome_to_AMR.html -last_built: 2021-02-17T09:58Z +last_built: 2021-02-18T22:21Z urls: reference: https://msberends.github.io/AMR//reference article: https://msberends.github.io/AMR//articles diff --git a/docs/reference/index.html b/docs/reference/index.html index c9778900..e4023f24 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020
    diff --git a/docs/survey.html b/docs/survey.html index bc36ce7e..639d0692 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9019 + 1.5.0.9020
    diff --git a/git_premaster.sh b/git_premaster.sh index 5f1022ea..1e4bfd3c 100755 --- a/git_premaster.sh +++ b/git_premaster.sh @@ -123,10 +123,8 @@ echo echo "••••••••••••••••••••" echo "• Building package •" echo "••••••••••••••••••••" -echo "• Removing old build from 'data-raw/'..." -rm data-raw/AMR_*.tar.gz -echo "• Building 'data-raw/AMR_${new_version}.tar.gz'..." -Rscript -e "x <- devtools::build(path = 'data-raw', vignettes = FALSE, manual = FALSE, binary = FALSE, quiet = TRUE)" +echo "• Building 'data-raw/AMR_latest.tar.gz'..." +Rscript -e "x <- devtools::build(path = 'data-raw/AMR_latest.tar.gz', vignettes = FALSE, manual = FALSE, binary = FALSE, quiet = TRUE)" echo "• Installing..." Rscript -e "devtools::install(quiet = TRUE, dependencies = FALSE)" echo diff --git a/index.md b/index.md index 52e24b55..b8855fed 100644 --- a/index.md +++ b/index.md @@ -125,10 +125,12 @@ It will be downloaded and installed automatically. For RStudio, click on the men The latest and unpublished development version can be installed from GitHub using: ```r -install.packages("remotes") +install.packages("remotes") # if you haven't already remotes::install_github("msberends/AMR") ``` +You can also download the latest build from our repository: + ### Get started To find out how to conduct AMR data analysis, please [continue reading here to get started](./articles/AMR.html) or click the links in the 'How to' menu. 
diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index 8d9c0f2a..d5f9cbf7 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -84,6 +84,9 @@ test_that("mo_property works", { expect_output(print(mo_gramstain("Escherichia coli", language = "fr"))) expect_error(mo_gramstain("Escherichia coli", language = "UNKNOWN")) + + dutch <- mo_name(microorganisms$fullname, language = "nl") # should be transformable to English again + expect_identical(mo_name(dutch, language = NULL), microorganisms$fullname) # gigantic test - will run ALL names # manual property function expect_error(mo_property("Escherichia coli", property = c("tsn", "fullname")))