mirror of
				https://github.com/msberends/AMR.git
				synced 2025-10-31 08:48:16 +01:00 
			
		
		
		
	read_4D
This commit is contained in:
		| @@ -116,6 +116,7 @@ export(portion_S) | |||||||
| export(portion_SI) | export(portion_SI) | ||||||
| export(portion_df) | export(portion_df) | ||||||
| export(ratio) | export(ratio) | ||||||
|  | export(read_4D) | ||||||
| export(resistance_predict) | export(resistance_predict) | ||||||
| export(right_join_microorganisms) | export(right_join_microorganisms) | ||||||
| export(rsi) | export(rsi) | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								NEWS.md
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								NEWS.md
									
									
									
									
									
								
							| @@ -5,6 +5,7 @@ | |||||||
| * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all` | * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all` | ||||||
| * Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default. | * Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default. | ||||||
| * Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`. | * Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`. | ||||||
|  | * Function `read_4D` to read from the 4D database of the MMB department of the UMCG | ||||||
|  |  | ||||||
| #### Changed | #### Changed | ||||||
| * Big changes to the `EUCAST_rules` function: | * Big changes to the `EUCAST_rules` function: | ||||||
|   | |||||||
							
								
								
									
										134
									
								
								R/read_4d.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								R/read_4d.R
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | |||||||
|  | # ==================================================================== # | ||||||
|  | # TITLE                                                                # | ||||||
|  | # Antimicrobial Resistance (AMR) Analysis                              # | ||||||
|  | #                                                                      # | ||||||
|  | # AUTHORS                                                              # | ||||||
|  | # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           # | ||||||
|  | #                                                                      # | ||||||
|  | # LICENCE                                                              # | ||||||
|  | # This program is free software; you can redistribute it and/or modify # | ||||||
|  | # it under the terms of the GNU General Public License version 2.0,    # | ||||||
|  | # as published by the Free Software Foundation.                        # | ||||||
|  | #                                                                      # | ||||||
|  | # This program is distributed in the hope that it will be useful,      # | ||||||
|  | # but WITHOUT ANY WARRANTY; without even the implied warranty of       # | ||||||
|  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        # | ||||||
|  | # GNU General Public License for more details.                         # | ||||||
|  | # ==================================================================== # | ||||||
|  |  | ||||||
|  | #' Read data from 4D database | ||||||
|  | #' | ||||||
|  | #' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}. | ||||||
|  | #' @inheritParams utils::read.table | ||||||
|  | #' @export | ||||||
|  | read_4D <- function(file, | ||||||
|  |                     header = TRUE, | ||||||
|  |                     sep = "\t", | ||||||
|  |                     quote = "\"'", | ||||||
|  |                     dec = ",", | ||||||
|  |                     na.strings = c("NA", "", "."), | ||||||
|  |                     skip = 2, | ||||||
|  |                     check.names = TRUE, | ||||||
|  |                     strip.white = TRUE, | ||||||
|  |                     fill = TRUE, | ||||||
|  |                     blank.lines.skip = TRUE, | ||||||
|  |                     stringsAsFactors = FALSE, | ||||||
|  |                     fileEncoding = "UTF-8", | ||||||
|  |                     encoding = "UTF-8") { | ||||||
|  |  | ||||||
|  |   data_4D <- utils::read.table(file = file, | ||||||
|  |                                header = header, | ||||||
|  |                                sep = sep, | ||||||
|  |                                quote = quote, | ||||||
|  |                                dec = dec, | ||||||
|  |                                na.strings = na.strings, | ||||||
|  |                                skip = skip, | ||||||
|  |                                check.names = check.names, | ||||||
|  |                                strip.white = strip.white, | ||||||
|  |                                fill = fill, | ||||||
|  |                                blank.lines.skip = blank.lines.skip, | ||||||
|  |                                stringsAsFactors = stringsAsFactors, | ||||||
|  |                                fileEncoding = fileEncoding, | ||||||
|  |                                encoding = encoding) | ||||||
|  |  | ||||||
|  |   # helper functions | ||||||
|  |   to_date_4D <- function(x) { | ||||||
|  |     date_regular <- as.Date(x, format = "%d-%m-%y") | ||||||
|  |     posixlt <- as.POSIXlt(date_regular) | ||||||
|  |     # born after today will be born 100 years ago | ||||||
|  |     # based on https://stackoverflow.com/a/3312971/4575331 | ||||||
|  |     posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100 | ||||||
|  |     as.Date(posixlt) | ||||||
|  |   } | ||||||
|  |   to_age_4D <- function(from, to) { | ||||||
|  |     from_lt = as.POSIXlt(from) | ||||||
|  |     to_lt = as.POSIXlt(to) | ||||||
|  |  | ||||||
|  |     age = to_lt$year - from_lt$year | ||||||
|  |  | ||||||
|  |     ifelse(to_lt$mon < from_lt$mon | | ||||||
|  |              (to_lt$mon == from_lt$mon & to_lt$mday < from_lt$mday), | ||||||
|  |            age - 1, age) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   colnames(data_4D) <- tolower(colnames(data_4D)) | ||||||
|  |   if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) { | ||||||
|  |     # add age | ||||||
|  |     data_4D$age <- NA_integer_ | ||||||
|  |   } | ||||||
|  |   cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling", | ||||||
|  |                    "afd.", "spec", "mat", "matbijz.",  "mocode", | ||||||
|  |                    "amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl", | ||||||
|  |                    "cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin", | ||||||
|  |                    "cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf", | ||||||
|  |                    "fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero", | ||||||
|  |                    "metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf", | ||||||
|  |                    "oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa", | ||||||
|  |                    "spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb", | ||||||
|  |                    "vanc", "vori") | ||||||
|  |   # this ones actually exist | ||||||
|  |   cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)] | ||||||
|  |   # order of columns | ||||||
|  |   data_4D <- data_4D[, cols_wanted] | ||||||
|  |  | ||||||
|  |   # rename of columns | ||||||
|  |   colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE) | ||||||
|  |   colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE) | ||||||
|  |  | ||||||
|  |   if ("date_birth" %in% colnames(data_4D)) { | ||||||
|  |     data_4D$date_birth <- to_date_4D(data_4D$date_birth) | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |   if ("date_received" %in% colnames(data_4D)) { | ||||||
|  |     data_4D$date_received <- to_date_4D(data_4D$date_received) | ||||||
|  |   } | ||||||
|  |   if ("age" %in% colnames(data_4D)) { | ||||||
|  |     data_4D$age <- to_age_4D(data_4D$date_birth, data_4D$date_received) | ||||||
|  |   } | ||||||
|  |   if ("gender" %in% colnames(data_4D)) { | ||||||
|  |     data_4D$gender[data_4D$gender == "V"] <- "F" | ||||||
|  |   } | ||||||
|  |   if ("mo" %in% colnames(data_4D)) { | ||||||
|  |     data_4D$mo <- as.mo(data_4D$mo) | ||||||
|  |     # column right of mo is: | ||||||
|  |     drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1] | ||||||
|  |     if (!is.na(drug1)) { | ||||||
|  |       # and last is: | ||||||
|  |       drug_last <- colnames(data_4D)[length(data_4D)] | ||||||
|  |       # transform those to rsi: | ||||||
|  |       data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi)) | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   data_4D | ||||||
|  | } | ||||||
|  |  | ||||||
							
								
								
									
										18
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								README.md
									
									
									
									
									
								
							| @@ -599,14 +599,24 @@ mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English | |||||||
| mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system | mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system | ||||||
| # "Staphylococcus à coagulase négative (CoNS)" | # "Staphylococcus à coagulase négative (CoNS)" | ||||||
|  |  | ||||||
| microbenchmark(A = mo_fullname("CoNS", language = "en"), | microbenchmark(en = mo_fullname("CoNS", language = "en"), | ||||||
|                B = mo_fullname("CoNS", language = "fr"), |                de = mo_fullname("CoNS", language = "de"), | ||||||
|  |                nl = mo_fullname("CoNS", language = "nl"), | ||||||
|  |                es = mo_fullname("CoNS", language = "es"), | ||||||
|  |                it = mo_fullname("CoNS", language = "it"), | ||||||
|  |                fr = mo_fullname("CoNS", language = "fr"), | ||||||
|  |                pt = mo_fullname("CoNS", language = "pt"), | ||||||
|                times = 10, |                times = 10, | ||||||
|                unit = "ms") |                unit = "ms") | ||||||
| # Unit: milliseconds | # Unit: milliseconds | ||||||
| #  expr       min       lq      mean    median        uq      max neval | #  expr       min       lq      mean    median        uq      max neval | ||||||
| #     A  6.080733  6.33684  6.467129  6.493773  6.593926  6.963666    10 | #    en  6.093583  6.51724  6.555105  6.562986  6.630663  6.99698   100 | ||||||
| #     B 14.076651 14.10452 14.446035 14.315893 14.636918 15.254106    10 | #    de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956   100 | ||||||
|  | #    nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283   100 | ||||||
|  | #    es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168   100 | ||||||
|  | #    it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284   100 | ||||||
|  | #    fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923   100 | ||||||
|  | #    pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701   100 | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
| Currently supported are German, Dutch, Spanish, Italian, French and Portuguese. | Currently supported are German, Dutch, Spanish, Italian, French and Portuguese. | ||||||
|   | |||||||
							
								
								
									
										104
									
								
								man/read_4D.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								man/read_4D.Rd
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/read_4d.R | ||||||
|  | \name{read_4D} | ||||||
|  | \alias{read_4D} | ||||||
|  | \title{Read data from 4D database} | ||||||
|  | \usage{ | ||||||
|  | read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'", | ||||||
|  |   dec = ",", na.strings = c("NA", "", "."), skip = 2, | ||||||
|  |   check.names = TRUE, strip.white = TRUE, fill = TRUE, | ||||||
|  |   blank.lines.skip = TRUE, stringsAsFactors = FALSE, | ||||||
|  |   fileEncoding = "UTF-8", encoding = "UTF-8") | ||||||
|  | } | ||||||
|  | \arguments{ | ||||||
|  | \item{file}{the name of the file which the data are to be read from. | ||||||
|  |     Each row of the table appears as one line of the file.  If it does | ||||||
|  |     not contain an \emph{absolute} path, the file name is | ||||||
|  |     \emph{relative} to the current working directory, | ||||||
|  |     \code{\link{getwd}()}. Tilde-expansion is performed where supported. | ||||||
|  |     This can be a compressed file (see \code{\link{file}}). | ||||||
|  |  | ||||||
|  |     Alternatively, \code{file} can be a readable text-mode | ||||||
|  |     \link{connection} (which will be opened for reading if | ||||||
|  |     necessary, and if so \code{\link{close}}d (and hence destroyed) at | ||||||
|  |     the end of the function call).  (If \code{\link{stdin}()} is used, | ||||||
|  |     the prompts for lines may be somewhat confusing.  Terminate input | ||||||
|  |     with a blank line or an EOF signal, \code{Ctrl-D} on Unix and | ||||||
|  |     \code{Ctrl-Z} on Windows.  Any pushback on \code{stdin()} will be | ||||||
|  |     cleared before return.) | ||||||
|  |  | ||||||
|  |     \code{file} can also be a complete URL.  (For the supported URL | ||||||
|  |     schemes, see the \sQuote{URLs} section of the help for | ||||||
|  |     \code{\link{url}}.) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | \item{header}{a logical value indicating whether the file contains the | ||||||
|  |     names of the variables as its first line.  If missing, the value is | ||||||
|  |     determined from the file format: \code{header} is set to \code{TRUE} | ||||||
|  |     if and only if the first row contains one fewer field than the | ||||||
|  |     number of columns.} | ||||||
|  |  | ||||||
|  | \item{sep}{the field separator character.  Values on each line of the | ||||||
|  |     file are separated by this character.  If \code{sep = ""} (the | ||||||
|  |     default for \code{read.table}) the separator is \sQuote{white space}, | ||||||
|  |     that is one or more spaces, tabs, newlines or carriage returns.} | ||||||
|  |  | ||||||
|  | \item{quote}{the set of quoting characters. To disable quoting | ||||||
|  |     altogether, use \code{quote = ""}.  See \code{\link{scan}} for the | ||||||
|  |     behaviour on quotes embedded in quotes.  Quoting is only considered | ||||||
|  |     for columns read as character, which is all of them unless | ||||||
|  |     \code{colClasses} is specified.} | ||||||
|  |  | ||||||
|  | \item{dec}{the character used in the file for decimal points.} | ||||||
|  |  | ||||||
|  | \item{na.strings}{a character vector of strings which are to be | ||||||
|  |     interpreted as \code{\link{NA}} values.  Blank fields are also | ||||||
|  |     considered to be missing values in logical, integer, numeric and | ||||||
|  |     complex fields.  Note that the test happens \emph{after}  | ||||||
|  |     white space is stripped from the input, so \code{na.strings}  | ||||||
|  |     values may need their own white space stripped in advance.} | ||||||
|  |  | ||||||
|  | \item{skip}{integer: the number of lines of the data file to skip before | ||||||
|  |     beginning to read data.} | ||||||
|  |  | ||||||
|  | \item{check.names}{logical.  If \code{TRUE} then the names of the | ||||||
|  |     variables in the data frame are checked to ensure that they are | ||||||
|  |     syntactically valid variable names.  If necessary they are adjusted | ||||||
|  |     (by \code{\link{make.names}}) so that they are, and also to ensure | ||||||
|  |     that there are no duplicates.} | ||||||
|  |  | ||||||
|  | \item{strip.white}{logical. Used only when \code{sep} has | ||||||
|  |     been specified, and allows the stripping of leading and trailing | ||||||
|  |     white space from unquoted \code{character} fields (\code{numeric} fields | ||||||
|  |     are always stripped).  See \code{\link{scan}} for further details | ||||||
|  |     (including the exact meaning of \sQuote{white space}), | ||||||
|  |     remembering that the columns may include the row names.} | ||||||
|  |  | ||||||
|  | \item{fill}{logical. If \code{TRUE} then in case the rows have unequal | ||||||
|  |     length, blank fields are implicitly added.  See \sQuote{Details}.} | ||||||
|  |  | ||||||
|  | \item{blank.lines.skip}{logical: if \code{TRUE} blank lines in the | ||||||
|  |     input are ignored.} | ||||||
|  |  | ||||||
|  | \item{stringsAsFactors}{logical: should character vectors be converted | ||||||
|  |     to factors?  Note that this is overridden by \code{as.is} and | ||||||
|  |     \code{colClasses}, both of which allow finer control.} | ||||||
|  |  | ||||||
|  | \item{fileEncoding}{character string: if non-empty declares the | ||||||
|  |     encoding used on a file (not a connection) so the character data can | ||||||
|  |     be re-encoded.  See the \sQuote{Encoding} section of the help for | ||||||
|  |     \code{\link{file}}, the \sQuote{R Data Import/Export Manual} and | ||||||
|  |     \sQuote{Note}. | ||||||
|  |   } | ||||||
|  |  | ||||||
|  | \item{encoding}{encoding to be assumed for input strings.  It is | ||||||
|  |     used to mark character strings as known to be in | ||||||
|  |     Latin-1 or UTF-8 (see \code{\link{Encoding}}): it is not used to | ||||||
|  |     re-encode the input, but allows \R to handle encoded strings in | ||||||
|  |     their native encoding (if one of those two).  See \sQuote{Value} | ||||||
|  |     and \sQuote{Note}. | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}. | ||||||
|  | } | ||||||
| @@ -9,7 +9,7 @@ test_that("get_locale works", { | |||||||
|   expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)") |   expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)") | ||||||
|   expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)") |   expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)") | ||||||
|   expect_identical(mo_fullname("CoNS", "it"), "Staphylococcus negativo coagulasi (CoNS)") |   expect_identical(mo_fullname("CoNS", "it"), "Staphylococcus negativo coagulasi (CoNS)") | ||||||
|   expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus à coagulase négative (CoNS)") |   expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus \u00e0 coagulase n\u00e9gative (CoNS)") | ||||||
|   expect_identical(mo_fullname("CoNS", "pt"), "Staphylococcus coagulase negativo (CoNS)") |   expect_identical(mo_fullname("CoNS", "pt"), "Staphylococcus coagulase negativo (CoNS)") | ||||||
|  |  | ||||||
| }) | }) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user