diff --git a/DESCRIPTION b/DESCRIPTION
index f8366141..d652038a 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9019
-Date: 2018-08-01
+Version: 0.2.0.9020
+Date: 2018-08-02
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
     person(
diff --git a/NEWS.md b/NEWS.md
index b3daa2a4..80c5afdb 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,7 +7,11 @@
   * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin,  trimethoprim/sulfamethoxazole
   * Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
   * Gram-negative: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
-* Functions `as.bactid` and `is.bactid` to transform/look up microbial ID's
+* Determining bacterial ID:
+  * New functions `as.bactid` and `is.bactid` to transform/look up microbial ID's.
+  * The existing function `guess_bactid` is now an alias of `as.bactid`
+  * New Becker classification for *Staphylococcus* to categorise them into Coagulase Negative *Staphylococci* (CoNS) and Coagulase Positive *Staphylococci* (CoPS)
+  * New Lancefield classification for *Streptococcus* to categorise them into Lancefield groups
 * For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
 * Function `g.test` to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test`
 * Function `ratio` to transform a vector of values to a preset ratio
diff --git a/R/bactid.R b/R/bactid.R
index 0c1bcb98..07218704 100644
--- a/R/bactid.R
+++ b/R/bactid.R
@@ -20,10 +20,12 @@
 #'
 #' Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
 #' @param x a character vector or a dataframe with one or two columns
+#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. This excludes \emph{Staphylococcus aureus} by default; use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".
+#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, i.e. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. Groups D and E will be ignored, since they are \emph{Enterococci}.
 #' @rdname as.bactid
-#' @details \code{guess_bactid} does exactly the same as \code{as.bactid}.
+#' @details \code{guess_bactid} is an alias of \code{as.bactid}.
 #'
-#' Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. For example:
+#' Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. These are:
 #' \itemize{
 #'   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
 #'   \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
@@ -32,6 +34,11 @@
 #' }
 #' Moreover, this function also supports ID's based on only Gram stain, when the species is not known. \cr
 #' For example, \code{"Gram negative rods"} and \code{"GNR"} will both return the ID of a Gram negative rod: \code{GNR}.
+#' @source
+#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \cr
+#'     \url{https://dx.doi.org/10.1128/CMR.00109-13} \cr
+#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \cr
+#'     \url{https://dx.doi.org/10.1084/jem.57.4.571}
 #' @export
 #' @importFrom dplyr %>% filter pull
 #' @return Character (vector) with class \code{"bactid"}. Unknown values will return \code{NA}.
@@ -48,6 +55,12 @@
 #' as.bactid("VISA") # Vancomycin Intermediate S. aureus
 #' as.bactid("VRSA") # Vancomycin Resistant S. aureus
 #'
+#' guess_bactid("S. epidermidis")                 # will remain species: STAEPI
+#' guess_bactid("S. epidermidis", Becker = TRUE)  # will not remain species: STACNS
+#'
+#' guess_bactid("S. pyogenes")                    # will remain species: STCPYO
+#' guess_bactid("S. pyogenes", Lancefield = TRUE) # will not remain species: STCGRA
+#'
 #' \dontrun{
 #' df$bactid <- as.bactid(df$microorganism_name)
 #'
@@ -66,7 +79,7 @@
 #' df <- df %>%
 #'   mutate(bactid = guess_bactid(paste(genus, species)))
 #' }
-as.bactid <- function(x) {
+as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
 
   failures <- character(0)
 
@@ -96,13 +109,79 @@ as.bactid <- function(x) {
   x <- trimws(x, which = "both")
   x.backup <- x
   # replace space by regex sign
+  x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE)
   x <- gsub(" ", ".*", x, fixed = TRUE)
-  # add start and stop
+  # for species
   x_species <- paste(x, 'species')
+  # add start and stop regex
   x <- paste0('^', x, '$')
+  x_withspaces <- paste0('^', x_withspaces, '$')
 
   for (i in 1:length(x)) {
 
+    if (Becker == TRUE | Becker == "all") {
+      mo <- suppressWarnings(guess_bactid(x.fullbackup[i]))
+      if (mo %like% '^STA') {
+        # See Source. It's this figure:
+        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
+        species <- left_join_microorganisms(mo)$species
+        if (species %in% c("arlettae", "auricularis", "capitis",
+                           "caprae", "carnosus", "cohnii", "condimene",
+                           "devriesei", "epidermidis", "equorum",
+                           "fleurettii", "gallinarum", "haemolyticus",
+                           "hominis", "jettensis", "kloosii", "lentus",
+                           "lugdunensis", "massiliensis", "microti",
+                           "muscae", "nepalensis", "pasteuri", "perrasii",
+                           "pettenkoleri", "piscifermentans", "rostri",
+                           "saccharott", "saprophyticus", "sciuri",
+                           "siepanovicii", "simulans", "succinus",
+                           "vitulinus", "warneri", "xylosus")) {
+          x[i] <- "STACNS"
+          next
+        } else if ((Becker == "all"  & species == "aureus")
+                   | species %in% c("simiae", "agnetis", "chromogenes",
+                                    "delphirul", "felis", "futrae",
+                                    "hyicus", "intermedius",
+                                    "pseudointermedius", "schleiferi")) {
+          x[i] <- "STACPS"
+          next
+        }
+      }
+    }
+
+    if (Lancefield == TRUE) {
+      mo <- suppressWarnings(guess_bactid(x.fullbackup[i]))
+      if (mo %like% '^STC') {
+        # See Source
+        species <- left_join_microorganisms(mo)$species
+        if (species == "pyogenes") {
+          x[i] <- "STCGRA"
+          next
+        }
+        if (species == "agalactiae") {
+          x[i] <- "STCGRB"
+          next
+        }
+        if (species %in% c("equisimilis", "equi",
+                           "zooepidemicus", "dysgalactiae")) {
+          x[i] <- "STCGRC"
+          next
+        }
+        if (species == "anginosus") {
+          x[i] <- "STCGRF"
+          next
+        }
+        if (species == "sanguis") {
+          x[i] <- "STCGRH"
+          next
+        }
+        if (species == "salivarius") {
+          x[i] <- "STCGRK"
+          next
+        }
+      }
+    }
+
     if (identical(x.backup[i], "")) {
       # empty values
       x[i] <- NA
@@ -142,7 +221,7 @@ as.bactid <- function(x) {
       x[i] <- 'PSEAER'
       next
     }
-    if (tolower(x[i]) %like% 'coagulase'
+    if (tolower(x[i]) %like% 'coagulase negative'
         | tolower(x[i]) %like% 'cns'
         | tolower(x[i]) %like% 'cons') {
       # coerce S. coagulase negative, also as CNS and CoNS
@@ -192,7 +271,14 @@ as.bactid <- function(x) {
       next
     }
 
-    # try any match
+    # try any match keeping spaces
+    found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% x_withspaces[i]),]$bactid
+    if (length(found) > 0) {
+      x[i] <- found[1L]
+      next
+    }
+
+    # try any match disregarding spaces
     found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% x[i]),]$bactid
     if (length(found) > 0) {
       x[i] <- found[1L]
@@ -200,7 +286,7 @@ as.bactid <- function(x) {
     }
 
     # try exact match of only genus, with 'species' attached
-    # (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s")
+    # (this prevents Streptococcus from becoming Peptostreptococcus, since "p" < "s")
     found <- AMR::microorganisms[which(AMR::microorganisms$fullname == x_species[i]),]$bactid
     if (length(found) > 0) {
       x[i] <- found[1L]
diff --git a/R/data.R b/R/data.R
index a83b798c..aee20e3e 100755
--- a/R/data.R
+++ b/R/data.R
@@ -236,8 +236,8 @@
 
 #' Dataset with ~2500 microorganisms
 #'
-#' A dataset containing 2453 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
-#' @format A data.frame with 2453 observations and 12 variables:
+#' A dataset containing 2456 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
+#' @format A data.frame with 2456 observations and 12 variables:
 #' \describe{
 #'   \item{\code{bactid}}{ID of microorganism}
 #'   \item{\code{bactsys}}{Bactsyscode of microorganism}
diff --git a/README.md b/README.md
index 805465f6..b68843fc 100755
--- a/README.md
+++ b/README.md
@@ -33,6 +33,8 @@ With `AMR` you can:
     * Universal: amoxicillin, amoxicillin/clavlanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin,  trimethoprim/sulfamethoxazole
     * Specific for Gram-positives: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
     * Specific for Gram-negatives: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
+* Categorise *Staphylococci* into Coagulase Negative *Staphylococci* (CoNS) and Coagulase Positive *Staphylococci* (CoPS) according to [Karsten Becker *et al.*](https://www.ncbi.nlm.nih.gov/pmc/articles/pmid/25278577/)
+* Categorise *Streptococci* into Lancefield groups
 * Get antimicrobial ATC properties from the WHO Collaborating Centre for Drug Statistics Methodology ([WHOCC](https://www.whocc.no/atc_ddd_methodology/who_collaborating_centre/)), to be able to:
   * Translate antibiotic codes (like *AMOX*), official names (like *amoxicillin*) and even trade names (like *Amoxil* or *Trimox*) to an [ATC code](https://www.whocc.no/atc_ddd_index/?code=J01CA04&showdescription=no) (like *J01CA04*) and vice versa with the `abname` function
   * Get the latest antibiotic properties like hierarchic groups and [defined daily dose](https://en.wikipedia.org/wiki/Defined_daily_dose) (DDD) with units and administration form from the WHOCC website with the `atc_property` function
diff --git a/data/microorganisms.rda b/data/microorganisms.rda
index d66b105d..a540ee69 100755
Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ
diff --git a/man/as.bactid.Rd b/man/as.bactid.Rd
index ebd58f68..4bf99cc7 100644
--- a/man/as.bactid.Rd
+++ b/man/as.bactid.Rd
@@ -5,15 +5,25 @@
 \alias{guess_bactid}
 \alias{is.bactid}
 \title{Transform to bacteria ID}
+\source{
+[1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \cr
+    \url{https://dx.doi.org/10.1128/CMR.00109-13} \cr
+[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \cr
+    \url{https://dx.doi.org/10.1084/jem.57.4.571}
+}
 \usage{
-as.bactid(x)
+as.bactid(x, Becker = FALSE, Lancefield = FALSE)
 
-guess_bactid(x)
+guess_bactid(x, Becker = FALSE, Lancefield = FALSE)
 
 is.bactid(x)
 }
 \arguments{
 \item{x}{a character vector or a dataframe with one or two columns}
+
+\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. This excludes \emph{Staphylococcus aureus} by default; use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".}
+
+\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, i.e. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. Groups D and E will be ignored, since they are \emph{Enterococci}.}
 }
 \value{
 Character (vector) with class \code{"bactid"}. Unknown values will return \code{NA}.
@@ -22,9 +32,9 @@ Character (vector) with class \code{"bactid"}. Unknown values will return \code{
 Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
 }
 \details{
-\code{guess_bactid} does exactly the same as \code{as.bactid}.
+\code{guess_bactid} is an alias of \code{as.bactid}.
 
-Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. For example:
+Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. These are:
 \itemize{
   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
   \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
@@ -46,6 +56,12 @@ as.bactid("MRSA") # Methicillin Resistant S. aureus
 as.bactid("VISA") # Vancomycin Intermediate S. aureus
 as.bactid("VRSA") # Vancomycin Resistant S. aureus
 
+guess_bactid("S. epidermidis")                 # will remain species: STAEPI
+guess_bactid("S. epidermidis", Becker = TRUE)  # will not remain species: STACNS
+
+guess_bactid("S. pyogenes")                    # will remain species: STCPYO
+guess_bactid("S. pyogenes", Lancefield = TRUE) # will not remain species: STCGRA
+
 \dontrun{
 df$bactid <- as.bactid(df$microorganism_name)
 
diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd
index 2dd3a2d6..48a792d3 100755
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@@ -4,7 +4,7 @@
 \name{microorganisms}
 \alias{microorganisms}
 \title{Dataset with ~2500 microorganisms}
-\format{A data.frame with 2453 observations and 12 variables:
+\format{A data.frame with 2456 observations and 12 variables:
 \describe{
   \item{\code{bactid}}{ID of microorganism}
   \item{\code{bactsys}}{Bactsyscode of microorganism}
@@ -23,7 +23,7 @@
 microorganisms
 }
 \description{
-A dataset containing 2453 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
+A dataset containing 2456 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
 }
 \seealso{
 \code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
diff --git a/tests/testthat/test-bactid.R b/tests/testthat/test-bactid.R
index 38ff93d9..73b32342 100644
--- a/tests/testthat/test-bactid.R
+++ b/tests/testthat/test-bactid.R
@@ -30,6 +30,33 @@ test_that("as.bactid works", {
                      "VISA"))),
     rep("STAAUR", 8))
 
+  # check for Becker classification
+  expect_identical(as.character(guess_bactid("S. epidermidis", Becker = FALSE)), "STAEPI")
+  expect_identical(as.character(guess_bactid("S. epidermidis", Becker = TRUE)),  "STACNS")
+  expect_identical(as.character(guess_bactid("STAEPI",         Becker = TRUE)),  "STACNS")
+  expect_identical(as.character(guess_bactid("S. intermedius", Becker = FALSE)), "STAINT")
+  expect_identical(as.character(guess_bactid("S. intermedius", Becker = TRUE)),  "STACPS")
+  expect_identical(as.character(guess_bactid("STAINT",         Becker = TRUE)),  "STACPS")
+  # aureus must only be influenced if Becker = "all"
+  expect_identical(as.character(guess_bactid("STAAUR", Becker = FALSE)), "STAAUR")
+  expect_identical(as.character(guess_bactid("STAAUR", Becker = TRUE)),  "STAAUR")
+  expect_identical(as.character(guess_bactid("STAAUR", Becker = "all")), "STACPS")
+
+  # check for Lancefield classification
+  expect_identical(as.character(guess_bactid("S. pyogenes", Lancefield = FALSE)), "STCPYO")
+  expect_identical(as.character(guess_bactid("S. pyogenes", Lancefield = TRUE)),  "STCGRA")
+  expect_identical(as.character(guess_bactid("STCPYO",      Lancefield = TRUE)),  "STCGRA")
+  expect_identical(as.character(guess_bactid("S. agalactiae",  Lancefield = FALSE)),  "STCAGA")
+  expect_identical(as.character(guess_bactid("S. agalactiae",  Lancefield = TRUE)),   "STCGRB") # group B
+  expect_identical(as.character(guess_bactid("S. equisimilis", Lancefield = FALSE)),  "STCEQS")
+  expect_identical(as.character(guess_bactid("S. equisimilis", Lancefield = TRUE)),   "STCGRC") # group C
+  expect_identical(as.character(guess_bactid("S. anginosus",   Lancefield = FALSE)),  "STCANG")
+  expect_identical(as.character(guess_bactid("S. anginosus",   Lancefield = TRUE)),   "STCGRF") # group F
+  expect_identical(as.character(guess_bactid("S. sanguis",     Lancefield = FALSE)),  "STCSAN")
+  expect_identical(as.character(guess_bactid("S. sanguis",     Lancefield = TRUE)),   "STCGRH") # group H
+  expect_identical(as.character(guess_bactid("S. salivarius",  Lancefield = FALSE)),  "STCSAL")
+  expect_identical(as.character(guess_bactid("S. salivarius",  Lancefield = TRUE)),   "STCGRK") # group K
+
   # select with one column
   expect_identical(
     septic_patients[1:10,] %>%
diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R
index 8f37f4f7..4f13ee73 100755
--- a/tests/testthat/test-first_isolate.R
+++ b/tests/testthat/test-first_isolate.R
@@ -1,7 +1,7 @@
 context("first_isolate.R")
 
 test_that("first isolates work", {
-  # septic_patients contains 1959 out of 2000 first isolates
+  # septic_patients contains 1331 out of 2000 first isolates
   expect_equal(
     sum(
       first_isolate(tbl = septic_patients,
@@ -10,9 +10,9 @@ test_that("first isolates work", {
                     col_bactid = "bactid",
                     info = TRUE),
       na.rm = TRUE),
-    1326)
+    1331)
 
-  # septic_patients contains 1962 out of 2000 first *weighted* isolates
+  # septic_patients contains 1426 out of 2000 first *weighted* isolates
   expect_equal(
     suppressWarnings(
       sum(
@@ -24,8 +24,8 @@ test_that("first isolates work", {
                       type = "keyantibiotics",
                       info = TRUE),
         na.rm = TRUE)),
-    1421)
-  # and 1961 when using points
+    1426)
+  # and 1430 when using points
   expect_equal(
     suppressWarnings(
       sum(
@@ -37,9 +37,9 @@ test_that("first isolates work", {
                       type = "points",
                       info = TRUE),
         na.rm = TRUE)),
-    1425)
+    1430)
 
-  # septic_patients contains 1732 out of 2000 first non-ICU isolates
+  # septic_patients contains 1176 out of 2000 first non-ICU isolates
   expect_equal(
     sum(
       first_isolate(septic_patients,
@@ -50,7 +50,7 @@ test_that("first isolates work", {
                     info = TRUE,
                     icu_exclude = TRUE),
       na.rm = TRUE),
-    1171)
+    1176)
 
   # set 1500 random observations to be of specimen type 'Urine'
   random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE)