So only 91% is suitable for resistance analysis! We can now filter on
it with the `filter()` function, also from the `dplyr` package:
-So we end up with 2 764 isolates for analysis. Now our data looks
+So we end up with 2 724 isolates for analysis. Now our data looks
like:
Time for the analysis.
@@ -438,44 +438,44 @@ impression, as it comes with support for the new
summary(our_data_1st)
#> patient_id hospital date
-#> Length:2764 Length:2764 Min. :2011-01-01
-#> Class :character Class :character 1st Qu.:2013-04-18
-#> Mode :character Mode :character Median :2015-06-07
-#> Mean :2015-06-16
-#> 3rd Qu.:2017-08-23
-#> Max. :2020-01-01
+#> Length:2724 Length:2724 Min. :2011-01-01
+#> Class :character Class :character 1st Qu.:2013-04-07
+#> Mode :character Mode :character Median :2015-06-03
+#> Mean :2015-06-09
+#> 3rd Qu.:2017-08-11
+#> Max. :2019-12-27
#> bacteria AMX AMC
#> Class :mo Class:sir Class:sir
-#> <NA> :0 %S :42.1% (n=1163) %S :53.0% (n=1464)
-#> Unique:5 %SDD : 0.0% (n=0) %SDD : 0.0% (n=0)
-#> #1 :B_ESCHR_COLI %I :16.2% (n=447) %I :12.2% (n=337)
-#> #2 :B_STPHY_AURS %R :41.8% (n=1154) %R :34.8% (n=963)
-#> #3 :B_ENTRC_CRTL %NI : 0.0% (n=0) %NI : 0.0% (n=0)
+#> <NA> :0 %S :41.6% (n=1133) %S :52.6% (n=1432)
+#> Unique:4 %SDD : 0.0% (n=0) %SDD : 0.0% (n=0)
+#> #1 :B_ESCHR_COLI %I :16.4% (n=446) %I :12.2% (n=333)
+#> #2 :B_STPHY_AURS %R :42.0% (n=1145) %R :35.2% (n=959)
+#> #3 :B_STRPT_PNMN %NI : 0.0% (n=0) %NI : 0.0% (n=0)
#> CIP GEN first
#> Class:sir Class:sir Mode:logical
-#> %S :52.9% (n=1462) %S :61.5% (n=1699) TRUE:2764
+#> %S :52.5% (n=1431) %S :61.0% (n=1661) TRUE:2724
#> %SDD : 0.0% (n=0) %SDD : 0.0% (n=0)
-#> %I : 6.3% (n=174) %I : 3.0% (n=82)
-#> %R :40.8% (n=1128) %R :35.6% (n=983)
+#> %I : 6.5% (n=176) %I : 3.0% (n=82)
+#> %R :41.0% (n=1117) %R :36.0% (n=981)
#> %NI : 0.0% (n=0) %NI : 0.0% (n=0)
glimpse(our_data_1st)
-#> Rows: 2,764
+#> Rows: 2,724
#> Columns: 9
-#> $ patient_id <chr> "J3", "R7", "P10", "B7", "W3", "M3", "J3", "G6", "P4", "Z1"…
+#> $ patient_id <chr> "J3", "R7", "P3", "P10", "B7", "W3", "M3", "J3", "G6", "P4"…
#> $ hospital <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",…
-#> $ date <date> 2012-11-21, 2018-04-03, 2015-12-10, 2015-03-02, 2018-03-31…
-#> $ bacteria <mo> "B_ENTRC_CRTL", "B_KLBSL_PNMN", "B_ENTRC_CRTL", "B_ENTRC_CRT…
-#> $ AMX <sir> R, R, S, S, R, R, S, S, S, S, R, S, S, S, R, R, R, R, S, R,…
-#> $ AMC <sir> I, I, I, S, S, S, S, S, S, S, S, S, I, S, S, S, S, R, S, S,…
-#> $ CIP <sir> S, S, S, S, R, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S,…
-#> $ GEN <sir> S, S, S, S, S, S, S, S, S, S, R, S, S, S, S, S, S, S, S, S,…
+#> $ date <date> 2012-11-21, 2018-04-03, 2014-09-19, 2015-12-10, 2015-03-02…
+#> $ bacteria <mo> "B_ESCHR_COLI", "B_KLBSL_PNMN", "B_ESCHR_COLI", "B_ESCHR_COL…
+#> $ AMX <sir> R, R, R, S, S, R, R, S, S, S, S, R, S, S, R, R, R, R, S, R,…
+#> $ AMC <sir> I, I, S, I, S, S, S, S, S, S, S, S, S, S, S, S, S, R, S, S,…
+#> $ CIP <sir> S, S, S, S, S, R, S, S, S, S, S, S, S, S, S, S, S, S, S, S,…
+#> $ GEN <sir> S, S, S, S, S, S, S, S, S, S, S, R, S, S, S, S, S, S, S, S,…
#> $ first <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
# number of unique values per column:
sapply(our_data_1st, n_distinct)
#> patient_id hospital date bacteria AMX AMC CIP
-#> 260 3 1877 5 3 3 3
+#> 260 3 1854 4 3 3 3
#> GEN first
#> 3 1