So only 90% of the isolates are suitable for resistance analysis! We can now filter on
it with the filter()
function, also from the
dplyr
package:
So we end up with 2 626 isolates for analysis. Now our data looks
+
So we end up with 2 712 isolates for analysis. Now our data looks
like:
Time for the analysis.
@@ -557,44 +557,44 @@ impression, as it comes with support for the new
summary(our_data_1st)
#> patient_id hospital date
-#> Length:2626 Length:2626 Min. :2011-01-01
-#> Class :character Class :character 1st Qu.:2013-04-14
-#> Mode :character Mode :character Median :2015-06-05
-#> Mean :2015-06-15
-#> 3rd Qu.:2017-08-23
-#> Max. :2020-01-01
+#> Length:2712 Length:2712 Min. :2011-01-01
+#> Class :character Class :character 1st Qu.:2013-05-03
+#> Mode :character Mode :character Median :2015-06-16
+#> Mean :2015-06-21
+#> 3rd Qu.:2017-08-24
+#> Max. :2019-12-27
#> bacteria AMX AMC
#> Class :mo Class:sir Class:sir
-#> <NA> :0 %R :43.2% (n=1134) %R :36.1% (n=947)
-#> Unique:4 %SI :56.8% (n=1492) %SI :63.9% (n=1679)
-#> #1 :B_ESCHR_COLI - %S :41.1% (n=1080) - %S :52.7% (n=1383)
-#> #2 :B_STPHY_AURS - %I :15.7% (n=412) - %I :11.3% (n=296)
+#> <NA> :0 %R :42.9% (n=1163) %R :36.1% (n=978)
+#> Unique:4 %SI :57.1% (n=1549) %SI :63.9% (n=1734)
+#> #1 :B_ESCHR_COLI - %S :41.0% (n=1112) - %S :52.0% (n=1409)
+#> #2 :B_STPHY_AURS - %I :16.1% (n=437) - %I :12.0% (n=325)
#> #3 :B_STRPT_PNMN
#> CIP GEN first
#> Class:sir Class:sir Mode:logical
-#> %R :42.0% (n=1102) %R :37.0% (n=971) TRUE:2626
-#> %SI :58.0% (n=1524) %SI :63.0% (n=1655)
-#> - %S :51.9% (n=1362) - %S :59.9% (n=1574)
-#> - %I : 6.2% (n=162) - %I : 3.1% (n=81)
+#> %R :42.0% (n=1138) %R :37.3% (n=1011) TRUE:2712
+#> %SI :58.0% (n=1574) %SI :62.7% (n=1701)
+#> - %S :51.5% (n=1396) - %S :59.6% (n=1616)
+#> - %I : 6.6% (n=178) - %I : 3.1% (n=85)
#>
glimpse(our_data_1st)
-#> Rows: 2,626
+#> Rows: 2,712
#> Columns: 9
-#> $ patient_id <chr> "J3", "R7", "P10", "B7", "W3", "J8", "M3", "J3", "G6", "P4"…
+#> $ patient_id <chr> "J3", "R7", "P10", "B7", "W3", "M3", "J3", "G6", "P4", "Z1"…
#> $ hospital <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",…
#> $ date <date> 2012-11-21, 2018-04-03, 2015-12-10, 2015-03-02, 2018-03-31…
#> $ bacteria <mo> "B_ESCHR_COLI", "B_KLBSL_PNMN", "B_ESCHR_COLI", "B_ESCHR_COL…
-#> $ AMX <sir> R, R, S, S, R, R, R, S, S, S, S, R, S, S, R, R, R, R, I, S,…
-#> $ AMC <sir> I, I, I, S, S, S, S, S, S, S, S, S, S, S, S, S, S, R, S, R,…
+#> $ AMX <sir> R, R, S, S, R, R, S, S, S, S, R, S, S, S, R, R, R, R, S, R,…
+#> $ AMC <sir> I, I, I, S, S, S, S, S, S, S, S, S, I, S, S, S, S, R, S, S,…
#> $ CIP <sir> S, S, S, S, R, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S,…
-#> $ GEN <sir> S, S, S, S, S, S, S, S, S, S, S, R, S, S, S, S, S, S, S, S,…
+#> $ GEN <sir> S, S, S, S, S, S, S, S, S, S, R, S, S, S, S, S, S, S, S, S,…
#> $ first <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
# number of unique values per column:
sapply(our_data_1st, n_distinct)
#> patient_id hospital date bacteria AMX AMC CIP
-#> 260 3 1808 4 3 3 3
+#> 260 3 1852 4 3 3 3
#> GEN first
#> 3 1