This is basically it for the cleaning. Time to start the data
inclusion.
@@ -404,9 +404,9 @@ the methods on the So only 92% is suitable for resistance analysis! We can now filter on
it with the filter()
function, also from the
dplyr
package:
So we end up with 2 724 isolates for analysis. Now our data looks
+
So we end up with 2 764 isolates for analysis. Now our data looks
like:
Time for the analysis.
@@ -446,34 +446,34 @@ impression, as it comes with support for the new
summary(our_data_1st)
#> patient_id hospital date
-#> Length:2724 Length:2724 Min. :2011-01-01
-#> Class :character Class :character 1st Qu.:2013-04-26
-#> Mode :character Mode :character Median :2015-06-14
-#> Mean :2015-06-18
-#> 3rd Qu.:2017-08-21
-#> Max. :2019-12-27
+#> Length:2764 Length:2764 Min. :2011-01-01
+#> Class :character Class :character 1st Qu.:2013-04-18
+#> Mode :character Mode :character Median :2015-06-07
+#> Mean :2015-06-16
+#> 3rd Qu.:2017-08-23
+#> Max. :2020-01-01
#> bacteria AMX AMC
#> Class :mo Class:sir Class:sir
-#> <NA> :0 %S :41.4% (n=1127) %S :52.2% (n=1421)
+#> <NA> :0 %S :42.1% (n=1163) %S :53.0% (n=1464)
#> Unique:5 %SDD : 0.0% (n=0) %SDD : 0.0% (n=0)
-#> #1 :B_ESCHR_COLI %I :16.1% (n=438) %I :12.1% (n=329)
-#> #2 :B_STPHY_AURS %R :42.5% (n=1159) %R :35.8% (n=974)
-#> #3 :B_STRPT_PNMN %NI : 0.0% (n=0) %NI : 0.0% (n=0)
+#> #1 :B_ESCHR_COLI %I :16.2% (n=447) %I :12.2% (n=337)
+#> #2 :B_STPHY_AURS %R :41.8% (n=1154) %R :34.8% (n=963)
+#> #3 :B_ENTRC_CRTL %NI : 0.0% (n=0) %NI : 0.0% (n=0)
#> CIP GEN first
#> Class:sir Class:sir Mode:logical
-#> %S :51.7% (n=1407) %S :59.9% (n=1632) TRUE:2724
+#> %S :52.9% (n=1462) %S :61.5% (n=1699) TRUE:2764
#> %SDD : 0.0% (n=0) %SDD : 0.0% (n=0)
-#> %I : 6.5% (n=178) %I : 3.1% (n=85)
-#> %R :41.8% (n=1139) %R :37.0% (n=1007)
+#> %I : 6.3% (n=174) %I : 3.0% (n=82)
+#> %R :40.8% (n=1128) %R :35.6% (n=983)
#> %NI : 0.0% (n=0) %NI : 0.0% (n=0)
glimpse(our_data_1st)
-#> Rows: 2,724
+#> Rows: 2,764
#> Columns: 9
#> $ patient_id <chr> "J3", "R7", "P10", "B7", "W3", "M3", "J3", "G6", "P4", "Z1"…
#> $ hospital <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A",…
#> $ date <date> 2012-11-21, 2018-04-03, 2015-12-10, 2015-03-02, 2018-03-31…
-#> $ bacteria <mo> "B_ESCHR_COLI", "B_KLBSL_PNMN", "B_ESCHR_COLI", "B_ESCHR_COL…
+#> $ bacteria <mo> "B_ENTRC_CRTL", "B_KLBSL_PNMN", "B_ENTRC_CRTL", "B_ENTRC_CRT…
#> $ AMX <sir> R, R, S, S, R, R, S, S, S, S, R, S, S, S, R, R, R, R, S, R,…
#> $ AMC <sir> I, I, I, S, S, S, S, S, S, S, S, S, I, S, S, S, S, R, S, S,…
#> $ CIP <sir> S, S, S, S, R, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S,…
@@ -483,7 +483,7 @@ impression, as it comes with support for the new mo
and
# number of unique values per column:
sapply(our_data_1st, n_distinct)
#> patient_id hospital date bacteria AMX AMC CIP
-#> 260 3 1853 5 3 3 3
+#> 260 3 1877 5 3 3 3
#> GEN first
#> 3 1