# Compare word frequencies between two count files: the 2026.22-4 list is
# treated as "old" (od / oud) and the 2026.23-1 list as "new" (nw / nieuw).
# The script writes a merged table to tmp.csv and then draws an interactive
# log-log scatter plot of old versus new counts.

# Read one tab-separated count file with the columns f (count) and word.
read_counts <- function(path) {
  read.table(
    path,
    sep = "\t",
    quote = "",
    # A word containing "#" would otherwise be cut off at that character,
    # because read.table() treats "#" as a comment marker by default.
    comment.char = "",
    # "UTF-8" is the spelling read.table() documents for marking strings.
    encoding = "UTF-8",
    col.names = c("f", "word"),
    # Keep words as character so c()/unique() operate on the text rather
    # than on factor codes (factors were the default before R 4.0).
    stringsAsFactors = FALSE
  )
}

# Return the count of each entry of `words` in `counts`, 0 when absent.
# When a word occurs more than once in `counts`, its first row is used.
lookup_counts <- function(words, counts) {
  idx <- match(words, counts$word)
  # assumes the f column is numeric (counts) -- TODO confirm
  found <- as.numeric(counts$f[idx])
  found[is.na(idx)] <- 0
  found
}

nw <- read_counts("data/2026/algemeen-count-per-2026.23-1")
od <- read_counts("data/2026/algemeen-count-per-2026.22-4")

# Sorted union of all words seen in either file.
words <- unique(c(od$word, nw$word))
o <- order(words)
words <- words[o]
n <- length(words)

# One row per word with its old and new count (0 when the word is missing
# from that file).
d <- data.frame(
  word = words,
  od = lookup_counts(words, od),
  nw = lookup_counts(words, nw),
  stringsAsFactors = FALSE
)

write.table(
  d,
  file = "tmp.csv",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  fileEncoding = "utf-8"
)

# Plot ----

oud <- d$od
nieuw <- d$nw

# log(0) is -Inf, so absent words are plotted as if they had count 0.5.
oud[oud == 0] <- 0.5
nieuw[nieuw == 0] <- 0.5

plot(log(oud), log(nieuw))
# Reference line from the (min, min) corner to the (max, max) corner.
lines(log(range(oud)), log(range(nieuw)))
# Interactive: click points in the plot to label them with their word.
identify(log(oud), log(nieuw), labels = words)