This commit is contained in:
Peter Kleiweg
2026-06-18 12:52:40 +02:00
parent a8bea0ab44
commit 01e6d48665
13 changed files with 15363 additions and 8 deletions

28
r/test-count.R Normal file
View File

@@ -0,0 +1,28 @@
# Merge the counts of the new file (nw) and the old file (od) into one table
# with one row per word, and write that table to tmp.csv (tab-separated).
#
# Both inputs are tab-separated with two columns: `f` and `word`.
# - quote = "" and comment.char = "" make read.table take every line
#   verbatim; by default `#` starts a comment, which would silently truncate
#   any line whose word contains `#`.
# - encoding must be spelled "UTF-8"; scan() only recognises "latin1" and
#   "UTF-8", other spellings are ignored.
# - stringsAsFactors = FALSE keeps `word` a character vector on R < 4.0 too.
nw <- read.table(
  "data/2026/algemeen-count-per-2026.23-1",
  sep = "\t", quote = "", comment.char = "", encoding = "UTF-8",
  stringsAsFactors = FALSE, col.names = c("f", "word")
)
od <- read.table(
  "data/2026/algemeen-count-per-2026.22-4",
  sep = "\t", quote = "", comment.char = "", encoding = "UTF-8",
  stringsAsFactors = FALSE, col.names = c("f", "word")
)

# Sorted union of all words occurring in either file.
words <- unique(c(od$word, nw$word))
words <- words[order(words)]
n <- length(words)

# Position of every word in each input (NA when the word is absent there).
# match() returns the first occurrence, so a duplicated word takes its first
# listed value instead of triggering a replacement-length warning.
od_idx <- match(words, od$word)
nw_idx <- match(words, nw$word)

# Words missing from one of the files get a count of 0 in that column.
d <- data.frame(
  word = words,
  od = ifelse(is.na(od_idx), 0, od$f[od_idx]),
  nw = ifelse(is.na(nw_idx), 0, nw$f[nw_idx]),
  stringsAsFactors = FALSE
)

write.table(d, file = "tmp.csv", quote = FALSE, sep = "\t", row.names = FALSE, fileEncoding = "utf-8")
####
# Interactive log-log scatter plot of old counts (x) against new counts (y).
# A count of 0 is replaced by 0.5 so that its logarithm is finite.
oud <- replace(d$od, d$od == 0, 0.5)
nieuw <- replace(d$nw, d$nw == 0, 0.5)

log_oud <- log(oud)
log_nieuw <- log(nieuw)

# Axis labels are spelled out to match what plot() would derive by default
# from the expressions log(oud) and log(nieuw).
plot(log_oud, log_nieuw, xlab = "log(oud)", ylab = "log(nieuw)")

# Reference line from (min oud, min nieuw) to (max oud, max nieuw).
lines(log(range(oud)), log(range(nieuw)))

# Click on points in the plot to label them with their word.
identify(log_oud, log_nieuw, labels = words)

32
r/test-rang.R Normal file
View File

@@ -0,0 +1,32 @@
# Merge the values of the new file (nw) and the old file (od) into one table
# with one row per word.
#
# Both inputs are tab-separated with two columns: `f` and `word`
# (presumably `f` holds the rank of the word here -- confirm against the
# script that generates the "rang" files).
# - quote = "" and comment.char = "" make read.table take every line
#   verbatim; by default `#` starts a comment, which would silently truncate
#   any line whose word contains `#`.
# - encoding must be spelled "UTF-8"; scan() only recognises "latin1" and
#   "UTF-8", other spellings are ignored.
# - stringsAsFactors = FALSE keeps `word` a character vector on R < 4.0 too.
nw <- read.table(
  "data/2026/algemeen-rang-per-2026.23-1",
  sep = "\t", quote = "", comment.char = "", encoding = "UTF-8",
  stringsAsFactors = FALSE, col.names = c("f", "word")
)
od <- read.table(
  "data/2026/algemeen-rang-per-2026.22-4",
  sep = "\t", quote = "", comment.char = "", encoding = "UTF-8",
  stringsAsFactors = FALSE, col.names = c("f", "word")
)

# Sorted union of all words occurring in either file.
words <- unique(c(od$word, nw$word))
words <- words[order(words)]
n <- length(words)

# Look every word up in both inputs at once. match() yields NA for a word
# that is absent from a file, so the corresponding value becomes NA; for a
# duplicated word the first occurrence is used.
d <- data.frame(
  word = words,
  od = od$f[match(words, od$word)],
  nw = nw$f[match(words, nw$word)],
  stringsAsFactors = FALSE
)

# Words missing from a file get a value two above that file's maximum.
d$nw[is.na(d$nw)] <- max(nw$f) + 2
d$od[is.na(d$od)] <- max(od$f) + 2
# Plot `values` on the x-axis against their index (1, 2, ...) on the y-axis
# and write the matching label to the right of every point.
#
# values: numeric vector of scores, plotted in the order given.
# labels: labels for the points, drawn with text() at the same coordinates.
# titel:  main title of the plot.
# sub:    subtitle of the plot.
#
# Returns NULL invisibly (the function is called for its plot).
myplot <- function(values, labels, titel = "", sub = "") {
  # seq_along() instead of 1:length(): stays empty for empty input.
  y <- seq_along(values)
  xx <- range(values)
  # Extend the x-axis by a fifth of the data range on the right so there is
  # room for the labels placed to the right of the points (pos = 4).
  x_limits <- c(xx[1], xx[2] + (xx[2] - xx[1]) / 5)
  plot(values, y, xlim = x_limits, xlab = "score", ylab = "index", main = titel, sub = sub)
  text(values, y, labels, pos = 4)
  invisible(NULL)
}
# Normalise the values of both files: value 1 maps to 0, and the fill-in
# value used for absent words (max + 2) maps to 1.
nwn <- (d$nw - 1) / (max(nw$f) + 1)
odn <- (d$od - 1) / (max(od$f) + 1)

# Score per word: normalised old value minus normalised new value.
v <- odn - nwn

# Highest scores first.
o <- order(-v)

# Plot at most the 40 highest-scoring words. head() stops at the end of the
# vector, whereas indexing with [1:40] pads with NA when there are fewer
# than 40 words, which makes the plot fail.
top <- head(o, 40)
myplot(v[top], words[top], titel = "score op basis van genormaliseerde rang", sub = "week 23 t.o.v. week 19 t/m 22")