update stijgers2json.py

This commit is contained in:
Peter Kleiweg
2026-06-20 12:41:48 +02:00
parent c2389c65af
commit ce8ed07327
2 changed files with 22 additions and 14 deletions

View File

@@ -56,17 +56,24 @@ oudfile = sys.argv[1]
# Load two tab-separated count files into word -> count dictionaries.
# NOTE(review): indentation is absent in this excerpt, and it looks like a
# diff rendering whose +/- markers were stripped -- confirm against the
# actual file before relying on the exact statement list.
nieuwfile = sys.argv[2]
jsonfile = sys.argv[3]
# extras: word -> text of any columns after the second, re-joined with tabs.
extras = {}
# Both dictionaries map the stripped second column (the word) to the first
# column converted with int().
counts_recent = {}
counts_reference = {}
# oudfile is assigned before this excerpt begins.
with open(oudfile, "rt", encoding="utf-8") as fp:
for line in fp:
# A line without a tab yields a single-element list, so aa[1] would raise
# IndexError; a non-numeric first column makes int() raise ValueError.
aa = line.split("\t")
# NOTE(review): the next two statements repeat this assignment through the
# temporary `w`; presumably this is the pre-change line of the diff -- confirm.
counts_reference[aa[1].strip()] = int(aa[0])
w = aa[1].strip()
counts_reference[w] = int(aa[0])
# Keep any further columns for this word.
if len(aa) > 2:
extras[w] = '\t'.join(aa[2:]).strip()
# Same parsing for the second file, filling counts_recent instead.
with open(nieuwfile, "rt", encoding="utf-8") as fp:
for line in fp:
aa = line.split("\t")
# NOTE(review): duplicated by the two statements that follow, as above.
counts_recent[aa[1].strip()] = int(aa[0])
w = aa[1].strip()
counts_recent[w] = int(aa[0])
# This file is read second, so its extras overwrite any entry stored for the
# same word while reading oudfile.
if len(aa) > 2:
extras[w] = '\t'.join(aa[2:]).strip()
for key in counts_recent:
if not key in counts_reference:
@@ -90,6 +97,11 @@ _, p_adjusted, _, _ = multipletests(p_values, method="fdr_bh")
# Post-process the result dicts. `results` and `p_adjusted` are built before
# this excerpt; indentation is absent here, so nesting is implied only.
# Store each adjusted p-value on the result at the same position.
for r, p_adj in zip(results, p_adjusted):
r["p_g2_adjusted"] = p_adj
# Add the count from counts_recent as "n" for every result; raises KeyError
# if a result's word is not a key of counts_recent.
for i in range(len(results)):
results[i]["n"] = int(counts_recent[results[i]["word"]])
# The "extras" key is only added when at least one word had extra columns;
# words without any then get an empty string.
if len(extras) > 0:
results[i]["extras"] = extras.get(results[i]["word"], '')
# presumably the output object populated further down -- not visible here.
o = {}
#o['up'] = sorted([x for x in results if x['p_g2'] < .05 and x['pct_diff'] > 0], key=lambda x: x['g2'], reverse=True)[:40]
#o['dn'] = sorted([x for x in results if x['p_g2'] < .05 and x['pct_diff'] < 0], key=lambda x: x['g2'], reverse=True)[:40]