Files
nlnieuws/python/namen.ipynb
Peter Kleiweg 01e6d48665 stijgers
2026-06-18 12:52:40 +02:00

1564 lines
59 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "eec04efd-3f8f-4032-8038-83d61bff1792",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from scipy.stats import chi2_contingency\n",
"from statsmodels.stats.multitest import multipletests\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e3fcbc08-25aa-47fd-bee3-b468982bf04b",
"metadata": {},
"outputs": [],
"source": [
"def corpus_stats(word, counts_recent, counts_reference, total_recent, total_reference):\n",
"    \"\"\"\n",
"    Compare a word's relative frequency in a recent corpus with a reference corpus.\n",
"\n",
"    The counts are arranged in a 2x2 contingency table (word / non-word by\n",
"    recent / reference) and tested with Pearson's chi-squared and the\n",
"    log-likelihood ratio (G²), both without Yates' continuity correction.\n",
"\n",
"    word             : the word being tested\n",
"    counts_recent    : raw count in week 5\n",
"    counts_reference : raw count in weeks 1-4\n",
"    total_recent     : total tokens in week 5\n",
"    total_reference  : total tokens in weeks 1-4\n",
"\n",
"    Returns a dict with keys word, freq_recent, freq_reference, pct_diff,\n",
"    log_ratio, chi2, p_chi2, g2 and p_g2. pct_diff is +inf when the word\n",
"    does not occur in the reference corpus.\n",
"    \"\"\"\n",
"    a = counts_recent                  # word in recent\n",
"    b = counts_reference               # word in reference\n",
"    c = total_recent - a               # non-word in recent\n",
"    d = total_reference - b            # non-word in reference\n",
"\n",
"    contingency = np.array([[a, b],\n",
"                            [c, d]])\n",
"\n",
"    # --- Chi-Squared ---\n",
"    chi2_stat, p_chi2, _, _ = chi2_contingency(contingency, correction=False)\n",
"\n",
"    # --- Log-Likelihood (G²) ---\n",
"    # G² = 2 * sum(observed * log(observed / expected))\n",
"    # correction=False is needed here as well: for a 2x2 table scipy would\n",
"    # otherwise apply Yates' correction to the observed counts first, and the\n",
"    # statistic would no longer match the formula above.\n",
"    g2_stat, p_g2, _, _ = chi2_contingency(\n",
"        contingency, correction=False, lambda_=\"log-likelihood\")\n",
"\n",
"    # --- Effect sizes ---\n",
"    freq_recent = a / total_recent\n",
"    freq_reference = b / total_reference\n",
"\n",
"    if freq_reference == 0:\n",
"        # The word is new in the recent corpus: the relative increase is\n",
"        # unbounded, so report +inf instead of dividing by zero.\n",
"        pct_diff = float(\"inf\")\n",
"    else:\n",
"        pct_diff = (freq_recent - freq_reference) / freq_reference * 100\n",
"\n",
"    # Avoid log(0) with a small epsilon\n",
"    eps = 1e-9\n",
"    log_ratio = np.log2((freq_recent + eps) / (freq_reference + eps))\n",
"\n",
"    return {\n",
"        \"word\": word,\n",
"        \"freq_recent\": freq_recent,\n",
"        \"freq_reference\": freq_reference,\n",
"        \"pct_diff\": pct_diff,\n",
"        \"log_ratio\": log_ratio,\n",
"        \"chi2\": chi2_stat,\n",
"        \"p_chi2\": p_chi2,\n",
"        \"g2\": g2_stat,\n",
"        \"p_g2\": p_g2,\n",
"    }"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2888f740-1539-4f62-a13e-73512a2be290",
"metadata": {},
"outputs": [],
"source": [
"# Read tab-separated rows from data.txt: column 0 is the name, column 1 the\n",
"# reference count and column 2 the recent count.\n",
"counts_recent = {}\n",
"counts_reference = {}\n",
"with open(\"data.txt\", \"rt\", encoding=\"utf-8\") as fp:\n",
"    for line in fp:\n",
"        if not line.strip():\n",
"            # A blank line (e.g. a trailing newline at EOF) has no fields.\n",
"            continue\n",
"        aa = line.split(\"\\t\")\n",
"        # Counts are floored at 0.5 so that a zero count never yields a zero\n",
"        # frequency in the statistics computed from these dicts.\n",
"        counts_reference[aa[0]] = max(int(aa[1]), 0.5)\n",
"        counts_recent[aa[0]] = max(int(aa[2]), 0.5)\n",
"# The totals are sums of the floored counts, not of the raw file values.\n",
"total_recent = sum(counts_recent.values())\n",
"total_reference = sum(counts_reference.values())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5ef99c2b-50d9-450b-854c-bfa9d688c63b",
"metadata": {},
"outputs": [],
"source": [
"# One corpus_stats record per name in counts_recent; a name missing from\n",
"# counts_reference is passed with a reference count of 0.\n",
"results = list(\n",
"    corpus_stats(\n",
"        word,\n",
"        n_recent,\n",
"        counts_reference.get(word, 0),\n",
"        total_recent,\n",
"        total_reference,\n",
"    )\n",
"    for word, n_recent in counts_recent.items()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "285064c8-bc52-4096-9aa5-bc57115fb813",
"metadata": {},
"outputs": [],
"source": [
"# Benjamini-Hochberg FDR correction over the G² p-values of all words.\n",
"p_values = [row[\"p_g2\"] for row in results]\n",
"# multipletests returns a 4-tuple; element 1 holds the corrected p-values.\n",
"p_adjusted = multipletests(p_values, method=\"fdr_bh\")[1]\n",
"\n",
"# Attach each corrected p-value to its record, in the same order.\n",
"for idx, row in enumerate(results):\n",
"    row[\"p_g2_adjusted\"] = p_adjusted[idx]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c0146062-608a-40dd-80b7-742df67ca9ee",
"metadata": {},
"outputs": [],
"source": [
"# Turn the list of per-word records into a table, one row per word.\n",
"# NOTE: this rebinds `results` from a list of dicts to a DataFrame, so the\n",
"# cells above that iterate over the records must not be re-run after this one.\n",
"results = pd.DataFrame(data=results)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8805479d-461f-4e56-8b85-b5aa65fc7f33",
"metadata": {},
"outputs": [],
"source": [
"#pd.set_option('display.max_rows', None)\n",
"#pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f992bc03-6529-4521-9c98-3f7fe36eb7e8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>word</th>\n",
" <th>freq_recent</th>\n",
" <th>freq_reference</th>\n",
" <th>pct_diff</th>\n",
" <th>log_ratio</th>\n",
" <th>chi2</th>\n",
" <th>p_chi2</th>\n",
" <th>g2</th>\n",
" <th>p_g2</th>\n",
" <th>p_g2_adjusted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4666</th>\n",
" <td>Ye</td>\n",
" <td>0.009191</td>\n",
" <td>0.001412</td>\n",
" <td>551.030639</td>\n",
" <td>2.702725</td>\n",
" <td>88.873013</td>\n",
" <td>4.209875e-21</td>\n",
" <td>73.611094</td>\n",
" <td>9.512852e-18</td>\n",
" <td>2.265010e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2233</th>\n",
" <td>Kanye West</td>\n",
" <td>0.006175</td>\n",
" <td>0.001150</td>\n",
" <td>436.822849</td>\n",
" <td>2.424445</td>\n",
" <td>51.860997</td>\n",
" <td>5.957208e-13</td>\n",
" <td>42.512795</td>\n",
" <td>7.022134e-11</td>\n",
" <td>1.114647e-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2521</th>\n",
" <td>Lieke Marsman</td>\n",
" <td>0.002298</td>\n",
" <td>0.000026</td>\n",
" <td>8688.913621</td>\n",
" <td>6.457558</td>\n",
" <td>41.675092</td>\n",
" <td>1.077733e-10</td>\n",
" <td>34.046050</td>\n",
" <td>5.382305e-09</td>\n",
" <td>6.407634e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>Ahmed Marcouch</td>\n",
" <td>0.003016</td>\n",
" <td>0.000261</td>\n",
" <td>1053.544913</td>\n",
" <td>3.527997</td>\n",
" <td>38.897401</td>\n",
" <td>4.466750e-10</td>\n",
" <td>30.774198</td>\n",
" <td>2.898665e-08</td>\n",
" <td>2.760689e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1343</th>\n",
" <td>Flavio Cobolli</td>\n",
" <td>0.002010</td>\n",
" <td>0.000026</td>\n",
" <td>7590.299418</td>\n",
" <td>6.264913</td>\n",
" <td>36.186456</td>\n",
" <td>1.793125e-09</td>\n",
" <td>29.023926</td>\n",
" <td>7.148986e-08</td>\n",
" <td>5.673912e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4362</th>\n",
" <td>Van Dissel</td>\n",
" <td>0.002585</td>\n",
" <td>0.000209</td>\n",
" <td>1135.940978</td>\n",
" <td>3.627532</td>\n",
" <td>34.197157</td>\n",
" <td>4.980167e-09</td>\n",
" <td>26.757903</td>\n",
" <td>2.306067e-07</td>\n",
" <td>1.546231e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2797</th>\n",
" <td>Marsman</td>\n",
" <td>0.001867</td>\n",
" <td>0.000026</td>\n",
" <td>7040.992317</td>\n",
" <td>6.157998</td>\n",
" <td>33.443641</td>\n",
" <td>7.335924e-09</td>\n",
" <td>26.527883</td>\n",
" <td>2.597616e-07</td>\n",
" <td>1.546231e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2503</th>\n",
" <td>Lewis Hamilton</td>\n",
" <td>0.004021</td>\n",
" <td>0.000784</td>\n",
" <td>412.686628</td>\n",
" <td>2.358076</td>\n",
" <td>32.500200</td>\n",
" <td>1.191803e-08</td>\n",
" <td>26.104423</td>\n",
" <td>3.234422e-07</td>\n",
" <td>1.711368e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>673</th>\n",
" <td>Charles Leclerc</td>\n",
" <td>0.002872</td>\n",
" <td>0.000418</td>\n",
" <td>586.633877</td>\n",
" <td>2.779538</td>\n",
" <td>28.670638</td>\n",
" <td>8.579474e-08</td>\n",
" <td>22.442992</td>\n",
" <td>2.164739e-06</td>\n",
" <td>9.371352e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1858</th>\n",
" <td>Jaap van Dissel</td>\n",
" <td>0.002872</td>\n",
" <td>0.000418</td>\n",
" <td>586.633877</td>\n",
" <td>2.779538</td>\n",
" <td>28.670638</td>\n",
" <td>8.579474e-08</td>\n",
" <td>22.442992</td>\n",
" <td>2.164739e-06</td>\n",
" <td>9.371352e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4516</th>\n",
" <td>Vollering</td>\n",
" <td>0.001867</td>\n",
" <td>0.000105</td>\n",
" <td>1685.248079</td>\n",
" <td>4.158040</td>\n",
" <td>27.590589</td>\n",
" <td>1.499064e-07</td>\n",
" <td>21.070561</td>\n",
" <td>4.426770e-06</td>\n",
" <td>1.716105e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2923</th>\n",
" <td>Mette-Marit</td>\n",
" <td>0.004595</td>\n",
" <td>0.001307</td>\n",
" <td>251.556545</td>\n",
" <td>1.813756</td>\n",
" <td>25.318681</td>\n",
" <td>4.859791e-07</td>\n",
" <td>20.713510</td>\n",
" <td>5.333836e-06</td>\n",
" <td>1.716105e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1801</th>\n",
" <td>Ingrid Alexandra</td>\n",
" <td>0.002010</td>\n",
" <td>0.000157</td>\n",
" <td>1181.716570</td>\n",
" <td>3.679997</td>\n",
" <td>26.935070</td>\n",
" <td>2.104062e-07</td>\n",
" <td>20.564349</td>\n",
" <td>5.765997e-06</td>\n",
" <td>1.716105e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3466</th>\n",
" <td>Pols</td>\n",
" <td>0.002010</td>\n",
" <td>0.000157</td>\n",
" <td>1181.716570</td>\n",
" <td>3.679997</td>\n",
" <td>26.935070</td>\n",
" <td>2.104062e-07</td>\n",
" <td>20.564349</td>\n",
" <td>5.765997e-06</td>\n",
" <td>1.716105e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2717</th>\n",
" <td>Marcouch</td>\n",
" <td>0.002441</td>\n",
" <td>0.000314</td>\n",
" <td>678.185060</td>\n",
" <td>2.960109</td>\n",
" <td>26.234266</td>\n",
" <td>3.024086e-07</td>\n",
" <td>20.256468</td>\n",
" <td>6.772409e-06</td>\n",
" <td>1.897071e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1053</th>\n",
" <td>Donald Pols</td>\n",
" <td>0.002154</td>\n",
" <td>0.000261</td>\n",
" <td>723.960652</td>\n",
" <td>3.042571</td>\n",
" <td>23.871944</td>\n",
" <td>1.029615e-06</td>\n",
" <td>18.209227</td>\n",
" <td>1.979176e-05</td>\n",
" <td>5.236021e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3971</th>\n",
" <td>Sjoerdsma</td>\n",
" <td>0.001867</td>\n",
" <td>0.000209</td>\n",
" <td>792.624040</td>\n",
" <td>3.158047</td>\n",
" <td>21.542862</td>\n",
" <td>3.460080e-06</td>\n",
" <td>16.178300</td>\n",
" <td>5.765077e-05</td>\n",
" <td>1.372665e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2734</th>\n",
" <td>Marianne Thieme</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2583</th>\n",
" <td>Lotte van Kruistum</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3003</th>\n",
" <td>Mirra Andreeva</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2351</th>\n",
" <td>Kluytmans</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>249</th>\n",
" <td>Antonia Niedermaier</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1791</th>\n",
" <td>Ilse Kuijt</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>742</th>\n",
" <td>Cobolli</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4724</th>\n",
" <td>Zoë Slagter</td>\n",
" <td>0.001149</td>\n",
" <td>0.000026</td>\n",
" <td>4294.456811</td>\n",
" <td>5.457559</td>\n",
" <td>19.755766</td>\n",
" <td>8.799584e-06</td>\n",
" <td>14.281240</td>\n",
" <td>1.574261e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>278</th>\n",
" <td>Arjan Veurink</td>\n",
" <td>0.001580</td>\n",
" <td>0.000157</td>\n",
" <td>907.063019</td>\n",
" <td>3.332074</td>\n",
" <td>19.268029</td>\n",
" <td>1.135926e-05</td>\n",
" <td>14.174484</td>\n",
" <td>1.666147e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3098</th>\n",
" <td>Naomi Mestrum</td>\n",
" <td>0.001292</td>\n",
" <td>0.000105</td>\n",
" <td>1135.940978</td>\n",
" <td>3.627525</td>\n",
" <td>17.091366</td>\n",
" <td>3.562389e-05</td>\n",
" <td>12.221543</td>\n",
" <td>4.724085e-04</td>\n",
" <td>6.103910e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2665</th>\n",
" <td>Maja Chwalinska</td>\n",
" <td>0.001580</td>\n",
" <td>0.000209</td>\n",
" <td>655.297264</td>\n",
" <td>2.917039</td>\n",
" <td>16.686682</td>\n",
" <td>4.408944e-05</td>\n",
" <td>12.214228</td>\n",
" <td>4.742643e-04</td>\n",
" <td>6.103910e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4420</th>\n",
" <td>Van der Breggen</td>\n",
" <td>0.001436</td>\n",
" <td>0.000157</td>\n",
" <td>815.511836</td>\n",
" <td>3.194570</td>\n",
" <td>16.772235</td>\n",
" <td>4.214548e-05</td>\n",
" <td>12.131568</td>\n",
" <td>4.957543e-04</td>\n",
" <td>6.212584e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1731</th>\n",
" <td>Hitler</td>\n",
" <td>0.001005</td>\n",
" <td>0.000026</td>\n",
" <td>3745.149709</td>\n",
" <td>5.264914</td>\n",
" <td>17.027730</td>\n",
" <td>3.683788e-05</td>\n",
" <td>11.905641</td>\n",
" <td>5.596421e-04</td>\n",
" <td>6.345276e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2519</th>\n",
" <td>Lieke</td>\n",
" <td>0.001005</td>\n",
" <td>0.000026</td>\n",
" <td>3745.149709</td>\n",
" <td>5.264914</td>\n",
" <td>17.027730</td>\n",
" <td>3.683788e-05</td>\n",
" <td>11.905641</td>\n",
" <td>5.596421e-04</td>\n",
" <td>6.345276e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4176</th>\n",
" <td>Teunissen</td>\n",
" <td>0.001005</td>\n",
" <td>0.000026</td>\n",
" <td>3745.149709</td>\n",
" <td>5.264914</td>\n",
" <td>17.027730</td>\n",
" <td>3.683788e-05</td>\n",
" <td>11.905641</td>\n",
" <td>5.596421e-04</td>\n",
" <td>6.345276e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4464</th>\n",
" <td>Veurink</td>\n",
" <td>0.001005</td>\n",
" <td>0.000026</td>\n",
" <td>3745.149709</td>\n",
" <td>5.264914</td>\n",
" <td>17.027730</td>\n",
" <td>3.683788e-05</td>\n",
" <td>11.905641</td>\n",
" <td>5.596421e-04</td>\n",
" <td>6.345276e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>968</th>\n",
" <td>Denzel Dumfries</td>\n",
" <td>0.002010</td>\n",
" <td>0.000418</td>\n",
" <td>380.643714</td>\n",
" <td>2.264965</td>\n",
" <td>15.359454</td>\n",
" <td>8.887510e-05</td>\n",
" <td>11.600092</td>\n",
" <td>6.594857e-04</td>\n",
" <td>7.303421e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2329</th>\n",
" <td>Kimi Antonelli</td>\n",
" <td>0.003447</td>\n",
" <td>0.001203</td>\n",
" <td>186.595009</td>\n",
" <td>1.519013</td>\n",
" <td>14.294065</td>\n",
" <td>1.563571e-04</td>\n",
" <td>11.539896</td>\n",
" <td>6.811844e-04</td>\n",
" <td>7.372272e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2465</th>\n",
" <td>Leclerc</td>\n",
" <td>0.001292</td>\n",
" <td>0.000157</td>\n",
" <td>723.960652</td>\n",
" <td>3.042567</td>\n",
" <td>14.318772</td>\n",
" <td>1.543183e-04</td>\n",
" <td>10.145781</td>\n",
" <td>1.446319e-03</td>\n",
" <td>1.306897e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1250</th>\n",
" <td>Esther Ouwehand</td>\n",
" <td>0.001292</td>\n",
" <td>0.000157</td>\n",
" <td>723.960652</td>\n",
" <td>3.042567</td>\n",
" <td>14.318772</td>\n",
" <td>1.543183e-04</td>\n",
" <td>10.145781</td>\n",
" <td>1.446319e-03</td>\n",
" <td>1.306897e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>713</th>\n",
" <td>Christine Teunissen</td>\n",
" <td>0.001149</td>\n",
" <td>0.000105</td>\n",
" <td>998.614203</td>\n",
" <td>3.457600</td>\n",
" <td>14.529034</td>\n",
" <td>1.380160e-04</td>\n",
" <td>10.122801</td>\n",
" <td>1.464463e-03</td>\n",
" <td>1.306897e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1917</th>\n",
" <td>Jan Kluytmans</td>\n",
" <td>0.001149</td>\n",
" <td>0.000105</td>\n",
" <td>998.614203</td>\n",
" <td>3.457600</td>\n",
" <td>14.529034</td>\n",
" <td>1.380160e-04</td>\n",
" <td>10.122801</td>\n",
" <td>1.464463e-03</td>\n",
" <td>1.306897e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2124</th>\n",
" <td>Joost Luiten</td>\n",
" <td>0.001149</td>\n",
" <td>0.000105</td>\n",
" <td>998.614203</td>\n",
" <td>3.457600</td>\n",
" <td>14.529034</td>\n",
" <td>1.380160e-04</td>\n",
" <td>10.122801</td>\n",
" <td>1.464463e-03</td>\n",
" <td>1.306897e-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" word freq_recent freq_reference pct_diff \\\n",
"4666 Ye 0.009191 0.001412 551.030639 \n",
"2233 Kanye West 0.006175 0.001150 436.822849 \n",
"2521 Lieke Marsman 0.002298 0.000026 8688.913621 \n",
"55 Ahmed Marcouch 0.003016 0.000261 1053.544913 \n",
"1343 Flavio Cobolli 0.002010 0.000026 7590.299418 \n",
"4362 Van Dissel 0.002585 0.000209 1135.940978 \n",
"2797 Marsman 0.001867 0.000026 7040.992317 \n",
"2503 Lewis Hamilton 0.004021 0.000784 412.686628 \n",
"673 Charles Leclerc 0.002872 0.000418 586.633877 \n",
"1858 Jaap van Dissel 0.002872 0.000418 586.633877 \n",
"4516 Vollering 0.001867 0.000105 1685.248079 \n",
"2923 Mette-Marit 0.004595 0.001307 251.556545 \n",
"1801 Ingrid Alexandra 0.002010 0.000157 1181.716570 \n",
"3466 Pols 0.002010 0.000157 1181.716570 \n",
"2717 Marcouch 0.002441 0.000314 678.185060 \n",
"1053 Donald Pols 0.002154 0.000261 723.960652 \n",
"3971 Sjoerdsma 0.001867 0.000209 792.624040 \n",
"2734 Marianne Thieme 0.001149 0.000026 4294.456811 \n",
"2583 Lotte van Kruistum 0.001149 0.000026 4294.456811 \n",
"3003 Mirra Andreeva 0.001149 0.000026 4294.456811 \n",
"2351 Kluytmans 0.001149 0.000026 4294.456811 \n",
"249 Antonia Niedermaier 0.001149 0.000026 4294.456811 \n",
"1791 Ilse Kuijt 0.001149 0.000026 4294.456811 \n",
"742 Cobolli 0.001149 0.000026 4294.456811 \n",
"4724 Zoë Slagter 0.001149 0.000026 4294.456811 \n",
"278 Arjan Veurink 0.001580 0.000157 907.063019 \n",
"3098 Naomi Mestrum 0.001292 0.000105 1135.940978 \n",
"2665 Maja Chwalinska 0.001580 0.000209 655.297264 \n",
"4420 Van der Breggen 0.001436 0.000157 815.511836 \n",
"1731 Hitler 0.001005 0.000026 3745.149709 \n",
"2519 Lieke 0.001005 0.000026 3745.149709 \n",
"4176 Teunissen 0.001005 0.000026 3745.149709 \n",
"4464 Veurink 0.001005 0.000026 3745.149709 \n",
"968 Denzel Dumfries 0.002010 0.000418 380.643714 \n",
"2329 Kimi Antonelli 0.003447 0.001203 186.595009 \n",
"2465 Leclerc 0.001292 0.000157 723.960652 \n",
"1250 Esther Ouwehand 0.001292 0.000157 723.960652 \n",
"713 Christine Teunissen 0.001149 0.000105 998.614203 \n",
"1917 Jan Kluytmans 0.001149 0.000105 998.614203 \n",
"2124 Joost Luiten 0.001149 0.000105 998.614203 \n",
"\n",
" log_ratio chi2 p_chi2 g2 p_g2 \\\n",
"4666 2.702725 88.873013 4.209875e-21 73.611094 9.512852e-18 \n",
"2233 2.424445 51.860997 5.957208e-13 42.512795 7.022134e-11 \n",
"2521 6.457558 41.675092 1.077733e-10 34.046050 5.382305e-09 \n",
"55 3.527997 38.897401 4.466750e-10 30.774198 2.898665e-08 \n",
"1343 6.264913 36.186456 1.793125e-09 29.023926 7.148986e-08 \n",
"4362 3.627532 34.197157 4.980167e-09 26.757903 2.306067e-07 \n",
"2797 6.157998 33.443641 7.335924e-09 26.527883 2.597616e-07 \n",
"2503 2.358076 32.500200 1.191803e-08 26.104423 3.234422e-07 \n",
"673 2.779538 28.670638 8.579474e-08 22.442992 2.164739e-06 \n",
"1858 2.779538 28.670638 8.579474e-08 22.442992 2.164739e-06 \n",
"4516 4.158040 27.590589 1.499064e-07 21.070561 4.426770e-06 \n",
"2923 1.813756 25.318681 4.859791e-07 20.713510 5.333836e-06 \n",
"1801 3.679997 26.935070 2.104062e-07 20.564349 5.765997e-06 \n",
"3466 3.679997 26.935070 2.104062e-07 20.564349 5.765997e-06 \n",
"2717 2.960109 26.234266 3.024086e-07 20.256468 6.772409e-06 \n",
"1053 3.042571 23.871944 1.029615e-06 18.209227 1.979176e-05 \n",
"3971 3.158047 21.542862 3.460080e-06 16.178300 5.765077e-05 \n",
"2734 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"2583 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"3003 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"2351 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"249 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"1791 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"742 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"4724 5.457559 19.755766 8.799584e-06 14.281240 1.574261e-04 \n",
"278 3.332074 19.268029 1.135926e-05 14.174484 1.666147e-04 \n",
"3098 3.627525 17.091366 3.562389e-05 12.221543 4.724085e-04 \n",
"2665 2.917039 16.686682 4.408944e-05 12.214228 4.742643e-04 \n",
"4420 3.194570 16.772235 4.214548e-05 12.131568 4.957543e-04 \n",
"1731 5.264914 17.027730 3.683788e-05 11.905641 5.596421e-04 \n",
"2519 5.264914 17.027730 3.683788e-05 11.905641 5.596421e-04 \n",
"4176 5.264914 17.027730 3.683788e-05 11.905641 5.596421e-04 \n",
"4464 5.264914 17.027730 3.683788e-05 11.905641 5.596421e-04 \n",
"968 2.264965 15.359454 8.887510e-05 11.600092 6.594857e-04 \n",
"2329 1.519013 14.294065 1.563571e-04 11.539896 6.811844e-04 \n",
"2465 3.042567 14.318772 1.543183e-04 10.145781 1.446319e-03 \n",
"1250 3.042567 14.318772 1.543183e-04 10.145781 1.446319e-03 \n",
"713 3.457600 14.529034 1.380160e-04 10.122801 1.464463e-03 \n",
"1917 3.457600 14.529034 1.380160e-04 10.122801 1.464463e-03 \n",
"2124 3.457600 14.529034 1.380160e-04 10.122801 1.464463e-03 \n",
"\n",
" p_g2_adjusted \n",
"4666 2.265010e-14 \n",
"2233 1.114647e-07 \n",
"2521 6.407634e-06 \n",
"55 2.760689e-05 \n",
"1343 5.673912e-05 \n",
"4362 1.546231e-04 \n",
"2797 1.546231e-04 \n",
"2503 1.711368e-04 \n",
"673 9.371352e-04 \n",
"1858 9.371352e-04 \n",
"4516 1.716105e-03 \n",
"2923 1.716105e-03 \n",
"1801 1.716105e-03 \n",
"3466 1.716105e-03 \n",
"2717 1.897071e-03 \n",
"1053 5.236021e-03 \n",
"3971 1.372665e-02 \n",
"2734 2.404300e-02 \n",
"2583 2.404300e-02 \n",
"3003 2.404300e-02 \n",
"2351 2.404300e-02 \n",
"249 2.404300e-02 \n",
"1791 2.404300e-02 \n",
"742 2.404300e-02 \n",
"4724 2.404300e-02 \n",
"278 2.404300e-02 \n",
"3098 6.103910e-02 \n",
"2665 6.103910e-02 \n",
"4420 6.212584e-02 \n",
"1731 6.345276e-02 \n",
"2519 6.345276e-02 \n",
"4176 6.345276e-02 \n",
"4464 6.345276e-02 \n",
"968 7.303421e-02 \n",
"2329 7.372272e-02 \n",
"2465 1.306897e-01 \n",
"1250 1.306897e-01 \n",
"713 1.306897e-01 \n",
"1917 1.306897e-01 \n",
"2124 1.306897e-01 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Risers: the 40 words with the largest G² among those more frequent in the\n",
"# recent corpus than in the reference corpus.\n",
"results.loc[results[\"pct_diff\"] > 0].sort_values(by=\"g2\", ascending=False).head(40)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "bccce269-9540-445e-a5f9-775d55b469b9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>word</th>\n",
" <th>freq_recent</th>\n",
" <th>freq_reference</th>\n",
" <th>pct_diff</th>\n",
" <th>log_ratio</th>\n",
" <th>chi2</th>\n",
" <th>p_chi2</th>\n",
" <th>g2</th>\n",
" <th>p_g2</th>\n",
" <th>p_g2_adjusted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1743</th>\n",
" <td>Hondius</td>\n",
" <td>0.000431</td>\n",
" <td>0.010457</td>\n",
" <td>-95.880197</td>\n",
" <td>-4.601278</td>\n",
" <td>66.470517</td>\n",
" <td>3.551664e-16</td>\n",
" <td>98.275792</td>\n",
" <td>3.639886e-23</td>\n",
" <td>1.733314e-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4215</th>\n",
" <td>Thymen Arensman</td>\n",
" <td>0.000072</td>\n",
" <td>0.002353</td>\n",
" <td>-96.948294</td>\n",
" <td>-5.034221</td>\n",
" <td>15.256919</td>\n",
" <td>9.383239e-05</td>\n",
" <td>20.686464</td>\n",
" <td>5.409706e-06</td>\n",
" <td>1.716105e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1508</th>\n",
" <td>Gidi Markuszower</td>\n",
" <td>0.000072</td>\n",
" <td>0.002091</td>\n",
" <td>-96.566831</td>\n",
" <td>-4.864296</td>\n",
" <td>13.434157</td>\n",
" <td>2.470840e-04</td>\n",
" <td>17.811640</td>\n",
" <td>2.438866e-05</td>\n",
" <td>6.112569e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4495</th>\n",
" <td>Vingegaard</td>\n",
" <td>0.000072</td>\n",
" <td>0.001882</td>\n",
" <td>-96.185367</td>\n",
" <td>-4.712293</td>\n",
" <td>11.977136</td>\n",
" <td>5.385731e-04</td>\n",
" <td>15.534557</td>\n",
" <td>8.101072e-05</td>\n",
" <td>1.837014e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2782</th>\n",
" <td>Markuszower</td>\n",
" <td>0.000072</td>\n",
" <td>0.001830</td>\n",
" <td>-96.076378</td>\n",
" <td>-4.671651</td>\n",
" <td>11.613072</td>\n",
" <td>6.548989e-04</td>\n",
" <td>14.969018</td>\n",
" <td>1.092909e-04</td>\n",
" <td>2.365652e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2109</th>\n",
" <td>Jonas Vingegaard</td>\n",
" <td>0.000431</td>\n",
" <td>0.002667</td>\n",
" <td>-83.843909</td>\n",
" <td>-2.629847</td>\n",
" <td>12.353905</td>\n",
" <td>4.400650e-04</td>\n",
" <td>14.707179</td>\n",
" <td>1.255674e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3750</th>\n",
" <td>Rubio</td>\n",
" <td>0.000287</td>\n",
" <td>0.002301</td>\n",
" <td>-87.515748</td>\n",
" <td>-3.001814</td>\n",
" <td>11.757125</td>\n",
" <td>6.061077e-04</td>\n",
" <td>14.212300</td>\n",
" <td>1.632996e-04</td>\n",
" <td>2.404300e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>Ali B</td>\n",
" <td>0.000072</td>\n",
" <td>0.001725</td>\n",
" <td>-95.838583</td>\n",
" <td>-4.586762</td>\n",
" <td>10.885206</td>\n",
" <td>9.693541e-04</td>\n",
" <td>13.843023</td>\n",
" <td>1.987332e-04</td>\n",
" <td>2.783434e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>512</th>\n",
" <td>Botic van de Zandschulp</td>\n",
" <td>0.000072</td>\n",
" <td>0.001621</td>\n",
" <td>-95.570104</td>\n",
" <td>-4.496564</td>\n",
" <td>10.157724</td>\n",
" <td>1.436979e-03</td>\n",
" <td>12.724502</td>\n",
" <td>3.608960e-04</td>\n",
" <td>4.910248e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>921</th>\n",
" <td>De Jong</td>\n",
" <td>0.000574</td>\n",
" <td>0.002562</td>\n",
" <td>-77.579302</td>\n",
" <td>-2.157095</td>\n",
" <td>9.947317</td>\n",
" <td>1.610840e-03</td>\n",
" <td>11.220837</td>\n",
" <td>8.088400e-04</td>\n",
" <td>8.559325e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4034</th>\n",
" <td>Starmer</td>\n",
" <td>0.000574</td>\n",
" <td>0.002510</td>\n",
" <td>-77.112204</td>\n",
" <td>-2.127348</td>\n",
" <td>9.611842</td>\n",
" <td>1.933266e-03</td>\n",
" <td>10.778088</td>\n",
" <td>1.027087e-03</td>\n",
" <td>1.060152e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1054</th>\n",
" <td>Donald Trump</td>\n",
" <td>0.010196</td>\n",
" <td>0.015529</td>\n",
" <td>-34.342080</td>\n",
" <td>-0.606959</td>\n",
" <td>10.440052</td>\n",
" <td>1.233118e-03</td>\n",
" <td>10.743700</td>\n",
" <td>1.046349e-03</td>\n",
" <td>1.060152e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4169</th>\n",
" <td>Tedros Adhanom Ghebreyesus</td>\n",
" <td>0.000072</td>\n",
" <td>0.001412</td>\n",
" <td>-94.913823</td>\n",
" <td>-4.297255</td>\n",
" <td>8.704154</td>\n",
" <td>3.174857e-03</td>\n",
" <td>10.514021</td>\n",
" <td>1.184722e-03</td>\n",
" <td>1.175343e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269</th>\n",
" <td>Arensman</td>\n",
" <td>0.000072</td>\n",
" <td>0.001359</td>\n",
" <td>-94.718201</td>\n",
" <td>-4.242808</td>\n",
" <td>8.341110</td>\n",
" <td>3.875791e-03</td>\n",
" <td>9.967878</td>\n",
" <td>1.592950e-03</td>\n",
" <td>1.306897e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3185</th>\n",
" <td>Noam Bettan</td>\n",
" <td>0.000072</td>\n",
" <td>0.001307</td>\n",
" <td>-94.506929</td>\n",
" <td>-4.186224</td>\n",
" <td>7.978231</td>\n",
" <td>4.734318e-03</td>\n",
" <td>9.424725</td>\n",
" <td>2.140792e-03</td>\n",
" <td>1.396500e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3082</th>\n",
" <td>Máxima</td>\n",
" <td>0.002010</td>\n",
" <td>0.004497</td>\n",
" <td>-55.288957</td>\n",
" <td>-1.161296</td>\n",
" <td>8.263354</td>\n",
" <td>4.045333e-03</td>\n",
" <td>8.639908</td>\n",
" <td>3.288780e-03</td>\n",
" <td>2.088156e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>Dylan Groenewegen</td>\n",
" <td>0.000072</td>\n",
" <td>0.001203</td>\n",
" <td>-94.029271</td>\n",
" <td>-4.065930</td>\n",
" <td>7.253040</td>\n",
" <td>7.078107e-03</td>\n",
" <td>8.348388</td>\n",
" <td>3.860297e-03</td>\n",
" <td>2.418781e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4258</th>\n",
" <td>Tom Berendsen</td>\n",
" <td>0.000574</td>\n",
" <td>0.002196</td>\n",
" <td>-73.842519</td>\n",
" <td>-1.934703</td>\n",
" <td>7.626670</td>\n",
" <td>5.751139e-03</td>\n",
" <td>8.204369</td>\n",
" <td>4.178964e-03</td>\n",
" <td>2.456818e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4598</th>\n",
" <td>Willem-Alexander</td>\n",
" <td>0.001436</td>\n",
" <td>0.003555</td>\n",
" <td>-59.609772</td>\n",
" <td>-1.307921</td>\n",
" <td>7.692603</td>\n",
" <td>5.544761e-03</td>\n",
" <td>8.044226</td>\n",
" <td>4.564892e-03</td>\n",
" <td>2.587859e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3222</th>\n",
" <td>Oceanwide Expeditions</td>\n",
" <td>0.000072</td>\n",
" <td>0.001150</td>\n",
" <td>-93.757874</td>\n",
" <td>-4.001800</td>\n",
" <td>6.890771</td>\n",
" <td>8.664187e-03</td>\n",
" <td>7.815786</td>\n",
" <td>5.179183e-03</td>\n",
" <td>2.603795e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1411</th>\n",
" <td>Freek</td>\n",
" <td>0.000072</td>\n",
" <td>0.001150</td>\n",
" <td>-93.757874</td>\n",
" <td>-4.001800</td>\n",
" <td>6.890771</td>\n",
" <td>8.664187e-03</td>\n",
" <td>7.815786</td>\n",
" <td>5.179183e-03</td>\n",
" <td>2.603795e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2226</th>\n",
" <td>Kaja Kallas</td>\n",
" <td>0.000072</td>\n",
" <td>0.001098</td>\n",
" <td>-93.460630</td>\n",
" <td>-3.934686</td>\n",
" <td>6.528755</td>\n",
" <td>1.061442e-02</td>\n",
" <td>7.287350</td>\n",
" <td>6.944183e-03</td>\n",
" <td>2.603795e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4454</th>\n",
" <td>Verhoeven</td>\n",
" <td>0.000072</td>\n",
" <td>0.001098</td>\n",
" <td>-93.460630</td>\n",
" <td>-3.934686</td>\n",
" <td>6.528755</td>\n",
" <td>1.061442e-02</td>\n",
" <td>7.287350</td>\n",
" <td>6.944183e-03</td>\n",
" <td>2.603795e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1401</th>\n",
" <td>Fred Rutten</td>\n",
" <td>0.000072</td>\n",
" <td>0.001098</td>\n",
" <td>-93.460630</td>\n",
" <td>-3.934686</td>\n",
" <td>6.528755</td>\n",
" <td>1.061442e-02</td>\n",
" <td>7.287350</td>\n",
" <td>6.944183e-03</td>\n",
" <td>2.603795e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981</th>\n",
" <td>Jerdy Schouten</td>\n",
" <td>0.000072</td>\n",
" <td>0.001046</td>\n",
" <td>-93.133661</td>\n",
" <td>-3.864296</td>\n",
" <td>6.167024</td>\n",
" <td>1.301534e-02</td>\n",
" <td>6.763485</td>\n",
" <td>9.304187e-03</td>\n",
" <td>3.370342e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2977</th>\n",
" <td>Mikel Arteta</td>\n",
" <td>0.000072</td>\n",
" <td>0.001046</td>\n",
" <td>-93.133661</td>\n",
" <td>-3.864296</td>\n",
" <td>6.167024</td>\n",
" <td>1.301534e-02</td>\n",
" <td>6.763485</td>\n",
" <td>9.304187e-03</td>\n",
" <td>3.370342e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3013</th>\n",
" <td>Modi</td>\n",
" <td>0.000072</td>\n",
" <td>0.001046</td>\n",
" <td>-93.133661</td>\n",
" <td>-3.864296</td>\n",
" <td>6.167024</td>\n",
" <td>1.301534e-02</td>\n",
" <td>6.763485</td>\n",
" <td>9.304187e-03</td>\n",
" <td>3.370342e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3237</th>\n",
" <td>Oleksandr Usyk</td>\n",
" <td>0.000072</td>\n",
" <td>0.001046</td>\n",
" <td>-93.133661</td>\n",
" <td>-3.864296</td>\n",
" <td>6.167024</td>\n",
" <td>1.301534e-02</td>\n",
" <td>6.763485</td>\n",
" <td>9.304187e-03</td>\n",
" <td>3.370342e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>417</th>\n",
" <td>Berendsen</td>\n",
" <td>0.000718</td>\n",
" <td>0.002248</td>\n",
" <td>-68.063541</td>\n",
" <td>-1.646722</td>\n",
" <td>6.509375</td>\n",
" <td>1.073072e-02</td>\n",
" <td>6.756171</td>\n",
" <td>9.342400e-03</td>\n",
" <td>3.370342e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>Jetten</td>\n",
" <td>0.002441</td>\n",
" <td>0.004601</td>\n",
" <td>-46.941928</td>\n",
" <td>-0.914356</td>\n",
" <td>5.941062</td>\n",
" <td>1.479210e-02</td>\n",
" <td>5.969003</td>\n",
" <td>1.455951e-02</td>\n",
" <td>4.952314e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1310</th>\n",
" <td>Felix Gall</td>\n",
" <td>0.000072</td>\n",
" <td>0.000941</td>\n",
" <td>-92.370735</td>\n",
" <td>-3.712293</td>\n",
" <td>5.444587</td>\n",
" <td>1.962906e-02</td>\n",
" <td>5.731410</td>\n",
" <td>1.666410e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1167</th>\n",
" <td>Ellen ten Damme</td>\n",
" <td>0.000072</td>\n",
" <td>0.000941</td>\n",
" <td>-92.370735</td>\n",
" <td>-3.712293</td>\n",
" <td>5.444587</td>\n",
" <td>1.962906e-02</td>\n",
" <td>5.731410</td>\n",
" <td>1.666410e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4101</th>\n",
" <td>Suzan &amp; Freek</td>\n",
" <td>0.000072</td>\n",
" <td>0.000941</td>\n",
" <td>-92.370735</td>\n",
" <td>-3.712293</td>\n",
" <td>5.444587</td>\n",
" <td>1.962906e-02</td>\n",
" <td>5.731410</td>\n",
" <td>1.666410e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4141</th>\n",
" <td>Tallon Griekspoor</td>\n",
" <td>0.000431</td>\n",
" <td>0.001621</td>\n",
" <td>-73.420624</td>\n",
" <td>-1.911618</td>\n",
" <td>5.554715</td>\n",
" <td>1.843098e-02</td>\n",
" <td>5.650035</td>\n",
" <td>1.745502e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4655</th>\n",
" <td>Xi Jinping</td>\n",
" <td>0.001149</td>\n",
" <td>0.002719</td>\n",
" <td>-57.745608</td>\n",
" <td>-1.242826</td>\n",
" <td>5.484134</td>\n",
" <td>1.918982e-02</td>\n",
" <td>5.488524</td>\n",
" <td>1.914170e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>Amalia</td>\n",
" <td>0.000431</td>\n",
" <td>0.001569</td>\n",
" <td>-72.534645</td>\n",
" <td>-1.864313</td>\n",
" <td>5.230992</td>\n",
" <td>2.218788e-02</td>\n",
" <td>5.249197</td>\n",
" <td>2.195690e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3767</th>\n",
" <td>Rutten</td>\n",
" <td>0.000072</td>\n",
" <td>0.000889</td>\n",
" <td>-91.921954</td>\n",
" <td>-3.629831</td>\n",
" <td>5.083988</td>\n",
" <td>2.414777e-02</td>\n",
" <td>5.224377</td>\n",
" <td>2.227242e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2970</th>\n",
" <td>Mika Godts</td>\n",
" <td>0.000072</td>\n",
" <td>0.000889</td>\n",
" <td>-91.921954</td>\n",
" <td>-3.629831</td>\n",
" <td>5.083988</td>\n",
" <td>2.414777e-02</td>\n",
" <td>5.224377</td>\n",
" <td>2.227242e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>537</th>\n",
" <td>Brekelmans</td>\n",
" <td>0.000072</td>\n",
" <td>0.000889</td>\n",
" <td>-91.921954</td>\n",
" <td>-3.629831</td>\n",
" <td>5.083988</td>\n",
" <td>2.414777e-02</td>\n",
" <td>5.224377</td>\n",
" <td>2.227242e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2712</th>\n",
" <td>Marco Rubio</td>\n",
" <td>0.000862</td>\n",
" <td>0.002248</td>\n",
" <td>-61.676249</td>\n",
" <td>-1.383688</td>\n",
" <td>5.236100</td>\n",
" <td>2.212281e-02</td>\n",
" <td>5.213944</td>\n",
" <td>2.240645e-02</td>\n",
" <td>5.501480e-01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" word freq_recent freq_reference pct_diff \\\n",
"1743 Hondius 0.000431 0.010457 -95.880197 \n",
"4215 Thymen Arensman 0.000072 0.002353 -96.948294 \n",
"1508 Gidi Markuszower 0.000072 0.002091 -96.566831 \n",
"4495 Vingegaard 0.000072 0.001882 -96.185367 \n",
"2782 Markuszower 0.000072 0.001830 -96.076378 \n",
"2109 Jonas Vingegaard 0.000431 0.002667 -83.843909 \n",
"3750 Rubio 0.000287 0.002301 -87.515748 \n",
"115 Ali B 0.000072 0.001725 -95.838583 \n",
"512 Botic van de Zandschulp 0.000072 0.001621 -95.570104 \n",
"921 De Jong 0.000574 0.002562 -77.579302 \n",
"4034 Starmer 0.000574 0.002510 -77.112204 \n",
"1054 Donald Trump 0.010196 0.015529 -34.342080 \n",
"4169 Tedros Adhanom Ghebreyesus 0.000072 0.001412 -94.913823 \n",
"269 Arensman 0.000072 0.001359 -94.718201 \n",
"3185 Noam Bettan 0.000072 0.001307 -94.506929 \n",
"3082 Máxima 0.002010 0.004497 -55.288957 \n",
"1099 Dylan Groenewegen 0.000072 0.001203 -94.029271 \n",
"4258 Tom Berendsen 0.000574 0.002196 -73.842519 \n",
"4598 Willem-Alexander 0.001436 0.003555 -59.609772 \n",
"3222 Oceanwide Expeditions 0.000072 0.001150 -93.757874 \n",
"1411 Freek 0.000072 0.001150 -93.757874 \n",
"2226 Kaja Kallas 0.000072 0.001098 -93.460630 \n",
"4454 Verhoeven 0.000072 0.001098 -93.460630 \n",
"1401 Fred Rutten 0.000072 0.001098 -93.460630 \n",
"1981 Jerdy Schouten 0.000072 0.001046 -93.133661 \n",
"2977 Mikel Arteta 0.000072 0.001046 -93.133661 \n",
"3013 Modi 0.000072 0.001046 -93.133661 \n",
"3237 Oleksandr Usyk 0.000072 0.001046 -93.133661 \n",
"417 Berendsen 0.000718 0.002248 -68.063541 \n",
"2016 Jetten 0.002441 0.004601 -46.941928 \n",
"1310 Felix Gall 0.000072 0.000941 -92.370735 \n",
"1167 Ellen ten Damme 0.000072 0.000941 -92.370735 \n",
"4101 Suzan & Freek 0.000072 0.000941 -92.370735 \n",
"4141 Tallon Griekspoor 0.000431 0.001621 -73.420624 \n",
"4655 Xi Jinping 0.001149 0.002719 -57.745608 \n",
"133 Amalia 0.000431 0.001569 -72.534645 \n",
"3767 Rutten 0.000072 0.000889 -91.921954 \n",
"2970 Mika Godts 0.000072 0.000889 -91.921954 \n",
"537 Brekelmans 0.000072 0.000889 -91.921954 \n",
"2712 Marco Rubio 0.000862 0.002248 -61.676249 \n",
"\n",
" log_ratio chi2 p_chi2 g2 p_g2 \\\n",
"1743 -4.601278 66.470517 3.551664e-16 98.275792 3.639886e-23 \n",
"4215 -5.034221 15.256919 9.383239e-05 20.686464 5.409706e-06 \n",
"1508 -4.864296 13.434157 2.470840e-04 17.811640 2.438866e-05 \n",
"4495 -4.712293 11.977136 5.385731e-04 15.534557 8.101072e-05 \n",
"2782 -4.671651 11.613072 6.548989e-04 14.969018 1.092909e-04 \n",
"2109 -2.629847 12.353905 4.400650e-04 14.707179 1.255674e-04 \n",
"3750 -3.001814 11.757125 6.061077e-04 14.212300 1.632996e-04 \n",
"115 -4.586762 10.885206 9.693541e-04 13.843023 1.987332e-04 \n",
"512 -4.496564 10.157724 1.436979e-03 12.724502 3.608960e-04 \n",
"921 -2.157095 9.947317 1.610840e-03 11.220837 8.088400e-04 \n",
"4034 -2.127348 9.611842 1.933266e-03 10.778088 1.027087e-03 \n",
"1054 -0.606959 10.440052 1.233118e-03 10.743700 1.046349e-03 \n",
"4169 -4.297255 8.704154 3.174857e-03 10.514021 1.184722e-03 \n",
"269 -4.242808 8.341110 3.875791e-03 9.967878 1.592950e-03 \n",
"3185 -4.186224 7.978231 4.734318e-03 9.424725 2.140792e-03 \n",
"3082 -1.161296 8.263354 4.045333e-03 8.639908 3.288780e-03 \n",
"1099 -4.065930 7.253040 7.078107e-03 8.348388 3.860297e-03 \n",
"4258 -1.934703 7.626670 5.751139e-03 8.204369 4.178964e-03 \n",
"4598 -1.307921 7.692603 5.544761e-03 8.044226 4.564892e-03 \n",
"3222 -4.001800 6.890771 8.664187e-03 7.815786 5.179183e-03 \n",
"1411 -4.001800 6.890771 8.664187e-03 7.815786 5.179183e-03 \n",
"2226 -3.934686 6.528755 1.061442e-02 7.287350 6.944183e-03 \n",
"4454 -3.934686 6.528755 1.061442e-02 7.287350 6.944183e-03 \n",
"1401 -3.934686 6.528755 1.061442e-02 7.287350 6.944183e-03 \n",
"1981 -3.864296 6.167024 1.301534e-02 6.763485 9.304187e-03 \n",
"2977 -3.864296 6.167024 1.301534e-02 6.763485 9.304187e-03 \n",
"3013 -3.864296 6.167024 1.301534e-02 6.763485 9.304187e-03 \n",
"3237 -3.864296 6.167024 1.301534e-02 6.763485 9.304187e-03 \n",
"417 -1.646722 6.509375 1.073072e-02 6.756171 9.342400e-03 \n",
"2016 -0.914356 5.941062 1.479210e-02 5.969003 1.455951e-02 \n",
"1310 -3.712293 5.444587 1.962906e-02 5.731410 1.666410e-02 \n",
"1167 -3.712293 5.444587 1.962906e-02 5.731410 1.666410e-02 \n",
"4101 -3.712293 5.444587 1.962906e-02 5.731410 1.666410e-02 \n",
"4141 -1.911618 5.554715 1.843098e-02 5.650035 1.745502e-02 \n",
"4655 -1.242826 5.484134 1.918982e-02 5.488524 1.914170e-02 \n",
"133 -1.864313 5.230992 2.218788e-02 5.249197 2.195690e-02 \n",
"3767 -3.629831 5.083988 2.414777e-02 5.224377 2.227242e-02 \n",
"2970 -3.629831 5.083988 2.414777e-02 5.224377 2.227242e-02 \n",
"537 -3.629831 5.083988 2.414777e-02 5.224377 2.227242e-02 \n",
"2712 -1.383688 5.236100 2.212281e-02 5.213944 2.240645e-02 \n",
"\n",
" p_g2_adjusted \n",
"1743 1.733314e-19 \n",
"4215 1.716105e-03 \n",
"1508 6.112569e-03 \n",
"4495 1.837014e-02 \n",
"2782 2.365652e-02 \n",
"2109 2.404300e-02 \n",
"3750 2.404300e-02 \n",
"115 2.783434e-02 \n",
"512 4.910248e-02 \n",
"921 8.559325e-02 \n",
"4034 1.060152e-01 \n",
"1054 1.060152e-01 \n",
"4169 1.175343e-01 \n",
"269 1.306897e-01 \n",
"3185 1.396500e-01 \n",
"3082 2.088156e-01 \n",
"1099 2.418781e-01 \n",
"4258 2.456818e-01 \n",
"4598 2.587859e-01 \n",
"3222 2.603795e-01 \n",
"1411 2.603795e-01 \n",
"2226 2.603795e-01 \n",
"4454 2.603795e-01 \n",
"1401 2.603795e-01 \n",
"1981 3.370342e-01 \n",
"2977 3.370342e-01 \n",
"3013 3.370342e-01 \n",
"3237 3.370342e-01 \n",
"417 3.370342e-01 \n",
"2016 4.952314e-01 \n",
"1310 5.501480e-01 \n",
"1167 5.501480e-01 \n",
"4101 5.501480e-01 \n",
"4141 5.501480e-01 \n",
"4655 5.501480e-01 \n",
"133 5.501480e-01 \n",
"3767 5.501480e-01 \n",
"2970 5.501480e-01 \n",
"537 5.501480e-01 \n",
"2712 5.501480e-01 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# dalers (fallers): names with freq_recent below freq_reference; top 40 by g2, largest first\n",
"results[results.pct_diff < 0].sort_values('g2', ascending=False)[:40]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}