stimmenfryslan/notebooks/To Per Word.ipynb

2.6 KiB

In [6]:
from glob import glob

import os
In [16]:
# Collect the name of every file found under data/<location>/<date>/.
waves = []
for location_name in os.listdir('data'):
    location_dir = os.path.join('data', location_name)
    for date_name in os.listdir(location_dir):
        waves.extend(os.listdir(os.path.join(location_dir, date_name)))

# Sanity check: no filename may occur twice across all locations and dates.
unique_count = len(set(waves))
assert unique_count == len(waves), "Not all filenames are unique :("
In [25]:
# Move every recording from data/<location>/<date>/ into per_word/<word>/,
# where <word> is the second underscore-separated field of the filename.
for location in os.listdir('data'):
    location_dir = os.path.join('data', location)
    for date in os.listdir(location_dir):
        date_dir = os.path.join(location_dir, date)
        for wave in os.listdir(date_dir):
            source = os.path.join(date_dir, wave)
            destination = os.path.join('per_word', wave.split('_')[1])
            # makedirs also creates the 'per_word' root when it is missing
            # and does nothing when the word folder already exists, so no
            # separate isdir() check is needed.
            os.makedirs(destination, exist_ok=True)
            os.rename(source, os.path.join(destination, wave))
In [26]:
import pandas

# Load the semicolon-delimited picture-game export.
data = pandas.read_csv(
    '/home/herbert/picture-game-result-export.csv',
    delimiter=';',
)

# 'Filename' is the last '/'-separated component of each 'Opname' value.
data['Filename'] = [
    recording.rsplit('/', 1)[-1] for recording in data['Opname']
]
In [52]:
# Tag every recording that is not in `relevant` by prefixing its filename;
# the file stays in the same per-word folder.
# NOTE(review): `relevant` is not defined in the visible cells -- presumably a
# collection of filenames derived from `data`; confirm before re-running.
IRRELEVANT_PREFIX = 'irrelevant_accent_'
for word in os.listdir('per_word'):
    word_dir = os.path.join('per_word', word)
    for wave in os.listdir(word_dir):
        # Files tagged by an earlier run are skipped so that re-executing the
        # cell does not stack the prefix a second time.
        if wave.startswith(IRRELEVANT_PREFIX):
            continue
        source = os.path.join(word_dir, wave)
        if wave not in relevant:
            destination = os.path.join(word_dir, IRRELEVANT_PREFIX + wave)
            os.rename(source, destination)
In [63]:
# Write the `data` frame to an Excel workbook.
excel_path = '/home/herbert/picture-game-result-export-filename.xlsx'
data.to_excel(excel_path)