diff --git a/.gitignore b/.gitignore index 3c4efe2..b1d3894 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. +## important ## +.acoustic_model/forced_alignment_novo.py + # User-specific files *.suo *.user diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index b0dbc23..c60d6d4 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index 1057f0f..b05a221 100644 Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py index 40defe6..35da212 100644 --- a/acoustic_model/check_novoapi.py +++ b/acoustic_model/check_novoapi.py @@ -25,14 +25,32 @@ mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_ stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) -phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) -df = pd.read_excel(phonelist_novo70_, 'list') +## novo phoneset translation_key = dict() -for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): - if not pd.isnull(ipa): - print('{0}:{1}'.format(ipa, novo70)) - translation_key[ipa] = novo70 -#df = pd.read_excel(stimmen_transcription, 'check') +#phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) +#df = pd.read_excel(phonelist_novo70_, 'list') +## *_simple includes columns which has only one phone in. +#for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): +# if not pd.isnull(ipa): +# print('{0}:{1}'.format(ipa, novo70)) +# translation_key[ipa] = novo70 +#phonelist_novo70 = np.unique(list(df['novo70_simple'])) + +phoneset_ipa = [] +phoneset_novo70 = [] +with open(default.cmu69_phoneset, "rt", encoding="utf-8") as fin: + lines = fin.read() + lines = lines.split('\n') + for line in lines: + words = line.split('\t') + if len(words) > 1: + novo70 = words[0] + ipa = words[1] + phoneset_ipa.append(ipa) + phoneset_novo70.append(novo70) + translation_key[ipa] = novo70 +phoneset_ipa = np.unique(phoneset_ipa) +phoneset_novo70 = np.unique(phoneset_novo70) diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 9f4d4fa..4f98e6c 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -3,7 +3,7 @@ import os #default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite') cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model' -kaldi_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5' + #config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy') #config_train = os.path.join(cygwin_dir, 'config', 'config.train') config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') @@ -30,11 +30,15 @@ repo_dir = r'C:\Users\Aki\source\repos' ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') -fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus' +WSL_dir = r'C:\OneDrive\WSL' +fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') +fame_s5_dir = os.path.join(fame_dir, 's5') +fame_corpus_dir = os.path.join(fame_dir, 'corpus') experiments_dir = r'c:\OneDrive\Research\rug\experiments' stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data') phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') -phonelist_novo70_xlsx = os.path.join(experiments_dir, 'Nederlandse phonesets_aki.xlsx') +novo_api_dir = os.path.join(WSL_dir, 'python-novo-api') +cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset') \ No newline at end of file