#!/usr/bin/env python3
"""Render the first rows of a count list as an HTML bar-chart table.

Usage: SCRIPT FILE

FILE is read as UTF-8.  Each non-blank line holds an integer count followed
by one or more text cells, separated by tabs; a line without any tab is
split on whitespace instead (first word = count, remainder = one cell).
At most MAX_ROWS rows are rendered and shorter inputs are padded with empty
rows, so the table always has MAX_ROWS rows.  The page is written to
standard output as UTF-8 bytes.

The count on the first row is the 100 % reference for every bar width.
NOTE(review): this presumably relies on the input being sorted by
descending count -- confirm against whatever produces FILE.
"""

import html
import sys

# The table always contains exactly this many rows.
MAX_ROWS = 20

# Heading shown for an input file, selected by a substring of its path.
# The first key (in insertion order) found in the path wins.
titles = {
    'nieuw-namen': 'nieuwe namen',
    'nieuw-woorden': 'nieuwe woorden',
    'locaties': 'locaties',
    'personen': 'personen',
    'organisaties': 'organisaties',
    'overige-namen': 'andere namen',
    'nieuw-adjww': 'nieuwe adjectieven, deelwoorden en werkwoorden',
    # Was the mis-encoded 'categoriƫn'; restored to the Dutch spelling.
    'cat.txt': 'categorieën',
    'tag.txt': 'tags',
}

# NOTE(review): the HTML tags inside the original string literals were
# stripped from the copy of this script that was reviewed; only their line
# structure and the '{}' placeholders survived.  The markup below is a
# minimal reconstruction -- restore the real tags, classes and styling from
# version control before deploying.
PAGE_HEAD = '<div>\n<h2>{}</h2>\n<table>\n'
ROW = '<tr><td><div style="width:{}%">&nbsp;</div></td>\n<td>{}</td></tr>\n'
CELL_SEP = '</td>\n<td>'
PAD_ROW = '<tr><td>&nbsp;</td></tr>\n'
PAGE_FOOT = '</table>\n</div>\n\n'


def _pick_title(path):
    """Return the heading for *path*, or *path* itself if no key matches."""
    for key, value in titles.items():
        if key in path:
            return value
    return path


def _parse_line(line):
    """Split a stripped, non-blank line into (count, HTML-escaped cells).

    Raises ValueError when the first field is not an integer.
    """
    fields = line.split('\t')
    if len(fields) == 1:
        # No tab present: fall back to whitespace, keeping the rest of the
        # line together as a single cell.
        words = line.split()
        fields = [words[0], ' '.join(words[1:])]
    return int(fields[0]), [html.escape(cell) for cell in fields[1:]]


def main(argv=None, out=None):
    """Write the HTML table for the file named in ``argv[1]``.

    argv -- argument vector; defaults to ``sys.argv``.
    out  -- binary stream to write to; defaults to ``sys.stdout.buffer``.

    Returns 0 on success and 2 when the file argument is missing.  Errors
    from opening the file (OSError) and from a non-integer count
    (ValueError) propagate to the caller.
    """
    argv = sys.argv if argv is None else argv
    out = sys.stdout.buffer if out is None else out

    if len(argv) < 2:
        sys.stderr.write('Usage: {} FILE\n'.format(argv[0] if argv else 'top'))
        return 2
    path = argv[1]

    parts = [PAGE_HEAD.format(html.escape(_pick_title(path)))]
    rows = []
    top = 0
    with open(path, 'rt', encoding='utf-8') as fp:
        for raw in fp:
            line = raw.strip()
            if not line:
                # Blank lines carry no count; skip them rather than crash.
                continue
            count, cells = _parse_line(line)
            if not rows:
                top = count
            # A zero reference count would divide by zero; draw empty bars.
            pct = 100 / top * count if top else 0.0
            rows.append(ROW.format(pct, CELL_SEP.join(cells)))
            if len(rows) == MAX_ROWS:
                break

    # Pad so that every generated table has the same height.
    rows.extend([PAD_ROW] * (MAX_ROWS - len(rows)))
    parts.extend(rows)
    parts.append(PAGE_FOOT)

    # Encode once, at the output boundary.
    out.write(''.join(parts).encode('utf-8'))
    return 0


if __name__ == '__main__':
    sys.exit(main())