Created
November 17, 2017 13:14
-
-
Save jcb91/956abc5611d61098d80d8333caa25486 to your computer and use it in GitHub Desktop.
investigating hunspell eu dictionaries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "heading_collapsed": true | |
| }, | |
| "source": [ | |
| "## prep" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T11:52:54.498794Z", | |
| "start_time": "2017-11-16T11:52:54.311405Z" | |
| }, | |
| "collapsed": true, | |
| "hidden": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import os\n", | |
| "\n", | |
| "try: # py3\n", | |
| " from urllib.request import urlopen\n", | |
| "except ImportError: # py2\n", | |
| " from urllib2 import urlopen\n", | |
| " \n", | |
| "def get_dict_file(url, force_fetch=False):\n", | |
| "    \"\"\"Return the lines of the file at `url`, caching a copy locally.\n", | |
| "\n", | |
| "    The cache is a file in the current working directory named after the\n", | |
| "    last path component of `url`. It is used when it exists, unless\n", | |
| "    `force_fetch` is true; otherwise the file is downloaded (printing one\n", | |
| "    progress dot per 100 lines) and then saved to the cache.\n", | |
| "\n", | |
| "    Returns a list of text lines.\n", | |
| "    \"\"\"\n", | |
| "    localpath = os.path.basename(url)\n", | |
| "    if not force_fetch and os.path.exists(localpath):\n", | |
| "        print('loading', os.path.realpath(localpath))\n", | |
| "        # read with the encoding used to decode the download, not with\n", | |
| "        # whatever the platform locale happens to be\n", | |
| "        with open(localpath, 'r', encoding='utf-8') as f:\n", | |
| "            return f.readlines()\n", | |
| "    print('fetching', url)\n", | |
| "    lines = []\n", | |
| "    with urlopen(url) as req:\n", | |
| "        for line in req:\n", | |
| "            lines.append(line.decode('utf-8'))\n", | |
| "            if (len(lines) % 100) == 0:\n", | |
| "                # one dot per 100 lines, 80 dots per row\n", | |
| "                print('.', end=('' if len(lines) % 8000 else '\\n'))\n", | |
| "    print('\\n\\n' + ('-'*80) + '\\n')\n", | |
| "    print(sum(len(line) for line in lines), 'chars')\n", | |
| "    # save a local copy\n", | |
| "    with open(localpath, 'w', encoding='utf-8') as f:\n", | |
| "        f.writelines(lines)\n", | |
| "    return lines" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "heading_collapsed": true | |
| }, | |
| "source": [ | |
| "## `.aff` file" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T11:53:03.701529Z", | |
| "start_time": "2017-11-16T11:52:57.200996Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "fetching http://xuxen.eus/static/hunspell/eu_ES.aff\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| ".................................................................\n", | |
| "\n", | |
| "--------------------------------------------------------------------------------\n", | |
| "\n", | |
| "3302091 chars\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# lines of the .aff file, downloaded or read from the local cache\n", | |
| "aff_url = 'http://xuxen.eus/static/hunspell/eu_ES.aff'\n", | |
| "aff_lines = get_dict_file(aff_url)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T12:24:08.953332Z", | |
| "start_time": "2017-11-16T12:24:07.460376Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "353 rules loaded\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Parse the SFX/PFX lines of the .aff file into\n", | |
| "#   rules[code] -> {entry line split into fields: index of first occurrence}\n", | |
| "rules = {}\n", | |
| "# NOTE(review): `seen` is not used in this cell\n", | |
| "current_rule, seen = '', {}\n", | |
| "for ii, line in enumerate(aff_lines):\n", | |
| "    lineparts = tuple(line.strip().split())\n", | |
| "    if not lineparts or lineparts[0].startswith('#'):\n", | |
| "        continue\n", | |
| "    if lineparts[0] not in ('SFX', 'PFX'):\n", | |
| "        continue\n", | |
| "\n", | |
| "    rtype, rcode = lineparts[:2]\n", | |
| "    if len(lineparts) < 5:\n", | |
| "        # new rule! (a line with fewer than five fields is taken as a header)\n", | |
| "        current_rule = rtype, rcode\n", | |
| "        continue\n", | |
| "\n", | |
| "    # an entry must belong to the block opened by the latest header;\n", | |
| "    # check this before recording anything for the line\n", | |
| "    if (rtype, rcode) != current_rule:\n", | |
| "        raise ValueError('err: line %d' % ii)\n", | |
| "    rule = rules.setdefault(rcode, {})\n", | |
| "    if lineparts in rule:\n", | |
| "        # report the line where the entry was first seen, not the whole dict\n", | |
| "        print('duplicate line:', ii, 'already seen on line', rule[lineparts])\n", | |
| "    rule.setdefault(lineparts, ii)\n", | |
| "print(len(rules), 'rules loaded')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T11:53:19.603521Z", | |
| "start_time": "2017-11-16T11:53:17.948323Z" | |
| }, | |
| "collapsed": true, | |
| "hidden": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Alternative parse of the .aff file that trusts the entry count given in\n", | |
| "# each SFX header:\n", | |
| "#   rules[code] -> {entry line split into fields: [indices where seen]}\n", | |
| "# NOTE(review): only SFX blocks are read here, and the result is assigned\n", | |
| "# to the same `rules` name that other cells use.\n", | |
| "rules = {}\n", | |
| "ii = 0\n", | |
| "while ii < len(aff_lines):\n", | |
| "    line = aff_lines[ii].strip()\n", | |
| "    if not line or line.startswith('#') or not line.startswith('SFX'):\n", | |
| "        ii += 1\n", | |
| "        continue\n", | |
| "\n", | |
| "    lineparts = line.split()\n", | |
| "    rule_type = lineparts[0]\n", | |
| "    # keep the code as a string so it compares equal to the codes read\n", | |
| "    # from the .dic file\n", | |
| "    rule_code = lineparts[1]\n", | |
| "    combineable = lineparts[2] == 'Y'\n", | |
| "    nents = int(lineparts[3])\n", | |
| "\n", | |
| "    seen = {}\n", | |
| "    for jj in range(ii + 1, ii + 1 + nents):\n", | |
| "        line = aff_lines[jj].strip()\n", | |
| "        lineparts = tuple(line.split())\n", | |
| "        # an empty line or a line of another type means the count is wrong\n", | |
| "        if not lineparts or lineparts[0] != rule_type:\n", | |
| "            raise ValueError('err: line %d' % jj)\n", | |
| "        seen.setdefault(lineparts, []).append(jj)\n", | |
| "    rules[rule_code] = seen\n", | |
| "    for idxs in [ll for ll in seen.values() if len(ll) > 1]:\n", | |
| "        print('duplicate lines: ' + ','.join(map(str, idxs)))\n", | |
| "    # step past the header and its entries; unlike jj + 1 this is also\n", | |
| "    # correct when the header declares zero entries\n", | |
| "    ii += 1 + nents" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T12:59:14.728897Z", | |
| "start_time": "2017-11-16T12:59:14.241940Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "exact-duplicate rules:\n", | |
| "[1] : 18\n", | |
| "[11] : 21, 22\n", | |
| "[23] : 27, 29\n", | |
| "[34] : 35\n", | |
| "[51] : 354, 355\n", | |
| "[53] : 54, 55\n", | |
| "[63] : 67, 69\n", | |
| "[65] : 66, 68\n", | |
| "[76] : 77, 78\n", | |
| "[85] : 86, 87, 88, 89, 91, 92\n", | |
| "[97] : 357\n", | |
| "[104] : 106, 107\n", | |
| "[105] : 108, 109\n", | |
| "[110] : 111\n", | |
| "[117] : 120\n", | |
| "[122] : 128, 129\n", | |
| "[126] : 130, 132\n", | |
| "[150] : 152, 153, 311\n", | |
| "[157] : 159, 163, 164, 165, 166\n", | |
| "[171] : 173\n", | |
| "[172] : 175\n", | |
| "[174] : 176, 177, 181\n", | |
| "[183] : 184\n", | |
| "[193] : 194, 197, 198, 199, 200, 201, 202, 203, 204, 205\n", | |
| "[207] : 208\n", | |
| "[220] : 225\n", | |
| "[221] : 226\n", | |
| "[229] : 230\n", | |
| "[234] : 321\n", | |
| "[237] : 322\n", | |
| "[238] : 244, 323, 328\n", | |
| "[239] : 324\n", | |
| "[240] : 242, 243, 319, 325, 326, 327\n", | |
| "[247] : 329\n", | |
| "[250] : 253, 255, 260, 261, 262, 263\n", | |
| "[254] : 257, 258\n", | |
| "[272] : 274, 277\n", | |
| "[273] : 275, 276\n", | |
| "[282] : 283, 284\n", | |
| "[285] : 286, 287\n", | |
| "[293] : 297, 298, 300\n", | |
| "[331] : 332, 334, 335\n", | |
| "[340] : 342, 343, 344\n", | |
| "[353] : 356\n", | |
| "[1000] : 1001\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# group rule codes by their entries, ignoring the first two fields of each\n", | |
| "# entry, so that codes with identical entries end up in the same list\n", | |
| "rdict = {}\n", | |
| "for rcode, rule in sorted(rules.items()):\n", | |
| "    bodies = [' '.join(fields[2:]) for fields in rule]\n", | |
| "    rkey = tuple(sorted(bodies))\n", | |
| "    rdict.setdefault(rkey, []).append(rcode)\n", | |
| "\n", | |
| "print('exact-duplicate rules:')\n", | |
| "groups = sorted(sorted(int(code) for code in codes) for codes in rdict.values())\n", | |
| "for group in groups:\n", | |
| "    if len(group) < 2:\n", | |
| "        continue\n", | |
| "    # lowest code first, then the codes that duplicate it\n", | |
| "    print(group[:1], ':', ', '.join(str(code) for code in group[1:]))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T13:02:30.150607Z", | |
| "start_time": "2017-11-16T13:02:30.120704Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "353 rules in aff\n", | |
| "252 distinct rules in aff\n", | |
| "=> 28.61% duplicate rules in aff\n", | |
| "=> 23.11% duplicate lines in aff\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "n_rules = len(rules)\n", | |
| "n_rules_distinct = len(rdict)\n", | |
| "# entries summed over the distinct rule bodies vs. over every rule\n", | |
| "n_aff_lines_distinct = sum(len(rkey) for rkey in rdict)\n", | |
| "n_aff_lines = sum(len(rule) for rule in rules.values())\n", | |
| "\n", | |
| "print('{} rules in aff'.format(n_rules))\n", | |
| "print('{} distinct rules in aff'.format(n_rules_distinct))\n", | |
| "print('=> {:.2f}% duplicate rules in aff'.format(\n", | |
| "    100 * (1 - n_rules_distinct / n_rules)))\n", | |
| "print('=> {:.2f}% duplicate lines in aff'.format(\n", | |
| "    100 * (1 - n_aff_lines_distinct / n_aff_lines)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "heading_collapsed": true | |
| }, | |
| "source": [ | |
| "## `.dic` file" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T11:54:13.675880Z", | |
| "start_time": "2017-11-16T11:54:09.091309Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "fetching http://xuxen.eus/static/hunspell/eu_ES.dic\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "................................................................................\n", | |
| "......\n", | |
| "\n", | |
| "--------------------------------------------------------------------------------\n", | |
| "\n", | |
| "2144523 chars\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# lines of the .dic file, downloaded or read from the local cache\n", | |
| "dic_url = 'http://xuxen.eus/static/hunspell/eu_ES.dic'\n", | |
| "dic_lines = get_dict_file(dic_url)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 117, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T13:26:11.591863Z", | |
| "start_time": "2017-11-16T13:26:09.471543Z" | |
| }, | |
| "collapsed": true, | |
| "hidden": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# dic maps (word, *sorted rule codes) -> indices of the lines with that entry\n", | |
| "dic = {}\n", | |
| "for ii, raw in enumerate(dic_lines):\n", | |
| "    entry = raw.strip()\n", | |
| "    # skip first line which just lists the approx number of entries\n", | |
| "    if ii < 1 or not entry or entry.startswith('#'):\n", | |
| "        continue\n", | |
| "    fields = entry.split('/')\n", | |
| "    # rule codes, if any, are the comma-separated part after the first '/'\n", | |
| "    codes = sorted(fields[1].split(',')) if len(fields) > 1 else []\n", | |
| "    dic.setdefault((fields[0],) + tuple(codes), []).append(ii)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 118, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T13:26:15.710637Z", | |
| "start_time": "2017-11-16T13:26:15.603245Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "144690 non-blank lines in dic\n", | |
| "133225 distinct non-blank lines in dic\n", | |
| "=> 7.92% exact-duplicate lines in dic\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# count real entries only: skip the first line (the entry count), blank\n", | |
| "# lines and comments, so the numbers match the labels printed below\n", | |
| "dic_entries = [\n", | |
| "    line.strip() for ii, line in enumerate(dic_lines)\n", | |
| "    if ii >= 1 and line.strip() and not line.strip().startswith('#')]\n", | |
| "n_dic_lines = len(dic_entries)\n", | |
| "n_dic_distinct = len(set(dic_entries))\n", | |
| "print('%d non-blank lines in dic' % n_dic_lines)\n", | |
| "print('%d distinct non-blank lines in dic' % n_dic_distinct)\n", | |
| "print('=> %.2f%% exact-duplicate lines in dic' % (\n", | |
| "    100 * (1 - float(n_dic_distinct) / n_dic_lines)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "heading_collapsed": true | |
| }, | |
| "source": [ | |
| "## mismatched codes" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 258, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:08:54.073304Z", | |
| "start_time": "2017-11-16T20:08:53.764438Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "codes in aff unused by dic:\n", | |
| " 197, 312\n", | |
| "codes in dic missing from aff:\n", | |
| " 314, 315, 316, 317, 359, 52, 9999\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# compare codes as strings on both sides, whichever type `rules` is keyed by\n", | |
| "aff_rcodes = {str(rcode) for rcode in rules}\n", | |
| "dic_rcodes = {rcode for dkey in dic.keys() for rcode in dkey[1:]}\n", | |
| "\n", | |
| "# sort numerically: sorted as strings, '52' would come after '314'\n", | |
| "print('codes in aff unused by dic:')\n", | |
| "print(' ', ', '.join(sorted(aff_rcodes.difference(dic_rcodes), key=int)))\n", | |
| "print('codes in dic missing from aff:')\n", | |
| "print(' ', ', '.join(sorted(dic_rcodes.difference(aff_rcodes), key=int)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 291, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:30:46.772251Z", | |
| "start_time": "2017-11-16T20:30:46.160376Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1 word:\n", | |
| "\t1, 3, 7, 11, 12, 13, 18, 22, 23, 32, 39, 42, 45, 50, 53, 55, 56, 57, 58, 59, 73, 75, 77, 78, 83, 86, 92, 93, 94, 98, 99, 110, 111, 112, 113, 114, 117, 122, 136, 138, 139, 141, 142, 143, 144, 148, 149, 150, 151, 152, 155, 156, 157, 165, 169, 171, 172, 174, 176, 179, 184, 186, 188, 193, 200, 204, 205, 207, 208, 210, 211, 213, 220, 221, 222, 227, 232, 236, 250, 251, 254, 302, 305, 306, 308, 310, 313, 315, 329, 330, 333, 334, 335, 342, 343, 344, 345, 347, 349, 350, 351, 352, 354, 355, 356, 357, 358\n", | |
| "\n", | |
| "2 words:\n", | |
| "\t2, 15, 16, 17, 21, 40, 47, 52, 76, 80, 88, 91, 96, 115, 140, 145, 147, 153, 154, 173, 175, 178, 180, 187, 189, 190, 192, 194, 215, 216, 218, 219, 224, 225, 226, 231, 253, 259, 281, 326, 331, 340, 348\n", | |
| "\n", | |
| "3 words:\n", | |
| "\t19, 20, 33, 44, 62, 66, 67, 84, 85, 109, 119, 120, 121, 125, 170, 177, 182, 203, 206, 242, 249, 264, 272, 304, 309, 311, 322, 338, 346\n", | |
| "\n", | |
| "4 words:\n", | |
| "\t10, 34, 51, 54, 79, 97, 183, 217, 223, 283, 317, 332\n", | |
| "\n", | |
| "5 words:\n", | |
| "\t71, 95, 160, 195, 202, 228, 307\n", | |
| "\n", | |
| "6 words:\n", | |
| "\t214, 248, 339, 341\n", | |
| "\n", | |
| "7 words:\n", | |
| "\t9, 146, 257, 265, 321, 353\n", | |
| "\n", | |
| "8 words:\n", | |
| "\t81, 135, 185, 199, 235, 286, 303, 323\n", | |
| "\n", | |
| "9 words:\n", | |
| "\t8, 41, 90, 252, 316, 359\n", | |
| "\n", | |
| "10 words:\n", | |
| "\t31, 38, 103\n", | |
| "\n", | |
| "11 words:\n", | |
| "\t87, 137, 198\n", | |
| "\n", | |
| "12 words:\n", | |
| "\t270, 320\n", | |
| "\n", | |
| "13 words:\n", | |
| "\t181, 209, 229\n", | |
| "\n", | |
| "14 words:\n", | |
| "\t127\n", | |
| "\n", | |
| "16 words:\n", | |
| "\t201, 319\n", | |
| "\n", | |
| "17 words:\n", | |
| "\t241\n", | |
| "\n", | |
| "18 words:\n", | |
| "\t134, 269, 318\n", | |
| "\n", | |
| "19 words:\n", | |
| "\t129, 191\n", | |
| "\n", | |
| "20 words:\n", | |
| "\t82\n", | |
| "\n", | |
| "21 words:\n", | |
| "\t324\n", | |
| "\n", | |
| "23 words:\n", | |
| "\t36, 294\n", | |
| "\n", | |
| "24 words:\n", | |
| "\t46, 168, 325\n", | |
| "\n", | |
| "25 words:\n", | |
| "\t162\n", | |
| "\n", | |
| "26 words:\n", | |
| "\t48, 280\n", | |
| "\n", | |
| "27 words:\n", | |
| "\t291\n", | |
| "\n", | |
| "34 words:\n", | |
| "\t26, 260\n", | |
| "\n", | |
| "35 words:\n", | |
| "\t196, 314, 327\n", | |
| "\n", | |
| "36 words:\n", | |
| "\t258\n", | |
| "\n", | |
| "38 words:\n", | |
| "\t6\n", | |
| "\n", | |
| "39 words:\n", | |
| "\t72\n", | |
| "\n", | |
| "40 words:\n", | |
| "\t35\n", | |
| "\n", | |
| "41 words:\n", | |
| "\t237, 246\n", | |
| "\n", | |
| "43 words:\n", | |
| "\t4\n", | |
| "\n", | |
| "45 words:\n", | |
| "\t256\n", | |
| "\n", | |
| "49 words:\n", | |
| "\t285\n", | |
| "\n", | |
| "50 words:\n", | |
| "\t261, 298\n", | |
| "\n", | |
| "52 words:\n", | |
| "\t282, 289, 290\n", | |
| "\n", | |
| "53 words:\n", | |
| "\t5\n", | |
| "\n", | |
| "55 words:\n", | |
| "\t288\n", | |
| "\n", | |
| "61 words:\n", | |
| "\t105, 263, 295\n", | |
| "\n", | |
| "63 words:\n", | |
| "\t14, 43\n", | |
| "\n", | |
| "67 words:\n", | |
| "\t292\n", | |
| "\n", | |
| "75 words:\n", | |
| "\t101\n", | |
| "\n", | |
| "77 words:\n", | |
| "\t296\n", | |
| "\n", | |
| "78 words:\n", | |
| "\t64, 108, 278, 328\n", | |
| "\n", | |
| "83 words:\n", | |
| "\t102\n", | |
| "\n", | |
| "92 words:\n", | |
| "\t100, 158\n", | |
| "\n", | |
| "102 words:\n", | |
| "\t37\n", | |
| "\n", | |
| "104 words:\n", | |
| "\t130\n", | |
| "\n", | |
| "115 words:\n", | |
| "\t106\n", | |
| "\n", | |
| "117 words:\n", | |
| "\t262\n", | |
| "\n", | |
| "119 words:\n", | |
| "\t247\n", | |
| "\n", | |
| "132 words:\n", | |
| "\t284\n", | |
| "\n", | |
| "136 words:\n", | |
| "\t299\n", | |
| "\n", | |
| "143 words:\n", | |
| "\t164\n", | |
| "\n", | |
| "146 words:\n", | |
| "\t49\n", | |
| "\n", | |
| "192 words:\n", | |
| "\t275\n", | |
| "\n", | |
| "203 words:\n", | |
| "\t161\n", | |
| "\n", | |
| "248 words:\n", | |
| "\t163\n", | |
| "\n", | |
| "249 words:\n", | |
| "\t27\n", | |
| "\n", | |
| "286 words:\n", | |
| "\t277\n", | |
| "\n", | |
| "296 words:\n", | |
| "\t132\n", | |
| "\n", | |
| "300 words:\n", | |
| "\t24\n", | |
| "\n", | |
| "301 words:\n", | |
| "\t25, 301\n", | |
| "\n", | |
| "303 words:\n", | |
| "\t287\n", | |
| "\n", | |
| "313 words:\n", | |
| "\t107\n", | |
| "\n", | |
| "314 words:\n", | |
| "\t133\n", | |
| "\n", | |
| "317 words:\n", | |
| "\t30\n", | |
| "\n", | |
| "360 words:\n", | |
| "\t70\n", | |
| "\n", | |
| "389 words:\n", | |
| "\t239\n", | |
| "\n", | |
| "393 words:\n", | |
| "\t255\n", | |
| "\n", | |
| "426 words:\n", | |
| "\t9999\n", | |
| "\n", | |
| "435 words:\n", | |
| "\t123\n", | |
| "\n", | |
| "445 words:\n", | |
| "\t167\n", | |
| "\n", | |
| "500 words:\n", | |
| "\t104\n", | |
| "\n", | |
| "523 words:\n", | |
| "\t297\n", | |
| "\n", | |
| "527 words:\n", | |
| "\t268\n", | |
| "\n", | |
| "671 words:\n", | |
| "\t267\n", | |
| "\n", | |
| "684 words:\n", | |
| "\t126\n", | |
| "\n", | |
| "704 words:\n", | |
| "\t266\n", | |
| "\n", | |
| "718 words:\n", | |
| "\t245\n", | |
| "\n", | |
| "732 words:\n", | |
| "\t89\n", | |
| "\n", | |
| "779 words:\n", | |
| "\t61\n", | |
| "\n", | |
| "911 words:\n", | |
| "\t159\n", | |
| "\n", | |
| "929 words:\n", | |
| "\t29\n", | |
| "\n", | |
| "938 words:\n", | |
| "\t230\n", | |
| "\n", | |
| "989 words:\n", | |
| "\t300\n", | |
| "\n", | |
| "997 words:\n", | |
| "\t166\n", | |
| "\n", | |
| "1023 words:\n", | |
| "\t63\n", | |
| "\n", | |
| "1107 words:\n", | |
| "\t69\n", | |
| "\n", | |
| "1273 words:\n", | |
| "\t279\n", | |
| "\n", | |
| "1414 words:\n", | |
| "\t293\n", | |
| "\n", | |
| "1419 words:\n", | |
| "\t128\n", | |
| "\n", | |
| "1494 words:\n", | |
| "\t276\n", | |
| "\n", | |
| "1701 words:\n", | |
| "\t244\n", | |
| "\n", | |
| "1788 words:\n", | |
| "\t65\n", | |
| "\n", | |
| "2279 words:\n", | |
| "\t1003\n", | |
| "\n", | |
| "2350 words:\n", | |
| "\t124\n", | |
| "\n", | |
| "2488 words:\n", | |
| "\t273\n", | |
| "\n", | |
| "2718 words:\n", | |
| "\t233\n", | |
| "\n", | |
| "2963 words:\n", | |
| "\t60\n", | |
| "\n", | |
| "3035 words:\n", | |
| "\t131\n", | |
| "\n", | |
| "3391 words:\n", | |
| "\t274\n", | |
| "\n", | |
| "3536 words:\n", | |
| "\t1002\n", | |
| "\n", | |
| "4026 words:\n", | |
| "\t28\n", | |
| "\n", | |
| "4480 words:\n", | |
| "\t1004\n", | |
| "\n", | |
| "4536 words:\n", | |
| "\t238\n", | |
| "\n", | |
| "6183 words:\n", | |
| "\t240\n", | |
| "\n", | |
| "6887 words:\n", | |
| "\t68\n", | |
| "\n", | |
| "7941 words:\n", | |
| "\t271\n", | |
| "\n", | |
| "8136 words:\n", | |
| "\t1005\n", | |
| "\n", | |
| "8562 words:\n", | |
| "\t1001\n", | |
| "\n", | |
| "9753 words:\n", | |
| "\t1000\n", | |
| "\n", | |
| "12018 words:\n", | |
| "\t0\n", | |
| "\n", | |
| "13477 words:\n", | |
| "\t234\n", | |
| "\n", | |
| "18001 words:\n", | |
| "\t243\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# _rcodes: rule code -> number of distinct .dic entries that carry it;\n", | |
| "# entries without any code are tallied under '0'\n", | |
| "_rcodes = {}\n", | |
| "for dkey in dic:\n", | |
| "    for code in dkey[1:] or ('0',):\n", | |
| "        _rcodes[code] = _rcodes.get(code, 0) + 1\n", | |
| "\n", | |
| "# invert the tally: entry count -> codes used that many times\n", | |
| "by_count = {}\n", | |
| "for code, count in _rcodes.items():\n", | |
| "    by_count.setdefault(count, []).append(code)\n", | |
| "\n", | |
| "for num in sorted(by_count):\n", | |
| "    plural = 's' if num != 1 else ''\n", | |
| "    codes = sorted(int(code) for code in by_count[num])\n", | |
| "    print('{} word{}:\\n\\t{}\\n'.format(\n", | |
| "        num, plural, ', '.join(str(code) for code in codes)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 289, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:29:57.641083Z", | |
| "start_time": "2017-11-16T20:29:57.621896Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "12018 .dic entries have no rules\n", | |
| "18001 .dic entries have rule 243\n", | |
| "\n", | |
| "these rules are used for only a single .dic entry:\n", | |
| "1, 3, 7, 11, 12, 13, 18, 22, 23, 32, 39, 42, 45, 50, 53, 55, 56, 57, 58, 59, 73, 75, 77, 78, 83, 86, 92, 93, 94, 98, 99, 110, 111, 112, 113, 114, 117, 122, 136, 138, 139, 141, 142, 143, 144, 148, 149, 150, 151, 152, 155, 156, 157, 165, 169, 171, 172, 174, 176, 179, 184, 186, 188, 193, 200, 204, 205, 207, 208, 210, 211, 213, 220, 221, 222, 227, 232, 236, 250, 251, 254, 302, 305, 306, 308, 310, 313, 315, 329, 330, 333, 334, 335, 342, 343, 344, 345, 347, 349, 350, 351, 352, 354, 355, 356, 357, 358\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "n_no_rules = _rcodes['0']\n", | |
| "# hard-coded: the code with the highest count in the recorded tally\n", | |
| "top_code = '243'\n", | |
| "print('{} .dic entries have no rules'.format(n_no_rules))\n", | |
| "print('{} .dic entries have rule {}'.format(_rcodes[top_code], top_code))\n", | |
| "print()\n", | |
| "single_use = sorted(int(rc) for rc, nn in _rcodes.items() if nn == 1)\n", | |
| "print('these rules are used for only a single .dic entry:\\n{}'.format(\n", | |
| "    ', '.join(str(rc) for rc in single_use)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 301, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:35:22.932514Z", | |
| "start_time": "2017-11-16T20:35:12.394894Z" | |
| }, | |
| "hidden": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEKCAYAAAA4t9PUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE51JREFUeJzt3X+w5XV93/Hny0VREIkI2dkBzEJna4I2WckWTCXWhESQ\ntICNkrVpslZTmhYzsdYki1p10mFq0sZOk1aSzci4MUTEEcbNQKOwxtBMY3ChC7LgllWXwmZhB43A\nSIa4m3f/OJ+Lh/Xu3Xv3c8+v5fmYuXO+38/3x3nf7zn3+7rf36kqJEk6Us+ZdAGSpNlmkEiSuhgk\nkqQuBokkqYtBIknqYpBIkroYJJKkLgaJJKmLQSJJ6nLMpAvocfLJJ9fq1asnXYYkzZQ77rjj0ao6\nZbnmN9NBsnr1arZt2zbpMiRppiR5YDnn564tSVIXg0SS1MUgkSR1MUgkSV0MEklSF4NEktTFIJEk\ndTFIJEldDBJJUpeZDpIv7XmM1RtvYvXGmyZdiiQ9a810kEiSJs8gkSR1MUgkSV0MEklSF4NEktTF\nIJEkdTFIJEldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIXg0SS1MUgkSR1MUgkSV0MEklSF4NEktTF\nIJEkdTFIJEldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIXg0SS1GVkQZLk9CR/muTeJDuS/HJrPynJ\nLUnub68vHprmyiS7kuxMcsGoapMkLZ9RbpHsB/59VZ0FvAq4IslZwEZga1WtAba2ftqw9cDLgQuB\nDydZMcL6JEnLYGRBUlV7q+rO1v0EcB9wKnAJsLmNthm4tHVfAlxXVU9V1deAXcA5o6pPkrQ8xnKM\nJMlq4JXAXwIrq2pvG/QwsLJ1nwo8ODTZQ61NkjTFRh4kSV4IfAp4R1U9PjysqgqoJc7v8iTbkmw7\n8ORjy1ipJOlIjDRIkjyXQYhcW1U3tOZHkqxqw1cB+1r7HuD0oclPa23PUFWbqmpdVa1bcdyJoyte\nkrQoozxrK8BHgPuq6kNDg7YAG1r3BuDTQ+3rkxyb5AxgDXD7qOqTJC2PY0Y471cDPwd8Kcn21vZu\n4IPA9UneBjwAXAZQVTuSXA/cy+CMryuq6sAI65MkLYORBUlV/TmQQww+/xDTXAVcNaqaJEnLzyvb\nJUldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIXg0SS1MUgkSR1MUgkSV0MEklSF4NEktTFIJEkdTFI\nJEldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIXg0SS1MUgkSR1MUgkSV0MEklSF4NEktTFIJEkdTFI\nJEldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIXg0SS1MUgkSR1MUgkSV0MEklSF4NEktRlZEGS5Jok\n+5LcM9T2gSR7kmxvPxcNDbsyya4kO5NcMKq6JEnLa5RbJB8FLpyn/b9W1dr2czNAkrOA9cDL2zQf\nTrJihLVJkpbJyIKkqm4DvrHI0S8Brquqp6rqa8Au4JxR1SZJWj6TOEbyS0nubru+XtzaTgUeHBrn\nodYmSZpy4w6Sq4EzgbXAXuC3ljqDJJcn2ZZk24EnH1vu+iRJSzTWIKmqR6rqQFX9HfD7fGf31R7g\n9KFRT2tt881jU1Wtq6p1K447cbQFS5IOa6xBkmTVUO8bgLkzurYA65Mcm+QMYA1w+zhrkyQdmWNG\nNeMkHwdeC5yc5CHg/cBrk6wFCtgN/GuAqtqR5HrgXmA/cEVVHRhVbZKk5TOyIKmqN8/T/JEFxr8K\nuGpU9UiSRsMr2yVJXQwSSVIXg0SS1OWwQZLkTUlOaN3vTXJDkrNHX5okaRYsZovkP1TVE0nOA36C\nwQHzq0dbliRpViwmSOZOw/0pYFNV3QQ8b3QlSZJmyWKCZE+S3wN+Brg5ybGLnE6S9CywmEC4DPgM\ncEFVfRM4CfiVkVYlSZoZhw2SqnoS2Aec15
r2A/ePsihJ0uxYzFlb7wd+DbiyNT0X+MNRFiVJmh2L\n2bX1BuBi4FsAVfVXwAmjLEqSNDsWEyR/W1XF4EaLJDl+tCVJkmbJYoLk+nbW1vck+VfArQyeJSJJ\n0uHv/ltV/yXJTwKPAy8D3ldVt4y8MknSTFjUbeRbcBgekqTvcsggSfIEg+Miaa9PDwKqql404tok\nSTPgkEFSVZ6ZJUk6rMPu2kry0vnaq+r/LX85kqRZs5hjJDcNdT8fOAPYCbx8JBVJkmbKYs7a+gfD\n/e1ZJP92ZBVJkmbKku/iW1V3AueOoBZJ0gxazDGSdw71Pgc4G/irkVUkSZopizlGMnz21n4Gx0w+\nNZpyJEmzZsEgSbICOKGq3jWmeiRJM2bBYyRVdQB49ZhqkSTNoMXs2tqeZAvwSdqt5AGq6oaRVSVJ\nmhmLCZLnA18HfnyorQCDRJK0qOtI/uU4CpEkzaYlX0ciSdIwg0SS1MUgkSR1OWyQJHnvUPexoy1H\nkjRrDhkkSX4tyY8Abxxq/ovRlyRJmiULnbX1ZeBNwJlJ/lfrf0mSl1XVzrFUJ0maegvt2vom8G5g\nF/Ba4L+19o1J/veI65IkzYiFtkguAN4H/D3gQ8DdwLe8rkSSNOyQWyRV9e6qOh/YDXwMWAGckuTP\nk/zx4Wac5Jok+5LcM9R2UpJbktzfXl88NOzKJLuS7ExyQddvJUkam8Wc/vuZqtpWVZuAh6rqPGAx\nWyUfBS48qG0jsLWq1gBbWz9JzgLWM3h874XAh9udhyVJU+6wQVJVvzrU+5bW9ugiprsN+MZBzZcA\nm1v3ZuDSofbrquqpqvoag+My5xzuPSRJk7ekCxKr6q7O91tZVXtb98PAytZ9KvDg0HgPtbbvkuTy\nJNuSbDvw5GOd5UiSek3syvaqKgZ3EV7qdJuqal1VrVtx3IkjqEyStBTjDpJHkqwCaK/7Wvse4PSh\n8U5rbZKkKTfuINkCbGjdG4BPD7WvT3JskjOANcDtY65NknQEFvNgqyOS5OMMLmQ8OclDwPuBDwLX\nJ3kb8ABwGUBV7UhyPXAvsB+4oj3mV5I05UYWJFX15kMMOv8Q418FXDWqeiRJo+Ft5CVJXQwSSVIX\ng0SS1MUgkSR1MUgkSV0MEklSF4NEktTFIJEkdTFIJEldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIX\ng0SS1MUgkSR1MUgkSV0MEklSF4NEktTFIJEkdTFIJEldDBJJUheDRJLUxSCRJHUxSCRJXQwSSVIX\ng0SS1MUgkSR1MUgkSV0MEklSF4NEktTFIJEkdTFIJEldDBJJUheDRJLU5ZhJvGmS3cATwAFgf1Wt\nS3IS8AlgNbAbuKyq/noS9UmSFm+SWyQ/VlVrq2pd698IbK2qNcDW1i9JmnLTtGvrEmBz694MXDrB\nWiRJizSpICng1iR3JLm8ta2sqr2t+2Fg5WRKkyQtxUSOkQDnVdWeJN8L3JLky8MDq6qS1HwTtuC5\nHGDFi04ZfaWSpAVNZIukqva0133AjcA5wCNJVgG0132HmHZTVa2rqnUrjjtxXCVLkg5h7EGS5Pgk\nJ8x1A68D7gG2ABvaaBuAT4+7NknS0k1i19ZK4MYkc+//R1X1J0m+CFyf5G3AA8BlE6hNkrREYw+S\nqvoq8EPztH8dOH/c9UiS+kzT6b+SpBlkkEiSuhgkkqQuBokkqYtBIknqYpBIkroYJJKkLgaJJKmL\nQSJJ6mKQSJK6GCSSpC4GiSSpi0EiSepikEiSuhgkkqQuBokkqYtBIknqYpBIkroYJJKkLgaJJKmL\nQSJJ6mKQSJK6HDVBsnrjTazeeNOky5CkZ52jJkgkSZNhkEiSuhgkkqQuBokkqYtBIknqcsykC1hu\nw2du7f7gT02wEkl6dnCLRJLUxSCRJHU5qoPEixQlafSO6iCRJI3eUXewfT7zbZV4IF6SlodbJJKk\nLlMXJE
kuTLIzya4kGyddzzh4LEfSLJuqXVtJVgD/A/hJ4CHgi0m2VNW9o37vabj+ZBpqkKSlmqog\nAc4BdlXVVwGSXAdcAix7kCy0BXDwsJ6V+ty85ptH71bIYuY9qto1P/8ZODS/T0evaQuSU4EHh/of\nAs6dUC0LGvcB/CNdQS12unH/kS81RMdd+0L1LfV9jrS+SX4m07Syn4YA6q1hWpftcklVTbqGpyV5\nI3BhVf1C6/854NyqevvQOJcDl7feVwD3jL3QpTsZeHTSRSyCdS4v61w+s1AjzE6dL6uqE5ZrZtO2\nRbIHOH2o/7TW9rSq2gRsAkiyrarWja+8I2Ody8s6l9cs1DkLNcJs1bmc85u2s7a+CKxJckaS5wHr\ngS0TrkmStICp2iKpqv1J3g58BlgBXFNVOyZcliRpAVMVJABVdTNw8yJH3zTKWpaRdS4v61xes1Dn\nLNQIz9I6p+pguyRp9kzbMRJJ0oyZ2SCZllupJDk9yZ8muTfJjiS/3No/kGRPku3t56Khaa5sde9M\ncsEYa92d5Eutnm2t7aQktyS5v72+eJJ1JnnZ0DLbnuTxJO+YhuWZ5Jok+5LcM9S25OWX5Ifb57Ar\nyW8nyRjq/M9Jvpzk7iQ3Jvme1r46yd8MLdffnXCdS/6cJ1TnJ4Zq3J1ke2ufyPJcYD00nu9nVc3c\nD4MD8V8BzgSeB9wFnDWhWlYBZ7fuE4D/C5wFfAB41zzjn9XqPRY4o/0eK8ZU627g5IPafhPY2Lo3\nAr8x6ToP+pwfBr5vGpYn8BrgbOCenuUH3A68CgjwP4HXj6HO1wHHtO7fGKpz9fB4B81nEnUu+XOe\nRJ0HDf8t4H2TXJ4cej00lu/nrG6RPH0rlar6W2DuVipjV1V7q+rO1v0EcB+DK/QP5RLguqp6qqq+\nBuxi8PtMyiXA5ta9Gbh0qH3SdZ4PfKWqHlhgnLHVWVW3Ad+Y5/0XvfySrAJeVFVfqMFf7R8MTTOy\nOqvqs1W1v/V+gcE1Woc0qToXMFXLc077b/0y4OMLzWPUdS6wHhrL93NWg2S+W6kstPIeiySrgVcC\nf9mafqntSrhmaJNykrUXcGuSOzK4QwDAyqra27ofBla27mlYxut55h/otC1PWPryO7V1H9w+Tm9l\n8J/mnDPabpg/S/KjrW2SdS7lc5708vxR4JGqun+obaLL86D10Fi+n7MaJFMnyQuBTwHvqKrHgasZ\n7HpbC+xlsPk7aedV1Vrg9cAVSV4zPLD9BzIVp/FlcEHqxcAnW9M0Ls9nmKbldyhJ3gPsB65tTXuB\nl7bvxTuBP0ryoknVxwx8zgd5M8/8Z2eiy3Oe9dDTRvn9nNUgOeytVMYpyXMZfHjXVtUNAFX1SFUd\nqKq/A36f7+xumVjtVbWnve4Dbmw1PdI2Z+c2v/dNus7m9cCdVfUITOfybJa6/PbwzN1KY6s3yVuA\nfwL8bFup0HZtfL1138FgX/nfn1SdR/A5T3J5HgP8M+ATc22TXJ7zrYcY0/dzVoNkam6l0vaRfgS4\nr6o+NNS+ami0N/Cdm0tuAdYnOTbJGcAaBge3Rl3n8UlOmOtmcPD1nlbPhjbaBuDTk6xzyDP+05u2\n5TlkScuv7WZ4PMmr2nfn54emGZkkFwK/ClxcVU8OtZ+SwXOASHJmq/OrE6xzSZ/zpOpsfgL4clU9\nvStoUsvzUOshxvX9XK6zBsb9A1zE4MyErwDvmWAd5zHYXLwb2N5+LgI+BnyptW8BVg1N855W906W\n+QyTBeo8k8FZGncBO+aWGfASYCtwP3ArcNIk62zvezzwdeDEobaJL08GwbYX+DaDfcdvO5LlB6xj\nsIL8CvDfaRcGj7jOXQz2ic99R3+3jfvT7fuwHbgT+KcTrnPJn/Mk6mztHwV+8aBxJ7I8OfR6aCzf\nT69slyR1mdVdW5KkKWGQSJK6GCSSpC4GiSSpi0EiSepikOioluQ/Jfmx
JJcmufIIpv98knWt++a0\nu+aOQpJ3H2b4SN9fOlIGiY525zK4SeE/Bm7rmVFVXVRV31yWquY3b5Bk4DljeH/piBgkOipl8PyN\nu4F/CPwF8AvA1Uned5jpXpDkuiT3JbkReMHQsN1JTm7dP99uLHhXko/NM5/j200Hb0/yf5Jc0trf\nkuSGJH+SwTMifrO1fxB4QbvZ37UZPNdiZ5I/YHBx2OkHvf+/aPPenuT3kqxoPx9Nck8Gz5P4d8ux\nLKXDmbpntkvLoap+Jcn1DG7x8E7g81X16kVM+m+AJ6vqB5L8IIOrk58hycuB9wL/qKoeTXLSPPN5\nD/C5qnpr2x11e5Jb27C1DO7O+hSwM8nvVNXGJG+vwc3+5u7gugbYUFVfaG1z7/8DwM8Ar66qbyf5\nMPCzDK6oPrWqXtHGczeYxsIg0dHsbAa3hPl+Bs9nWIzXAL8NUFV3t62ag/048MmqerSNN9+zKl4H\nXJzkXa3/+cBLW/fWqnoMIMm9DB7c9eB3z4IH5kLkIOcDPwx8sYXLCxjcjO+PgTOT/A5wE/DZw/yu\n0rIwSHTUSbKWwX2QTgMeBY4bNGc78CNV9TfjKAP46araeVBt5zLYEplzgEP/HX5rgXlvrqrvOnkg\nyQ8BFwC/yOCBS29dYt3SknmMREedqtredhHNPW70c8AFVbV2ESFyG/DPAZK8AvjBecb5HPCmJC9p\n4823a+szDB7QlDbOKxdR+rczuBX44WwF3pjke+feP8n3teMnz6mqTzHY9Xb2IuYldTNIdFRKcgrw\n1zV4rsX3V9W9C4x7cZJfb71XAy9Mch/w68AdB49fVTuAq4A/S3IX8KGDxwH+I/Bc4O4kO1r/4Wxq\n41+70Ejtd3kv8Nm26+0WBs/sPhX4fNvy+kNgyac7S0fCu/9Kkrq4RSJJ6mKQSJK6GCSSpC4GiSSp\ni0EiSepikEiSuhgkkqQuBokkqcv/BxsV9dfUmeJ9AAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x7f6bb4fccba8>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "import matplotlib.pyplot as plt\n", | |
| "\n", | |
| "# histogram of how many .dic entries each rule code is used by\n", | |
| "fig, ax = plt.subplots()\n", | |
| "ax.hist(list(_rcodes.values()), bins='auto')\n", | |
| "ax.set_xlabel('# .dic entries')\n", | |
| "ax.set_ylabel('# rules')\n", | |
| "ax.set_xlim(0, 2000)\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## enumerating entries" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
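| "let's find which rules have restricted applications, i.e. at least one variant whose condition is not `.` (each rule code is listed with the distinct conditions it uses):" | |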
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 332, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:50:55.526054Z", | |
| "start_time": "2017-11-16T20:50:55.356690Z" | |
| }, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 5 : tz\n", | |
| " 6 : ts\n", | |
| " 9 : tz\n", | |
| " 10 : ts\n", | |
| " 14 : n\n", | |
| " 20 : n\n", | |
| " 31 : tz\n", | |
| " 32 : tx\n", | |
| " 33 : ts\n", | |
| " 36 : tz\n", | |
| " 37 : ts\n", | |
| " 38 : tz\n", | |
| " 39 : ts\n", | |
| " 40 : a\n", | |
| " 42 : a\n", | |
| " 45 : ts\n", | |
| " 50 : n\n", | |
| " 61 : a\n", | |
| " 62 : k\n", | |
| " 70 : tz\n", | |
| " 71 : tx\n", | |
| " 72 : ts\n", | |
| " 80 : tz\n", | |
| " 82 : a\n", | |
| " 97 : ei\n", | |
| " 98 : ni\n", | |
| "100 : t\n", | |
| "102 : k\n", | |
| "124 : a\n", | |
| "125 : k\n", | |
| "133 : tz\n", | |
| "134 : ts\n", | |
| "138 : t\n", | |
| "143 : na\n", | |
| "144 : a\n", | |
| "160 : k\n", | |
| "168 : tz\n", | |
| "169 : tx\n", | |
| "170 : ts\n", | |
| "178 : tz\n", | |
| "182 : t\n", | |
| "188 : a\n", | |
| "190 : t\n", | |
| "192 : tz\n", | |
| "196 : a\n", | |
| "206 : nak\n", | |
| "213 : tza\n", | |
| "234 : a\n", | |
| "236 : a\n", | |
| "237 : k\n", | |
| "245 : tz\n", | |
| "246 : tx\n", | |
| "247 : ts\n", | |
| "259 : t\n", | |
| "264 : tz\n", | |
| "266 : t\n", | |
| "268 : k\n", | |
| "270 : a\n", | |
| "271 : n\n", | |
| "278 : nan\n", | |
| "279 : n\n", | |
| "280 : an\n", | |
| "281 : a\n", | |
| "289 : a\n", | |
| "294 : k\n", | |
| "299 : tz\n", | |
| "302 : tx\n", | |
| "303 : ts\n", | |
| "304 : ts\n", | |
| "308 : t\n", | |
| "313 : an\n", | |
| "321 : a\n", | |
| "322 : k\n", | |
| "329 : ts\n", | |
| "338 : a\n", | |
| "346 : ei\n", | |
| "347 : rri\n", | |
| "357 : ei\n", | |
| "358 : rri\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for rcode in map(str, sorted(map(int, rules.keys()))):\n", | |
| " restrictions = sorted(set([k[-1] for k in rules[rcode].keys() if k[-1] != '.']))\n", | |
| " if restrictions:\n", | |
| " print(rcode.rjust(3), ':', ', '.join(restrictions))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "a function to determine how many words (and characters!) result from applying a rule to a given starting word:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 385, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T00:26:26.298106Z", | |
| "start_time": "2017-11-17T00:26:25.579637Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import re\n", | |
| "\n", | |
| "# cache lengths:\n", | |
| "rlengths = {}\n", | |
| "\n", | |
| "def apply_rule(rulecode, nsfx=0, npfx=0, disp=False, word=('ai',)):\n", | |
| " \"\"\"Count words and chars created by applying a rule to a starting word.\"\"\"\n", | |
| " rulehash = (rulecode, nsfx, npfx)\n", | |
| " try:\n", | |
| " return rlengths[rulehash]\n", | |
| " except KeyError:\n", | |
| " pass\n", | |
| " rkeys = sorted(list(rules.get(rulecode, {}).keys()))\n", | |
| " if not rkeys:\n", | |
| " rlengths[rulehash] = 0, 0\n", | |
| " if disp:\n", | |
| " print('rlengths[%s] -> %s' % (rulehash, rlengths[rulehash]))\n", | |
| " return rlengths[rulehash]\n", | |
| "\n", | |
| " # can't apply rule if we would now have 2 pre/suffixes\n", | |
| " if nsfx > 1 or npfx > 1:\n", | |
| " if rulehash not in rlengths:\n", | |
| " rlengths[rulehash] = 0, 0\n", | |
| " if disp:\n", | |
| " print('. rlengths[{}] -> {}'.format(rulehash, rlengths[rulehash]))\n", | |
| " return rlengths[rulehash]\n", | |
| " if disp:\n", | |
| " print('apply_rule{}'.format(rulehash))\n", | |
| "\n", | |
| " num, chars = 0, 0\n", | |
| " ruletype = rkeys[0][0]\n", | |
| " is_sfx, is_pfx = ruletype == \"SFX\", ruletype != \"SFX\"\n", | |
| " for ii, variant in enumerate(rkeys):\n", | |
| " if not (variant[4] == '.' or\n", | |
| " re.search(variant[4] + '$', ''.join(word[:npfx+nsfx+1]))):\n", | |
| " continue\n", | |
| " to_remove = '' if variant[2] == '0' else variant[2]\n", | |
| " parts = variant[3].split('/')\n", | |
| " new_word = list(word[:npfx+nsfx+1])\n", | |
| " if is_sfx:\n", | |
| " if to_remove:\n", | |
| " new_word[-1] = new_word[-1][:-len(to_remove)]\n", | |
| " new_word = tuple(new_word + parts[:1])\n", | |
| " else:\n", | |
| " if to_remove:\n", | |
| " new_word[0] = new_word[0][len(to_remove):]\n", | |
| " new_word = tuple(parts[:1] + new_word)\n", | |
| " if disp:\n", | |
| " print('{} {} {}'.format(nsfx + is_sfx, npfx + is_pfx, '-'.join(new_word)))\n", | |
| " num += 1 # add just this affix\n", | |
| " l_affix = len(''.join(new_word)) - len(''.join(word))\n", | |
| " chars += l_affix # add chars for the affix\n", | |
| " if len(parts) > 1:\n", | |
| " # also add compound affixes\n", | |
| " for subrulecode in map(str, sorted(list(map(int, parts[1].split(','))))):\n", | |
| " to_add = apply_rule(subrulecode, nsfx + is_sfx, npfx + is_pfx,\n", | |
| " disp=disp, word=new_word)\n", | |
| " if to_add[0] and disp:\n", | |
| " print('/{} -> {}'.format(subrulecode, to_add))\n", | |
| " num += to_add[0]\n", | |
| " # add chars for new affixes, plus the current affix for each\n", | |
| " chars += (to_add[0] * l_affix) + to_add[1]\n", | |
| " rlengths[rulehash] = num, chars\n", | |
| " if disp:\n", | |
| " print('. . rlengths[{}] -> {}'.format(rulehash, rlengths[rulehash]))\n", | |
| " return num, chars" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T12:46:24.416407Z", | |
| "start_time": "2017-11-17T12:46:24.405797Z" | |
| } | |
| }, | |
| "source": [ | |
| "how many words have the last rule (358) applied to them?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 387, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T00:27:51.454376Z", | |
| "start_time": "2017-11-17T00:27:51.284350Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('berorri', '358')]" | |
| ] | |
| }, | |
| "execution_count": 387, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "list(dk for dk in dic.keys() if '358' in dk[1:])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Only one. Weird. Anyway, let's apply it, and see whether we've got the removal working correctly:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 271, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:12:01.388411Z", | |
| "start_time": "2017-11-16T20:12:00.876330Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0 0 berorri\n", | |
| "apply_rule('358', 0, 0)\n", | |
| "1 0 bero-rregatik\n", | |
| "1 0 bero-rrek\n", | |
| "1 0 bero-rrekiko\n", | |
| "apply_rule('243', 1, 0)\n", | |
| "2 0 bero-rrekiko-a\n", | |
| "2 0 bero-rrekiko-agan\n", | |
| "2 0 bero-rrekiko-agana\n", | |
| "2 0 bero-rrekiko-aganaino\n", | |
| "2 0 bero-rrekiko-aganantz\n", | |
| "2 0 bero-rrekiko-agandik\n", | |
| "2 0 bero-rrekiko-agatik\n", | |
| "2 0 bero-rrekiko-ago\n", | |
| ". rlengths[('243', 2, 0)] -> (0, 0)\n", | |
| "2 0 bero-rrekiko-agoa\n", | |
| "2 0 bero-rrekiko-agoak\n", | |
| "2 0 bero-rrekiko-agoan\n", | |
| "2 0 bero-rrekiko-agoarekin\n", | |
| "2 0 bero-rrekiko-agoaren\n", | |
| ". rlengths[('238', 2, 0)] -> (0, 0)\n", | |
| "2 0 bero-rrekiko-agoarendako\n", | |
| "2 0 bero-rrekiko-agoarentzat\n", | |
| "2 0 bero-rrekiko-agoari\n", | |
| "2 0 bero-rrekiko-agoaz\n", | |
| "2 0 bero-rrekiko-agoei\n", | |
| "2 0 bero-rrekiko-agoek\n", | |
| "2 0 bero-rrekiko-agoekin\n", | |
| "2 0 bero-rrekiko-agoen\n", | |
| "2 0 bero-rrekiko-agoendako\n", | |
| "2 0 bero-rrekiko-agoentzat\n", | |
| "2 0 bero-rrekiko-agoez\n", | |
| "2 0 bero-rrekiko-agogatik\n", | |
| "2 0 bero-rrekiko-agok\n", | |
| "2 0 bero-rrekiko-agoko\n", | |
| "2 0 bero-rrekiko-agooi\n", | |
| "2 0 bero-rrekiko-agook\n", | |
| "2 0 bero-rrekiko-agookin\n", | |
| "2 0 bero-rrekiko-agoon\n", | |
| "2 0 bero-rrekiko-agoontzat\n", | |
| "2 0 bero-rrekiko-agootaz\n", | |
| "2 0 bero-rrekiko-agooz\n", | |
| "2 0 bero-rrekiko-agora\n", | |
| "2 0 bero-rrekiko-agoraino\n", | |
| "2 0 bero-rrekiko-agorantz\n", | |
| "2 0 bero-rrekiko-agorat\n", | |
| "2 0 bero-rrekiko-agorekin\n", | |
| "2 0 bero-rrekiko-agoren\n", | |
| "2 0 bero-rrekiko-agorendako\n", | |
| "2 0 bero-rrekiko-agorentzat\n", | |
| "2 0 bero-rrekiko-agori\n", | |
| "2 0 bero-rrekiko-agorik\n", | |
| "2 0 bero-rrekiko-agotik\n", | |
| "2 0 bero-rrekiko-agotzat\n", | |
| "2 0 bero-rrekiko-agoz\n", | |
| "2 0 bero-rrekiko-ak\n", | |
| "2 0 bero-rrekiko-an\n", | |
| "2 0 bero-rrekiko-arekiko\n", | |
| "2 0 bero-rrekiko-arekin\n", | |
| "2 0 bero-rrekiko-aren\n", | |
| "2 0 bero-rrekiko-arena\n", | |
| "2 0 bero-rrekiko-arenak\n", | |
| "2 0 bero-rrekiko-arenarekin\n", | |
| "2 0 bero-rrekiko-arenarendako\n", | |
| "2 0 bero-rrekiko-arenarentzat\n", | |
| "2 0 bero-rrekiko-arenari\n", | |
| "2 0 bero-rrekiko-arenaz\n", | |
| "2 0 bero-rrekiko-arendako\n", | |
| "2 0 bero-rrekiko-arenean\n", | |
| "2 0 bero-rrekiko-arenei\n", | |
| "2 0 bero-rrekiko-arenek\n", | |
| "2 0 bero-rrekiko-arenekin\n", | |
| "2 0 bero-rrekiko-arenendako\n", | |
| "2 0 bero-rrekiko-arenentzat\n", | |
| "2 0 bero-rrekiko-arenera\n", | |
| "2 0 bero-rrekiko-areneraino\n", | |
| "2 0 bero-rrekiko-arenerantz\n", | |
| "2 0 bero-rrekiko-arenerat\n", | |
| "2 0 bero-rrekiko-arenetan\n", | |
| "2 0 bero-rrekiko-arenetara\n", | |
| "2 0 bero-rrekiko-arenetaraino\n", | |
| "2 0 bero-rrekiko-arenetarantz\n", | |
| "2 0 bero-rrekiko-arenetarat\n", | |
| "2 0 bero-rrekiko-arenetarik\n", | |
| "2 0 bero-rrekiko-arenetatik\n", | |
| "2 0 bero-rrekiko-arenetik\n", | |
| "2 0 bero-rrekiko-arenez\n", | |
| "2 0 bero-rrekiko-arengan\n", | |
| "2 0 bero-rrekiko-arengana\n", | |
| "2 0 bero-rrekiko-arenganaino\n", | |
| "2 0 bero-rrekiko-arenganantz\n", | |
| "2 0 bero-rrekiko-arengandik\n", | |
| "2 0 bero-rrekiko-arengatik\n", | |
| "2 0 bero-rrekiko-areni\n", | |
| "2 0 bero-rrekiko-arenik\n", | |
| "2 0 bero-rrekiko-arenoi\n", | |
| "2 0 bero-rrekiko-arenok\n", | |
| "2 0 bero-rrekiko-arenokin\n", | |
| "2 0 bero-rrekiko-arenontzat\n", | |
| "2 0 bero-rrekiko-arenotaz\n", | |
| "2 0 bero-rrekiko-arenoz\n", | |
| "2 0 bero-rrekiko-arentzako\n", | |
| "2 0 bero-rrekiko-arentzat\n", | |
| "2 0 bero-rrekiko-ari\n", | |
| "2 0 bero-rrekiko-az\n", | |
| "2 0 bero-rrekiko-ei\n", | |
| "2 0 bero-rrekiko-ek\n", | |
| "2 0 bero-rrekiko-ekiko\n", | |
| "2 0 bero-rrekiko-ekin\n", | |
| "2 0 bero-rrekiko-en\n", | |
| "2 0 bero-rrekiko-ena\n", | |
| "2 0 bero-rrekiko-enak\n", | |
| "2 0 bero-rrekiko-enarekin\n", | |
| "2 0 bero-rrekiko-enarendako\n", | |
| "2 0 bero-rrekiko-enarentzat\n", | |
| "2 0 bero-rrekiko-enari\n", | |
| "2 0 bero-rrekiko-enaz\n", | |
| "2 0 bero-rrekiko-endako\n", | |
| "2 0 bero-rrekiko-enean\n", | |
| "2 0 bero-rrekiko-enei\n", | |
| "2 0 bero-rrekiko-enek\n", | |
| "2 0 bero-rrekiko-enekin\n", | |
| "2 0 bero-rrekiko-enendako\n", | |
| "2 0 bero-rrekiko-enentzat\n", | |
| "2 0 bero-rrekiko-enera\n", | |
| "2 0 bero-rrekiko-eneraino\n", | |
| "2 0 bero-rrekiko-enerantz\n", | |
| "2 0 bero-rrekiko-enerat\n", | |
| "2 0 bero-rrekiko-enetan\n", | |
| "2 0 bero-rrekiko-enetara\n", | |
| "2 0 bero-rrekiko-enetaraino\n", | |
| "2 0 bero-rrekiko-enetarantz\n", | |
| "2 0 bero-rrekiko-enetarat\n", | |
| "2 0 bero-rrekiko-enetarik\n", | |
| "2 0 bero-rrekiko-enetatik\n", | |
| "2 0 bero-rrekiko-enetik\n", | |
| "2 0 bero-rrekiko-enez\n", | |
| "2 0 bero-rrekiko-engan\n", | |
| "2 0 bero-rrekiko-engana\n", | |
| "2 0 bero-rrekiko-enganaino\n", | |
| "2 0 bero-rrekiko-enganantz\n", | |
| "2 0 bero-rrekiko-engandik\n", | |
| "2 0 bero-rrekiko-engatik\n", | |
| "2 0 bero-rrekiko-eni\n", | |
| "2 0 bero-rrekiko-enik\n", | |
| "2 0 bero-rrekiko-enoi\n", | |
| "2 0 bero-rrekiko-enok\n", | |
| "2 0 bero-rrekiko-enokin\n", | |
| "2 0 bero-rrekiko-enontzat\n", | |
| "2 0 bero-rrekiko-enotaz\n", | |
| "2 0 bero-rrekiko-enoz\n", | |
| "2 0 bero-rrekiko-entzako\n", | |
| "2 0 bero-rrekiko-entzat\n", | |
| "2 0 bero-rrekiko-etako\n", | |
| "2 0 bero-rrekiko-etakoa\n", | |
| "2 0 bero-rrekiko-etakoago\n", | |
| "2 0 bero-rrekiko-etakoak\n", | |
| "2 0 bero-rrekiko-etakoan\n", | |
| "2 0 bero-rrekiko-etakoarekin\n", | |
| "2 0 bero-rrekiko-etakoarendako\n", | |
| "2 0 bero-rrekiko-etakoarentzat\n", | |
| "2 0 bero-rrekiko-etakoari\n", | |
| "2 0 bero-rrekiko-etakoaz\n", | |
| "2 0 bero-rrekiko-etakoegi\n", | |
| "2 0 bero-rrekiko-etakoei\n", | |
| "2 0 bero-rrekiko-etakoek\n", | |
| "2 0 bero-rrekiko-etakoekin\n", | |
| "2 0 bero-rrekiko-etakoendako\n", | |
| "2 0 bero-rrekiko-etakoentzat\n", | |
| "2 0 bero-rrekiko-etakoetan\n", | |
| "2 0 bero-rrekiko-etakoetara\n", | |
| "2 0 bero-rrekiko-etakoetaraino\n", | |
| "2 0 bero-rrekiko-etakoetarantz\n", | |
| "2 0 bero-rrekiko-etakoetarat\n", | |
| "2 0 bero-rrekiko-etakoetarik\n", | |
| "2 0 bero-rrekiko-etakoetatik\n", | |
| "2 0 bero-rrekiko-etakoez\n", | |
| "2 0 bero-rrekiko-etakogatik\n", | |
| "2 0 bero-rrekiko-etakok\n", | |
| "2 0 bero-rrekiko-etakooi\n", | |
| "2 0 bero-rrekiko-etakook\n", | |
| "2 0 bero-rrekiko-etakookin\n", | |
| "2 0 bero-rrekiko-etakoontzat\n", | |
| "2 0 bero-rrekiko-etakootaz\n", | |
| "2 0 bero-rrekiko-etakooz\n", | |
| "2 0 bero-rrekiko-etakora\n", | |
| "2 0 bero-rrekiko-etakoraino\n", | |
| "2 0 bero-rrekiko-etakorantz\n", | |
| "2 0 bero-rrekiko-etakorat\n", | |
| "2 0 bero-rrekiko-etakorekin\n", | |
| "2 0 bero-rrekiko-etakorendako\n", | |
| "2 0 bero-rrekiko-etakorentzat\n", | |
| "2 0 bero-rrekiko-etakori\n", | |
| "2 0 bero-rrekiko-etakorik\n", | |
| "2 0 bero-rrekiko-etakotan\n", | |
| "2 0 bero-rrekiko-etakotara\n", | |
| "2 0 bero-rrekiko-etakotaraino\n", | |
| "2 0 bero-rrekiko-etakotarantz\n", | |
| "2 0 bero-rrekiko-etakotarat\n", | |
| "2 0 bero-rrekiko-etakotarik\n", | |
| "2 0 bero-rrekiko-etakotatik\n", | |
| "2 0 bero-rrekiko-etakotik\n", | |
| "2 0 bero-rrekiko-etakotzat\n", | |
| "2 0 bero-rrekiko-etakoz\n", | |
| "2 0 bero-rrekiko-etan\n", | |
| "2 0 bero-rrekiko-etara\n", | |
| "2 0 bero-rrekiko-etarago\n", | |
| "2 0 bero-rrekiko-etaraino\n", | |
| "2 0 bero-rrekiko-etarainoko\n", | |
| "2 0 bero-rrekiko-etarako\n", | |
| "2 0 bero-rrekiko-etarantz\n", | |
| "2 0 bero-rrekiko-etarantzago\n", | |
| "2 0 bero-rrekiko-etarantzegi\n", | |
| "2 0 bero-rrekiko-etaranzko\n", | |
| "2 0 bero-rrekiko-etarat\n", | |
| "2 0 bero-rrekiko-etarik\n", | |
| "2 0 bero-rrekiko-etariko\n", | |
| "2 0 bero-rrekiko-etatik\n", | |
| "2 0 bero-rrekiko-etatiko\n", | |
| "2 0 bero-rrekiko-ez\n", | |
| "2 0 bero-rrekiko-gatik\n", | |
| "2 0 bero-rrekiko-gatiko\n", | |
| "2 0 bero-rrekiko-k\n", | |
| "2 0 bero-rrekiko-ko\n", | |
| "2 0 bero-rrekiko-koa\n", | |
| "2 0 bero-rrekiko-koago\n", | |
| "2 0 bero-rrekiko-koak\n", | |
| "2 0 bero-rrekiko-koan\n", | |
| "2 0 bero-rrekiko-koarekin\n", | |
| "2 0 bero-rrekiko-koarendako\n", | |
| "2 0 bero-rrekiko-koarentzat\n", | |
| "2 0 bero-rrekiko-koari\n", | |
| "2 0 bero-rrekiko-koaz\n", | |
| "2 0 bero-rrekiko-koegi\n", | |
| "2 0 bero-rrekiko-koei\n", | |
| "2 0 bero-rrekiko-koek\n", | |
| "2 0 bero-rrekiko-koekin\n", | |
| "2 0 bero-rrekiko-koendako\n", | |
| "2 0 bero-rrekiko-koentzat\n", | |
| "2 0 bero-rrekiko-koetan\n", | |
| "2 0 bero-rrekiko-koetara\n", | |
| "2 0 bero-rrekiko-koetaraino\n", | |
| "2 0 bero-rrekiko-koetarantz\n", | |
| "2 0 bero-rrekiko-koetarat\n", | |
| "2 0 bero-rrekiko-koetarik\n", | |
| "2 0 bero-rrekiko-koetatik\n", | |
| "2 0 bero-rrekiko-koez\n", | |
| "2 0 bero-rrekiko-kogatik\n", | |
| "2 0 bero-rrekiko-kok\n", | |
| "2 0 bero-rrekiko-kooi\n", | |
| "2 0 bero-rrekiko-kook\n", | |
| "2 0 bero-rrekiko-kookin\n", | |
| "2 0 bero-rrekiko-koontzat\n", | |
| "2 0 bero-rrekiko-kootaz\n", | |
| "2 0 bero-rrekiko-kooz\n", | |
| "2 0 bero-rrekiko-kora\n", | |
| "2 0 bero-rrekiko-koraino\n", | |
| "2 0 bero-rrekiko-korantz\n", | |
| "2 0 bero-rrekiko-korat\n", | |
| "2 0 bero-rrekiko-korekin\n", | |
| "2 0 bero-rrekiko-korendako\n", | |
| "2 0 bero-rrekiko-korentzat\n", | |
| "2 0 bero-rrekiko-kori\n", | |
| "2 0 bero-rrekiko-korik\n", | |
| "2 0 bero-rrekiko-kotan\n", | |
| "2 0 bero-rrekiko-kotara\n", | |
| "2 0 bero-rrekiko-kotaraino\n", | |
| "2 0 bero-rrekiko-kotarantz\n", | |
| "2 0 bero-rrekiko-kotarat\n", | |
| "2 0 bero-rrekiko-kotarik\n", | |
| "2 0 bero-rrekiko-kotatik\n", | |
| "2 0 bero-rrekiko-kotik\n", | |
| "2 0 bero-rrekiko-kotzat\n", | |
| "2 0 bero-rrekiko-koz\n", | |
| "2 0 bero-rrekiko-oi\n", | |
| "2 0 bero-rrekiko-ok\n", | |
| "2 0 bero-rrekiko-okiko\n", | |
| "2 0 bero-rrekiko-okin\n", | |
| "2 0 bero-rrekiko-on\n", | |
| "2 0 bero-rrekiko-ona\n", | |
| "2 0 bero-rrekiko-onak\n", | |
| "2 0 bero-rrekiko-onarekin\n", | |
| "2 0 bero-rrekiko-onarendako\n", | |
| "2 0 bero-rrekiko-onarentzat\n", | |
| "2 0 bero-rrekiko-onari\n", | |
| "2 0 bero-rrekiko-onaz\n", | |
| "2 0 bero-rrekiko-onean\n", | |
| "2 0 bero-rrekiko-onei\n", | |
| "2 0 bero-rrekiko-onek\n", | |
| "2 0 bero-rrekiko-onekin\n", | |
| "2 0 bero-rrekiko-onendako\n", | |
| "2 0 bero-rrekiko-onentzat\n", | |
| "2 0 bero-rrekiko-onera\n", | |
| "2 0 bero-rrekiko-oneraino\n", | |
| "2 0 bero-rrekiko-onerantz\n", | |
| "2 0 bero-rrekiko-onerat\n", | |
| "2 0 bero-rrekiko-onetik\n", | |
| "2 0 bero-rrekiko-onez\n", | |
| "2 0 bero-rrekiko-ongan\n", | |
| "2 0 bero-rrekiko-ongana\n", | |
| "2 0 bero-rrekiko-onganaino\n", | |
| "2 0 bero-rrekiko-onganantz\n", | |
| "2 0 bero-rrekiko-ongandik\n", | |
| "2 0 bero-rrekiko-ongatik\n", | |
| "2 0 bero-rrekiko-oni\n", | |
| "2 0 bero-rrekiko-onik\n", | |
| "2 0 bero-rrekiko-onoi\n", | |
| "2 0 bero-rrekiko-onok\n", | |
| "2 0 bero-rrekiko-onokin\n", | |
| "2 0 bero-rrekiko-onontzat\n", | |
| "2 0 bero-rrekiko-onotaz\n", | |
| "2 0 bero-rrekiko-onoz\n", | |
| "2 0 bero-rrekiko-ontzako\n", | |
| "2 0 bero-rrekiko-ontzat\n", | |
| "2 0 bero-rrekiko-otako\n", | |
| "2 0 bero-rrekiko-otan\n", | |
| "2 0 bero-rrekiko-otara\n", | |
| "2 0 bero-rrekiko-otaraino\n", | |
| "2 0 bero-rrekiko-otarantz\n", | |
| "2 0 bero-rrekiko-otarat\n", | |
| "2 0 bero-rrekiko-otarik\n", | |
| "2 0 bero-rrekiko-otatik\n", | |
| "2 0 bero-rrekiko-otaz\n", | |
| "2 0 bero-rrekiko-oz\n", | |
| "2 0 bero-rrekiko-ra\n", | |
| "2 0 bero-rrekiko-rago\n", | |
| "2 0 bero-rrekiko-raino\n", | |
| "2 0 bero-rrekiko-rainoko\n", | |
| "2 0 bero-rrekiko-rako\n", | |
| "2 0 bero-rrekiko-rantz\n", | |
| "2 0 bero-rrekiko-rantzago\n", | |
| "2 0 bero-rrekiko-rantzegi\n", | |
| "2 0 bero-rrekiko-ranzko\n", | |
| "2 0 bero-rrekiko-rat\n", | |
| "2 0 bero-rrekiko-regatik\n", | |
| "2 0 bero-rrekiko-rekiko\n", | |
| "2 0 bero-rrekiko-rekin\n", | |
| "2 0 bero-rrekiko-ren\n", | |
| "2 0 bero-rrekiko-rena\n", | |
| "2 0 bero-rrekiko-renak\n", | |
| "2 0 bero-rrekiko-renarekin\n", | |
| "2 0 bero-rrekiko-renarendako\n", | |
| "2 0 bero-rrekiko-renarentzat\n", | |
| "2 0 bero-rrekiko-renari\n", | |
| "2 0 bero-rrekiko-renaz\n", | |
| "2 0 bero-rrekiko-rendako\n", | |
| "2 0 bero-rrekiko-renean\n", | |
| "2 0 bero-rrekiko-renei\n", | |
| "2 0 bero-rrekiko-renek\n", | |
| "2 0 bero-rrekiko-renekin\n", | |
| "2 0 bero-rrekiko-renendako\n", | |
| "2 0 bero-rrekiko-renentzat\n", | |
| "2 0 bero-rrekiko-renera\n", | |
| "2 0 bero-rrekiko-reneraino\n", | |
| "2 0 bero-rrekiko-renerantz\n", | |
| "2 0 bero-rrekiko-renerat\n", | |
| "2 0 bero-rrekiko-renetan\n", | |
| "2 0 bero-rrekiko-renetara\n", | |
| "2 0 bero-rrekiko-renetaraino\n", | |
| "2 0 bero-rrekiko-renetarantz\n", | |
| "2 0 bero-rrekiko-renetarat\n", | |
| "2 0 bero-rrekiko-renetarik\n", | |
| "2 0 bero-rrekiko-renetatik\n", | |
| "2 0 bero-rrekiko-renetik\n", | |
| "2 0 bero-rrekiko-renez\n", | |
| "2 0 bero-rrekiko-rengan\n", | |
| "2 0 bero-rrekiko-rengana\n", | |
| "2 0 bero-rrekiko-renganaino\n", | |
| "2 0 bero-rrekiko-renganantz\n", | |
| "2 0 bero-rrekiko-rengandik\n", | |
| "2 0 bero-rrekiko-rengatik\n", | |
| "2 0 bero-rrekiko-reni\n", | |
| "2 0 bero-rrekiko-renik\n", | |
| "2 0 bero-rrekiko-renoi\n", | |
| "2 0 bero-rrekiko-renok\n", | |
| "2 0 bero-rrekiko-renokin\n", | |
| "2 0 bero-rrekiko-renontzat\n", | |
| "2 0 bero-rrekiko-renotaz\n", | |
| "2 0 bero-rrekiko-renoz\n", | |
| "2 0 bero-rrekiko-rentzako\n", | |
| "2 0 bero-rrekiko-rentzat\n", | |
| "2 0 bero-rrekiko-ri\n", | |
| "2 0 bero-rrekiko-rik\n", | |
| "2 0 bero-rrekiko-tako\n", | |
| "2 0 bero-rrekiko-takoa\n", | |
| "2 0 bero-rrekiko-takoago\n", | |
| "2 0 bero-rrekiko-takoak\n", | |
| "2 0 bero-rrekiko-takoan\n", | |
| "2 0 bero-rrekiko-takoarekin\n", | |
| "2 0 bero-rrekiko-takoarendako\n", | |
| "2 0 bero-rrekiko-takoarentzat\n", | |
| "2 0 bero-rrekiko-takoari\n", | |
| "2 0 bero-rrekiko-takoaz\n", | |
| "2 0 bero-rrekiko-takoegi\n", | |
| "2 0 bero-rrekiko-takoei\n", | |
| "2 0 bero-rrekiko-takoek\n", | |
| "2 0 bero-rrekiko-takoekin\n", | |
| "2 0 bero-rrekiko-takoendako\n", | |
| "2 0 bero-rrekiko-takoentzat\n", | |
| "2 0 bero-rrekiko-takoetan\n", | |
| "2 0 bero-rrekiko-takoetara\n", | |
| "2 0 bero-rrekiko-takoetaraino\n", | |
| "2 0 bero-rrekiko-takoetarantz\n", | |
| "2 0 bero-rrekiko-takoetarat\n", | |
| "2 0 bero-rrekiko-takoetarik\n", | |
| "2 0 bero-rrekiko-takoetatik\n", | |
| "2 0 bero-rrekiko-takoez\n", | |
| "2 0 bero-rrekiko-takogatik\n", | |
| "2 0 bero-rrekiko-takok\n", | |
| "2 0 bero-rrekiko-takooi\n", | |
| "2 0 bero-rrekiko-takook\n", | |
| "2 0 bero-rrekiko-takookin\n", | |
| "2 0 bero-rrekiko-takoontzat\n", | |
| "2 0 bero-rrekiko-takootaz\n", | |
| "2 0 bero-rrekiko-takooz\n", | |
| "2 0 bero-rrekiko-takora\n", | |
| "2 0 bero-rrekiko-takoraino\n", | |
| "2 0 bero-rrekiko-takorantz\n", | |
| "2 0 bero-rrekiko-takorat\n", | |
| "2 0 bero-rrekiko-takorekin\n", | |
| "2 0 bero-rrekiko-takorendako\n", | |
| "2 0 bero-rrekiko-takorentzat\n", | |
| "2 0 bero-rrekiko-takori\n", | |
| "2 0 bero-rrekiko-takorik\n", | |
| "2 0 bero-rrekiko-takotan\n", | |
| "2 0 bero-rrekiko-takotara\n", | |
| "2 0 bero-rrekiko-takotaraino\n", | |
| "2 0 bero-rrekiko-takotarantz\n", | |
| "2 0 bero-rrekiko-takotarat\n", | |
| "2 0 bero-rrekiko-takotarik\n", | |
| "2 0 bero-rrekiko-takotatik\n", | |
| "2 0 bero-rrekiko-takotik\n", | |
| "2 0 bero-rrekiko-takotzat\n", | |
| "2 0 bero-rrekiko-takoz\n", | |
| "2 0 bero-rrekiko-tan\n", | |
| "2 0 bero-rrekiko-tara\n", | |
| "2 0 bero-rrekiko-tarago\n", | |
| "2 0 bero-rrekiko-taraino\n", | |
| "2 0 bero-rrekiko-tarainoko\n", | |
| "2 0 bero-rrekiko-tarako\n", | |
| "2 0 bero-rrekiko-tarantz\n", | |
| "2 0 bero-rrekiko-tarantzago\n", | |
| "2 0 bero-rrekiko-tarantzegi\n", | |
| "2 0 bero-rrekiko-taranzko\n", | |
| "2 0 bero-rrekiko-tarat\n", | |
| "2 0 bero-rrekiko-tarik\n", | |
| "2 0 bero-rrekiko-tariko\n", | |
| "2 0 bero-rrekiko-tatik\n", | |
| "2 0 bero-rrekiko-tatiko\n", | |
| "2 0 bero-rrekiko-tik\n", | |
| "2 0 bero-rrekiko-tiko\n", | |
| "2 0 bero-rrekiko-txo\n", | |
| "2 0 bero-rrekiko-txoa\n", | |
| "2 0 bero-rrekiko-txoak\n", | |
| "2 0 bero-rrekiko-txoan\n", | |
| "2 0 bero-rrekiko-txoarekin\n", | |
| "2 0 bero-rrekiko-txoaren\n", | |
| "2 0 bero-rrekiko-txoarendako\n", | |
| "2 0 bero-rrekiko-txoarentzat\n", | |
| "2 0 bero-rrekiko-txoari\n", | |
| "2 0 bero-rrekiko-txoaz\n", | |
| "2 0 bero-rrekiko-txoei\n", | |
| "2 0 bero-rrekiko-txoek\n", | |
| "2 0 bero-rrekiko-txoekin\n", | |
| "2 0 bero-rrekiko-txoen\n", | |
| "2 0 bero-rrekiko-txoendako\n", | |
| "2 0 bero-rrekiko-txoentzat\n", | |
| "2 0 bero-rrekiko-txoez\n", | |
| "2 0 bero-rrekiko-txogatik\n", | |
| "2 0 bero-rrekiko-txok\n", | |
| "2 0 bero-rrekiko-txoko\n", | |
| "2 0 bero-rrekiko-txooi\n", | |
| "2 0 bero-rrekiko-txook\n", | |
| "2 0 bero-rrekiko-txookin\n", | |
| "2 0 bero-rrekiko-txoon\n", | |
| "2 0 bero-rrekiko-txoontzat\n", | |
| "2 0 bero-rrekiko-txootaz\n", | |
| "2 0 bero-rrekiko-txooz\n", | |
| "2 0 bero-rrekiko-txora\n", | |
| "2 0 bero-rrekiko-txoraino\n", | |
| "2 0 bero-rrekiko-txorantz\n", | |
| "2 0 bero-rrekiko-txorat\n", | |
| "2 0 bero-rrekiko-txorekin\n", | |
| "2 0 bero-rrekiko-txoren\n", | |
| "2 0 bero-rrekiko-txorendako\n", | |
| "2 0 bero-rrekiko-txorentzat\n", | |
| "2 0 bero-rrekiko-txori\n", | |
| "2 0 bero-rrekiko-txorik\n", | |
| "2 0 bero-rrekiko-txotik\n", | |
| "2 0 bero-rrekiko-txotzat\n", | |
| "2 0 bero-rrekiko-txoz\n", | |
| "2 0 bero-rrekiko-tzar\n", | |
| "2 0 bero-rrekiko-tzargatik\n", | |
| "2 0 bero-rrekiko-tzarra\n", | |
| "2 0 bero-rrekiko-tzarrak\n", | |
| "2 0 bero-rrekiko-tzarrarekin\n", | |
| "2 0 bero-rrekiko-tzarraren\n", | |
| "2 0 bero-rrekiko-tzarrarendako\n", | |
| "2 0 bero-rrekiko-tzarrarentzat\n", | |
| "2 0 bero-rrekiko-tzarrari\n", | |
| "2 0 bero-rrekiko-tzarraz\n", | |
| "2 0 bero-rrekiko-tzarrean\n", | |
| "2 0 bero-rrekiko-tzarrei\n", | |
| "2 0 bero-rrekiko-tzarrek\n", | |
| "2 0 bero-rrekiko-tzarrekin\n", | |
| "2 0 bero-rrekiko-tzarreko\n", | |
| "2 0 bero-rrekiko-tzarren\n", | |
| "2 0 bero-rrekiko-tzarrendako\n", | |
| "2 0 bero-rrekiko-tzarrentzat\n", | |
| "2 0 bero-rrekiko-tzarrera\n", | |
| "2 0 bero-rrekiko-tzarreraino\n", | |
| "2 0 bero-rrekiko-tzarrerantz\n", | |
| "2 0 bero-rrekiko-tzarrerat\n", | |
| "2 0 bero-rrekiko-tzarretik\n", | |
| "2 0 bero-rrekiko-tzarrez\n", | |
| "2 0 bero-rrekiko-tzarri\n", | |
| "2 0 bero-rrekiko-tzarrik\n", | |
| "2 0 bero-rrekiko-tzarroi\n", | |
| "2 0 bero-rrekiko-tzarrok\n", | |
| "2 0 bero-rrekiko-tzarrokin\n", | |
| "2 0 bero-rrekiko-tzarron\n", | |
| "2 0 bero-rrekiko-tzarrontzat\n", | |
| "2 0 bero-rrekiko-tzarrotaz\n", | |
| "2 0 bero-rrekiko-tzarroz\n", | |
| "2 0 bero-rrekiko-tzartzat\n", | |
| "2 0 bero-rrekiko-tzat\n", | |
| "2 0 bero-rrekiko-z\n", | |
| "2 0 bero-rrekiko-zko\n", | |
| ". . rlengths[('243', 1, 0)] -> (520, 9356)\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rrekin\n", | |
| "1 0 bero-rren\n", | |
| "apply_rule('238', 1, 0)\n", | |
| "2 0 bero-rren-a\n", | |
| "2 0 bero-rren-agan\n", | |
| "2 0 bero-rren-agana\n", | |
| "2 0 bero-rren-aganaino\n", | |
| "2 0 bero-rren-aganantz\n", | |
| "2 0 bero-rren-agandik\n", | |
| "2 0 bero-rren-agatik\n", | |
| "2 0 bero-rren-ago\n", | |
| "2 0 bero-rren-agoa\n", | |
| "2 0 bero-rren-agoak\n", | |
| "2 0 bero-rren-agoan\n", | |
| "2 0 bero-rren-agoarekin\n", | |
| "2 0 bero-rren-agoaren\n", | |
| "2 0 bero-rren-agoarendako\n", | |
| "2 0 bero-rren-agoarentzat\n", | |
| "2 0 bero-rren-agoari\n", | |
| "2 0 bero-rren-agoaz\n", | |
| "2 0 bero-rren-agoei\n", | |
| "2 0 bero-rren-agoek\n", | |
| "2 0 bero-rren-agoekin\n", | |
| "2 0 bero-rren-agoen\n", | |
| "2 0 bero-rren-agoendako\n", | |
| "2 0 bero-rren-agoentzat\n", | |
| "2 0 bero-rren-agoez\n", | |
| "2 0 bero-rren-agogatik\n", | |
| "2 0 bero-rren-agok\n", | |
| "2 0 bero-rren-agoko\n", | |
| "2 0 bero-rren-agooi\n", | |
| "2 0 bero-rren-agook\n", | |
| "2 0 bero-rren-agookin\n", | |
| "2 0 bero-rren-agoon\n", | |
| "2 0 bero-rren-agoontzat\n", | |
| "2 0 bero-rren-agootaz\n", | |
| "2 0 bero-rren-agooz\n", | |
| "2 0 bero-rren-agora\n", | |
| "2 0 bero-rren-agoraino\n", | |
| "2 0 bero-rren-agorantz\n", | |
| "2 0 bero-rren-agorat\n", | |
| "2 0 bero-rren-agorekin\n", | |
| "2 0 bero-rren-agoren\n", | |
| "2 0 bero-rren-agorendako\n", | |
| "2 0 bero-rren-agorentzat\n", | |
| "2 0 bero-rren-agori\n", | |
| "2 0 bero-rren-agorik\n", | |
| "2 0 bero-rren-agotik\n", | |
| "2 0 bero-rren-agotzat\n", | |
| "2 0 bero-rren-agoz\n", | |
| "2 0 bero-rren-ak\n", | |
| "2 0 bero-rren-arekiko\n", | |
| "2 0 bero-rren-arekin\n", | |
| "2 0 bero-rren-aren\n", | |
| "2 0 bero-rren-arena\n", | |
| "2 0 bero-rren-arenak\n", | |
| "2 0 bero-rren-arenarekin\n", | |
| "2 0 bero-rren-arenarendako\n", | |
| "2 0 bero-rren-arenarentzat\n", | |
| "2 0 bero-rren-arenari\n", | |
| "2 0 bero-rren-arenaz\n", | |
| "2 0 bero-rren-arendako\n", | |
| "2 0 bero-rren-arenean\n", | |
| "2 0 bero-rren-arenei\n", | |
| "2 0 bero-rren-arenek\n", | |
| "2 0 bero-rren-arenekin\n", | |
| "2 0 bero-rren-arenendako\n", | |
| "2 0 bero-rren-arenentzat\n", | |
| "2 0 bero-rren-arenera\n", | |
| "2 0 bero-rren-areneraino\n", | |
| "2 0 bero-rren-arenerantz\n", | |
| "2 0 bero-rren-arenerat\n", | |
| "2 0 bero-rren-arenetan\n", | |
| "2 0 bero-rren-arenetara\n", | |
| "2 0 bero-rren-arenetaraino\n", | |
| "2 0 bero-rren-arenetarantz\n", | |
| "2 0 bero-rren-arenetarat\n", | |
| "2 0 bero-rren-arenetarik\n", | |
| "2 0 bero-rren-arenetatik\n", | |
| "2 0 bero-rren-arenetik\n", | |
| "2 0 bero-rren-arenez\n", | |
| "2 0 bero-rren-arengan\n", | |
| "2 0 bero-rren-arengana\n", | |
| "2 0 bero-rren-arenganaino\n", | |
| "2 0 bero-rren-arenganantz\n", | |
| "2 0 bero-rren-arengandik\n", | |
| "2 0 bero-rren-arengatik\n", | |
| "2 0 bero-rren-areni\n", | |
| "2 0 bero-rren-arenik\n", | |
| "2 0 bero-rren-arenoi\n", | |
| "2 0 bero-rren-arenok\n", | |
| "2 0 bero-rren-arenokin\n", | |
| "2 0 bero-rren-arenontzat\n", | |
| "2 0 bero-rren-arenotaz\n", | |
| "2 0 bero-rren-arenoz\n", | |
| "2 0 bero-rren-arentzako\n", | |
| "2 0 bero-rren-arentzat\n", | |
| "2 0 bero-rren-ari\n", | |
| "2 0 bero-rren-az\n", | |
| "2 0 bero-rren-ean\n", | |
| "2 0 bero-rren-egatik\n", | |
| "2 0 bero-rren-ei\n", | |
| "2 0 bero-rren-ek\n", | |
| "2 0 bero-rren-ekiko\n", | |
| "2 0 bero-rren-ekin\n", | |
| "2 0 bero-rren-eko\n", | |
| "2 0 bero-rren-ekoa\n", | |
| "2 0 bero-rren-ekoago\n", | |
| "2 0 bero-rren-ekoak\n", | |
| "2 0 bero-rren-ekoan\n", | |
| "2 0 bero-rren-ekoarekin\n", | |
| "2 0 bero-rren-ekoarendako\n", | |
| "2 0 bero-rren-ekoarentzat\n", | |
| "2 0 bero-rren-ekoari\n", | |
| "2 0 bero-rren-ekoaz\n", | |
| "2 0 bero-rren-ekoegi\n", | |
| "2 0 bero-rren-ekoei\n", | |
| "2 0 bero-rren-ekoek\n", | |
| "2 0 bero-rren-ekoekin\n", | |
| "2 0 bero-rren-ekoendako\n", | |
| "2 0 bero-rren-ekoentzat\n", | |
| "2 0 bero-rren-ekoetan\n", | |
| "2 0 bero-rren-ekoetara\n", | |
| "2 0 bero-rren-ekoetaraino\n", | |
| "2 0 bero-rren-ekoetarantz\n", | |
| "2 0 bero-rren-ekoetarat\n", | |
| "2 0 bero-rren-ekoetarik\n", | |
| "2 0 bero-rren-ekoetatik\n", | |
| "2 0 bero-rren-ekoez\n", | |
| "2 0 bero-rren-ekogatik\n", | |
| "2 0 bero-rren-ekok\n", | |
| "2 0 bero-rren-ekooi\n", | |
| "2 0 bero-rren-ekook\n", | |
| "2 0 bero-rren-ekookin\n", | |
| "2 0 bero-rren-ekoontzat\n", | |
| "2 0 bero-rren-ekootaz\n", | |
| "2 0 bero-rren-ekooz\n", | |
| "2 0 bero-rren-ekora\n", | |
| "2 0 bero-rren-ekoraino\n", | |
| "2 0 bero-rren-ekorantz\n", | |
| "2 0 bero-rren-ekorat\n", | |
| "2 0 bero-rren-ekorekin\n", | |
| "2 0 bero-rren-ekorendako\n", | |
| "2 0 bero-rren-ekorentzat\n", | |
| "2 0 bero-rren-ekori\n", | |
| "2 0 bero-rren-ekorik\n", | |
| "2 0 bero-rren-ekotan\n", | |
| "2 0 bero-rren-ekotara\n", | |
| "2 0 bero-rren-ekotaraino\n", | |
| "2 0 bero-rren-ekotarantz\n", | |
| "2 0 bero-rren-ekotarat\n", | |
| "2 0 bero-rren-ekotarik\n", | |
| "2 0 bero-rren-ekotatik\n", | |
| "2 0 bero-rren-ekotik\n", | |
| "2 0 bero-rren-ekotzat\n", | |
| "2 0 bero-rren-ekoz\n", | |
| "2 0 bero-rren-en\n", | |
| "2 0 bero-rren-ena\n", | |
| "2 0 bero-rren-enak\n", | |
| "2 0 bero-rren-enarekin\n", | |
| "2 0 bero-rren-enarendako\n", | |
| "2 0 bero-rren-enarentzat\n", | |
| "2 0 bero-rren-enari\n", | |
| "2 0 bero-rren-enaz\n", | |
| "2 0 bero-rren-endako\n", | |
| "2 0 bero-rren-enean\n", | |
| "2 0 bero-rren-enei\n", | |
| "2 0 bero-rren-enek\n", | |
| "2 0 bero-rren-enekin\n", | |
| "2 0 bero-rren-enendako\n", | |
| "2 0 bero-rren-enentzat\n", | |
| "2 0 bero-rren-enera\n", | |
| "2 0 bero-rren-eneraino\n", | |
| "2 0 bero-rren-enerantz\n", | |
| "2 0 bero-rren-enerat\n", | |
| "2 0 bero-rren-enetan\n", | |
| "2 0 bero-rren-enetara\n", | |
| "2 0 bero-rren-enetaraino\n", | |
| "2 0 bero-rren-enetarantz\n", | |
| "2 0 bero-rren-enetarat\n", | |
| "2 0 bero-rren-enetarik\n", | |
| "2 0 bero-rren-enetatik\n", | |
| "2 0 bero-rren-enetik\n", | |
| "2 0 bero-rren-enez\n", | |
| "2 0 bero-rren-engan\n", | |
| "2 0 bero-rren-engana\n", | |
| "2 0 bero-rren-enganaino\n", | |
| "2 0 bero-rren-enganantz\n", | |
| "2 0 bero-rren-engandik\n", | |
| "2 0 bero-rren-engatik\n", | |
| "2 0 bero-rren-eni\n", | |
| "2 0 bero-rren-enik\n", | |
| "2 0 bero-rren-enoi\n", | |
| "2 0 bero-rren-enok\n", | |
| "2 0 bero-rren-enokin\n", | |
| "2 0 bero-rren-enontzat\n", | |
| "2 0 bero-rren-enotaz\n", | |
| "2 0 bero-rren-enoz\n", | |
| "2 0 bero-rren-entzako\n", | |
| "2 0 bero-rren-entzat\n", | |
| "2 0 bero-rren-era\n", | |
| "2 0 bero-rren-erago\n", | |
| "2 0 bero-rren-eraino\n", | |
| "2 0 bero-rren-erainoko\n", | |
| "2 0 bero-rren-erako\n", | |
| "2 0 bero-rren-erantz\n", | |
| "2 0 bero-rren-erantzago\n", | |
| "2 0 bero-rren-erantzegi\n", | |
| "2 0 bero-rren-eranzko\n", | |
| "2 0 bero-rren-erat\n", | |
| "2 0 bero-rren-etako\n", | |
| "2 0 bero-rren-etakoa\n", | |
| "2 0 bero-rren-etakoago\n", | |
| "2 0 bero-rren-etakoak\n", | |
| "2 0 bero-rren-etakoan\n", | |
| "2 0 bero-rren-etakoarekin\n", | |
| "2 0 bero-rren-etakoarendako\n", | |
| "2 0 bero-rren-etakoarentzat\n", | |
| "2 0 bero-rren-etakoari\n", | |
| "2 0 bero-rren-etakoaz\n", | |
| "2 0 bero-rren-etakoegi\n", | |
| "2 0 bero-rren-etakoei\n", | |
| "2 0 bero-rren-etakoek\n", | |
| "2 0 bero-rren-etakoekin\n", | |
| "2 0 bero-rren-etakoendako\n", | |
| "2 0 bero-rren-etakoentzat\n", | |
| "2 0 bero-rren-etakoetan\n", | |
| "2 0 bero-rren-etakoetara\n", | |
| "2 0 bero-rren-etakoetaraino\n", | |
| "2 0 bero-rren-etakoetarantz\n", | |
| "2 0 bero-rren-etakoetarat\n", | |
| "2 0 bero-rren-etakoetarik\n", | |
| "2 0 bero-rren-etakoetatik\n", | |
| "2 0 bero-rren-etakoez\n", | |
| "2 0 bero-rren-etakogatik\n", | |
| "2 0 bero-rren-etakok\n", | |
| "2 0 bero-rren-etakooi\n", | |
| "2 0 bero-rren-etakook\n", | |
| "2 0 bero-rren-etakookin\n", | |
| "2 0 bero-rren-etakoontzat\n", | |
| "2 0 bero-rren-etakootaz\n", | |
| "2 0 bero-rren-etakooz\n", | |
| "2 0 bero-rren-etakora\n", | |
| "2 0 bero-rren-etakoraino\n", | |
| "2 0 bero-rren-etakorantz\n", | |
| "2 0 bero-rren-etakorat\n", | |
| "2 0 bero-rren-etakorekin\n", | |
| "2 0 bero-rren-etakorendako\n", | |
| "2 0 bero-rren-etakorentzat\n", | |
| "2 0 bero-rren-etakori\n", | |
| "2 0 bero-rren-etakorik\n", | |
| "2 0 bero-rren-etakotan\n", | |
| "2 0 bero-rren-etakotara\n", | |
| "2 0 bero-rren-etakotaraino\n", | |
| "2 0 bero-rren-etakotarantz\n", | |
| "2 0 bero-rren-etakotarat\n", | |
| "2 0 bero-rren-etakotarik\n", | |
| "2 0 bero-rren-etakotatik\n", | |
| "2 0 bero-rren-etakotik\n", | |
| "2 0 bero-rren-etakotzat\n", | |
| "2 0 bero-rren-etakoz\n", | |
| "2 0 bero-rren-etan\n", | |
| "2 0 bero-rren-etara\n", | |
| "2 0 bero-rren-etarago\n", | |
| "2 0 bero-rren-etaraino\n", | |
| "2 0 bero-rren-etarainoko\n", | |
| "2 0 bero-rren-etarako\n", | |
| "2 0 bero-rren-etarantz\n", | |
| "2 0 bero-rren-etarantzago\n", | |
| "2 0 bero-rren-etarantzegi\n", | |
| "2 0 bero-rren-etaranzko\n", | |
| "2 0 bero-rren-etarat\n", | |
| "2 0 bero-rren-etarik\n", | |
| "2 0 bero-rren-etariko\n", | |
| "2 0 bero-rren-etatik\n", | |
| "2 0 bero-rren-etatiko\n", | |
| "2 0 bero-rren-etik\n", | |
| "2 0 bero-rren-etiko\n", | |
| "2 0 bero-rren-ez\n", | |
| "2 0 bero-rren-ezko\n", | |
| "2 0 bero-rren-gatik\n", | |
| "2 0 bero-rren-gatiko\n", | |
| "2 0 bero-rren-i\n", | |
| "2 0 bero-rren-ik\n", | |
| "2 0 bero-rren-oi\n", | |
| "2 0 bero-rren-ok\n", | |
| "2 0 bero-rren-okiko\n", | |
| "2 0 bero-rren-okin\n", | |
| "2 0 bero-rren-on\n", | |
| "2 0 bero-rren-ona\n", | |
| "2 0 bero-rren-onak\n", | |
| "2 0 bero-rren-onarekin\n", | |
| "2 0 bero-rren-onarendako\n", | |
| "2 0 bero-rren-onarentzat\n", | |
| "2 0 bero-rren-onari\n", | |
| "2 0 bero-rren-onaz\n", | |
| "2 0 bero-rren-onean\n", | |
| "2 0 bero-rren-onei\n", | |
| "2 0 bero-rren-onek\n", | |
| "2 0 bero-rren-onekin\n", | |
| "2 0 bero-rren-onendako\n", | |
| "2 0 bero-rren-onentzat\n", | |
| "2 0 bero-rren-onera\n", | |
| "2 0 bero-rren-oneraino\n", | |
| "2 0 bero-rren-onerantz\n", | |
| "2 0 bero-rren-onerat\n", | |
| "2 0 bero-rren-onetik\n", | |
| "2 0 bero-rren-onez\n", | |
| "2 0 bero-rren-ongan\n", | |
| "2 0 bero-rren-ongana\n", | |
| "2 0 bero-rren-onganaino\n", | |
| "2 0 bero-rren-onganantz\n", | |
| "2 0 bero-rren-ongandik\n", | |
| "2 0 bero-rren-ongatik\n", | |
| "2 0 bero-rren-oni\n", | |
| "2 0 bero-rren-onik\n", | |
| "2 0 bero-rren-onoi\n", | |
| "2 0 bero-rren-onok\n", | |
| "2 0 bero-rren-onokin\n", | |
| "2 0 bero-rren-onontzat\n", | |
| "2 0 bero-rren-onotaz\n", | |
| "2 0 bero-rren-onoz\n", | |
| "2 0 bero-rren-ontzako\n", | |
| "2 0 bero-rren-ontzat\n", | |
| "2 0 bero-rren-otako\n", | |
| "2 0 bero-rren-otan\n", | |
| "2 0 bero-rren-otara\n", | |
| "2 0 bero-rren-otaraino\n", | |
| "2 0 bero-rren-otarantz\n", | |
| "2 0 bero-rren-otarat\n", | |
| "2 0 bero-rren-otarik\n", | |
| "2 0 bero-rren-otatik\n", | |
| "2 0 bero-rren-otaz\n", | |
| "2 0 bero-rren-oz\n", | |
| "2 0 bero-rren-txo\n", | |
| "2 0 bero-rren-txoa\n", | |
| "2 0 bero-rren-txoak\n", | |
| "2 0 bero-rren-txoan\n", | |
| "2 0 bero-rren-txoarekin\n", | |
| "2 0 bero-rren-txoaren\n", | |
| "2 0 bero-rren-txoarendako\n", | |
| "2 0 bero-rren-txoarentzat\n", | |
| "2 0 bero-rren-txoari\n", | |
| "2 0 bero-rren-txoaz\n", | |
| "2 0 bero-rren-txoei\n", | |
| "2 0 bero-rren-txoek\n", | |
| "2 0 bero-rren-txoekin\n", | |
| "2 0 bero-rren-txoen\n", | |
| "2 0 bero-rren-txoendako\n", | |
| "2 0 bero-rren-txoentzat\n", | |
| "2 0 bero-rren-txoez\n", | |
| "2 0 bero-rren-txogatik\n", | |
| "2 0 bero-rren-txok\n", | |
| "2 0 bero-rren-txoko\n", | |
| "2 0 bero-rren-txooi\n", | |
| "2 0 bero-rren-txook\n", | |
| "2 0 bero-rren-txookin\n", | |
| "2 0 bero-rren-txoon\n", | |
| "2 0 bero-rren-txoontzat\n", | |
| "2 0 bero-rren-txootaz\n", | |
| "2 0 bero-rren-txooz\n", | |
| "2 0 bero-rren-txora\n", | |
| "2 0 bero-rren-txoraino\n", | |
| "2 0 bero-rren-txorantz\n", | |
| "2 0 bero-rren-txorat\n", | |
| "2 0 bero-rren-txorekin\n", | |
| "2 0 bero-rren-txoren\n", | |
| "2 0 bero-rren-txorendako\n", | |
| "2 0 bero-rren-txorentzat\n", | |
| "2 0 bero-rren-txori\n", | |
| "2 0 bero-rren-txorik\n", | |
| "2 0 bero-rren-txotik\n", | |
| "2 0 bero-rren-txotzat\n", | |
| "2 0 bero-rren-txoz\n", | |
| "2 0 bero-rren-tzar\n", | |
| "2 0 bero-rren-tzargatik\n", | |
| "2 0 bero-rren-tzarra\n", | |
| "2 0 bero-rren-tzarrak\n", | |
| "2 0 bero-rren-tzarrarekin\n", | |
| "2 0 bero-rren-tzarraren\n", | |
| "2 0 bero-rren-tzarrarendako\n", | |
| "2 0 bero-rren-tzarrarentzat\n", | |
| "2 0 bero-rren-tzarrari\n", | |
| "2 0 bero-rren-tzarraz\n", | |
| "2 0 bero-rren-tzarrean\n", | |
| "2 0 bero-rren-tzarrei\n", | |
| "2 0 bero-rren-tzarrek\n", | |
| "2 0 bero-rren-tzarrekin\n", | |
| "2 0 bero-rren-tzarreko\n", | |
| "2 0 bero-rren-tzarren\n", | |
| "2 0 bero-rren-tzarrendako\n", | |
| "2 0 bero-rren-tzarrentzat\n", | |
| "2 0 bero-rren-tzarrera\n", | |
| "2 0 bero-rren-tzarreraino\n", | |
| "2 0 bero-rren-tzarrerantz\n", | |
| "2 0 bero-rren-tzarrerat\n", | |
| "2 0 bero-rren-tzarretik\n", | |
| "2 0 bero-rren-tzarrez\n", | |
| "2 0 bero-rren-tzarri\n", | |
| "2 0 bero-rren-tzarrik\n", | |
| "2 0 bero-rren-tzarroi\n", | |
| "2 0 bero-rren-tzarrok\n", | |
| "2 0 bero-rren-tzarrokin\n", | |
| "2 0 bero-rren-tzarron\n", | |
| "2 0 bero-rren-tzarrontzat\n", | |
| "2 0 bero-rren-tzarrotaz\n", | |
| "2 0 bero-rren-tzarroz\n", | |
| "2 0 bero-rren-tzartzat\n", | |
| "2 0 bero-rren-tzat\n", | |
| ". . rlengths[('238', 1, 0)] -> (406, 6099)\n", | |
| "/238 -> (406, 6099)\n", | |
| "1 0 bero-rrena\n", | |
| "1 0 bero-rrenak\n", | |
| "1 0 bero-rrenarekin\n", | |
| "1 0 bero-rrenarendako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rrenarentzat\n", | |
| "1 0 bero-rrenari\n", | |
| "1 0 bero-rrenaz\n", | |
| "1 0 bero-rrendako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rrenean\n", | |
| "1 0 bero-rrenei\n", | |
| "1 0 bero-rrenek\n", | |
| "1 0 bero-rrenekin\n", | |
| "1 0 bero-rrenendako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rrenentzat\n", | |
| "1 0 bero-rrenera\n", | |
| "1 0 bero-rreneraino\n", | |
| "1 0 bero-rrenerantz\n", | |
| "1 0 bero-rrenerat\n", | |
| "1 0 bero-rrenetan\n", | |
| "1 0 bero-rrenetara\n", | |
| "1 0 bero-rrenetaraino\n", | |
| "1 0 bero-rrenetarantz\n", | |
| "1 0 bero-rrenetarat\n", | |
| "1 0 bero-rrenetarik\n", | |
| "1 0 bero-rrenetatik\n", | |
| "1 0 bero-rrenetik\n", | |
| "1 0 bero-rrenez\n", | |
| "1 0 bero-rrengan\n", | |
| "1 0 bero-rrengana\n", | |
| "1 0 bero-rrenganaino\n", | |
| "1 0 bero-rrenganantz\n", | |
| "1 0 bero-rrengandik\n", | |
| "1 0 bero-rrengatik\n", | |
| "1 0 bero-rreni\n", | |
| "1 0 bero-rrenik\n", | |
| "1 0 bero-rrenoi\n", | |
| "1 0 bero-rrenok\n", | |
| "1 0 bero-rrenokin\n", | |
| "1 0 bero-rrenontzat\n", | |
| "1 0 bero-rrenotaz\n", | |
| "1 0 bero-rrenoz\n", | |
| "1 0 bero-rrentzako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rrentzat\n", | |
| "1 0 bero-rretako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretakoa\n", | |
| "1 0 bero-rretakoago\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretakoak\n", | |
| "1 0 bero-rretakoan\n", | |
| "1 0 bero-rretakoarekin\n", | |
| "1 0 bero-rretakoarendako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretakoarentzat\n", | |
| "1 0 bero-rretakoari\n", | |
| "1 0 bero-rretakoaz\n", | |
| "1 0 bero-rretakoegi\n", | |
| "1 0 bero-rretakoei\n", | |
| "1 0 bero-rretakoek\n", | |
| "1 0 bero-rretakoekin\n", | |
| "1 0 bero-rretakoendako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretakoentzat\n", | |
| "1 0 bero-rretakoetan\n", | |
| "1 0 bero-rretakoetara\n", | |
| "1 0 bero-rretakoetaraino\n", | |
| "1 0 bero-rretakoetarantz\n", | |
| "1 0 bero-rretakoetarat\n", | |
| "1 0 bero-rretakoetarik\n", | |
| "1 0 bero-rretakoetatik\n", | |
| "1 0 bero-rretakoez\n", | |
| "1 0 bero-rretakogatik\n", | |
| "1 0 bero-rretakok\n", | |
| "1 0 bero-rretakooi\n", | |
| "1 0 bero-rretakook\n", | |
| "1 0 bero-rretakookin\n", | |
| "1 0 bero-rretakoontzat\n", | |
| "1 0 bero-rretakootaz\n", | |
| "1 0 bero-rretakooz\n", | |
| "1 0 bero-rretakora\n", | |
| "1 0 bero-rretakoraino\n", | |
| "1 0 bero-rretakorantz\n", | |
| "1 0 bero-rretakorat\n", | |
| "1 0 bero-rretakorekin\n", | |
| "1 0 bero-rretakorendako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretakorentzat\n", | |
| "1 0 bero-rretakori\n", | |
| "1 0 bero-rretakorik\n", | |
| "1 0 bero-rretakotan\n", | |
| "1 0 bero-rretakotara\n", | |
| "1 0 bero-rretakotaraino\n", | |
| "1 0 bero-rretakotarantz\n", | |
| "1 0 bero-rretakotarat\n", | |
| "1 0 bero-rretakotarik\n", | |
| "1 0 bero-rretakotatik\n", | |
| "1 0 bero-rretakotik\n", | |
| "1 0 bero-rretakotzat\n", | |
| "1 0 bero-rretakoz\n", | |
| "1 0 bero-rretan\n", | |
| "1 0 bero-rretara\n", | |
| "1 0 bero-rretarago\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretaraino\n", | |
| "1 0 bero-rretarainoko\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretarako\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretarantz\n", | |
| "1 0 bero-rretarantzago\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretarantzegi\n", | |
| "1 0 bero-rretaranzko\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretarat\n", | |
| "1 0 bero-rretarik\n", | |
| "1 0 bero-rretariko\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretatik\n", | |
| "1 0 bero-rretatiko\n", | |
| "/243 -> (520, 9356)\n", | |
| "1 0 bero-rretaz\n", | |
| "1 0 bero-rrez\n", | |
| "1 0 bero-rrezaz\n", | |
| ". . rlengths[('358', 0, 0)] -> (9363, 166736)\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(9363, 166736)" | |
| ] | |
| }, | |
| "execution_count": 271, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "word = list(dk for dk in dic.keys() if '358' in dk[1:])[0][:1]\n", | |
| "print('0 0 {}'.format('-'.join(word)))\n", | |
| "apply_rule('358', disp=True, word=word)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "and now let's use `apply_rule` to estimate how big the dictionary would be if we expanded all of the rules:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 460, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T00:56:22.124226Z", | |
| "start_time": "2017-11-17T00:56:21.279549Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "5,757,832,836 words, 123.9 Gb\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "nwords, nchars = 0, 0\n", | |
| "for lineparts in dic:\n", | |
| " nwords += 1\n", | |
| " word = lineparts[0]\n", | |
| " l_word = len(word)\n", | |
| " nchars += l_word\n", | |
| " for rc in lineparts[1:]:\n", | |
| " to_add = apply_rule(rc, word=(word,))\n", | |
| " nwords += to_add[0]\n", | |
| " nchars += to_add[0] * l_word + to_add[1]\n", | |
| "\n", | |
| "print('{:,} words, {:.1f} Gb'.format(nwords, nchars / 1024**3))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "quite big 😲" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 511, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T12:53:41.065118Z", | |
| "start_time": "2017-11-17T12:53:41.053504Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "~ 43,219 constructed words per .dic entry\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "wpe = nwords / len(dic)\n", | |
| "print('~ {:,.0f} constructed words per .dic entry'.format(wpe))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 494, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T01:04:35.485601Z", | |
| "start_time": "2017-11-17T01:04:35.469838Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "~ 23.1 chars per constructed word\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "avg_wlength = nchars / nwords\n", | |
| "print('~ {:.3g} chars per constructed word'.format(avg_wlength))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "let's see which rules chain (i.e. have been applied to words which already have an affix):" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 309, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-16T20:40:08.205170Z", | |
| "start_time": "2017-11-16T20:40:08.174143Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{('238', 1, 0): 406, ('243', 1, 0): 520}" | |
| ] | |
| }, | |
| "execution_count": 309, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "{rh: nw for rh, (nw, chars) in rlengths.items() if (rh[1] or rh[2]) and nw}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "so, only rules 238 and 243 can be applied to words which already have an affix." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## look for similar rules" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 381, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T00:06:53.980527Z", | |
| "start_time": "2017-11-17T00:06:53.934844Z" | |
| }, | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def hashrule(rcode):\n", | |
| " \"\"\"return a string representation of a rule, with sorted variants\"\"\"\n", | |
| " return '\\n'.join(' '.join(rk[2:]) for rk in sorted(rules.get(str(rcode), {}).keys()))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "let's check for similar rules, using `difflib`. This will take quite some time (about 55 minutes for the run recorded below):" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 504, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T12:46:24.398956Z", | |
| "start_time": "2017-11-17T11:51:12.363263Z" | |
| }, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1\n", | |
| " 18: 100%\n", | |
| " 29: 99.9507%\n", | |
| " 27: 99.9507%\n", | |
| "2\n", | |
| " 28: 99.9194%\n", | |
| " 13: 99.6229%\n", | |
| " 22: 99.6227%\n", | |
| "3\n", | |
| "4\n", | |
| " 8: 99.6655%\n", | |
| "5\n", | |
| " 9: 99.7228%\n", | |
| "6\n", | |
| " 10: 99.7228%\n", | |
| "7\n", | |
| " 47: 99.9676%\n", | |
| " 48: 99.9073%\n", | |
| "8\n", | |
| "9\n", | |
| "10\n", | |
| "11\n", | |
| " 22: 100%\n", | |
| " 21: 100%\n", | |
| " 13: 99.9549%\n", | |
| "12\n", | |
| "13\n", | |
| " 22: 99.9549%\n", | |
| " 21: 99.9549%\n", | |
| " 11: 99.9549%\n", | |
| "14\n", | |
| "15\n", | |
| "16\n", | |
| "17\n", | |
| "18\n", | |
| " 1: 100%\n", | |
| " 29: 99.9507%\n", | |
| " 27: 99.9507%\n", | |
| "19\n", | |
| " 68: 99.2916%\n", | |
| " 66: 99.2916%\n", | |
| " 65: 99.2916%\n", | |
| "20\n", | |
| "21\n", | |
| " 22: 100%\n", | |
| " 11: 100%\n", | |
| " 13: 99.9549%\n", | |
| "22\n", | |
| " 21: 100%\n", | |
| " 11: 100%\n", | |
| " 13: 99.9549%\n", | |
| "23\n", | |
| " 29: 100%\n", | |
| " 27: 100%\n", | |
| " 18: 99.9507%\n", | |
| "24\n", | |
| " 13: 99.6497%\n", | |
| " 22: 99.6496%\n", | |
| " 21: 99.6496%\n", | |
| "25\n", | |
| " 30: 99.8205%\n", | |
| "26\n", | |
| "27\n", | |
| " 29: 100%\n", | |
| " 23: 100%\n", | |
| " 18: 99.9507%\n", | |
| "28\n", | |
| " 2: 99.9194%\n", | |
| " 13: 99.7035%\n", | |
| " 22: 99.7033%\n", | |
| "29\n", | |
| " 27: 100%\n", | |
| " 23: 100%\n", | |
| " 18: 99.9507%\n", | |
| "30\n", | |
| " 25: 99.8205%\n", | |
| "31\n", | |
| "32\n", | |
| "33\n", | |
| "34\n", | |
| " 35: 100%\n", | |
| " 49: 99.5953%\n", | |
| "35\n", | |
| " 34: 100%\n", | |
| " 49: 99.5953%\n", | |
| "36\n", | |
| "37\n", | |
| "38\n", | |
| " 36: 99.3461%\n", | |
| "39\n", | |
| " 37: 99.4115%\n", | |
| "40\n", | |
| "41\n", | |
| " 345: 85.2934%\n", | |
| "42\n", | |
| "43\n", | |
| "44\n", | |
| "45\n", | |
| "46\n", | |
| "47\n", | |
| " 7: 99.9676%\n", | |
| " 48: 99.9397%\n", | |
| "48\n", | |
| " 47: 99.9397%\n", | |
| " 7: 99.9073%\n", | |
| "49\n", | |
| "50\n", | |
| "51\n", | |
| " 355: 100%\n", | |
| " 354: 100%\n", | |
| "53\n", | |
| " 55: 100%\n", | |
| " 54: 100%\n", | |
| "54\n", | |
| " 55: 100%\n", | |
| " 53: 100%\n", | |
| "55\n", | |
| " 54: 100%\n", | |
| " 53: 100%\n", | |
| "56\n", | |
| "57\n", | |
| " 327: 90.8714%\n", | |
| " 326: 90.8714%\n", | |
| " 325: 90.8714%\n", | |
| "58\n", | |
| " 60: 90.3279%\n", | |
| " 227: 83.6715%\n", | |
| "59\n", | |
| " 64: 91.3816%\n", | |
| " 69: 89.6601%\n", | |
| " 67: 89.6601%\n", | |
| "60\n", | |
| " 227: 94.491%\n", | |
| " 58: 90.3279%\n", | |
| " 233: 85.0395%\n", | |
| "61\n", | |
| "62\n", | |
| " 69: 90.7178%\n", | |
| " 67: 90.7178%\n", | |
| " 63: 90.7178%\n", | |
| "63\n", | |
| " 69: 100%\n", | |
| " 67: 100%\n", | |
| " 231: 94.5827%\n", | |
| "64\n", | |
| " 59: 91.3816%\n", | |
| " 69: 88.8471%\n", | |
| " 67: 88.8471%\n", | |
| "65\n", | |
| " 68: 100%\n", | |
| " 66: 100%\n", | |
| " 19: 99.2916%\n", | |
| "66\n", | |
| " 68: 100%\n", | |
| " 65: 100%\n", | |
| " 19: 99.2916%\n", | |
| "67\n", | |
| " 69: 100%\n", | |
| " 63: 100%\n", | |
| " 231: 94.5827%\n", | |
| "68\n", | |
| " 66: 100%\n", | |
| " 65: 100%\n", | |
| " 19: 99.2916%\n", | |
| "69\n", | |
| " 67: 100%\n", | |
| " 63: 100%\n", | |
| " 231: 94.5827%\n", | |
| "70\n", | |
| " 69: 88.2988%\n", | |
| " 67: 88.2988%\n", | |
| " 63: 88.2988%\n", | |
| "71\n", | |
| " 59: 87.7655%\n", | |
| " 246: 85.2364%\n", | |
| " 64: 83.1837%\n", | |
| "72\n", | |
| " 69: 88.2988%\n", | |
| " 67: 88.2988%\n", | |
| " 63: 88.2988%\n", | |
| "73\n", | |
| "75\n", | |
| " 78: 96%\n", | |
| " 77: 96%\n", | |
| " 76: 96%\n", | |
| "76\n", | |
| " 78: 100%\n", | |
| " 77: 100%\n", | |
| " 75: 96%\n", | |
| "77\n", | |
| " 78: 100%\n", | |
| " 76: 100%\n", | |
| " 75: 96%\n", | |
| "78\n", | |
| " 77: 100%\n", | |
| " 76: 100%\n", | |
| " 75: 96%\n", | |
| "79\n", | |
| " 263: 92.6471%\n", | |
| " 262: 92.6471%\n", | |
| " 261: 92.6471%\n", | |
| "80\n", | |
| "81\n", | |
| " 92: 95.5882%\n", | |
| " 91: 95.5882%\n", | |
| " 89: 95.5882%\n", | |
| "82\n", | |
| " 92: 85.9375%\n", | |
| " 91: 85.9375%\n", | |
| " 89: 85.9375%\n", | |
| "83\n", | |
| " 90: 99.3007%\n", | |
| " 92: 95.5882%\n", | |
| " 91: 95.5882%\n", | |
| "84\n", | |
| "85\n", | |
| " 92: 100%\n", | |
| " 91: 100%\n", | |
| " 89: 100%\n", | |
| "86\n", | |
| " 92: 100%\n", | |
| " 91: 100%\n", | |
| " 89: 100%\n", | |
| "87\n", | |
| " 92: 100%\n", | |
| " 91: 100%\n", | |
| " 89: 100%\n", | |
| "88\n", | |
| " 92: 100%\n", | |
| " 91: 100%\n", | |
| " 89: 100%\n", | |
| "89\n", | |
| " 92: 100%\n", | |
| " 91: 100%\n", | |
| " 88: 100%\n", | |
| "90\n", | |
| " 83: 99.3007%\n", | |
| " 92: 94.8905%\n", | |
| " 91: 94.8905%\n", | |
| "91\n", | |
| " 92: 100%\n", | |
| " 89: 100%\n", | |
| " 88: 100%\n", | |
| "92\n", | |
| " 91: 100%\n", | |
| " 89: 100%\n", | |
| " 88: 100%\n", | |
| "93\n", | |
| " 263: 99.4924%\n", | |
| " 262: 99.4924%\n", | |
| " 261: 99.4924%\n", | |
| "94\n", | |
| " 263: 99.4083%\n", | |
| " 262: 99.4083%\n", | |
| " 261: 99.4083%\n", | |
| "95\n", | |
| " 265: 94.7036%\n", | |
| "96\n", | |
| " 185: 98.1937%\n", | |
| " 350: 97.0473%\n", | |
| " 184: 97.046%\n", | |
| "97\n", | |
| " 357: 100%\n", | |
| "98\n", | |
| "99\n", | |
| " 224: 96.5574%\n", | |
| "100\n", | |
| "101\n", | |
| " 107: 90.9091%\n", | |
| " 106: 90.9091%\n", | |
| " 104: 90.9091%\n", | |
| "102\n", | |
| "103\n", | |
| " 109: 95.2381%\n", | |
| " 108: 95.2381%\n", | |
| " 105: 95.2381%\n", | |
| "104\n", | |
| " 107: 100%\n", | |
| " 106: 100%\n", | |
| " 101: 90.9091%\n", | |
| "105\n", | |
| " 109: 100%\n", | |
| " 108: 100%\n", | |
| " 103: 95.2381%\n", | |
| "106\n", | |
| " 107: 100%\n", | |
| " 104: 100%\n", | |
| " 101: 90.9091%\n", | |
| "107\n", | |
| " 106: 100%\n", | |
| " 104: 100%\n", | |
| " 101: 90.9091%\n", | |
| "108\n", | |
| " 109: 100%\n", | |
| " 105: 100%\n", | |
| " 103: 95.2381%\n", | |
| "109\n", | |
| " 108: 100%\n", | |
| " 105: 100%\n", | |
| " 103: 95.2381%\n", | |
| "110\n", | |
| " 111: 100%\n", | |
| "111\n", | |
| " 110: 100%\n", | |
| "112\n", | |
| " 135: 90.1322%\n", | |
| " 132: 82.3011%\n", | |
| " 130: 82.3011%\n", | |
| "113\n", | |
| " 114: 93.7613%\n", | |
| "114\n", | |
| " 113: 93.7613%\n", | |
| " 137: 84.8217%\n", | |
| " 129: 80.9109%\n", | |
| "115\n", | |
| " 140: 93.5969%\n", | |
| "117\n", | |
| " 120: 100%\n", | |
| " 344: 95.9811%\n", | |
| " 343: 95.9811%\n", | |
| "119\n", | |
| "120\n", | |
| " 117: 100%\n", | |
| " 344: 95.9811%\n", | |
| " 343: 95.9811%\n", | |
| "121\n", | |
| " 154: 99.5544%\n", | |
| " 307: 96.6601%\n", | |
| " 311: 96.1842%\n", | |
| "122\n", | |
| " 129: 100%\n", | |
| " 128: 100%\n", | |
| " 137: 88.8496%\n", | |
| "123\n", | |
| " 186: 82.6018%\n", | |
| "124\n", | |
| "125\n", | |
| " 132: 99.782%\n", | |
| " 130: 99.782%\n", | |
| " 126: 99.782%\n", | |
| "126\n", | |
| " 132: 100%\n", | |
| " 130: 100%\n", | |
| " 125: 99.782%\n", | |
| "127\n", | |
| " 134: 99.7543%\n", | |
| " 133: 99.7543%\n", | |
| " 132: 99.7408%\n", | |
| "128\n", | |
| " 129: 100%\n", | |
| " 122: 100%\n", | |
| " 137: 88.8496%\n", | |
| "129\n", | |
| " 128: 100%\n", | |
| " 122: 100%\n", | |
| " 137: 88.8496%\n", | |
| "130\n", | |
| " 132: 100%\n", | |
| " 126: 100%\n", | |
| " 125: 99.782%\n", | |
| "131\n", | |
| " 129: 99.347%\n", | |
| " 128: 99.347%\n", | |
| " 122: 99.347%\n", | |
| "132\n", | |
| " 130: 100%\n", | |
| " 126: 100%\n", | |
| " 125: 99.782%\n", | |
| "133\n", | |
| " 127: 99.7543%\n", | |
| " 134: 99.7271%\n", | |
| " 132: 99.6319%\n", | |
| "134\n", | |
| " 127: 99.7543%\n", | |
| " 133: 99.7271%\n", | |
| " 132: 99.6319%\n", | |
| "135\n", | |
| " 112: 90.1322%\n", | |
| "136\n", | |
| " 114: 92.4336%\n", | |
| " 113: 87.3561%\n", | |
| " 137: 81.6458%\n", | |
| "137\n", | |
| "138\n", | |
| "139\n", | |
| " 141: 99.6754%\n", | |
| "140\n", | |
| "141\n", | |
| " 139: 99.6754%\n", | |
| "142\n", | |
| "143\n", | |
| "144\n", | |
| " 338: 88.8314%\n", | |
| " 307: 81.1396%\n", | |
| " 311: 80.8924%\n", | |
| "145\n", | |
| "146\n", | |
| " 307: 80.9308%\n", | |
| " 311: 80.5698%\n", | |
| " 153: 80.5698%\n", | |
| "147\n", | |
| " 146: 80.2244%\n", | |
| "148\n", | |
| " 258: 85.921%\n", | |
| " 257: 85.921%\n", | |
| " 254: 85.921%\n", | |
| "149\n", | |
| " 93: 87.881%\n", | |
| " 263: 87.8267%\n", | |
| " 262: 87.8267%\n", | |
| "150\n", | |
| " 311: 100%\n", | |
| " 153: 100%\n", | |
| " 152: 100%\n", | |
| "151\n", | |
| " 341: 92.3166%\n", | |
| "152\n", | |
| " 311: 100%\n", | |
| " 153: 100%\n", | |
| " 150: 100%\n", | |
| "153\n", | |
| " 311: 100%\n", | |
| " 152: 100%\n", | |
| " 150: 100%\n", | |
| "154\n", | |
| " 121: 99.5544%\n", | |
| " 307: 96.8421%\n", | |
| " 311: 96.6292%\n", | |
| "155\n", | |
| " 161: 98.8095%\n", | |
| " 162: 98.8077%\n", | |
| " 167: 97.619%\n", | |
| "156\n", | |
| " 166: 95.6036%\n", | |
| " 165: 95.6036%\n", | |
| " 164: 95.6036%\n", | |
| "157\n", | |
| " 166: 100%\n", | |
| " 165: 100%\n", | |
| " 164: 100%\n", | |
| "158\n", | |
| " 156: 94.3711%\n", | |
| " 166: 93.2692%\n", | |
| " 165: 93.2692%\n", | |
| "159\n", | |
| " 166: 100%\n", | |
| " 165: 100%\n", | |
| " 164: 100%\n", | |
| "160\n", | |
| " 162: 97.7909%\n", | |
| " 167: 97.6471%\n", | |
| " 170: 97.2182%\n", | |
| "161\n", | |
| " 162: 99.1163%\n", | |
| " 155: 98.8095%\n", | |
| " 167: 98.0882%\n", | |
| "162\n", | |
| " 167: 98.0854%\n", | |
| " 160: 97.7909%\n", | |
| " 170: 97.3607%\n", | |
| "163\n", | |
| " 166: 100%\n", | |
| " 165: 100%\n", | |
| " 164: 100%\n", | |
| "164\n", | |
| " 166: 100%\n", | |
| " 165: 100%\n", | |
| " 163: 100%\n", | |
| "165\n", | |
| " 166: 100%\n", | |
| " 164: 100%\n", | |
| " 163: 100%\n", | |
| "166\n", | |
| " 165: 100%\n", | |
| " 164: 100%\n", | |
| " 163: 100%\n", | |
| "167\n", | |
| " 162: 98.0854%\n", | |
| " 160: 97.6471%\n", | |
| " 170: 97.2182%\n", | |
| "168\n", | |
| " 162: 97.3607%\n", | |
| " 167: 97.2182%\n", | |
| " 160: 97.2182%\n", | |
| "169\n", | |
| " 162: 97.3607%\n", | |
| " 167: 97.2182%\n", | |
| " 160: 97.2182%\n", | |
| "170\n", | |
| " 162: 97.3607%\n", | |
| " 167: 97.2182%\n", | |
| " 160: 97.2182%\n", | |
| "171\n", | |
| " 173: 100%\n", | |
| "172\n", | |
| " 175: 100%\n", | |
| " 178: 99.5902%\n", | |
| " 190: 87.696%\n", | |
| "173\n", | |
| " 171: 100%\n", | |
| "174\n", | |
| " 181: 100%\n", | |
| " 177: 100%\n", | |
| " 176: 100%\n", | |
| "175\n", | |
| " 172: 100%\n", | |
| " 178: 99.5902%\n", | |
| " 190: 87.696%\n", | |
| "176\n", | |
| " 181: 100%\n", | |
| " 177: 100%\n", | |
| " 174: 100%\n", | |
| "177\n", | |
| " 181: 100%\n", | |
| " 176: 100%\n", | |
| " 174: 100%\n", | |
| "178\n", | |
| " 175: 99.5902%\n", | |
| " 172: 99.5902%\n", | |
| " 190: 87.5452%\n", | |
| "179\n", | |
| " 219: 98.2355%\n", | |
| " 222: 95.614%\n", | |
| " 205: 83.2604%\n", | |
| "180\n", | |
| " 226: 97.0741%\n", | |
| " 221: 97.0741%\n", | |
| "181\n", | |
| " 177: 100%\n", | |
| " 176: 100%\n", | |
| " 174: 100%\n", | |
| "182\n", | |
| " 175: 89.7852%\n", | |
| " 172: 89.7852%\n", | |
| " 178: 89.7065%\n", | |
| "183\n", | |
| " 184: 100%\n", | |
| "184\n", | |
| " 183: 100%\n", | |
| "185\n", | |
| " 181: 95.502%\n", | |
| " 177: 95.502%\n", | |
| " 176: 95.502%\n", | |
| "186\n", | |
| "187\n", | |
| "188\n", | |
| "189\n", | |
| " 192: 83.8299%\n", | |
| "190\n", | |
| " 175: 87.696%\n", | |
| " 172: 87.696%\n", | |
| " 178: 87.5452%\n", | |
| "191\n", | |
| "192\n", | |
| "193\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "194\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "195\n", | |
| "196\n", | |
| "197\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "198\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "199\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "200\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "201\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "202\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| "203\n", | |
| " 205: 100%\n", | |
| " 204: 100%\n", | |
| " 202: 100%\n", | |
| "204\n", | |
| " 205: 100%\n", | |
| " 203: 100%\n", | |
| " 202: 100%\n", | |
| "205\n", | |
| " 204: 100%\n", | |
| " 203: 100%\n", | |
| " 202: 100%\n", | |
| "206\n", | |
| "207\n", | |
| " 208: 100%\n", | |
| "208\n", | |
| " 207: 100%\n", | |
| "209\n", | |
| "210\n", | |
| " 209: 99.2492%\n", | |
| "211\n", | |
| "213\n", | |
| "214\n", | |
| "215\n", | |
| "216\n", | |
| "217\n", | |
| " 226: 93.5162%\n", | |
| " 221: 93.5162%\n", | |
| " 180: 92.3786%\n", | |
| "218\n", | |
| " 219: 99.9675%\n", | |
| " 222: 97.141%\n", | |
| " 205: 84.0532%\n", | |
| "219\n", | |
| " 222: 97.1719%\n", | |
| " 205: 84.1318%\n", | |
| " 204: 84.1318%\n", | |
| "220\n", | |
| " 225: 100%\n", | |
| "221\n", | |
| " 226: 100%\n", | |
| "222\n", | |
| " 219: 97.1719%\n", | |
| " 205: 82.0944%\n", | |
| " 204: 82.0944%\n", | |
| "223\n", | |
| " 330: 93.6906%\n", | |
| " 333: 91.4768%\n", | |
| " 155: 88.6345%\n", | |
| "224\n", | |
| " 99: 96.5574%\n", | |
| "225\n", | |
| " 220: 100%\n", | |
| "226\n", | |
| " 221: 100%\n", | |
| "227\n", | |
| " 233: 87.2215%\n", | |
| " 320: 86.5241%\n", | |
| "228\n", | |
| "229\n", | |
| " 230: 100%\n", | |
| " 232: 86.6614%\n", | |
| " 231: 84.7613%\n", | |
| "230\n", | |
| " 229: 100%\n", | |
| " 232: 86.6614%\n", | |
| " 231: 84.7613%\n", | |
| "231\n", | |
| " 228: 91.7589%\n", | |
| " 328: 86.7244%\n", | |
| " 323: 86.7244%\n", | |
| "232\n", | |
| " 328: 86.4274%\n", | |
| " 323: 86.4274%\n", | |
| " 244: 86.4274%\n", | |
| "233\n", | |
| " 320: 99.3742%\n", | |
| "234\n", | |
| " 321: 100%\n", | |
| "235\n", | |
| "236\n", | |
| "237\n", | |
| " 322: 100%\n", | |
| " 328: 89.9406%\n", | |
| " 323: 89.9406%\n", | |
| "238\n", | |
| " 328: 100%\n", | |
| " 323: 100%\n", | |
| " 244: 100%\n", | |
| "239\n", | |
| " 324: 100%\n", | |
| " 328: 87.785%\n", | |
| " 323: 87.785%\n", | |
| "240\n", | |
| " 327: 100%\n", | |
| " 326: 100%\n", | |
| " 325: 100%\n", | |
| "241\n", | |
| " 327: 86.9756%\n", | |
| " 326: 86.9756%\n", | |
| " 325: 86.9756%\n", | |
| "242\n", | |
| " 327: 100%\n", | |
| " 326: 100%\n", | |
| " 325: 100%\n", | |
| "243\n", | |
| " 327: 100%\n", | |
| " 326: 100%\n", | |
| " 325: 100%\n", | |
| "244\n", | |
| " 328: 100%\n", | |
| " 323: 100%\n", | |
| " 238: 100%\n", | |
| "245\n", | |
| " 328: 87.3012%\n", | |
| " 323: 87.3012%\n", | |
| " 322: 87.3012%\n", | |
| "246\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 324: 81.7121%\n", | |
| " 239: 81.7121%\n", | |
| " 328: 80.3563%\n", | |
| "247\n", | |
| " 329: 100%\n", | |
| " 328: 87.3012%\n", | |
| " 323: 87.3012%\n", | |
| "248\n", | |
| " 328: 90.1288%\n", | |
| " 323: 90.1288%\n", | |
| " 244: 90.1288%\n", | |
| "249\n", | |
| " 305: 88.6889%\n", | |
| "250\n", | |
| " 263: 100%\n", | |
| " 262: 100%\n", | |
| " 261: 100%\n", | |
| "251\n", | |
| " 252: 99.4553%\n", | |
| " 310: 89.1667%\n", | |
| " 306: 88.674%\n", | |
| "252\n", | |
| " 251: 99.4553%\n", | |
| " 310: 89.1835%\n", | |
| " 306: 88.6884%\n", | |
| "253\n", | |
| " 263: 100%\n", | |
| " 262: 100%\n", | |
| " 261: 100%\n", | |
| "254\n", | |
| " 258: 100%\n", | |
| " 257: 100%\n", | |
| " 148: 85.921%\n", | |
| "255\n", | |
| " 263: 100%\n", | |
| " 262: 100%\n", | |
| " 261: 100%\n", | |
| "256\n", | |
| "257\n", | |
| " 258: 100%\n", | |
| " 254: 100%\n", | |
| " 148: 85.921%\n", | |
| "258\n", | |
| " 257: 100%\n", | |
| " 254: 100%\n", | |
| " 148: 85.921%\n", | |
| "259\n", | |
| "260\n", | |
| " 263: 100%\n", | |
| " 262: 100%\n", | |
| " 261: 100%\n", | |
| "261\n", | |
| " 263: 100%\n", | |
| " 262: 100%\n", | |
| " 260: 100%\n", | |
| "262\n", | |
| " 263: 100%\n", | |
| " 261: 100%\n", | |
| " 260: 100%\n", | |
| "263\n", | |
| " 262: 100%\n", | |
| " 261: 100%\n", | |
| " 260: 100%\n", | |
| "264\n", | |
| "265\n", | |
| " 94: 94.7284%\n", | |
| " 95: 94.7036%\n", | |
| " 93: 94.6443%\n", | |
| "266\n", | |
| "267\n", | |
| "268\n", | |
| "269\n", | |
| "270\n", | |
| "271\n", | |
| " 279: 95.3333%\n", | |
| " 280: 91.9692%\n", | |
| " 278: 91.939%\n", | |
| "272\n", | |
| " 277: 100%\n", | |
| " 274: 100%\n", | |
| "273\n", | |
| " 276: 100%\n", | |
| " 275: 100%\n", | |
| "274\n", | |
| " 277: 100%\n", | |
| " 272: 100%\n", | |
| "275\n", | |
| " 276: 100%\n", | |
| " 273: 100%\n", | |
| "276\n", | |
| " 275: 100%\n", | |
| " 273: 100%\n", | |
| "277\n", | |
| " 274: 100%\n", | |
| " 272: 100%\n", | |
| "278\n", | |
| " 279: 96.347%\n", | |
| " 280: 95.1412%\n", | |
| " 271: 91.939%\n", | |
| "279\n", | |
| " 280: 96.4245%\n", | |
| " 278: 96.347%\n", | |
| " 271: 95.3333%\n", | |
| "280\n", | |
| " 279: 96.4245%\n", | |
| " 278: 95.1412%\n", | |
| " 271: 91.9692%\n", | |
| "281\n", | |
| "282\n", | |
| " 284: 100%\n", | |
| " 283: 100%\n", | |
| "283\n", | |
| " 284: 100%\n", | |
| " 282: 100%\n", | |
| "284\n", | |
| " 283: 100%\n", | |
| " 282: 100%\n", | |
| "285\n", | |
| " 287: 100%\n", | |
| " 286: 100%\n", | |
| "286\n", | |
| " 287: 100%\n", | |
| " 285: 100%\n", | |
| "287\n", | |
| " 286: 100%\n", | |
| " 285: 100%\n", | |
| "288\n", | |
| "289\n", | |
| "290\n", | |
| " 300: 96.6316%\n", | |
| " 298: 96.6316%\n", | |
| " 297: 96.6316%\n", | |
| "291\n", | |
| "292\n", | |
| " 296: 88.4672%\n", | |
| " 301: 87.9971%\n", | |
| "293\n", | |
| " 300: 100%\n", | |
| " 298: 100%\n", | |
| " 297: 100%\n", | |
| "294\n", | |
| " 304: 99.0602%\n", | |
| "295\n", | |
| "296\n", | |
| " 301: 96.5442%\n", | |
| " 292: 88.4672%\n", | |
| "297\n", | |
| " 300: 100%\n", | |
| " 298: 100%\n", | |
| " 293: 100%\n", | |
| "298\n", | |
| " 300: 100%\n", | |
| " 297: 100%\n", | |
| " 293: 100%\n", | |
| "299\n", | |
| "300\n", | |
| " 298: 100%\n", | |
| " 297: 100%\n", | |
| " 293: 100%\n", | |
| "301\n", | |
| " 296: 96.5442%\n", | |
| " 292: 87.9971%\n", | |
| "302\n", | |
| "303\n", | |
| "304\n", | |
| " 294: 99.0602%\n", | |
| "305\n", | |
| " 249: 88.6889%\n", | |
| "306\n", | |
| " 310: 99.4987%\n", | |
| " 252: 88.6884%\n", | |
| " 251: 88.674%\n", | |
| "307\n", | |
| " 311: 99.5235%\n", | |
| " 153: 99.5235%\n", | |
| " 152: 99.5235%\n", | |
| "308\n", | |
| " 259: 83.0272%\n", | |
| "309\n", | |
| " 311: 95.769%\n", | |
| " 153: 95.769%\n", | |
| " 152: 95.769%\n", | |
| "310\n", | |
| " 306: 99.4987%\n", | |
| " 252: 89.1835%\n", | |
| " 251: 89.1667%\n", | |
| "311\n", | |
| " 153: 100%\n", | |
| " 152: 100%\n", | |
| " 150: 100%\n", | |
| "312\n", | |
| "313\n", | |
| "318\n", | |
| " 327: 93.407%\n", | |
| " 326: 93.407%\n", | |
| " 325: 93.407%\n", | |
| "319\n", | |
| " 327: 100%\n", | |
| " 326: 100%\n", | |
| " 325: 100%\n", | |
| "320\n", | |
| "321\n", | |
| " 234: 100%\n", | |
| "322\n", | |
| " 237: 100%\n", | |
| " 328: 89.9406%\n", | |
| " 323: 89.9406%\n", | |
| "323\n", | |
| " 328: 100%\n", | |
| " 244: 100%\n", | |
| " 238: 100%\n", | |
| "324\n", | |
| " 239: 100%\n", | |
| " 328: 87.785%\n", | |
| " 323: 87.785%\n", | |
| "325\n", | |
| " 327: 100%\n", | |
| " 326: 100%\n", | |
| " 319: 100%\n", | |
| "326\n", | |
| " 327: 100%\n", | |
| " 325: 100%\n", | |
| " 319: 100%\n", | |
| "327\n", | |
| " 326: 100%\n", | |
| " 325: 100%\n", | |
| " 319: 100%\n", | |
| "328\n", | |
| " 323: 100%\n", | |
| " 244: 100%\n", | |
| " 238: 100%\n", | |
| "329\n", | |
| " 247: 100%\n", | |
| " 328: 87.3012%\n", | |
| " 323: 87.3012%\n", | |
| "330\n", | |
| " 223: 93.6906%\n", | |
| " 333: 91.8301%\n", | |
| " 162: 90.2821%\n", | |
| "331\n", | |
| " 335: 100%\n", | |
| " 334: 100%\n", | |
| " 332: 100%\n", | |
| "332\n", | |
| " 335: 100%\n", | |
| " 334: 100%\n", | |
| " 331: 100%\n", | |
| "333\n", | |
| " 330: 91.8301%\n", | |
| " 223: 91.4768%\n", | |
| " 162: 86.5031%\n", | |
| "334\n", | |
| " 335: 100%\n", | |
| " 332: 100%\n", | |
| " 331: 100%\n", | |
| "335\n", | |
| " 334: 100%\n", | |
| " 332: 100%\n", | |
| " 331: 100%\n", | |
| "338\n", | |
| " 144: 88.8314%\n", | |
| " 265: 83.4816%\n", | |
| "339\n", | |
| "340\n", | |
| " 344: 100%\n", | |
| " 343: 100%\n", | |
| " 342: 100%\n", | |
| "341\n", | |
| " 151: 92.3166%\n", | |
| "342\n", | |
| " 344: 100%\n", | |
| " 343: 100%\n", | |
| " 340: 100%\n", | |
| "343\n", | |
| " 344: 100%\n", | |
| " 342: 100%\n", | |
| " 340: 100%\n", | |
| "344\n", | |
| " 343: 100%\n", | |
| " 342: 100%\n", | |
| " 340: 100%\n", | |
| "345\n", | |
| "346\n", | |
| " 97: 89.6939%\n", | |
| " 357: 89.6939%\n", | |
| "347\n", | |
| " 358: 98.5922%\n", | |
| "348\n", | |
| " 182: 96.149%\n", | |
| " 175: 92.3989%\n", | |
| " 172: 92.3989%\n", | |
| "349\n", | |
| "350\n", | |
| " 351: 99.7158%\n", | |
| " 184: 95.4506%\n", | |
| " 183: 95.4506%\n", | |
| "351\n", | |
| " 350: 99.7158%\n", | |
| " 184: 95.1673%\n", | |
| " 183: 95.1673%\n", | |
| "352\n", | |
| "353\n", | |
| " 356: 100%\n", | |
| "354\n", | |
| " 51: 100%\n", | |
| " 355: 100%\n", | |
| "355\n", | |
| " 51: 100%\n", | |
| " 354: 100%\n", | |
| "356\n", | |
| " 353: 100%\n", | |
| "357\n", | |
| " 97: 100%\n", | |
| "358\n", | |
| " 347: 98.5922%\n", | |
| "1000\n", | |
| " 1001: 100%\n", | |
| " 1003: 92.3077%\n", | |
| " 1002: 85.7143%\n", | |
| "1001\n", | |
| " 1000: 100%\n", | |
| " 1003: 92.3077%\n", | |
| " 1002: 85.7143%\n", | |
| "1002\n", | |
| " 1003: 93.3333%\n", | |
| " 1001: 85.7143%\n", | |
| " 1000: 85.7143%\n", | |
| "1003\n", | |
| " 1002: 93.3333%\n", | |
| " 1001: 92.3077%\n", | |
| " 1000: 92.3077%\n", | |
| "1004\n", | |
| " 1005: 80%\n", | |
| "1005\n", | |
| " 1003: 85.7143%\n", | |
| " 1004: 80%\n", | |
| " 1002: 80%\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import difflib\n", | |
| "from heapq import nlargest\n", | |
| "\n", | |
| "# Rule codes as strings, ordered by numeric value rather than lexically.\n", | |
| "rcodes = [str(code) for code in sorted(map(int, rules.keys()))]\n", | |
| "hashed_rules = dict((code, hashrule(code)) for code in rcodes)\n", | |
| "seqm = difflib.SequenceMatcher()\n", | |
| "similarities = {}\n", | |
| "\n", | |
| "# For every rule, keep at most the n most similar other rules whose\n", | |
| "# similarity ratio reaches the cutoff.\n", | |
| "n, cutoff = 3, 0.8\n", | |
| "for rcode2 in rcodes:\n", | |
| "    print(rcode2)\n", | |
| "    # SequenceMatcher caches its analysis of seq2, so seq2 is fixed in the\n", | |
| "    # outer loop and only seq1 changes in the inner loop.\n", | |
| "    seqm.set_seq2(hashed_rules[rcode2])\n", | |
| "    sims = []\n", | |
| "    for rcode1 in rcodes:\n", | |
| "        if rcode1 == rcode2:\n", | |
| "            continue\n", | |
| "        seqm.set_seq1(hashed_rules[rcode1])\n", | |
| "        # cheap upper bounds first; the full ratio() only runs if both pass\n", | |
| "        if seqm.real_quick_ratio() < cutoff:\n", | |
| "            continue\n", | |
| "        if seqm.quick_ratio() < cutoff:\n", | |
| "            continue\n", | |
| "        if seqm.ratio() >= cutoff:\n", | |
| "            sims.append((seqm.ratio(), rcode1))\n", | |
| "    similarities[rcode2] = sims = nlargest(n, sims)\n", | |
| "    for sim_ratio, sim_rcode in sims:\n", | |
| "        print(' {}: {:g}%'.format(sim_rcode, 100 * sim_ratio))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "So, rule 358 (the last) is 'similar' to rule 347. Just how similar are they?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 509, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T12:52:24.636564Z", | |
| "start_time": "2017-11-17T12:52:24.517046Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ratio: 98.59%\n", | |
| "\u001b[31m--- 347\n", | |
| "\u001b[0m\n", | |
| "\u001b[32m+++ 358\n", | |
| "\u001b[0m\n", | |
| "\u001b[36m@@ -1,4 +1,3 @@\n", | |
| "\u001b[0m\n", | |
| "\u001b[31m-0 xe .\u001b[0m\n", | |
| " rri rregatik rri\n", | |
| " rri rrek rri\n", | |
| " rri rrekiko/243 rri\n", | |
| "\u001b[36m@@ -97,21 +96,18 @@\n", | |
| "\u001b[0m\n", | |
| " rri rretakotatik rri\n", | |
| " rri rretakotik rri\n", | |
| " rri rretakotzat rri\n", | |
| "\u001b[31m-rri rretakoxe rri\u001b[0m\n", | |
| " rri rretakoz rri\n", | |
| " rri rretan rri\n", | |
| " rri rretara rri\n", | |
| " rri rretarago/243 rri\n", | |
| " rri rretaraino rri\n", | |
| " rri rretarainoko/243 rri\n", | |
| "\u001b[31m-rri rretarainoxe rri\u001b[0m\n", | |
| " rri rretarako/243 rri\n", | |
| " rri rretarantz rri\n", | |
| " rri rretarantzago/243 rri\n", | |
| " rri rretarantzegi rri\n", | |
| " rri rretaranzko/243 rri\n", | |
| " rri rretarat rri\n", | |
| "\u001b[31m-rri rretaraxe rri\u001b[0m\n", | |
| " rri rretarik rri\n", | |
| " rri rretariko/243 rri\n", | |
| " rri rretatik rri\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import difflib\n", | |
| "\n", | |
| "\n", | |
| "def colorize_diff(diff_lines):\n", | |
| "    \"\"\"Wrap unified-diff lines in ANSI colour escape codes.\n", | |
| "\n", | |
| "    Lines starting with '+' are coloured green, '-' red and '@' cyan, and\n", | |
| "    each coloured line is followed by a reset code. Every other line\n", | |
| "    (including an empty one) is returned unchanged.\n", | |
| "\n", | |
| "    diff_lines: iterable of str, e.g. the output of difflib.unified_diff.\n", | |
| "    Returns a list of str, one entry per input line.\n", | |
| "    \"\"\"\n", | |
| "    colors = {'+': '\\x1b[32m', '-': '\\x1b[31m', '@': '\\x1b[36m'}\n", | |
| "    reset = '\\x1b[0m'\n", | |
| "    colored = []\n", | |
| "    for line in diff_lines:\n", | |
| "        # line[:1] instead of line[0]: an empty line must not raise IndexError\n", | |
| "        color = colors.get(line[:1])\n", | |
| "        colored.append(color + line + reset if color else line)\n", | |
| "    return colored\n", | |
| "\n", | |
| "def compare_rules(rc1, rc2):\n", | |
| "    \"\"\"Return a coloured unified diff of two rules, headed by their similarity ratio.\n", | |
| "\n", | |
| "    rc1, rc2: rule codes (converted with str()); each one is passed to\n", | |
| "    hashrule() and the two results are compared line by line.\n", | |
| "    Returns one newline-joined string: a 'ratio: NN.NN%' line, then the diff.\n", | |
| "    \"\"\"\n", | |
| "    rc1, rc2 = str(rc1), str(rc2)\n", | |
| "    h1, h2 = hashrule(rc1), hashrule(rc2)\n", | |
| "    ratio = 100 * difflib.SequenceMatcher(a=h1, b=h2).ratio()\n", | |
| "    # lineterm='': the inputs come from splitlines() and carry no trailing\n", | |
| "    # newlines; the default lineterm='\\n' would leave the '---', '+++' and\n", | |
| "    # '@@' control lines newline-terminated, which prints a blank line after\n", | |
| "    # each of them and pushes the colour reset onto that blank line.\n", | |
| "    diff_lines = difflib.unified_diff(\n", | |
| "        h1.splitlines(), h2.splitlines(),\n", | |
| "        fromfile=rc1, tofile=rc2, lineterm='')\n", | |
| "    return '\\n'.join(\n", | |
| "        ['ratio: {:.2f}%'.format(ratio)] + colorize_diff(diff_lines))\n", | |
| "\n", | |
| "print(compare_rules('347', '358'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "so essentially, 358 is just 347 minus 4 lines (the four entries ending in `xe`).\n", | |
| "\n", | |
| "It seems a bit pointless to keep all the duplicates, especially since 358 applies to only a single `.dic` entry" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "what about rules `13` & `22`?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 513, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T13:12:41.981636Z", | |
| "start_time": "2017-11-17T13:12:41.952435Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(743, 742)" | |
| ] | |
| }, | |
| "execution_count": 513, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# sizes of the two rules, as a (rule 13, rule 22) pair\n", | |
| "tuple(len(rules[rc]) for rc in ('13', '22'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 510, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2017-11-17T12:52:39.444677Z", | |
| "start_time": "2017-11-17T12:52:39.290168Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "ratio: 99.95%\n", | |
| "\u001b[31m--- 13\n", | |
| "\u001b[0m\n", | |
| "\u001b[32m+++ 22\n", | |
| "\u001b[0m\n", | |
| "\u001b[36m@@ -1,4 +1,3 @@\n", | |
| "\u001b[0m\n", | |
| "\u001b[31m-0 etara .\u001b[0m\n", | |
| " 0 tu .\n", | |
| " 0 tua .\n", | |
| " 0 tuagan .\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# similarity ratio and unified diff, from rule 13 to rule 22\n", | |
| "print(compare_rules(rc1='13', rc2='22'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "again, pretty similar 😒" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python [conda env:jup]", | |
| "language": "python", | |
| "name": "conda-env-jup-py" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.4" | |
| }, | |
| "toc": { | |
| "nav_menu": {}, | |
| "number_sections": true, | |
| "sideBar": true, | |
| "skip_h1_title": false, | |
| "toc_cell": false, | |
| "toc_position": {}, | |
| "toc_section_display": "block", | |
| "toc_window_display": false | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment