{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "#voc = [1, 1, 1, 1, 1, 1, 1, 1] ## complete tree\n", "voc = [4, 3, 2, 1] ## single path tree\n", "voc_size = len(voc)\n", "\n", "count = voc + [1e5 for _ in range(voc_size + 1)]\n", "binary = [0 for _ in range(voc_size * 2 + 1)]\n", "parent_node = [0 for _ in range(voc_size * 2 + 1)]\n", "print count" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[4, 3, 2, 1, 100000.0, 100000.0, 100000.0, 100000.0, 100000.0]\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "pos1, pos2 = voc_size - 1, voc_size\n", "for i in range(0, voc_size-1):\n", " if pos1 >= 0:\n", " if count[pos1] < count[pos2]:\n", " min1i = pos1\n", " pos1 -= 1\n", " else:\n", " min1i = pos2\n", " pos2 += 1\n", " else:\n", " min1i = pos2\n", " pos2 += 1\n", " \n", " if pos1 >= 0:\n", " if count[pos1] < count[pos2]:\n", " min2i = pos1\n", " pos1 -= 1\n", " else:\n", " min2i = pos2\n", " pos2 += 1\n", " else:\n", " min2i = pos2\n", " pos2 += 1\n", " count[voc_size + i] = count[min1i] + count[min2i]\n", " parent_node[min1i] = voc_size + i\n", " parent_node[min2i] = voc_size + i\n", " ## implicitly binary[min1i] = 0\n", " binary[min2i] = 1" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "print count\n", "print binary" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[4, 3, 2, 1, 3, 6, 10, 100000.0, 100000.0]\n", "[0, 1, 1, 0, 0, 1, 0, 0, 0]\n" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "*So it proves that only 2 * voc_size - 1 nodes will be needed to construct a tree, even the tree is complete*" ] }, { "cell_type": "code", "collapsed": false, "input": [ "point = [0 for _ in range(40)]\n", "code = [0 for _ in range(40)]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "for a in range(voc_size):\n", " b = a\n", " i = 0\n", " while True:\n", " code[i] = binary[b]\n", " point[i] = b\n", " i += 1\n", " b = parent_node[b]\n", " if (b == voc_size * 2 - 2):\n", " break" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "print code\n", "print point" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", "[3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }