{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "word2vec query timings"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Times `model.cosine`, `model.analogy` and `model.generate_response` with `%%timeit` for `n=10` and `n=5000`. The Cluster section repeats the analogy + `generate_response` timings after assigning loaded clusters to `model.clusters`.\n",
      "\n",
      "Input files are looked up in `DATA_DIR` (see the configuration cell): `text8.bin` for the model and `text8-clusters.txt` for the clusters. `DATA_DIR` defaults to `~/Downloads`; set the `WORD2VEC_DATA_DIR` environment variable to point it elsewhere.\n",
      "\n",
      "The stored outputs are timings from an earlier run; re-run the notebook to measure on your own machine."
     ]
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Setup"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%load_ext autoreload\n",
      "%autoreload 2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "\n",
      "import numpy as np\n",
      "import word2vec"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Directory holding the input files. Defaults to ~/Downloads; set the\n",
      "# WORD2VEC_DATA_DIR environment variable to use another location.\n",
      "DATA_DIR = os.environ.get(\n",
      "    'WORD2VEC_DATA_DIR',\n",
      "    os.path.join(os.path.expanduser('~'), 'Downloads')\n",
      ")\n",
      "MODEL_PATH = os.path.join(DATA_DIR, 'text8.bin')\n",
      "CLUSTERS_PATH = os.path.join(DATA_DIR, 'text8-clusters.txt')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model = word2vec.load(MODEL_PATH)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Cosine"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.cosine('word', n=10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 22.9 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.cosine('socks', n=10)\n",
      "model.generate_response(indexes, metrics)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 22.8 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.cosine('word', n=5000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 26.2 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.cosine('word', n=5000)\n",
      "model.generate_response(indexes, metrics)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 26.4 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Analogy"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 27.5 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=10)\n",
      "model.generate_response(indexes, metrics)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 31.9 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 28.6 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)\n",
      "model.generate_response(indexes, metrics)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 29.7 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)\n",
      "model.generate_response(indexes, metrics).tolist()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 26.7 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Cluster"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clusters = word2vec.load_clusters(CLUSTERS_PATH)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model.clusters = clusters"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=10)\n",
      "model.generate_response(indexes, metrics)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 26.3 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)\n",
      "model.generate_response(indexes, metrics)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 28.5 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%timeit\n",
      "indexes, metrics = model.analogy(pos=['paris', 'germany'], neg=['france'], n=5000)\n",
      "model.generate_response(indexes, metrics).tolist()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10 loops, best of 3: 28.7 ms per loop\n"
       ]
      }
     ],
     "prompt_number": 18
    }
   ],
   "metadata": {}
  }
 ]
}