{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualizing GraphLab's LDA TopicModel with pyLDAvis\n", "\n", "This is an example of how to use [`pyLDAvis`](https://github.com/bmabey/pyLDAvis) [helper functions](https://pyldavis.readthedocs.org/en/latest/modules/API.html#module-pyLDAvis.graphlab) to visualize a GraphLab Create topic model. For our example model we will be extending the example provided by [GraphLab's own documentation](https://dato.com/products/create/docs/generated/graphlab.topic_model.create.html)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import graphlab as gl\n", "import pyLDAvis\n", "import pyLDAvis.graphlab" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# turn on automatic rendering of visualizations\n", "pyLDAvis.enable_notebook()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/dir_archive.ini to /var/tmp/graphlab-bmabey/7868/000000.ini\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/objects.bin to /var/tmp/graphlab-bmabey/7868/000001.bin\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.sidx to /var/tmp/graphlab-bmabey/7868/000002.sidx\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0000 to /var/tmp/graphlab-bmabey/7868/000003.0000\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0001 to /var/tmp/graphlab-bmabey/7868/000004.0001\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0002 to /var/tmp/graphlab-bmabey/7868/000005.0002\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0003 
to /var/tmp/graphlab-bmabey/7868/000006.0003\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0004 to /var/tmp/graphlab-bmabey/7868/000007.0004\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0005 to /var/tmp/graphlab-bmabey/7868/000008.0005\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0006 to /var/tmp/graphlab-bmabey/7868/000009.0006\n", "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/nytimes/m_2ae8944a.0007 to /var/tmp/graphlab-bmabey/7868/000010.0007\n" ] } ], "source": [ "docs = gl.SArray('http://s3.amazonaws.com/GraphLab-Datasets/nytimes')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "gl.topic_model.create?" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PROGRESS: Learning a topic model\n", "PROGRESS: Number of documents 10000\n", "PROGRESS: Vocabulary size 63958\n", "PROGRESS: Running collapsed Gibbs sampling\n", "PROGRESS: +-----------+---------------+----------------+-----------------+\n", "PROGRESS: | Iteration | Elapsed Time | Tokens/Second | Est. 
Perplexity |\n", "PROGRESS: +-----------+---------------+----------------+-----------------+\n", "PROGRESS: | 10 | 4.30s | 5.59845e+06 | 0 |\n", "PROGRESS: | 20 | 8.08s | 5.65802e+06 | 0 |\n", "PROGRESS: | 30 | 11.87s | 5.9078e+06 | 0 |\n", "PROGRESS: | 40 | 15.64s | 5.53945e+06 | 0 |\n", "PROGRESS: | 50 | 19.58s | 5.64386e+06 | 0 |\n", "PROGRESS: | 60 | 23.63s | 5.14035e+06 | 0 |\n", "PROGRESS: | 70 | 28.19s | 5.19469e+06 | 0 |\n", "PROGRESS: | 80 | 32.55s | 5.06062e+06 | 0 |\n", "PROGRESS: | 90 | 36.71s | 5.24635e+06 | 0 |\n", "PROGRESS: | 100 | 40.63s | 5.46609e+06 | 0 |\n", "PROGRESS: | 110 | 44.58s | 5.60969e+06 | 0 |\n", "PROGRESS: | 120 | 48.73s | 5.43378e+06 | 0 |\n", "PROGRESS: | 130 | 52.86s | 5.11243e+06 | 0 |\n", "PROGRESS: | 140 | 56.85s | 5.36708e+06 | 0 |\n", "PROGRESS: | 150 | 1m 0s | 5.31207e+06 | 0 |\n", "PROGRESS: | 160 | 1m 4s | 5.35679e+06 | 0 |\n", "PROGRESS: | 170 | 1m 8s | 5.53531e+06 | 0 |\n", "PROGRESS: | 180 | 1m 12s | 5.4313e+06 | 0 |\n", "PROGRESS: | 190 | 1m 17s | 5.3068e+06 | 0 |\n", "PROGRESS: | 200 | 1m 21s | 5.49324e+06 | 0 |\n", "PROGRESS: | 210 | 1m 24s | 5.59991e+06 | 0 |\n", "PROGRESS: | 220 | 1m 28s | 5.69224e+06 | 0 |\n", "PROGRESS: | 230 | 1m 32s | 5.70256e+06 | 0 |\n", "PROGRESS: | 240 | 1m 37s | 5.22822e+06 | 0 |\n", "PROGRESS: | 250 | 1m 41s | 4.66881e+06 | 0 |\n", "PROGRESS: | 260 | 1m 45s | 4.56584e+06 | 0 |\n", "PROGRESS: | 270 | 1m 50s | 4.50357e+06 | 0 |\n", "PROGRESS: | 280 | 1m 54s | 4.98652e+06 | 0 |\n", "PROGRESS: | 290 | 1m 59s | 5.18934e+06 | 0 |\n", "PROGRESS: | 300 | 2m 3s | 5.23964e+06 | 0 |\n", "PROGRESS: | 310 | 2m 7s | 4.88573e+06 | 0 |\n", "PROGRESS: | 320 | 2m 12s | 4.80853e+06 | 0 |\n", "PROGRESS: | 330 | 2m 16s | 5.13996e+06 | 0 |\n", "PROGRESS: | 340 | 2m 21s | 4.96993e+06 | 0 |\n", "PROGRESS: | 350 | 2m 25s | 5.1258e+06 | 0 |\n", "PROGRESS: | 360 | 2m 30s | 5.34332e+06 | 0 |\n", "PROGRESS: | 370 | 2m 34s | 5.16732e+06 | 0 |\n", "PROGRESS: | 380 | 2m 38s | 5.08862e+06 | 0 |\n", "PROGRESS: | 390 | 2m 
42s | 5.54525e+06 | 0 |\n", "PROGRESS: | 400 | 2m 46s | 5.13597e+06 | 0 |\n", "PROGRESS: | 410 | 2m 51s | 5.42203e+06 | 0 |\n", "PROGRESS: | 420 | 2m 55s | 5.41349e+06 | 0 |\n", "PROGRESS: | 430 | 2m 59s | 5.02961e+06 | 0 |\n", "PROGRESS: | 440 | 3m 3s | 5.35264e+06 | 0 |\n", "PROGRESS: | 450 | 3m 7s | 5.44598e+06 | 0 |\n", "PROGRESS: | 460 | 3m 11s | 5.83597e+06 | 0 |\n", "PROGRESS: | 470 | 3m 16s | 5.19771e+06 | 0 |\n", "PROGRESS: | 480 | 3m 20s | 5.42732e+06 | 0 |\n", "PROGRESS: | 490 | 3m 24s | 5.25899e+06 | 0 |\n", "PROGRESS: | 500 | 3m 28s | 4.22521e+06 | 0 |\n", "PROGRESS: | 510 | 3m 33s | 4.81078e+06 | 0 |\n", "PROGRESS: | 520 | 3m 37s | 5.23807e+06 | 0 |\n", "PROGRESS: | 530 | 3m 42s | 4.83344e+06 | 0 |\n", "PROGRESS: | 540 | 3m 46s | 4.88105e+06 | 0 |\n", "PROGRESS: | 550 | 3m 51s | 4.96088e+06 | 0 |\n", "PROGRESS: | 560 | 3m 55s | 5.23742e+06 | 0 |\n", "PROGRESS: | 570 | 3m 59s | 5.14391e+06 | 0 |\n", "PROGRESS: | 580 | 4m 4s | 4.73677e+06 | 0 |\n", "PROGRESS: | 590 | 4m 8s | 4.7446e+06 | 0 |\n", "PROGRESS: | 600 | 4m 13s | 4.81935e+06 | 0 |\n", "PROGRESS: | 610 | 4m 17s | 5.14317e+06 | 0 |\n", "PROGRESS: | 620 | 4m 22s | 3.98405e+06 | 0 |\n", "PROGRESS: | 630 | 4m 27s | 4.48074e+06 | 0 |\n", "PROGRESS: | 640 | 4m 31s | 5.21034e+06 | 0 |\n", "PROGRESS: | 650 | 4m 35s | 5.18135e+06 | 0 |\n", "PROGRESS: | 660 | 4m 40s | 4.87386e+06 | 0 |\n", "PROGRESS: | 670 | 4m 44s | 5.23199e+06 | 0 |\n", "PROGRESS: | 680 | 4m 48s | 4.62264e+06 | 0 |\n", "PROGRESS: | 690 | 4m 53s | 5.24695e+06 | 0 |\n", "PROGRESS: | 700 | 4m 57s | 5.42099e+06 | 0 |\n", "PROGRESS: | 710 | 5m 1s | 5.27416e+06 | 0 |\n", "PROGRESS: | 720 | 5m 5s | 4.97378e+06 | 0 |\n", "PROGRESS: | 730 | 5m 10s | 5.11693e+06 | 0 |\n", "PROGRESS: | 740 | 5m 14s | 5.25399e+06 | 0 |\n", "PROGRESS: | 750 | 5m 18s | 4.75152e+06 | 0 |\n", "PROGRESS: | 760 | 5m 23s | 4.74399e+06 | 0 |\n", "PROGRESS: | 770 | 5m 27s | 5.30147e+06 | 0 |\n", "PROGRESS: | 780 | 5m 31s | 4.95419e+06 | 0 |\n", "PROGRESS: | 790 | 5m 36s | 
4.75985e+06 | 0 |\n", "PROGRESS: | 800 | 5m 40s | 5.0079e+06 | 0 |\n", "PROGRESS: | 810 | 5m 45s | 5.07237e+06 | 0 |\n", "PROGRESS: | 820 | 5m 49s | 4.85123e+06 | 0 |\n", "PROGRESS: | 830 | 5m 53s | 5.23005e+06 | 0 |\n", "PROGRESS: | 840 | 5m 58s | 4.84427e+06 | 0 |\n", "PROGRESS: | 850 | 6m 2s | 4.99574e+06 | 0 |\n", "PROGRESS: | 860 | 6m 7s | 5.06177e+06 | 0 |\n", "PROGRESS: | 870 | 6m 11s | 5.25326e+06 | 0 |\n", "PROGRESS: | 880 | 6m 15s | 5.12843e+06 | 0 |\n", "PROGRESS: | 890 | 6m 20s | 4.95561e+06 | 0 |\n", "PROGRESS: | 900 | 6m 24s | 4.81062e+06 | 0 |\n", "PROGRESS: | 910 | 6m 28s | 4.89783e+06 | 0 |\n", "PROGRESS: | 920 | 6m 32s | 5.3197e+06 | 0 |\n", "PROGRESS: | 930 | 6m 37s | 5.14227e+06 | 0 |\n", "PROGRESS: | 940 | 6m 41s | 5.04436e+06 | 0 |\n", "PROGRESS: | 950 | 6m 46s | 5.02783e+06 | 0 |\n", "PROGRESS: | 960 | 6m 50s | 4.93528e+06 | 0 |\n", "PROGRESS: | 970 | 6m 55s | 4.24361e+06 | 0 |\n", "PROGRESS: | 980 | 6m 59s | 4.69206e+06 | 0 |\n", "PROGRESS: | 990 | 7m 4s | 4.52958e+06 | 0 |\n", "PROGRESS: | 1000 | 7m 8s | 5.11498e+06 | 0 |\n", "PROGRESS: | 1010 | 7m 12s | 5.03511e+06 | 0 |\n", "PROGRESS: | 1020 | 7m 16s | 5.24804e+06 | 0 |\n", "PROGRESS: | 1030 | 7m 21s | 5.22683e+06 | 0 |\n", "PROGRESS: | 1040 | 7m 25s | 4.8709e+06 | 0 |\n", "PROGRESS: | 1050 | 7m 29s | 4.97393e+06 | 0 |\n", "PROGRESS: | 1060 | 7m 34s | 5.02255e+06 | 0 |\n", "PROGRESS: | 1070 | 7m 38s | 5.25404e+06 | 0 |\n", "PROGRESS: | 1080 | 7m 43s | 4.90541e+06 | 0 |\n", "PROGRESS: | 1090 | 7m 47s | 5.21804e+06 | 0 |\n", "PROGRESS: | 1100 | 7m 51s | 5.13437e+06 | 0 |\n", "PROGRESS: | 1110 | 7m 56s | 5.23265e+06 | 0 |\n", "PROGRESS: | 1120 | 8m 0s | 5.21102e+06 | 0 |\n", "PROGRESS: | 1130 | 8m 4s | 4.7302e+06 | 0 |\n", "PROGRESS: | 1140 | 8m 8s | 5.07703e+06 | 0 |\n", "PROGRESS: | 1150 | 8m 13s | 5.17569e+06 | 0 |\n", "PROGRESS: | 1160 | 8m 17s | 5.30585e+06 | 0 |\n", "PROGRESS: | 1170 | 8m 21s | 5.08952e+06 | 0 |\n", "PROGRESS: | 1180 | 8m 25s | 5.0374e+06 | 0 |\n", "PROGRESS: | 1190 | 
8m 30s | 5.25056e+06 | 0 |\n", "PROGRESS: | 1200 | 8m 34s | 4.64185e+06 | 0 |\n", "PROGRESS: | 1210 | 8m 38s | 5.42466e+06 | 0 |\n", "PROGRESS: | 1220 | 8m 42s | 5.04005e+06 | 0 |\n", "PROGRESS: | 1230 | 8m 47s | 4.65573e+06 | 0 |\n", "PROGRESS: | 1240 | 8m 51s | 4.9266e+06 | 0 |\n", "PROGRESS: | 1250 | 8m 55s | 5.25832e+06 | 0 |\n", "PROGRESS: | 1260 | 8m 59s | 4.87296e+06 | 0 |\n", "PROGRESS: | 1270 | 9m 4s | 5.54099e+06 | 0 |\n", "PROGRESS: | 1280 | 9m 8s | 5.40511e+06 | 0 |\n", "PROGRESS: | 1290 | 9m 12s | 5.4751e+06 | 0 |\n", "PROGRESS: | 1300 | 9m 16s | 5.35094e+06 | 0 |\n", "PROGRESS: | 1310 | 9m 20s | 5.34549e+06 | 0 |\n", "PROGRESS: | 1320 | 9m 25s | 4.80484e+06 | 0 |\n", "PROGRESS: | 1330 | 9m 29s | 4.66212e+06 | 0 |\n", "PROGRESS: | 1340 | 9m 33s | 5.32365e+06 | 0 |\n", "PROGRESS: | 1350 | 9m 38s | 5.31993e+06 | 0 |\n", "PROGRESS: | 1360 | 9m 42s | 5.32912e+06 | 0 |\n", "PROGRESS: | 1370 | 9m 46s | 5.08139e+06 | 0 |\n", "PROGRESS: | 1380 | 9m 50s | 5.21139e+06 | 0 |\n", "PROGRESS: | 1390 | 9m 55s | 4.99703e+06 | 0 |\n", "PROGRESS: | 1400 | 9m 59s | 4.991e+06 | 0 |\n", "PROGRESS: | 1410 | 10m 3s | 5.08148e+06 | 0 |\n", "PROGRESS: | 1420 | 10m 7s | 5.13859e+06 | 0 |\n", "PROGRESS: | 1430 | 10m 12s | 5.08677e+06 | 0 |\n", "PROGRESS: | 1440 | 10m 16s | 4.7161e+06 | 0 |\n", "PROGRESS: | 1450 | 10m 20s | 5.40952e+06 | 0 |\n", "PROGRESS: | 1460 | 10m 25s | 5.05909e+06 | 0 |\n", "PROGRESS: | 1470 | 10m 29s | 5.00158e+06 | 0 |\n", "PROGRESS: | 1480 | 10m 33s | 5.07173e+06 | 0 |\n", "PROGRESS: | 1490 | 10m 37s | 5.39908e+06 | 0 |\n", "PROGRESS: | 1500 | 10m 41s | 5.20519e+06 | 0 |\n", "PROGRESS: | 1510 | 10m 46s | 5.39554e+06 | 0 |\n", "PROGRESS: | 1520 | 10m 50s | 5.40323e+06 | 0 |\n", "PROGRESS: | 1530 | 10m 54s | 5.18759e+06 | 0 |\n", "PROGRESS: | 1540 | 10m 58s | 5.21281e+06 | 0 |\n", "PROGRESS: | 1550 | 11m 2s | 5.31349e+06 | 0 |\n", "PROGRESS: | 1560 | 11m 6s | 5.31172e+06 | 0 |\n", "PROGRESS: | 1570 | 11m 11s | 5.11091e+06 | 0 |\n", "PROGRESS: | 1580 | 11m 
15s | 5.05909e+06 | 0 |\n", "PROGRESS: | 1590 | 11m 19s | 4.94512e+06 | 0 |\n", "PROGRESS: | 1600 | 11m 23s | 4.93809e+06 | 0 |\n", "PROGRESS: | 1610 | 11m 27s | 5.59195e+06 | 0 |\n", "PROGRESS: | 1620 | 11m 31s | 5.56521e+06 | 0 |\n", "PROGRESS: | 1630 | 11m 35s | 5.43631e+06 | 0 |\n", "PROGRESS: | 1640 | 11m 40s | 4.90119e+06 | 0 |\n", "PROGRESS: | 1650 | 11m 44s | 5.43916e+06 | 0 |\n", "PROGRESS: | 1660 | 11m 48s | 5.1769e+06 | 0 |\n", "PROGRESS: | 1670 | 11m 52s | 5.25893e+06 | 0 |\n", "PROGRESS: | 1680 | 11m 56s | 5.2838e+06 | 0 |\n", "PROGRESS: | 1690 | 12m 0s | 5.51797e+06 | 0 |\n", "PROGRESS: | 1700 | 12m 5s | 4.9903e+06 | 0 |\n", "PROGRESS: | 1710 | 12m 9s | 4.8294e+06 | 0 |\n", "PROGRESS: | 1720 | 12m 13s | 5.12015e+06 | 0 |\n", "PROGRESS: | 1730 | 12m 17s | 5.38809e+06 | 0 |\n", "PROGRESS: | 1740 | 12m 21s | 5.30651e+06 | 0 |\n", "PROGRESS: | 1750 | 12m 26s | 5.39673e+06 | 0 |\n", "PROGRESS: | 1760 | 12m 30s | 5.36165e+06 | 0 |\n", "PROGRESS: | 1770 | 12m 34s | 5.34118e+06 | 0 |\n", "PROGRESS: | 1780 | 12m 38s | 5.22067e+06 | 0 |\n", "PROGRESS: | 1790 | 12m 42s | 5.42174e+06 | 0 |\n", "PROGRESS: | 1800 | 12m 46s | 5.20593e+06 | 0 |\n", "PROGRESS: | 1810 | 12m 50s | 5.28709e+06 | 0 |\n", "PROGRESS: | 1820 | 12m 55s | 5.58678e+06 | 0 |\n", "PROGRESS: | 1830 | 12m 59s | 5.00077e+06 | 0 |\n", "PROGRESS: | 1840 | 13m 3s | 5.2627e+06 | 0 |\n", "PROGRESS: | 1850 | 13m 7s | 5.50355e+06 | 0 |\n", "PROGRESS: | 1860 | 13m 11s | 5.43598e+06 | 0 |\n", "PROGRESS: | 1870 | 13m 15s | 5.11404e+06 | 0 |\n", "PROGRESS: | 1880 | 13m 19s | 5.261e+06 | 0 |\n", "PROGRESS: | 1890 | 13m 24s | 5.08025e+06 | 0 |\n", "PROGRESS: | 1900 | 13m 28s | 5.01006e+06 | 0 |\n", "PROGRESS: | 1910 | 13m 32s | 5.15994e+06 | 0 |\n", "PROGRESS: | 1920 | 13m 37s | 5.16736e+06 | 0 |\n", "PROGRESS: | 1930 | 13m 41s | 5.1345e+06 | 0 |\n", "PROGRESS: | 1940 | 13m 45s | 5.09099e+06 | 0 |\n", "PROGRESS: | 1950 | 13m 50s | 4.99722e+06 | 0 |\n", "PROGRESS: | 1960 | 13m 54s | 4.81856e+06 | 0 |\n", 
"PROGRESS: | 1970 | 13m 58s | 4.99156e+06 | 0 |\n", "PROGRESS: | 1980 | 14m 3s | 4.68172e+06 | 0 |\n", "PROGRESS: | 1990 | 14m 7s | 4.84688e+06 | 0 |\n", "PROGRESS: | 2000 | 14m 12s | 4.76679e+06 | 0 |\n", "PROGRESS: | 2010 | 14m 17s | 4.54456e+06 | 0 |\n", "PROGRESS: | 2020 | 14m 22s | 3.69908e+06 | 0 |\n", "PROGRESS: | 2030 | 14m 27s | 4.56501e+06 | 0 |\n", "PROGRESS: | 2040 | 14m 32s | 4.56358e+06 | 0 |\n", "PROGRESS: | 2050 | 14m 36s | 4.49848e+06 | 0 |\n", "PROGRESS: | 2060 | 14m 41s | 4.54544e+06 | 0 |\n", "PROGRESS: | 2070 | 14m 46s | 4.74351e+06 | 0 |\n", "PROGRESS: | 2080 | 14m 50s | 4.65406e+06 | 0 |\n", "PROGRESS: | 2090 | 14m 55s | 4.85266e+06 | 0 |\n", "PROGRESS: | 2100 | 14m 59s | 4.73347e+06 | 0 |\n", "PROGRESS: | 2110 | 15m 4s | 4.30323e+06 | 0 |\n", "PROGRESS: | 2120 | 15m 9s | 4.59272e+06 | 0 |\n", "PROGRESS: | 2130 | 15m 13s | 4.8619e+06 | 0 |\n", "PROGRESS: | 2140 | 15m 18s | 4.56132e+06 | 0 |\n", "PROGRESS: | 2150 | 15m 23s | 4.59589e+06 | 0 |\n", "PROGRESS: | 2160 | 15m 27s | 4.76242e+06 | 0 |\n", "PROGRESS: | 2170 | 15m 32s | 4.04776e+06 | 0 |\n", "PROGRESS: | 2180 | 15m 38s | 4.28786e+06 | 0 |\n", "PROGRESS: | 2190 | 15m 43s | 4.20092e+06 | 0 |\n", "PROGRESS: | 2200 | 15m 47s | 4.73318e+06 | 0 |\n", "PROGRESS: | 2210 | 15m 52s | 4.75373e+06 | 0 |\n", "PROGRESS: | 2220 | 15m 57s | 4.32632e+06 | 0 |\n", "PROGRESS: | 2230 | 16m 2s | 4.79865e+06 | 0 |\n", "PROGRESS: | 2240 | 16m 6s | 4.74353e+06 | 0 |\n", "PROGRESS: | 2250 | 16m 11s | 4.52114e+06 | 0 |\n", "PROGRESS: | 2260 | 16m 16s | 4.57798e+06 | 0 |\n", "PROGRESS: | 2270 | 16m 21s | 4.26944e+06 | 0 |\n", "PROGRESS: | 2280 | 16m 26s | 4.79986e+06 | 0 |\n", "PROGRESS: | 2290 | 16m 31s | 4.47916e+06 | 0 |\n", "PROGRESS: | 2300 | 16m 35s | 4.69962e+06 | 0 |\n", "PROGRESS: | 2310 | 16m 40s | 4.45616e+06 | 0 |\n", "PROGRESS: | 2320 | 16m 45s | 4.45254e+06 | 0 |\n", "PROGRESS: | 2330 | 16m 50s | 4.21933e+06 | 0 |\n", "PROGRESS: | 2340 | 16m 56s | 3.88585e+06 | 0 |\n", "PROGRESS: | 2350 | 17m 1s | 
4.75735e+06 | 0 |\n", "PROGRESS: | 2360 | 17m 6s | 4.49801e+06 | 0 |\n", "PROGRESS: | 2370 | 17m 11s | 3.70626e+06 | 0 |\n", "PROGRESS: | 2380 | 17m 16s | 4.69068e+06 | 0 |\n", "PROGRESS: | 2390 | 17m 21s | 3.78046e+06 | 0 |\n", "PROGRESS: | 2400 | 17m 27s | 3.82714e+06 | 0 |\n", "PROGRESS: | 2410 | 17m 32s | 4.29387e+06 | 0 |\n", "PROGRESS: | 2420 | 17m 37s | 4.23766e+06 | 0 |\n", "PROGRESS: | 2430 | 17m 42s | 4.39951e+06 | 0 |\n", "PROGRESS: | 2440 | 17m 47s | 4.40527e+06 | 0 |\n", "PROGRESS: | 2450 | 17m 52s | 4.49867e+06 | 0 |\n", "PROGRESS: | 2460 | 17m 57s | 4.55966e+06 | 0 |\n", "PROGRESS: | 2470 | 18m 2s | 4.40979e+06 | 0 |\n", "PROGRESS: | 2480 | 18m 7s | 4.51377e+06 | 0 |\n", "PROGRESS: | 2490 | 18m 11s | 4.30073e+06 | 0 |\n", "PROGRESS: | 2500 | 18m 16s | 4.66434e+06 | 0 |\n", "PROGRESS: | 2510 | 18m 21s | 4.43723e+06 | 0 |\n", "PROGRESS: | 2520 | 18m 26s | 4.33696e+06 | 0 |\n", "PROGRESS: | 2530 | 18m 31s | 4.44306e+06 | 0 |\n", "PROGRESS: | 2540 | 18m 36s | 4.60931e+06 | 0 |\n", "PROGRESS: | 2550 | 18m 40s | 4.29257e+06 | 0 |\n", "PROGRESS: | 2560 | 18m 45s | 4.84307e+06 | 0 |\n", "PROGRESS: | 2570 | 18m 50s | 4.59479e+06 | 0 |\n", "PROGRESS: | 2580 | 18m 54s | 4.79189e+06 | 0 |\n", "PROGRESS: | 2590 | 18m 59s | 5.00048e+06 | 0 |\n", "PROGRESS: | 2600 | 19m 3s | 4.58225e+06 | 0 |\n", "PROGRESS: | 2610 | 19m 8s | 4.44892e+06 | 0 |\n", "PROGRESS: | 2620 | 19m 13s | 4.24166e+06 | 0 |\n", "PROGRESS: | 2630 | 19m 18s | 4.72903e+06 | 0 |\n", "PROGRESS: | 2640 | 19m 22s | 4.28621e+06 | 0 |\n", "PROGRESS: | 2650 | 19m 28s | 4.16912e+06 | 0 |\n", "PROGRESS: | 2660 | 19m 32s | 4.76708e+06 | 0 |\n", "PROGRESS: | 2670 | 19m 37s | 4.47706e+06 | 0 |\n", "PROGRESS: | 2680 | 19m 42s | 4.19275e+06 | 0 |\n", "PROGRESS: | 2690 | 19m 47s | 4.56176e+06 | 0 |\n", "PROGRESS: | 2700 | 19m 52s | 4.52325e+06 | 0 |\n", "PROGRESS: | 2710 | 19m 57s | 4.78738e+06 | 0 |\n", "PROGRESS: | 2720 | 20m 2s | 4.41018e+06 | 0 |\n", "PROGRESS: | 2730 | 20m 6s | 4.82358e+06 | 0 |\n", 
"PROGRESS: | 2740 | 20m 11s | 4.30686e+06 | 0 |\n", "PROGRESS: | 2750 | 20m 16s | 4.60166e+06 | 0 |\n", "PROGRESS: | 2760 | 20m 21s | 4.56818e+06 | 0 |\n", "PROGRESS: | 2770 | 20m 25s | 4.66626e+06 | 0 |\n", "PROGRESS: | 2780 | 20m 30s | 4.51772e+06 | 0 |\n", "PROGRESS: | 2790 | 20m 35s | 4.56473e+06 | 0 |\n", "PROGRESS: | 2800 | 20m 40s | 4.26625e+06 | 0 |\n", "PROGRESS: | 2810 | 20m 45s | 4.43764e+06 | 0 |\n", "PROGRESS: | 2820 | 20m 50s | 4.19352e+06 | 0 |\n", "PROGRESS: | 2830 | 20m 55s | 4.39982e+06 | 0 |\n", "PROGRESS: | 2840 | 21m 1s | 4.43111e+06 | 0 |\n", "PROGRESS: | 2850 | 21m 5s | 4.82279e+06 | 0 |\n", "PROGRESS: | 2860 | 21m 10s | 4.61505e+06 | 0 |\n", "PROGRESS: | 2870 | 21m 15s | 4.62852e+06 | 0 |\n", "PROGRESS: | 2880 | 21m 19s | 4.57006e+06 | 0 |\n", "PROGRESS: | 2890 | 21m 24s | 4.58023e+06 | 0 |\n", "PROGRESS: | 2900 | 21m 29s | 4.62587e+06 | 0 |\n", "PROGRESS: | 2910 | 21m 34s | 4.61136e+06 | 0 |\n", "PROGRESS: | 2920 | 21m 39s | 4.44602e+06 | 0 |\n", "PROGRESS: | 2930 | 21m 44s | 4.6914e+06 | 0 |\n", "PROGRESS: | 2940 | 21m 48s | 4.63902e+06 | 0 |\n", "PROGRESS: | 2950 | 21m 53s | 4.93341e+06 | 0 |\n", "PROGRESS: | 2960 | 21m 58s | 4.83688e+06 | 0 |\n", "PROGRESS: | 2970 | 22m 2s | 5.00597e+06 | 0 |\n", "PROGRESS: | 2980 | 22m 7s | 4.7511e+06 | 0 |\n", "PROGRESS: | 2990 | 22m 11s | 4.57458e+06 | 0 |\n", "PROGRESS: | 3000 | 22m 17s | 3.65077e+06 | 0 |\n", "PROGRESS: +-----------+---------------+----------------+-----------------+\n", "CPU times: user 432 ms, sys: 647 ms, total: 1.08 s\n", "Wall time: 22min 19s\n" ] } ], "source": [ "%%time\n", "topic_model = gl.topic_model.create(docs, num_topics=15, num_iterations=3000)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "
\n", "" ], "text/plain": [ "PreparedData(topic_coordinates= Freq cluster topics x y\n", "topic \n", "6 8.909169 1 1 -0.083716 -0.068616\n", "5 8.839272 1 2 -0.177491 0.055107\n", "12 8.312294 1 3 0.100675 0.289403\n", "11 7.652615 1 4 0.062355 0.093847\n", "14 7.028000 1 5 -0.194651 0.010916\n", "2 6.802032 1 6 0.155845 -0.008784\n", "0 6.597788 1 7 -0.223110 0.016532\n", "1 6.575814 1 8 -0.075709 -0.096276\n", "10 6.142479 1 9 -0.054644 -0.055411\n", "13 5.901158 1 10 0.161956 -0.085644\n", "9 5.861304 1 11 -0.110989 -0.001673\n", "3 5.856388 1 12 0.090985 -0.048802\n", "7 5.767169 1 13 0.062811 -0.175730\n", "8 5.208863 1 14 0.100993 0.224601\n", "4 4.545656 1 15 0.184690 -0.149470, topic_info= Category Freq Term Total loglift logprob\n", "16925 Default 7169.000000 team 7169 30.0000 30.0000\n", "38580 Default 6485.000000 game 6485 29.0000 29.0000\n", "55943 Default 5783.000000 campaign 5783 28.0000 28.0000\n", "2121 Default 6311.000000 company 6311 27.0000 27.0000\n", "36784 Default 4654.000000 zzz_al_gore 4654 26.0000 26.0000\n", "63915 Default 8483.000000 percent 8483 25.0000 25.0000\n", "20429 Default 3476.000000 com 3476 24.0000 24.0000\n", "61323 Default 5401.000000 season 5401 23.0000 23.0000\n", "17214 Default 7285.000000 million 7285 22.0000 22.0000\n", "44852 Default 3437.000000 coach 3437 21.0000 21.0000\n", "... ... ... ... ... ... 
...\n", "48104 Topic15 433.706899 hot 843 2.4329 -5.7727\n", "60975 Topic15 532.692741 water 1539 2.0324 -5.5712\n", "33913 Topic15 566.918221 large 1998 1.8133 -5.5294\n", "50600 Topic15 410.361383 pound 861 2.3581 -5.8264\n", "3051 Topic15 515.618588 makes 1954 1.7480 -5.6169\n", "34823 Topic15 367.441870 fresh 608 2.6032 -5.9292\n", "63734 Topic15 545.702544 book 3124 1.3341 -5.5616\n", "56934 Topic15 367.239628 french 873 2.2302 -5.9404\n", "9965 Topic15 412.950185 hour 3053 1.0721 -5.8466\n", "2023 Topic15 382.225792 small 2385 1.2419 -5.9237\n", "\n", "[1936 rows x 6 columns], token_table= Topic Freq Term\n", "term \n", "1163 2 0.997854 abortion\n", "48322 1 0.988142 acquisition\n", "48322 15 0.003953 acquisition\n", "27155 12 0.993464 acres\n", "17994 2 0.035743 action\n", "17994 5 0.057677 action\n", "17994 6 0.068237 action\n", "17994 7 0.569456 action\n", "17994 8 0.036556 action\n", "17994 9 0.127539 action\n", "... ... ... ...\n", "21449 3 0.129301 zzz_washington\n", "21449 5 0.210793 zzz_washington\n", "21449 7 0.308946 zzz_washington\n", "21449 13 0.150308 zzz_washington\n", "21449 15 0.015936 zzz_washington\n", "43666 5 0.993976 zzz_will_putin\n", "25684 14 0.990566 zzz_world_cup\n", "25740 14 0.991071 zzz_world_series\n", "49940 4 0.977778 zzz_young_children\n", "49282 11 0.992126 zzz_zeljko_raznatovic\n", "\n", "[3003 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[7, 6, 13, 12, 15, 3, 1, 2, 11, 14, 10, 4, 8, 9, 5])" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pyLDAvis.graphlab.prepare(topic_model, docs)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.8" } }, 
"nbformat": 4, "nbformat_minor": 0 }