{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "docword_file = '/home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/docword.nips.txt'\n",
    "dict_file = '/home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/vocab.nips.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./bigartm --corpus-format bow --read-corpus /home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/docword.nips.txt --use-dictionary-bow /home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/vocab.nips.txt --topics 10 --update-every 1 --passes 10 --kappa 0.5 --tau0 64\n",
      "libartm.so: cannot open shared object file: No such file or directory, fall back to ARTM_SHARED_LIBRARY environment variable\n",
      "Create temporary batch folder: /tmp/tmpMuNvzM\n",
      "Parse collection: /home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/docword.nips.txt -> /tmp/tmpMuNvzM, batch_size=1000\n",
      "Initialize model\n",
      "W0612 18:59:26.353670 12134 merger.cc:300] SynchronizeModel() did not found any increments to topic model urn:uuid:ffb0c1f2-111b-11e5-96b3-fa163e8d9532\n",
      "processed 1000 items, perplexity = 12185.621588\n",
      "processed 2500 items, perplexity = 1248.822991\n",
      "processed 4000 items, perplexity = 1617.799503\n",
      "processed 5500 items, perplexity = 1802.688212\n",
      "processed 7000 items, perplexity = 1907.541042\n",
      "processed 8500 items, perplexity = 1971.545763\n",
      "processed 10000 items, perplexity = 2010.787087\n",
      "processed 11500 items, perplexity = 2034.872774\n",
      "processed 13000 items, perplexity = 2049.129378\n",
      "processed 14500 items, perplexity = 2056.932210\n",
      "processed 15000 items, perplexity = 2057.239507\n",
      "Top tokens per topic:\n",
      "Topic#1: learning (0.022) network (0.014) input (0.010) set (0.010) neural (0.009) weight (0.008) algorithm (0.008) model (0.007) task (0.007) function (0.006) \n",
      "Topic#2: network (0.015) weight (0.012) system (0.011) neural (0.009) neuron (0.009) output (0.009) learning (0.008) current (0.007) circuit (0.006) input (0.006) \n",
      "Topic#3: network (0.015) unit (0.010) output (0.008) point (0.008) problem (0.008) function (0.007) images (0.007) image (0.006) learning (0.006) layer (0.006) \n",
      "Topic#4: model (0.018) neuron (0.015) network (0.013) input (0.010) cell (0.010) system (0.008) unit (0.006) neural (0.006) pattern (0.006) synaptic (0.006) \n",
      "Topic#5: network (0.018) data (0.011) system (0.009) model (0.008) algorithm (0.007) word (0.006) parameter (0.006) component (0.005) linear (0.005) term (0.005) \n",
      "Topic#6: data (0.009) neural (0.009) number (0.006) set (0.006) model (0.006) motion (0.006) result (0.005) pattern (0.005) system (0.005) rate (0.005) \n",
      "Topic#7: function (0.021) algorithm (0.016) network (0.014) training (0.010) error (0.010) learning (0.009) neural (0.009) weight (0.008) number (0.008) result (0.007) \n",
      "Topic#8: network (0.033) input (0.026) set (0.013) unit (0.012) function (0.012) output (0.010) learning (0.009) system (0.009) training (0.008) algorithm (0.007) \n",
      "Topic#9: model (0.022) learning (0.016) set (0.011) data (0.010) problem (0.009) method (0.008) function (0.007) error (0.006) distribution (0.006) step (0.006) \n",
      "Topic#10: model (0.011) training (0.010) neural (0.010) network (0.010) input (0.009) set (0.009) error (0.008) output (0.008) function (0.007) learning (0.007) \n"
     ]
    }
   ],
   "source": [
    "!./bigartm --corpus-format bow --read-corpus {docword_file} --use-dictionary-bow {dict_file} --topics 10 --update-every 1 --passes 10 --kappa 0.5 --tau0 64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "docword_file = '/home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/docword.enron.txt'\n",
    "dict_file = '/home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/vocab.enron.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./bigartm --corpus-format bow --read-corpus /home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/docword.enron.txt --use-dictionary-bow /home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/vocab.enron.txt --batch-size 30000 --topics 10 --update-every 1 --passes 10 --kappa 0.5 --tau0 64\n",
      "libartm.so: cannot open shared object file: No such file or directory, fall back to ARTM_SHARED_LIBRARY environment variable\n",
      "Create temporary batch folder: /tmp/tmpAFXQJ6\n",
      "Parse collection: /home/romovpa/notebooks/bigartm-book/applications/uci_bow/data/docword.enron.txt -> /tmp/tmpAFXQJ6, batch_size=30000\n",
      "Initialize model\n",
      "processed 39861 items, perplexity = 27412.710768\n",
      "processed 79722 items, perplexity = 11517.766680\n",
      "processed 119583 items, perplexity = 8602.245303\n",
      "processed 159444 items, perplexity = 7386.092703\n",
      "processed 199305 items, perplexity = 6694.426605\n",
      "processed 239166 items, perplexity = 6237.597039\n",
      "processed 279027 items, perplexity = 5904.315310\n",
      "processed 318888 items, perplexity = 5645.505335\n",
      "processed 358749 items, perplexity = 5435.339423\n",
      "processed 398610 items, perplexity = 5259.309476\n",
      "processed 398610 items, perplexity = 5259.309476\n",
      "Top tokens per topic:\n",
      "Topic#1: meeting (0.006) california (0.006) office (0.006) group (0.004) team (0.004) energy (0.004) deal (0.004) issues (0.004) point (0.003) market (0.003) \n",
      "Topic#2: power (0.013) california (0.011) energy (0.009) electricity (0.006) utility (0.006) contract (0.005) prices (0.005) corp (0.005) states (0.005) plan (0.005) \n",
      "Topic#3: energy (0.010) customer (0.009) market (0.008) order (0.006) access (0.005) ferc (0.004) message (0.004) page (0.004) program (0.004) service (0.004) \n",
      "Topic#4: meeting (0.006) free (0.004) energy (0.004) help (0.004) game (0.004) number (0.004) against (0.004) updated (0.003) going (0.003) look (0.003) \n",
      "Topic#5: power (0.008) california (0.007) cost (0.007) attached (0.006) meeting (0.005) comment (0.005) davis (0.005) bill (0.005) rates (0.004) electricity (0.004) \n",
      "Topic#6: company (0.010) energy (0.007) business (0.007) gas (0.006) market (0.006) stock (0.005) companies (0.005) houston (0.004) power (0.004) investment (0.004) \n",
      "Topic#7: company (0.010) market (0.007) price (0.006) power (0.005) california (0.005) database (0.005) gas (0.004) customer (0.004) contract (0.004) operation (0.004) \n",
      "Topic#8: company (0.016) firm (0.007) services (0.005) power (0.005) business (0.005) fund (0.005) financial (0.005) technology (0.004) investor (0.004) agreement (0.004) \n",
      "Topic#9: texas (0.007) team (0.007) top (0.004) plan (0.004) list (0.004) play (0.003) longhorn (0.003) energy (0.003) power (0.003) cost (0.003) \n",
      "Topic#10: final (0.007) company (0.007) market (0.005) report (0.005) hour (0.005) offer (0.005) price (0.005) investment (0.005) deal (0.005) companies (0.005) \n"
     ]
    }
   ],
   "source": [
    "!./bigartm --corpus-format bow --read-corpus {docword_file} --use-dictionary-bow {dict_file} --batch-size 30000 --topics 10 --update-every 1  --passes 10 --kappa 0.5 --tau0 64"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}