{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "FN = 'tf-projector'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using TensorFlow's TenosrBoard to project numpy embedding matrix to 3D/2D"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://www.tensorflow.org/versions/master/how_tos/embedding_viz/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "LOG_DIR = os.path.join('data', FN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!mkdir -p {LOG_DIR}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 100, dtype('float32'))"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "my_embedding = np.random.random((10000,100)).astype(np.float32)\n",
    "vocabulary_size, embedding_size = my_embedding.shape\n",
    "vocabulary_size, embedding_size, my_embedding.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# You dont need to normalize because tensorboard will do it for you\n",
    "# embedding /= np.sqrt((embedding*embedding).sum(axis=-1,keepdims=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "graph = tf.Graph()\n",
    "with graph.as_default():\n",
    "    embedding_var = tf.Variable(\n",
    "        tf.constant(my_embedding))\n",
    "    init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with tf.Session(graph=graph) as session:\n",
    "    init.run()\n",
    "    saver = tf.train.Saver()\n",
    "    saver.save(session, LOG_DIR+\"/model.ckpt\", 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from tensorflow.contrib.tensorboard.plugins import projector\n",
    "# Use the same LOG_DIR where you stored your checkpoint.\n",
    "summary_writer = tf.summary.FileWriter(LOG_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto\n",
    "config = projector.ProjectorConfig()\n",
    "\n",
    "# You can add multiple embeddings. Here we add only one.\n",
    "embedding = config.embeddings.add()\n",
    "embedding.tensor_name = embedding_var.name\n",
    "# Link this tensor to its metadata file (e.g. labels).\n",
    "embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')\n",
    "\n",
    "# Saves a configuration file that TensorBoard will read during startup.\n",
    "projector.visualize_embeddings(summary_writer, config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "documentation say you dont need a header line for a single column file, but code say otherwise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "with open(embedding.metadata_path,'w') as fp:\n",
    "    print('Name', file=fp)\n",
    "    for i in xrange(vocabulary_size):\n",
    "        print('label%d'%i, file=fp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name\r\n",
      "label0\r\n",
      "label1\r\n",
      "label2\r\n",
      "label3\r\n",
      "label4\r\n",
      "label5\r\n",
      "label6\r\n",
      "label7\r\n",
      "label8\r\n"
     ]
    }
   ],
   "source": [
    "!head {embedding.metadata_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   10001 data/tf-projector/metadata.tsv\r\n"
     ]
    }
   ],
   "source": [
    "!wc -l {embedding.metadata_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "checkpoint                       model.ckpt-0.index\r\n",
      "metadata.tsv                     model.ckpt-0.meta\r\n",
      "model.ckpt-0.data-00000-of-00001 projector_config.pbtxt\r\n"
     ]
    }
   ],
   "source": [
    "!ls data/{FN}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!tensorboard --logdir=data/{FN}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "click on [this link](http://localhost:6006) and click on `EMBEDDING` on the top right"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}