{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Base name of this notebook; also used as the name of its data sub-directory.\n",
    "FN = 'tf-projector'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using TensorFlow's TensorBoard to project a NumPy embedding matrix to 3D/2D."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://www.tensorflow.org/versions/master/how_tos/embedding_viz/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "# Directory that receives the checkpoint, the metadata file and the projector config.\n",
    "LOG_DIR = os.path.join('data', FN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!mkdir -p {LOG_DIR}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 100, dtype('float32'))"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "# Fixed seed so that re-running the notebook produces the same stand-in embedding.\n",
    "np.random.seed(0)\n",
    "# Random placeholder matrix: one row per vocabulary entry, one column per dimension.\n",
    "my_embedding = np.random.random((10000, 100)).astype(np.float32)\n",
    "vocabulary_size, embedding_size = my_embedding.shape\n",
    "vocabulary_size, embedding_size, my_embedding.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# You don't need to normalize because TensorBoard will do it for you.\n",
    "# If you want to do it yourself anyway:\n",
    "# my_embedding /= np.sqrt((my_embedding * my_embedding).sum(axis=-1, keepdims=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "# Build a dedicated graph holding a single variable initialised from the NumPy matrix.\n",
    "graph = tf.Graph()\n",
    "with graph.as_default():\n",
    "    embedding_var = tf.Variable(\n",
    "        tf.constant(my_embedding))\n",
    "    init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Initialise the variable and write it to a checkpoint inside LOG_DIR.\n",
    "with tf.Session(graph=graph) as session:\n",
    "    init.run()\n",
    "    saver = tf.train.Saver()\n",
    "    # global_step=0 yields checkpoint files prefixed with 'model.ckpt-0'.\n",
    "    saver.save(session, os.path.join(LOG_DIR, 'model.ckpt'), global_step=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from tensorflow.contrib.tensorboard.plugins import projector\n",
    "# Use the same LOG_DIR where you stored your checkpoint.\n",
    "summary_writer = tf.summary.FileWriter(LOG_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto\n",
    "config = projector.ProjectorConfig()\n",
    "\n",
    "# You can add multiple embeddings. Here we add only one.\n",
    "embedding = config.embeddings.add()\n",
    "embedding.tensor_name = embedding_var.name\n",
    "# Link this tensor to its metadata file (e.g. labels).\n",
    "# NOTE(review): this path is relative to the current working directory; some\n",
    "# TensorBoard versions may resolve relative metadata paths against the log\n",
    "# directory instead -- confirm for the version you run.\n",
    "embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')\n",
    "\n",
    "# Saves a configuration file that TensorBoard will read during startup.\n",
    "projector.visualize_embeddings(summary_writer, config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The documentation says you don't need a header line for a single-column metadata file, but the code says otherwise, so a `Name` header line is written first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "# Write one header line followed by one label per embedding row.\n",
    "# range (rather than xrange) keeps the cell runnable on both Python 2 and Python 3.\n",
    "with open(embedding.metadata_path, 'w') as fp:\n",
    "    print('Name', file=fp)\n",
    "    for i in range(vocabulary_size):\n",
    "        print('label%d' % i, file=fp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name\r\n",
      "label0\r\n",
      "label1\r\n",
      "label2\r\n",
      "label3\r\n",
      "label4\r\n",
      "label5\r\n",
      "label6\r\n",
      "label7\r\n",
      "label8\r\n"
     ]
    }
   ],
   "source": [
    "!head {embedding.metadata_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 10001 data/tf-projector/metadata.tsv\r\n"
     ]
    }
   ],
   "source": [
    "!wc -l {embedding.metadata_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "checkpoint model.ckpt-0.index\r\n",
      "metadata.tsv model.ckpt-0.meta\r\n",
      "model.ckpt-0.data-00000-of-00001 projector_config.pbtxt\r\n"
     ]
    }
   ],
   "source": [
    "!ls {LOG_DIR}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Starts the TensorBoard server; this cell keeps running until the kernel is interrupted.\n",
    "!tensorboard --logdir={LOG_DIR}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "Click on [this link](http://localhost:6006) and then click on `EMBEDDING` at the top right."
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}