{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "from gensim.models import KeyedVectors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will load only first 1000 (top 1000) vectors from python fasttext (128) model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = KeyedVectors.load_word2vec_format(\"../model.vec\", binary=False, limit=1000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In case you are loading GloVe embeddings, you need to convert it first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tmpfile=get_tmpfile(\"source2vec\")\n",
    "glove2word2vec(datapath(\"../glove_model.txt\"), tmpfile)\n",
    "model = KeyedVectors.load_word2vec_format(tmpfile, binary=False, limit=1000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can do fancy staff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('in', 0.8999497890472412),\n",
       " ('enumerate', 0.7851556539535522),\n",
       " ('and', 0.6600955724716187),\n",
       " ('set', 0.6515539884567261),\n",
       " ('range', 0.6467443704605103),\n",
       " ('the', 0.6438981294631958),\n",
       " ('are', 0.6360925436019897),\n",
       " ('len', 0.6327202916145325),\n",
       " ('or', 0.625091552734375),\n",
       " ('list', 0.6236062049865723)]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.most_similar(\"for\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('in', 0.949974000453949),\n",
       " ('enumerate', 0.8925769925117493),\n",
       " ('and', 0.8300470113754272),\n",
       " ('set', 0.825776219367981),\n",
       " ('range', 0.823371410369873),\n",
       " ('the', 0.8219482898712158),\n",
       " ('are', 0.8180454969406128),\n",
       " ('len', 0.8163593411445618),\n",
       " ('or', 0.8125450015068054),\n",
       " ('list', 0.811802327632904)]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.most_similar_cosmul(\"for\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'a'"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.doesnt_match([\"for\", \"i\", \"a\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6013880741598091"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similarity(\"for\", \"i\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('in', 0.8999497890472412),\n",
       " ('enumerate', 0.7851556539535522),\n",
       " ('and', 0.6600955724716187),\n",
       " ('set', 0.6515539884567261),\n",
       " ('range', 0.6467443704605103),\n",
       " ('the', 0.6438981294631958),\n",
       " ('are', 0.6360925436019897),\n",
       " ('len', 0.6327202916145325),\n",
       " ('or', 0.625091552734375),\n",
       " ('list', 0.6236062049865723)]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.similar_by_word(\"for\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Raw vector values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "</s> : [-0.0040119 -0.2772     0.39069   -0.11142   -0.064213   0.031526\n",
      "  0.072355   0.28111    0.098242   0.44947    0.0033427  0.059818\n",
      "  0.10572    0.033005  -0.16825    0.027287  -0.014041  -0.13167\n",
      "  0.20144    0.097083   0.13253    0.09556   -0.12805    0.10373\n",
      " -0.12057    0.36752   -0.13177   -0.070997  -0.079466   0.29838\n",
      " -0.066887  -0.069284  -0.26501    0.21408    0.020991  -0.34294\n",
      " -0.3189    -0.1705     0.11337   -0.22872   -0.024095   0.069369\n",
      " -0.31733    0.63158    0.084219  -0.23931   -0.17847   -0.38957\n",
      " -0.038808  -0.046805  -0.20444   -0.15775   -0.12279   -0.014646\n",
      " -0.10996   -0.060379  -0.16898   -0.0048211 -0.57151    0.18944\n",
      "  0.11457   -0.2425    -0.08871   -0.054677  -0.2549    -0.15642\n",
      "  0.12891   -0.27773    0.10004   -0.46064    0.25698    0.039099\n",
      "  0.24376   -0.14525   -0.27021    0.018427   0.046646  -0.090066\n",
      "  0.1492     0.0032186 -0.15175   -0.11093    0.35132   -0.068802\n",
      "  0.0021299  0.29755   -0.19092    0.0321    -0.086515   0.36746\n",
      " -0.15456   -0.051887   0.63347    0.02882    0.3993    -0.20558\n",
      "  0.08532    0.10247   -0.056457  -0.12951   -0.28994    0.15222\n",
      "  0.16311   -0.22158    0.032566  -0.38924   -0.20935   -0.12184\n",
      "  0.064111  -0.11226    0.10365    0.065956  -0.064537   0.072354\n",
      " -0.029869   0.016191   0.22993    0.03368   -0.074305   0.080369\n",
      "  0.062322   0.14384    0.059349   0.25721   -0.016504   0.034721\n",
      " -0.35689   -0.20129  ]\n"
     ]
    }
   ],
   "source": [
    "for word in model.vocab:\n",
    "    print(word, \":\", model[word])\n",
    "    break # only first one now"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}