{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 1: Introduction\n",
    "\n",
    "Let's build a vector for input text, e.g., from `doc1`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3, 5]\n"
     ]
    }
   ],
   "source": [
    "doc1 = \"meeting ... management ... meeting ... management ... meeting \"\n",
    "doc1 += \"... management ... meeting ... meeting\"\n",
    "\n",
    "vector = [0, 0]\n",
    "\n",
    "for word in doc1.split(\" \"):\n",
    "    if word==\"management\":\n",
    "        vector[0] = vector[0] + 1\n",
    "    if word==\"meeting\":\n",
    "        vector[1] = vector[1] + 1\n",
    "        \n",
    "print (vector)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here is how you can calculate *Euclidean distance* between a document and a query:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4.47213595499958\n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "\n",
    "query = [1, 1]\n",
    "doc1 = [3, 5]\n",
    "sq_length = 0\n",
    "\n",
    "for index in range(0, len(query)):\n",
    "    sq_length += math.pow((doc1[index] - query[index]), 2)\n",
    "            \n",
    "print (math.sqrt(sq_length))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, let's estimate *cosine similarity*:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9701425001453319\n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "\n",
    "query = [1, 1]\n",
    "doc1 = [3, 5]\n",
    "\n",
    "def length(vector):\n",
    "    sq_length = 0\n",
    "    for index in range(0, len(vector)):\n",
    "        sq_length += math.pow(vector[index], 2)\n",
    "    return math.sqrt(sq_length)\n",
    "    \n",
    "def dot_product(vector1, vector2):\n",
    "    if len(vector1)==len(vector2):\n",
    "        dot_prod = 0\n",
    "        for index in range(0, len(vector1)):\n",
    "            dot_prod += vector1[index]*vector2[index]\n",
    "        return dot_prod\n",
    "    else:\n",
    "        return \"Unmatching dimensionality\"\n",
    "\n",
    "cosine=dot_product(query, doc1)/(length(query)*length(doc1))\n",
    "print (cosine)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}