{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Importing Libraries and Loading Data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Reading movies file\n",
    "movies = pd.read_csv('movies.csv', sep=',', encoding='latin-1', usecols=['title', 'genres'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Toy Story</td>\n",
       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jumanji</td>\n",
       "      <td>Adventure|Children|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Grumpier Old Men</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Waiting to Exhale</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Father of the Bride Part II</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          title                                       genres\n",
       "0                    Toy Story   Adventure|Animation|Children|Comedy|Fantasy\n",
       "1                      Jumanji                    Adventure|Children|Fantasy\n",
       "2             Grumpier Old Men                                Comedy|Romance\n",
       "3            Waiting to Exhale                          Comedy|Drama|Romance\n",
       "4  Father of the Bride Part II                                        Comedy"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Break up the big genre string into a string array\n",
    "movies['genres'] = movies['genres'].str.split('|')\n",
    "# Convert genres to string value\n",
    "movies['genres'] = movies['genres'].fillna(\"\").astype('str')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recommendation based on Genre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9742, 177)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')\n",
    "tfidf_matrix = tf.fit_transform(movies['genres'])\n",
    "tfidf_matrix.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.31379419, 0.0611029 , 0.05271111],\n",
       "       [0.31379419, 1.        , 0.        , 0.        ],\n",
       "       [0.0611029 , 0.        , 1.        , 0.35172407],\n",
       "       [0.05271111, 0.        , 0.35172407, 1.        ]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)\n",
    "cosine_sim[:4, :4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Build a 1-dimensional array with movie titles\n",
    "titles = movies['title']\n",
    "indices = pd.Series(movies.index, index=movies['title'])\n",
    "\n",
    "# Function that get movie recommendations based on the cosine similarity score of movie genres\n",
    "def genre_recommendations(title):\n",
    "    idx = indices[title]\n",
    "    sim_scores = list(enumerate(cosine_sim[idx]))\n",
    "    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)\n",
    "    sim_scores = sim_scores[1:21]\n",
    "    movie_indices = [i[0] for i in sim_scores]\n",
    "    return titles.iloc[movie_indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8387                          Need for Speed \n",
       "8149      Grandmaster, The (Yi dai zong shi) \n",
       "123                                Apollo 13 \n",
       "8026                              Life of Pi \n",
       "8396                                    Noah \n",
       "38                           Dead Presidents \n",
       "341                              Bad Company \n",
       "347             Faster Pussycat! Kill! Kill! \n",
       "430                        Menace II Society \n",
       "568                          Substitute, The \n",
       "665                          Nothing to Lose \n",
       "1645                       Untouchables, The \n",
       "1696                           Monument Ave. \n",
       "2563                              Death Wish \n",
       "2574                        Band of the Hand \n",
       "3037                              Foxy Brown \n",
       "3124    Harley Davidson and the Marlboro Man \n",
       "3167                                Scarface \n",
       "3217                               Swordfish \n",
       "3301                           Above the Law \n",
       "Name: title, dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "genre_recommendations('Dark Knight ').head(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recommendation based on Title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9742, 20558)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')\n",
    "tfidf_matrix = tf.fit_transform(movies['title'])\n",
    "tfidf_matrix.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 0., 0., 0.],\n",
       "       [0., 1., 0., 0.],\n",
       "       [0., 0., 1., 0.],\n",
       "       [0., 0., 0., 1.]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)\n",
    "cosine_sim[:4, :4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Build a 1-dimensional array with movie titles\n",
    "titles = movies['title']\n",
    "indices = pd.Series(movies.index, index=movies['title'])\n",
    "\n",
    "# Function that get movie recommendations based on the cosine similarity score of movie genres\n",
    "def title_recommendations(title):\n",
    "    idx = indices[title]\n",
    "    sim_scores = list(enumerate(cosine_sim[idx]))\n",
    "    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)\n",
    "    sim_scores = sim_scores[1:21]\n",
    "    movie_indices = [i[0] for i in sim_scores]\n",
    "    return titles.iloc[movie_indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7768                     Dark Knight Rises, The \n",
       "8032    Batman: The Dark Knight Returns, Part 1 \n",
       "8080    Batman: The Dark Knight Returns, Part 2 \n",
       "140                                First Knight \n",
       "2417                         Cry in the Dark, A \n",
       "5778                          Alone in the Dark \n",
       "7375                             Knight and Day \n",
       "3576                               Black Knight \n",
       "3190                           Knight's Tale, A \n",
       "6858                       Alone in the Dark II \n",
       "4242                                  Dark Blue \n",
       "5060                                  Dark Days \n",
       "1305                                  Dark City \n",
       "5483                                  Dark Star \n",
       "6815                      Batman: Gotham Knight \n",
       "5934                                 Dark Water \n",
       "4749                        Shot in the Dark, A \n",
       "7877                               Dark Shadows \n",
       "8766                            The Dark Valley \n",
       "6690                      Taxi to the Dark Side \n",
       "Name: title, dtype: object"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "title_recommendations('Dark Knight ').head(20)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}