{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Importing Libraries and Loading Data " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import math\n", "import operator\n", "\n", "#Building Custom Data for Movie Rating\n", "review = {\n", "'Marlon Brando': {\n", "'The Godfather': 5.00, \n", "'The Godfather Part II': 4.29,\n", "'Apocalypse Now': 5.00, \n", "'Jaws': 1.\n", "},\n", "'Stephen King': {\n", "'The Shawshank Redemption': 4.89, \n", "'The Shining': 4.93 , \n", "'The Green Mile': 4.87,\n", "'The Godfather': 1.33,\n", "},\n", "'Steven Spielberg': {\n", "'Raiders of the Lost Ark': 5.0, \n", "'Jaws': 4.89,\n", "'Saving Private Ryan': 4.78, \n", "'Star Wars Episode IV - A New Hope': 4.33,\n", "'Close Encounters of the Third Kind': 4.77,\n", "'The Godfather': 1.25,\n", "'The Godfather Part II': 1.72\n", "},\n", "'George Lucas':{\n", "'Star Wars Episode IV - A New Hope': 5.00\t\n", "},\n", "'Al Pacino': {\n", "'The Godfather': 4.02, \n", "'The Godfather Part II': 5.00,\n", "},\n", "'Robert DeNiro': {\n", "'The Godfather': 3.07, \n", "'The Godfather Part II': 4.29, \n", "'Raging Bull': 5.00, \n", "'Goodfellas': 4.89\n", "},\n", "'Robert Duvall': {\n", "'The Godfather': 3.80, \n", "'The Godfather Part II': 3.61,\n", "'Apocalypse Now': 4.26 \n", "},\n", "'Jack Nicholson': {\n", "'The Shining': 5.0,\n", "'One Flew Over The Cuckoos Nest': 5.0,\n", "'The Godfather': 2.22,\n", "'The Godfather Part II': 3.34\n", "},\n", "'Morgan Freeman': {\n", "'The Shawshank Redemption': 4.98,\n", "'The Shining': 4.42,\n", "'Apocalypse Now': 1.63,\n", "'The Godfather': 1.12,\n", "'The Godfather Part II': 2.16\n", "},\n", "'Harrison Ford': {\n", "'Raiders of the Lost Ark': 5.0, \n", "'Star Wars Episode IV - A New Hope': 4.84,\n", "},\n", "'Tom Hanks': {\n", "'Saving Private Ryan': 3.78, \n", "'The Green Mile': 4.96,\n", "'The Godfather': 1.04,\n", "'The Godfather Part II': 1.03\n", "},\n", "'Francis Ford Coppola': {\n", "'The Godfather': 5.00, \n", "'The Godfather Part II': 5.0, \n", "'Jaws': 1.24,\n", "'One Flew Over The Cuckoos Nest': 2.02\n", "},\n", "'Martin Scorsese': {\n", "'Raging Bull': 5.0, \n", "'Goodfellas': 4.87,\n", "'Close Encounters of the Third Kind': 1.14,\n", "'The Godfather': 4.00\n", "},\n", "'Diane Keaton': {\n", "'The Godfather': 2.98,\n", "'The Godfather Part II': 3.93,\n", "'Close Encounters of the Third Kind': 1.37\n", "},\n", "'Richard Dreyfuss': {\n", "'Jaws': 5.0, \n", "'Close Encounters of the Third Kind': 5.0,\n", "'The Godfather': 1.07,\n", "'The Godfather Part II': 0.63\n", "},\n", "'Joe Pesci': {\n", "'Raging Bull': 4.89, \n", "'Goodfellas': 5.0,\n", "'The Godfather': 4.87,\n", "'Star Wars Episode IV - A New Hope': 1.32\n", "}\n", "}" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function to get common movies b/w Users\n", "def get_common_movies(criticA,criticB):\n", " return [movie for movie in review[criticA] if movie in review[criticB]]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "['The Godfather', 'The Godfather Part II']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_common_movies('Marlon Brando','Robert DeNiro')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "['Saving Private Ryan', 'The Godfather', 'The Godfather Part II']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_common_movies('Steven Spielberg','Tom Hanks')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Raging Bull', 'Goodfellas', 'The Godfather']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_common_movies('Martin Scorsese','Joe Pesci')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function to get reviews from the common movies\n", "def get_reviews(criticA,criticB):\n", " common_movies = get_common_movies(criticA,criticB)\n", " return [(review[criticA][movie], review[criticB][movie]) for movie in common_movies]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(5.0, 3.07), (4.29, 4.29)]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_reviews('Marlon Brando','Robert DeNiro')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(4.78, 3.78), (1.25, 1.04), (1.72, 1.03)]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_reviews('Steven Spielberg','Tom Hanks')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(5.0, 4.89), (4.87, 5.0), (4.0, 4.87)]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_reviews('Martin Scorsese','Joe Pesci')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Euclidean Distance Formula for Calculating similarity\n", "$d(x,y)=\\sqrt{(x1-y1)^2 + (x2-y1)^2 + (xn-yn)^2}$" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function to get Euclidean Distance b/w 2 points \n", "def euclidean_distance(points):\n", " squared_diffs = [(point[0] - point[1]) ** 2 for point in points]\n", " summed_squared_diffs = sum(squared_diffs)\n", " distance = math.sqrt(summed_squared_diffs)\n", " return distance" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function to calculate similarity more similar less the distance and vice versa\n", "# Added 1 for if highly similar can make the distance zero and give NotDefined Error\n", "def similarity(reviews):\n", " return 1/ (1 + euclidean_distance(reviews))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function to get similarity b/w 2 users\n", "def get_critic_similarity(criticA, criticB):\n", " reviews = get_reviews(criticA,criticB)\n", " return similarity(reviews)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0.341296928327645" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_critic_similarity('Marlon Brando','Robert DeNiro')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0.4478352722730117" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_critic_similarity('Steven Spielberg','Tom Hanks')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5300793497254199" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_critic_similarity('Martin Scorsese','Joe Pesci')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function to give recommendation to users based on their reviews.\n", "def recommend_movies(critic, num_suggestions):\n", " similarity_scores = [(get_critic_similarity(critic, other), other) for other in review if other != critic]\n", " # Get similarity Scores for all the critics\n", " similarity_scores.sort() \n", " similarity_scores.reverse()\n", " similarity_scores = similarity_scores[0:num_suggestions]\n", "\n", " recommendations = {}\n", " # Dictionary to store recommendations\n", " for similarity, other in similarity_scores:\n", " reviewed = review[other]\n", " # Storing the review\n", " for movie in reviewed:\n", " if movie not in review[critic]:\n", " weight = similarity * reviewed[movie]\n", " # Weighing similarity with review\n", " if movie in recommendations:\n", " sim, weights = recommendations[movie]\n", " recommendations[movie] = (sim + similarity, weights + [weight])\n", " # Similarity of movie along with weight\n", " else:\n", " recommendations[movie] = (similarity, [weight])\n", " \n", "\n", " for recommendation in recommendations:\n", " similarity, movie = recommendations[recommendation]\n", " recommendations[recommendation] = sum(movie) / similarity\n", " # Normalizing weights with similarity\n", "\n", " sorted_recommendations = sorted(recommendations.items(), key=operator.itemgetter(1), reverse=True)\n", " #Sorting recommendations with weight\n", " return sorted_recommendations" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "[('Goodfellas', 5.000000000000001),\n", " ('Raiders of the Lost Ark', 5.0),\n", " ('Raging Bull', 4.89),\n", " ('Star Wars Episode IV - A New Hope', 3.8157055214723923),\n", " ('One Flew Over The Cuckoos Nest', 2.02)]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recommend_movies('Marlon Brando',4)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Raiders of the Lost Ark', 5.0),\n", " ('Star Wars Episode IV - A New Hope', 4.92),\n", " ('Close Encounters of the Third Kind', 1.2744773851327365)]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recommend_movies('Robert DeNiro',4)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "[('The Shawshank Redemption', 4.928285762244913),\n", " ('The Green Mile', 4.87),\n", " ('The Shining', 4.71304734727882),\n", " ('Apocalypse Now', 1.63)]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recommend_movies('Steven Spielberg',4)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Raiders of the Lost Ark', 5.0),\n", " ('Jaws', 5.0),\n", " ('Close Encounters of the Third Kind', 5.0),\n", " ('The Shining', 4.93),\n", " ('Star Wars Episode IV - A New Hope', 4.92),\n", " ('The Shawshank Redemption', 4.89)]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recommend_movies('Tom Hanks',4)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Raiders of the Lost Ark', 5.0),\n", " ('Star Wars Episode IV - A New Hope', 4.92),\n", " ('The Godfather Part II', 4.3613513513513515),\n", " ('Apocalypse Now', 4.26)]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recommend_movies('Martin Scorsese',4)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Apocalypse Now', 5.000000000000001),\n", " ('The Godfather Part II', 4.7280538302277435),\n", " ('One Flew Over The Cuckoos Nest', 2.02),\n", " ('Close Encounters of the Third Kind', 1.14),\n", " ('Jaws', 1.12)]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recommend_movies('Joe Pesci',4)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }