{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "View assignment [here](http://www.cs.ubc.ca/~nando/540-2013/lectures/homework2.pdf)\n",
      "\n",
      "This notebook factorises a small user x movie preference matrix `P` into `X` (users x factors) and `Y` (factors x movies) with alternating ridge regression, first unweighted and then weighted so that only rated entries count, and finally recommends to every user the best-scoring movie they have not rated yet."
     ]
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Exercise 1"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import division\n",
      "import numpy as np\n",
      "import pdb\n",
      "\n",
      "# MOVIES: Legally Blond; Matrix; Bourne Identity; You\u2019ve Got Mail;\n",
      "# The Devil Wears Prada; The Dark Knight; The Lord of the Rings.\n",
      "movie_titles = ['Legally Blond', 'Matrix', 'Bourne Identity',\n",
      "                'You\u2019ve Got Mail', 'The Devil Wears Prada',\n",
      "                'The Dark Knight', 'The Lord of the Rings']\n",
      "# Rows are users, columns are movies: 1 = liked, -1 = disliked, 0 = not rated.\n",
      "P = [[ 0, 0,-1, 0,-1, 1, 1], # User 1\n",
      "     [-1, 1, 1,-1, 0, 1, 1], # User 2\n",
      "     [ 0, 1, 1, 0, 0,-1, 1], # User 3\n",
      "     [-1, 1, 1, 0, 0, 1, 1], # User 4\n",
      "     [ 0, 1, 1, 0, 0, 1, 1], # User 5\n",
      "     [ 1,-1, 1, 1, 1,-1, 0], # User 6\n",
      "     [-1, 1,-1, 0,-1, 0, 1], # User 7\n",
      "     [ 0,-1, 0, 1, 1,-1,-1], # User 8\n",
      "     [ 0, 0,-1, 1, 1, 0,-1]] # User 9\n",
      "P = np.array(P)\n",
      "C = np.abs(P) # Will be 0 only when P[i,j] == 0.\n",
      "print('Raw Preference Matrix:')\n",
      "print(P)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Raw Preference Matrix:\n",
        "[[ 0  0 -1  0 -1  1  1]\n",
        " [-1  1  1 -1  0  1  1]\n",
        " [ 0  1  1  0  0 -1  1]\n",
        " [-1  1  1  0  0  1  1]\n",
        " [ 0  1  1  0  0  1  1]\n",
        " [ 1 -1  1  1  1 -1  0]\n",
        " [-1  1 -1  0 -1  0  1]\n",
        " [ 0 -1  0  1  1 -1 -1]\n",
        " [ 0  0 -1  1  1  0 -1]]\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Parameters\n",
      "reg = 0.1      # regularization parameter\n",
      "f = 2          # number of factors\n",
      "n_iters = 100  # number of alternating sweeps per fit\n",
      "SEED = 0       # fixes the random initialisation so that Run All is reproducible\n",
      "m,n = P.shape\n",
      "\n",
      "rng = np.random.RandomState(SEED)\n",
      "ridge = reg * np.eye(f)  # regularisation term added to every (f x f) normal-equation matrix"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def init_factors(m, n, f, rng, scale=0.1):\n",
      "    \"\"\"Draw small random starting factors.\n",
      "\n",
      "    Returns X with shape (m, f) and Y with shape (f, n); every entry is\n",
      "    uniform in (-scale, scale]. X is drawn before Y from `rng`.\n",
      "    \"\"\"\n",
      "    X = scale * (1 - 2*rng.rand(m, f))\n",
      "    Y = scale * (1 - 2*rng.rand(f, n))\n",
      "    return X, Y\n",
      "\n",
      "\n",
      "def rated_error(P, C, P_hat):\n",
      "    \"\"\"Return the sum of the squared residuals C * (P - P_hat).\n",
      "\n",
      "    With a 0/1 matrix C only the entries where C == 1 contribute.\n",
      "    \"\"\"\n",
      "    return np.sum((C*(P - P_hat))**2)\n",
      "\n",
      "\n",
      "def recommend_unrated(P_hat, C):\n",
      "    \"\"\"Return, for each row, the column with the highest P_hat among entries where C == 0.\n",
      "\n",
      "    Entries with C != 0 are masked with -inf, so they can never win whatever\n",
      "    the range of P_hat is. Rows without any C == 0 entry get -1.\n",
      "    \"\"\"\n",
      "    unrated = (C == 0)\n",
      "    masked = np.where(unrated, P_hat, -np.inf)\n",
      "    best = np.argmax(masked, axis=1)\n",
      "    best[~unrated.any(axis=1)] = -1\n",
      "    return best"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#Random Initialization\n",
      "# X is (m x f)\n",
      "# Y is (f x n)\n",
      "X, Y = init_factors(m, n, f, rng)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Plain alternating ridge regression fits every entry of `P`, so the zeros of unrated movies are treated as if they were real ratings."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Alternating Ridge Regression\n",
      "for _ in range(n_iters):\n",
      "    # Least-squares keeping Y fixed\n",
      "    X = np.linalg.solve(\n",
      "        np.dot(Y, Y.T) + ridge,\n",
      "        np.dot(Y, P.T)\n",
      "    ).T\n",
      "    # Least-squares keeping X fixed\n",
      "    Y = np.linalg.solve(\n",
      "        np.dot(X.T, X) + ridge,\n",
      "        np.dot(X.T, P)\n",
      "    )\n",
      "print('Alternating Ridge Regression:')\n",
      "print(np.dot(X,Y))\n",
      "print('Error for movies that users actually rated: %.2f'%rated_error(P, C, np.dot(X,Y)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Re-initialize\n",
      "X, Y = init_factors(m, n, f, rng)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The weighted variant solves one small ridge problem per user and per movie, weighting each entry by `C` so that unrated entries do not influence the fit."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Alternating Weighted Ridge Regression\n",
      "for _ in range(n_iters):\n",
      "    # Each user u has a different set of weights Cu\n",
      "    for u,Cu in enumerate(C):\n",
      "        # Y * Cu scales column j of Y by Cu[j]: same as np.dot(Y, np.diag(Cu))\n",
      "        X[u] = np.linalg.solve(\n",
      "            np.dot(Y * Cu, Y.T) + ridge,\n",
      "            np.dot(Y, Cu * P[u])\n",
      "        )\n",
      "    # Each movie i has a different set of weights Ci\n",
      "    for i,Ci in enumerate(C.T):\n",
      "        # X.T * Ci scales column u of X.T by Ci[u]: same as np.dot(X.T, np.diag(Ci))\n",
      "        Y[:,i] = np.linalg.solve(\n",
      "            np.dot(X.T * Ci, X) + ridge,\n",
      "            np.dot(X.T, Ci * P[:, i])\n",
      "        )\n",
      "print('Alternating Weighted Ridge Regression:')\n",
      "print(np.dot(X,Y))\n",
      "print('Error for movies that users actually rated: %.2f'%rated_error(P, C, np.dot(X,Y)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Reference results from the original, unseeded run (squared error on the rated entries): alternating ridge regression **5.73**, alternating weighted ridge regression **2.01**. With `SEED` fixed the exact figures of a fresh run can differ from these."
     ]
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Exercise 2"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "not_C = 1 - C # 1 for movies not rated, 0 for rated ones\n",
      "P_hat = np.dot(X, Y)\n",
      "# Best-scoring movie among those each user has not rated yet (-1 if none is left)\n",
      "top_movie_id = recommend_unrated(P_hat, C)\n",
      "for u, tm_id in enumerate(top_movie_id):\n",
      "    liked = [movie_titles[i] for i,p in enumerate(P[u]) if p == 1]\n",
      "    disliked = [movie_titles[i] for i,p in enumerate(P[u]) if p == -1]\n",
      "    print('User %d liked %s'%(u+1, ', '.join(liked)))\n",
      "    print('User %d disliked %s'%(u+1, ', '.join(disliked)))\n",
      "    if tm_id < 0:\n",
      "        print('User %d has already rated every movie\\n'%(u+1))\n",
      "        continue\n",
      "    print('For user %d the top movie is movie n.%d (%s) - predicted vote %.2f\\n'%\n",
      "          (u+1, tm_id+1, movie_titles[tm_id], P_hat[u,tm_id]))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Recommendations from the original, unseeded run (predicted vote in brackets):\n",
      "\n",
      "- User 1: Matrix (0.76)\n",
      "- User 2: The Devil Wears Prada (-1.01)\n",
      "- User 3: Legally Blond (-0.33)\n",
      "- User 4: You\u2019ve Got Mail (-1.01)\n",
      "- User 5: Legally Blond (-0.96)\n",
      "- User 6: The Lord of the Rings (-0.82)\n",
      "- User 7: The Dark Knight (1.41)\n",
      "- User 8: Legally Blond (0.95)\n",
      "- User 9: Legally Blond (0.90)"
     ]
    }
   ],
   "metadata": {}
  }
 ]
}