{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Matrix Factorization and Stochastic Gradient Descent\n", "\n", "\n", "### step one: write a dot product function\n", "\n", "15xp" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "84" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def dot(a, b):\n", " total = 0\n", " # To be done\n", " return total\n", "\n", "a = [2, 4, 6]\n", "b = [3, 6, 9]\n", "dot(a, b)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "should return 84\n", "\n", "### now some additional functions we will need:" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[4.997032834327799, 2.9330884336676823, 4.121609930427961, 0.995788649960746]\n", "[3.960698640501951, 2.3329915753309693, 3.4585191682164638, 0.997277812062709]\n", "[1.0747834402096434, 0.8177841226178048, 5.258229675851485, 4.95802557060687]\n", "[0.9617906007448743, 0.7134234148971222, 4.275407774007592, 3.9701621535435514]\n", "[1.8388283001447419, 1.2242568641489167, 4.906232113552432, 4.044489286963926]\n", "\n", "\n", "\n", "\n", "[4.997032834327799, 3.960698640501951, 1.0747834402096434, 0.9617906007448743, 1.8388283001447419]\n", "[2.9330884336676823, 2.3329915753309693, 0.8177841226178048, 0.7134234148971222, 1.2242568641489167]\n", "[4.121609930427961, 3.4585191682164638, 5.258229675851485, 4.275407774007592, 4.906232113552432]\n", "[0.995788649960746, 0.997277812062709, 4.95802557060687, 3.9701621535435514, 4.044489286963926]\n" ] } ], "source": [ "p = [[ 1.3756739, 1.82247159],\n", " [ 1.16901533, 1.39862014],\n", " [ 2.08942885, -0.65458002],\n", " [ 1.69334605, -0.48281951],\n", " [ 1.89681673, -0.14079464]]\n", "\n", "q = [[1.11071814, 0.72430544,2.60828792, 2.05752625,],\n", " [1.90348475, 1.06266696, 0.29271036, -1.00670788]]\n", "\n", "\n", "q2 = [[ 1.11071814, 1.90348475],\n", " [ 0.72430544, 1.06266696],\n", " [ 2.60828792, 0.29271036],\n", " [ 2.05752625, -1.00670788]]\n", "\n", "def transpose(a):\n", " result = [[a[y][x] for y in range(len(a))] for x in range(len(a[0]))]\n", " \n", " return result\n", "\n", "def matMult(a, b):\n", " #matrix multiplication\n", " b = transpose(b)\n", " result = [[0 for y in range(len(b))] for x in range(len(a))]\n", " for x in range(len(a)):\n", " for y in range(len(b)):\n", " result[x][y] = dot(a[x], b[y])\n", " return result\n", "\n", "def prettyPrintMatrix(a):\n", " for x in a:\n", " print (x)\n", "\n", "# my test code\n", "r = matMult(p, q)\n", "prettyPrintMatrix(r)\n", "print('\\n\\n\\n')\n", "prettyPrintMatrix(transpose(r))\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "####my results\n", " [4.997032834327799, 2.9330884336676823, 4.121609930427961, 0.995788649960746]\n", " [3.960698640501951, 2.3329915753309693, 3.4585191682164638, 0.997277812062709]\n", " [1.0747834402096434, 0.8177841226178048, 5.258229675851485, 4.95802557060687]\n", " [0.9617906007448743, 0.7134234148971222, 4.275407774007592, 3.9701621535435514]\n", " [1.8388283001447419, 1.2242568641489167, 4.906232113552432, 4.044489286963926]\n", "\n", "\n", " [4.997032834327799, 3.960698640501951, 1.0747834402096434, 0.9617906007448743, 1.8388283001447419]\n", " [2.9330884336676823, 2.3329915753309693, 0.8177841226178048, 0.7134234148971222, 1.2242568641489167]\n", " [4.121609930427961, 3.4585191682164638, 5.258229675851485, 4.275407774007592, 4.906232113552432]\n", " [0.995788649960746, 0.997277812062709, 4.95802557060687, 3.9701621535435514, 4.044489286963926]\n", "\n", "\n", "### Gradient Descent\n", "\n", "#### finish the algorithm for 30xp\n", "\n", "Here is the data we will use.\n", "\n", "|Customer | Taylor Swift | Miranda Lambert | Carrie Underwood | Nicki Minaj | Ariana Grande |\n", "|:-----------|:------:|:------:|:---------:|:------:|:--------:|\n", "|Jake|5|-|5|2|2|\n", "|Clara|2|-|-|4|5|\n", "|Kelsey|5|5|5|2|-|\n", "|Angelica|2|3|-|5|5|\n", "|Jordyn|2|1|-|5|-|\n", "\n", "\n", "Now you need to finish the following algorithm:" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Our final estimate of the P matrix\n", "[2.3610176655454493, 0.4439041251268022]\n", "[0.6137938317930417, 1.8591178978338283]\n", "[2.326666932204386, 0.46068244286925697]\n", "[0.8572268226166766, 2.013617527721459]\n", "[0.32274717667725983, 2.239859945627974]\n", "\n", "Our final estimate of the Q matrix\n", "[2.0187048261349063, 0.40145041435638706]\n", "[2.1223208557273714, 0.33442067280710364]\n", "[1.8321637142068383, 1.5391839075388205]\n", "[0.44154179777070157, 2.156216743837729]\n", "[0.39155773901155466, 2.4160709447019313]\n", "\n", "\n", "Our predictions\n", "[4.944403250993033, 5.159287748514613, 5.009020981298982, 1.999641491870372, 1.9969785978395125]\n", "[1.9854122209152159, 1.9243949085525076, 3.9860951371784163, 4.279676772028168, 4.732096460727768]\n", "[4.881794922426067, 5.091995487043348, 4.97190973077985, 2.0206518969659273, 2.0240659083576187]\n", "[2.53885551476764, 2.4927056921250674, 4.669907573850071, 4.720297301313576, 5.200696598954197]\n", "[1.5507239864524245, 1.4340285341998649, 4.038882049422134, 4.972129887229999, 5.538034689605507]\n" ] } ], "source": [ "\n", "def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):\n", " Q = transpose(Q)\n", " for step in range(steps):\n", " for i in range(len(R)):\n", " for j in range(len(R[i])):\n", " if R[i][j] > 0:\n", " eij = R[i][j] - dot(P[i],transpose(Q)[j])\n", " #\n", " # Now you need to update the P and Q matrices\n", " \n", " \n", " # don't need to alter or add anything after this line\n", " eR = matMult(P,Q)\n", " e = 0\n", " for i in range(len(R)):\n", " for j in range(len(R[i])):\n", " if R[i][j] > 0:\n", " e = e + pow(R[i][j] - dot(P[i],transpose(Q)[j]), 2)\n", " for k in range(K):\n", " e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2))\n", " if e < 0.001:\n", " break\n", " return P, transpose(Q)\n", "\n", "\n", "\n", "R = [\n", " [5, 0, 5, 2, 2],\n", " [2, 0, 0, 4, 5],\n", " [5, 5, 5, 2, 0],\n", " [2, 3, 0, 5, 5],\n", " [2, 1, 0, 5, 0]\n", " ]\n", "\n", "\n", "K = 2\n", "\n", "## these are some random arrays I created\n", "\n", "P = [[ 0.64132372, 0.3808661 ],\n", " [ 0.092069, 0.82043744],\n", " [ 0.77184994, 0.08276139],\n", " [ 0.36048553, 0.05087073],\n", " [ 0.37160684, 0.3147877 ]]\n", "\n", "Q = [[ 0.69314147, 0.75344936],\n", " [ 0.75093344, 0.36297899],\n", " [ 0.22309714, 0.58856832],\n", " [ 0.11611411, 0.30807273],\n", " [ 0.04559444, 0.29029353]]\n", "\n", "\n", "nP, nQ = matrix_factorization(R, P, Q, K)\n", "print(\"Our final estimate of the P matrix\")\n", "prettyPrintMatrix (nP)\n", "print(\"\\nOur final estimate of the Q matrix\")\n", "prettyPrintMatrix (nQ)\n", "nR = matMult(nP, transpose(nQ))\n", "print(\"\\n\\nOur predictions\")\n", "prettyPrintMatrix (nR)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Here were my predictions. \n", "How did you do?\n", "\n", "|Customer | Taylor Swift | Miranda Lambert | Carrie Underwood | Nicki Minaj | Ariana Grande |\n", "|:-----------|:------:|:------:|:---------:|:------:|:--------:|\n", "|Jake|4.94 | 5.159|5.009 | 1.999 | 1.997 |\n", "|Clara|1.9854| 1.9243| 3.9860| 4.2796|4.7320|\n", "|Kelsey|4.8817|5.0919| 4.9719| 2.0206|2.0240|\n", "|Angelica|2.5388|2.4927| 4.6699| 4.72029| 5.2006|\n", "|Jordyn|1.5507| 1.4340|4.0388| 4.9721|5.5380|" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }