{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Building a fashion recommender (III): Content based recommender"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"__The explanation of this implementation can be found at: http://www.rosariomgomez.me/__
\n",
"\n",
"__Index__
\n",
"1. [Build the training and testing sets](#1.-Build-the-training-and-testing-sets)
\n",
"2. [Estimation functions](#2.-Estimation-functions)
\n",
"3. [Content based recommendations](#3.-Content-based-recommendation-engine)
"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"1. Build the training and testing sets"
]
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": [
"1.1. Build the dataframes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"from create_features import create_pin_features, create_user_features"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_db():\n",
" from pymongo import MongoClient\n",
" client = MongoClient('server', port) #server, port\n",
" db = client.database_name #database name\n",
" db.authenticate(\"user\", \"pwd\")\n",
" return db"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"db = get_db()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#retrieve all the ratings from the DB\n",
"rated_outfits = db.ratings.find()\n",
"list_ratings = [rate for rate in rated_outfits]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#retrieve all the users from the DB and build the feature vectors\n",
"all_users = db.user.find()\n",
"list_users = [create_user_features(user) for user in all_users]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#retrieve all items from the DB and build the feature vectors\n",
"all_pins = db.fullpin.find()\n",
"list_pins = [create_pin_features(pin) for pin in all_pins]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#build the pandas items dataframe\n",
"items = pd.DataFrame(list_pins)\n",
"items = items.rename(columns = {'_id':'pin_id'}) #to be in line with the ratings names\n",
"items.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"
\n", " | pin_id | \n", "blog_name | \n", "blogger_age | \n", "blogger_body_shape_apple | \n", "blogger_body_shape_hourglass | \n", "blogger_body_shape_inverted_triangle | \n", "blogger_body_shape_pear | \n", "blogger_body_shape_rectangle | \n", "blogger_dress_size | \n", "blogger_style | \n", "blogger_style_bohemian chic | \n", "blogger_style_casual chic | \n", "blogger_style_classic | \n", "blogger_style_edgy | \n", "blogger_style_preppy | \n", "blogger_style_romantic | \n", "brands_ASOS | \n", "brands_Abercrombie & Fitch | \n", "brands_Accessorize | \n", "brands_Alexander McQueen | \n", "\n", " |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "537934c861e01f10f1118dea | \n", "Hallie Daily | \n", "40 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "6 | \n", "[classic, romantic] | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
1 | \n", "537934d261e01f10f1118e1b | \n", "Hallie Daily | \n", "40 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "6 | \n", "[classic] | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
2 | \n", "537934cc61e01f10f1118e00 | \n", "Hallie Daily | \n", "40 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "6 | \n", "[classic] | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
3 | \n", "537934c661e01f10f1118ddf | \n", "Hallie Daily | \n", "40 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "6 | \n", "[classic, romantic] | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
4 | \n", "537934c661e01f10f1118ddd | \n", "Hallie Daily | \n", "40 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "6 | \n", "[classic] | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
5 rows \u00d7 319 columns
\n", "\n", " | user_id | \n", "pin_id | \n", "rating | \n", "
---|---|---|---|
0 | \n", "538677f561e01f0be9e838f7 | \n", "537933bb61e01f10f111886f | \n", "0 | \n", "
1 | \n", "538677f561e01f0be9e838f7 | \n", "537933a161e01f10f11187e6 | \n", "0 | \n", "
2 | \n", "538677f561e01f0be9e838f7 | \n", "53793d6d61e01f10f111a725 | \n", "2 | \n", "
3 | \n", "538677f561e01f0be9e838f7 | \n", "537933be61e01f10f111887f | \n", "2 | \n", "
4 | \n", "538677f561e01f0be9e838f7 | \n", "53793d8861e01f10f111a741 | \n", "0 | \n", "
5 rows \u00d7 3 columns
\n", "\n", " | user_id | \n", "age | \n", "country | \n", "day_off | \n", "dress_size | \n", "fashionista | \n", "like_styles_pref | \n", "like_styles_pref_bohemian chic | \n", "like_styles_pref_casual chic | \n", "like_styles_pref_classic | \n", "like_styles_pref_edgy | \n", "like_styles_pref_preppy | \n", "like_styles_pref_romantic | \n", "nolike_styles_pref | \n", "nolike_styles_pref_bohemian chic | \n", "nolike_styles_pref_casual chic | \n", "nolike_styles_pref_classic | \n", "nolike_styles_pref_edgy | \n", "nolike_styles_pref_preppy | \n", "nolike_styles_pref_romantic | \n", "\n", " |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "53962dfa3191490008a690df | \n", "55 | \n", "US | \n", "sport | \n", "10 | \n", "nolike | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
1 | \n", "53968e993d4e0c0007a2546f | \n", "50 | \n", "US | \n", "family | \n", "6 | \n", "nolike | \n", "[] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
2 | \n", "53971683b7d85a0008b1bbe2 | \n", "30 | \n", "ES | \n", "family | \n", "8 | \n", "nolike | \n", "[romantic, casual chic] | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
3 | \n", "539851a17f6ba70007ba8bdb | \n", "30 | \n", "ES | \n", "party | \n", "10 | \n", "nolike | \n", "[romantic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
4 | \n", "539770c4a9a4570008c28a9b | \n", "45 | \n", "ES | \n", "family | \n", "10 | \n", "ok | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[bohemian chic, edgy] | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
5 rows \u00d7 27 columns
\n", "\n", " | user_id | \n", "pin_id | \n", "rating | \n", "age | \n", "country | \n", "day_off | \n", "dress_size | \n", "fashionista | \n", "like_styles_pref | \n", "like_styles_pref_bohemian chic | \n", "like_styles_pref_casual chic | \n", "like_styles_pref_classic | \n", "like_styles_pref_edgy | \n", "like_styles_pref_preppy | \n", "like_styles_pref_romantic | \n", "nolike_styles_pref | \n", "nolike_styles_pref_bohemian chic | \n", "nolike_styles_pref_casual chic | \n", "nolike_styles_pref_classic | \n", "nolike_styles_pref_edgy | \n", "\n", " |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "538677f561e01f0be9e838f7 | \n", "537933bb61e01f10f111886f | \n", "0 | \n", "30 | \n", "US | \n", "sport | \n", "8 | \n", "love | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
1 | \n", "539604e40aa8e20007a976fb | \n", "537933bb61e01f10f111886f | \n", "2 | \n", "30 | \n", "ES | \n", "read | \n", "8 | \n", "ok | \n", "[casual chic] | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "[] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
2 | \n", "539616880aa8e20008b99e19 | \n", "537933bb61e01f10f111886f | \n", "2 | \n", "25 | \n", "ES | \n", "family | \n", "6 | \n", "ok | \n", "[casual chic, preppy] | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
3 | \n", "539628df3191490008a690d5 | \n", "537933bb61e01f10f111886f | \n", "2 | \n", "40 | \n", "US | \n", "sport | \n", "10 | \n", "ok | \n", "[classic] | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
4 | \n", "5396d3a6989960000db79381 | \n", "537933bb61e01f10f111886f | \n", "1 | \n", "30 | \n", "ES | \n", "party | \n", "10 | \n", "nolike | \n", "[classic, casual chic] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "[bohemian chic, edgy] | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
5 rows \u00d7 347 columns
\n", "