{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Building a fashion recommender (III): Users' analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"__The explanation of this implementation can be found at: http://www.rosariomgomez.me/__\n",
"\n",
"__Index__\n",
"1. [Build the pandas dataframes](#1.-Build-the-dataframes)\n",
"2. [Looking for users' similarities](#2.-Looking-for-users'-similarities)"
]
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"1. Build the dataframes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from create_features import create_user_features"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_db(host=None, port=None, db_name=None, user=None, password=None):\n",
"    \"\"\"Connect to MongoDB and return the database handle.\n",
"\n",
"    Arguments left as None are read from the environment (MONGO_HOST,\n",
"    MONGO_PORT, MONGO_DB, MONGO_USER, MONGO_PWD) so that neither the\n",
"    connection details nor the credentials are stored in the notebook.\n",
"\n",
"    host: server name or address (fallback default: 'localhost')\n",
"    port: server port, int or numeric string (fallback default: 27017)\n",
"    db_name: name of the database to open (required)\n",
"    user, password: credentials; authentication is skipped when no user is given\n",
"\n",
"    Returns the pymongo database object for db_name.\n",
"    Raises ValueError when no database name is available.\n",
"    \"\"\"\n",
"    import os\n",
"    from pymongo import MongoClient\n",
"\n",
"    env = os.environ\n",
"    host = host or env.get('MONGO_HOST', 'localhost')\n",
"    port = int(port or env.get('MONGO_PORT', 27017))\n",
"    db_name = db_name or env.get('MONGO_DB')\n",
"    if not db_name:\n",
"        raise ValueError('no database name: pass db_name or set MONGO_DB')\n",
"    user = user or env.get('MONGO_USER')\n",
"    password = password or env.get('MONGO_PWD')\n",
"\n",
"    client = MongoClient(host, port)\n",
"    db = client[db_name] #item access because the name is only known at run time\n",
"    if user:\n",
"        #NOTE(review): Database.authenticate() was removed in PyMongo 4.0; with a newer\n",
"        #driver pass username/password to MongoClient instead\n",
"        db.authenticate(user, password)\n",
"    return db"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#open the database handle that the following cells query\n",
"db = get_db()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#fetch every user document from the DB and turn each one into a feature vector\n",
"all_users = db.user.find()\n",
"list_users = []\n",
"for user in all_users:\n",
"    list_users.append(create_user_features(user))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#assemble the users' pandas dataframe; '_id' is renamed to 'user_id' to be in line with the ratings names\n",
"users = pd.DataFrame(list_users).rename(columns={'_id': 'user_id'})\n",
"users.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"
\n", " | user_id | \n", "age | \n", "country | \n", "day_off | \n", "dress_size | \n", "fashionista | \n", "like_styles_pref | \n", "like_styles_pref_bohemian chic | \n", "like_styles_pref_casual chic | \n", "like_styles_pref_classic | \n", "like_styles_pref_edgy | \n", "like_styles_pref_preppy | \n", "like_styles_pref_romantic | \n", "nolike_styles_pref | \n", "nolike_styles_pref_bohemian chic | \n", "nolike_styles_pref_casual chic | \n", "nolike_styles_pref_classic | \n", "nolike_styles_pref_edgy | \n", "nolike_styles_pref_preppy | \n", "nolike_styles_pref_romantic | \n", "\n", " |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "53962dfa3191490008a690df | \n", "55 | \n", "US | \n", "sport | \n", "10 | \n", "nolike | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "
1 | \n", "53968e993d4e0c0007a2546f | \n", "50 | \n", "US | \n", "family | \n", "6 | \n", "nolike | \n", "[] | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
2 | \n", "53971683b7d85a0008b1bbe2 | \n", "30 | \n", "ES | \n", "family | \n", "8 | \n", "nolike | \n", "[romantic, casual chic] | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
3 | \n", "539851a17f6ba70007ba8bdb | \n", "30 | \n", "ES | \n", "party | \n", "10 | \n", "nolike | \n", "[romantic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
4 | \n", "539770c4a9a4570008c28a9b | \n", "45 | \n", "ES | \n", "family | \n", "10 | \n", "ok | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[bohemian chic, edgy] | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "
5 rows \u00d7 27 columns
\n", "\n", " | user_id | \n", "pin_id | \n", "rating | \n", "
---|---|---|---|
0 | \n", "538677f561e01f0be9e838f7 | \n", "537933bb61e01f10f111886f | \n", "0 | \n", "
1 | \n", "538677f561e01f0be9e838f7 | \n", "537933a161e01f10f11187e6 | \n", "0 | \n", "
2 | \n", "538677f561e01f0be9e838f7 | \n", "53793d6d61e01f10f111a725 | \n", "2 | \n", "
3 | \n", "538677f561e01f0be9e838f7 | \n", "537933be61e01f10f111887f | \n", "2 | \n", "
4 | \n", "538677f561e01f0be9e838f7 | \n", "53793d8861e01f10f111a741 | \n", "0 | \n", "
5 rows \u00d7 3 columns
\n", "\n", " | user_id | \n", "pin_id | \n", "rating | \n", "age | \n", "country | \n", "day_off | \n", "dress_size | \n", "fashionista | \n", "like_styles_pref | \n", "like_styles_pref_bohemian chic | \n", "like_styles_pref_casual chic | \n", "like_styles_pref_classic | \n", "like_styles_pref_edgy | \n", "like_styles_pref_preppy | \n", "like_styles_pref_romantic | \n", "nolike_styles_pref | \n", "nolike_styles_pref_bohemian chic | \n", "nolike_styles_pref_casual chic | \n", "nolike_styles_pref_classic | \n", "nolike_styles_pref_edgy | \n", "\n", " |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "538677f561e01f0be9e838f7 | \n", "537933bb61e01f10f111886f | \n", "0 | \n", "30 | \n", "US | \n", "sport | \n", "8 | \n", "love | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
1 | \n", "538677f561e01f0be9e838f7 | \n", "537933a161e01f10f11187e6 | \n", "0 | \n", "30 | \n", "US | \n", "sport | \n", "8 | \n", "love | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
2 | \n", "538677f561e01f0be9e838f7 | \n", "53793d6d61e01f10f111a725 | \n", "2 | \n", "30 | \n", "US | \n", "sport | \n", "8 | \n", "love | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
3 | \n", "538677f561e01f0be9e838f7 | \n", "537933be61e01f10f111887f | \n", "2 | \n", "30 | \n", "US | \n", "sport | \n", "8 | \n", "love | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
4 | \n", "538677f561e01f0be9e838f7 | \n", "53793d8861e01f10f111a741 | \n", "0 | \n", "30 | \n", "US | \n", "sport | \n", "8 | \n", "love | \n", "[classic, casual chic, preppy] | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "[edgy] | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "
5 rows \u00d7 29 columns
\n", "\n", " | size | \n", "mean | \n", "std | \n", "
---|---|---|---|
profession | \n", "\n", " | \n", " | \n", " |
art | \n", "104 | \n", "0.884615 | \n", "0.832050 | \n", "
business | \n", "548 | \n", "1.003650 | \n", "0.836433 | \n", "
other | \n", "802 | \n", "1.019950 | \n", "0.862903 | \n", "
science | \n", "3006 | \n", "1.021956 | \n", "0.865170 | \n", "
social | \n", "112 | \n", "0.482143 | \n", "0.571794 | \n", "
5 rows \u00d7 3 columns
\n", "\n", " | size | \n", "mean | \n", "std | \n", "
---|---|---|---|
day_off | \n", "\n", " | \n", " | \n", " |
family | \n", "604 | \n", "0.890728 | \n", "0.826058 | \n", "
party | \n", "1234 | \n", "1.009724 | \n", "0.850841 | \n", "
read | \n", "132 | \n", "1.106061 | \n", "0.896640 | \n", "
shop | \n", "1666 | \n", "1.094838 | \n", "0.862903 | \n", "
sport | \n", "936 | \n", "0.888889 | \n", "0.856043 | \n", "
5 rows \u00d7 3 columns
\n", "\n", " | size | \n", "mean | \n", "std | \n", "
---|---|---|---|
fashionista | \n", "\n", " | \n", " | \n", " |
love | \n", "1646 | \n", "1.002430 | \n", "0.865671 | \n", "
nolike | \n", "414 | \n", "0.850242 | \n", "0.819535 | \n", "
ok | \n", "2512 | \n", "1.028662 | \n", "0.857574 | \n", "
3 rows \u00d7 3 columns
\n", "\n", " | \n", " | size | \n", "mean | \n", "std | \n", "
---|---|---|---|---|
age | \n", "country | \n", "\n", " | \n", " | \n", " |
20 | \n", "ES | \n", "24 | \n", "1.166667 | \n", "0.816497 | \n", "
US | \n", "176 | \n", "0.994318 | \n", "0.851870 | \n", "|
25 | \n", "ES | \n", "506 | \n", "1.229249 | \n", "0.880377 | \n", "
US | \n", "27 | \n", "0.740741 | \n", "0.813000 | \n", "|
30 | \n", "ES | \n", "286 | \n", "1.101399 | \n", "0.842010 | \n", "
SE | \n", "22 | \n", "1.363636 | \n", "0.847711 | \n", "|
US | \n", "476 | \n", "0.848739 | \n", "0.839936 | \n", "|
35 | \n", "ES | \n", "193 | \n", "0.772021 | \n", "0.809993 | \n", "
US | \n", "151 | \n", "0.854305 | \n", "0.778009 | \n", "|
40 | \n", "IE | \n", "288 | \n", "0.913194 | \n", "0.828141 | \n", "
US | \n", "21 | \n", "0.952381 | \n", "0.920662 | \n", "|
45 | \n", "ES | \n", "25 | \n", "0.920000 | \n", "0.812404 | \n", "
50 | \n", "US | \n", "4 | \n", "1.250000 | \n", "0.957427 | \n", "
55 | \n", "ES | \n", "83 | \n", "1.301205 | \n", "0.851612 | \n", "
US | \n", "4 | \n", "0.500000 | \n", "0.577350 | \n", "
15 rows \u00d7 3 columns
\n", "