{
"cells": [
{
"cell_type": "markdown",
"id": "4c157315",
"metadata": {},
"source": [
"# Improve Product Recommendation using Sentiment Analysis\n",
"\n",
"- Watch [Other Interesting Data Science Topics](https://www.youtube.com/channel/UC4yh4xPxRP0-bLG_ldnLCHA/videos)\n",
"- Subscribe on [YouTube](https://www.youtube.com/channel/UC4yh4xPxRP0-bLG_ldnLCHA?sub_confirmation=1)\n",
"- Created on: 26-MAY-2022\n",
"- Last Updated on: 26-MAY-2022"
]
},
{
"cell_type": "markdown",
"id": "f4d231b9",
"metadata": {},
"source": [
"## Recommendation"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "301c0740",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"################################\n",
"## STEP 01: Import Libraries ##\n",
"################################\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.metrics.pairwise import pairwise_distances\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import display"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4fa09dfc",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" userId | \n",
" rating | \n",
" prod_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 28266 | \n",
" daitaliana23 | \n",
" 5 | \n",
" Storkcraft Tuscany Glider and Ottoman, Beige C... | \n",
"
\n",
" \n",
" 15603 | \n",
" beverly | \n",
" 5 | \n",
" Lysol Concentrate Deodorizing Cleaner, Origina... | \n",
"
\n",
" \n",
" 7839 | \n",
" amy77 | \n",
" 5 | \n",
" Clorox Disinfecting Wipes Value Pack Scented 1... | \n",
"
\n",
" \n",
" 4850 | \n",
" dmann10101 | \n",
" 5 | \n",
" The Resident Evil Collection 5 Discs (blu-Ray) | \n",
"
\n",
" \n",
" 4699 | \n",
" morenito021582 | \n",
" 5 | \n",
" The Resident Evil Collection 5 Discs (blu-Ray) | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" userId rating \\\n",
"28266 daitaliana23 5 \n",
"15603 beverly 5 \n",
"7839 amy77 5 \n",
"4850 dmann10101 5 \n",
"4699 morenito021582 5 \n",
"\n",
" prod_name \n",
"28266 Storkcraft Tuscany Glider and Ottoman, Beige C... \n",
"15603 Lysol Concentrate Deodorizing Cleaner, Origina... \n",
"7839 Clorox Disinfecting Wipes Value Pack Scented 1... \n",
"4850 The Resident Evil Collection 5 Discs (blu-Ray) \n",
"4699 The Resident Evil Collection 5 Discs (blu-Ray) "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#############################\n",
"## STEP 02: Read Data    ####\n",
"#############################\n",
"# Reading ratings file (columns: userId, rating, prod_name) from the blog's GitHub repo.\n",
"# latin-1 encoding handles non-UTF8 characters present in product names.\n",
"ratings = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_ratings_final.csv',\\\n",
"                     encoding='latin-1')\n",
"# ratings.reset_index(drop=True, inplace=True)\n",
"# Show a reproducible 5-row sample to sanity-check the load.\n",
"display(ratings.sample(n=5, random_state=42))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "74d8ecb2",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"#################################\n",
"## STEP 03: Data Preparation ####\n",
"#################################\n",
"\n",
"# Helper: pivot the long-format ratings (userId, prod_name, rating) into a wide\n",
"# user x product matrix. `fillby` (if given) replaces NaN cells, i.e. products\n",
"# the user never rated.\n",
"def apply_pivot(df,fillby = None):\n",
"    if fillby is not None:\n",
"        return df.pivot_table(index='userId', columns='prod_name',values='rating').fillna(fillby)\n",
"    return df.pivot_table(index='userId', columns='prod_name',values='rating')\n",
"\n",
"\n",
"#3.1 Dividing the dataset into train and test\n",
"train, test = train_test_split(ratings, test_size=0.30, random_state=42)\n",
"# Keep only test users that also appear in train (unseen users cannot be scored)\n",
"test = test[test.userId.isin(train.userId)]\n",
"#3.2 Apply pivot operation and fillna used to replace NaN values with 0 i.e. where user didn't made any rating\n",
"df_train_pivot = apply_pivot(df = train, fillby = 0)\n",
"df_test_pivot = apply_pivot(df = test, fillby = 0)\n",
"#3.3 dummy dataset (train and test)\n",
"## Train: 1 marks products the user has NOT rated (candidates for recommendation);\n",
"## multiplied later with predicted ratings to zero-out already-rated products.\n",
"dummy_train = train.copy()\n",
"dummy_train['rating'] = dummy_train['rating'].apply(lambda x: 0 if x>=1 else 1)\n",
"dummy_train = apply_pivot(df = dummy_train, fillby = 1)\n",
"## Test: 1 marks products the user HAS rated (only those cells count in RMSE).\n",
"dummy_test = test.copy()\n",
"dummy_test['rating'] = dummy_test['rating'].apply(lambda x: 1 if x>=1 else 0)\n",
"dummy_test = apply_pivot(df = dummy_test, fillby = 0)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6c193726",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" prod_name | \n",
" 0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest | \n",
" 100:Complete First Season (blu-Ray) | \n",
" 2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black | \n",
" 2x Ultra Era with Oxi Booster, 50fl oz | \n",
" 42 Dual Drop Leaf Table with 2 Madrid Chairs\" | \n",
" 4C Grated Parmesan Cheese 100% Natural 8oz Shaker | \n",
" Africa's Best No-Lye Dual Conditioning Relaxer System Super | \n",
" Alberto VO5 Salon Series Smooth Plus Sleek Shampoo | \n",
" All,bran Complete Wheat Flakes, 18 Oz. | \n",
" Ambi Complexion Cleansing Bar | \n",
" ... | \n",
" Vicks Vaporub, Regular, 3.53oz | \n",
" Voortman Sugar Free Fudge Chocolate Chip Cookies | \n",
" Wagan Smartac 80watt Inverter With Usb | \n",
" Wallmount Server Cabinet (450mm, 9 RU) | \n",
" Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee | \n",
" Wedding Wishes Wedding Guest Book | \n",
" Weleda Everon Lip Balm | \n",
" Windex Original Glass Cleaner Refill 67.6oz (2 Liter) | \n",
" Yes To Carrots Nourishing Body Wash | \n",
" Yes To Grapefruit Rejuvenating Body Wash | \n",
"
\n",
" \n",
" userId | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" brewno | \n",
" 3.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" deelee | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" embum | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" erinn | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" rmtarboro | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" smokey bear | \n",
" 3.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" spicesea | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
7 rows × 231 columns
\n",
"
"
],
"text/plain": [
"prod_name 0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest \\\n",
"userId \n",
"brewno 3.0 \n",
"deelee 0.0 \n",
"embum 5.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 3.0 \n",
"spicesea 5.0 \n",
"\n",
"prod_name 100:Complete First Season (blu-Ray) \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name 2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name 2x Ultra Era with Oxi Booster, 50fl oz \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name 42 Dual Drop Leaf Table with 2 Madrid Chairs\" \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name 4C Grated Parmesan Cheese 100% Natural 8oz Shaker \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 5.0 \n",
"embum 0.0 \n",
"erinn 5.0 \n",
"rmtarboro 5.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Africa's Best No-Lye Dual Conditioning Relaxer System Super \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Alberto VO5 Salon Series Smooth Plus Sleek Shampoo \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name All,bran Complete Wheat Flakes, 18 Oz. \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Ambi Complexion Cleansing Bar ... \\\n",
"userId ... \n",
"brewno 0.0 ... \n",
"deelee 0.0 ... \n",
"embum 0.0 ... \n",
"erinn 0.0 ... \n",
"rmtarboro 0.0 ... \n",
"smokey bear 0.0 ... \n",
"spicesea 0.0 ... \n",
"\n",
"prod_name Vicks Vaporub, Regular, 3.53oz \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Voortman Sugar Free Fudge Chocolate Chip Cookies \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Wagan Smartac 80watt Inverter With Usb \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Wallmount Server Cabinet (450mm, 9 RU) \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Wedding Wishes Wedding Guest Book Weleda Everon Lip Balm \\\n",
"userId \n",
"brewno 0.0 0.0 \n",
"deelee 0.0 0.0 \n",
"embum 0.0 0.0 \n",
"erinn 0.0 0.0 \n",
"rmtarboro 0.0 0.0 \n",
"smokey bear 0.0 0.0 \n",
"spicesea 0.0 0.0 \n",
"\n",
"prod_name Windex Original Glass Cleaner Refill 67.6oz (2 Liter) \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Yes To Carrots Nourishing Body Wash \\\n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"prod_name Yes To Grapefruit Rejuvenating Body Wash \n",
"userId \n",
"brewno 0.0 \n",
"deelee 0.0 \n",
"embum 0.0 \n",
"erinn 0.0 \n",
"rmtarboro 0.0 \n",
"smokey bear 0.0 \n",
"spicesea 0.0 \n",
"\n",
"[7 rows x 231 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_train_pivot[(df_train_pivot['0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest']!=0) | \\\n",
" (df_train_pivot['4C Grated Parmesan Cheese 100% Natural 8oz Shaker']!=0)]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "73b81faa",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#####################################\n",
"## STEP 04: User-User Similarity ####\n",
"#####################################\n",
"\n",
"# to calculate mean, use only ratings given by user instead of fillna by 0 as it increase denominator in mean\n",
"mean = np.nanmean(apply_pivot(df = train), axis = 1)\n",
"# Subtract each user's mean rating (mean-centered / adjusted ratings)\n",
"df_train_subtracted = (apply_pivot(df = train).T-mean).T\n",
"# Make rating=0 where user hasn't given any rating\n",
"df_train_subtracted.fillna(0, inplace = True)\n",
"# Creating the User Similarity Matrix using pairwise_distance function. shape of user_correlation is userXuser i.e. 18025X18025\n",
"# (cosine similarity = 1 - cosine distance)\n",
"user_correlation = 1 - pairwise_distances(df_train_subtracted, metric='cosine')\n",
"# Guard against NaN similarities (e.g. users with all-zero centered vectors)\n",
"user_correlation[np.isnan(user_correlation)] = 0\n",
"# Optional clipping of negative correlations (left disabled)\n",
"# user_correlation[user_correlation<0] = 0\n",
"# Convert the user_correlation matrix into dataframe, labelled by userId on both axes\n",
"user_correlation_df = pd.DataFrame(user_correlation)\n",
"user_correlation_df['userId'] = df_train_subtracted.index\n",
"user_correlation_df.set_index('userId',inplace=True)\n",
"user_correlation_df.columns = df_train_subtracted.index.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b1748140",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((18025, 18025), (18025, 231))"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_correlation.shape,df_train_pivot.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f033747d",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"###########################################\n",
"## STEP 05: Predict Rating (User-User) ####\n",
"###########################################\n",
"# Rating predicted by the user (for rated & non rated product both) is the weighted sum of correlation with the product rating (as present in the rating dataset). \n",
"user_predicted_ratings = np.dot(user_correlation, df_train_pivot)\n",
"\n",
"# To find only product not rated by the user, ignore the product rated by the user by making it zero. \n",
"# (dummy_train is 1 for unrated cells, 0 for rated ones -- see STEP 03)\n",
"user_final_rating = np.multiply(user_predicted_ratings,dummy_train)\n",
"\n",
"# Optional rescaling of predictions to the 1-5 rating scale (left disabled).\n",
"# scaler = MinMaxScaler(feature_range=(1, 5))\n",
"# scaler.fit(user_final_rating)\n",
"# user_final_rating = scaler.transform(user_final_rating)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ebca7115",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter your user idjoshua\n"
]
}
],
"source": [
"################################################################\n",
"## STEP 06: Find Top N recommendation for User (User-User) #####\n",
"################################################################\n",
"\n",
"# Return the `topn` products with the highest predicted rating for `userid`,\n",
"# as a DataFrame with columns [prod_name, predicted_ratings].\n",
"def find_top_recommendations(pred_rating_df, userid, topn):\n",
"    recommendation = pred_rating_df.loc[userid].sort_values(ascending=False)[0:topn]\n",
"    recommendation = pd.DataFrame(recommendation).reset_index().rename(columns={userid:'predicted_ratings'})\n",
"    return recommendation\n",
"\n",
"# NOTE(review): input() already returns str, so the str() wrapper is redundant.\n",
"# .loc[userid] raises KeyError if the entered id is absent from the train matrix.\n",
"user_input = str(input(\"Enter your user id\"))\n",
"recommendation_user_user = find_top_recommendations(user_final_rating, user_input, 5)\n",
"recommendation_user_user['userId'] = user_input"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "537a2db6",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommended products for user id:joshua as below\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prod_name | \n",
" predicted_ratings | \n",
" userId | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Clorox Disinfecting Wipes Value Pack Scented 1... | \n",
" 5.226926 | \n",
" joshua | \n",
"
\n",
" \n",
" 1 | \n",
" Lysol Concentrate Deodorizing Cleaner, Origina... | \n",
" 3.750000 | \n",
" joshua | \n",
"
\n",
" \n",
" 2 | \n",
" Head & Shoulders Dandruff Shampoo Ocean Lift 2... | \n",
" 3.535534 | \n",
" joshua | \n",
"
\n",
" \n",
" 3 | \n",
" Bounce Dryer Sheets, Fresh Linen, 160 sheets | \n",
" 3.535534 | \n",
" joshua | \n",
"
\n",
" \n",
" 4 | \n",
" The Resident Evil Collection 5 Discs (blu-Ray) | \n",
" 3.345348 | \n",
" joshua | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prod_name predicted_ratings \\\n",
"0 Clorox Disinfecting Wipes Value Pack Scented 1... 5.226926 \n",
"1 Lysol Concentrate Deodorizing Cleaner, Origina... 3.750000 \n",
"2 Head & Shoulders Dandruff Shampoo Ocean Lift 2... 3.535534 \n",
"3 Bounce Dryer Sheets, Fresh Linen, 160 sheets 3.535534 \n",
"4 The Resident Evil Collection 5 Discs (blu-Ray) 3.345348 \n",
"\n",
" userId \n",
"0 joshua \n",
"1 joshua \n",
"2 joshua \n",
"3 joshua \n",
"4 joshua "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Earlier rated products by user id:joshua as below\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" userId | \n",
" rating | \n",
" prod_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" joshua | \n",
" 5 | \n",
" Pink Friday: Roman Reloaded Re-Up (w/dvd) | \n",
"
\n",
" \n",
" 17718 | \n",
" joshua | \n",
" 5 | \n",
" Smead174 Recycled Letter Size Manila File Back... | \n",
"
\n",
" \n",
" 22379 | \n",
" joshua | \n",
" 5 | \n",
" Cheetos Crunchy Flamin' Hot Cheese Flavored Sn... | \n",
"
\n",
" \n",
" 1541 | \n",
" joshua | \n",
" 3 | \n",
" Dark Shadows (includes Digital Copy) (ultravio... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" userId rating prod_name\n",
"0 joshua 5 Pink Friday: Roman Reloaded Re-Up (w/dvd)\n",
"17718 joshua 5 Smead174 Recycled Letter Size Manila File Back...\n",
"22379 joshua 5 Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...\n",
"1541 joshua 3 Dark Shadows (includes Digital Copy) (ultravio..."
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(\"Recommended products for user id:{} as below\".format(user_input))\n",
"display(recommendation_user_user)\n",
"print(\"Earlier rated products by user id:{} as below\".format(user_input))\n",
"display(train[train['userId']==user_input].sort_values(['rating'],ascending=False))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "991ffd6c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.506663023687151\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\aakashgoel\\Anaconda3\\envs\\forecast_anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:464: RuntimeWarning: All-NaN slice encountered\n",
" data_min = np.nanmin(X, axis=0)\n",
"C:\\Users\\aakashgoel\\Anaconda3\\envs\\forecast_anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:465: RuntimeWarning: All-NaN slice encountered\n",
" data_max = np.nanmax(X, axis=0)\n"
]
}
],
"source": [
"################################################\n",
"## STEP 07: Evaluation (User-User) on test #####\n",
"################################################\n",
"\n",
"#Filter user correlation only for user which is in test, test is subset/equal of train in terms of userId\n",
"\n",
"user_correlation_test_df = user_correlation_df[user_correlation_df.index.isin(test.userId)]\n",
"user_correlation_test_df = user_correlation_test_df[list(set(test.userId))]\n",
"# user_correlation_test_df[user_correlation_test_df<0]=0\n",
"\n",
"#Get test user predicted rating\n",
"test_user_predicted_ratings = np.dot(user_correlation_test_df, df_test_pivot)\n",
"test_user_predicted_ratings = np.multiply(test_user_predicted_ratings,dummy_test)\n",
"#Get NaN where user never rated as it shouldn't contribute in calculating RMSE\n",
"test_user_predicted_ratings = test_user_predicted_ratings[test_user_predicted_ratings>0]\n",
"# NOTE(review): columns with no positive prediction become all-NaN, which triggers\n",
"# the 'All-NaN slice encountered' RuntimeWarning from MinMaxScaler seen in the output.\n",
"scaler = MinMaxScaler(feature_range=(1, 5))\n",
"scaler.fit(test_user_predicted_ratings)\n",
"test_user_predicted_ratings = scaler.transform(test_user_predicted_ratings)\n",
"\n",
"# RMSE over the cells users actually rated (non-NaN after the masking above)\n",
"total_non_nan = np.count_nonzero(~np.isnan(test_user_predicted_ratings))\n",
"rmse = (np.sum(np.sum((apply_pivot(df = test) - test_user_predicted_ratings)**2))/total_non_nan)**0.5\n",
"print(rmse)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "7a217ded",
"metadata": {},
"outputs": [],
"source": [
"############################\n",
"## STEP 08: Save Model  ####\n",
"############################\n",
"# Persist the predicted-ratings matrix for reuse in the 'Connecting dot' section.\n",
"# NOTE(review): assumes a ./model directory already exists -- confirm, else open() fails.\n",
"pickle.dump(user_final_rating,open('./model/user_final_rating.pkl','wb'))"
]
},
{
"cell_type": "markdown",
"id": "245a1358",
"metadata": {},
"source": [
"## Sentiment"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "92250611",
"metadata": {},
"outputs": [],
"source": [
"################################\n",
"## STEP 01: Import Libraries ##\n",
"################################\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import confusion_matrix, classification_report\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from imblearn import over_sampling\n",
"from IPython.display import display"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4caafb8c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Review | \n",
" user_sentiment | \n",
"
\n",
" \n",
" \n",
" \n",
" 9329 | \n",
" fresh clean smell everything need quick clean ... | \n",
" 1 | \n",
"
\n",
" \n",
" 4160 | \n",
" great vacuum love lightweight vacuum easy carr... | \n",
" 1 | \n",
"
\n",
" \n",
" 18500 | \n",
" smell great wipe easy use work smell great | \n",
" 1 | \n",
"
\n",
" \n",
" 8840 | \n",
" product count use clorox wipe everything trave... | \n",
" 1 | \n",
"
\n",
" \n",
" 5098 | \n",
" great movie excellent movie add blu ray collec... | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Review user_sentiment\n",
"9329 fresh clean smell everything need quick clean ... 1\n",
"4160 great vacuum love lightweight vacuum easy carr... 1\n",
"18500 smell great wipe easy use work smell great 1\n",
"8840 product count use clorox wipe everything trave... 1\n",
"5098 great movie excellent movie add blu ray collec... 1"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#############################\n",
"## STEP 02: Read Data    ####\n",
"#############################\n",
"# Reading product review sentiment file (columns: Review, user_sentiment with 0/1 labels)\n",
"df_prod_review = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_review_sentiment.csv',\\\n",
"                             encoding='latin-1')\n",
"# Reproducible 5-row sample to sanity-check the load.\n",
"display(df_prod_review.sample(n=5, random_state=42))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e82554eb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking distribution of +ve and -ve review sentiment: \n",
"1 0.888401\n",
"0 0.111599\n",
"Name: user_sentiment, dtype: float64\n",
"Checking distribution of +ve and -ve review sentiment after oversampling: \n",
"1 0.5\n",
"0 0.5\n",
"Name: user_sentiment, dtype: float64\n"
]
}
],
"source": [
"#################################\n",
"## STEP 03: Data Preparation ####\n",
"#################################\n",
"x=df_prod_review['Review']\n",
"y=df_prod_review['user_sentiment']\n",
"print(\"Checking distribution of +ve and -ve review sentiment: \\n{}\".format(y.value_counts(normalize=True)))\n",
"# Split the dataset into test and train\n",
"X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=50)\n",
"\n",
"#As we saw above that data is imbalanced (~89% positive); balance ONLY the training\n",
"#split via random over-sampling, so the test split keeps the natural distribution.\n",
"ros = over_sampling.RandomOverSampler(random_state=0)\n",
"X_train, y_train = ros.fit_resample(pd.DataFrame(X_train), pd.Series(y_train))\n",
"print(\"Checking distribution of +ve and -ve review sentiment after oversampling: \\n{}\".format(y_train.value_counts(normalize=True)))\n",
"#convert into list of string (fit_resample returned a one-column DataFrame)\n",
"X_train = X_train['Review'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fa6f1d9d",
"metadata": {},
"outputs": [],
"source": [
"################################################################\n",
"## STEP 04: Feature Engineering (Convert text into numbers) ####\n",
"################################################################\n",
"# TF-IDF over 1-3 grams; min_df/max_df prune very rare (<1% of docs) and very\n",
"# common (>80% of docs) terms, keeping the vocabulary small (263 features per the\n",
"# shape check below).\n",
"word_vectorizer = TfidfVectorizer(strip_accents='unicode', token_pattern=r'\\w{1,}',\\\n",
"                                  ngram_range=(1, 3), stop_words='english', sublinear_tf=True, max_df = 0.80, min_df = 0.01)\n",
"\n",
"# Fit on train only to avoid leaking test vocabulary/IDF statistics\n",
"word_vectorizer.fit(X_train)\n",
"# transforming the train and test datasets\n",
"X_train_transformed = word_vectorizer.transform(X_train)\n",
"X_test_transformed = word_vectorizer.transform(X_test.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7c61bde3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((33468, 263), (8062, 263))"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train_transformed.shape, X_test_transformed.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50db52e2",
"metadata": {},
"outputs": [],
"source": [
"# print(list(word_vectorizer.get_feature_names()))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "936aaf31",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Evaluation on Train dataset ..\n",
" precision recall f1-score support\n",
"\n",
" 0 0.82 0.83 0.82 16734\n",
" 1 0.83 0.81 0.82 16734\n",
"\n",
" accuracy 0.82 33468\n",
" macro avg 0.82 0.82 0.82 33468\n",
"weighted avg 0.82 0.82 0.82 33468\n",
"\n",
"sensitivity: 0.81\n",
"specificity: 0.83\n",
"Evaluation on Test dataset ..\n",
" precision recall f1-score support\n",
"\n",
" 0 0.35 0.80 0.49 922\n",
" 1 0.97 0.81 0.88 7140\n",
"\n",
" accuracy 0.81 8062\n",
" macro avg 0.66 0.81 0.69 8062\n",
"weighted avg 0.90 0.81 0.84 8062\n",
"\n",
"sensitivity: 0.81\n",
"specificity: 0.8\n"
]
}
],
"source": [
"###############################################\n",
"## STEP 05: ML Model (Logistic Regression) ####\n",
"###############################################\n",
"\n",
"# Print a classification report plus sensitivity (recall of class 1) and\n",
"# specificity (recall of class 0) derived from the confusion matrix.\n",
"# sklearn confusion_matrix layout for labels [0,1]: rows = actual, cols = predicted.\n",
"def evaluate_model(y_pred,y_actual):\n",
"    print(classification_report(y_true = y_actual, y_pred = y_pred))\n",
"    #confusion matrix\n",
"    cm = confusion_matrix(y_true = y_actual, y_pred = y_pred)\n",
"    TN = cm[0, 0] \n",
"    FP = cm[0, 1]\n",
"    FN = cm[1, 0]\n",
"    TP = cm[1, 1]\n",
"    #Calculating the Sensitivity\n",
"    sensitivity = round(TP/float(FN + TP),2)\n",
"    print(\"sensitivity: {}\".format(sensitivity))\n",
"    #Calculating the Specificity\n",
"    specificity = round(TN / float(TN + FP),2)\n",
"    print(\"specificity: {}\".format(specificity))\n",
"\n",
"#4.1 Model Training (default LogisticRegression hyperparameters)\n",
"logit = LogisticRegression()\n",
"logit.fit(X_train_transformed,y_train)\n",
"#4.2 Prediction on Train Data\n",
"y_pred_train= logit.predict(X_train_transformed)\n",
"#4.3 Prediction on Test Data\n",
"y_pred_test = logit.predict(X_test_transformed)\n",
"#4.4 Evaluation on Train\n",
"print(\"Evaluation on Train dataset ..\")\n",
"evaluate_model(y_pred = y_pred_train, y_actual = y_train)\n",
"print(\"Evaluation on Test dataset ..\")\n",
"#4.5 Evaluation on Test\n",
"evaluate_model(y_pred = y_pred_test, y_actual = y_test)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "fef2c721",
"metadata": {},
"outputs": [],
"source": [
"############################\n",
"## STEP 06: Save Model  ####\n",
"############################\n",
"# Persist classifier + fitted vectorizer so the 'Connecting dot' section can\n",
"# re-load them without retraining (requires an existing ./model directory).\n",
"pickle.dump(logit,open('./model/logit_model.pkl', 'wb'))\n",
"pickle.dump(word_vectorizer,open('./model/word_vectorizer.pkl','wb'))"
]
},
{
"cell_type": "markdown",
"id": "131bd76b",
"metadata": {},
"source": [
"## Connecting dot -- Use Sentiment in Improving Recommendation"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "74222c66",
"metadata": {},
"outputs": [],
"source": [
"################################\n",
"## STEP 01: Import Libraries ##\n",
"################################\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"from sklearn.preprocessing import MinMaxScaler"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b9e11dfa",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prod_name | \n",
" Review | \n",
"
\n",
" \n",
" \n",
" \n",
" 2501 | \n",
" Hawaiian Punch Berry Limeade Blast Juice | \n",
" pretty good stuff much sugar kid like | \n",
"
\n",
" \n",
" 21252 | \n",
" Godzilla 3d Includes Digital Copy Ultraviolet ... | \n",
" enteraining great interesting version classic ... | \n",
"
\n",
" \n",
" 23503 | \n",
" Godzilla 3d Includes Digital Copy Ultraviolet ... | \n",
" best godzilla date like previous godzilla film... | \n",
"
\n",
" \n",
" 26827 | \n",
" Storkcraft Tuscany Glider and Ottoman, Beige C... | \n",
" comfy good put baby sleep calming sister mom n... | \n",
"
\n",
" \n",
" 18210 | \n",
" Clorox Disinfecting Bathroom Cleaner | \n",
" product easy use product easy use open use har... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prod_name \\\n",
"2501 Hawaiian Punch Berry Limeade Blast Juice \n",
"21252 Godzilla 3d Includes Digital Copy Ultraviolet ... \n",
"23503 Godzilla 3d Includes Digital Copy Ultraviolet ... \n",
"26827 Storkcraft Tuscany Glider and Ottoman, Beige C... \n",
"18210 Clorox Disinfecting Bathroom Cleaner \n",
"\n",
" Review \n",
"2501 pretty good stuff much sugar kid like \n",
"21252 enteraining great interesting version classic ... \n",
"23503 best godzilla date like previous godzilla film... \n",
"26827 comfy good put baby sleep calming sister mom n... \n",
"18210 product easy use product easy use open use har... "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#############################\n",
"## STEP 02: Read Data    ####\n",
"#############################\n",
"# Reading product review data (columns: prod_name, Review)\n",
"df_prod_review = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_review.csv',\\\n",
"                             encoding='latin-1')\n",
"# Reproducible 5-row sample to sanity-check the load.\n",
"display(df_prod_review.sample(n=5, random_state=42))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "238b4adc",
"metadata": {},
"outputs": [],
"source": [
"###########################\n",
"## STEP 03: Load Model ####\n",
"###########################\n",
"# Re-load the three artifacts saved by the two sections above.\n",
"# NOTE(review): pickle.load executes arbitrary code on load; only unpickle trusted files.\n",
"\n",
"model = pickle.load(open('./model/logit_model.pkl', 'rb'))\n",
"word_vectorizer = pickle.load(open('./model/word_vectorizer.pkl','rb'))\n",
"user_final_rating = pickle.load(open('./model/user_final_rating.pkl','rb'))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "902c72a3",
"metadata": {},
"outputs": [],
"source": [
"##########################################################################\n",
"## STEP 04: Get positive review Recommendation only for given user id ####\n",
"##########################################################################\n",
"\n",
"# Return the `topn` products with the highest predicted rating for `userid`,\n",
"# as a DataFrame with columns [prod_name, predicted_ratings].\n",
"# NOTE(review): duplicates the function of the same name defined in the\n",
"# Recommendation section above; keep the two in sync (or move to a shared module).\n",
"def find_top_recommendations(pred_rating_df, userid, topn):\n",
"    recommendation = pred_rating_df.loc[userid].sort_values(ascending=False)[0:topn]\n",
"    recommendation = pd.DataFrame(recommendation).reset_index().rename(columns={userid:'predicted_ratings'})\n",
"    return recommendation\n",
"\n",
"def get_sentiment_product(x):\n",
"    \"\"\"Fraction of reviews of product `x` predicted positive by the sentiment model (0.0-1.0).\"\"\"\n",
"    ## Get review list for given product\n",
"    product_name_review_list = df_prod_review[df_prod_review['prod_name']== x]['Review'].tolist()\n",
"    ## Transform review list into DTM (Document/review Term Matrix)\n",
"    features= word_vectorizer.transform(product_name_review_list)\n",
"    ## Predict sentiment\n",
"    return model.predict(features).mean()\n",
"\n",
"def find_top_pos_recommendation(user_final_rating, user_input, df_prod_review, word_vectorizer,\\\n",
"                                model, no_recommendation, no_candidates=10):\n",
"    \"\"\"Display top `no_recommendation` products for `user_input`, re-ranked by review sentiment.\n",
"\n",
"    `no_candidates` (default 10; previously a hard-coded magic number) controls how\n",
"    many user-user recommendations are considered before sentiment re-ranking.\n",
"    \"\"\"\n",
"    ## Generate top recommendations using user-user based recommendation system w/o using sentiment analysis\n",
"    recommendation_user_user = find_top_recommendations(user_final_rating, user_input, no_candidates)\n",
"    recommendation_user_user['userId'] = user_input\n",
"    ## filter out recommendations where predicted rating is zero\n",
"    recommendation_user_user = recommendation_user_user[recommendation_user_user['predicted_ratings']!=0]\n",
"    print(\"Recommended products for user id:{} without using sentiment\".format(user_input))\n",
"    display(recommendation_user_user)\n",
"    ## Get overall sentiment score for each recommended product\n",
"    recommendation_user_user['sentiment_score'] = recommendation_user_user['prod_name'].apply(get_sentiment_product)\n",
"    ## Transform scale of sentiment so that it can be manipulated with predicted rating score\n",
"    scaler = MinMaxScaler(feature_range=(1, 5))\n",
"    scaler.fit(recommendation_user_user[['sentiment_score']])\n",
"    recommendation_user_user['sentiment_score'] = scaler.transform(recommendation_user_user[['sentiment_score']])\n",
"    ## Final ranking score = 1*predicted rating + 2*normalized sentiment score (scale 1-5)\n",
"    recommendation_user_user['product_ranking_score'] = 1*recommendation_user_user['predicted_ratings'] + \\\n",
"                                                        2*recommendation_user_user['sentiment_score']\n",
"    print(\"Recommended products for user id:{} after using sentiment\".format(user_input))\n",
"    ## Sort product ranking score in descending order and show only top `no_recommendation`\n",
"    display(recommendation_user_user.sort_values(by = ['product_ranking_score'],ascending = False).head(no_recommendation))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "4cfb8a8d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enter your user idjoshua\n",
"Recommended products for user id:joshua without using sentiment\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prod_name | \n",
" predicted_ratings | \n",
" userId | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Clorox Disinfecting Wipes Value Pack Scented 1... | \n",
" 5.226926 | \n",
" joshua | \n",
"
\n",
" \n",
" 1 | \n",
" Lysol Concentrate Deodorizing Cleaner, Origina... | \n",
" 3.750000 | \n",
" joshua | \n",
"
\n",
" \n",
" 2 | \n",
" Head & Shoulders Dandruff Shampoo Ocean Lift 2... | \n",
" 3.535534 | \n",
" joshua | \n",
"
\n",
" \n",
" 3 | \n",
" Bounce Dryer Sheets, Fresh Linen, 160 sheets | \n",
" 3.535534 | \n",
" joshua | \n",
"
\n",
" \n",
" 4 | \n",
" The Resident Evil Collection 5 Discs (blu-Ray) | \n",
" 3.345348 | \n",
" joshua | \n",
"
\n",
" \n",
" 5 | \n",
" Hormel Chili, No Beans | \n",
" 3.286511 | \n",
" joshua | \n",
"
\n",
" \n",
" 6 | \n",
" Chester's Cheese Flavored Puffcorn Snacks | \n",
" 2.204404 | \n",
" joshua | \n",
"
\n",
" \n",
" 7 | \n",
" Mike Dave Need Wedding Dates (dvd + Digital) | \n",
" 0.720898 | \n",
" joshua | \n",
"
\n",
" \n",
" 8 | \n",
" Storkcraft Tuscany Glider and Ottoman, Beige C... | \n",
" 0.708318 | \n",
" joshua | \n",
"
\n",
" \n",
" 9 | \n",
" Ceiling Fan With Light White 14.2 X 29.9 X 9.2... | \n",
" 0.708318 | \n",
" joshua | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prod_name predicted_ratings \\\n",
"0 Clorox Disinfecting Wipes Value Pack Scented 1... 5.226926 \n",
"1 Lysol Concentrate Deodorizing Cleaner, Origina... 3.750000 \n",
"2 Head & Shoulders Dandruff Shampoo Ocean Lift 2... 3.535534 \n",
"3 Bounce Dryer Sheets, Fresh Linen, 160 sheets 3.535534 \n",
"4 The Resident Evil Collection 5 Discs (blu-Ray) 3.345348 \n",
"5 Hormel Chili, No Beans 3.286511 \n",
"6 Chester's Cheese Flavored Puffcorn Snacks 2.204404 \n",
"7 Mike Dave Need Wedding Dates (dvd + Digital) 0.720898 \n",
"8 Storkcraft Tuscany Glider and Ottoman, Beige C... 0.708318 \n",
"9 Ceiling Fan With Light White 14.2 X 29.9 X 9.2... 0.708318 \n",
"\n",
" userId \n",
"0 joshua \n",
"1 joshua \n",
"2 joshua \n",
"3 joshua \n",
"4 joshua \n",
"5 joshua \n",
"6 joshua \n",
"7 joshua \n",
"8 joshua \n",
"9 joshua "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommended products for user id:joshua after using sentiment\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prod_name | \n",
" predicted_ratings | \n",
" userId | \n",
" sentiment_score | \n",
" product_ranking_score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Clorox Disinfecting Wipes Value Pack Scented 1... | \n",
" 5.226926 | \n",
" joshua | \n",
" 5.000000 | \n",
" 15.226926 | \n",
"
\n",
" \n",
" 3 | \n",
" Bounce Dryer Sheets, Fresh Linen, 160 sheets | \n",
" 3.535534 | \n",
" joshua | \n",
" 4.390329 | \n",
" 12.316191 | \n",
"
\n",
" \n",
" 8 | \n",
" Storkcraft Tuscany Glider and Ottoman, Beige C... | \n",
" 0.708318 | \n",
" joshua | \n",
" 4.978562 | \n",
" 10.665442 | \n",
"
\n",
" \n",
" 5 | \n",
" Hormel Chili, No Beans | \n",
" 3.286511 | \n",
" joshua | \n",
" 3.202279 | \n",
" 9.691070 | \n",
"
\n",
" \n",
" 6 | \n",
" Chester's Cheese Flavored Puffcorn Snacks | \n",
" 2.204404 | \n",
" joshua | \n",
" 3.641906 | \n",
" 9.488215 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prod_name predicted_ratings \\\n",
"0 Clorox Disinfecting Wipes Value Pack Scented 1... 5.226926 \n",
"3 Bounce Dryer Sheets, Fresh Linen, 160 sheets 3.535534 \n",
"8 Storkcraft Tuscany Glider and Ottoman, Beige C... 0.708318 \n",
"5 Hormel Chili, No Beans 3.286511 \n",
"6 Chester's Cheese Flavored Puffcorn Snacks 2.204404 \n",
"\n",
" userId sentiment_score product_ranking_score \n",
"0 joshua 5.000000 15.226926 \n",
"3 joshua 4.390329 12.316191 \n",
"8 joshua 4.978562 10.665442 \n",
"5 joshua 3.202279 9.691070 \n",
"6 joshua 3.641906 9.488215 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# input() already returns a str, so the str() wrapper was redundant.\n",
"# The trailing ': ' separates the prompt from the typed value (the previous\n",
"# run printed \"Enter your user idjoshua\").\n",
"user_input = input(\"Enter your user id: \")\n",
"find_top_pos_recommendation(user_final_rating, user_input, df_prod_review, word_vectorizer,\\\n",
"                            model, no_recommendation = 5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}