{ "cells": [ { "cell_type": "markdown", "id": "4c157315", "metadata": {}, "source": [ "# Improve Product Recommendation using Sentiment Analysis\n", "\n", "- Watch [Other Interesting Data Science Topics](https://www.youtube.com/channel/UC4yh4xPxRP0-bLG_ldnLCHA/videos)\n", "- Subscribe on [YouTube](https://www.youtube.com/channel/UC4yh4xPxRP0-bLG_ldnLCHA?sub_confirmation=1)\n", "- Created on: 26-MAY-2022\n", "- Last Updated on: 26-MAY-2022" ] }, { "cell_type": "markdown", "id": "f4d231b9", "metadata": {}, "source": [ "## Recommendation" ] }, { "cell_type": "code", "execution_count": 1, "id": "301c0740", "metadata": { "scrolled": true }, "outputs": [], "source": [ "################################\n", "## STEP 01: Import Libraries ##\n", "################################\n", "import pandas as pd\n", "import numpy as np\n", "import pickle\n", "from sklearn.model_selection import train_test_split \n", "from sklearn.metrics.pairwise import pairwise_distances\n", "from sklearn.preprocessing import MinMaxScaler\n", "import matplotlib.pyplot as plt\n", "from IPython.display import display" ] }, { "cell_type": "code", "execution_count": 2, "id": "4fa09dfc", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdratingprod_name
28266daitaliana235Storkcraft Tuscany Glider and Ottoman, Beige C...
15603beverly5Lysol Concentrate Deodorizing Cleaner, Origina...
7839amy775Clorox Disinfecting Wipes Value Pack Scented 1...
4850dmann101015The Resident Evil Collection 5 Discs (blu-Ray)
4699morenito0215825The Resident Evil Collection 5 Discs (blu-Ray)
\n", "
" ], "text/plain": [ " userId rating \\\n", "28266 daitaliana23 5 \n", "15603 beverly 5 \n", "7839 amy77 5 \n", "4850 dmann10101 5 \n", "4699 morenito021582 5 \n", "\n", " prod_name \n", "28266 Storkcraft Tuscany Glider and Ottoman, Beige C... \n", "15603 Lysol Concentrate Deodorizing Cleaner, Origina... \n", "7839 Clorox Disinfecting Wipes Value Pack Scented 1... \n", "4850 The Resident Evil Collection 5 Discs (blu-Ray) \n", "4699 The Resident Evil Collection 5 Discs (blu-Ray) " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#############################\n", "## STEP 02: Read Data ####\n", "#############################\n", "# Reading ratings file\n", "ratings = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_ratings_final.csv',\\\n", " encoding='latin-1')\n", "# ratings.reset_index(drop=True, inplace=True)\n", "display(ratings.sample(n=5, random_state=42))" ] }, { "cell_type": "code", "execution_count": 3, "id": "74d8ecb2", "metadata": { "scrolled": false }, "outputs": [], "source": [ "#################################\n", "## STEP 03: Data Preparation ####\n", "#################################\n", "\n", "def apply_pivot(df,fillby = None):\n", " if fillby is not None:\n", " return df.pivot_table(index='userId', columns='prod_name',values='rating').fillna(fillby)\n", " return df.pivot_table(index='userId', columns='prod_name',values='rating')\n", "\n", "\n", "#3.1 Dividing the dataset into train and test\n", "train, test = train_test_split(ratings, test_size=0.30, random_state=42)\n", "test = test[test.userId.isin(train.userId)]\n", "#3.2 Apply pivot operation and fillna used to replace NaN values with 0 i.e. 
where user didn't make any rating\n", "df_train_pivot = apply_pivot(df = train, fillby = 0)\n", "df_test_pivot = apply_pivot(df = test, fillby = 0)\n", "#3.3 dummy dataset (train and test)\n", "## Train\n", "dummy_train = train.copy()\n", "dummy_train['rating'] = dummy_train['rating'].apply(lambda x: 0 if x>=1 else 1)\n", "dummy_train = apply_pivot(df = dummy_train, fillby = 1)\n", "## Test\n", "dummy_test = test.copy()\n", "dummy_test['rating'] = dummy_test['rating'].apply(lambda x: 1 if x>=1 else 0)\n", "dummy_test = apply_pivot(df = dummy_test, fillby = 0)" ] }, { "cell_type": "code", "execution_count": 4, "id": "6c193726", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prod_name0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest100:Complete First Season (blu-Ray)2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black2x Ultra Era with Oxi Booster, 50fl oz42 Dual Drop Leaf Table with 2 Madrid Chairs\"4C Grated Parmesan Cheese 100% Natural 8oz ShakerAfrica's Best No-Lye Dual Conditioning Relaxer System SuperAlberto VO5 Salon Series Smooth Plus Sleek ShampooAll,bran Complete Wheat Flakes, 18 Oz.Ambi Complexion Cleansing Bar...Vicks Vaporub, Regular, 3.53ozVoortman Sugar Free Fudge Chocolate Chip CookiesWagan Smartac 80watt Inverter With UsbWallmount Server Cabinet (450mm, 9 RU)Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime GuaranteeWedding Wishes Wedding Guest BookWeleda Everon Lip BalmWindex Original Glass Cleaner Refill 67.6oz (2 Liter)Yes To Carrots Nourishing Body WashYes To Grapefruit Rejuvenating Body Wash
userId
brewno3.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
deelee0.00.00.00.00.05.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
embum5.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
erinn0.00.00.00.00.05.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
rmtarboro0.00.00.00.00.05.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
smokey bear3.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
spicesea5.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", "

7 rows × 231 columns

\n", "
" ], "text/plain": [ "prod_name 0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest \\\n", "userId \n", "brewno 3.0 \n", "deelee 0.0 \n", "embum 5.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 3.0 \n", "spicesea 5.0 \n", "\n", "prod_name 100:Complete First Season (blu-Ray) \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name 2017-2018 Brownline174 Duraflex 14-Month Planner 8 1/2 X 11 Black \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name 2x Ultra Era with Oxi Booster, 50fl oz \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name 42 Dual Drop Leaf Table with 2 Madrid Chairs\" \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name 4C Grated Parmesan Cheese 100% Natural 8oz Shaker \\\n", "userId \n", "brewno 0.0 \n", "deelee 5.0 \n", "embum 0.0 \n", "erinn 5.0 \n", "rmtarboro 5.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Africa's Best No-Lye Dual Conditioning Relaxer System Super \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Alberto VO5 Salon Series Smooth Plus Sleek Shampoo \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name All,bran Complete Wheat Flakes, 18 Oz. \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Ambi Complexion Cleansing Bar ... 
\\\n", "userId ... \n", "brewno 0.0 ... \n", "deelee 0.0 ... \n", "embum 0.0 ... \n", "erinn 0.0 ... \n", "rmtarboro 0.0 ... \n", "smokey bear 0.0 ... \n", "spicesea 0.0 ... \n", "\n", "prod_name Vicks Vaporub, Regular, 3.53oz \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Voortman Sugar Free Fudge Chocolate Chip Cookies \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Wagan Smartac 80watt Inverter With Usb \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Wallmount Server Cabinet (450mm, 9 RU) \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Way Basics 3-Shelf Eco Narrow Bookcase Storage Shelf, Espresso - Formaldehyde Free - Lifetime Guarantee \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Wedding Wishes Wedding Guest Book Weleda Everon Lip Balm \\\n", "userId \n", "brewno 0.0 0.0 \n", "deelee 0.0 0.0 \n", "embum 0.0 0.0 \n", "erinn 0.0 0.0 \n", "rmtarboro 0.0 0.0 \n", "smokey bear 0.0 0.0 \n", "spicesea 0.0 0.0 \n", "\n", "prod_name Windex Original Glass Cleaner Refill 67.6oz (2 Liter) \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Yes To Carrots Nourishing Body Wash \\\n", "userId \n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "prod_name Yes To Grapefruit Rejuvenating Body Wash \n", "userId 
\n", "brewno 0.0 \n", "deelee 0.0 \n", "embum 0.0 \n", "erinn 0.0 \n", "rmtarboro 0.0 \n", "smokey bear 0.0 \n", "spicesea 0.0 \n", "\n", "[7 rows x 231 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_train_pivot[(df_train_pivot['0.6 Cu. Ft. Letter A4 Size Waterproof 30 Min. Fire File Chest']!=0) | \\\n", " (df_train_pivot['4C Grated Parmesan Cheese 100% Natural 8oz Shaker']!=0)]" ] }, { "cell_type": "code", "execution_count": 6, "id": "73b81faa", "metadata": { "scrolled": true }, "outputs": [], "source": [ "#####################################\n", "## STEP 04: User-User Similarity ####\n", "#####################################\n", "\n", "# to calculate mean, use only ratings given by user instead of fillna by 0 as it increase denominator in mean\n", "mean = np.nanmean(apply_pivot(df = train), axis = 1)\n", "df_train_subtracted = (apply_pivot(df = train).T-mean).T\n", "# Make rating=0 where user hasn't given any rating\n", "df_train_subtracted.fillna(0, inplace = True)\n", "# Creating the User Similarity Matrix using pairwise_distance function. shape of user_correlation is userXuser i.e. 
18025X18025\n", "user_correlation = 1 - pairwise_distances(df_train_subtracted, metric='cosine')\n", "user_correlation[np.isnan(user_correlation)] = 0\n", "# user_correlation[user_correlation<0] = 0\n", "# Convert the user_correlation matrix into dataframe\n", "user_correlation_df = pd.DataFrame(user_correlation)\n", "user_correlation_df['userId'] = df_train_subtracted.index\n", "user_correlation_df.set_index('userId',inplace=True)\n", "user_correlation_df.columns = df_train_subtracted.index.tolist()" ] }, { "cell_type": "code", "execution_count": 8, "id": "b1748140", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((18025, 18025), (18025, 231))" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_correlation.shape,df_train_pivot.shape" ] }, { "cell_type": "code", "execution_count": 9, "id": "f033747d", "metadata": { "scrolled": true }, "outputs": [], "source": [ "###########################################\n", "## STEP 05: Predict Rating (User-User) ####\n", "###########################################\n", "# Rating predicted by the user (for rated & non rated product both) is the weighted sum of correlation with the product rating (as present in the rating dataset). \n", "user_predicted_ratings = np.dot(user_correlation, df_train_pivot)\n", "\n", "# To find only product not rated by the user, ignore the product rated by the user by making it zero. 
\n", "user_final_rating = np.multiply(user_predicted_ratings,dummy_train)\n", "\n", "# scaler = MinMaxScaler(feature_range=(1, 5))\n", "# scaler.fit(user_final_rating)\n", "# user_final_rating = scaler.transform(user_final_rating)" ] }, { "cell_type": "code", "execution_count": 10, "id": "ebca7115", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Enter your user idjoshua\n" ] } ], "source": [ "################################################################\n", "## STEP 06: Find Top N recommendation for User (User-User) #####\n", "################################################################\n", "\n", "def find_top_recommendations(pred_rating_df, userid, topn):\n", " recommendation = pred_rating_df.loc[userid].sort_values(ascending=False)[0:topn]\n", " recommendation = pd.DataFrame(recommendation).reset_index().rename(columns={userid:'predicted_ratings'})\n", " return recommendation\n", "\n", "user_input = str(input(\"Enter your user id\"))\n", "recommendation_user_user = find_top_recommendations(user_final_rating, user_input, 5)\n", "recommendation_user_user['userId'] = user_input" ] }, { "cell_type": "code", "execution_count": 11, "id": "537a2db6", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Recommended products for user id:joshua as below\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prod_namepredicted_ratingsuserId
0Clorox Disinfecting Wipes Value Pack Scented 1...5.226926joshua
1Lysol Concentrate Deodorizing Cleaner, Origina...3.750000joshua
2Head & Shoulders Dandruff Shampoo Ocean Lift 2...3.535534joshua
3Bounce Dryer Sheets, Fresh Linen, 160 sheets3.535534joshua
4The Resident Evil Collection 5 Discs (blu-Ray)3.345348joshua
\n", "
" ], "text/plain": [ " prod_name predicted_ratings \\\n", "0 Clorox Disinfecting Wipes Value Pack Scented 1... 5.226926 \n", "1 Lysol Concentrate Deodorizing Cleaner, Origina... 3.750000 \n", "2 Head & Shoulders Dandruff Shampoo Ocean Lift 2... 3.535534 \n", "3 Bounce Dryer Sheets, Fresh Linen, 160 sheets 3.535534 \n", "4 The Resident Evil Collection 5 Discs (blu-Ray) 3.345348 \n", "\n", " userId \n", "0 joshua \n", "1 joshua \n", "2 joshua \n", "3 joshua \n", "4 joshua " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Earlier rated products by user id:joshua as below\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdratingprod_name
0joshua5Pink Friday: Roman Reloaded Re-Up (w/dvd)
17718joshua5Smead174 Recycled Letter Size Manila File Back...
22379joshua5Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...
1541joshua3Dark Shadows (includes Digital Copy) (ultravio...
\n", "
" ], "text/plain": [ " userId rating prod_name\n", "0 joshua 5 Pink Friday: Roman Reloaded Re-Up (w/dvd)\n", "17718 joshua 5 Smead174 Recycled Letter Size Manila File Back...\n", "22379 joshua 5 Cheetos Crunchy Flamin' Hot Cheese Flavored Sn...\n", "1541 joshua 3 Dark Shadows (includes Digital Copy) (ultravio..." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print(\"Recommended products for user id:{} as below\".format(user_input))\n", "display(recommendation_user_user)\n", "print(\"Earlier rated products by user id:{} as below\".format(user_input))\n", "display(train[train['userId']==user_input].sort_values(['rating'],ascending=False))" ] }, { "cell_type": "code", "execution_count": 12, "id": "991ffd6c", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.506663023687151\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\aakashgoel\\Anaconda3\\envs\\forecast_anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:464: RuntimeWarning: All-NaN slice encountered\n", " data_min = np.nanmin(X, axis=0)\n", "C:\\Users\\aakashgoel\\Anaconda3\\envs\\forecast_anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_data.py:465: RuntimeWarning: All-NaN slice encountered\n", " data_max = np.nanmax(X, axis=0)\n" ] } ], "source": [ "################################################\n", "## STEP 07: Evaluation (User-User) on test #####\n", "################################################s\n", "\n", "#Filter user correlation only for user which is in test, test is subset/equal of train in terms of userId\n", "\n", "user_correlation_test_df = user_correlation_df[user_correlation_df.index.isin(test.userId)]\n", "user_correlation_test_df = user_correlation_test_df[list(set(test.userId))]\n", "# user_correlation_test_df[user_correlation_test_df<0]=0\n", "\n", "#Get test user predicted rating\n", "test_user_predicted_ratings = np.dot(user_correlation_test_df, df_test_pivot)\n", 
"test_user_predicted_ratings = np.multiply(test_user_predicted_ratings,dummy_test)\n", "#Get NaN where user never rated as it shouldn't contribute in calculating RMSE\n", "test_user_predicted_ratings = test_user_predicted_ratings[test_user_predicted_ratings>0]\n", "scaler = MinMaxScaler(feature_range=(1, 5))\n", "scaler.fit(test_user_predicted_ratings)\n", "test_user_predicted_ratings = scaler.transform(test_user_predicted_ratings)\n", "\n", "total_non_nan = np.count_nonzero(~np.isnan(test_user_predicted_ratings))\n", "rmse = (np.sum(np.sum((apply_pivot(df = test) - test_user_predicted_ratings)**2))/total_non_nan)**0.5\n", "print(rmse)" ] }, { "cell_type": "code", "execution_count": 13, "id": "7a217ded", "metadata": {}, "outputs": [], "source": [ "############################\n", "## STEP 08: Save Model ####\n", "############################\n", "pickle.dump(user_final_rating,open('./model/user_final_rating.pkl','wb'))" ] }, { "cell_type": "markdown", "id": "245a1358", "metadata": {}, "source": [ "## Sentiment" ] }, { "cell_type": "code", "execution_count": 1, "id": "92250611", "metadata": {}, "outputs": [], "source": [ "################################\n", "## STEP 01: Import Libraries ##\n", "################################\n", "import pandas as pd\n", "import numpy as np\n", "import pickle\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import confusion_matrix, classification_report\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", "from sklearn.preprocessing import MinMaxScaler\n", "from imblearn import over_sampling\n", "from IPython.display import display" ] }, { "cell_type": "code", "execution_count": 2, "id": "4caafb8c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Reviewuser_sentiment
9329fresh clean smell everything need quick clean ...1
4160great vacuum love lightweight vacuum easy carr...1
18500smell great wipe easy use work smell great1
8840product count use clorox wipe everything trave...1
5098great movie excellent movie add blu ray collec...1
\n", "
" ], "text/plain": [ " Review user_sentiment\n", "9329 fresh clean smell everything need quick clean ... 1\n", "4160 great vacuum love lightweight vacuum easy carr... 1\n", "18500 smell great wipe easy use work smell great 1\n", "8840 product count use clorox wipe everything trave... 1\n", "5098 great movie excellent movie add blu ray collec... 1" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#############################\n", "## STEP 02: Read Data ####\n", "#############################\n", "# Reading product review sentiment file\n", "df_prod_review = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_review_sentiment.csv',\\\n", " encoding='latin-1')\n", "display(df_prod_review.sample(n=5, random_state=42))" ] }, { "cell_type": "code", "execution_count": 3, "id": "e82554eb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Checking distribution of +ve and -ve review sentiment: \n", "1 0.888401\n", "0 0.111599\n", "Name: user_sentiment, dtype: float64\n", "Checking distribution of +ve and -ve review sentiment after oversampling: \n", "1 0.5\n", "0 0.5\n", "Name: user_sentiment, dtype: float64\n" ] } ], "source": [ "#################################\n", "## STEP 03: Data Preparation ####\n", "#################################\n", "x=df_prod_review['Review']\n", "y=df_prod_review['user_sentiment']\n", "print(\"Checking distribution of +ve and -ve review sentiment: \\n{}\".format(y.value_counts(normalize=True)))\n", "# Split the dataset into test and train\n", "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=50)\n", "\n", "#As we saw above that data is imbalanced, balance training data using over sampling\n", "\n", "ros = over_sampling.RandomOverSampler(random_state=0)\n", "X_train, y_train = ros.fit_resample(pd.DataFrame(X_train), pd.Series(y_train))\n", "print(\"Checking distribution of +ve and -ve review sentiment after 
oversampling: \\n{}\".format(y_train.value_counts(normalize=True)))\n", "#convert into list of string\n", "X_train = X_train['Review'].tolist()" ] }, { "cell_type": "code", "execution_count": 4, "id": "fa6f1d9d", "metadata": {}, "outputs": [], "source": [ "################################################################\n", "## STEP 04: Feature Engineering (Convert text into numbers) ####\n", "################################################################\n", "word_vectorizer = TfidfVectorizer(strip_accents='unicode', token_pattern=r'\\w{1,}',\\\n", " ngram_range=(1, 3), stop_words='english', sublinear_tf=True, max_df = 0.80, min_df = 0.01)\n", "\n", "# Fiting it on Train\n", "word_vectorizer.fit(X_train)\n", "# transforming the train and test datasets\n", "X_train_transformed = word_vectorizer.transform(X_train)\n", "X_test_transformed = word_vectorizer.transform(X_test.tolist())" ] }, { "cell_type": "code", "execution_count": 5, "id": "7c61bde3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((33468, 263), (8062, 263))" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train_transformed.shape, X_test_transformed.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "50db52e2", "metadata": {}, "outputs": [], "source": [ "# print(list(word_vectorizer.get_feature_names()))" ] }, { "cell_type": "code", "execution_count": 6, "id": "936aaf31", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Evaluation on Train dataset ..\n", " precision recall f1-score support\n", "\n", " 0 0.82 0.83 0.82 16734\n", " 1 0.83 0.81 0.82 16734\n", "\n", " accuracy 0.82 33468\n", " macro avg 0.82 0.82 0.82 33468\n", "weighted avg 0.82 0.82 0.82 33468\n", "\n", "sensitivity: 0.81\n", "specificity: 0.83\n", "Evaluation on Test dataset ..\n", " precision recall f1-score support\n", "\n", " 0 0.35 0.80 0.49 922\n", " 1 0.97 0.81 0.88 7140\n", "\n", " accuracy 0.81 8062\n", " macro avg 
0.66 0.81 0.69 8062\n", "weighted avg 0.90 0.81 0.84 8062\n", "\n", "sensitivity: 0.81\n", "specificity: 0.8\n" ] } ], "source": [ "###############################################\n", "## STEP 05: ML Model (Logistic Regression) ####\n", "###############################################\n", "\n", "def evaluate_model(y_pred,y_actual):\n", " print(classification_report(y_true = y_actual, y_pred = y_pred))\n", " #confusion matrix\n", " cm = confusion_matrix(y_true = y_actual, y_pred = y_pred)\n", " TN = cm[0, 0] \n", " FP = cm[0, 1]\n", " FN = cm[1, 0]\n", " TP = cm[1, 1]\n", " #Calculating the Sensitivity\n", " sensitivity = round(TP/float(FN + TP),2)\n", " print(\"sensitivity: {}\".format(sensitivity))\n", " #Calculating the Specificity\n", " specificity = round(TN / float(TN + FP),2)\n", " print(\"specificity: {}\".format(specificity))\n", "\n", "#4.1 Model Training\n", "logit = LogisticRegression()\n", "logit.fit(X_train_transformed,y_train)\n", "#4.2 Prediction on Train Data\n", "y_pred_train= logit.predict(X_train_transformed)\n", "#4.3 Prediction on Test Data\n", "y_pred_test = logit.predict(X_test_transformed)\n", "#4.4 Evaluation on Train\n", "print(\"Evaluation on Train dataset ..\")\n", "evaluate_model(y_pred = y_pred_train, y_actual = y_train)\n", "print(\"Evaluation on Test dataset ..\")\n", "#4.5 Evaluation on Test\n", "evaluate_model(y_pred = y_pred_test, y_actual = y_test)" ] }, { "cell_type": "code", "execution_count": 7, "id": "fef2c721", "metadata": {}, "outputs": [], "source": [ "############################\n", "## STEP 06: Save Model ####\n", "############################\n", "pickle.dump(logit,open('./model/logit_model.pkl', 'wb'))\n", "pickle.dump(word_vectorizer,open('./model/word_vectorizer.pkl','wb'))" ] }, { "cell_type": "markdown", "id": "131bd76b", "metadata": {}, "source": [ "## Connecting dot -- Use Sentiment in Improving Recommendation" ] }, { "cell_type": "code", "execution_count": 7, "id": "74222c66", "metadata": {}, "outputs": [], 
"source": [ "################################\n", "## STEP 01: Import Libraries ##\n", "################################\n", "import pandas as pd\n", "import numpy as np\n", "import pickle\n", "from sklearn.preprocessing import MinMaxScaler" ] }, { "cell_type": "code", "execution_count": 2, "id": "b9e11dfa", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prod_nameReview
2501Hawaiian Punch Berry Limeade Blast Juicepretty good stuff much sugar kid like
21252Godzilla 3d Includes Digital Copy Ultraviolet ...enteraining great interesting version classic ...
23503Godzilla 3d Includes Digital Copy Ultraviolet ...best godzilla date like previous godzilla film...
26827Storkcraft Tuscany Glider and Ottoman, Beige C...comfy good put baby sleep calming sister mom n...
18210Clorox Disinfecting Bathroom Cleanerproduct easy use product easy use open use har...
\n", "
" ], "text/plain": [ " prod_name \\\n", "2501 Hawaiian Punch Berry Limeade Blast Juice \n", "21252 Godzilla 3d Includes Digital Copy Ultraviolet ... \n", "23503 Godzilla 3d Includes Digital Copy Ultraviolet ... \n", "26827 Storkcraft Tuscany Glider and Ottoman, Beige C... \n", "18210 Clorox Disinfecting Bathroom Cleaner \n", "\n", " Review \n", "2501 pretty good stuff much sugar kid like \n", "21252 enteraining great interesting version classic ... \n", "23503 best godzilla date like previous godzilla film... \n", "26827 comfy good put baby sleep calming sister mom n... \n", "18210 product easy use product easy use open use har... " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#############################\n", "## STEP 02: Read Data ####\n", "#############################\n", "# Reading product review data\n", "df_prod_review = pd.read_csv('https://raw.githubusercontent.com/aakashgoel12/blogs/master/input/product_review.csv',\\\n", " encoding='latin-1')\n", "display(df_prod_review.sample(n=5, random_state=42))" ] }, { "cell_type": "code", "execution_count": 3, "id": "238b4adc", "metadata": {}, "outputs": [], "source": [ "###########################\n", "## STEP 03: Load Model ####\n", "###########################\n", "\n", "model = pickle.load(open('./model/logit_model.pkl', 'rb'))\n", "word_vectorizer = pickle.load(open('./model/word_vectorizer.pkl','rb'))\n", "user_final_rating = pickle.load(open('./model/user_final_rating.pkl','rb'))" ] }, { "cell_type": "code", "execution_count": 15, "id": "902c72a3", "metadata": {}, "outputs": [], "source": [ "##########################################################################\n", "## STEP 04: Get positive review Recommendation only for given user id ####\n", "##########################################################################\n", "\n", "def find_top_recommendations(pred_rating_df, userid, topn):\n", " recommendation = pred_rating_df.loc[userid].sort_values(ascending=False)[0:topn]\n", " 
recommendation = pd.DataFrame(recommendation).reset_index().rename(columns={userid:'predicted_ratings'})\n", " return recommendation\n", "\n", "def get_sentiment_product(x):\n", " ## Get review list for given product\n", " product_name_review_list = df_prod_review[df_prod_review['prod_name']== x]['Review'].tolist()\n", " ## Transform review list into DTM (Document/review Term Matrix)\n", " features= word_vectorizer.transform(product_name_review_list)\n", " ## Predict sentiment\n", " return model.predict(features).mean()\n", "\n", "def find_top_pos_recommendation(user_final_rating, user_input, df_prod_review, word_vectorizer,\\\n", " model, no_recommendation):\n", " ## 10 is manually coded, need to change \n", " ## Generate top recommendations using user-user based recommendation system w/o using sentiment analysis \n", " recommendation_user_user = find_top_recommendations(user_final_rating, user_input, 10)\n", " recommendation_user_user['userId'] = user_input\n", " ## filter out recommendations where predicted rating is zero\n", " recommendation_user_user = recommendation_user_user[recommendation_user_user['predicted_ratings']!=0]\n", " print(\"Recommended products for user id:{} without using sentiment\".format(user_input))\n", " display(recommendation_user_user)\n", " ## Get overall sentiment score for each recommended product\n", " recommendation_user_user['sentiment_score'] = recommendation_user_user['prod_name'].apply(get_sentiment_product)\n", " ## Transform scale of sentiment so that it can be manipulated with predicted rating score\n", " scaler = MinMaxScaler(feature_range=(1, 5))\n", " scaler.fit(recommendation_user_user[['sentiment_score']])\n", " recommendation_user_user['sentiment_score'] = scaler.transform(recommendation_user_user[['sentiment_score']])\n", " ## Get final product ranking score using 1*Predicted rating of recommended product + 2*normalized sentiment score on scale of 1–5 of recommended product \n", " 
recommendation_user_user['product_ranking_score'] = 1*recommendation_user_user['predicted_ratings'] + \\\n", " 2*recommendation_user_user['sentiment_score']\n", " print(\"Recommended products for user id:{} after using sentiment\".format(user_input))\n", " ## Sort product ranking score in descending order and show only top `no_recommendation`\n", " display(recommendation_user_user.sort_values(by = ['product_ranking_score'],ascending = False).head(no_recommendation))" ] }, { "cell_type": "code", "execution_count": 16, "id": "4cfb8a8d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Enter your user idjoshua\n", "Recommended products for user id:joshua without using sentiment\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prod_namepredicted_ratingsuserId
0Clorox Disinfecting Wipes Value Pack Scented 1...5.226926joshua
1Lysol Concentrate Deodorizing Cleaner, Origina...3.750000joshua
2Head & Shoulders Dandruff Shampoo Ocean Lift 2...3.535534joshua
3Bounce Dryer Sheets, Fresh Linen, 160 sheets3.535534joshua
4The Resident Evil Collection 5 Discs (blu-Ray)3.345348joshua
5Hormel Chili, No Beans3.286511joshua
6Chester's Cheese Flavored Puffcorn Snacks2.204404joshua
7Mike Dave Need Wedding Dates (dvd + Digital)0.720898joshua
8Storkcraft Tuscany Glider and Ottoman, Beige C...0.708318joshua
9Ceiling Fan With Light White 14.2 X 29.9 X 9.2...0.708318joshua
\n", "
" ], "text/plain": [ " prod_name predicted_ratings \\\n", "0 Clorox Disinfecting Wipes Value Pack Scented 1... 5.226926 \n", "1 Lysol Concentrate Deodorizing Cleaner, Origina... 3.750000 \n", "2 Head & Shoulders Dandruff Shampoo Ocean Lift 2... 3.535534 \n", "3 Bounce Dryer Sheets, Fresh Linen, 160 sheets 3.535534 \n", "4 The Resident Evil Collection 5 Discs (blu-Ray) 3.345348 \n", "5 Hormel Chili, No Beans 3.286511 \n", "6 Chester's Cheese Flavored Puffcorn Snacks 2.204404 \n", "7 Mike Dave Need Wedding Dates (dvd + Digital) 0.720898 \n", "8 Storkcraft Tuscany Glider and Ottoman, Beige C... 0.708318 \n", "9 Ceiling Fan With Light White 14.2 X 29.9 X 9.2... 0.708318 \n", "\n", " userId \n", "0 joshua \n", "1 joshua \n", "2 joshua \n", "3 joshua \n", "4 joshua \n", "5 joshua \n", "6 joshua \n", "7 joshua \n", "8 joshua \n", "9 joshua " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Recommended products for user id:joshua after using sentiment\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prod_namepredicted_ratingsuserIdsentiment_scoreproduct_ranking_score
0Clorox Disinfecting Wipes Value Pack Scented 1...5.226926joshua5.00000015.226926
3Bounce Dryer Sheets, Fresh Linen, 160 sheets3.535534joshua4.39032912.316191
8Storkcraft Tuscany Glider and Ottoman, Beige C...0.708318joshua4.97856210.665442
5Hormel Chili, No Beans3.286511joshua3.2022799.691070
6Chester's Cheese Flavored Puffcorn Snacks2.204404joshua3.6419069.488215
\n", "
" ], "text/plain": [ " prod_name predicted_ratings \\\n", "0 Clorox Disinfecting Wipes Value Pack Scented 1... 5.226926 \n", "3 Bounce Dryer Sheets, Fresh Linen, 160 sheets 3.535534 \n", "8 Storkcraft Tuscany Glider and Ottoman, Beige C... 0.708318 \n", "5 Hormel Chili, No Beans 3.286511 \n", "6 Chester's Cheese Flavored Puffcorn Snacks 2.204404 \n", "\n", " userId sentiment_score product_ranking_score \n", "0 joshua 5.000000 15.226926 \n", "3 joshua 4.390329 12.316191 \n", "8 joshua 4.978562 10.665442 \n", "5 joshua 3.202279 9.691070 \n", "6 joshua 3.641906 9.488215 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "user_input = str(input(\"Enter your user id\"))\n", "find_top_pos_recommendation(user_final_rating, user_input, df_prod_review, word_vectorizer,\\\n", " model, no_recommendation = 5)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }