{"cells":[{"cell_type":"markdown","metadata":{},"source":["# Itempop and two-stage recommender on MTS data"]},{"cell_type":"markdown","metadata":{"id":"Ey05k9RtFXlQ"},"source":["## Setup"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OZHUcZZCmCxf"},"outputs":[],"source":["# %pip (instead of !pip) installs into the environment of the running kernel\n","%pip install --upgrade pip setuptools wheel\n","# implicit is installed from a source checkout; skip the clone when the folder already exists (re-runs)\n","!test -d implicit || git clone https://github.com/benfred/implicit\n","%pip install ./implicit\n","%pip install -q catboost\n","%pip install recohut"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"EJ_UIHjq9NnK"},"outputs":[],"source":["import os\n","import numpy as np\n","import pandas as pd\n","import scipy.sparse as sp\n","\n","import random\n","import datetime\n","\n","import pickle\n","from sklearn.model_selection import train_test_split\n","from sklearn.utils import shuffle\n","\n","from implicit import nearest_neighbours as NN\n","from implicit.nearest_neighbours import TFIDFRecommender\n","\n","from catboost import CatBoostClassifier\n","\n","from recohut.datasets.mts import MTSDataset\n","from recohut.utils.common_utils import get_coo_matrix\n","from recohut.transforms.splitting import TimeRangeSplit\n","from recohut.models.itempop import ItemPop as PopularRecommender"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"nmhnVyko6Ynx"},"outputs":[],"source":["ds = MTSDataset(data_dir='/content/data', sample_frac=0.1)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"UJwOSLrd9Tmg"},"outputs":[],"source":["users_df = pd.read_csv(os.path.join(ds.processed_dir, 'users_processed.csv'))\n","items_df = pd.read_csv(os.path.join(ds.processed_dir, 'items_processed.csv'))\n","interactions_df = pd.read_csv(os.path.join(ds.processed_dir, 'interactions_processed.csv'))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lQiBlArS-IVP"},"outputs":[],"source":["interactions_df['last_watch_dt'] = pd.to_datetime(interactions_df['last_watch_dt'])\n","interactions_df.sort_values(by='last_watch_dt', 
inplace=True)"]},{"cell_type":"markdown","metadata":{"id":"NJhm264qm293"},"source":["## Winning Solution\n","\n","This solution includes a two-stage model. I used item-item CF from implicit library to generate candidates with their scores and Catboost classifier to predict final ranks with classification objective. Recommendations for cold users were made with Popular items.\n","\n","Implicit model parameters were chosen on sliding time window cross validation. The best scores were achieved by Cosine recommender model, taking only last 20 interactions for each user. 100 candidates with their scores were generated for each user, filtering all items that user had interactions with.\n","\n","Implicit candidates were calculated for the last 14 days of the interactions. Then catboost model was trained on positive interactions from the candidates list on last 14 days. Random negative sampling was applied.\n","\n","For final submission implicit candidates and catboost predictions were recalculated on the whole dataset.\n","\n","Ref: [Daria](https://github.com/blondered/ods_MTS_RecSys_Challenge_solution)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Oa4FR6zv_lsB"},"outputs":[],"source":["# Creating items and users mapping\n","# *_inv_mapping: contiguous index -> raw id; *_mapping: raw id -> contiguous index\n","users_inv_mapping = {idx: user_id for idx, user_id in enumerate(interactions_df['user_id'].unique())}\n","users_mapping = dict(zip(users_inv_mapping.values(), users_inv_mapping.keys()))\n","items_inv_mapping = {idx: item_id for idx, item_id in enumerate(interactions_df['item_id'].unique())}\n","items_mapping = dict(zip(items_inv_mapping.values(), items_inv_mapping.keys()))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FbsUVNeImJrm"},"outputs":[],"source":["# Preparing data\n","# Interactions from the 14 days before the latest watch date form the boosting window\n","last_date_df = interactions_df['last_watch_dt'].max()\n","boosting_split_date = last_date_df - pd.Timedelta(days=14)\n","boosting_data = interactions_df.loc[\n","    interactions_df['last_watch_dt'] > boosting_split_date\n","].copy()\n","boost_idx = boosting_data['user_id'].unique()\n","before_boosting = 
interactions_df[(interactions_df['last_watch_dt'] <=\n"," boosting_split_date)].copy()\n","before_boosting_known_items = before_boosting.groupby(\n"," 'user_id')['item_id'].apply(list).to_dict()\n","\n","# Same per-user history, with raw item ids translated through items_mapping\n","before_boosting_known_items_mapped = {\n","    user: [items_mapping[item] for item in items]\n","    for user, items in before_boosting_known_items.items()\n","}\n","# Rank of every interaction inside its user's history: 1 = most recent\n","before_boosting['order_from_recent'] = before_boosting.sort_values(\n"," by=['last_watch_dt'], ascending=False).groupby('user_id').cumcount() + 1\n","boost_warm_idx = np.intersect1d(before_boosting['user_id'].unique(),\n"," boosting_data['user_id'].unique())"]},{"cell_type":"markdown","metadata":{"id":"70FwEuTwIYD4"},"source":[" Calculates top candidates from implicit model with their scores. Implicit parameters were chosen on time range split cross-validation. History offset stands for taking only the last X items from user history. Day offset stands for taking items from the last X days of user history."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9X0MRF9TBLvs"},"outputs":[],"source":["k_neighbours = 200\n","day_offset = 170\n","history_offset = 20\n","distance = 'Cosine'\n","num_candidates = 100"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-S2vvbAWBTAZ"},"outputs":[],"source":["before_boosting['order_from_recent'] = before_boosting.sort_values(\n"," by=['last_watch_dt'], ascending=False).groupby('user_id').cumcount() + 1\n","train = before_boosting.copy()\n","date_window = train['last_watch_dt'].max() - pd.DateOffset(days=day_offset)\n","train = train[train['last_watch_dt'] >= date_window]"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"b1sORvcLCc2V"},"outputs":[],"source":["if history_offset:\n","    # order_from_recent starts at 1, so <= keeps exactly the last `history_offset` interactions per user\n","    train = train[train['order_from_recent'] <= history_offset]\n"," \n","if distance == 'Cosine':\n"," model = NN.CosineRecommender(K=k_neighbours)\n"," weights = None\n","else:\n"," model = 
NN.TFIDFRecommender(K=k_neighbours)\n"," weights = None"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"XL39gZ51CnWc"},"outputs":[],"source":["# Build the COO matrix from `train` with the id mappings, then convert it to CSR\n","train_mat = get_coo_matrix(train, users_mapping=users_mapping, items_mapping=items_mapping, weight_col=weights)\n","train_mat = train_mat.tocsr()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["1150ae9b77d04ee089151cdf9b3c97fd","6404fcf9360b4c8bafac3d0c62dc7d58","cd203aa954364d05a2d9197f04bac18a","7d5a993575214d4189c013bb12fc9080","552747c596b440929459610765a70c67","6b9c40da36c746169708e1251c893b47","e6fb3757a3db480aa1be1f0a91e19f4d","163ead0dd46c434ea3412e462a0938db","b6183320801a4148b75f6b36b65a9b13","6773a61987794d45bf453ac8a8f78a34","1cfcd7deb21741cc95481b1ece102ced"]},"executionInfo":{"elapsed":29070,"status":"ok","timestamp":1642187764981,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"sjI4cw1ZCpoz","outputId":"6660fc68-d9fa-4477-8a4a-10b9073d4b0f"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"1150ae9b77d04ee089151cdf9b3c97fd","version_major":2,"version_minor":0},"text/plain":[" 0%| | 0/266854 [00:00\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_score
030199262.00.707107
030203105.00.707107
030199886.00.707107
030219904.00.707107
030203206.00.707107
............
222311097544263721.00.577350
222311097544227113.00.577350
222311097544239830.00.577350
222311097544139002.00.577350
222311097544243127.00.577350
\n","

2109153 rows × 3 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n"," \n"," "],"text/plain":[" user_id item_id implicit_score\n","0 30 199262.0 0.707107\n","0 30 203105.0 0.707107\n","0 30 199886.0 0.707107\n","0 30 219904.0 0.707107\n","0 30 203206.0 0.707107\n","... ... ... ...\n","22231 1097544 263721.0 0.577350\n","22231 1097544 227113.0 0.577350\n","22231 1097544 239830.0 0.577350\n","22231 1097544 139002.0 0.577350\n","22231 1097544 243127.0 0.577350\n","\n","[2109153 rows x 3 columns]"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["recs = pd.DataFrame({'user_id': boost_warm_idx})\n","# mapper(user_id) is indexed as (item ids, scores): [0] and [1] below\n","recs['item_id_score'] = recs['user_id'].map(mapper)\n","recs['item_id'] = recs['item_id_score'].apply(lambda x: x[0])\n","recs['implicit_score'] = recs['item_id_score'].apply(lambda x: x[1])\n","# Pair every item with its score so that a single explode yields one row per (user, item)\n","recs['tmp'] = recs.apply(lambda row: list(zip(row['item_id'], row['implicit_score'])), axis=1)\n","recs = recs.explode('tmp')\n","recs[['item_id','implicit_score']] = pd.DataFrame(recs['tmp'].tolist(), index=recs.index)\n","recs = recs.drop(columns=['tmp', 'item_id_score'])\n","recs"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"roQ4due5DKVl"},"outputs":[],"source":["recs.to_csv(os.path.join(ds.processed_dir, 'impl_scores.csv'), index=False)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Xje9T6CtI7kM"},"outputs":[],"source":["# taking candidates from implicit model and generating positive samples\n","candidates = pd.read_csv(os.path.join(ds.processed_dir, 'impl_scores.csv'))\n","# Rows with a missing item_id are not real candidates: drop them instead of filling with 0,\n","# which would turn them into candidates for item 0\n","candidates = candidates.dropna(subset=['item_id'])\n","candidates['item_id'] = candidates['item_id'].astype('int64')\n","# `id` identifies a candidate row; it is used to separate positives from negatives\n","candidates['id'] = candidates.index\n","pos = candidates.merge(boosting_data[['user_id', 'item_id']], \n"," on=['user_id', 'item_id'], how='inner')\n","pos['target'] = 1"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":677},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1642188049285,"user":{"displayName":"Sparsh 
Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"XmD3UFnzK4el","outputId":"b11e5bae-1d3b-41f9-af99-6a0c4c532065"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_scoreidtarget
010992555431.0000002112881
112608755181.0000002404481
213180378070.7071072509891
314017950110.7071072649671
422376327801.0000004250321
531607470331.0000006045431
6419536102671.0000008067231
7482854132371.0000009230661
848483475580.5000009271301
948716037841.0000009313331
10522481137870.5163989950991
1161614082540.16903111762381
1262614752161.00000011938791
13779743109710.35355314942761
14860928144310.50000016508901
1592802391130.50000017847501
1694791611731.00000018229621
1710308606570.33333319836021
181043861153841.00000020068211
191093253117691.00000021012801
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id implicit_score id target\n","0 109925 5543 1.000000 211288 1\n","1 126087 5518 1.000000 240448 1\n","2 131803 7807 0.707107 250989 1\n","3 140179 5011 0.707107 264967 1\n","4 223763 2780 1.000000 425032 1\n","5 316074 7033 1.000000 604543 1\n","6 419536 10267 1.000000 806723 1\n","7 482854 13237 1.000000 923066 1\n","8 484834 7558 0.500000 927130 1\n","9 487160 3784 1.000000 931333 1\n","10 522481 13787 0.516398 995099 1\n","11 616140 8254 0.169031 1176238 1\n","12 626147 5216 1.000000 1193879 1\n","13 779743 10971 0.353553 1494276 1\n","14 860928 14431 0.500000 1650890 1\n","15 928023 9113 0.500000 1784750 1\n","16 947916 1173 1.000000 1822962 1\n","17 1030860 657 0.333333 1983602 1\n","18 1043861 15384 1.000000 2006821 1\n","19 1093253 11769 1.000000 2101280 1"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["pos"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xnuzDSyeKhEa"},"outputs":[],"source":["# Generating negative samples\n","num_negatives = 3\n","# Seeded generator so that the same negatives are drawn on every run\n","neg_rng = np.random.RandomState(345)\n","pos_group = pos.groupby('user_id')['item_id'].count()\n","neg = candidates[~candidates['id'].isin(pos['id'])].copy()\n","neg_sampling = pd.DataFrame(neg.groupby('user_id')['id'].apply(\n"," list)).join(pos_group, on='user_id', rsuffix='p', how='right')\n","# num_negatives draws per positive, capped at 25 per user\n","neg_sampling['num_choices'] = np.clip(neg_sampling['item_id'] * num_negatives, \n"," a_min=0, a_max=25)\n","\n","\n","def func(row):\n","    \"\"\"Draw up to `num_choices` distinct negative candidate ids for one user.\"\"\"\n","    # The right join leaves NaN instead of a list for users without any negative candidate\n","    pool = row['id'] if isinstance(row['id'], list) else []\n","    # Never ask for more ids than the pool holds: replace=False would raise\n","    n_draw = min(int(row['num_choices']), len(pool))\n","    if n_draw == 0:\n","        return np.array([], dtype='int64')\n","    return neg_rng.choice(pool, size=n_draw, replace=False)\n","\n","\n","neg_sampling['sample_idx'] = neg_sampling.apply(func, axis=1)\n","idx_chosen = neg_sampling['sample_idx'].explode().values\n","# .copy() so that adding the target column does not write into a slice of the candidate pool\n","neg = neg[neg['id'].isin(idx_chosen)].copy()\n","neg['target'] = 0"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1642188051315,"user":{"displayName":"Sparsh 
Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"iReJGyJNMpVg","outputId":"adcff675-6618-47f3-a534-67ef145b3ccb"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_scoreidtarget
211232109925129481.0000002112320
211234109925312051.0000002112340
2112871099252511321.0000002112870
240482126087388591.0000002404820
240493126087652571.0000002404930
240494126087410671.0000002404940
2509801318032075870.5773502509800
25098813180361130.7071072509880
2510411318031073811.0000002510410
265003140179304331.0000002650030
265014140179210641.0000002650140
265031140179163731.0000002650310
425049223763771691.0000004250490
425052223763129481.0000004250520
4250742237631092801.0000004250740
60454231607471071.0000006045420
604554316074118291.0000006045540
604556316074739971.0000006045560
806662419536128541.0000008066620
806668419536130761.0000008066680
80674241953692041.0000008067420
923002482854347631.0000009230020
923018482854113611.0000009230180
923061482854129651.0000009230610
927151484834302170.7071079271510
927180484834126520.5000009271800
927201484834650370.7071079272010
93131048716076160.7071079313100
93134348716071071.0000009313430
931370487160213171.0000009313700
9951705224811202100.7071079951700
995173522481332601.0000009951730
9951835224811760891.0000009951830
1176201616140407760.13363111762010
1176203616140354110.13363111762030
1176252616140755520.26726111762520
11938206261471183550.70710711938200
11938366261472459450.70710711938360
119387762614722391.00000011938770
1494296779743733980.35355314942960
149430277974322090.50000014943020
1494338779743880200.50000014943380
1650910860928220210.57735016509100
1650945860928433261.00000016509450
1650951860928387360.57735016509510
1784731928023113570.44721417847310
17847709280232202901.00000017847700
1784781928023307160.32444317847810
182294694791686371.00000018229460
18229549479162264020.70710718229540
1822957947916217720.70710718229570
1983609103086072230.50000019836090
198364510308601108610.70710719836450
19836631030860160070.40824819836630
200681110438611089340.70710720068110
20068721043861382421.00000020068720
20068891043861574891.00000020068890
210121310932531945120.70710721012130
210124010932531501610.70710721012400
210126110932532250080.70710721012610
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id implicit_score id target\n","211232 109925 12948 1.000000 211232 0\n","211234 109925 31205 1.000000 211234 0\n","211287 109925 251132 1.000000 211287 0\n","240482 126087 38859 1.000000 240482 0\n","240493 126087 65257 1.000000 240493 0\n","240494 126087 41067 1.000000 240494 0\n","250980 131803 207587 0.577350 250980 0\n","250988 131803 6113 0.707107 250988 0\n","251041 131803 107381 1.000000 251041 0\n","265003 140179 30433 1.000000 265003 0\n","265014 140179 21064 1.000000 265014 0\n","265031 140179 16373 1.000000 265031 0\n","425049 223763 77169 1.000000 425049 0\n","425052 223763 12948 1.000000 425052 0\n","425074 223763 109280 1.000000 425074 0\n","604542 316074 7107 1.000000 604542 0\n","604554 316074 11829 1.000000 604554 0\n","604556 316074 73997 1.000000 604556 0\n","806662 419536 12854 1.000000 806662 0\n","806668 419536 13076 1.000000 806668 0\n","806742 419536 9204 1.000000 806742 0\n","923002 482854 34763 1.000000 923002 0\n","923018 482854 11361 1.000000 923018 0\n","923061 482854 12965 1.000000 923061 0\n","927151 484834 30217 0.707107 927151 0\n","927180 484834 12652 0.500000 927180 0\n","927201 484834 65037 0.707107 927201 0\n","931310 487160 7616 0.707107 931310 0\n","931343 487160 7107 1.000000 931343 0\n","931370 487160 21317 1.000000 931370 0\n","995170 522481 120210 0.707107 995170 0\n","995173 522481 33260 1.000000 995173 0\n","995183 522481 176089 1.000000 995183 0\n","1176201 616140 40776 0.133631 1176201 0\n","1176203 616140 35411 0.133631 1176203 0\n","1176252 616140 75552 0.267261 1176252 0\n","1193820 626147 118355 0.707107 1193820 0\n","1193836 626147 245945 0.707107 1193836 0\n","1193877 626147 2239 1.000000 1193877 0\n","1494296 779743 73398 0.353553 1494296 0\n","1494302 779743 2209 0.500000 1494302 0\n","1494338 779743 88020 0.500000 1494338 0\n","1650910 860928 22021 0.577350 1650910 0\n","1650945 860928 43326 1.000000 1650945 0\n","1650951 860928 38736 0.577350 1650951 0\n","1784731 928023 
11357 0.447214 1784731 0\n","1784770 928023 220290 1.000000 1784770 0\n","1784781 928023 30716 0.324443 1784781 0\n","1822946 947916 8637 1.000000 1822946 0\n","1822954 947916 226402 0.707107 1822954 0\n","1822957 947916 21772 0.707107 1822957 0\n","1983609 1030860 7223 0.500000 1983609 0\n","1983645 1030860 110861 0.707107 1983645 0\n","1983663 1030860 16007 0.408248 1983663 0\n","2006811 1043861 108934 0.707107 2006811 0\n","2006872 1043861 38242 1.000000 2006872 0\n","2006889 1043861 57489 1.000000 2006889 0\n","2101213 1093253 194512 0.707107 2101213 0\n","2101240 1093253 150161 0.707107 2101240 0\n","2101261 1093253 225008 0.707107 2101261 0"]},"execution_count":22,"metadata":{},"output_type":"execute_result"}],"source":["neg"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"B3h45yA7MzMe"},"outputs":[],"source":["# Creating training data sample and early stopping data sample\n","# Users (not rows) are split, so all rows of one user land on the same side\n","boost_idx_train = np.intersect1d(boost_idx, pos['user_id'].unique())\n","boost_train_users, boost_eval_users = train_test_split(boost_idx_train, \n"," test_size=0.1,\n"," random_state=345)\n","select_col = ['user_id', 'item_id', 'implicit_score', 'target']\n","# shuffle uses the same fixed seed as the split; unseeded it orders the rows differently on every run\n","boost_train = shuffle(\n"," pd.concat([\n"," pos[pos['user_id'].isin(boost_train_users)],\n"," neg[neg['user_id'].isin(boost_train_users)]\n"," ])[select_col],\n","    random_state=345\n",")\n","boost_eval = shuffle(\n"," pd.concat([\n"," pos[pos['user_id'].isin(boost_eval_users)],\n"," neg[neg['user_id'].isin(boost_eval_users)]\n"," ])[select_col],\n","    random_state=345\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ojeLRc9iM-LQ"},"outputs":[],"source":["user_col = ['user_id','age','income','sex','kids_flg','boost_user_watch_cnt_all',\n"," 'boost_user_watch_cnt_last_14']\n","\n","item_col = ['item_id','content_type','countries_max','for_kids','age_rating',\n"," 'studios_max','genres_max','genres_min','genres_med','release_novelty']\n","\n","item_stats_col = ['item_id','watched_in_7_days','watch_ts_std','trend_slope',\n"," 
'watch_ts_quantile_95_diff','watch_ts_median_diff',\n"," 'watched_in_all_time','male_watchers_fraction',\n"," 'female_watchers_fraction','younger_35_fraction','older_35_fraction']\n"," \n","# Columns later cast to pandas 'category' dtype and handed to CatBoost as cat_features\n","cat_col = ['age','income','sex','content_type']"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":364},"executionInfo":{"elapsed":610,"status":"ok","timestamp":1642188056980,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"-gP7TfKGNMba","outputId":"e9484e51-24c6-4da8-e152-b51c1c68bc9e"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_scoretargetageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14content_typecountries_maxfor_kidsage_ratingstudios_maxgenres_maxgenres_mingenres_medrelease_novelty
031607470331.0000001age_18_24income_20_40FFalse4.00.0series4340.0False16.014898.03858.02778.03318.05.0
113180361130.7071070age_35_44income_20_40MFalse0.00.0film5065.0False12.014898.03503.01820.01877.01.0
2316074118291.0000000age_18_24income_20_40FFalse4.00.0film5065.0False18.014898.01820.01033.01426.56.0
31318032075870.5773500age_35_44income_20_40MFalse0.00.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
431607471071.0000000age_18_24income_20_40FFalse4.00.0series4340.0False12.014898.05431.0626.01877.06.0
513180378070.7071071age_35_44income_20_40MFalse0.00.0film4340.0False16.014898.03858.03858.03858.05.0
6316074739971.0000000age_18_24income_20_40FFalse4.00.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
71318031073811.0000000age_35_44income_20_40MFalse0.00.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id implicit_score ... genres_min genres_med release_novelty\n","0 316074 7033 1.000000 ... 2778.0 3318.0 5.0\n","1 131803 6113 0.707107 ... 1820.0 1877.0 1.0\n","2 316074 11829 1.000000 ... 1033.0 1426.5 6.0\n","3 131803 207587 0.577350 ... NaN NaN NaN\n","4 316074 7107 1.000000 ... 626.0 1877.0 6.0\n","5 131803 7807 0.707107 ... 3858.0 3858.0 5.0\n","6 316074 73997 1.000000 ... NaN NaN NaN\n","7 131803 107381 1.000000 ... NaN NaN NaN\n","\n","[8 rows x 19 columns]"]},"execution_count":25,"metadata":{},"output_type":"execute_result"}],"source":["# Left-join user attributes, then item attributes, onto both samples\n","train_feat = (\n","    boost_train\n","    .merge(users_df[user_col], on=['user_id'], how='left')\n","    .merge(items_df[item_col], on=['item_id'], how='left')\n",")\n","\n","eval_feat = (\n","    boost_eval\n","    .merge(users_df[user_col], on=['user_id'], how='left')\n","    .merge(items_df[item_col], on=['item_id'], how='left')\n",")\n","\n","eval_feat"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":488},"executionInfo":{"elapsed":623,"status":"ok","timestamp":1642188058306,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"ECh2RhahNSsQ","outputId":"2cafea2d-7165-47ce-a45b-cc5acd6840e6"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
implicit_scoreageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14content_typecountries_maxfor_kidsage_ratingstudios_maxgenres_maxgenres_mingenres_medrelease_noveltywatched_in_7_dayswatch_ts_stdtrend_slopewatch_ts_quantile_95_diffwatch_ts_median_diffwatched_in_all_timemale_watchers_fractionfemale_watchers_fractionyounger_35_fractionolder_35_fraction
01.000000age_35_44income_20_40FFalse21film5065False16148982418182021193460.7875850.19578301460.4222220.3555560.3111110.466667
11.000000age_unknownincome_unknownsex_unknownFalse11series4340False121489813391339133940000000000
20.500000age_18_24income_20_40MFalse21film5065False18148985431122424185546.3813-0.0692771574890.4318180.4090910.4204550.420455
31.000000age_25_34income_20_40MTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
40.707107age_18_24income_20_40MFalse31NoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
.................................................................................
670.707107age_35_44income_20_40FFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
680.447214age_18_24income_20_40MFalse21film295False18148983858313140.540000000000
690.500000age_25_34income_40_60MTrue11film1272False1814898543125435035000686810000
701.000000age_45_54income_40_60MTrue20film5065False161489838582778350340000000000
710.707107age_65_infincome_20_40FFalse55NoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNoneNone
\n","

72 rows × 26 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" implicit_score age ... younger_35_fraction older_35_fraction\n","0 1.000000 age_35_44 ... 0.311111 0.466667\n","1 1.000000 age_unknown ... 0 0\n","2 0.500000 age_18_24 ... 0.420455 0.420455\n","3 1.000000 age_25_34 ... None None\n","4 0.707107 age_18_24 ... None None\n",".. ... ... ... ... ...\n","67 0.707107 age_35_44 ... None None\n","68 0.447214 age_18_24 ... 0 0\n","69 0.500000 age_25_34 ... 0 0\n","70 1.000000 age_45_54 ... 0 0\n","71 0.707107 age_65_inf ... None None\n","\n","[72 rows x 26 columns]"]},"execution_count":26,"metadata":{},"output_type":"execute_result"}],"source":["# Attach the selected per-item statistics to both samples\n","item_stats = pd.read_csv(os.path.join(ds.processed_dir, 'item_stats.csv'))[item_stats_col]\n","train_feat = train_feat.join(item_stats.set_index('item_id'), on='item_id', how='left')\n","eval_feat = eval_feat.join(item_stats.set_index('item_id'), on='item_id', how='left')\n","\n","# Id columns are dropped from the features; the target becomes the label\n","drop_col = ['user_id', 'item_id']\n","target_col = ['target']\n","\n","# Missing values are replaced by the string 'None' in every column\n","X_train = train_feat.drop(columns=drop_col + target_col).fillna('None')\n","y_train = train_feat[target_col]\n","X_val = eval_feat.drop(columns=drop_col + target_col).fillna('None')\n","y_val = eval_feat[target_col]\n","\n","X_train[cat_col] = X_train[cat_col].astype('category')\n","X_val[cat_col] = X_val[cat_col].astype('category')\n","\n","X_train"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3256,"status":"ok","timestamp":1642188064223,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"NPARxAndNoNV","outputId":"7d21dbf9-a091-40c6-990e-84db9fb7f9a1"},"outputs":[{"name":"stdout","output_type":"stream","text":["0:\tlearn: 0.6814278\ttest: 0.6853672\tbest: 0.6853672 (0)\ttotal: 57.5ms\tremaining: 1m 54s\n","200:\tlearn: 0.1793975\ttest: 
0.5471784\tbest: 0.5422113 (146)\ttotal: 1.19s\tremaining: 10.7s\n","Stopped by overfitting detector (200 iterations wait)\n","\n","bestTest = 0.5422113159\n","bestIteration = 146\n","\n","Shrink model to first 147 iterations.\n"]},{"data":{"text/plain":[""]},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":["# Training CatBoost classifier with parameters previously chosen on cross validation\n","params = {\n"," 'subsample': 0.97, \n"," 'max_depth': 9,\n"," 'n_estimators': 2000,\n"," 'learning_rate': 0.03, \n"," 'scale_pos_weight': num_negatives, \n"," 'l2_leaf_reg': 27, \n"," 'thread_count': -1,\n"," 'verbose': 200,\n"," 'task_type': \"CPU\",\n"," 'devices': '0:1',\n"," # 'bootstrap_type': 'Poisson'\n","}\n","boost_model = CatBoostClassifier(**params)\n","boost_model.fit(X_train,\n"," y_train,\n"," eval_set=(X_val, y_val),\n"," early_stopping_rounds=200,\n"," cat_features=cat_col,\n"," plot=False)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"KH3ZUwyqmS-f"},"outputs":[],"source":["with open(\"catboost_trained.pkl\", 'wb') as f:\n"," pickle.dump(boost_model, f)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1642188107612,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"NRchFAmAPdBh","outputId":"b3675f02-b241-4c38-8d08-095ffc172f50"},"outputs":[{"data":{"text/plain":[""]},"execution_count":29,"metadata":{},"output_type":"execute_result"}],"source":["# with open(\"catboost_trained.pkl\", 'rb') as f:\n","# boost_model = pickle.load(f)\n","boost_model"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"X0yG3zmjPqC1"},"outputs":[],"source":["random_items = list(np.random.choice(interactions_df['user_id'], size=5, replace=False))\n","cold_items = [10000, 
20000]\n","random_items.extend(cold_items)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":15,"status":"ok","timestamp":1642188281959,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"JBqnIdlKQFIy","outputId":"6e1a8e42-562f-4ad9-a45f-f196fc4e5f74"},"outputs":[{"data":{"text/plain":["array([ 20000, 133452, 332832, 341075, 622570, 728808])"]},"execution_count":31,"metadata":{},"output_type":"execute_result"}],"source":["warm_idx = np.intersect1d(random_items, interactions_df['user_id'].unique())\n","warm_idx"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xvkws1XHQP1O"},"outputs":[],"source":["_candidates = candidates.copy()\n","_candidates.dropna(subset=['item_id'], axis=0, inplace=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":488},"executionInfo":{"elapsed":1745,"status":"ok","timestamp":1642188284831,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"DmhCuWoOQgA4","outputId":"38d1ca58-104c-4aca-c15f-3c5f70bc9510"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_scoreidageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14content_typecountries_maxfor_kidsage_ratingstudios_maxgenres_maxgenres_mingenres_medrelease_novelty
0301992620.7071070age_unknownincome_unknownsex_unknownFalse2.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
1302031050.7071071age_unknownincome_unknownsex_unknownFalse2.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
2301998860.7071072age_unknownincome_unknownsex_unknownFalse2.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
3302199040.7071073age_unknownincome_unknownsex_unknownFalse2.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
4302032060.7071074age_unknownincome_unknownsex_unknownFalse2.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
............................................................
210914810975442637210.5773502109148age_25_34income_20_40FTrue1.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
210914910975442271130.5773502109149age_25_34income_20_40FTrue1.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
210915010975442398300.5773502109150age_25_34income_20_40FTrue1.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
210915110975441390020.5773502109151age_25_34income_20_40FTrue1.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
210915210975442431270.5773502109152age_25_34income_20_40FTrue1.01.0NaNNaNNaNNaNNaNNaNNaNNaNNaN
\n","

2109153 rows × 19 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id ... genres_med release_novelty\n","0 30 199262 ... NaN NaN\n","1 30 203105 ... NaN NaN\n","2 30 199886 ... NaN NaN\n","3 30 219904 ... NaN NaN\n","4 30 203206 ... NaN NaN\n","... ... ... ... ... ...\n","2109148 1097544 263721 ... NaN NaN\n","2109149 1097544 227113 ... NaN NaN\n","2109150 1097544 239830 ... NaN NaN\n","2109151 1097544 139002 ... NaN NaN\n","2109152 1097544 243127 ... NaN NaN\n","\n","[2109153 rows x 19 columns]"]},"execution_count":33,"metadata":{},"output_type":"execute_result"}],"source":["submit_feat = _candidates.merge(users_df[user_col],\n"," on=['user_id'],\n"," how='left') \\\n"," .merge(items_df[item_col],\n"," on=['item_id'],\n"," how='left')\n","submit_feat"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"P00tQC_ZQrYm"},"outputs":[],"source":["full_train = submit_feat.fillna('None')\n","full_train[cat_col] = full_train[cat_col].astype('category')\n","# item_stats = pd.read_csv('data/item_stats_for_submit.csv')\n","full_train = full_train.join(item_stats.set_index('item_id'),\n"," on='item_id', how='left')"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":488},"executionInfo":{"elapsed":733,"status":"ok","timestamp":1642188360258,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"mwa-Fp1wDbW-","outputId":"a80d2a9c-dba5-4d17-f339-dea2e3894792"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_scoreidageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14content_typecountries_maxfor_kidsage_ratingstudios_maxgenres_maxgenres_mingenres_medrelease_noveltywatched_in_7_dayswatch_ts_stdtrend_slopewatch_ts_quantile_95_diffwatch_ts_median_diffwatched_in_all_timemale_watchers_fractionfemale_watchers_fractionyounger_35_fractionolder_35_fraction
0301992620.7071070age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1302031050.7071071age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2301998860.7071072age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3302199040.7071073age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4302032060.7071074age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..........................................................................................
210914810975442637210.577352109148age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210914910975442271130.577352109149age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210915010975442398300.577352109150age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210915110975441390020.577352109151age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210915210975442431270.577352109152age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n","

2109153 rows × 29 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id ... younger_35_fraction older_35_fraction\n","0 30 199262 ... NaN NaN\n","1 30 203105 ... NaN NaN\n","2 30 199886 ... NaN NaN\n","3 30 219904 ... NaN NaN\n","4 30 203206 ... NaN NaN\n","... ... ... ... ... ...\n","2109148 1097544 263721 ... NaN NaN\n","2109149 1097544 227113 ... NaN NaN\n","2109150 1097544 239830 ... NaN NaN\n","2109151 1097544 139002 ... NaN NaN\n","2109152 1097544 243127 ... NaN NaN\n","\n","[2109153 rows x 29 columns]"]},"execution_count":38,"metadata":{},"output_type":"execute_result"}],"source":["full_train"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":892,"status":"ok","timestamp":1642188385461,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"5vqmW3AADiFL","outputId":"72dc7b75-7c6a-4d58-f554-01f8a3a76b78"},"outputs":[{"data":{"text/plain":["['user_id',\n"," 'item_id',\n"," 'implicit_score',\n"," 'age',\n"," 'income',\n"," 'sex',\n"," 'kids_flg',\n"," 'user_watch_cnt_all',\n"," 'user_watch_cnt_last_14',\n"," 'content_type',\n"," 'countries_max',\n"," 'for_kids',\n"," 'age_rating',\n"," 'studios_max',\n"," 'genres_max',\n"," 'genres_min',\n"," 'genres_med',\n"," 'release_novelty',\n"," 'watched_in_7_days',\n"," 'watch_ts_std',\n"," 'trend_slope',\n"," 'watch_ts_quantile_95_diff',\n"," 'watch_ts_median_diff',\n"," 'watched_in_all_time',\n"," 'male_watchers_fraction',\n"," 'female_watchers_fraction',\n"," 'younger_35_fraction',\n"," 'older_35_fraction']"]},"execution_count":39,"metadata":{},"output_type":"execute_result"}],"source":["# Column order the classifier was trained with, prefixed by the id columns.\n","# Built in this cell so that displaying it works on a fresh kernel (Restart & Run All).\n","cols = ['user_id', 'item_id'] + list(boost_model.feature_names_)\n","cols"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":488},"executionInfo":{"elapsed":1394,"status":"ok","timestamp":1642188508132,"user":{"displayName":"Sparsh 
Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"i96FiqviQu42","outputId":"679f1b7a-7de9-46b7-a850-ddeeeedcf102"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idimplicit_scoreageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14content_typecountries_maxfor_kidsage_ratingstudios_maxgenres_maxgenres_mingenres_medrelease_noveltywatched_in_7_dayswatch_ts_stdtrend_slopewatch_ts_quantile_95_diffwatch_ts_median_diffwatched_in_all_timemale_watchers_fractionfemale_watchers_fractionyounger_35_fractionolder_35_fraction
0301992620.707107age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1302031050.707107age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2301998860.707107age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3302199040.707107age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4302032060.707107age_unknownincome_unknownsex_unknownFalse21NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
.......................................................................................
210914810975442637210.57735age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210914910975442271130.57735age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210915010975442398300.57735age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210915110975441390020.57735age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
210915210975442431270.57735age_25_34income_20_40FTrue11NoneNoneNoneNoneNoneNoneNoneNoneNoneNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n","

2109153 rows × 28 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id ... younger_35_fraction older_35_fraction\n","0 30 199262 ... NaN NaN\n","1 30 203105 ... NaN NaN\n","2 30 199886 ... NaN NaN\n","3 30 219904 ... NaN NaN\n","4 30 203206 ... NaN NaN\n","... ... ... ... ... ...\n","2109148 1097544 263721 ... NaN NaN\n","2109149 1097544 227113 ... NaN NaN\n","2109150 1097544 239830 ... NaN NaN\n","2109151 1097544 139002 ... NaN NaN\n","2109152 1097544 243127 ... NaN NaN\n","\n","[2109153 rows x 28 columns]"]},"execution_count":40,"metadata":{},"output_type":"execute_result"}],"source":["# Renaming columns to match classifier feature names\n","cols = ['user_id', 'item_id']\n","cols.extend(boost_model.feature_names_)\n","cols = cols[:7] + ['boost_user_watch_cnt_all', 'boost_user_watch_cnt_last_14'] + cols[9:]\n","full_train = full_train[cols]\n","full_train_new_names = ['user_id', 'item_id'] + boost_model.feature_names_\n","full_train.columns = full_train_new_names\n","full_train"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":7830,"status":"ok","timestamp":1642188520391,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"PdLtTGROQ11b","outputId":"5e6dc810-1edb-4565-88af-1b9487ee9372"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_id
030[16986, 199262, 203105, 199886, 219904, 203206...
155[12232, 7634, 6489, 15987, 14556, 5573, 15058,...
2106[8821, 10700, 10497, 3399, 9154, 3629, 12189, ...
3144[79668, 85771, 79780, 100360, 87071, 80158, 14...
4155[10747, 2236, 67784, 78954, 139975, 137705, 22...
.........
222271097444[7300, 16181, 110702, 114582, 113097, 86716, 1...
222281097459[68578, 71663, 68642, 74552, 71682, 68811, 777...
222291097470[196242, 201115, 196364, 201461, 203105, 19904...
222301097508[207809, 210545, 208388, 212164, 213627, 21296...
222311097544[71485, 75317, 72714, 94880, 75852, 72851, 112...
\n","

22232 rows × 2 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id\n","0 30 [16986, 199262, 203105, 199886, 219904, 203206...\n","1 55 [12232, 7634, 6489, 15987, 14556, 5573, 15058,...\n","2 106 [8821, 10700, 10497, 3399, 9154, 3629, 12189, ...\n","3 144 [79668, 85771, 79780, 100360, 87071, 80158, 14...\n","4 155 [10747, 2236, 67784, 78954, 139975, 137705, 22...\n","... ... ...\n","22227 1097444 [7300, 16181, 110702, 114582, 113097, 86716, 1...\n","22228 1097459 [68578, 71663, 68642, 74552, 71682, 68811, 777...\n","22229 1097470 [196242, 201115, 196364, 201461, 203105, 19904...\n","22230 1097508 [207809, 210545, 208388, 212164, 213627, 21296...\n","22231 1097544 [71485, 75317, 72714, 94880, 75852, 72851, 112...\n","\n","[22232 rows x 2 columns]"]},"execution_count":41,"metadata":{},"output_type":"execute_result"}],"source":["# Making predictions for warm users\n","y_pred_all = boost_model.predict_proba(full_train.drop(\n"," ['user_id', 'item_id'], axis=1))\n","full_train['boost_pred'] = y_pred_all[:, 1]\n","full_train = full_train[['user_id', 'item_id', 'boost_pred']]\n","full_train = full_train.sort_values(by=['user_id', 'boost_pred'],\n"," ascending=[True, False])\n","full_train['rank'] = full_train.groupby('user_id').cumcount() + 1\n","full_train = full_train[full_train['rank'] <= 10].drop('boost_pred', axis=1)\n","full_train['item_id'] = full_train['item_id'].astype('int64')\n","boost_recs = full_train.groupby('user_id')['item_id'].apply(list)\n","boost_recs = pd.DataFrame(boost_recs)\n","boost_recs.reset_index(inplace=True)\n","boost_recs"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1642188521208,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"2bCBSBe_57JM","outputId":"5328db55-be6d-42fd-c8a3-4857f13c1e32"},"outputs":[{"data":{"text/plain":["[20000, 728808, 
622570, 133452, 10000, 341075]"]},"execution_count":42,"metadata":{},"output_type":"execute_result"}],"source":["# Making predictions for cold users with Popular Recommender\n","idx_for_popular = list(set(pd.Series(random_items).unique()).difference(\n"," set(boost_recs['user_id'].unique())))\n","idx_for_popular"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":6,"status":"ok","timestamp":1642188521956,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"yCctjmQT6AJV","outputId":"b2273d78-9891-4580-d238-1598366c3bb7"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idlast_watch_dttotal_durwatched_pct
0917575103532021-03-131113158
1060275080155742021-03-1367011
105912051795502021-03-1332456100
10581504561152021-03-1322830100
105792904101352021-03-13370971
..................
54291448487091572021-08-2294356
542913842857322021-08-226570100
542912818134115052021-08-22600
542923314358141112021-08-22259035
54762475551756932021-08-22617488
\n","

547625 rows × 5 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id last_watch_dt total_dur watched_pct\n","0 917575 10353 2021-03-13 11131 58\n","1060 275080 15574 2021-03-13 670 11\n","1059 120517 9550 2021-03-13 32456 100\n","1058 15045 6115 2021-03-13 22830 100\n","1057 92904 10135 2021-03-13 3709 71\n","... ... ... ... ... ...\n","542914 484870 9157 2021-08-22 9435 6\n","542913 8428 5732 2021-08-22 6570 100\n","542912 818134 11505 2021-08-22 60 0\n","542923 314358 14111 2021-08-22 2590 35\n","547624 755517 5693 2021-08-22 6174 88\n","\n","[547625 rows x 5 columns]"]},"execution_count":43,"metadata":{},"output_type":"execute_result"}],"source":["interactions_df"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"tQKdXPRl6C5f"},"outputs":[],"source":["pop_model = PopularRecommender(days=30, dt_column='last_watch_dt',\n"," with_filter=True)\n","pop_model.fit(interactions_df)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":238},"executionInfo":{"elapsed":8734,"status":"ok","timestamp":1642188532800,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"ec2JpRUH6LZv","outputId":"66f7fa3a-78dc-4051-b1f4-59a7b374e4dd"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_id
410000[10440, 9728, 15297, 13865, 3734, 12192, 4151,...
020000[10440, 9728, 15297, 13865, 3734, 12192, 4151,...
1728808[10440, 9728, 15297, 13865, 12192, 4151, 11863...
2622570[10440, 9728, 15297, 13865, 12192, 4151, 11863...
3133452[10440, 9728, 15297, 13865, 3734, 12192, 4151,...
5341075[10440, 9728, 15297, 13865, 3734, 12192, 4151,...
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id\n","4 10000 [10440, 9728, 15297, 13865, 3734, 12192, 4151,...\n","0 20000 [10440, 9728, 15297, 13865, 3734, 12192, 4151,...\n","1 728808 [10440, 9728, 15297, 13865, 12192, 4151, 11863...\n","2 622570 [10440, 9728, 15297, 13865, 12192, 4151, 11863...\n","3 133452 [10440, 9728, 15297, 13865, 3734, 12192, 4151,...\n","5 341075 [10440, 9728, 15297, 13865, 3734, 12192, 4151,..."]},"execution_count":45,"metadata":{},"output_type":"execute_result"}],"source":["recs_popular = pop_model.recommend_with_filter(interactions_df, idx_for_popular, top_K=10)\n","recs_popular"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"KTbSaiIyRBgu"},"outputs":[],"source":["all_recs = pd.concat([boost_recs, recs_popular], axis=0)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"rLyH2YZaShKv"},"outputs":[],"source":["def fill_with_popular(recs, pop_model_fitted, interactions_df, top_K=10):\n","    \"\"\"\n","    Fills missing recommendations with Popular Recommender.\n","    Takes top_K first recommendations if length of recs exceeds top_K.\n","\n","    recs: DataFrame with `user_id` and a list-valued `item_id` column.\n","        It is copied, so the caller's frame is left unchanged.\n","    pop_model_fitted: fitted model exposing `recommend_with_filter`.\n","    interactions_df: interactions forwarded to `recommend_with_filter`.\n","    top_K: number of recommendations required per user.\n","\n","    Returns a DataFrame with `user_id` and `item_id`, where every list\n","    holds at most top_K items.\n","    \"\"\"\n","    recs = recs.copy()  # keep the helper 'len' column out of the caller's frame\n","    recs['len'] = recs['item_id'].apply(len)\n","\n","    # Lists that are already long enough are only cut down to top_K.\n","    # The cut uses top_K itself: a hard-coded 10 misaligned the masks and\n","    # produced NaN / wrong-length lists whenever top_K != 10.\n","    recs_good = recs[recs['len'] >= top_K].copy()\n","    recs_good['item_id'] = recs_good['item_id'].apply(lambda x: x[:top_K])\n","\n","    recs_bad = recs[recs['len'] < top_K].copy()\n",
"    if recs_bad.empty:\n","        # Nothing to fill, so the popular model is not queried at all\n","        return recs_good[['user_id', 'item_id']]\n","\n","    idx_for_filling = recs_bad['user_id'].unique()\n","    filling_recs = pop_model_fitted.recommend_with_filter(\n","        interactions_df, idx_for_filling, top_K=top_K)\n","    recs_bad = recs_bad.join(filling_recs.set_index('user_id'),\n","                             on='user_id', how='left', rsuffix='1')\n","    # Own recommendations first, popular items appended after them;\n","    # an empty own list simply yields the popular list.\n","    recs_bad['item_id'] = (recs_bad['item_id'] + recs_bad['item_id1']).apply(\n","        lambda x: x[:top_K])\n","\n","    total_recs = pd.concat([recs_good[['user_id', 'item_id']],\n","                            recs_bad[['user_id', 'item_id']]], axis=0)\n","    return total_recs"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":8980,"status":"ok","timestamp":1642188541766,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"397i5e_fmiHS","outputId":"eb66e6ee-3a57-451c-9c22-07a629ee8f4b"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_id
030[16986, 199262, 203105, 199886, 219904, 203206...
155[12232, 7634, 6489, 15987, 14556, 5573, 15058,...
2106[8821, 10700, 10497, 3399, 9154, 3629, 12189, ...
3144[79668, 85771, 79780, 100360, 87071, 80158, 14...
4155[10747, 2236, 67784, 78954, 139975, 137705, 22...
.........
220541087746[366, 4784, 33316, 63977, 10440, 9728, 15297, ...
221371092833[15355, 198132, 191636, 50599, 177761, 10440, ...
221591093784[296, 124311, 20002, 219743, 10440, 9728, 1529...
221601093836[1343, 11710, 3254, 1967, 3356, 5292, 70331, 2...
221711094683[15355, 198132, 191636, 50599, 177761, 10440, ...
\n","

22238 rows × 2 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id\n","0 30 [16986, 199262, 203105, 199886, 219904, 203206...\n","1 55 [12232, 7634, 6489, 15987, 14556, 5573, 15058,...\n","2 106 [8821, 10700, 10497, 3399, 9154, 3629, 12189, ...\n","3 144 [79668, 85771, 79780, 100360, 87071, 80158, 14...\n","4 155 [10747, 2236, 67784, 78954, 139975, 137705, 22...\n","... ... ...\n","22054 1087746 [366, 4784, 33316, 63977, 10440, 9728, 15297, ...\n","22137 1092833 [15355, 198132, 191636, 50599, 177761, 10440, ...\n","22159 1093784 [296, 124311, 20002, 219743, 10440, 9728, 1529...\n","22160 1093836 [1343, 11710, 3254, 1967, 3356, 5292, 70331, 2...\n","22171 1094683 [15355, 198132, 191636, 50599, 177761, 10440, ...\n","\n","[22238 rows x 2 columns]"]},"execution_count":48,"metadata":{},"output_type":"execute_result"}],"source":["# Filling short recommendations with popular items\n","all_recs = fill_with_popular(all_recs, pop_model, interactions_df)\n","all_recs"]},{"cell_type":"markdown","metadata":{"id":"unhZ55xCzSII"},"source":["## Baseline\n","\n","Popularity based model\n","\n","Ref: [Official baseline tutorial](https://github.com/recohut/notebooks/blob/main/extras/mts_baseline.ipynb)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"LT8NuO96TICh"},"outputs":[],"source":["def calculate_novelty(train_interactions, recommendations, top_n): \n"," users = recommendations['user_id'].unique()\n"," n_users = train_interactions['user_id'].nunique()\n"," n_users_per_item = train_interactions.groupby('item_id')['user_id'].nunique()\n","\n"," recommendations = recommendations.loc[recommendations['rank'] <= top_n].copy()\n"," recommendations['n_users_per_item'] = recommendations['item_id'].map(n_users_per_item)\n"," recommendations['n_users_per_item'] = recommendations['n_users_per_item'].fillna(1)\n"," recommendations['item_novelty'] = -np.log2(recommendations['n_users_per_item'] / n_users)\n","\n"," item_novelties = recommendations[['user_id', 'rank', 'item_novelty']]\n"," \n"," 
miuf_at_k = item_novelties.loc[item_novelties['rank'] <= top_n, ['user_id', 'item_novelty']]\n"," miuf_at_k = miuf_at_k.groupby('user_id').agg('mean').squeeze()\n","\n"," return miuf_at_k.reindex(users).mean()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"MujfY8TjTICi"},"outputs":[],"source":["def compute_metrics(train, test, recs, top_N):\n"," result = {}\n"," test_recs = test.set_index(['user_id', 'item_id']).join(recs.set_index(['user_id', 'item_id']))\n"," test_recs = test_recs.sort_values(by=['user_id', 'rank'])\n","\n"," test_recs['users_item_count'] = test_recs.groupby(level='user_id')['rank'].transform(np.size)\n"," test_recs['reciprocal_rank'] = (1 / test_recs['rank']).fillna(0)\n"," test_recs['cumulative_rank'] = test_recs.groupby(level='user_id').cumcount() + 1\n"," test_recs['cumulative_rank'] = test_recs['cumulative_rank'] / test_recs['rank']\n"," \n"," users_count = test_recs.index.get_level_values('user_id').nunique()\n","\n"," for k in range(1, top_N + 1):\n"," hit_k = f'hit@{k}'\n"," test_recs[hit_k] = test_recs['rank'] <= k\n"," result[f'Precision@{k}'] = (test_recs[hit_k] / k).sum() / users_count\n"," result[f'Recall@{k}'] = (test_recs[hit_k] / test_recs['users_item_count']).sum() / users_count\n"," \n"," result[f'MAP@{top_N}'] = (test_recs['cumulative_rank'] / test_recs['users_item_count']).sum() / users_count\n"," result[f'Novelty@{top_N}'] = calculate_novelty(train, recs, top_N)\n"," \n"," return pd.Series(result)"]},{"cell_type":"markdown","metadata":{"id":"P28xd48xTICz"},"source":["### Example on one fold"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"uJVqVnskTIC0"},"outputs":[],"source":["test = interactions_df[interactions_df['last_watch_dt'] == interactions_df['last_watch_dt'].max()]\n","train = interactions_df[interactions_df['last_watch_dt'] < interactions_df['last_watch_dt'].max()]"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yWAZei5ETIC1"},"outputs":[],"source":["pop_model = 
PopularRecommender(days=7, dt_column='last_watch_dt')\n","pop_model.fit(train)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":14,"status":"ok","timestamp":1642188574386,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"t9hL7kFfTIC2","outputId":"fbf7c514-37eb-4f69-8510-b71704236b99"},"outputs":[{"data":{"text/plain":["array([ 9728, 15297, 10440, 13865, 12360, 14488, 12192, 512, 341,\n"," 3734])"]},"execution_count":54,"metadata":{},"output_type":"execute_result"}],"source":["top10_recs = pop_model.recommend()\n","top10_recs"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"TKmJ8SydTIC3"},"outputs":[],"source":["item_titles = pd.Series(items_df['title'].values, index=items_df['item_id']).to_dict()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":15,"status":"ok","timestamp":1642188574389,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"3n6RAbh6TIC4","outputId":"6b113f1b-2cbc-4d0f-9c81-bf20fcb16787"},"outputs":[{"data":{"text/plain":["['гнев человеческий',\n"," 'клиника счастья',\n"," 'хрустальный',\n"," 'девятаев',\n"," 'круэлла',\n"," 'мастер меча',\n"," 'фемида видит',\n"," 'рядовой чээрин',\n"," 'лето - это море',\n"," 'прабабушка легкого поведения']"]},"execution_count":56,"metadata":{},"output_type":"execute_result"}],"source":["list(map(item_titles.get, top10_recs))"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"elapsed":11,"status":"ok","timestamp":1642188574390,"user":{"displayName":"Sparsh 
Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"lVk4_qpvTIC4","outputId":"87000191-3012-4a90-e132-7969e06fa538"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_id
0936370[9728, 15297, 10440, 13865, 12360, 14488, 1219...
1279776[9728, 15297, 10440, 13865, 12360, 14488, 1219...
2321739[9728, 15297, 10440, 13865, 12360, 14488, 1219...
398693[9728, 15297, 10440, 13865, 12360, 14488, 1219...
4267998[9728, 15297, 10440, 13865, 12360, 14488, 1219...
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id\n","0 936370 [9728, 15297, 10440, 13865, 12360, 14488, 1219...\n","1 279776 [9728, 15297, 10440, 13865, 12360, 14488, 1219...\n","2 321739 [9728, 15297, 10440, 13865, 12360, 14488, 1219...\n","3 98693 [9728, 15297, 10440, 13865, 12360, 14488, 1219...\n","4 267998 [9728, 15297, 10440, 13865, 12360, 14488, 1219..."]},"execution_count":57,"metadata":{},"output_type":"execute_result"}],"source":["recs = pd.DataFrame({'user_id': test['user_id'].unique()})\n","top_N = 10\n","recs['item_id'] = pop_model.recommend(recs['user_id'], N=top_N)\n","recs.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"UPjTuarETIC5"},"outputs":[],"source":["recs = recs.explode('item_id')"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":426},"executionInfo":{"elapsed":9,"status":"ok","timestamp":1642188576837,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"XqjGRg15TIC6","outputId":"25c15140-b328-4cb9-d875-8f6733f89229"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idrank
093637097281
0936370152972
0936370104403
0936370138654
0936370123605
0936370144886
0936370121927
09363705128
09363703419
0936370373410
127977697281
1279776152972
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id rank\n","0 936370 9728 1\n","0 936370 15297 2\n","0 936370 10440 3\n","0 936370 13865 4\n","0 936370 12360 5\n","0 936370 14488 6\n","0 936370 12192 7\n","0 936370 512 8\n","0 936370 341 9\n","0 936370 3734 10\n","1 279776 9728 1\n","1 279776 15297 2"]},"execution_count":59,"metadata":{},"output_type":"execute_result"}],"source":["recs['rank'] = recs.groupby('user_id').cumcount() + 1\n","recs.head(top_N + 2)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2593,"status":"ok","timestamp":1642188579423,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"Yp74HHqnTIC6","outputId":"5a94f5ac-7cdd-4bc4-a50e-3ce1d97bdc7a"},"outputs":[{"data":{"text/plain":["Precision@1 0.034862\n","Recall@1 0.033231\n","Precision@2 0.033945\n","Recall@2 0.065418\n","Precision@3 0.032875\n","Recall@3 0.095387\n","Precision@4 0.029128\n","Recall@4 0.112564\n","Precision@5 0.023425\n","Recall@5 0.113175\n","Precision@6 0.022273\n","Recall@6 0.128721\n","Precision@7 0.021669\n","Recall@7 0.145846\n","Precision@8 0.019897\n","Recall@8 0.152727\n","Precision@9 0.018926\n","Recall@9 0.163532\n","Precision@10 0.018211\n","Recall@10 0.174618\n","MAP@10 0.071974\n","Novelty@10 6.242784\n","dtype: float64"]},"execution_count":60,"metadata":{},"output_type":"execute_result"}],"source":["compute_metrics(train, test, recs, 10)"]},{"cell_type":"markdown","metadata":{"id":"_3I9v8q7UlYk"},"source":["### Fold validation\n","\n","Let's take the last 3 weeks from our data and test them sequentially (one test fold per week). 
Don't forget about the cold start problem."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":688,"status":"ok","timestamp":1642188582610,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"8VkbYiXhTIC8","outputId":"19421d57-7726-4866-e826-ec8a7298d36b"},"outputs":[{"data":{"text/plain":["(Timestamp('2021-08-01 00:00:00'), Timestamp('2021-08-22 00:00:00'))"]},"execution_count":61,"metadata":{},"output_type":"execute_result"}],"source":["last_date = interactions_df['last_watch_dt'].max().normalize()\n","folds = 3\n","start_date = last_date - pd.Timedelta(days=folds*7)\n","start_date, last_date"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":6,"status":"ok","timestamp":1642188583154,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"1ByOzE3HTIC9","outputId":"573e4e37-8d82-4e81-fe1c-986cf0e7a466"},"outputs":[{"data":{"text/plain":["(3, 3)"]},"execution_count":62,"metadata":{},"output_type":"execute_result"}],"source":["cv = TimeRangeSplit(start_date=start_date, periods=folds+1, freq='W')\n","\n","cv.max_n_splits, cv.get_n_splits(interactions_df, datetime_column='last_watch_dt')"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1642188585451,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"cwvzIFYcTIC9","outputId":"9edcb93e-acb9-406b-9e10-4e019c598275"},"outputs":[{"data":{"text/plain":["DatetimeIndex(['2021-08-01', '2021-08-08', '2021-08-15', '2021-08-22'], 
dtype='datetime64[ns]', freq='W-SUN')"]},"execution_count":63,"metadata":{},"output_type":"execute_result"}],"source":["cv.date_range"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2877,"status":"ok","timestamp":1642188588772,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"6-Ridv5hTIC-","outputId":"b3ad2cd1-5a25-4a09-8c61-0bab9774fe19"},"outputs":[{"name":"stdout","output_type":"stream","text":["Already seen number: 0\n","Already seen number: 0\n","Already seen number: 0\n"]}],"source":["folds_with_stats = list(cv.split(\n"," interactions_df, \n"," user_column='user_id',\n"," item_column='item_id',\n"," datetime_column='last_watch_dt',\n"," fold_stats=True\n","))\n","\n","folds_info_with_stats = pd.DataFrame([info for _, _, info in folds_with_stats])"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":257},"executionInfo":{"elapsed":13,"status":"ok","timestamp":1642188588773,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"pPHoISGQTIC_","outputId":"e3595ffb-eaee-4cc4-ad09-e400a4f71543"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Start dateEnd dateTrainNew usersNew users interactionsNew itemsNew items interactionsKnown interactionsTest
02021-08-012021-08-084209151936022608166907014717
12021-08-082021-08-154591471961522955136609015979
22021-08-152021-08-22498690205012403299476017371
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" Start date End date ... Known interactions Test\n","0 2021-08-01 2021-08-08 ... 0 14717\n","1 2021-08-08 2021-08-15 ... 0 15979\n","2 2021-08-15 2021-08-22 ... 0 17371\n","\n","[3 rows x 9 columns]"]},"execution_count":65,"metadata":{},"output_type":"execute_result"}],"source":["folds_info_with_stats"]},{"cell_type":"markdown","metadata":{"id":"oMuGmqVBTIC_"},"source":["### Popular on folds"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Q1bXpzaMTIC_"},"outputs":[],"source":["top_N = 10\n","last_n_days = 7"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Y9guCzRqTIDA","scrolled":true},"outputs":[],"source":["final_results = []\n","validation_results = pd.DataFrame()\n","\n","for train_idx, test_idx, info in folds_with_stats:\n"," train = interactions_df.loc[train_idx]\n"," test = interactions_df.loc[test_idx]\n"," \n"," pop_model = PopularRecommender(days=last_n_days, dt_column='last_watch_dt')\n"," pop_model.fit(train)\n","\n"," recs = pd.DataFrame({'user_id': test['user_id'].unique()})\n"," recs['item_id'] = pop_model.recommend(recs['user_id'], N=top_N)\n"," recs = recs.explode('item_id')\n"," recs['rank'] = recs.groupby('user_id').cumcount() + 1\n","\n"," fold_result = compute_metrics(train, test, recs, top_N)\n","\n"," validation_results = validation_results.append(fold_result, ignore_index=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":30,"status":"ok","timestamp":1642188603077,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"cyFK7eHhTIDA","outputId":"fc4c35a5-3c20-400f-c48d-4690dee2f79b"},"outputs":[{"data":{"text/plain":["MAP@10 0.039814\n","Novelty@10 5.778481\n","dtype: float64"]},"execution_count":68,"metadata":{},"output_type":"execute_result"}],"source":["validation_results.agg({'MAP@10':'mean', 
'Novelty@10':'mean'})"]},{"cell_type":"markdown","metadata":{"id":"hcFFZpFATIDA"},"source":["### Popular Prediction\n","\n","Let's see whether it makes sense to recommend popular items separately for each socio-demographic group"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"sfqxhgZuTIDB"},"outputs":[],"source":["train_idx, test_idx, info = folds_with_stats[0]\n","train = interactions_df.loc[train_idx]\n","test = interactions_df.loc[test_idx]\n","date_window_for_popular = train['last_watch_dt'].max() - pd.DateOffset(days=last_n_days)\n","train_slice = pd.merge(train[train['last_watch_dt'] >= date_window_for_popular], users_df, on='user_id', how='left')"]},{"cell_type":"markdown","metadata":{"id":"ydpUgqh6TIDB"},"source":["Some users have no features, so we need to define placeholder values for them"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":270},"executionInfo":{"elapsed":27,"status":"ok","timestamp":1642188603078,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"tH4n5EAnTIDC","outputId":"e8d1f2bd-23ef-4b83-b0bb-b91a08f4eac5"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idlast_watch_dttotal_durwatched_pctageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14user_watch_cnt_alluser_watch_cnt_last_14
068987164042021-07-2490516age_45_54income_20_40MFalse1.00.01.00.0
148271826242021-07-24189825age_18_24income_40_60FFalse1.00.04.03.0
2183195112392021-07-24103714age_35_44income_20_40FTrue5.00.05.00.0
3107753444572021-07-241512age_25_34income_20_40MFalse0.00.00.00.0
4274241162282021-07-241930618age_65_infincome_20_40FFalse4.00.04.00.0
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id ... user_watch_cnt_all user_watch_cnt_last_14\n","0 689871 6404 ... 1.0 0.0\n","1 482718 2624 ... 4.0 3.0\n","2 183195 11239 ... 5.0 0.0\n","3 1077534 4457 ... 0.0 0.0\n","4 274241 16228 ... 4.0 0.0\n","\n","[5 rows x 13 columns]"]},"execution_count":70,"metadata":{},"output_type":"execute_result"}],"source":["train_slice.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"b8VbywpUTIDC"},"outputs":[],"source":["train_slice.fillna({'age':'age_unknown',\n"," 'sex':'sex_unknown',\n"," 'income': 'income_unknown',\n"," 'kids_flg': False\n"," }, inplace=True)"]},{"cell_type":"markdown","metadata":{"id":"X8edftA_TIDD"},"source":["For example, you can compute popular items per group defined by age, sex, income or presence of children; below we group by age, sex and income"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":270},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1642188605384,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"D-q6NC2ZTIDD","outputId":"57c555d5-544d-4e78-f25b-e13d5b58436f"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_idlast_watch_dttotal_durwatched_pctageincomesexkids_flgboost_user_watch_cnt_allboost_user_watch_cnt_last_14user_watch_cnt_alluser_watch_cnt_last_14
068987164042021-07-2490516age_45_54income_20_40MFalse1.00.01.00.0
148271826242021-07-24189825age_18_24income_40_60FFalse1.00.04.03.0
2183195112392021-07-24103714age_35_44income_20_40FTrue5.00.05.00.0
3107753444572021-07-241512age_25_34income_20_40MFalse0.00.00.00.0
4274241162282021-07-241930618age_65_infincome_20_40FFalse4.00.04.00.0
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id ... user_watch_cnt_all user_watch_cnt_last_14\n","0 689871 6404 ... 1.0 0.0\n","1 482718 2624 ... 4.0 3.0\n","2 183195 11239 ... 5.0 0.0\n","3 1077534 4457 ... 0.0 0.0\n","4 274241 16228 ... 4.0 0.0\n","\n","[5 rows x 13 columns]"]},"execution_count":72,"metadata":{},"output_type":"execute_result"}],"source":["train_slice.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zFTxZZNBTIDD"},"outputs":[],"source":["soc_dem_recommendations = train_slice.groupby(\n"," ['age', 'sex', 'income', 'item_id']\n",").size().to_frame().reset_index()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":424},"executionInfo":{"elapsed":8,"status":"ok","timestamp":1642188606950,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"66aG48_cTIDD","outputId":"d8da5e9a-c3c5-4b4c-fbaf-c971ec676690"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
agesexincomeitem_id0
0age_18_24Fincome_0_20141
1age_18_24Fincome_0_201111
2age_18_24Fincome_0_201621
3age_18_24Fincome_0_202881
4age_18_24Fincome_0_203341
..................
18651age_unknownsex_unknownincome_unknown164881
18652age_unknownsex_unknownincome_unknown164981
18653age_unknownsex_unknownincome_unknown164993
18654age_unknownsex_unknownincome_unknown1650921
18655age_unknownsex_unknownincome_unknown165161
\n","

18656 rows × 5 columns

\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" age sex income item_id 0\n","0 age_18_24 F income_0_20 14 1\n","1 age_18_24 F income_0_20 111 1\n","2 age_18_24 F income_0_20 162 1\n","3 age_18_24 F income_0_20 288 1\n","4 age_18_24 F income_0_20 334 1\n","... ... ... ... ... ..\n","18651 age_unknown sex_unknown income_unknown 16488 1\n","18652 age_unknown sex_unknown income_unknown 16498 1\n","18653 age_unknown sex_unknown income_unknown 16499 3\n","18654 age_unknown sex_unknown income_unknown 16509 21\n","18655 age_unknown sex_unknown income_unknown 16516 1\n","\n","[18656 rows x 5 columns]"]},"execution_count":74,"metadata":{},"output_type":"execute_result"}],"source":["soc_dem_recommendations"]},{"cell_type":"markdown","metadata":{"id":"MMRvQGlxTIDE"},"source":["Now you just need to select for each user the most popular top_n objects in his group"]},{"cell_type":"markdown","metadata":{"id":"qxY3Q_uETIDE"},"source":["We can check this option on folds\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-Nm6HPYhTIDF"},"outputs":[],"source":["validation_results = pd.DataFrame()\n","\n","for train_idx, test_idx, info in folds_with_stats:\n"," train = interactions_df.loc[train_idx]\n"," test = interactions_df.loc[test_idx]\n"," date_window = train['last_watch_dt'].max() - pd.DateOffset(days=last_n_days)\n"," train_slice = pd.merge(train[train['last_watch_dt'] >= date_window], users_df, on='user_id', how='left')\n"," \n"," train_slice.fillna({\n"," 'age':'age_unknown',\n"," 'sex':'sex_unknown',\n"," 'income': 'income_unknown',\n"," 'kids_flg': False\n"," },inplace=True)\n"," \n"," soc_dem_recommendations = train_slice.groupby(\n"," ['age', 'sex', 'income', 'item_id']\n"," ).size().to_frame().reset_index()\n"," \n"," top_soc_dem = []\n","\n"," for age in soc_dem_recommendations.age.unique():\n"," for income in soc_dem_recommendations.income.unique():\n"," for sex in soc_dem_recommendations.sex.unique():\n"," top_items = soc_dem_recommendations[\n"," (soc_dem_recommendations.age 
== age)\n"," & (soc_dem_recommendations.income == income)\n"," & (soc_dem_recommendations.sex == sex)].sort_values(0, ascending=False).head(10).item_id.values\n"," top_soc_dem.append([age, income, sex, top_items])\n","\n"," top_soc_dem = pd.DataFrame(top_soc_dem, columns = ['age', 'income', 'sex', 'item_id'])\n"," \n"," recs = pd.DataFrame({'user_id': test['user_id'].unique()})\n"," recs = pd.merge(recs[['user_id']], users_df, on='user_id', how='left')\n"," recs.fillna({\n"," 'age':'age_unknown',\n"," 'sex':'sex_unknown',\n"," 'income': 'income_unknown',\n"," 'kids_flg': False\n"," }, inplace=True)\n"," \n"," recs = pd.merge(recs, top_soc_dem, on = ['age', 'sex', 'income'], how = 'left')\n"," recs = recs.drop(columns = ['age', 'sex', 'income'])\n"," \n"," recs = recs.explode('item_id')\n"," recs['rank'] = recs.groupby('user_id').cumcount() + 1\n"," fold_result = compute_metrics(train, test, recs, top_N)\n"," \n"," validation_results = validation_results.append(fold_result, ignore_index=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":17,"status":"ok","timestamp":1642188624221,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"k81s04vuTIDF","outputId":"b6d5d848-4ce9-4b88-aa4f-294094d0396c"},"outputs":[{"data":{"text/plain":["MAP@10 0.040677\n","Novelty@10 6.050588\n","dtype: float64"]},"execution_count":76,"metadata":{},"output_type":"execute_result"}],"source":["validation_results.agg({'MAP@10':'mean', 'Novelty@10':'mean'})"]},{"cell_type":"markdown","metadata":{"id":"8vbHhlUiTIDG"},"source":["In this case, the features by which you build the popular are selected, as well as the number of days that you take to calculate the popular"]},{"cell_type":"markdown","metadata":{"id":"OPv2gBKETIDG"},"source":["### 
Tfidf"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"pitmMjB2TIDH"},"outputs":[],"source":["users_inv_mapping = dict(enumerate(interactions_df['user_id'].unique()))\n","users_mapping = {v: k for k, v in users_inv_mapping.items()}\n","\n","items_inv_mapping = dict(enumerate(interactions_df['item_id'].unique()))\n","items_mapping = {v: k for k, v in items_inv_mapping.items()}"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"MQtorexITIDH"},"outputs":[],"source":["validation_results = pd.DataFrame()\n","\n","for train_idx, test_idx, info in folds_with_stats:\n"," train = interactions_df.loc[train_idx]\n","\n"," date_window = train['last_watch_dt'].max() - pd.DateOffset(days=60)\n"," train = train[train['last_watch_dt'] >= date_window]\n","\n"," test = interactions_df.loc[test_idx]\n","\n"," train_mat = get_coo_matrix(\n"," train,\n"," users_mapping=users_mapping,\n"," items_mapping=items_mapping,\n"," ).tocsr()\n","\n"," model = TFIDFRecommender(K=top_N)\n"," model.fit(train_mat.T, show_progress=False) \n","\n"," mapper = generate_implicit_recs_mapper( \n"," model,\n"," train_mat,\n"," top_N,\n"," users_mapping,\n"," items_inv_mapping,\n"," filter_already_liked_items=True\n"," )\n","\n"," recs = pd.DataFrame({'user_id': test['user_id'].unique()})\n"," recs['item_id'] = recs['user_id'].map(mapper)\n"," recs = recs.explode('item_id')\n"," recs['rank'] = recs.groupby('user_id').cumcount() + 1\n"," fold_result = compute_metrics(train, test, recs, top_N)\n","\n"," validation_results = validation_results.append(fold_result, ignore_index=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":27,"status":"ok","timestamp":1642188699563,"user":{"displayName":"Sparsh 
Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"tm0_BSCLTIDI","outputId":"2aa86238-a2d5-4f3a-9190-ea77c87dc56b"},"outputs":[{"data":{"text/plain":["MAP@10 0.698575\n","Novelty@10 17.440547\n","dtype: float64"]},"execution_count":81,"metadata":{},"output_type":"execute_result"}],"source":["validation_results.agg({'MAP@10':'mean', 'Novelty@10':'mean',})"]},{"cell_type":"markdown","metadata":{"id":"dzI0rVytTIDI"},"source":["Simply using the code above for submission won't work due to cold users. We'll have to figure out how to process them."]},{"cell_type":"markdown","metadata":{"id":"4d54eqKGTIDI"},"source":["### Predictions"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"OOswWzXxWxGK"},"outputs":[],"source":["random_items = list(np.random.choice(interactions_df['user_id'], size=5, replace=False))\n","cold_items = [10000, 20000]\n","random_items.extend(cold_items)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":25,"status":"ok","timestamp":1642188699565,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"SAbO_8EFXAgY","outputId":"11260a68-a229-48e2-a724-5e5d52019917"},"outputs":[{"data":{"text/plain":["[754950, 758416, 83485, 636568, 669127, 10000, 20000]"]},"execution_count":83,"metadata":{},"output_type":"execute_result"}],"source":["random_items"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"bZd04bj0TIDJ"},"outputs":[],"source":["train = interactions_df\n","test = random_items\n","\n","pop_model = PopularRecommender(days=last_n_days, dt_column='last_watch_dt')\n","pop_model.fit(train)\n","\n","recs = pd.DataFrame({'user_id': pd.Series(test).unique()})\n","recs['item_id'] = pop_model.recommend(recs['user_id'], N=top_N)\n","recs = 
recs.explode('item_id')\n","recs['rank'] = recs.groupby('user_id').cumcount() + 1\n","recs = recs.groupby('user_id').agg({'item_id': list}).reset_index()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"elapsed":23,"status":"ok","timestamp":1642188699568,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"hmb6rWr2TIDJ","outputId":"3a1a7d85-1d2a-4be6-993a-c02b902e18f1"},"outputs":[{"data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
user_iditem_id
010000[9728, 15297, 10440, 14488, 13865, 12192, 341,...
120000[9728, 15297, 10440, 14488, 13865, 12192, 341,...
283485[9728, 15297, 10440, 14488, 13865, 12192, 341,...
3636568[9728, 15297, 10440, 14488, 13865, 12192, 341,...
4669127[9728, 15297, 10440, 14488, 13865, 12192, 341,...
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" user_id item_id\n","0 10000 [9728, 15297, 10440, 14488, 13865, 12192, 341,...\n","1 20000 [9728, 15297, 10440, 14488, 13865, 12192, 341,...\n","2 83485 [9728, 15297, 10440, 14488, 13865, 12192, 341,...\n","3 636568 [9728, 15297, 10440, 14488, 13865, 12192, 341,...\n","4 669127 [9728, 15297, 10440, 14488, 13865, 12192, 341,..."]},"execution_count":85,"metadata":{},"output_type":"execute_result"}],"source":["recs.head()"]},{"cell_type":"markdown","metadata":{"id":"YT7-dpYKEqub"},"source":["---"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":714,"status":"ok","timestamp":1642188912940,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"},"user_tz":-330},"id":"SwzKjSnTFmfa","outputId":"490c855f-1a59-440e-8529-0690e102aa2c"},"outputs":[{"name":"stdout","output_type":"stream","text":["numpy 1.19.5\n","pandas 1.1.5\n","Sparsh A. 
\n","last updated: 2022-01-14 19:35:09 \n","\n","implicit 0.4.8\n","catboost 1.0.4\n","recohut 0.0.11\n","\n","compiler : GCC 7.5.0\n","system : Linux\n","release : 5.4.144+\n","machine : x86_64\n","processor : x86_64\n","CPU cores : 2\n","interpreter: 64bit\n"]}],"source":["!pip install -q watermark\n","%reload_ext watermark\n","%watermark -a \"Sparsh A.\" -m -iv -u -t -d -p implicit,catboost,recohut"]},{"cell_type":"markdown","metadata":{"id":"VaKjWG8IEquj"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"c1vxSboeEquj"},"source":["**END**"]}],"metadata":{"colab":{"authorship_tag":"ABX9TyNLNtjY3nwpcDeDocgy8rzQ","collapsed_sections":["1KypvcFZI64_"],"mount_file_id":"1FEZmnoLGIsTsGiK2gi1TsIHLAaWCXF_a","name":"itempop and two-stage recommender on mts data","provenance":[{"file_id":"1FEZmnoLGIsTsGiK2gi1TsIHLAaWCXF_a","timestamp":1642188758676}],"toc_visible":true},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"1150ae9b77d04ee089151cdf9b3c97fd":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_cd203aa954364d05a2d9197f04bac18a","IPY_MODEL_7d5a993575214d4189c013bb12fc9080","IPY_MODEL_552747c596b440929459610765a70c67"],"layout":"IPY_MODEL_6404fcf9360b4c8bafac3d0c62dc7d58"}},"163ead0dd46c434ea3412e462a0938db":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":
"StyleView","bar_color":null,"description_width":""}},"1cfcd7deb21741cc95481b1ece102ced":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"552747c596b440929459610765a70c67":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1cfcd7deb21741cc95481b1ece102ced","placeholder":"​","style":"IPY_MODEL_6773a61987794d45bf453ac8a8f78a34","value":" 266854/266854 [00:23<00:00, 
14342.15it/s]"}},"6404fcf9360b4c8bafac3d0c62dc7d58":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6773a61987794d45bf453ac8a8f78a34":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6b9c40da36c746169708e1251c893b47":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7d5a993575214d4189c013bb12fc9080":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"Flo
atProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_b6183320801a4148b75f6b36b65a9b13","max":266854,"min":0,"orientation":"horizontal","style":"IPY_MODEL_163ead0dd46c434ea3412e462a0938db","value":266854}},"b6183320801a4148b75f6b36b65a9b13":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd203aa954364d05a2d9197f04bac18a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e6f
b3757a3db480aa1be1f0a91e19f4d","placeholder":"​","style":"IPY_MODEL_6b9c40da36c746169708e1251c893b47","value":"100%"}},"e6fb3757a3db480aa1be1f0a91e19f4d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}}}}},"nbformat":4,"nbformat_minor":0}