{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            <div>\n",
       "                <p><b>SparkSession - hive</b></p>\n",
       "                \n",
       "        <div>\n",
       "            <p><b>SparkContext</b></p>\n",
       "\n",
       "            <p><a href=\"http://localhost:4041\">Spark UI</a></p>\n",
       "\n",
       "            <dl>\n",
       "              <dt>Version</dt>\n",
       "                <dd><code>v3.0.0</code></dd>\n",
       "              <dt>Master</dt>\n",
       "                <dd><code>local[*]</code></dd>\n",
       "              <dt>AppName</dt>\n",
       "                <dd><code>pyspark-shell</code></dd>\n",
       "            </dl>\n",
       "        </div>\n",
       "        \n",
       "            </div>\n",
       "        "
      ],
      "text/plain": [
       "<pyspark.sql.session.SparkSession at 0x119483ed0>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from replay.session_handler import State\n",
    "\n",
    "spark = State().session\n",
    "spark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>item_id</th>\n",
       "      <th>relevance</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1193</td>\n",
       "      <td>5</td>\n",
       "      <td>978300760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>661</td>\n",
       "      <td>3</td>\n",
       "      <td>978302109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>914</td>\n",
       "      <td>3</td>\n",
       "      <td>978301968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3408</td>\n",
       "      <td>4</td>\n",
       "      <td>978300275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>2355</td>\n",
       "      <td>5</td>\n",
       "      <td>978824291</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  item_id  relevance  timestamp\n",
       "0        1     1193          5  978300760\n",
       "1        1      661          3  978302109\n",
       "2        1      914          3  978301968\n",
       "3        1     3408          4  978300275\n",
       "4        1     2355          5  978824291"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"data/ml1m_ratings.dat\", sep=\"\\t\", names=[\"user_id\", \"item_id\", \"relevance\", \"timestamp\"])\n",
    "items = pd.read_csv(\"data/ml1m_items.dat\", sep=\"\\t\", names=[\"item_id\", \"titile\", \"genres\"])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from replay.data_preparator import DataPreparator\n",
    "\n",
    "log = DataPreparator().transform(\n",
    "    data=df,\n",
    "    columns_names={\n",
    "        \"user_id\": \"user_id\",\n",
    "        \"item_id\": \"item_id\",\n",
    "        \"relevance\": \"relevance\",\n",
    "        \"timestamp\": \"timestamp\"\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.preprocessing import MultiLabelBinarizer,LabelBinarizer\n",
    "\n",
    "mlb = MultiLabelBinarizer()\n",
    "lb = LabelBinarizer()\n",
    "item_features = pd.DataFrame(mlb.fit_transform(items.genres.apply(lambda x: x.split(\"|\"))),\n",
    "                   columns=list(map(lambda x: f\"genre_{x}\",mlb.classes_)),\n",
    "                   index=items.item_id).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_features_spark = DataPreparator().transform(\n",
    "    data=item_features,\n",
    "    columns_names={\n",
    "        \"item_id\": \"item_id\"\n",
    "    }\n",
    ").drop(\"timestamp\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from replay.splitters import UserSplitter\n",
    "\n",
    "second_stage_splitter = UserSplitter(\n",
    "    drop_cold_items=True,\n",
    "    drop_cold_users=True,\n",
    "    item_test_size=10,\n",
    "    seed=1234,\n",
    "    shuffle=True\n",
    ")\n",
    "\n",
    "first_stage_splitter = UserSplitter(\n",
    "    drop_cold_items=False, item_test_size=0.5, shuffle=True, seed=42\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/darel/python/sponge-bob-magic/.new_env2/lib/python3.7/site-packages/lightfm/_lightfm_fast.py:9: UserWarning: LightFM was compiled without OpenMP support. Only a single thread will be used.\n",
      "  warnings.warn('LightFM was compiled without OpenMP support. '\n"
     ]
    }
   ],
   "source": [
    "from replay.models import ALSWrap\n",
    "# при 98 все падает с Java heap space error\n",
    "first_model = ALSWrap(rank=40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from replay.models import ClassifierRec\n",
    "from pyspark.ml.classification import RandomForestClassifier\n",
    "second_model = ClassifierRec(RandomForestClassifier(seed=47), use_recs_value=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Двухуровневый сценарий со статистическими фичами"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from replay.scenarios import TwoStagesScenario\n",
    "from replay.metrics import NDCG, HitRate, Precision, Recall, RocAuc\n",
    "\n",
    "two_stages_with_stat = TwoStagesScenario(\n",
    "    second_stage_splitter=second_stage_splitter,\n",
    "    second_model=second_model,\n",
    "    first_model=first_model,\n",
    "    metrics={NDCG(): [1, 5, 10], HitRate(): [1, 5, 10]},\n",
    "    stat_features=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "02-Mar-21 18:18:31, replay, DEBUG: mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "DEBUG:replay:mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "02-Mar-21 18:18:34, replay, DEBUG: test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "DEBUG:replay:test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "02-Mar-21 18:18:37, replay, DEBUG: first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "DEBUG:replay:first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "02-Mar-21 18:18:39, replay, DEBUG: first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "DEBUG:replay:first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "02-Mar-21 18:18:39, replay, DEBUG: Начало обучения ALSWrap\n",
      "DEBUG:replay:Начало обучения ALSWrap\n",
      "02-Mar-21 18:18:39, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:18:40, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:18:51, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:19:42, replay, DEBUG: баланс классов: положительных 164401 из 604000\n",
      "DEBUG:replay:баланс классов: положительных 164401 из 604000\n",
      "02-Mar-21 18:19:42, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:19:44, replay, WARNING: Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "WARNING:replay:Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "02-Mar-21 18:19:45, replay, DEBUG: Начало обучения ClassifierRec\n",
      "DEBUG:replay:Начало обучения ClassifierRec\n",
      "02-Mar-21 18:19:45, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:20:27, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:23:16, replay, DEBUG: ROC AUC модели второго уровня (как классификатора): 0.8018\n",
      "DEBUG:replay:ROC AUC модели второго уровня (как классификатора): 0.8018\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.69 s, sys: 674 ms, total: 4.37 s\n",
      "Wall time: 5min 47s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.548841</td>\n",
       "      <td>0.706954</td>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.174369</td>\n",
       "      <td>0.148692</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     HitRate@1  HitRate@5  HitRate@10    NDCG@1    NDCG@5  \\\n",
       "two_stages_scenario   0.213576   0.548841    0.706954  0.213576  0.174369   \n",
       "\n",
       "                      NDCG@10  \n",
       "two_stages_scenario  0.148692  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time \n",
    "recs_with_stat = two_stages_with_stat.get_recs(log, 10, item_features=item_features_spark)\n",
    "two_stages_with_stat.experiment.results\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Двухуровневый сценарий без статистических фичей"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "02-Mar-21 18:25:17, replay, DEBUG: mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "DEBUG:replay:mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "02-Mar-21 18:25:17, replay, DEBUG: test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "DEBUG:replay:test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "02-Mar-21 18:25:18, replay, DEBUG: first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "DEBUG:replay:first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "02-Mar-21 18:25:18, replay, DEBUG: first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "DEBUG:replay:first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "02-Mar-21 18:25:18, replay, DEBUG: Начало обучения ALSWrap\n",
      "DEBUG:replay:Начало обучения ALSWrap\n",
      "02-Mar-21 18:25:18, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:25:18, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:25:26, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:26:08, replay, DEBUG: баланс классов: положительных 164401 из 604000\n",
      "DEBUG:replay:баланс классов: положительных 164401 из 604000\n",
      "02-Mar-21 18:26:08, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:26:10, replay, WARNING: Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "WARNING:replay:Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "02-Mar-21 18:26:11, replay, DEBUG: Начало обучения ClassifierRec\n",
      "DEBUG:replay:Начало обучения ClassifierRec\n",
      "02-Mar-21 18:26:11, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:27:03, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:29:48, replay, DEBUG: ROC AUC модели второго уровня (как классификатора): 0.8006\n",
      "DEBUG:replay:ROC AUC модели второго уровня (как классификатора): 0.8006\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.63 s, sys: 629 ms, total: 4.26 s\n",
      "Wall time: 5min 22s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.560762</td>\n",
       "      <td>0.721358</td>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.180095</td>\n",
       "      <td>0.153606</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     HitRate@1  HitRate@5  HitRate@10    NDCG@1    NDCG@5  \\\n",
       "two_stages_scenario   0.223675   0.560762    0.721358  0.223675  0.180095   \n",
       "\n",
       "                      NDCG@10  \n",
       "two_stages_scenario  0.153606  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "two_stages_without_stat = TwoStagesScenario(\n",
    "    second_stage_splitter=second_stage_splitter,\n",
    "    second_model=second_model,\n",
    "    first_model=first_model,\n",
    "    metrics={NDCG(): [1, 5, 10], HitRate(): [1, 5, 10]},\n",
    "    stat_features=False\n",
    ")\n",
    "recs_without_stat = two_stages_without_stat.get_recs(log, 10, item_features=item_features_spark)\n",
    "two_stages_without_stat.experiment.results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.548841</td>\n",
       "      <td>0.706954</td>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.174369</td>\n",
       "      <td>0.148692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_without_stat</th>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.560762</td>\n",
       "      <td>0.721358</td>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.180095</td>\n",
       "      <td>0.153606</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         HitRate@1  HitRate@5  HitRate@10    NDCG@1    NDCG@5  \\\n",
       "two_stages_scenario       0.213576   0.548841    0.706954  0.213576  0.174369   \n",
       "two_stages_without_stat   0.223675   0.560762    0.721358  0.223675  0.180095   \n",
       "\n",
       "                          NDCG@10  \n",
       "two_stages_scenario      0.148692  \n",
       "two_stages_without_stat  0.153606  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "two_stages_with_stat.experiment.add_result(\"two_stages_without_stat\", recs_without_stat)\n",
    "two_stages_with_stat.experiment.results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Модель первого уровня, обученная на всем train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "train, test = second_stage_splitter.split(log)\n",
    "first_train, first_test = first_stage_splitter.split(train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "02-Mar-21 18:31:25, replay, DEBUG: Начало обучения ALSWrap\n",
      "DEBUG:replay:Начало обучения ALSWrap\n",
      "02-Mar-21 18:31:25, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:31:25, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:31:35, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.14 s, sys: 179 ms, total: 1.32 s\n",
      "Wall time: 13.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "first_recs_all = first_model.fit_predict(\n",
    "    log=train,\n",
    "    k=10,\n",
    "    users=test.select(\"user_id\").distinct().cache(),\n",
    "    items=train.select(\"item_id\").distinct().cache(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.548841</td>\n",
       "      <td>0.706954</td>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.174369</td>\n",
       "      <td>0.148692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_without_stat</th>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.560762</td>\n",
       "      <td>0.721358</td>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.180095</td>\n",
       "      <td>0.153606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first_stage_all</th>\n",
       "      <td>0.337086</td>\n",
       "      <td>0.725993</td>\n",
       "      <td>0.870695</td>\n",
       "      <td>0.337086</td>\n",
       "      <td>0.265648</td>\n",
       "      <td>0.224414</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         HitRate@1  HitRate@5  HitRate@10    NDCG@1    NDCG@5  \\\n",
       "two_stages_scenario       0.213576   0.548841    0.706954  0.213576  0.174369   \n",
       "two_stages_without_stat   0.223675   0.560762    0.721358  0.223675  0.180095   \n",
       "first_stage_all           0.337086   0.725993    0.870695  0.337086  0.265648   \n",
       "\n",
       "                          NDCG@10  \n",
       "two_stages_scenario      0.148692  \n",
       "two_stages_without_stat  0.153606  \n",
       "first_stage_all          0.224414  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "two_stages_with_stat.experiment.add_result(\"first_stage_all\", first_recs_all)\n",
    "two_stages_with_stat.experiment.results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Модель первого уровня, обученная на половине train (как в двухуровневом сценарии)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "02-Mar-21 18:32:42, replay, DEBUG: Начало обучения ALSWrap\n",
      "DEBUG:replay:Начало обучения ALSWrap\n",
      "02-Mar-21 18:32:42, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:32:43, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:32:50, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:32:50, replay, WARNING: Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "WARNING:replay:Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "02-Mar-21 18:32:51, replay, WARNING: Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "WARNING:replay:Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.22 s, sys: 498 ms, total: 1.72 s\n",
      "Wall time: 1min 33s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "first_model.fit(log=first_train)\n",
    "first_model_half = first_model.predict(\n",
    "    log=train,\n",
    "    k=10,\n",
    "    users=test.select(\"user_id\").distinct().cache(),\n",
    "    items=train.select(\"item_id\").distinct().cache(),\n",
    ")\n",
    "\n",
    "two_stages_with_stat.experiment.add_result(\"first_stage_half\", first_model_half)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.548841</td>\n",
       "      <td>0.706954</td>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.174369</td>\n",
       "      <td>0.148692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_without_stat</th>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.560762</td>\n",
       "      <td>0.721358</td>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.180095</td>\n",
       "      <td>0.153606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first_stage_all</th>\n",
       "      <td>0.337086</td>\n",
       "      <td>0.725993</td>\n",
       "      <td>0.870695</td>\n",
       "      <td>0.337086</td>\n",
       "      <td>0.265648</td>\n",
       "      <td>0.224414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first_stage_half</th>\n",
       "      <td>0.275828</td>\n",
       "      <td>0.652483</td>\n",
       "      <td>0.810927</td>\n",
       "      <td>0.275828</td>\n",
       "      <td>0.220098</td>\n",
       "      <td>0.187830</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         HitRate@1  HitRate@5  HitRate@10    NDCG@1    NDCG@5  \\\n",
       "two_stages_scenario       0.213576   0.548841    0.706954  0.213576  0.174369   \n",
       "two_stages_without_stat   0.223675   0.560762    0.721358  0.223675  0.180095   \n",
       "first_stage_all           0.337086   0.725993    0.870695  0.337086  0.265648   \n",
       "first_stage_half          0.275828   0.652483    0.810927  0.275828  0.220098   \n",
       "\n",
       "                          NDCG@10  \n",
       "two_stages_scenario      0.148692  \n",
       "two_stages_without_stat  0.153606  \n",
       "first_stage_all          0.224414  \n",
       "first_stage_half         0.187830  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "two_stages_with_stat.experiment.results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Двухуровневый сценарий с усиленным классификатором"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "second_model = ClassifierRec(spark_classifier=RandomForestClassifier(numTrees=100, seed=47), use_recs_value=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Двухуровневый сценарий со статистическими фичами"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "two_stages_with_stat_strong = TwoStagesScenario(\n",
    "    second_stage_splitter=second_stage_splitter,\n",
    "    second_model=second_model,\n",
    "    first_model=first_model,\n",
    "    metrics={NDCG(): [1, 5, 10], HitRate(): [1, 5, 10]},\n",
    "    stat_features=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "02-Mar-21 18:46:21, replay, DEBUG: mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "DEBUG:replay:mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "02-Mar-21 18:46:21, replay, DEBUG: test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "DEBUG:replay:test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "02-Mar-21 18:46:22, replay, DEBUG: first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "DEBUG:replay:first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "02-Mar-21 18:46:22, replay, DEBUG: first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "DEBUG:replay:first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "02-Mar-21 18:46:22, replay, DEBUG: Начало обучения ALSWrap\n",
      "DEBUG:replay:Начало обучения ALSWrap\n",
      "02-Mar-21 18:46:22, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:46:23, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:46:31, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:47:16, replay, DEBUG: баланс классов: положительных 164401 из 604000\n",
      "DEBUG:replay:баланс классов: положительных 164401 из 604000\n",
      "02-Mar-21 18:47:16, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:47:18, replay, WARNING: Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "WARNING:replay:Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "02-Mar-21 18:47:19, replay, DEBUG: Начало обучения ClassifierRec\n",
      "DEBUG:replay:Начало обучения ClassifierRec\n",
      "02-Mar-21 18:47:19, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:48:12, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:52:10, replay, DEBUG: ROC AUC модели второго уровня (как классификатора): 0.8058\n",
      "DEBUG:replay:ROC AUC модели второго уровня (как классификатора): 0.8058\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.75 s, sys: 802 ms, total: 4.55 s\n",
      "Wall time: 7min 54s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Get recommendations from the stat-features scenario, then record them in the\n",
    "# experiment held by two_stages_with_stat so all runs share one results table.\n",
    "recs_with_stat = two_stages_with_stat_strong.get_recs(\n",
    "    log,\n",
    "    10,\n",
    "    item_features=item_features_spark,\n",
    ")\n",
    "two_stages_with_stat.experiment.add_result(\n",
    "    \"two_stages_with_stat_strong\",\n",
    "    recs_with_stat,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Двухуровневый сценарий без статистических фич"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "02-Mar-21 18:54:16, replay, DEBUG: mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "DEBUG:replay:mixed_train stat: total lines: 939809, total users: 6040, total items: 3699\n",
      "02-Mar-21 18:54:16, replay, DEBUG: test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "DEBUG:replay:test stat: total lines: 60393, total users: 6040, total items: 3051\n",
      "02-Mar-21 18:54:16, replay, DEBUG: first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "DEBUG:replay:first_train stat: total lines: 471386, total users: 6040, total items: 3604\n",
      "02-Mar-21 18:54:17, replay, DEBUG: first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "DEBUG:replay:first_test stat: total lines: 468423, total users: 6040, total items: 3611\n",
      "02-Mar-21 18:54:17, replay, DEBUG: Начало обучения ALSWrap\n",
      "DEBUG:replay:Начало обучения ALSWrap\n",
      "02-Mar-21 18:54:17, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:54:17, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:54:29, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:55:25, replay, DEBUG: баланс классов: положительных 164401 из 604000\n",
      "DEBUG:replay:баланс классов: положительных 164401 из 604000\n",
      "02-Mar-21 18:55:25, replay, DEBUG: Начало предикта ALSWrap\n",
      "DEBUG:replay:Начало предикта ALSWrap\n",
      "02-Mar-21 18:55:27, replay, WARNING: Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "WARNING:replay:Список item содержит элементы, которые отсутствовали при обучении. Результат предсказания будет не полным.\n",
      "02-Mar-21 18:55:29, replay, DEBUG: Начало обучения ClassifierRec\n",
      "DEBUG:replay:Начало обучения ClassifierRec\n",
      "02-Mar-21 18:55:29, replay, DEBUG: Предварительная стадия обучения (pre-fit)\n",
      "DEBUG:replay:Предварительная стадия обучения (pre-fit)\n",
      "02-Mar-21 18:56:11, replay, DEBUG: Основная стадия обучения (fit)\n",
      "DEBUG:replay:Основная стадия обучения (fit)\n",
      "02-Mar-21 18:59:32, replay, DEBUG: ROC AUC модели второго уровня (как классификатора): 0.8053\n",
      "DEBUG:replay:ROC AUC модели второго уровня (как классификатора): 0.8053\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 6.23 s, sys: 1.24 s, total: 7.46 s\n",
      "Wall time: 6min 57s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.230795</td>\n",
       "      <td>0.564073</td>\n",
       "      <td>0.71904</td>\n",
       "      <td>0.230795</td>\n",
       "      <td>0.183748</td>\n",
       "      <td>0.155696</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     HitRate@1  HitRate@5  HitRate@10    NDCG@1    NDCG@5  \\\n",
       "two_stages_scenario   0.230795   0.564073     0.71904  0.230795  0.183748   \n",
       "\n",
       "                      NDCG@10  \n",
       "two_stages_scenario  0.155696  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# Two-stage scenario without statistical features (stat_features=False).\n",
    "two_stages_without_stat_strong = TwoStagesScenario(\n",
    "    second_stage_splitter=second_stage_splitter,\n",
    "    second_model=second_model,\n",
    "    first_model=first_model,\n",
    "    metrics={NDCG(): [1, 5, 10], HitRate(): [1, 5, 10]},\n",
    "    stat_features=False\n",
    ")\n",
    "recs_without_stat = two_stages_without_stat_strong.get_recs(log, 10, item_features=item_features_spark)\n",
    "# The recommendations are registered in the shared comparison table by the next cell,\n",
    "# so they are not registered here as well (doing both recomputes the same metrics twice).\n",
    "# Show the metrics this scenario recorded in its own experiment.\n",
    "two_stages_without_stat_strong.experiment.results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Record the recommendations produced without statistical features in the\n",
    "# experiment held by two_stages_with_stat, which the next cell reads from.\n",
    "two_stages_with_stat.experiment.add_result(\n",
    "    \"two_stages_without_stat_strong\",\n",
    "    recs_without_stat,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>HitRate@1</th>\n",
       "      <th>HitRate@5</th>\n",
       "      <th>HitRate@10</th>\n",
       "      <th>NDCG@1</th>\n",
       "      <th>NDCG@5</th>\n",
       "      <th>NDCG@10</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>first_stage_all</th>\n",
       "      <td>0.337086</td>\n",
       "      <td>0.725993</td>\n",
       "      <td>0.870695</td>\n",
       "      <td>0.337086</td>\n",
       "      <td>0.265648</td>\n",
       "      <td>0.224414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first_stage_half</th>\n",
       "      <td>0.275828</td>\n",
       "      <td>0.652483</td>\n",
       "      <td>0.810927</td>\n",
       "      <td>0.275828</td>\n",
       "      <td>0.220098</td>\n",
       "      <td>0.187830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_with_stat_strong</th>\n",
       "      <td>0.247185</td>\n",
       "      <td>0.589901</td>\n",
       "      <td>0.750993</td>\n",
       "      <td>0.247185</td>\n",
       "      <td>0.194624</td>\n",
       "      <td>0.164429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_without_stat_strong</th>\n",
       "      <td>0.230795</td>\n",
       "      <td>0.564073</td>\n",
       "      <td>0.719040</td>\n",
       "      <td>0.230795</td>\n",
       "      <td>0.183748</td>\n",
       "      <td>0.155696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_without_stat</th>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.560762</td>\n",
       "      <td>0.721358</td>\n",
       "      <td>0.223675</td>\n",
       "      <td>0.180095</td>\n",
       "      <td>0.153606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two_stages_scenario</th>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.548841</td>\n",
       "      <td>0.706954</td>\n",
       "      <td>0.213576</td>\n",
       "      <td>0.174369</td>\n",
       "      <td>0.148692</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                HitRate@1  HitRate@5  HitRate@10    NDCG@1  \\\n",
       "first_stage_all                  0.337086   0.725993    0.870695  0.337086   \n",
       "first_stage_half                 0.275828   0.652483    0.810927  0.275828   \n",
       "two_stages_with_stat_strong      0.247185   0.589901    0.750993  0.247185   \n",
       "two_stages_without_stat_strong   0.230795   0.564073    0.719040  0.230795   \n",
       "two_stages_without_stat          0.223675   0.560762    0.721358  0.223675   \n",
       "two_stages_scenario              0.213576   0.548841    0.706954  0.213576   \n",
       "\n",
       "                                  NDCG@5   NDCG@10  \n",
       "first_stage_all                 0.265648  0.224414  \n",
       "first_stage_half                0.220098  0.187830  \n",
       "two_stages_with_stat_strong     0.194624  0.164429  \n",
       "two_stages_without_stat_strong  0.183748  0.155696  \n",
       "two_stages_without_stat         0.180095  0.153606  \n",
       "two_stages_scenario             0.174369  0.148692  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every run recorded in the shared experiment, highest NDCG@10 first.\n",
    "(\n",
    "    two_stages_with_stat.experiment.results\n",
    "    .sort_values(by='NDCG@10', ascending=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Модель первого уровня работает лучше, чем двухуровневый сценарий. Двухуровневый сценарий, использующий статистические признаки, работает лучше, чем без них."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "name": "two_levels.ipynb"
 },
 "nbformat": 4,
 "nbformat_minor": 4
}