{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 🎯 Uplift model selection" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# [MegaFon Uplift Competition](https://ods.ai/competitions/megafon-df21-comp)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Problem: predict the uplift from a user's feature vector" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2021-08-09T22:36:25.634993Z", "start_time": "2021-08-09T22:36:25.630910Z" } }, "outputs": [], "source": [ "%pip install -U scikit-uplift lightgbm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-08-09T22:36:28.529257Z", "start_time": "2021-08-09T22:36:26.514166Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.model_selection import (\n", " StratifiedShuffleSplit, GridSearchCV, \n", " train_test_split, cross_validate, cross_val_score\n", ")\n", "from lightgbm import LGBMClassifier\n", "from sklift.models import SoloModel\n", "from sklift.viz import plot_qini_curve\n", "from sklift.datasets import fetch_megafon\n", "from sklift.metrics import make_uplift_scorer" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2021-08-09T22:36:37.602085Z", "start_time": "2021-08-09T22:36:28.531896Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | X_1 | \n", "X_2 | \n", "X_3 | \n", "X_4 | \n", "X_5 | \n", "X_6 | \n", "X_7 | \n", "X_8 | \n", "X_9 | \n", "X_10 | \n", "... | \n", "X_41 | \n", "X_42 | \n", "X_43 | \n", "X_44 | \n", "X_45 | \n", "X_46 | \n", "X_47 | \n", "X_48 | \n", "X_49 | \n", "X_50 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "39.396577 | \n", "-0.186548 | \n", "19.524505 | \n", "21.250208 | \n", "55.291264 | \n", "182.966712 | \n", "-5.385606 | \n", "144.573379 | \n", "-12.534344 | \n", "-58.279429 | \n", "... | \n", "90.877638 | \n", "134.363458 | \n", "-213.584582 | \n", "-2.092461 | \n", "-93.973258 | \n", "-0.155597 | \n", "-312.130733 | \n", "44.798182 | \n", "-125.682413 | \n", "16.231365 | \n", "
1 | \n", "38.987694 | \n", "0.819522 | \n", "-42.064512 | \n", "-48.270949 | \n", "-33.171257 | \n", "179.459341 | \n", "-87.151810 | \n", "-162.693257 | \n", "20.651652 | \n", "181.635081 | \n", "... | \n", "-183.840746 | \n", "72.864779 | \n", "559.783584 | \n", "1.142391 | \n", "80.037124 | \n", "-1.216185 | \n", "-111.473936 | \n", "-127.737977 | \n", "-117.501171 | \n", "10.732234 | \n", "
2 | \n", "-16.693093 | \n", "1.844558 | \n", "-8.615192 | \n", "-18.818740 | \n", "-22.271188 | \n", "-116.290369 | \n", "-63.816746 | \n", "-38.340763 | \n", "24.968496 | \n", "-136.340629 | \n", "... | \n", "-203.637766 | \n", "2.480242 | \n", "96.998504 | \n", "1.100962 | \n", "-33.275159 | \n", "0.920926 | \n", "-679.492242 | \n", "-91.009397 | \n", "-18.173358 | \n", "14.367636 | \n", "
3 | \n", "-72.040154 | \n", "-0.226921 | \n", "39.802607 | \n", "16.441262 | \n", "-1.112509 | \n", "68.128008 | \n", "23.073147 | \n", "4.688858 | \n", "-49.383641 | \n", "-91.866107 | \n", "... | \n", "172.906875 | \n", "83.951551 | \n", "-323.642557 | \n", "-0.369182 | \n", "93.221948 | \n", "-1.962380 | \n", "-442.466684 | \n", "-22.298302 | \n", "-75.916603 | \n", "11.634299 | \n", "
4 | \n", "18.296973 | \n", "0.996437 | \n", "24.465307 | \n", "-34.151971 | \n", "24.623458 | \n", "-155.455558 | \n", "-12.159787 | \n", "26.705778 | \n", "105.864805 | \n", "258.607252 | \n", "... | \n", "125.577535 | \n", "-208.531112 | \n", "118.902324 | \n", "-0.808578 | \n", "-117.497906 | \n", "1.770635 | \n", "627.395611 | \n", "122.019189 | \n", "194.091195 | \n", "-11.883858 | \n", "
5 rows × 50 columns
\n", "