{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Baseline for the challenge DOTAW\n",
    "### Author - Pulkit Gera"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ayushshivani/aicrowd_educational_baselines/blob/master/DOTAW_baseline.ipynb)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Install the libraries imported in the next cell.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# The PyPI distribution is named scikit-learn; the `sklearn` name is a deprecated\n",
    "# placeholder package whose installation now errors out.\n",
    "%pip install numpy\n",
    "%pip install pandas\n",
    "%pip install scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download data\n",
    "The first step is to download our train and test data. We will train a classifier on the train data and make predictions on the test data. We then submit our predictions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# -nc (no-clobber) skips an archive that is already present, so re-running this cell\n",
    "# does not save duplicate copies such as test.zip.1 / train.zip.1.\n",
    "!wget -nc https://s3.eu-central-1.wasabisys.com/aicrowd-public-datasets/aicrowd_educational_dotaw/data/public/test.zip\n",
    "!wget -nc https://s3.eu-central-1.wasabisys.com/aicrowd-public-datasets/aicrowd_educational_dotaw/data/public/train.zip\n",
    "# -o overwrites previously extracted files without asking, so a re-run is not\n",
    "# interrupted by unzip's interactive replace prompt.\n",
    "!unzip -o train.zip\n",
    "!unzip -o test.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data\n",
    "We use the pandas library to load our data. Pandas loads it into dataframes, which helps us analyze our data easily.\n",
    "Learn more about it [here](https://www.tutorialspoint.com/python_pandas/index.htm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the training set into a DataFrame.\n",
    "# train.csv is expected to be produced by the `unzip train.zip` step above.\n",
    "train_data = pd.read_csv('train.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analyse Data"
   ]
  },
  { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | winner | \n", "cluster_id | \n", "game_mode | \n", "game_type | \n", "hero_0 | \n", "hero_1 | \n", "hero_2 | \n", "hero_3 | \n", "hero_4 | \n", "hero_5 | \n", "... | \n", "hero_103 | \n", "hero_104 | \n", "hero_105 | \n", "hero_106 | \n", "hero_107 | \n", "hero_108 | \n", "hero_109 | \n", "hero_110 | \n", "hero_111 | \n", "hero_112 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "-1 | \n", "223 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 1 | \n", "1 | \n", "152 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "-1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 2 | \n", "1 | \n", "131 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "-1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 3 | \n", "1 | \n", "154 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "-1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 4 | \n", "-1 | \n", "171 | \n", "2 | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "-1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 rows × 117 columns
\n", "| \n", " | winner | \n", "cluster_id | \n", "game_mode | \n", "game_type | \n", "hero_0 | \n", "hero_1 | \n", "hero_2 | \n", "hero_3 | \n", "hero_4 | \n", "hero_5 | \n", "... | \n", "hero_103 | \n", "hero_104 | \n", "hero_105 | \n", "hero_106 | \n", "hero_107 | \n", "hero_108 | \n", "hero_109 | \n", "hero_110 | \n", "hero_111 | \n", "hero_112 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "... | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.0 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "92650.000000 | \n", "
| mean | \n", "0.053038 | \n", "175.864145 | \n", "3.317572 | \n", "2.384587 | \n", "-0.001630 | \n", "-0.000971 | \n", "0.000691 | \n", "-0.000799 | \n", "-0.002008 | \n", "0.003173 | \n", "... | \n", "-0.001371 | \n", "-0.000950 | \n", "0.000885 | \n", "0.000594 | \n", "0.0 | \n", "0.001025 | \n", "0.000648 | \n", "-0.000227 | \n", "-0.000043 | \n", "0.000896 | \n", "
| std | \n", "0.998598 | \n", "35.658214 | \n", "2.633070 | \n", "0.486833 | \n", "0.402004 | \n", "0.467672 | \n", "0.165052 | \n", "0.355393 | \n", "0.329348 | \n", "0.483950 | \n", "... | \n", "0.535024 | \n", "0.206112 | \n", "0.283985 | \n", "0.155940 | \n", "0.0 | \n", "0.220703 | \n", "0.204166 | \n", "0.168707 | \n", "0.189868 | \n", "0.139033 | \n", "
| min | \n", "-1.000000 | \n", "111.000000 | \n", "1.000000 | \n", "1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "... | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "0.0 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "-1.000000 | \n", "
| 25% | \n", "-1.000000 | \n", "152.000000 | \n", "2.000000 | \n", "2.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| 50% | \n", "1.000000 | \n", "156.000000 | \n", "2.000000 | \n", "2.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| 75% | \n", "1.000000 | \n", "223.000000 | \n", "2.000000 | \n", "3.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
| max | \n", "1.000000 | \n", "261.000000 | \n", "9.000000 | \n", "3.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "... | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.0 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "
8 rows × 117 columns
\n", "| \n", " | Actual | \n", "Predicted | \n", "
|---|---|---|
| 26389 | \n", "1 | \n", "-1 | \n", "
| 55196 | \n", "-1 | \n", "1 | \n", "
| 51250 | \n", "-1 | \n", "1 | \n", "
| 25508 | \n", "1 | \n", "-1 | \n", "
| 24128 | \n", "1 | \n", "-1 | \n", "
| 2442 | \n", "-1 | \n", "-1 | \n", "
| 5638 | \n", "-1 | \n", "-1 | \n", "
| 3714 | \n", "-1 | \n", "1 | \n", "
| 36579 | \n", "-1 | \n", "1 | \n", "
| 10399 | \n", "-1 | \n", "-1 | \n", "
| 13464 | \n", "-1 | \n", "-1 | \n", "
| 71600 | \n", "-1 | \n", "1 | \n", "
| 80162 | \n", "1 | \n", "-1 | \n", "
| 7077 | \n", "1 | \n", "1 | \n", "
| 63431 | \n", "-1 | \n", "1 | \n", "
| 78584 | \n", "1 | \n", "-1 | \n", "
| 31413 | \n", "1 | \n", "1 | \n", "
| 13393 | \n", "1 | \n", "1 | \n", "
| 90845 | \n", "1 | \n", "1 | \n", "
| 23339 | \n", "-1 | \n", "-1 | \n", "
| 13756 | \n", "-1 | \n", "1 | \n", "
| 63563 | \n", "-1 | \n", "-1 | \n", "
| 81880 | \n", "-1 | \n", "1 | \n", "
| 77591 | \n", "-1 | \n", "-1 | \n", "
| 23311 | \n", "1 | \n", "1 | \n", "