{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-12-02T09:00:22.124286Z", "start_time": "2020-12-02T09:00:18.430379Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import tensorflow as tf\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_absolute_error\n", "from sklearn.ensemble import RandomForestRegressor\n", "import plotly.express as px\n", "import plotly.graph_objects as go" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Data preparation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reading data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-12-02T09:00:34.677869Z", "start_time": "2020-12-02T09:00:22.126491Z" } }, "outputs": [], "source": [ "df_raw = pd.read_csv(\"train_2.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-12-02T09:00:34.712531Z", "start_time": "2020-12-02T09:00:34.683457Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | Page | \n", "2015-07-01 | \n", "2015-07-02 | \n", "2015-07-03 | \n", "2015-07-04 | \n", "2015-07-05 | \n", "2015-07-06 | \n", "2015-07-07 | \n", "2015-07-08 | \n", "2015-07-09 | \n", "... | \n", "2017-09-01 | \n", "2017-09-02 | \n", "2017-09-03 | \n", "2017-09-04 | \n", "2017-09-05 | \n", "2017-09-06 | \n", "2017-09-07 | \n", "2017-09-08 | \n", "2017-09-09 | \n", "2017-09-10 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2NE1_zh.wikipedia.org_all-access_spider | \n", "18.0 | \n", "11.0 | \n", "5.0 | \n", "13.0 | \n", "14.0 | \n", "9.0 | \n", "9.0 | \n", "22.0 | \n", "26.0 | \n", "... | \n", "19.0 | \n", "33.0 | \n", "33.0 | \n", "18.0 | \n", "16.0 | \n", "27.0 | \n", "29.0 | \n", "23.0 | \n", "54.0 | \n", "38.0 | \n", "
1 | \n", "2PM_zh.wikipedia.org_all-access_spider | \n", "11.0 | \n", "14.0 | \n", "15.0 | \n", "18.0 | \n", "11.0 | \n", "13.0 | \n", "22.0 | \n", "11.0 | \n", "10.0 | \n", "... | \n", "32.0 | \n", "30.0 | \n", "11.0 | \n", "19.0 | \n", "54.0 | \n", "25.0 | \n", "26.0 | \n", "23.0 | \n", "13.0 | \n", "81.0 | \n", "
2 | \n", "3C_zh.wikipedia.org_all-access_spider | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "4.0 | \n", "0.0 | \n", "3.0 | \n", "4.0 | \n", "... | \n", "6.0 | \n", "6.0 | \n", "7.0 | \n", "2.0 | \n", "4.0 | \n", "7.0 | \n", "3.0 | \n", "4.0 | \n", "7.0 | \n", "6.0 | \n", "
3 | \n", "4minute_zh.wikipedia.org_all-access_spider | \n", "35.0 | \n", "13.0 | \n", "10.0 | \n", "94.0 | \n", "4.0 | \n", "26.0 | \n", "14.0 | \n", "9.0 | \n", "11.0 | \n", "... | \n", "7.0 | \n", "19.0 | \n", "19.0 | \n", "9.0 | \n", "6.0 | \n", "16.0 | \n", "19.0 | \n", "30.0 | \n", "38.0 | \n", "4.0 | \n", "
4 | \n", "52_Hz_I_Love_You_zh.wikipedia.org_all-access_s... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "16.0 | \n", "16.0 | \n", "19.0 | \n", "9.0 | \n", "20.0 | \n", "23.0 | \n", "28.0 | \n", "14.0 | \n", "8.0 | \n", "7.0 | \n", "
5 rows × 804 columns
\n", "\n", " | Page | \n", "2015-07-01 | \n", "2015-07-02 | \n", "2015-07-03 | \n", "2015-07-04 | \n", "2015-07-05 | \n", "2015-07-06 | \n", "2015-07-07 | \n", "2015-07-08 | \n", "2015-07-09 | \n", "... | \n", "2017-09-01 | \n", "2017-09-02 | \n", "2017-09-03 | \n", "2017-09-04 | \n", "2017-09-05 | \n", "2017-09-06 | \n", "2017-09-07 | \n", "2017-09-08 | \n", "2017-09-09 | \n", "2017-09-10 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2NE1_zh.wikipedia.org_all-access_spider | \n", "18.0 | \n", "11.0 | \n", "5.0 | \n", "13.0 | \n", "14.0 | \n", "9.0 | \n", "9.0 | \n", "22.0 | \n", "26.0 | \n", "... | \n", "19.0 | \n", "33.0 | \n", "33.0 | \n", "18.0 | \n", "16.0 | \n", "27.0 | \n", "29.0 | \n", "23.0 | \n", "54.0 | \n", "38.0 | \n", "
1 | \n", "2PM_zh.wikipedia.org_all-access_spider | \n", "11.0 | \n", "14.0 | \n", "15.0 | \n", "18.0 | \n", "11.0 | \n", "13.0 | \n", "22.0 | \n", "11.0 | \n", "10.0 | \n", "... | \n", "32.0 | \n", "30.0 | \n", "11.0 | \n", "19.0 | \n", "54.0 | \n", "25.0 | \n", "26.0 | \n", "23.0 | \n", "13.0 | \n", "81.0 | \n", "
2 | \n", "3C_zh.wikipedia.org_all-access_spider | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "4.0 | \n", "0.0 | \n", "3.0 | \n", "4.0 | \n", "... | \n", "6.0 | \n", "6.0 | \n", "7.0 | \n", "2.0 | \n", "4.0 | \n", "7.0 | \n", "3.0 | \n", "4.0 | \n", "7.0 | \n", "6.0 | \n", "
3 | \n", "4minute_zh.wikipedia.org_all-access_spider | \n", "35.0 | \n", "13.0 | \n", "10.0 | \n", "94.0 | \n", "4.0 | \n", "26.0 | \n", "14.0 | \n", "9.0 | \n", "11.0 | \n", "... | \n", "7.0 | \n", "19.0 | \n", "19.0 | \n", "9.0 | \n", "6.0 | \n", "16.0 | \n", "19.0 | \n", "30.0 | \n", "38.0 | \n", "4.0 | \n", "
5 | \n", "5566_zh.wikipedia.org_all-access_spider | \n", "12.0 | \n", "7.0 | \n", "4.0 | \n", "5.0 | \n", "20.0 | \n", "8.0 | \n", "5.0 | \n", "17.0 | \n", "24.0 | \n", "... | \n", "13.0 | \n", "13.0 | \n", "45.0 | \n", "4.0 | \n", "13.0 | \n", "20.0 | \n", "18.0 | \n", "17.0 | \n", "14.0 | \n", "11.0 | \n", "
5 rows × 804 columns
\n", "\n", " | 2015-07-01 | \n", "2015-07-02 | \n", "2015-07-03 | \n", "2015-07-04 | \n", "2015-07-05 | \n", "2015-07-06 | \n", "2015-07-07 | \n", "2015-07-08 | \n", "2015-07-09 | \n", "2015-07-10 | \n", "... | \n", "2017-09-01 | \n", "2017-09-02 | \n", "2017-09-03 | \n", "2017-09-04 | \n", "2017-09-05 | \n", "2017-09-06 | \n", "2017-09-07 | \n", "2017-09-08 | \n", "2017-09-09 | \n", "2017-09-10 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
71123 | \n", "1348.0 | \n", "1448.0 | \n", "881.0 | \n", "547.0 | \n", "658.0 | \n", "951.0 | \n", "1228.0 | \n", "1157.0 | \n", "1162.0 | \n", "866.0 | \n", "... | \n", "2155.0 | \n", "1274.0 | \n", "1715.0 | \n", "2758.0 | \n", "3151.0 | \n", "2991.0 | \n", "2637.0 | \n", "1527.0 | \n", "931.0 | \n", "1146.0 | \n", "
1 rows × 803 columns
\n", "\n", " | predictions | \n", "reality | \n", "model | \n", "
---|---|---|---|
0 | \n", "1307.753052 | \n", "1592.0 | \n", "model_without_scaling | \n", "
1 | \n", "1307.753052 | \n", "1570.0 | \n", "model_without_scaling | \n", "
2 | \n", "1307.753052 | \n", "1732.0 | \n", "model_without_scaling | \n", "
3 | \n", "1307.753052 | \n", "1445.0 | \n", "model_without_scaling | \n", "
4 | \n", "1307.753052 | \n", "1180.0 | \n", "model_without_scaling | \n", "