{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "dimensional-township", "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import re\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "import sagemaker\n", "from sagemaker import get_execution_role\n", "from sagemaker.inputs import TrainingInput\n", "from sagemaker.serializers import CSVSerializer" ] }, { "cell_type": "code", "execution_count": 2, "id": "collect-albuquerque", "metadata": {}, "outputs": [], "source": [ "# Replace these with your own S3 bucket and prefix\n", "bucket = 'sagemaker-cn-northwest-1-876820548815'\n", "prefix = 'windturbine/xgboost'\n", "\n", "# Retrieve the IAM execution role attached to this notebook instance\n", "role = get_execution_role()" ] }, { "cell_type": "code", "execution_count": 3, "id": "decimal-judges", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2021-03-08 08:58:02-- https://samick-virginia.s3.amazonaws.com/xgboost/data/wind_turbine_training_data.csv\n", "Resolving samick-virginia.s3.amazonaws.com (samick-virginia.s3.amazonaws.com)... 52.216.244.116\n", "Connecting to samick-virginia.s3.amazonaws.com (samick-virginia.s3.amazonaws.com)|52.216.244.116|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 30337871 (29M) [text/csv]\n", "Saving to: ‘wind_turbine_training_data.csv’\n", "\n", "wind_turbine_traini 100%[===================>] 28.93M 6.26MB/s in 5.8s \n", "\n", "2021-03-08 08:58:10 (4.96 MB/s) - ‘wind_turbine_training_data.csv’ saved [30337871/30337871]\n", "\n" ] } ], "source": [ "# Download the training dataset to the local filesystem\n", "!wget https://samick-virginia.s3.amazonaws.com/xgboost/data/wind_turbine_training_data.csv" ] }, { "cell_type": "code", "execution_count": 4, "id": "together-weekend", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
turbine_idwind_speedRPM_bladeoil_temperatureoil_leveltemperaturehumidityvibrations_frequencypressurewind_directionbreakdown
0380613934332617730
110857836283543156221
27473131234662153210
....................................
999997442752531423556720
999998348754710856377221
9999991045603783935126441
\n", "

1000000 rows × 11 columns

\n", "
" ], "text/plain": [ " turbine_id wind_speed RPM_blade oil_temperature oil_level \\\n", "0 3 80 61 39 34 \n", "1 10 85 78 36 28 \n", "2 7 47 31 31 23 \n", "... ... ... ... ... ... \n", "999997 4 42 75 25 31 \n", "999998 3 48 75 47 10 \n", "999999 10 45 60 37 8 \n", "\n", " temperature humidity vibrations_frequency pressure wind_direction \\\n", "0 33 26 1 77 3 \n", "1 35 43 15 62 2 \n", "2 46 62 15 32 1 \n", "... ... ... ... ... ... \n", "999997 42 35 5 67 2 \n", "999998 85 63 7 72 2 \n", "999999 39 35 12 64 4 \n", "\n", " breakdown \n", "0 0 \n", "1 1 \n", "2 0 \n", "... ... \n", "999997 0 \n", "999998 1 \n", "999999 1 \n", "\n", "[1000000 rows x 11 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 浏览数据集\n", "dataset = pd.read_csv('wind_turbine_training_data.csv')\n", "pd.set_option('display.max_rows', 6)\n", "dataset" ] }, { "cell_type": "code", "execution_count": 5, "id": "thirty-temperature", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
breakdownwind_speedRPM_bladeoil_temperatureoil_leveltemperaturehumidityvibrations_frequencypressurewind_direction
008061393433261773
1185783628354315622
2047313123466215321
.................................
99999704275253142355672
99999814875471085637722
99999914560378393512644
\n", "

1000000 rows × 10 columns

\n", "
" ], "text/plain": [ " breakdown wind_speed RPM_blade oil_temperature oil_level \\\n", "0 0 80 61 39 34 \n", "1 1 85 78 36 28 \n", "2 0 47 31 31 23 \n", "... ... ... ... ... ... \n", "999997 0 42 75 25 31 \n", "999998 1 48 75 47 10 \n", "999999 1 45 60 37 8 \n", "\n", " temperature humidity vibrations_frequency pressure wind_direction \n", "0 33 26 1 77 3 \n", "1 35 43 15 62 2 \n", "2 46 62 15 32 1 \n", "... ... ... ... ... ... \n", "999997 42 35 5 67 2 \n", "999998 85 63 7 72 2 \n", "999999 39 35 12 64 4 \n", "\n", "[1000000 rows x 10 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 清洗数据 (删除turbine_id列,按照XGBoost的训练数据格式要求,删除表头并且将最后一列的推理结果数据挪到第一列)\n", "dataset = dataset.drop('turbine_id', axis=1)\n", "dataset = pd.concat([dataset['breakdown'], dataset.drop(['breakdown'], axis=1)], axis=1)\n", "dataset" ] }, { "cell_type": "code", "execution_count": 6, "id": "qualified-discount", "metadata": {}, "outputs": [], "source": [ "# 将数据拆分为训练数据集和验证数据集并保存到本地\n", "train_data, validation_data, test_data = np.split(dataset.sample(frac=1, random_state=1729), [int(0.7 * len(dataset)), int(0.9 * len(dataset))])\n", "train_data.to_csv('train.csv', header=False, index=False)\n", "validation_data.to_csv('validation.csv', header=False, index=False)" ] }, { "cell_type": "code", "execution_count": 7, "id": "caring-airfare", "metadata": {}, "outputs": [], "source": [ "# 上传数据到 S3\n", "boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'data/train/train.csv')).upload_file('train.csv')\n", "boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'data/validation/validation.csv')).upload_file('validation.csv')\n", "# 为 Sagemaker 训练任务指定数据位置 \n", "s3_input_train = TrainingInput(s3_data='s3://{}/{}/data/train'.format(bucket, prefix), content_type='csv')\n", "s3_input_validation = TrainingInput(s3_data='s3://{}/{}/data/validation/'.format(bucket, prefix), content_type='csv')" ] }, { "cell_type": 
"code", "execution_count": 10, "id": "growing-salem", "metadata": {}, "outputs": [], "source": [ "# ECR image URIs of the built-in XGBoost training container (AWS China regions)\n", "containers = {\n", " 'cn-northwest-1':'387376663083.dkr.ecr.cn-northwest-1.amazonaws.com.cn/xgboost:latest',\n", " 'cn-north-1':'390948362332.dkr.ecr.cn-north-1.amazonaws.com.cn/xgboost:latest'\n", " }\n", "\n", "# Create a SageMaker session\n", "sess = sagemaker.Session()" ] }, { "cell_type": "code", "execution_count": 11, "id": "demographic-istanbul", "metadata": {}, "outputs": [], "source": [ "# Create the SageMaker estimator, specifying instance type/count for the training job\n", "xgb = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],\n", " role, \n", " instance_count=1, \n", " instance_type='ml.m5.xlarge',\n", " output_path='s3://{}/{}/model'.format(bucket, prefix),\n", " sagemaker_session=sess)" ] }, { "cell_type": "code", "execution_count": 12, "id": "integrated-clerk", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2021-03-08 09:57:20 Starting - Starting the training job...\n", "2021-03-08 09:57:23 Starting - Launching requested ML instances......\n", "2021-03-08 09:58:25 Starting - Preparing the instances for training......\n", "2021-03-08 09:59:23 Downloading - Downloading input data...\n", "2021-03-08 10:00:12 Training - Training image download completed. Training in progress.\u001b[34mArguments: train\u001b[0m\n", "\u001b[34m[2021-03-08:10:00:12:INFO] Running standalone xgboost training.\u001b[0m\n", "\u001b[34m[2021-03-08:10:00:12:INFO] File size need to be processed in the node: 23.38mb. 
Available memory size in the node: 8114.96mb\u001b[0m\n", "\u001b[34m[2021-03-08:10:00:12:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", "\u001b[34m[10:00:12] S3DistributionType set as FullyReplicated\u001b[0m\n", "\u001b[34m[10:00:12] 700000x9 matrix with 6300000 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,\u001b[0m\n", "\u001b[34m[2021-03-08:10:00:12:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", "\u001b[34m[10:00:12] S3DistributionType set as FullyReplicated\u001b[0m\n", "\u001b[34m[10:00:13] 200000x9 matrix with 1800000 entries loaded from /opt/ml/input/data/validation?format=csv&label_column=0&delimiter=,\u001b[0m\n", "\u001b[34m[10:00:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[0]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:13] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[1]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[2]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[3]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:14] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[4]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:15] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[5]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:15] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 
extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[6]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:15] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[7]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:16] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[8]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:16] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[9]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:16] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[10]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:17] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[11]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:17] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[12]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:17] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[13]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:17] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[14]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[15]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:18] 
src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[16]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:18] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[17]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[18]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[19]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[20]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:19] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[21]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[22]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[23]#011train-error:0#011validation-error:0\u001b[0m\n", "\u001b[34m[10:00:20] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5\u001b[0m\n", "\u001b[34m[24]#011train-error:0#011validation-error:0\u001b[0m\n", "\n", "2021-03-08 10:00:29 Uploading - Uploading generated training model\n", "2021-03-08 10:00:29 Completed - Training job completed\n", "Training seconds: 66\n", "Billable seconds: 
66\n" ] } ], "source": [ "# Set hyperparameters and launch the training job\n", "xgb.set_hyperparameters(eta=0.1, objective='binary:logistic', num_round=25)\n", "xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})" ] }, { "cell_type": "code", "execution_count": 13, "id": "structural-ceiling", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-----------!" ] } ], "source": [ "# Deploy the trained model and create an endpoint for inference\n", "xgb_predictor = xgb.deploy(\n", "\tinitial_instance_count = 1,\n", "\tinstance_type = 'ml.m5.xlarge',\n", "\tserializer = CSVSerializer())" ] }, { "cell_type": "code", "execution_count": 14, "id": "advisory-posting", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "xgboost-2021-03-08-10-43-34-693\n" ] } ], "source": [ "# Show the name of the endpoint deployed on SageMaker\n", "print(xgb_predictor.endpoint_name)" ] } ], "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, "nbformat_minor": 5 }