{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "dimensional-township", "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import re\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "import sagemaker\n", "from sagemaker import get_execution_role\n", "from sagemaker.inputs import TrainingInput\n", "from sagemaker.serializers import CSVSerializer" ] }, { "cell_type": "code", "execution_count": 2, "id": "collect-albuquerque", "metadata": {}, "outputs": [], "source": [ "# Replace these with your own S3 bucket and key prefix\n", "bucket = 'sagemaker-cn-northwest-1-876820548815'\n", "prefix = 'windturbine/xgboost'\n", "\n", "# Look up the IAM execution role for this session (an existing role is fetched, not created)\n", "role = get_execution_role()" ] }, { "cell_type": "code", "execution_count": 3, "id": "decimal-judges", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2021-03-08 08:58:02-- https://samick-virginia.s3.amazonaws.com/xgboost/data/wind_turbine_training_data.csv\n", "Resolving samick-virginia.s3.amazonaws.com (samick-virginia.s3.amazonaws.com)... 52.216.244.116\n", "Connecting to samick-virginia.s3.amazonaws.com (samick-virginia.s3.amazonaws.com)|52.216.244.116|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 30337871 (29M) [text/csv]\n", "Saving to: ‘wind_turbine_training_data.csv’\n", "\n", "wind_turbine_traini 100%[===================>] 28.93M 6.26MB/s in 5.8s \n", "\n", "2021-03-08 08:58:10 (4.96 MB/s) - ‘wind_turbine_training_data.csv’ saved [30337871/30337871]\n", "\n" ] } ], "source": [ "# Download the training dataset into the local working directory.\n", "# -nc (no-clobber) skips the download when the file already exists, so re-running\n", "# this cell neither re-fetches 29 MB nor leaves duplicate '.1' copies behind.\n", "!wget -nc https://samick-virginia.s3.amazonaws.com/xgboost/data/wind_turbine_training_data.csv" ] }, { "cell_type": "code", "execution_count": 4, "id": "together-weekend", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | turbine_id | \n", "wind_speed | \n", "RPM_blade | \n", "oil_temperature | \n", "oil_level | \n", "temperature | \n", "humidity | \n", "vibrations_frequency | \n", "pressure | \n", "wind_direction | \n", "breakdown | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "3 | \n", "80 | \n", "61 | \n", "39 | \n", "34 | \n", "33 | \n", "26 | \n", "1 | \n", "77 | \n", "3 | \n", "0 | \n", "
1 | \n", "10 | \n", "85 | \n", "78 | \n", "36 | \n", "28 | \n", "35 | \n", "43 | \n", "15 | \n", "62 | \n", "2 | \n", "1 | \n", "
2 | \n", "7 | \n", "47 | \n", "31 | \n", "31 | \n", "23 | \n", "46 | \n", "62 | \n", "15 | \n", "32 | \n", "1 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
999997 | \n", "4 | \n", "42 | \n", "75 | \n", "25 | \n", "31 | \n", "42 | \n", "35 | \n", "5 | \n", "67 | \n", "2 | \n", "0 | \n", "
999998 | \n", "3 | \n", "48 | \n", "75 | \n", "47 | \n", "10 | \n", "85 | \n", "63 | \n", "7 | \n", "72 | \n", "2 | \n", "1 | \n", "
999999 | \n", "10 | \n", "45 | \n", "60 | \n", "37 | \n", "8 | \n", "39 | \n", "35 | \n", "12 | \n", "64 | \n", "4 | \n", "1 | \n", "
1000000 rows × 11 columns
\n", "\n", " | breakdown | \n", "wind_speed | \n", "RPM_blade | \n", "oil_temperature | \n", "oil_level | \n", "temperature | \n", "humidity | \n", "vibrations_frequency | \n", "pressure | \n", "wind_direction | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "80 | \n", "61 | \n", "39 | \n", "34 | \n", "33 | \n", "26 | \n", "1 | \n", "77 | \n", "3 | \n", "
1 | \n", "1 | \n", "85 | \n", "78 | \n", "36 | \n", "28 | \n", "35 | \n", "43 | \n", "15 | \n", "62 | \n", "2 | \n", "
2 | \n", "0 | \n", "47 | \n", "31 | \n", "31 | \n", "23 | \n", "46 | \n", "62 | \n", "15 | \n", "32 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
999997 | \n", "0 | \n", "42 | \n", "75 | \n", "25 | \n", "31 | \n", "42 | \n", "35 | \n", "5 | \n", "67 | \n", "2 | \n", "
999998 | \n", "1 | \n", "48 | \n", "75 | \n", "47 | \n", "10 | \n", "85 | \n", "63 | \n", "7 | \n", "72 | \n", "2 | \n", "
999999 | \n", "1 | \n", "45 | \n", "60 | \n", "37 | \n", "8 | \n", "39 | \n", "35 | \n", "12 | \n", "64 | \n", "4 | \n", "
1000000 rows × 10 columns
\n", "