{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "02_machine_learning_enshu.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "qvBSQgmhJxNc" }, "source": [ "### 機械学習 演習" ] }, { "cell_type": "markdown", "metadata": { "id": "7L-SYR9eJ4Cj" }, "source": [ "#### ボストン住宅価格予測\n", "#### 単純な線形回帰モデル(scikit-learn)\n" ] }, { "cell_type": "code", "metadata": { "id": "VwoxZLNsp0od" }, "source": [ "from sklearn.datasets import load_boston\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.linear_model import LinearRegression\n", "import re\n", "import requests\n", "from pprint import pprint\n", "import lightgbm" ], "execution_count": 115, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9QQsxjmhqAAy", "outputId": "05d21d66-cb50-4d9e-8bc7-2692a619a9d1" }, "source": [ "print(np.__version__)" ], "execution_count": 116, "outputs": [ { "output_type": "stream", "text": [ "1.19.5\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "8uhhGhM3qICG" }, "source": [ "boston = load_boston()" ], "execution_count": 117, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8weuK7GLqXjt" }, "source": [ "# print(boston)" ], "execution_count": 118, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "R-bmp0g0M-GF", "outputId": "852ec3cd-610e-420e-b005-fe2165b1ec75" }, "source": [ "# ディクショナリ型なので、keys()でデータ構成を確認できる。\n", "print(boston.keys())\n", "print(boston[\"feature_names\"])" ], "execution_count": 119, "outputs": [ { "output_type": "stream", "text": [ "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])\n", "['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'\n", " 'B' 'LSTAT']\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { 
"id": "F6v-iS1uqn-w" }, "source": [ "# print(boston[\"DESCR\"])" ], "execution_count": 120, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "35Ol7MqnqvO6", "outputId": "a902474e-5a85-4bc6-ed65-817bbe6aca26" }, "source": [ "print(boston[\"feature_names\"])" ], "execution_count": 121, "outputs": [ { "output_type": "stream", "text": [ "['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'\n", " 'B' 'LSTAT']\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "rJTY88hVqznv" }, "source": [ "boston_data = boston[\"data\"]" ], "execution_count": 122, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 453 }, "id": "WMowO6lYq9e6", "outputId": "655b4daf-bde4-4edd-e44e-13d4d099de69" }, "source": [ "columns = boston[\"feature_names\"]\n", "# columns = np.append(columns, \"MEDV\") MEDV列はtargetの方にある。\n", "df = pd.DataFrame(boston_data, columns=columns)\n", "print(columns)\n", "df" ], "execution_count": 123, "outputs": [ { "output_type": "stream", "text": [ "['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'\n", " 'B' 'LSTAT']\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "
| \n", " | CRIM | \n", "ZN | \n", "INDUS | \n", "CHAS | \n", "NOX | \n", "RM | \n", "AGE | \n", "DIS | \n", "RAD | \n", "TAX | \n", "PTRATIO | \n", "B | \n", "LSTAT | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.00632 | \n", "18.0 | \n", "2.31 | \n", "0.0 | \n", "0.538 | \n", "6.575 | \n", "65.2 | \n", "4.0900 | \n", "1.0 | \n", "296.0 | \n", "15.3 | \n", "396.90 | \n", "4.98 | \n", "
| 1 | \n", "0.02731 | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "6.421 | \n", "78.9 | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "396.90 | \n", "9.14 | \n", "
| 2 | \n", "0.02729 | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "7.185 | \n", "61.1 | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "392.83 | \n", "4.03 | \n", "
| 3 | \n", "0.03237 | \n", "0.0 | \n", "2.18 | \n", "0.0 | \n", "0.458 | \n", "6.998 | \n", "45.8 | \n", "6.0622 | \n", "3.0 | \n", "222.0 | \n", "18.7 | \n", "394.63 | \n", "2.94 | \n", "
| 4 | \n", "0.06905 | \n", "0.0 | \n", "2.18 | \n", "0.0 | \n", "0.458 | \n", "7.147 | \n", "54.2 | \n", "6.0622 | \n", "3.0 | \n", "222.0 | \n", "18.7 | \n", "396.90 | \n", "5.33 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 501 | \n", "0.06263 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.593 | \n", "69.1 | \n", "2.4786 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "391.99 | \n", "9.67 | \n", "
| 502 | \n", "0.04527 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.120 | \n", "76.7 | \n", "2.2875 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "9.08 | \n", "
| 503 | \n", "0.06076 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.976 | \n", "91.0 | \n", "2.1675 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "5.64 | \n", "
| 504 | \n", "0.10959 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.794 | \n", "89.3 | \n", "2.3889 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "393.45 | \n", "6.48 | \n", "
| 505 | \n", "0.04741 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.030 | \n", "80.8 | \n", "2.5050 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "7.88 | \n", "
506 rows × 13 columns
\n", "| \n", " | CRIM | \n", "ZN | \n", "INDUS | \n", "CHAS | \n", "NOX | \n", "RM | \n", "AGE | \n", "DIS | \n", "RAD | \n", "TAX | \n", "PTRATIO | \n", "B | \n", "LSTAT | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "506.000000 | \n", "
| mean | \n", "3.613524 | \n", "11.363636 | \n", "11.136779 | \n", "0.069170 | \n", "0.554695 | \n", "6.284634 | \n", "68.574901 | \n", "3.795043 | \n", "9.549407 | \n", "408.237154 | \n", "18.455534 | \n", "356.674032 | \n", "12.653063 | \n", "
| std | \n", "8.601545 | \n", "23.322453 | \n", "6.860353 | \n", "0.253994 | \n", "0.115878 | \n", "0.702617 | \n", "28.148861 | \n", "2.105710 | \n", "8.707259 | \n", "168.537116 | \n", "2.164946 | \n", "91.294864 | \n", "7.141062 | \n", "
| min | \n", "0.006320 | \n", "0.000000 | \n", "0.460000 | \n", "0.000000 | \n", "0.385000 | \n", "3.561000 | \n", "2.900000 | \n", "1.129600 | \n", "1.000000 | \n", "187.000000 | \n", "12.600000 | \n", "0.320000 | \n", "1.730000 | \n", "
| 25% | \n", "0.082045 | \n", "0.000000 | \n", "5.190000 | \n", "0.000000 | \n", "0.449000 | \n", "5.885500 | \n", "45.025000 | \n", "2.100175 | \n", "4.000000 | \n", "279.000000 | \n", "17.400000 | \n", "375.377500 | \n", "6.950000 | \n", "
| 50% | \n", "0.256510 | \n", "0.000000 | \n", "9.690000 | \n", "0.000000 | \n", "0.538000 | \n", "6.208500 | \n", "77.500000 | \n", "3.207450 | \n", "5.000000 | \n", "330.000000 | \n", "19.050000 | \n", "391.440000 | \n", "11.360000 | \n", "
| 75% | \n", "3.677083 | \n", "12.500000 | \n", "18.100000 | \n", "0.000000 | \n", "0.624000 | \n", "6.623500 | \n", "94.075000 | \n", "5.188425 | \n", "24.000000 | \n", "666.000000 | \n", "20.200000 | \n", "396.225000 | \n", "16.955000 | \n", "
| max | \n", "88.976200 | \n", "100.000000 | \n", "27.740000 | \n", "1.000000 | \n", "0.871000 | \n", "8.780000 | \n", "100.000000 | \n", "12.126500 | \n", "24.000000 | \n", "711.000000 | \n", "22.000000 | \n", "396.900000 | \n", "37.970000 | \n", "
| \n", " | CRIM | \n", "ZN | \n", "INDUS | \n", "CHAS | \n", "NOX | \n", "RM | \n", "AGE | \n", "DIS | \n", "RAD | \n", "TAX | \n", "PTRATIO | \n", "B | \n", "LSTAT | \n", "PRICE | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.00632 | \n", "18.0 | \n", "2.31 | \n", "0.0 | \n", "0.538 | \n", "6.575 | \n", "65.2 | \n", "4.0900 | \n", "1.0 | \n", "296.0 | \n", "15.3 | \n", "396.90 | \n", "4.98 | \n", "24.0 | \n", "
| 1 | \n", "0.02731 | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "6.421 | \n", "78.9 | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "396.90 | \n", "9.14 | \n", "21.6 | \n", "
| 2 | \n", "0.02729 | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "7.185 | \n", "61.1 | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "392.83 | \n", "4.03 | \n", "34.7 | \n", "
| 3 | \n", "0.03237 | \n", "0.0 | \n", "2.18 | \n", "0.0 | \n", "0.458 | \n", "6.998 | \n", "45.8 | \n", "6.0622 | \n", "3.0 | \n", "222.0 | \n", "18.7 | \n", "394.63 | \n", "2.94 | \n", "33.4 | \n", "
| 4 | \n", "0.06905 | \n", "0.0 | \n", "2.18 | \n", "0.0 | \n", "0.458 | \n", "7.147 | \n", "54.2 | \n", "6.0622 | \n", "3.0 | \n", "222.0 | \n", "18.7 | \n", "396.90 | \n", "5.33 | \n", "36.2 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 501 | \n", "0.06263 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.593 | \n", "69.1 | \n", "2.4786 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "391.99 | \n", "9.67 | \n", "22.4 | \n", "
| 502 | \n", "0.04527 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.120 | \n", "76.7 | \n", "2.2875 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "9.08 | \n", "20.6 | \n", "
| 503 | \n", "0.06076 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.976 | \n", "91.0 | \n", "2.1675 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "5.64 | \n", "23.9 | \n", "
| 504 | \n", "0.10959 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.794 | \n", "89.3 | \n", "2.3889 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "393.45 | \n", "6.48 | \n", "22.0 | \n", "
| 505 | \n", "0.04741 | \n", "0.0 | \n", "11.93 | \n", "0.0 | \n", "0.573 | \n", "6.030 | \n", "80.8 | \n", "2.5050 | \n", "1.0 | \n", "273.0 | \n", "21.0 | \n", "396.90 | \n", "7.88 | \n", "11.9 | \n", "
506 rows × 14 columns
\n", "