{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "explainable_kmeans.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "TuTD6V71BLs8", "colab_type": "text" }, "source": [ "### created by Takuya Matsuda at YNU" ] }, { "cell_type": "code", "metadata": { "id": "NQ8_Pv0X7ZIP", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "3ff2095e-a18e-4be8-95f8-3d15d1cf9637" }, "source": [ "!pip install graphviz" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Requirement already satisfied: graphviz in /usr/local/lib/python3.6/dist-packages (0.10.1)\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "evtU8xgl52Wf", "colab_type": "text" }, "source": [ "

\n", "# Explainable k-means" ] }, { "cell_type": "code", "metadata": { "id": "vO0mgUpX5j4x", "colab_type": "code", "colab": {} }, "source": [ "import queue,graphviz\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from graphviz import Digraph\n", "from sklearn.cluster import KMeans\n", "from sklearn.tree import export_graphviz,DecisionTreeClassifier" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "TM5mgXdl57nV", "colab_type": "text" }, "source": [ "

\n", "## Define funcs for k=2\n", "I did not create Class of their algorithm, so this is a little experiment for me." ] }, { "cell_type": "code", "metadata": { "id": "N8o4DrKq5zRk", "colab_type": "code", "colab": {} }, "source": [ "#ソートと動的計画法によって,最適な分割を求める (k=2)\n", "#this func is main func and get best split condition using sort and dynamic programming\n", "def optimal_threshold_2means(X):\n", " bests_split = {'cost':np.inf,'coordinate':None,'threshold':None}\n", " data_num = X.shape[0]\n", " data_dimentions = X.shape[1]\n", " u = np.sum(X*X)\n", " \n", " for i in range(data_dimentions):\n", " s = np.zeros(data_dimentions)\n", " r = np.sum(X,axis=0)\n", " ith_sorted_X = X[X[:,i].argsort(), :]\n", " for j,data in enumerate(ith_sorted_X[:-1]):\n", " s += data\n", " r -= data\n", " cost = u - np.sum(s*s)/(j+1) -np.sum(r*r)/(data_num-j-1)\n", " #print(cost)\n", " if cost < bests_split['cost'] and X[j][i]!=X[j+1][i]:\n", " bests_split['cost'] = cost\n", " bests_split['coordinate'] = i\n", " bests_split['threshold'] = data[i]\n", " \n", " return bests_split\n", "\n", "#最適な分割に基づいてクラスタリング(k=2)\n", "#this func is for clustering datasets based on best splits got from above func\n", "def clustering_2means_by_tree(bests_split,X):\n", " cluster = np.ones(X.shape[0])\n", " for i,data in enumerate(X):\n", " if(data[bests_split['coordinate']]>bests_split['threshold']):\n", " cluster[i] = 0\n", " return cluster\n", "\n", "#得られた分割の中心座標を求める(k=2)\n", "#this func is for calculating center points\n", "def get_mean(X,approx_labels):\n", " res=[]\n", " for k in range(len(np.unique(approx_labels))):\n", " n = 0\n", " mean = np.zeros(X.shape[1])\n", " for i,data in enumerate(X):\n", " if(approx_labels[i]==k):\n", " mean+=data\n", " n+=1\n", " res.append(mean/n)\n", " return np.array(res)\n", "\n", "#近似の比率を計算する,論文ではkmeansの場合,上界は4である(k=2).\n", "#this func is for calculating approximation ratio\n", "def approx_score(approx_labels,kmeans_model,X):\n", " kmeans_cost = 0\n", " kmeans_label = kmeans_model.labels_\n", " kmeans_centers = kmeans_model.cluster_centers_\n", " for i,data in enumerate(X):\n", " kmeans_cost += np.sum((data-kmeans_centers[kmeans_label[i]])*(data-kmeans_centers[kmeans_label[i]]))\n", " \n", " approx_cost = 0\n", " mean = get_mean(X,approx_labels)\n", " for k in range(kmeans_model.n_clusters):\n", " for i,data in enumerate(X):\n", " if(approx_labels[i]==k):\n", " approx_cost += np.sum((data-mean[k])*(data-mean[k]))\n", " print(kmeans_cost)\n", " print(approx_cost)\n", " return approx_cost/kmeans_cost" ], "execution_count": 76, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ZRJETWfD6ALL", "colab_type": "text" }, "source": [ "

\n", "## Two Datasets\n", "First is from [this good article](https://qiita.com/ynakayama/items/1223b6844a1a044e2e3b).\n", "\n", "Second is from uci repo and I get the code from [this good article](https://pythondatascience.plavox.info/scikit-learn/%E3%82%AF%E3%83%A9%E3%82%B9%E3%82%BF%E5%88%86%E6%9E%90-k-means)." ] }, { "cell_type": "code", "metadata": { "id": "MfULm2_S5_Cd", "colab_type": "code", "colab": {} }, "source": [ "# First dataset\n", "# 生徒の国語・数学・英語の各得点を配列として与える\n", "X = np.array([\n", " [ 80, 85, 100 ],\n", " [ 96, 100, 100 ],\n", " [ 54, 83, 98 ],\n", " [ 80, 98, 98 ],\n", " [ 90, 92, 91 ],\n", " [ 84, 78, 82 ],\n", " [ 79, 100, 96 ],\n", " [ 88, 92, 92 ],\n", " [ 98, 73, 72 ],\n", " [ 75, 84, 85 ],\n", " [ 92, 100, 96 ],\n", " [ 96, 92, 90 ],\n", " [ 99, 76, 91 ],\n", " [ 75, 82, 88 ],\n", " [ 90, 94, 94 ],\n", " [ 54, 84, 87 ],\n", " [ 92, 89, 62 ],\n", " [ 88, 94, 97 ],\n", " [ 42, 99, 80 ],\n", " [ 70, 98, 70 ],\n", " [ 94, 78, 83 ],\n", " [ 52, 73, 87 ],\n", " [ 94, 88, 72 ],\n", " [ 70, 73, 80 ],\n", " [ 95, 84, 90 ],\n", " [ 95, 88, 84 ],\n", " [ 75, 97, 89 ],\n", " [ 49, 81, 86 ],\n", " [ 83, 72, 80 ],\n", " [ 75, 73, 88 ],\n", " [ 79, 82, 76 ],\n", " [ 100, 77, 89 ],\n", " [ 88, 63, 79 ],\n", " [ 100, 50, 86 ],\n", " [ 55, 96, 84 ],\n", " [ 92, 74, 77 ],\n", " [ 97, 50, 73 ],\n", " ])" ], "execution_count": 77, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "YTfJApum6Kh1", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "d0d6bd72-0588-40c9-e006-585f1314b9cf" }, "source": [ "#second datasets\n", "cust_df = pd.read_csv(\"http://pythondatascience.plavox.info/wp-content/uploads/2016/05/Wholesale_customers_data.csv\")\n", "del(cust_df['Channel'])\n", "del(cust_df['Region'])\n", "cust_array = np.array([cust_df['Fresh'].tolist(),\n", " cust_df['Milk'].tolist(),\n", " cust_df['Grocery'].tolist(),\n", " cust_df['Frozen'].tolist(),\n", " cust_df['Milk'].tolist(),\n", " cust_df['Detergents_Paper'].tolist(),\n", " cust_df['Delicassen'].tolist()\n", " ], np.int32)\n", "cust_array = cust_array.T\n", "cust_array.shape" ], "execution_count": 78, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(440, 7)" ] }, "metadata": { "tags": [] }, "execution_count": 78 } ] }, { "cell_type": "markdown", "metadata": { "id": "2e3Q7CEq6RKa", "colab_type": "text" }, "source": [ "

\n", "## First data and k=2 " ] }, { "cell_type": "code", "metadata": { "id": "gjNuH4Cq6NLo", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "80b02360-68aa-477b-fb58-e0e7492ecc24" }, "source": [ "#K-meansクラスタリングをおこなう\n", "kmeans_model = KMeans(n_clusters=2, random_state=10).fit(X)\n", "#分類先となったラベルを取得する\n", "labels = kmeans_model.labels_\n", "\n", "#提案手法による近似アルゴリズムで取得する\n", "bests_split = optimal_threshold_2means(X)\n", "approx_labels = clustering_2means_by_tree(bests_split,X)\n", "print(bests_split)\n", "print(approx_labels)" ], "execution_count": 79, "outputs": [ { "output_type": "stream", "text": [ "{'cost': 11316.030172413797, 'coordinate': 0, 'threshold': 70}\n", "[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1.\n", " 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "Qw2hFW3X6TGF", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "ad450568-5d37-4014-d419-5f2d1e55ced6" }, "source": [ "approx_score(approx_labels,kmeans_model,X) #近似アルゴリズムのコスト ➗ k-meansのコスト" ], "execution_count": 80, "outputs": [ { "output_type": "stream", "text": [ "11316.030172413793\n", "11316.030172413793\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "1.0" ] }, "metadata": { "tags": [] }, "execution_count": 80 } ] }, { "cell_type": "markdown", "metadata": { "id": "2QDeGFmP6Yr4", "colab_type": "text" }, "source": [ "

\n", "## Second data and k=2" ] }, { "cell_type": "code", "metadata": { "id": "UCkO03gN6VXa", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "b21c8de2-f566-403b-bf73-5a55fcf42ff4" }, "source": [ "#K-meansクラスタリングをおこなう\n", "kmeans_model = KMeans(n_clusters=2, random_state=10).fit(cust_array)\n", "#分類先となったラベルを取得する\n", "labels = kmeans_model.labels_\n", "\n", "#提案手法による近似アルゴリズムで取得する\n", "bests_split = optimal_threshold_2means(cust_array)\n", "approx_labels = clustering_2means_by_tree(bests_split,cust_array)\n", "print(bests_split)" ], "execution_count": 81, "outputs": [ { "output_type": "stream", "text": [ "{'cost': 41288343652.5358, 'coordinate': 2, 'threshold': 16483}\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "CT-5ZyGP6aly", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "ca24df6d-ded3-44e9-8313-952323f7e760" }, "source": [ "approx_score(approx_labels,kmeans_model,cust_array) #近似アルゴリズムのコスト ➗ k-meansのコスト" ], "execution_count": 82, "outputs": [ { "output_type": "stream", "text": [ "132340344661.33641\n", "135777624164.53575\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "1.0259730281948067" ] }, "metadata": { "tags": [] }, "execution_count": 82 } ] }, { "cell_type": "markdown", "metadata": { "id": "mKb_viSO6e53", "colab_type": "text" }, "source": [ "

\n", "## IMM Algorithm\n", "I'm working on this...." ] }, { "cell_type": "markdown", "metadata": { "id": "uayor9-46g6H", "colab_type": "text" }, "source": [ "## Define funcs\n", "I did not create Class of their algorithm, so this is a little experiment for me." ] }, { "cell_type": "code", "metadata": { "id": "ssapF7ca6cIQ", "colab_type": "code", "colab": {} }, "source": [ "class TreeNode:\n", " def __init__(self, cluster=None, left=None, right=None,condition=None):\n", " self.cluster = cluster\n", " self.left = left\n", " self.right = right \n", " self.condition = (0,0) #(i,threshold) x_i <= threshold or x_i > threshold \n", "\n", "def minimum_center(i,labels,centers):\n", " minimum = np.inf\n", " for j in labels:\n", " minimum = min(minimum, centers[j][i])\n", " return minimum\n", "\n", "def maximum_center(i,labels,centers):\n", " maximum = -np.inf\n", " for j in labels:\n", " maximum = max(maximum, centers[j][i])\n", " return maximum \n", "\n", "def mistake(x,center,i,threshold):\n", " return 0 if ((x[i]<=threshold) == (center[i]<=threshold)) else 1\n", "\n", "def delete_mistakes_data(X,labels,centers,i,threshold):\n", " new_data = []\n", " new_labels=[]\n", " for idx,x in enumerate(X):\n", " if(mistake(x,centers[labels[idx]],i,threshold)==0):\n", " new_data.append(x)\n", " new_labels.append(labels[idx])\n", " return np.array(new_data),np.array(new_labels)\n", " \n", "def make_next_data(X,labels,i,threshold):\n", " l_data=[]\n", " l_labels=[]\n", " r_data=[]\n", " r_labels=[]\n", " for idx,x in enumerate(X):\n", " if(x[i]<=threshold):\n", " l_data.append(x)\n", " l_labels.append(labels[idx])\n", " else:\n", " r_data.append(x)\n", " r_labels.append(labels[idx])\n", " \n", " return np.array(l_data),np.array(l_labels),np.array(r_data),np.array(r_labels)\n", "\n", "def count_mistakes(X,l,i,labels,centers):\n", " cnt=0\n", " for idx,x in enumerate(X):\n", " if(mistake(x,centers[labels[idx]],i,l[i])==1):\n", " cnt+=1\n", " return cnt\n", "\n", "def get_best_splits(X,l,r,labels,centers):\n", " bests_split = {'mistake':np.inf,'coordinate':None,'threshold':None}\n", " data_dimentions = X.shape[1]\n", " \n", " for i in range(data_dimentions):\n", " ith_sorted_X = X[X[:,i].argsort(), :]\n", " ith_sorted_centers = centers[centers[:,i].argsort(), :]\n", " idx_center = 1\n", " cnt_mistakes = count_mistakes(X,l,i,labels,centers)\n", " for j,x in enumerate(ith_sorted_X[:-1]):\n", " if(l[i]>x[i] or x[i]>=r[i]):\n", " continue\n", " \n", " cnt_mistakes = count_mistakes(X,x,i,labels,centers) #ここで本来はDPでより効率よく計算すべきだが,やり方がよくわからない.なのでナイーブなやり方でやっている.つまり,全データに対してその分割でmistakeとなるのか否かを調べている\n", " \n", " if bests_split['mistake'] > cnt_mistakes:\n", " bests_split['mistake'] = cnt_mistakes\n", " bests_split['coordinate'] = i\n", " bests_split['threshold'] = x[i] \n", " print(\"num of mistakes at this node => {}\".format(bests_split['mistake']))\n", " return bests_split['coordinate'],bests_split['threshold']\n", "\n", "def build_tree(X,labels,centers):\n", " node = TreeNode()\n", " l=[]\n", " r=[]\n", " \n", " #論文疑似コード 2〜4行目\n", " if(len(np.unique(labels))==1):\n", " node.cluster = labels[0]\n", " return node\n", "\n", " #論文疑似コード 6〜9行目\n", " for i in range(X.shape[1]):\n", " l.append(minimum_center(i,labels,centers))\n", " r.append(maximum_center(i,labels,centers))\n", "\n", " #論文疑似コード 10〜13行目\n", " i,threshold = get_best_splits(X,l,r,labels,centers)\n", " X,labels = delete_mistakes_data(X,labels,centers,i,threshold)\n", " left_data,left_labels,right_data,right_labels = make_next_data(X,labels,i,threshold)\n", " \n", " #論文疑似コード 14〜16行目\n", " node.condition = (i,threshold)\n", " node.left = build_tree(left_data,left_labels,centers)\n", " node.right = build_tree(right_data,right_labels,centers)\n", " \n", " return node" ], "execution_count": 83, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "viQyjZlK6ryE", "colab_type": "text" }, "source": [ "

\n", "## First data and k=3" ] }, { "cell_type": "code", "metadata": { "id": "lWw2YW_w6jds", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 531 }, "outputId": "a344e197-c168-42dd-9157-657548eb4ad1" }, "source": [ "#IMM procedure\n", "kmeans_model = KMeans(n_clusters=3, random_state=10).fit(X)\n", "centers = kmeans_model.cluster_centers_\n", "labels = kmeans_model.labels_\n", "root = build_tree(X,labels,centers)\n", "make_tree(root,kmeans_model.n_clusters)" ], "execution_count": 87, "outputs": [ { "output_type": "stream", "text": [ "num of mistakes at this node => 1\n", "num of mistakes at this node => 2\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "" ], "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\nX_0 <= 75\n\n\n\n1\n\n0\n\n\n\n0->1\n\n\nTrue\n\n\n\n2\n\nX_1 <= 82\n\n\n\n0->2\n\n\nFalse\n\n\n\n3\n\n2\n\n\n\n2->3\n\n\nTrue\n\n\n\n4\n\n1\n\n\n\n2->4\n\n\nFalse\n\n\n\n" }, "metadata": { "tags": [] }, "execution_count": 87 } ] }, { "cell_type": "markdown", "metadata": { "id": "cYEF7HEWKTbj", "colab_type": "text" }, "source": [ "⇩クラスタ0におけるmistakeは1つだけ" ] }, { "cell_type": "code", "metadata": { "id": "f9AXdugNJ95V", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "cdd04e03-18ef-4669-9b68-07df7f0b2bcc" }, "source": [ "labels[X[:,0] <= 75]" ], "execution_count": 88, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 88 } ] }, { "cell_type": "markdown", "metadata": { "id": "PIdjQh4JK4n1", "colab_type": "text" }, "source": [ "⇩クラスタ2におけるmistakeはない(0は考えなくて良い)." ] }, { "cell_type": "code", "metadata": { "id": "MITFO8KkJ6ZA", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "7eeb9fe7-7228-4f52-f5f4-718c5a854fd6" }, "source": [ "labels[X[:,1] <= 82]" ], "execution_count": 89, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2], dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 89 } ] }, { "cell_type": "markdown", "metadata": { "id": "DVWlMj5ELCRt", "colab_type": "text" }, "source": [ "⇩クラスタ1におけるmistakeは2つ" ] }, { "cell_type": "code", "metadata": { "id": "Kx0rArMgKNlU", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "outputId": "341c3bd8-722e-4f40-8bc9-554f5a4cb7c3" }, "source": [ "labels[X[:,1] > 82]" ], "execution_count": 90, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 2, 1, 0, 0, 2, 1, 1, 1, 0],\n", " dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 90 } ] }, { "cell_type": "markdown", "metadata": { "id": "F_qNy2J16uEP", "colab_type": "text" }, "source": [ "

\n", "## Second data and k=3" ] }, { "cell_type": "code", "metadata": { "id": "0ibTR82J6liX", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 595 }, "outputId": "a9588cb0-051f-4e41-9d25-f4ed76b38b40" }, "source": [ "#IMM procedure\n", "kmeans_model = KMeans(n_clusters=3, random_state=10).fit(cust_array)\n", "centers = kmeans_model.cluster_centers_\n", "labels = kmeans_model.labels_\n", "\n", "root = build_tree(cust_array,labels,centers)\n", "make_tree(root,kmeans_model.n_clusters)" ], "execution_count": 97, "outputs": [ { "output_type": "stream", "text": [ "num of mistakes at this node => 8\n", "num of mistakes at this node => 11\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/plain": [ "" ], "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n0\n\nX_0 > 20049\n\n\n\n1\n\n0\n\n\n\n0->1\n\n\nTrue\n\n\n\n2\n\nX_1 <= 12220\n\n\n\n0->2\n\n\nFalse\n\n\n\n3\n\n2\n\n\n\n2->3\n\n\nTrue\n\n\n\n4\n\n1\n\n\n\n2->4\n\n\nFalse\n\n\n\n" }, "metadata": { "tags": [] }, "execution_count": 97 } ] }, { "cell_type": "code", "metadata": { "id": "glKXXfuVLXV5", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 85 }, "outputId": "47fa3082-32db-4ffe-9d14-409881cb5963" }, "source": [ "labels[cust_array[:,0] > 20049]" ], "execution_count": 93, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 93 } ] }, { "cell_type": "code", "metadata": { "id": "wpL565elMakS", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 340 }, "outputId": "91e0e6ee-7cb8-48ee-b1dc-38f5220fc7ac" }, "source": [ "labels[cust_array[:,1] <= 12220]" ], "execution_count": 94, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0,\n", " 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 2, 1, 2, 2, 2, 2,\n", " 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,\n", " 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 1, 2, 2, 2,\n", " 0, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2,\n", " 2, 2, 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2,\n", " 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2,\n", " 0, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 0, 2, 2, 2, 2, 2, 0, 1, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,\n", " 2, 0, 2, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2,\n", " 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2,\n", " 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2],\n", " dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 94 } ] }, { "cell_type": "code", "metadata": { "id": "ScazzrEdMf2B", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": "98c91de4-3efc-4435-8f2e-31819d6180c9" }, "source": [ "labels[cust_array[:,1] > 12220]" ], "execution_count": 95, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,\n", " 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 0, 1, 1, 2, 1, 1, 0, 1],\n", " dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 95 } ] }, { "cell_type": "markdown", "metadata": { "id": "pM5V20ceM3F6", "colab_type": "text" }, "source": [ "

\n", "func for visualization \n", "sorry for dirty code..." ] }, { "cell_type": "code", "metadata": { "id": "QH-P2P-k_g8h", "colab_type": "code", "colab": {} }, "source": [ "def make_tree(root,n_clusters):\n", " G = Digraph(format='png')\n", " G.attr('node', shape='circle')\n", " N = 2*n_clusters - 1 #ノード数\n", " \n", " q = queue.Queue()\n", " q.put(root)\n", " if(root.right.cluster != None):\n", " G.node(str(0),\"X_{} > {}\".format(root.condition[0],root.condition[1]))\n", " else:\n", " G.node(str(0),\"X_{} <= {}\".format(root.condition[0],root.condition[1]))\n", " i=1\n", " while not q.empty():\n", " root = q.get()\n", "\n", " if root.left.cluster != None and root.right.cluster != None:\n", " G.node(str(i), str(root.left.cluster))\n", " G.edge(str(i-1), str(i),label='True')\n", " G.node(str(i+1), str(root.right.cluster))\n", " G.edge(str(i-1), str(i+1),label='False') \n", " elif root.right.cluster != None:\n", " G.node(str(i), str(root.right.cluster))\n", " G.edge(str(i-1), str(i),label='True')\n", " G.node(str(i+1),\"X_{} <= {}\".format(root.left.condition[0],root.left.condition[1]))\n", " G.edge(str(i-1), str(i+1),label='False')\n", " q.put(root.left)\n", " else:\n", " G.node(str(i), str(root.left.cluster))\n", " G.edge(str(i-1), str(i),label='True')\n", " G.node(str(i+1),\"X_{} <= {}\".format(root.right.condition[0],root.right.condition[1]))\n", " G.edge(str(i-1), str(i+1),label='False')\n", " q.put(root.right) \n", " i+=2\n", " return G" ], "execution_count": 85, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "hqNdbgdr8Kuk", "colab_type": "text" }, "source": [ "

\n", "## Decision tree simulation" ] }, { "cell_type": "code", "metadata": { "id": "Aj1mdsRq8Rzu", "colab_type": "code", "colab": {} }, "source": [ "def make_toydata(v):\n", " mean1 = np.array([2, 0])\n", " cov1 = np.array([[0.3, 0], [0, 0.3]])\n", " data_1 = np.random.multivariate_normal(mean1, cov1, size=200)\n", "\n", " mean2 = np.array([-2, 0])\n", " cov2 = np.array([[0.3, 0], [0, 0.3]])\n", " data_2 = np.random.multivariate_normal(mean2, cov2, size=200)\n", "\n", " data_3 = np.array([[-2,v],[2,v]])\n", "\n", " return data_1,data_2,data_3" ], "execution_count": 62, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "W1j_ehJgADif", "colab_type": "code", "colab": {} }, "source": [ "data_1,data_2,data_3 = make_toydata(v=100)" ], "execution_count": 63, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "jJNCN64BALIP", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 282 }, "outputId": "8ab3dba7-d3bc-43a4-87d7-6806b436f9f2" }, "source": [ "plt.scatter(data_1[:,0],data_1[:,1])\n", "plt.scatter(data_2[:,0],data_2[:,1])" ], "execution_count": 64, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 64 }, { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2df5Bc1XXnv2daLdGSiUYKs2szSKBkKchi/VqmWCdsbRmw+WFskIUt2U7VxmUnWtbxOiiOFnmdAqF1ChGVLf/M2rKhHG+IrVkjjYWBFTbC64RdJYyskUAYBRZZSIPLyJZmYqQG9cyc/eP1m3n9+t777vvR70e/86lSaeb16/duz0jn3nfO934PMTMEQRCE7qcn6wEIgiAI6SABXxAEoSRIwBcEQSgJEvAFQRBKggR8QRCEkjAr6wGYuOCCC/iSSy7JehiCIAiFYf/+/b9k5j7Va7kO+JdccgmGh4ezHoYgCEJhIKJjutckpSMIglASJOALgiCUBAn4giAIJUECviAIQkmQgC8IglAScq3SEQShnAwdGMXWPUfwylgdF/bWsOGGy7BqZX/Wwyo8EvAFQcgVQwdG8amdz6DemAQAjI7V8amdzwCABP2YJJLSIaIHiOhVInpW8/rbiWiciEaaf+5K4r6CILQzdGAUV2/ZiyUbH8HVW/Zi6MBo1kMKxdY9R6aDvUu9MYmte45kNKLuIakV/jcBfBnAtwzn/B0zvzuh+wmCoKAbVsevjNVDHRfsSWSFz8w/BnAqiWsJghCdblgdX9hbC3VcsCdNlc7vEtFBInqMiK7QnURE64homIiGT548meLwCs6hQWDbW4FNvc7fhwazHpGQAd2wOt5ww2WoVSstx2rVCjbccFlGI+oe0gr4PwFwMTMvB/AlAEO6E5l5OzMPMPNAX5/S/0fwc2gQePgTwPhxAOz8/fAnJOiXkG5YHa9a2Y97Vy9Ff28NBKC/t4Z7Vy8tTEoqz6Si0mHmf/Z8/SgR/RURXcDMv0zj/l3PE5uBhm8F16g7x5etyWZMQiZsuOGylhw+UMzV8aqV/RLgO0AqAZ+I3gzgF8zMRHQVnCeLX6Vx71IwfiLccaFrcYOkaNjtKJveP5GAT0TfBvB2ABcQ0QkAdwOoAgAzfxXA+wD8JyKaAFAH8AFm5iTuLQCYf1EznaM4LpQOWR3b0Q2KprAkEvCZ+YMBr38ZjmxT6ATX3eXk7L1pnWrNOS4IghKToqlbA7546XQDy9YA7/kiMH8RAHL+fs8XJX8vCAa6QdEUFrFW6BaWrZEALwghuLC3hlFFcC+SoiksssIXBKGUlFHvLyt8QRBKSRkVTRLwBUEoLWVTNElKRxAEoSRIwBcEQSgJEvAFQRBKggR8QRCEkiABXxAEoSRIwBcEQSgJEvAFQRBKggT8IhDUzUq6XQmCYIFsvMo7bjcr1wnT7WYFON45Qa8LgiA0kRV+3jF1s7J5XRAEoYkE/LwT1M0qSrcrSQEJQimRlE7eCepmFbbblaSABCE3pN1iUVb4eee6u5zuVV683ayCXvcjKSCh4AwdGMXVW/ZiycZHcPWWvRg6MJr1kCLhtlgcHauDMdNisZOfR1b4ecdddT+x2UnTzL/ICebu8aDX/UjDc6GDdHrF2k19aLNosSgBvwgEdbMK0+1KGp4LHSKNYNxNfWizaLEoKZ2yETYFJAiWmIJxUnRTH1pdK8VOtlhMJOAT0QNE9CoRPat5nYjoi0T0IhEdIqJ/k8R9hQhIw3OhQ6QRjG2CZFFy/Fm0WExqhf9NADcaXr8JwKXNP+sA/PeE7iuEwZVj7lznfL96O7D+WQn2QiKksWINCpJZFEKjsmplP+5dvRT9vTUQgP7eGu5dvbSjqalEcvjM/GMiusRwyq0AvsXMDGAfEfUS0VuY+edJ3F+wQOSYQofZcMNlLTl8IPkVa1Af2qLl+NNusZhW0bYfgLdSeKJ5TAJ+WpjkmBLwhQRIqym4KUh2U46/E+ROpUNE6+CkfbB48eKMR9NFiBxTSIGsm4Jf2FvDqCK4d7IQWiTSUumMAljk+f6i5rE2mHk7Mw8w80BfX18qgysFOtmlyDGFLqJThdCiFIKDSCvg7wbwH5pqnbcBGJf8fcqIHFMoAf5C6IK5VcyZ1YP1O0YiB+oiFYKDSEqW+W0A/xfAZUR0gog+SkS3E9HtzVMeBfASgBcBfB3Ax5K4rxCCOHJMMVsTCsSqlf14auO12LZ2BV5vTGGs3ogVqNPYX5AW5Ahn8snAwAAPDw9nPYzkOTRob4WQNX51DwD0VIE55wP10874L70eeOHxYnweoTRcvWWvMp8POBJI24Lyko2PQBUlCcDRLTfHG2QHIKL9zDygei13Rduup2jySJW6Z6oB1E85X48fB4bvn3kt759HKA0mZU4Y24duKgSLtULaFM2tMoqKJ8+fRygNQQHZNi2TxY7YTiEBP22ylEdGycVHVfGI3FPIGFWg9mOjz89iR2ynkJRO2qTtVjldLzgOJ+vYzEbapl6uu6s9h2+DyD2FjPFuBNPl8m3TMlnvL0gKWeGnTZrySLdeMD3B+EpPNqkXv7qnthCozDa/R+SeQk5wFTufX7uia9IycZAVftqEbVgSB1W9wM/4iWDVkN9v33++qHRKQ9ot+ZIiLduHvCOyzG5mUy/aVvV+aguBiXrrxFCtiWWy0Ia/wQngrJKLms/uVkyyTEnpdDNBeXQ3tVQk1ZDQcXQ2At20AamsSEqnm1EWXJuF2/mLnNd3/pH6varCstBVqNIzALRtCsWJsvhIwO9mbOoFu24HeLL9vWSWswnFRtd/ds6sHu0qvps2IJUVCfhpkKWVQlCDc1WwNx0XugJdesZ/zOWVsTq2rV3R8QYnUfA+qfTOrYIZGK83Wp5ayl6sdZGA32nybqUwf5FmX8Ci9mNC1xA2DXNhby200iUNRY//SeX02cb0a6NjdWz47kGAgcYUTx+ztVToRiTgd5q8dZpSSSoP/m27Skd09F2NLj2zYG4VrzemtKt42w1IupTR8LFTePL5k7EmAe9E0kOESYPSsDHZ/lqclodFlaW6iCyz02ilkQRsGms/3Mn0j8r5sloDln9IdPQlwySxBOKnQHROlZ693i33tL2+atxR6e+thfqMRZGlmmSZEvCTxh+wz52ZcZb0Mn8RsP7Z9vcGqWriBOJtb9Wnb/xjEbqeTq5WdZbCKvp7a3hq47VW55osj8MQZeLR3TvM+NNA7JHTQpWvr8x2/OOnZnKL2pSJcmesz/vm5X3RV+PS11bw0El/GF3KSEWYAB629lCtUEsOH2gP9oBdmkd379GxOpZsfKQQKR7ZeJUkqoA9ec5pFmLTaSoo8DbqwPADzVU6z0wCth2oTH1tpauVkCA2TpUuBFh3oeqdW1UerxBNtzTsrVWnXS23vm85tr5/eYvTpe7JI2gyMclPi9L6UFb4SaIL2PXTwJ1Hg9+vc9JsQWOApppAvE6ZVGlKLX3rm2rNKdzmWUkkFA6/oseU3uHmeTY59Nden2g7Xq0Qtr5vufH93td0qZkeIuNKfcMNlwXWD+IUhNNAVvhJYlpB26By0rRBNdH4nTKndfWe/3ru08YLj4u9gpA4rlPl0S03oz9gc5ZNqmbrniMtqRmXebNnhQqwuqePSWbjSt3vi68jzzuPJeAnSVzr4xYrYgBt/6w0/8xUE0qQU2ZtoVOoXbZGcvtCx7nm8j5jkLTZrasLpOP1hvK4Dn/grlD7yHQeQTaTWJ53HkvAj4Iu3+33jjfl63UsW+ME4k3jwOrtrdca+IjiCYCcVbw/7x4UrL3KobhPJoJgYOjAKB7aP6pN69ju1tUF0igB1hu4pzRKxaCVehFbH0oOPyxBO2eDrAzCoLrW4rfZdbCyqgc0UZmsyeYrQUNYOafKxsGlP4SyRZVDDxtgVWOP6hFURI/9RHT4RHQjgC8AqAD4BjNv8b3+YQBbAbhJsS8z8zeCrptLHX5etOxB4/j+nwLD95uvsfrrMxNKln4/Qu7QBfUom490mnwCsG3tilAB0zTZBE1EqrFXewjVCuFsY6rlPnncUGVLRzdeEVEFwD8BeCeAEwCeBvBBZn7Oc86HAQww88fDXDuXAT/sztkk8QZlk+5h9deBoY+1av9V2ExSKqWPuwkMsJskZDIpFKagrusPa9p8pFPF6Gwc3EAb5knCZiKy3bS1YG4Vd7/nCivVUB5X953eeHUVgBeZ+aXmzb4D4FYAzxnfVVTiNCGPE/iUu3AVUMW5R1CwB4Lz/N//U0f3704urtJn/DjwvT8GmGfuo5Ny5t08TmjD1Ogkiie+LhXDDGNDFZ0vvyqomsbsnm+rnplrofrReQXpxpcXkija9gPwRsATzWN+biOiQ0T0XSLSWjES0ToiGiai4ZMnTyYwvISJqsRpkUlG2DT12J3BwR5wgrKtusY0SR0abA32fibPtU8qKimnyTxOyCWmoB6lcOpXxfT31nDv6qVadc0rY/XQ3bVsJiLb4q6tRLSI3b/SUuk8DOASZl4G4AcA/lp3IjNvZ+YBZh7o6+tLaXghiKrEiRP4Dg2q/XhUUI/zJ4igSeqJzQjsh6vCP9mI5DP3+Fsa6nazXthbU8orbQqnq1b2TxdI3YBuuk/YJwmbich2928ciWieNfhAMgF/FIB3xX4RZoqzAABm/hUzv9H89hsArkzgvtmxbI0TLOdf5ASuJzYHr9TjBL4wq2GeCm5eMnse0HjdaW94z0IndRNlXCqIWn8WIvnMNW5qYrS5G3Z0rI7XXp9wPGg81KoVXHN5X5u8kgDcdmWwJ8+fDz2D9TtGrO/To9DGA/pgbJJIuhPa+h0jOK/aM2290FurKu+ftkQ0TZII+E8DuJSIlhDRbAAfALDbewIRvcXz7S0AfprAfbNDlZ7ZuQ7YNF/vQxMn8CWxGp49z9lsBTgOnt68/PD97UE/aFyV2eo2iDwFDN0O3LfE+Xmoxi6Sz9ygSk00phjzZs9qS8E8+fzJtnMZwJPPm1OvQwdG8eC+l9ueF1X3ue3Kfjy0f1TpcW8Kxv600YK5VcyZ1YM7doy0TDSnzzbwxsQUtq1dgZG7r8fW9y1vu//WPUfaGrj7KaIGH0igaMvME0T0cQB74MgyH2Dmw0S0GcAwM+8G8AkiugXABIBTAD4c976ZYuNqCbSmeeJo3cNo6nXwlDkttP+bwLs/N/O9sgG6O56mSuexO9XXnJr0HPf9x60tBG66Twq2OcG0e3Xk7utbjq3fMRLqGi5b9xzRJgf997l6y16lZr9CFCiTdN0//QVVkzOm1zE0TCG2iBp8IKGNV8z8KIBHfcfu8nz9KQCfSuJeucDG1dJvaGbTUFyHKfi6uKt3VQCmSnDB158GshnvznXBY/cze54E+xwRZtOR7lyGE6h1Ac80Ifjvozt3khnrd4xg654jsTZ6me5jo/Tx0kl76U4hO22jYLPiVk0KUXfhtgRfzX1nz3NcL/3KmmrNTt3jT8/YSEijPHlIsTY3DB0Yxdlz7e6TutSEyS3StBrWTRTUvKbNuUCrBbHqPi42hVPVhBamEJtXDX4Q4qUTBRtXy6SLkq7Hjs6Cavy405vWX1Jb/iG7huRXfnjma1sJ6XV3Oc1dwiDF2lzgpi+8Tb8Bp5BpSp2cV9WHDJUscejAKMbOnlOe79oie/PkNkoa3X1cpZGu4Ouim9BsC7GqQnfeffBdJOBHIcjV0pSbj9toRBcwlWkbdgqy584YAjM5TwfDD8yMRychfezO1mPL1gCr/momnQQ411IVcwEp1uYIXdpj3hz1piPdBOFndKw+HfiGDoxiw3cP4sw5fXrFHyxtLYi9TwH+AKwq+LrXcgvQqs9oW4gtqgYfkJROdLzpGW/6o7bAObZznXPMmwpJYteprvhrStvUTzmqmtpCpxmLm6IBnGudO9M6Ht216qecz+CvTfh31g59rL0mYCrW+tNHl14vTdU7TFgduU1e3MVNuWzdcwSNyeC9HPXGJO7w5efdgPzbn3pUGcC9lsa6sVWIMMk8/XeQUZt7/J6HD09PbHNmta+Jk9DgZ5USkhV+ErjpltXbgYl6s3CqSIUksetUt/ErKG0zec5ZfW8am/HB143HRNBYdbYOumKtKn00fH/0HcmCFWF15GGCWZANgw5VakQV7P3HTYXeWrUyfa5t6uV1j5HaWL3R9p64GvwsU0IS8JMkKKB3ctepTV3BdhdsmGscGpzR3G+ary/i6u4V1KgFECuGDhBWRx52Q5HJhsGEPzWiazLiPa67T4UodOrFJl0TV4OfZUpIAn6SBAX0JHad6gqqQPBK33+fKAVU73sODTomaja2D7p72U46ou5JFJ2/jS6toAtyvTV1bah3bhVn3mhXANngXbHbBFfdObqnA9OTh026JuzPLso9OoXk8JMkyEkziUYjpqcIN1WjctZU3cdG3+/Ff40nNjuporDv82Ir7RR1T+KE0ZHrNhoBaPeYrxBee31C2X/WzbwHNTX36/pN+W7dOTorZ9OTh+2+hDga/KgNV5JAAn6SBAX0OJuvXGzSQrb3WbYGeHmfs8s2yH/H3V3rvUZgoKbgz2gz6Yi6J7eogu2ZNyYwpnHCtLXj8+vtg4Kr7pywHbKS6KoVRBr30CEBP0lsAm3cFohBTxF+xcvq7fr7HRp0tPumYF+t6d1A3YYoKqgCvPerwZ9V9TMTlU5q2KpFgmwHvO9ZsvGRRMZm2uVqQxT7gzQsE7K0ZUikxWGnyGXHq6zRtS4c+KjT71a1WvZLIr1drEwEBe1N883vN00WQuaEaVeo6xal6nRl21nKBgJwdMvNiVyrLHS645XQSfwrdlcz7+eFx50/qtRI/dRMYRewz9vzlDlYz19knjRUnkJCbgjyjvGu/nXLQlWh0WTBEJagvHZRLQ6yQgJ+nlFt1NJha+jmfm1DUKHUJv8+frx9s1YaSB/dQExqEdXqX4UqIHtTFqNjdRAitdIB0O614zJ0YLRlgxRQnDaDWSIBP4/Yply81BYAr4+b8/FhpY1BhVI3gO78I/N5afewlT66VpjUIjY7a4P86b22w95VuG26Z8HcqlU9wUvcvL/qiQEong2yDsnh5w3bZuVeeqpOp6kgiaSr0beZSGoLgTuP2t1/21uDrzl/UdP8LQV040lzDAXAlMNfv2NEuyonIFbgs8nxE4Bta1cor2/z/s9r3mtC9fOoVghgtEhMdXWOJEgiRSU5/LzjTT9QT7BE0gtVgDnnB29+8kobAycUcq637a12qRCr1E6KG6ekj24L/iByzeV9ePL5k3hlrI7eZmeo8XqjJcDoNOyqIm1YbHL8v/fbjiHf1Vv2tgU/mw1KUVI7yu5fCi8gXZ0j7uo/TAOWqEjAzxr/ij5MsAeCO1nptPB+GeThXe1dqmxTITZ+/a6pXBoESVdLhCqI/M2+l6dfP322gVq10raa3nDDZdjwPw+2rGyrPRRLK+4NjvNrVfQQtE6az/3819rgZ5MWipLaCaMsUtU54gbosA1YoiDWCllj4yVjggy/QqrMmKUBM7bMroun+9ritzmmbyrcYm+QrbPb2N1rleylPpaeAZrKV6ikm7dscvFaHxe/P7HZZt6I3zBsrN4w2iafPtvQBr8NN1xmNZSwVgWVAB99Lz1EuGPHSKKeOGlYLkjAzxqrwqzhH6LpicB9LaihSdCk455vcrB0bZG1TxtT7X76nULnKFrCgq1tsPCfp7I2bkxy5GAWxl7ZxCtjdaxa2W+l+glrVaDz3gl7btQAHdeF0wYJ+FmjaxbSQsTCulukjeri6aJqruJ3sHzsTrUtshcbk7WkcC2rvXbQJcQ2WNj2lo0azMK+T2fK5o5T56Lp4lcQeTtiXb1lr9KKWHfN3lp12ijN5ikgaoCO68JpgwT8uMTtYBU2Zx+Ghb/l/B3VxRNwUiG6MXqvaxvM4/yshNBcc3lf4DmqoJL0ajPM+2rVHmy65Qpj8FMFR11XK1v/+Q03XIZqT2tAr/YQNt1yBZ7aeC2ObrkZUwFPAXEDtLeFZFC7yShIwI+Dbe9XEzb9ZpVY5BuP/m/HiiHIllnnpV9baLZcpp6Z4G2LNDZJlSefP6k8XiEyWvvGWW2qVtOq61V7CL74imoP4d7VywItiFWvb1u7Aj/bcjOe2nhty+cJ5T8fULcwTVxhbZK9qFpIvjExZXhHNBLR4RPRjQC+AKAC4BvMvMX3+hwA3wJwJYBfAVjLzD8Lum7udfhJ6L1D6+5pRllz8G+D3+f64ahcPL15be9mL9cUzXXIBMLvDbBFtPEdZcnGRyLr6U2SQ91rJm0/oLZX1klGk9rkpPsZ+H16dPr+BXOrOHDX9dOf29Z/KAxhvIqC6KgOn4gqAL4C4J0ATgB4moh2M/NzntM+CuA0M/8rIvoAgPsArI1778xJSu89y9OTtrYQmHgDaCg8c/zBcfHbZuSVuv/WPGnv4gmod6i+54vOn6h7BUyUVBufFiYJoze9AbRLCXWWwyY5omk17V95e+8TdN04wXR+raq0a55fq1r5BZ0+28DQgdGWn4et9t5Wp59WU5QkdPhXAXiRmV8CACL6DoBbAXgD/q0ANjW//i6ALxMRcZ63+doQV++tWt2few2YUgTTnmq7rNBrtXzPQnUQdmWbNrbMNs1VACeNo2P+IvME1HZ++bTxaWKzySms1lsX1O/YMaJ9jxu4TAGwUzp0XZ21MTllbfLmpn+8Y9ftBHYJM4GZJqUkSSKH3w/AG/VONI8pz2HmCQDjAH5TdTEiWkdEw0Q0fPKkOv+YG+LqvVUBdvKcOnDPOd8csK/8sOYFss+T2zyxHBrUa//dJ5BNY3a1iZJq49PEn+vW4V1JBilaoqw6L+ytBRZPO7XKHTurVo+dOTdpLRV1x2rbeHzowCg+OXjQunagm5RCbA2wIndFW2bezswDzDzQ1xesMMiUuHrvMOmM+mnz6+/+HFCd136cJ4Fdt9sF/aDirvtEopqQ/ME7qKk6VUqrjU+bVSv7p1UmOumhW4y0UbSEVerUqhVcc3lfYADsnavvj+timox0ryWlY7cN3u7PMExPXd2kpDselSRSOqMAvMu5i5rHVOecIKJZAObDKd4WnzgdrGz7uQLt1gQq+9/GWfV7edJpNu6OV0dQi0bTBq3G645rpruL173PY3e2SzalMUpmBLXX06VVPjl4EOt3jEwXVh/aPxq4OnaLwu75QQFQl+A9fbaBFfc8jsbkVMvuXG+KBIA2faL7zHNm9WhbMdqiCt5Bm8xUE1BafW6TWOE/DeBSIlpCRLMBfADAbt85uwH8QfPr9wHYW/j8fRKoVsGV2VD+Wl4fn1ml6+Sgs+fq7zV5Dvj+HWodvLuXYOc6p4DsfVKY5Rmf8YnE57/jeuDfeRRY/XXzU1DcvQyCNUFyR136ZJJ5esX/0P5R3HZlv3ETUn9vDUebEsknnz9pFQDHDcFXZ8XgrrKD8v+qz6zT+odBFZBNKSidtDWNTVdAAit8Zp4goo8D2ANHlvkAMx8mos0Ahpl5N4D7AfwPInoRwCk4k4LgV8+4q3jVJiaeBB6+w3mPrrgaxLkzMx2z3MD88r5Weaf/3t5uWbZPJI26k0bauW7m6UMnvRTv+tQxNQW3NSZ78vmT+Oya5W0Ga4BjKewNVLYBMIxXvhfT9d3XTJ/ZX0TWOYX6G7noArLuc1SItPLNtPrcih9+XrDV46/+uhNII/cQ8mFqRO7F1eRH0eObUjjiXZ8rbDtdAc5KeXSsDqKZdMyCuVXc/Z4rWgKVTmNeIcJn1yxvkWWalD6mcQBqt8soOnad1v62K/ut9gh0Sqtvi/jhFwFb18wnNofL/Qdhq6cfP+7ce/mHfFbKFph624p3fa7wrzR7iJS5d8JMgGU2BzRdDj2JAOhdZZtqE2GIu9pOa7UeBVnh54VNvbBeta/+enI7X21X+C7VmpPXD22ERo5c0899S9TXkhV+LlCtVnU9ak2raZsNSDadrLz01qrYdMsVLU8IeQyyaSMr/CJgu2qnil3DkdY3QflftFpzVuw2Fg0ujXq0iUYl+Tw0CLzx6/bjldmiz88R51V7pgN+r2aDEGDOpZty6Dbvn1vtwZxqBWNnG9pgbnOPsiMBPy/Y5sd5ckYBs2yN87W2iTjpJxKvDn7aouE49Ou3GOg2WD2xWW2pPPtNUrBNkKgrX9Xq/o2JKSyYW20x+XKJKyHUFTu9XjZCPHK38Sp18iIJ9G/iMvnke10ml63Rd5maf5E+F85TM0F12jt+HFi9PXgMtYXmTVVeqEdfsNWNzbvJLC+/n4Jiaw2sQid1dHP2Xkz5chsvekAvTbz7PVcEjjVrbD9j1pQ74Cdhb5wk3qYd7/2qPqj6m4/cdJ/63HNn9L1kdbtqg8ZQrTn38+8wVu3yBYDzevWrddudvXn5/RSQUNbAPnQplvF6o0XXvqDZCH39jhHl7lfbCSdoj0DSJBWk40yqaVPulI7JLCztlIJ356wbpI1tBz2rY92u1vopx3StMtvZeOVi62ET5LLp/RnpDNVMlhBRdvZm9fspKHH8aUy7P918uc4gbPjYKTz5/Enl+02GaEnm4YPsnZNy5kyj+XhSlDvg50US6Nfg2yhgVKvj18fbj001nBTM7Hl6a2SVTYM3qNsE1yjOoboJBdDr8wGRbIYgzpb9ay7vw9/se1l53EUX7B7c97KxEpS07a+foICeZJBOy9o4Ccqd0glKKaSFrQbfxb9CN5maAc4qW9ffNam0SVTnUH/vWcAzHg1iqWxNnC37um5Z3uO6oBZU9k/aI8bPpt2HtRbOJvnn6Fg9dHonjebjSVHugB/X3jgprFesGi+aoAnDFCCDGpy7BBVP4zqHmsbjRSyVQxEnL26zco0S1PzWC0kzdGDUaIo2OlY3WkWHzcGn5YOTBOVO6dh0gkoDGw2+aSNS0IRxqUHSZuuBb+N3E8c5NGg8wIy9g+TvQxE1L26TDrJpsNJGh/d62hSkGWYBcpj0Tp531vopd8AHkglScQnS4AetaoMmjBceD/9e71NBmsVT7Xhk523aBFkpA63BznaXbGOK8cnBgy3vTxLb3Dljxg8oznWA4mz6KndKJy3CpkNqC5vaesvUSFCzEd2q+dDgjHumF/8Ek2bxNC9pNsEqHeRVwvOByGoAABVxSURBVIRhkhnrd4zgz4eeCT45JLZpJtcKIqgpTDchK/xOo0qHfO+PmxLK061ppKirZfd9u25XF251tgaqp4raQkdn717z0CC0D79evXxSabG8pNlKgM0OXNPKNYyzpgoG8OC+lzFw8cJYq2P/57Bp0OJ9UrF5kukWxDyt05jkhS5JdYBSBnECBj7itEC0GZc/daIdPzm7cgG1ll46WuWaJCx8w5qd6YhiYexia2V8zeV9RmvjbjJeE/O0LLFJeySVD1+2xmloMvwAZlbk7JijAU4u310126ZptONn537b3iqbowpIEjr0oDQOAZjVAzSmzNeJM2noPse3/+F4i9e+ixvY1+8YwdY9R1paO+o2aHXLRABIwO88ti6YSeXDX3gcbemXRh0Yvt9zL4NJmj/9YyqiAvnZvCaEIonNQkEdqhjARECwB2BslRiEqSWjf+esajPWhu8eBBjTXbts++QWNeh3X9E2D2Zb3jGcO9PsUxtAUpuJrAOtK0zzoCqOBhVR87J5LQ+/9wKRxGYhlf7cj03CWNfc3AbTeOuNSdzz8OHp71VPA41JbmvRaNMn14Y8Gqp1V8DPg9mWfwz1U82WQBpHSxd/oI0awEIFWg7eKBW0oSoPqpo8/N4LxNCBUZx5Y6LteNhCpVfFEwfv+8MGyaBJ5/TZxvQ1wjy9vDJWj/UUlFdDte5K6eTBbEs1hqmG42Uze546PVJbOONtr/KlD9PY+7q7DP74Pqhip20PUhDNqs18Zr/KJw3y8HsvCDpljaoXrQ06EzVbvJNMWEMzN78edE+3LhGmSbr75BDVhyivhmrdtcLPQz7ZNAbdavim+3yrVECZh/fbHagw+eP7CdPaUIU7Zq/Z20QGhlF5+L0XBF2AnDt7VqxApNLsL5hbVZ5LBKWuXxckN+0+3Lbq966gg3DPUT0NVHrU9YNrLu+LZZmQV0O17lrhR3FsTHMM/taEVJkJ5OfOBBuo2Qawm+5ztP5eS2TlWBfZXU9HkA9PWlr6PPzeC0InA5Ffsx9W+qkbw1i9Me2NMzpWx/odI6hVe1APkv80cYvCKguEM29MKH13nnz+JD6zamnb+bYqnTgupZ0k1gqfiBYS0Q+I6IXm38puG0Q0SUQjzT+749zTSB7yyUFjWLZm5hx3hT1+PLolsopla4Bbv2Je6Sfxc9GurI+359R3rgM2ze9MQTUPv/cOkmTxL01nx7DGbbZjYABnLYM90FoUXrWyH09tvBZHt9yMpzZei/GA/rzu+dvWrgAAZZMXFXk1VIu7wt8I4Alm3kJEG5vf36k4r87MK2LeK5g87NK0GUNYO2QgfADz592T3A3rYuqX2/b5ItQjbMnD771DJNmoA0hmV2kYbbpqp67u/ZGM2CwwFZVtVuJRfgd5NVSLtdOWiI4AeDsz/5yI3gLgR8zc9i+HiF5j5jeFvX5X7LRVsakXoSwDqeK0GzQFMFNA70Swd6+r2mVrM5mJGZoSfzDUpRzi7k6NGoji7tANer9/bGfPTSgbpuuo9lCLzDJobDafR7ejOM7voJOYdtrGDfhjzNzb/JoAnHa/9503AWAEwASALcw8ZLjmOgDrAGDx4sVXHjt2LPL4couN3YKLyaZAp+rxvg+w88yJimoymR6TCXKangjThFG6EICjW27u/KB8xA1+Yd8/dGAU63eMWC+PPr92RejJLGgCXLLxEeX9s/odBBHLWoGIfgjgzYqXPu39hpmZiHS/l4uZeZSIfgvAXiJ6hpn/n+pEZt4OYDvgrPCDxldIgqST8xcFr8bbVtcGVY9qxV0/lUxqRSfZNNk9A1JQVWAjMXTJqvgXtejrBtWwVsSrVvZj+NipwJaJgDNpRLEpDnpPXguwUQgM+Mz8Dt1rRPQLInqLJ6XzquYao82/XyKiHwFYCUAZ8HNNUqmRZWuAXf8RYEXhyVYbb1MHCFL1dEqr7lcjqZ4+uqSgmiS2Spksi39Rgp/Nk4vp/Z9ZtRQDFy+cXoXPr1Vx5twEGpOtqRv/zyQpH5xuctOMW7TdDeAPAGxp/v09/wlN5c5ZZn6DiC4AcDWAv4x53/Sx7fpkiyrYA/baeBuJpruKNqVXOqVV9678O1VD6DJ0wXTB3Crmzp6Vi+KfKvgRZnrBqsYW9ORiEzxVkk9/MAeclJFqUohT7M5rATYKcXP4vwlgEMBiAMcArGHmU0Q0AOB2Zv5DIvo9AF8DMAVHBvp5Zr5fe1EPuSra2toJp3W9oDpAUA4/7P2EjpOEZXEaeNMzfgs+1Xh1OXDAScPECZ6msejul8dCa5J0zB6ZmX8F4DrF8WEAf9j8+v8AWBrnPrkg6d2cqraGYVIdyraIzX/yrv5+5zpnRb38Q8DhXe1a/6RTK96VfK25JcPf5EXQUpSVpLvaVhVgvfYBbjDuVCHOP0Ha3MefNus2++MgumunbSdJejdnXO247v1+P/zx444fvrvaTzK14g/w516b2d3rnVw6ob3vUorSGxUwF3BtFUe2qRZVYA5T5HaJq68vOtLxyhad5jxPnZ0ODTqreqXPfcKpG12LRBOSPuoqTBJLIFxjE1OqRZfqimLUVmR9vS2mlE53mad1kiCb4DzwxGZoH2yTLs5G2S08flwsi3NMEtbEbgE2rDeP6XydqVpQ25QecgreOluHLAzOsvbIl5ROGOI0Gk8Kk+LFFNSTbjgedQKR1E4uSdo+QKe5rxApG56YZJm6AByUm/iN86o4cNf12tfn16rKXcyd0tfnIYVUzoBfVJlgkDRU206RnM+YpLS0tsDO8M2P+NTnCtOGKBv/dl3NQaddv+3Kfjy0f7Tt+DWX901LKv3F0zA+9l7G6g1csvERAO1qoKEDozhzrr0JTLWHOqavz4NHfvlSOkXujhRkR6xyjQQBAx9xAmzQ+205NOgUaNvoaSqEyOzUKT71ucDGUz5qekPnlPmZVUvbjruTgK47lC51pPPbV+G/5tY9R1o2brm86bx4fQFM5MEjv3wr/CJ3RwqShgYpf3S6/bAB+InNaq/9Wi9w59GZ77V7DcRWIQ/YqFzipDd0q3//8au37FWufO95+PB0uqh3bhVzZvVgvN5o2WgVxl3Tu5rWeu+HMGoLSx4sGsoX8IvcHclGGqqrMxwaRJvFger9Nuh+VvXTrd/b7DUoanqtCwhaWaZlH6Abx+mzjWmnzNNnG6hVK9i2dkXbJHLHjpHQ98oi+ObBoqF8KR1dcCvCqvO6u4Ae32NsT9Vu85RWwUPhN1/Z/gyDlE1FTq91AabgFtSsJK1xeHFX6F5WrewP1UTdvVeYBiVJKWvCNoTpBOVb4cfd4Zo1RObvdWifYDj8ijrMz9CkbCpyeq0L0K040w5CYRqfqJ4GbN/vDei2u5pVypr1O0YwfOzUdAvEMGS9sa58Ab/I3ZFUufPJc3YBUpcOsm147iXKz1CVuilyei0nxLEGSMrKIewYVOffu3op7nn4cGCzk/m19kKt/3OcV+3BGxNT8PRBUXr22ARfVZ2DATy472UMXLywcDtyZadtkdB2yrJoJnJoEBj6GDDl+w9Vme30v+3khKfbpTyrppZ2zl/kaaRSsEk5RfJgthZ2DKbzTX75LtUKYev7lqf2+YKM3/K4I1d22nYLceoPy9YAc85vP+4+IdhyaNBR32zqtW9IrkvdAOrm45deL7l9C0y67ryOQXf+JwcPWmntG5Oc6ueLsiEsz0jALxIqnX2Y+oNfRePiT6HognrUIqtJ1aMq6r7weDL7BbqcPOi6w45Bd1y1+zbsPTvBhhsu01o4FLHjlQT8IhHXz8fmCcEU1KNu3DLdd9kax1Bt05jz97I1ktu3RBdw0gxEYceQxNjS/HyrVvbj99+2uC3oF7XjlQT8oqEKkLbYPCGYgro2EB83p3fCPpkUWTqbImGkhXkZg+p8HUSO1YHq2mmakH1m1VJsW7siUzllUpRPpVNmbNQ1ptW11qsHZl8e031V6p2iS2dTIg8NU8KOwT3+ycGDwWkcBrauWa5sZZi2CVnWcsqkEJWO0Iqp9aKyy5biPFvPe1OPAUBUOl2Crv9skHa+QoQp5rZJpFt97JNCVDp5IorKJU1M6ZeWGoKGMHn2oI1XUVNXQm7wGrR5jdEATO86BaAsjE4yK83UohSrs/ahzwsS8NOkCFYCQYVhNxDrgn6YPLsUZ7ueIEvgpzZei59tubklR15R7B73Sj3DFoR1k04Zg74E/DRJyp64E3ifPJ7Y7KzoTavruBJRQIqzJcB2Ne4G/6NbbsaUJs3svidsoTgP+xXyghRt0ySvK9oojVGSsKiQ4mxXocrVR3Gl1HWicm0VwhaK87BfIS/ECvhE9H4AmwD8DoCrmFlZYSWiGwF8AUAFwDeYeUuc+xYWG3vjLHjszmgmZnFbPhbZ10hoQde+T9fhyiQd1fkBeo+HUc3kwYc+L8Rd4T8LYDWAr+lOIKIKgK8AeCeAEwCeJqLdzPxczHsXjzyuaA8N6lsVpvHkkYc+wUJsdGmTJ58/Oe2TYysd1TUhCducxNu+0d8Joqgbp+ISK+Az808BgMwWvVcBeJGZX2qe+x0AtwIoX8DP44rWVD/I+slDKAymtElYDXsSK3L/Ewdjpv2PyjmzLKSRw+8H4M1jnADwb3UnE9E6AOsAYPHixZ0dWRbkbUVrWsVLLl2wJMm0SVBnKBs7Zp2tcdm1+oEqHSL6IRE9q/hzaycGxMzbmXmAmQf6+vo6cQvBi24VX1uYr4lJyDVJ2jyYOkPZSiylUKsmcIXPzO+IeY9RAF7R9kXNYwKQfU9XXV3hpvvSG4NQeJK2eVClgYYOjCotGby6fhcp1KpJI6XzNIBLiWgJnED/AQAfSuG++SeKHDJp8lhXEApJJ/1m3JW9zn/Hv3LPQ8PwPBJXlvleAF8C0AfgESIaYeYbiOhCOPLLdzHzBBF9HMAeOLLMB5j5cOyRdwOd7ulq+/SQt7qCIPhQ5eS9+FfueTCWyyNxVTq7AOxSHH8FwLs83z8K4NE49+pKOrkRKw9PD0IuiNP3Ni+Ycu+6lXu3OFwmiVgrZEknrQXybOMgpEa3+Mjocu8VosJ602eBBPwsScKPRkdebRyEVOkWHxmdCuiza9JraN4NSMDPkrgtC02EeXrIu2WzEJlukSeapJqCPWKeljWdKpja2jhIrr+r6SZ5ouTk4yMr/G7F9ulBcv1dTR763gr5QVb43YzN04Pk+rsakScKXiTgl528WjYLiSGpEMFFUjplp5NKIUEQcoUE/LLTSaWQIAi5QlI6glgrCEJJkBW+IAhCSZCALwiCUBIk4AuCIJQECfiCIAglQQK+IAhCSZCALwiCUBIk4AuCIJQECfiCIAglQTZelQ3bPreCkDLd0Iox70jALxPifS/kFLcVo9udy23FCECCfoJISqdMiPe9kFO6pRVj3okV8Ino/UR0mIimiGjAcN7PiOgZIhohouE49xRiIN73Qk7pllaMeSfuCv9ZAKsB/Nji3GuYeQUzaycGocOE6XMrCCmia7lYxFaMeSZWwGfmnzKzPHMVBfG+F3KKtGJMh7Ry+AzgcSLaT0TrUrqn4Ee874WcsmplP+5dvRT9vTUQgP7eGu5dvVQKtglDzGw+geiHAN6seOnTzPy95jk/AvBnzKzMzxNRPzOPEtG/APADAP+ZmZVpoOaEsA4AFi9efOWxY8dsP4sgCELpIaL9utR5oCyTmd8RdwDMPNr8+1Ui2gXgKmjy/sy8HcB2ABgYGDDPRoLo6gVBsKbjKR0imkdE57tfA7geTrFXiIurqx8/DoBndPWHBrMemSAIOSSuLPO9RHQCwO8CeISI9jSPX0hEjzZP+5cA/p6IDgL4RwCPMPP/inNfoYno6gVBCEGsnbbMvAvALsXxVwC8q/n1SwCWx7mPoEF09YIghEB22hYZ0dULghACCfhFRnT1giCEQAJ+kRFdvSAIIRC3zE6ShmRy2Zp41xRZpyCUBgn4naIIVsRFGKMgCIkhKZ1OUQTJZBHGKAhCYkjA7xRFkEwWYYyCICSGBPxOUQTJZBHGKAhCYkjA7xRFkEwWYYyCICSGBPxOUQTJZBHGKAhCYgTaI2fJwMAADw9LR0RBEARbTPbIssIXBEEoCRLwBUEQSoIEfEEQhJIgAV8QBKEkSMAXBEEoCblW6RDRSQB572J+AYBfZj0IS2SsnUHG2hlkrNG4mJn7VC/kOuAXASIa1kmg8oaMtTPIWDuDjDV5JKUjCIJQEiTgC4IglAQJ+PHZnvUAQiBj7Qwy1s4gY00YyeELgiCUBFnhC4IglAQJ+IIgCCVBAn4CENF/I6JDRDRCRI8T0YVZj0kHEW0loueb491FRL1Zj0kHEb2fiA4T0RQR5VLyRkQ3EtERInqRiDZmPR4dRPQAEb1KRM9mPZYgiGgRET1JRM81f/9/kvWYdBDReUT0j0R0sDnWe7IekwnJ4ScAEf0GM/9z8+tPAPjXzHx7xsNSQkTXA9jLzBNEdB8AMPOdGQ9LCRH9DoApAF8D8GfMnCuvbCKqAPgnAO8EcALA0wA+yMzPZTowBUT07wG8BuBbzPzWrMdjgojeAuAtzPwTIjofwH4Aq3L6cyUA85j5NSKqAvh7AH/CzPsyHpoSWeEngBvsm8wDkNtZlJkfZ+aJ5rf7AOS2nyEz/5SZj2Q9DgNXAXiRmV9i5nMAvgPg1ozHpISZfwzgVNbjsIGZf87MP2l+/WsAPwXQn+2o1LDDa81vq80/uf3/LwE/IYjoL4joOIDfB1CUHoEfAfBY1oMoMP0Ajnu+P4GcBqaiQkSXAFgJ4B+yHYkeIqoQ0QiAVwH8gJlzO1YJ+JYQ0Q+J6FnFn1sBgJk/zcyLADwI4ON5HmvznE8DmIAz3sywGatQTojoTQAeAnCH7yk6VzDzJDOvgPO0fBUR5TZlNivrARQFZn6H5akPAngUwN0dHI6RoLES0YcBvBvAdZxxESfEzzWPjAJY5Pn+ouYxISbNfPhDAB5k5p1Zj8cGZh4joicB3Aggl8VxWeEnABFd6vn2VgDPZzWWIIjoRgD/BcAtzHw26/EUnKcBXEpES4hoNoAPANid8ZgKT7MQej+AnzLz57Iejwki6nOVbkRUg1PAz+//f1HpxIeIHgJwGRxFyTEAtzNzLld6RPQigDkAftU8tC/HiqL3AvgSgD4AYwBGmPmGbEfVChG9C8DnAVQAPMDMf5HxkJQQ0bcBvB2Oje8vANzNzPdnOigNRPTvAPwdgGfg/J8CgP/KzI9mNyo1RLQMwF/D+f33ABhk5s3ZjkqPBHxBEISSICkdQRCEkiABXxAEoSRIwBcEQSgJEvAFQRBKggR8QRCEkiABXxAEoSRIwBcEQSgJ/x/2DHi7GnzztAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "6wSIHB1S_xMJ", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 282 }, "outputId": "df77f67e-8e47-42e3-cb50-5732b109f28b" }, "source": [ "plt.scatter(data_1[:,0],data_1[:,1])\n", "plt.scatter(data_2[:,0],data_2[:,1])\n", "plt.scatter(data_3[:,0],data_3[:,1])" ], "execution_count": 65, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": { "tags": [] }, "execution_count": 65 }, { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAS+0lEQVR4nO3df5DcdX3H8ef7Nhu5RM0PucGYxAaU0aIi4A3+oONYUUSxEq1FGKdFywzjVCtqq4B2JNo64tBBsJ06pWKNM4w1AwgoWqSItdZCvQDyG6UokhjgFIL8CHJJ3v1jvxf2Lrt3t7t32csnz8fMze338/1+dt+33/2+9nuf7/e7G5mJJKksA/0uQJI0+wx3SSqQ4S5JBTLcJalAhrskFWhBvwsA2H///XPNmjX9LkOS9iobN278dWYOtZo3L8J9zZo1jIyM9LsMSdqrRMS97eY5LCNJBTLcJalAhrskFchwl6QCGe6SVKBpwz0ivhwRD0bErU1tyyPi6oj4WfV7WdUeEfGFiLg7Im6OiCPmsnjt7sp7ruSYi4/h0PWHcszFx3DlPVf2uySpLV+vc2cme+5fAY6d1HYGcE1mHgxcU00DvBk4uPo5Ffji7JSpmbjynitZ96N1bHl8C0my5fEtrPvROjcYzUu+XufWtOGemT8AHprUfDywvrq9Hljb1P7VbLgOWBoRK2arWE3t/BvO58kdT05oe3LHk5x/w/l9qkhqz9fr3Op2zP2AzNxS3b4fOKC6vRK4r2m5TVXbbiLi1IgYiYiR0dHRLstQs/sfv7+jdqmffL3OrZ4PqGbj2z46/saPzLwgM4czc3hoqOXVs+rQcxc/t6N2qZ98vc6tbsP9gfHhlur3g1X7ZmB103KrqjbtAacdcRr71fab0LZfbT9OO+K0PlUktefrdW51G+5XACdXt08GLm9q/7PqrJlXAY80Dd9ojh130HGse806VixeQRCsWLyCda9Zx3EHHdfv0qTd+HqdWzHdd6hGxNeA1wH7Aw8AZwGXARuA5wP3Aidk5kMREcA/0ji75gngvZk57SeCDQ8Ppx8cJkmdiYiNmTncat60nwqZmSe1mXV0i2UTeH9n5UmSZptXqEpSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QC9RTuEfHhiLgtIm6NiK9FxH4RcWBEXB8Rd0fE1yNi4WwVK0mama7DPSJWAh8EhjPzpUANOBH4HPD5zHwh8DBwymwUKkmauV6HZRYAgxGxAFgEbAFeD1xczV8PrO3xMSRJHeo63DNzM/D3wC9phPojwEZga2ZurxbbBKxs1T8iTo2IkYgYGR0d7bYMSVILvQzLLAOOBw4EngcsBo6daf/MvCAzhzNzeGhoqNsyJEkt9DIs8wbg55k5mpljwKXAUcDSapgGYBWwuccaJUkd6iXcfwm8KiIWRUQARwO3A9cC76yWORm4vLcSJUmd6mXM/XoaB05vAG6p7usC4HTgIxFxN/Ac4MJZqFOS1IEF0y/SXmaeBZw1qfke4Mhe7leS1BuvUJWkAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkF6incI2JpRFwcEXdGxB0R8eqIWB4RV0fEz6rfy2arWEnSzPS6534+8O+Z+WLg5cAdwBnANZl5MHBNNS1J2oO6DveIWAK8FrgQIDOfysytwPHA+mqx9cDaXouUJHWmlz33A4FR4F8j4saI+FJELAYOyMwt1TL3Awe06hwRp0bESESMjI6O9lCGJGmyXsJ9AXAE8MXMPBx4nElDMJmZQLbqnJkXZOZwZg4PDQ31UIYkabJewn0TsCkzr6+mL6YR9g9ExAqA6veDvZUoSepU1+GemfcD90XEi6qmo4HbgSuAk6u2k4HLe6pQktSxBT32/0vgoohYCNwDvJfGG8aGiDgFuBc4ocfHkCR1qKdwz8ybgOEWs47u5X4lSb3xClVJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoJ7DPSJqEXFjRHyrmj4wIq6PiLsj4usRsbD3MiVJnZiNPffTgDuapj8HfD4zXwg8DJwyC48hSepAT+EeEauA44AvVdMBvB64uFpkPbC2l8eQJHWu1z3384CPATur6ecAWzNzezW9CVjZqmNEnBoRIxExMjo62mMZkqRmXYd7RLwVeDAzN3bTPzMvyMzhzBweGhrqtgxJUgsLeuh7FPC2iHgLsB/wbOB8YGlELKj23lcBm3svU5LUia733DPzzMxclZlrgBOB72Xmu4FrgXdWi50MXN5zlZKkjszFee6nAx+JiLtpjMFfOAePIUmaQi/DMrtk5veB71e37wGOnI37lSR1xytUJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoG6DveIWB0R10bE7RFxW0ScVrUvj4irI+Jn1e9ls1euJGkmetlz3w78VWYeArwKeH9EHAKcAVyTmQcD11TTkqQ9qOtwz8wtmXlDdftR4A5gJXA8sL5abD2wttciJUmdmZUx94hYAxwOXA8ckJlbqln3Awe06XNqRIxExMjo6OhslCFJqvQc7hHxTOAS4EOZ+dvmeZmZQLbql5kXZOZwZg4PDQ31WoYkqUlP4R4RdRrBflFmXlo1PxARK6r5K4AHeytRktSpXs6WCeBC4I7MPLdp1hXAydXtk4HLuy9PktSNBT30PQr4U+CWiLipavs4cDawISJOAe4FTuitRElSp7oO98z8IRBtZh/d7f1KknrnFaqSVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEW9LsATXLzBrjm0/DIJliyCo7+JBx6wtPzv/UR2PgVyB0QNXjFe+Ct53Z/f9prXXbjZs656i5+tXUbz1s6yEff9CLWHr6y32V1ZE/9DSU8V52KzOx3DQwPD+fIyEi/y+hcq+CEp9sGlzWmtz28e7A299213ENAAM3rpJpeshqWHwQ//8/d6xg+pRHwu+7zvhb308aS1U/XNfnvOfgY+Nl32/99vln0zWU3bubMS29h29iOXW2D9RqffcfLWoZWq3ADpgy86QKxef6SwToRsPWJMZYM1hnbsZPHn3q6tqWDdda97SW79Z/qb7jsxs18/NKbeWJsJwAR8JqDlvOL32xj89ZtRMB4fC1eWKNeG2DrtrEJr/xli+ocsuJZ/Oj/Hmq1VbFy0t81/jdt3rqNWgQ7MnctM93zNd36mos3l4jYmJnDLefNRbhHxLHA+UAN+FJmnj3V8vM63G/eAN85vQpeIAYgd0J9MYw9PnHZ2sLGq23nWOv7qi2Egfru/UrV/MYB078ZdvtmsQ/+d3LU2d9j89Ztu7UvW1Rn0cIFu0LkD188xDdu2DwhaKezeGGNZz6jxgOPPrXbvEX1ARYuqO0WoiVYOljn0d9tZ8fOmf1V9VpwzjtfPm1I/81lt3DRdb+c8FwN1mv88StWcu2doz0F/h4N94ioAT8F3ghsAn4MnJSZt7fr01W4d7NBd9rn5g1w2V+0D2vNzOByeMnb4Yavzuy5HFwOb/5c+3Uz+b+epx6DHU1BVB+EP/pC0QF/4BlXFhWse6uBgFcftJz/uechxt8TBusDfPYdh+767+PDX79pRutqqv+82tnT4f5qYF1mvqmaPhMgMz/brk/H4X7zBvjmB2Gsac+l1QbdbQhMGN5QX0y1biav+1aWrIYP3zp39fWo12GSgWrIQPPXskV1Hn6isx3DlUsH+e8zXj/j5acK97k4oLoSaE7FTcArZ/URrvn07hv32LZGe/MQQHMIjA+rTNWnVT/1R6t1A63XfSuPbJqbumbB5LHmzVu38dGLfwIJY9Xu3+at2zjz0lsAdu0BNvcx2Oe/ToMd4Fcthtq61bdTISPi1IgYiYiR0dHRzjq323Cb27sNgZn209xrtZ5nGtpLVs1uLbPonKvumnAQEWBsR+4K9nHbxnZwzlV3te0DUIsg5q5U7WHPWzo4a/c1F+G+GVjdNL2qapsgMy/IzOHMHB4aGursEdptuM3t3YbAPN7j2+e0Ws8zCe364NMHa+ehTvbOxpdt12dnJj8/+zhqYcTv7QJ2Dc/NhrkI9x8DB0fEgRGxEDgRuGJWH+HoTzY24GaTN+huQ2Ae7/HtU9oFdKt1P1BvHIQlGmPt8/xgaid7Z+PLtusz3u4wzd5h6WCdgTbvw+9+1fNn9dz7WQ/3zNwOfAC4CrgD2JCZt83qgxx6QmMDXrKatht0tyHQ7o3jHf/S+Gl+zOFTqulZFvv4hcODy9sHdKt1v/af4PSfw7qtjYOo8zjYobF3NlivTWir14L6pK1+sF7btSfXqk/z/JVtwr8+QNfDNrV2KbQPOuoFy/nF2cfxi7OP47x3HcbKpYMEjef9qBcsnxDY9YHG+mw2WK9x3rsO46azjuHcEw5j6WB917xli+qc967D+Lu1L5vVmsu+iKnb85877ff5l7Y+syZqjXPia/WJZ+k0Zjauysidra80nXwlatRg5+7nHc+ayacfNj8H9UUw9gQTzmpeuBhqz2gcqB4/93+q+4WJ1ws0t+9j56jD7F9UNJOLmtpddNTuHOvm5ZcuqvPk2A62je2+nuu1YPHCBWzdNsZAMOGUwJ0Jv9v+dJ/6AAwMDExomyvjF04BfOqbt004wNnuoqrxC5jG1SI46ZWrOw7ePXVF7B6/iKlT8/oippmYyamZvV5oM9VjwPRXyv7u0dbnmE93Trn2GnvTpfyT3zgy4ZFtY7u90bW6UnSmV+CW/vECYLjvGXviKsleHmMfvIpTKp3hLkkFmirc9/Ejd5JUJsNdkgpkuEtSgQx3SSqQ4S5JBZoXZ8tExChwb7/rmIH9gV/3u4gZsta5Ya1zw1q783uZ2fLDueZFuO8tImKk3WlH8421zg1rnRvWOvsclpGkAhnuklQgw70zF/S7gA5Y69yw1rlhrbPMMXdJKpB77pJUIMNdkgpkuHcoIv42Im6OiJsi4rsR8bx+19RKRJwTEXdWtX4jIpb2u6Z2IuJPIuK2iNgZEfPyFLOIODYi7oqIuyPijH7XM5WI+HJEPBgRt/a7lqlExOqIuDYibq/W/2n9rqmdiNgvIv43In5S1fqpftc0HcfcOxQRz87M31a3Pwgckpnv63NZu4mIY4DvZeb2iPgcQGae3ueyWoqI3wd2Av8M/HVmzqvPf46IGvBT4I3AJhrfE3xSZt7e18LaiIjXAo8BX83Ml/a7nnYiYgWwIjNviIhnARuBtfPxeY2IABZn5mMRUQd+CJyWmdf1ubS23HPv0HiwVxYz4bvn5o/M/G71fbYA1wHz9pu/M/OOzLyr33VM4Ujg7sy8JzOfAv4NOL7PNbWVmT8AHup3HdPJzC2ZeUN1+1Ea37k8L78+KRseqybr1c+83PbHGe5diIjPRMR9wLuBT/a7nhn4c+A7/S5iL7YSaP6S3E3M0xDaW0XEGuBw4Pr+VtJeRNQi4ibgQeDqzJy3tYLh3lJE/EdE3Nri53iAzPxEZq4GLgI+MF/rrJb5BLC9qrVvZlKr9k0R8UzgEuBDk/4znlcyc0dmHkbjv+AjI2LeDnkBLOh3AfNRZr5hhoteBHwbOGsOy2lrujoj4j3AW4Gjs88HVzp4TuejzcDqpulVVZt6VI1fXwJclJmX9ruemcjMrRFxLXAsMG8PWrvn3qGIOLhp8njgzn7VMpWIOBb4GPC2zHyi3/Xs5X4MHBwRB0bEQuBE4Io+17TXqw5SXgjckZnn9rueqUTE0PgZZxExSOPg+rzc9sd5tkyHIuIS4EU0zu64F3hfZs67vbiIuBt4BvCbqum6+XhWD0BEvB34B2AI2ArclJlv6m9VE0XEW4DzgBrw5cz8TJ9Laisivga8jsZH0z4AnJWZF/a1qBYi4g+A/wJuobE9AXw8M7/dv6pai4hDgfU01v8AsCEzP93fqqZmuEtSgRyWkaQCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQP8PvPIMSZIPNZMAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "7ZF6c_Al8e7r", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 340 }, "outputId": "6b8e296d-7799-4e99-c280-380005d6cbf4" }, "source": [ "toy_X = np.concatenate([data_1,data_2,data_3])\n", "kmeans_model = KMeans(n_clusters=3, random_state=10).fit(toy_X)\n", "centers = kmeans_model.cluster_centers_\n", "labels = kmeans_model.labels_\n", "labels" ], "execution_count": 66, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 1, 1], dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 66 } ] }, { "cell_type": "markdown", "metadata": { "id": "2hb3F0UP8nZz", "colab_type": "text" }, "source": [ "0クラスタが200データ,1クラスタが2データ,2クラスタが200データと狙い通りにクラスタリングできている" ] }, { "cell_type": "code", "metadata": { "id": "sWGe7Z8886Oq", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 340 }, "outputId": "b90376c5-9ce1-4645-f9ec-20c404ef53c3" }, "source": [ "dt = DecisionTreeClassifier(criterion='entropy',max_leaf_nodes=3).fit(toy_X,labels)\n", "dt.predict(toy_X)" ], "execution_count": 67, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 1, 0], dtype=int32)" ] }, "metadata": { "tags": [] }, "execution_count": 67 } ] }, { "cell_type": "markdown", "metadata": { "id": "SF_GVSxy_axZ", "colab_type": "text" }, "source": [ "やはり最初の分割が論文中のような分割になっている(data_3の片方が右へ,片方が左へ)" ] }, { "cell_type": "code", "metadata": { "id": "MFR5mVET78Wx", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 380 }, "outputId": "a5cd580f-9236-4136-bd11-c391c571c974" }, "source": [ "dot_data = export_graphviz(\n", " dt,\n", " filled=True,\n", " )\n", "graph = graphviz.Source(dot_data)\n", "graph" ], "execution_count": 68, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ], "image/svg+xml": "\n\n\n\n\n\nTree\n\n\n\n0\n\nX[0] <= -0.053\nentropy = 1.04\nsamples = 402\nvalue = [200, 2, 200]\n\n\n\n1\n\nX[1] <= 50.724\nentropy = 0.045\nsamples = 201\nvalue = [0, 1, 200]\n\n\n\n0->1\n\n\nTrue\n\n\n\n2\n\nentropy = 0.045\nsamples = 201\nvalue = [200, 1, 0]\n\n\n\n0->2\n\n\nFalse\n\n\n\n3\n\nentropy = 0.0\nsamples = 200\nvalue = [0, 0, 200]\n\n\n\n1->3\n\n\n\n\n\n4\n\nentropy = 0.0\nsamples = 1\nvalue = [0, 1, 0]\n\n\n\n1->4\n\n\n\n\n\n" }, "metadata": { "tags": [] }, "execution_count": 68 } ] }, { "cell_type": "markdown", "metadata": { "id": "XWQjjP1NDwto", "colab_type": "text" }, "source": [ "

\n", "## v→∞へ" ] }, { "cell_type": "code", "metadata": { "id": "MxRUCylwAa7o", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "3373645f-21c7-4a6e-9efa-fecb4797cdab" }, "source": [ "for i in range(1,10000,1000):\n", " data_1,data_2,data_3 = make_toydata(v=i)\n", " toy_X = np.concatenate([data_1,data_2,data_3])\n", " kmeans_model = KMeans(n_clusters=3, random_state=10).fit(toy_X)\n", " kmeans_labels = kmeans_model.labels_\n", " dt = DecisionTreeClassifier(criterion='entropy',max_leaf_nodes=3).fit(toy_X,kmeans_labels)\n", "\n", " cost = 0\n", " labels = dt.predict(toy_X)\n", " centers = get_mean(toy_X,labels)\n", " for i,data in enumerate(toy_X):\n", " cost += np.sum((data-kmeans_centers[labels[i]])*(data-kmeans_centers[labels[i]]))\n", " \n", " print(\"Optimal Score:{} vs Decision Tree Score:{}\".format(kmeans_model.score(toy_X),cost))" ], "execution_count": 73, "outputs": [ { "output_type": "stream", "text": [ "Optimal Score:-199.17982861954835 vs Decision Tree Score:16284591918.10414\n", "Optimal Score:-246.18124015152924 vs Decision Tree Score:65002282.27015967\n", "Optimal Score:-246.4869545232153 vs Decision Tree Score:53004322.696862355\n", "Optimal Score:-219.56312850576407 vs Decision Tree Score:45006333.642052606\n", "Optimal Score:-260.7027891214368 vs Decision Tree Score:41008413.387505375\n", "Optimal Score:-244.1077337078409 vs Decision Tree Score:41010435.60921242\n", "Optimal Score:-267.98712249271443 vs Decision Tree Score:45012495.40759582\n", "Optimal Score:-254.49826816169144 vs Decision Tree Score:53014522.64540449\n", "Optimal Score:-272.9091436380789 vs Decision Tree Score:65016576.71783102\n", "Optimal Score:-239.80939207439016 vs Decision Tree Score:81024105.0891669\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "wf7Og4kEBaXT", "colab_type": "text" }, "source": [ "v→無限大につれて,近似スコアは無限大になってしまう(もちろん,葉をクラスタ数にしなければこれは起こらないが) \n", "\\#sklearnのkmeansは目的関数のスコアのマイナスをreturnする" ] } ] }