\n",
"(150, 4)\n"
]
}
],
"source": [
"print(type(iris.data))\n",
"print(type(iris.target))\n",
"print(type(iris))\n",
"print(iris.data.shape)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Only the last expression of a cell is displayed, so the feature check and the\n",
"# label check are combined: the result is True if either array contains a NaN.\n",
"bool(np.isnan(iris.data).any() or np.isnan(iris.target).any())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Bind the iris feature array to X and the label array to y.\n",
"X, y = iris.data, iris.target"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Gerekli Kütüphanenin İçe Aktarılması"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.tree import DecisionTreeClassifier"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Eğitim ve Test Kümelerine Ayırma (Train/Test split)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 33% of the samples for testing; random_state is fixed so the split is repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.33,\n",
"    random_state=4,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100, 4) (50, 4)\n"
]
}
],
"source": [
"# Shapes of the train and test feature matrices, printed side by side.\n",
"print(f\"{X_train.shape} {X_test.shape}\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100,) (50,)\n"
]
}
],
"source": [
"# Shapes of the train and test label vectors, printed side by side.\n",
"print(f\"{y_train.shape} {y_test.shape}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## KNN Algoritması\n",
"\n",
"### K = 1 Değeri İçin Tahmin Yapalım"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Fit a 1-nearest-neighbour classifier on the training split (fit returns the\n",
"# estimator itself), then predict labels for the test split.\n",
"kNN = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)\n",
"y_pred = kNN.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score,confusion_matrix "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hata Matrisini ve Doğruluk Oranını (accuracy) Bulalım"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Test Değeri ve Tahmin Edilen Değerler\n",
"\n",
" [[23 0 0]\n",
" [ 0 11 1]\n",
" [ 0 1 14]]\n",
"\n",
"Doğruluk Oranı = 0.96\n"
]
}
],
"source": [
"# Confusion matrix of true test labels against the current predictions.\n",
"print(\"\\nTest Değeri ve Tahmin Edilen Değerler\")\n",
"error_matrix = confusion_matrix(y_test, y_pred)\n",
"print(\"\\n\",error_matrix)\n",
"# accuracy_score is the fraction of correct predictions, so the value is labelled\n",
"# as accuracy (Doğruluk Oranı); the error rate would be 1 minus this value.\n",
"print(\"\\nDoğruluk Oranı =\",accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hata Matrisinin Yorumlanması\n",
"1. satırda 23 değer varmış ve hepsi doğru tahmin edilmiş.\n",
"2. satırda 12 değer varmış 11 tanesi doğru 1 tanesi yanlış tahmin edilmiş.\n",
"3. satırda 15 değer varmış 14 tanesi doğru 1 tanesi yanlış tahmin edilmiş.\n",
"\n",
"Not: 23 + 12 + 15 = 50. Bizim test için ayırdığımız sayıya eşit."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### K = 5 (Default Value) Değeri İçin Tahmin Yapalım"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Refit with 5 neighbours (fit returns the estimator itself) and overwrite\n",
"# y_pred with the new test-split predictions.\n",
"kNN = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)\n",
"y_pred = kNN.predict(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hata matrisine ve doğruluk oranına tekrar bakalım"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Test Değeri ve Tahmin Edilen Değerler\n",
"\n",
" [[23 0 0]\n",
" [ 0 11 1]\n",
" [ 0 0 15]]\n",
"\n",
"Doğruluk Oranı = 0.98\n"
]
}
],
"source": [
"# Confusion matrix of true test labels against the current predictions.\n",
"print(\"\\nTest Değeri ve Tahmin Edilen Değerler\")\n",
"error_matrix = confusion_matrix(y_test, y_pred)\n",
"print(\"\\n\",error_matrix)\n",
"# accuracy_score is the fraction of correct predictions, so the value is labelled\n",
"# as accuracy (Doğruluk Oranı); the error rate would be 1 minus this value.\n",
"print(\"\\nDoğruluk Oranı =\",accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Decision tree capped at 10 leaf nodes, with a fixed random_state.\n",
"iris_classifier = DecisionTreeClassifier(\n",
"    max_leaf_nodes=10,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
" max_features=None, max_leaf_nodes=10, min_impurity_split=1e-07,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, presort=False, random_state=0,\n",
" splitter='best')"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Train the tree on the training split; the fitted estimator is echoed as the cell output.\n",
"iris_classifier.fit(X=X_train, y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Predict test-split labels with the fitted tree; this overwrites the kNN predictions in y_pred.\n",
"y_pred = iris_classifier.predict(X=X_test)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Test Değeri ve Tahmin Edilen Değerler\n",
"\n",
" [[23 0 0]\n",
" [ 0 11 1]\n",
" [ 0 0 15]]\n",
"\n",
"Doğruluk Oranı = 0.98\n"
]
}
],
"source": [
"# Confusion matrix of true test labels against the current predictions.\n",
"print(\"\\nTest Değeri ve Tahmin Edilen Değerler\")\n",
"error_matrix = confusion_matrix(y_test, y_pred)\n",
"print(\"\\n\",error_matrix)\n",
"# accuracy_score is the fraction of correct predictions, so the value is labelled\n",
"# as accuracy (Doğruluk Oranı); the error rate would be 1 minus this value.\n",
"print(\"\\nDoğruluk Oranı =\",accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"kNN (K=5) ve DecisionTree aynı başarı oranını (0.98) yakalarken, kNN K=1 için başarı oranı 0.02 daha düşük (0.96)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}