{ "cells": [ { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import tensorflow as tf\n", "import os\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "path = '../datasets/food-101/train/'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "fried_rice = os.listdir(path + 'fried_rice/')\n", "ramen = os.listdir(path + 'ramen/')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "resnet = tf.keras.applications.resnet50.ResNet50(include_top=False)\n", "preprocess_input = tf.keras.applications.resnet50.preprocess_input\n", "image = tf.keras.preprocessing.image" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def extract_features(img_paths, batch_size=64):\n", " \"\"\" This function extracts image features for each image in img_paths using ResNet50 bottleneck layer.\n", " Returned features is a numpy array with shape (len(img_paths), 2048).\n", " \"\"\"\n", " global resnet\n", " n = len(img_paths)\n", " img_array = np.zeros((n, 224, 224, 3))\n", " \n", " for i, path in enumerate(img_paths):\n", " img = image.load_img(path, target_size=(224, 224))\n", " img = image.img_to_array(img)\n", " img = np.expand_dims(img, axis=0)\n", " x = preprocess_input(img)\n", " img_array[i] = x\n", " \n", " X = resnet.predict(img_array, batch_size=batch_size, verbose=1)\n", " X = X.reshape((n, 2048))\n", " return X" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1600/1600 [==============================] - 320s 200ms/step\n" ] } ], "source": [ "X = extract_features(\n", " list(map(lambda x: path + 'fried_rice/' + x, fried_rice)) + list(map(lambda x: path + 'ramen/' + x, ramen))\n", ")\n", "y = np.array([1] * len(fried_rice) + [0] * len(ramen))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "def net():\n", " model = tf.keras.models.Sequential([\n", " tf.keras.layers.Flatten(),\n", " tf.keras.layers.Dense(256, activation=tf.nn.relu),\n", " tf.keras.layers.Dropout(0.2),\n", " tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)\n", " ])\n", " return model" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 1120 samples, validate on 480 samples\n", "Epoch 1/5\n", "1120/1120 [==============================] - 1s 825us/step - loss: 0.3837 - acc: 0.8366 - val_loss: 0.1460 - val_acc: 0.9458\n", "Epoch 2/5\n", "1120/1120 [==============================] - 0s 138us/step - loss: 0.0960 - acc: 0.9688 - val_loss: 0.1089 - val_acc: 0.9563\n", "Epoch 3/5\n", "1120/1120 [==============================] - 0s 131us/step - loss: 0.0635 - acc: 0.9795 - val_loss: 0.1074 - val_acc: 0.9604\n", "Epoch 4/5\n", "1120/1120 [==============================] - 0s 131us/step - loss: 0.0558 - acc: 0.9839 - val_loss: 0.0988 - val_acc: 0.9604\n", "Epoch 5/5\n", "1120/1120 [==============================] - 0s 135us/step - loss: 0.0309 - acc: 0.9946 - val_loss: 0.0921 - val_acc: 0.9625\n" ] } ], "source": [ "np.random.seed(42)\n", "\n", "batch_size = 64\n", "epochs = 5\n", "\n", "model = net()\n", "model.compile(optimizer='adam',\n", " loss='binary_crossentropy',\n", " metrics=['accuracy'])\n", "\n", "history = model.fit(X_train, y_train,\n", " validation_data=(X_val, y_val),\n", " batch_size=batch_size,\n", " epochs=epochs)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.plot(range(1,epochs+1), history.history['acc'], label='train')\n", "plt.plot(range(1,epochs+1), history.history['val_acc'], label='validation')\n", "plt.legend();" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "test_path = '../datasets/food-101/test/'\n", "test = os.listdir(test_path)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "400/400 [==============================] - 73s 183ms/step\n" ] } ], "source": [ "X_test = extract_features(\n", " list(map(lambda x: test_path + x, test))\n", ")" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "y_pred = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "pred = pd.DataFrame({\n", " 'id': test,\n", " 'kelas': (y_pred > .5).reshape(-1)\n", "})\n", "pred['kelas'] = pred['kelas'].map({True: 1, False: 0})" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "pred.to_csv('pred.csv', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }