{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Майнор по Анализу Данных, Группа ИАД-4\n",
    "## 09/11/2017 Практика с rnn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.layers import LSTM\n",
    "\n",
    "import numpy as np\n",
    "import math\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "\n",
    "RND_SEED = 7\n",
    "plt.style.use('ggplot')\n",
    "\n",
    "np.random.seed(RND_SEED)\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Прогнозирование временных рядов"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def create_dataset(dataset, look_back=1):\n",
    "    \"\"\"Turn a series into supervised (window, next value) pairs.\n",
    "\n",
    "    dataset   -- 2-D array; only column 0 is read.\n",
    "    look_back -- number of consecutive steps in each input window.\n",
    "\n",
    "    Returns (X, y) as numpy arrays where X[i] is dataset[i:i+look_back, 0]\n",
    "    and y[i] is dataset[i+look_back, 0].\n",
    "    \"\"\"\n",
    "    # The last usable window starts at len(dataset) - look_back - 1, so the\n",
    "    # range has to stop at len(dataset) - look_back. Stopping one step\n",
    "    # earlier silently drops the final sample.\n",
    "    n_samples = len(dataset) - look_back\n",
    "    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]\n",
    "    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]\n",
    "    return np.array(dataX), np.array(dataY)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('./monthly-australian-wine-sales.csv')\n",
    "dataset = df.loc[:, ['sales']].values.astype(np.float32)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Отнормируем данные и разобьем их на обучение и контроль"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "# The raw series stays in `dataset`; writing the scaled copy to a new name\n",
    "# keeps this cell safe to re-run (the scaler is never refitted on data\n",
    "# that has already been scaled).\n",
    "dataset_scaled = scaler.fit_transform(dataset)\n",
    "\n",
    "train_size = int(len(dataset_scaled) * 0.67)\n",
    "test_size = len(dataset_scaled) - train_size\n",
    "train, test = dataset_scaled[:train_size, :], dataset_scaled[train_size:, :]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Переведем их в нужный формат"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "look_back = 1\n",
    "trainX, trainY = create_dataset(train, look_back)\n",
    "testX, testY = create_dataset(test, look_back)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# [samples, time steps, features]\n",
    "# Reshaping by look_back (not by the current shape) gives the same result\n",
    "# whether the arrays are still 2-D or were already reshaped by a previous run.\n",
    "trainX = np.reshape(trainX, (-1, 1, look_back))\n",
    "testX = np.reshape(testX, (-1, 1, look_back))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the network\n",
    "model = Sequential()\n",
    "model.add(LSTM(4, input_shape=(1, look_back)))\n",
    "model.add(Dense(1))\n",
    "model.compile(loss='mean_squared_error', optimizer='adam')\n",
    "# `epochs` is the Keras 2 name of the argument formerly spelled `nb_epoch`.\n",
    "model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict, then map the predictions back to the original units\n",
    "trainPredict = scaler.inverse_transform(model.predict(trainX))\n",
    "testPredict = scaler.inverse_transform(model.predict(testX))\n",
    "\n",
    "# Inverse-transform the targets into new names: trainY / testY stay scaled,\n",
    "# so re-running this cell never applies the inverse transform twice.\n",
    "trainY_restored = scaler.inverse_transform(trainY.reshape(-1, 1))[:, 0]\n",
    "testY_restored = scaler.inverse_transform(testY.reshape(-1, 1))[:, 0]\n",
    "\n",
    "# Compute the error\n",
    "trainScore = math.sqrt(mean_squared_error(trainY_restored, trainPredict[:, 0]))\n",
    "print('Train Score: %.2f RMSE' % (trainScore))\n",
    "testScore = math.sqrt(mean_squared_error(testY_restored, testPredict[:, 0]))\n",
    "print('Test Score: %.2f RMSE' % (testScore))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Нарисуем предсказания"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train predictions start at index look_back: the first look_back points\n",
    "# have no complete input window before them.\n",
    "trainPredictPlot = np.full_like(dataset, np.nan)\n",
    "trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict\n",
    "\n",
    "# The first test target lies look_back steps after the train/test split,\n",
    "# and the test predictions run to the end of the series.\n",
    "testPredictPlot = np.full_like(dataset, np.nan)\n",
    "testPredictPlot[train_size + look_back:, :] = testPredict\n",
    "\n",
    "plt.plot(dataset, label='actual')\n",
    "plt.plot(trainPredictPlot, label='train prediction')\n",
    "plt.plot(testPredictPlot, label='test prediction')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Тональность отзыва"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from keras.datasets import imdb\n",
    "from keras.layers.embeddings import Embedding\n",
    "from keras.preprocessing import sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.random.seed(RND_SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Загрузим датасет"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vocabulary size limit passed to the loader.\n",
    "# `num_words` is the Keras 2 name of the argument formerly spelled `nb_words`.\n",
    "top_words = 5000\n",
    "(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Необходимо привести все к единообразной форме"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "max_review_length = 500\n",
    "X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)\n",
    "X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the model\n",
    "embedding_vector_length = 32\n",
    "model = Sequential()\n",
    "model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))\n",
    "model.add(LSTM(100))\n",
    "model.add(Dense(1, activation='sigmoid'))\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "\n",
    "# summary() prints the table itself and returns None, so wrapping it in\n",
    "# print() only adds a stray \"None\" line.\n",
    "model.summary()\n",
    "model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3, batch_size=64);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "scores = model.evaluate(X_test, y_test, verbose=0)\n",
    "print(\"Accuracy: %.2f%%\" % (scores[1]*100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Теперь попробуем добавить Dropout (двумя способами) и одномерную конволюцию!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from keras.layers.convolutional import Conv1D\n",
    "from keras.layers.convolutional import MaxPooling1D\n",
    "from keras.layers import Dropout"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  },
  "nav_menu": {},
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "49px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": false,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": true,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}