In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 1. 讀入深度學習套件

In [None]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.datasets import imdb

### 2. 讀入數據

一般自然語言處理, 我們會限制最大要使用的字數; 這裡只保留最常出現的 10000 個字。

In [None]:
# Load the IMDB review dataset and unpack it into (inputs, labels) pairs for
# the training and the test split. num_words=10000 caps the vocabulary size
# (per the Keras docs, only the most frequent words are kept).
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Report how many reviews are in the training and the test split,
# one line per split.
print(
    f'訓練資料筆數:{len(x_train)}',
    f'測試資料筆數:{len(x_test)}',
    sep='\n',
)

訓練資料筆數:25000
測試資料筆數:25000


注意每筆評論的長度當然是不一樣的。

In [None]:
# Show that individual reviews have different lengths by printing the token
# count of the first two *training* samples. Both lines read from x_train, so
# both labels say "training data" (the second label previously said
# "test data" although it measured x_train[1]).
print(f'第一筆訓練資料的長度:{len(x_train[0])}')
print(f'第二筆訓練資料的長度:{len(x_train[1])}')

第一筆訓練資料的長度:218
第二筆測試資料的長度:189


In [None]:
# Print the labels of the first two training samples. The sentiment shown in
# parentheses is hard-coded text, not derived from the label value.
print(
    f'第一筆資料的標籤:{y_train[0]}(正評)',
    f'第二筆資料的標籤:{y_train[1]}(負評)',
    sep='\n',
)

第一筆資料的標籤:1(正評)
第二筆資料的標籤:0(負評)


### 3. 資料處理

雖然我們可以做真的 seq2seq, 可是資料長度不一樣對計算上有麻煩, 因此平常還是會固定一定長度 (這裡取 100): 太長的截斷, 不足的補 0。

In [None]:
# Bring every review in both splits to exactly 100 tokens with pad_sequences
# (shorter reviews are padded, longer ones truncated; with the Keras defaults
# both happen at the front of the sequence -- confirm for the installed version).
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=100)
    for reviews in (x_train, x_test)
)

### 4. step 01: 打造一個函數學習機

In [None]:
# Start from an empty Sequential container; the layers are appended one by one
# in the following cells. Re-running this cell resets the model to empty.
model = Sequential()

In [None]:
# Embedding layer: maps each of the 10000 word indices (same number as the
# num_words limit used when loading the data) to a 128-dimensional vector.
# NOTE: model.add appends, so running this cell twice stacks a second layer.
model.add(Embedding(input_dim=10000, output_dim=128))

In [None]:
# Recurrent layer with 128 units; the summary below shows it outputs a single
# 128-dimensional vector per review.
model.add(LSTM(units=128))

In [None]:
# Output layer: one sigmoid unit, i.e. a single value in (0, 1) per review,
# matching the 0/1 labels in y_train.
model.add(Dense(units=1, activation='sigmoid'))

#### 組裝

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (pairs with
# the single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

#### 欣賞我們的 model

In [None]:
# Print a layer-by-layer overview of the model (output shapes and parameter
# counts) as a sanity check before training.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type) Output Shape Param # 
 embedding (Embedding) (None, None, 128) 1280000 
 
 lstm (LSTM) (None, 128) 131584 
 
 dense (Dense) (None, 1) 129 
 
Total params: 1,411,713
Trainable params: 1,411,713
Non-trainable params: 0
_________________________________________________________________


### 5. step 02: 訓練

In [None]:
# Train for 10 epochs with mini-batches of 32 reviews.
# NOTE(review): the test split is passed as validation data, so the reported
# validation metrics are computed on the test set itself.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




### 6. 換個存檔方式

這次是把 model 的架構和訓練好的權重分開存, 使用上更有彈性。

In [None]:
# Colab-only step: the google.colab package is what provides `drive`, so this
# cell is expected to run inside Google Colab.
from google.colab import drive

# Mount the user's Google Drive at /content/drive so files saved there persist.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the notebooks folder on the mounted Drive,
# so files written with relative paths afterwards are stored there.
%cd '/content/drive/My Drive/Colab Notebooks'

/content/drive/My Drive/Colab Notebooks


In [None]:
# Persist the model in two separate files (relative paths, so they land in the
# current working directory): the architecture as JSON, the weights as HDF5.
model_json = model.to_json()

# Write through a context manager so the file is flushed and closed
# deterministically; the previous `open(...).write(...)` never closed the
# handle explicitly and relied on garbage collection to do so.
with open('imdb_model_architecture.json', 'w', encoding='utf-8') as json_file:
    json_file.write(model_json)

model.save_weights('imdb_model_weights.h5')