"
]
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"Image(url='https://git.io/JLdVr', width=700)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:20:52.131350Z",
"iopub.status.busy": "2021-01-01T08:20:52.130702Z",
"iopub.status.idle": "2021-01-01T08:20:52.199641Z",
"shell.execute_reply": "2021-01-01T08:20:52.198873Z"
},
"id": "oDIGchMbyWJq",
"outputId": "b70bf233-aa2a-430b-9e40-5d289e74797c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"입력 (x): 'THE MYSTERIOUS ISLAND ***\\n\\n\\n\\n\\nProduced b'\n",
"타깃 (y): 'HE MYSTERIOUS ISLAND ***\\n\\n\\n\\n\\nProduced by'\n",
"\n",
"입력 (x): ' Anthony Matonak, and Trevor Carlson\\n\\n\\n\\n'\n",
"타깃 (y): 'Anthony Matonak, and Trevor Carlson\\n\\n\\n\\n\\n'\n",
"\n"
]
}
],
"source": [
"## x & y를 나누기 위한 함수를 정의합니다\n",
"def split_input_target(chunk):\n",
" input_seq = chunk[:-1]\n",
" target_seq = chunk[1:]\n",
" return input_seq, target_seq\n",
"\n",
"ds_sequences = ds_chunks.map(split_input_target)\n",
"\n",
"## 확인:\n",
"for example in ds_sequences.take(2):\n",
" print('입력 (x):', repr(''.join(char_array[example[0].numpy()])))\n",
" print('타깃 (y):', repr(''.join(char_array[example[1].numpy()])))\n",
" print()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:20:52.206427Z",
"iopub.status.busy": "2021-01-01T08:20:52.205493Z",
"iopub.status.idle": "2021-01-01T08:20:52.214632Z",
"shell.execute_reply": "2021-01-01T08:20:52.214055Z"
},
"id": "EwXM1x7MyWJr",
"outputId": "80db52f7-c6c1-4cbe-c1a3-100fe66a61a4"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<_BatchDataset element_spec=(TensorSpec(shape=(None, 40), dtype=tf.int32, name=None), TensorSpec(shape=(None, 40), dtype=tf.int32, name=None))>"
]
},
"metadata": {},
"execution_count": 12
}
],
"source": [
"# 배치 크기\n",
"BATCH_SIZE = 64\n",
"BUFFER_SIZE = 10000\n",
"\n",
"tf.random.set_seed(1)\n",
"ds = ds_sequences.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)# drop_remainder=True)\n",
"\n",
"ds"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ALd_KVXbyWJr"
},
"source": [
"### 문자 수준의 RNN 모델 만들기"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"execution": {
"iopub.execute_input": "2021-01-01T08:20:52.222512Z",
"iopub.status.busy": "2021-01-01T08:20:52.221844Z",
"iopub.status.idle": "2021-01-01T08:20:52.507739Z",
"shell.execute_reply": "2021-01-01T08:20:52.508469Z"
},
"id": "2DQqLCqAyWJr",
"outputId": "d4c729a2-5025-48bc-a143-546db078209b"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"\u001b[1mModel: \"sequential_7\"\u001b[0m\n"
],
"text/html": [
"Model: \"sequential_7\"\n",
"\n"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_11 (\u001b[38;5;33mEmbedding\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ lstm_7 (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_7 (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
],
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_11 (Embedding) │ ? │ 0 (unbuilt) │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ lstm_7 (LSTM) │ ? │ 0 (unbuilt) │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_7 (Dense) │ ? │ 0 (unbuilt) │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
],
"text/html": [
" Total params: 0 (0.00 B)\n",
"\n"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
],
"text/html": [
" Trainable params: 0 (0.00 B)\n",
"\n"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
],
"text/html": [
" Non-trainable params: 0 (0.00 B)\n",
"\n"
]
},
"metadata": {}
}
],
"source": [
"def build_model(vocab_size, embedding_dim, rnn_units):\n",
" model = tf.keras.Sequential([\n",
" tf.keras.layers.Embedding(vocab_size, embedding_dim),\n",
" tf.keras.layers.LSTM(\n",
" rnn_units, return_sequences=True),\n",
" tf.keras.layers.Dense(vocab_size)\n",
" ])\n",
" return model\n",
"\n",
"\n",
"charset_size = len(char_array)\n",
"embedding_dim = 256\n",
"rnn_units = 512\n",
"\n",
"tf.random.set_seed(1)\n",
"\n",
"model = build_model(\n",
" vocab_size = charset_size,\n",
" embedding_dim=embedding_dim,\n",
" rnn_units=rnn_units)\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:20:52.523264Z",
"iopub.status.busy": "2021-01-01T08:20:52.522357Z",
"iopub.status.idle": "2021-01-01T08:52:55.869819Z",
"shell.execute_reply": "2021-01-01T08:52:55.870445Z"
},
"id": "6oDKVX0JyWJr",
"outputId": "7ef015c9-ff3d-4ef8-c6f2-634dce2beb04"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 5ms/step - loss: 2.6163\n",
"Epoch 2/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.7046\n",
"Epoch 3/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.4891\n",
"Epoch 4/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.3822\n",
"Epoch 5/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.3160\n",
"Epoch 6/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.2699\n",
"Epoch 7/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.2370\n",
"Epoch 8/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.2099\n",
"Epoch 9/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.1820\n",
"Epoch 10/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.1663\n",
"Epoch 11/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.1460\n",
"Epoch 12/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.1275\n",
"Epoch 13/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.1099\n",
"Epoch 14/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0946\n",
"Epoch 15/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0759\n",
"Epoch 16/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0631\n",
"Epoch 17/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0511\n",
"Epoch 18/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0343\n",
"Epoch 19/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0219\n",
"Epoch 20/20\n",
"\u001b[1m424/424\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 5ms/step - loss: 1.0086\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 35
}
],
"source": [
"model.compile(\n",
" optimizer='adam',\n",
" loss=tf.keras.losses.SparseCategoricalCrossentropy(\n",
" from_logits=True\n",
" ))\n",
"\n",
"model.fit(ds, epochs=20)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "HPxMUjzzyWJr"
},
"source": [
"### 평가 단계 - 새로운 텍스트 생성"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:52:55.887487Z",
"iopub.status.busy": "2021-01-01T08:52:55.886806Z",
"iopub.status.idle": "2021-01-01T08:52:55.892794Z",
"shell.execute_reply": "2021-01-01T08:52:55.892440Z"
},
"id": "qJ8Wf-ofyWJs",
"outputId": "c352d67b-27de-4252-a0fc-e5b126e30fb4"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"확률: [0.33333334 0.33333334 0.33333334]\n",
"array([[1, 2, 0, 1, 0, 1, 1, 2, 1, 1]])\n"
]
}
],
"source": [
"tf.random.set_seed(1)\n",
"\n",
"logits = [[1.0, 1.0, 1.0]]\n",
"print('확률:', tf.math.softmax(logits).numpy()[0])\n",
"\n",
"samples = tf.random.categorical(\n",
" logits=logits, num_samples=10)\n",
"tf.print(samples.numpy())"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:52:55.899437Z",
"iopub.status.busy": "2021-01-01T08:52:55.898456Z",
"iopub.status.idle": "2021-01-01T08:52:55.903367Z",
"shell.execute_reply": "2021-01-01T08:52:55.904194Z"
},
"id": "PZd-fbXByWJs",
"outputId": "f8f78eff-ca97-4dd2-9d6f-07f89b2b8522"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"확률: [0.10650698 0.10650698 0.78698605]\n",
"array([[2, 2, 0, 2, 2, 2, 2, 2, 1, 2]])\n"
]
}
],
"source": [
"tf.random.set_seed(1)\n",
"\n",
"logits = [[1.0, 1.0, 3.0]]\n",
"print('확률:', tf.math.softmax(logits).numpy()[0])\n",
"\n",
"samples = tf.random.categorical(\n",
" logits=logits, num_samples=10)\n",
"tf.print(samples.numpy())"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:52:55.914798Z",
"iopub.status.busy": "2021-01-01T08:52:55.914137Z",
"iopub.status.idle": "2021-01-01T08:53:06.162148Z",
"shell.execute_reply": "2021-01-01T08:53:06.162896Z"
},
"id": "R_b2-tLDyWJs",
"outputId": "5270df5d-186e-49e1-c14b-01d51662f4f8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The island was egbly float in the water.\n",
"\n",
"They never seen for the present, from whole found it marked go river. But the obstinate vast continents of trees should ask any well. Here is a vessel was increased by\n",
"Union? Gretoms,” said Herbert.\n",
"\n",
"“I should be a simple enemy. Master Jup should take up, with its entable that he might little obscure.\n",
"\n",
"As to Hester Cyrus Harding, Neb, and there were neither observation.\n",
"\n",
"The engineer intended a see the first servants\n",
"of a coldience towards the last season.\n",
"\n",
"As to \n"
]
}
],
"source": [
"def sample(model, starting_str,\n",
" len_generated_text=500,\n",
" max_input_length=40,\n",
" scale_factor=1.0):\n",
" encoded_input = [char2int[s] for s in starting_str]\n",
" encoded_input = tf.reshape(encoded_input, (1, -1))\n",
"\n",
" generated_str = starting_str\n",
"\n",
" # model.reset_states()\n",
" for i in range(len_generated_text):\n",
" logits = model(encoded_input)\n",
" logits = tf.squeeze(logits, 0)\n",
"\n",
" scaled_logits = logits * scale_factor\n",
" new_char_indx = tf.random.categorical(\n",
" scaled_logits, num_samples=1)\n",
"\n",
" new_char_indx = tf.squeeze(new_char_indx)[-1].numpy()\n",
"\n",
" generated_str += str(char_array[new_char_indx])\n",
"\n",
" new_char_indx = tf.expand_dims([new_char_indx], 0)\n",
" encoded_input = tf.concat(\n",
" [encoded_input, new_char_indx],\n",
" axis=1)\n",
" encoded_input = encoded_input[:, -max_input_length:]\n",
"\n",
" return generated_str\n",
"\n",
"tf.random.set_seed(1)\n",
"print(sample(model, starting_str='The island'))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Stm4dLH2yWJs"
},
"source": [
"* **예측 가능성 대 무작위성**"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:53:06.171621Z",
"iopub.status.busy": "2021-01-01T08:53:06.170721Z",
"iopub.status.idle": "2021-01-01T08:53:06.175211Z",
"shell.execute_reply": "2021-01-01T08:53:06.175546Z"
},
"id": "-UKHgrqTyWJs",
"outputId": "a23daf10-03d3-4975-9c06-791e0a94d47f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"스케일 조정 전의 확률: [0.10650698 0.10650698 0.78698604]\n",
"0.5배 조정 후 확률: [0.21194156 0.21194156 0.57611688]\n",
"0.1배 조정 후 확률: [0.31042377 0.31042377 0.37915245]\n"
]
}
],
"source": [
"logits = np.array([[1.0, 1.0, 3.0]])\n",
"\n",
"print('스케일 조정 전의 확률: ', tf.math.softmax(logits).numpy()[0])\n",
"\n",
"print('0.5배 조정 후 확률: ', tf.math.softmax(0.5*logits).numpy()[0])\n",
"\n",
"print('0.1배 조정 후 확률: ', tf.math.softmax(0.1*logits).numpy()[0])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:53:06.182417Z",
"iopub.status.busy": "2021-01-01T08:53:06.182031Z",
"iopub.status.idle": "2021-01-01T08:53:16.415192Z",
"shell.execute_reply": "2021-01-01T08:53:16.415932Z"
},
"id": "ickgML_8yWJt",
"outputId": "58b42e64-fce9-4312-e27e-6df66204e311"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The island was in the little band was endeavored to transist some profound drop were the case of the cart was enough to cave up the same time a sort of flight. A few moments were still the castaways could not be able to give the heart of the sea, and the convicts had not been able to die to the convicts.\n",
"\n",
"“That is not long it?” asked the reporter.\n",
"\n",
"“No,” replied the sailor.\n",
"“And you will be the convicts have only lose in the fire of the left bank of the Mercy, and the wire was now to be feared. As to the \n"
]
}
],
"source": [
"tf.random.set_seed(1)\n",
"print(sample(model, starting_str='The island',\n",
" scale_factor=2.0))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"execution": {
"iopub.execute_input": "2021-01-01T08:53:16.424579Z",
"iopub.status.busy": "2021-01-01T08:53:16.424183Z",
"iopub.status.idle": "2021-01-01T08:53:26.644678Z",
"shell.execute_reply": "2021-01-01T08:53:26.643763Z"
},
"id": "UF4sqqpcyWJt",
"outputId": "4d3f3d2f-5491-4356-9035-72a30190ac8f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The island had egblope fox Cork any way Heaven. SU“more ciff shote never, I\n",
"rejoke,--1 of\n",
"Jouttin. Be punfivecteds had bohes\n",
"visipeh gave\n",
"unfluence open,\n",
"sig her a visib who indergrieable mathar,\n",
"orcape\n",
"lyevinaze,\n",
"liste,\n",
"ressec\n",
"larquet,\n",
"with\n",
"occosped loss encrobce; in less.\n",
"Harding’s dwelling was nowed hereods! excetively Eisable, unknowcbod\n",
"frose, uses, smening, whn Pencroft been her cheside ral dwelbing!”\n",
"\n",
"Their\n",
"yearspog\n",
"here?ed zeal-slet,\n",
"lift; vercy, lines us, ob,” rest Jup of tiedles’, sombority our \n"
]
}
],
"source": [
"tf.random.set_seed(1)\n",
"print(sample(model, starting_str='The island',\n",
" scale_factor=0.5))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_V30iCf5yWJt"
},
"source": [
"# 트랜스포머 모델을 사용한 언어 이해\n",
"\n",
"## 셀프 어텐션 메카니즘 이해하기\n",
"\n",
"### 셀프 어텐션 기본 구조"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 350
},
"execution": {
"iopub.execute_input": "2021-01-01T08:53:26.650577Z",
"iopub.status.busy": "2021-01-01T08:53:26.649535Z",
"iopub.status.idle": "2021-01-01T08:53:26.663374Z",
"shell.execute_reply": "2021-01-01T08:53:26.664093Z"
},
"id": "aWx-IXRtyWJt",
"outputId": "d6bc0765-923d-4095-ee30-7c300d5eefe7"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 43
}
],
"source": [
"Image(url='https://git.io/JLdVo', width=700)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EXNdJdxnyWJt"
},
"source": [
"### 쿼리, 키, 값 가중치를 가진 셀프 어텐션 메카니즘"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "o99EBntFyWJt"
},
"source": [
"## 멀티-헤드 어텐션과 트랜스포머 블록"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 278
},
"execution": {
"iopub.execute_input": "2021-01-01T08:53:26.668048Z",
"iopub.status.busy": "2021-01-01T08:53:26.667215Z",
"iopub.status.idle": "2021-01-01T08:53:26.672956Z",
"shell.execute_reply": "2021-01-01T08:53:26.673524Z"
},
"id": "Klo3F7nvyWJu",
"outputId": "5b86caef-b5b3-45dc-e937-4d7944e160e1"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 44
}
],
"source": [
"Image(url='https://git.io/JLdV6', width=700)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "ch16_part2.ipynb",
"provenance": [],
"gpuType": "A100"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}