{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "ap09O_yL3Exf" }, "source": [ "# Text Summarizer" ] }, { "cell_type": "markdown", "metadata": { "id": "RxTpE3I6KAe0" }, "source": [ "## Import Module" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:48:54.240729Z", "iopub.status.busy": "2024-05-09T09:48:54.240033Z", "iopub.status.idle": "2024-05-09T09:49:06.702076Z", "shell.execute_reply": "2024-05-09T09:49:06.700855Z", "shell.execute_reply.started": "2024-05-09T09:48:54.240693Z" }, "id": "ySdoG0L23Exh", "outputId": "c56180d2-781f-42a4-dc17-14c62f690400", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting datasets\n", " Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets) (3.14.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.25.2)\n", "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (14.0.2)\n", "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n", "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.0.3)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.4)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting multiprocess (from datasets)\n", " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: fsspec[http]<=2024.3.1,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.5)\n", "Collecting huggingface-hub>=0.21.2 (from datasets)\n", " Downloading huggingface_hub-0.23.0-py3-none-any.whl (401 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.2/401.2 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", "Requirement already 
satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.5)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.21.2->datasets) (4.11.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2024.2.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.4)\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", "Installing collected packages: xxhash, dill, multiprocess, huggingface-hub, datasets\n", " Attempting uninstall: huggingface-hub\n", " Found existing installation: huggingface-hub 0.20.3\n", " Uninstalling huggingface-hub-0.20.3:\n", " Successfully uninstalled huggingface-hub-0.20.3\n", "Successfully installed datasets-2.19.1 dill-0.3.8 huggingface-hub-0.23.0 multiprocess-0.70.16 xxhash-3.4.1\n" ] } ], "source": [ "!pip install datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1zN614mg3Exi", "outputId": "620ba251-2f5c-4006-8c55-e94ee1720fcf" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting rouge\n", " Downloading rouge-1.0.1-py3-none-any.whl (13 kB)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from rouge) (1.16.0)\n", "Installing collected packages: rouge\n", "Successfully installed rouge-1.0.1\n" ] } ], "source": [ "!pip install rouge" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T10:05:37.014088Z", "iopub.status.busy": "2024-05-09T10:05:37.012949Z", "iopub.status.idle": "2024-05-09T10:05:51.575293Z", "shell.execute_reply": "2024-05-09T10:05:51.574121Z", "shell.execute_reply.started": "2024-05-09T10:05:37.014050Z" }, "id": "LEvliRHn3Exi", "outputId": "857521cf-096f-440d-82d2-20cc3d69d154", "trusted": true }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "Collecting rouge-score\n", " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.4.0)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score) (3.8.1)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.25.2)\n", "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.16.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (8.1.7)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (1.4.2)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (2023.12.25)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (4.66.4)\n", "Building wheels for collected packages: rouge-score\n", " Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=e335d88b2a0de16f5eaf4c7e1437689dbccfa4b3eb5e95556434630b018697dc\n", " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", "Successfully built rouge-score\n", "Installing collected packages: rouge-score\n", "Successfully installed rouge-score-0.1.2\n" ] } ], "source": [ "!pip install rouge-score" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:49:06.705842Z", "iopub.status.busy": "2024-05-09T09:49:06.704854Z", "iopub.status.idle": "2024-05-09T09:49:45.263790Z", "shell.execute_reply": "2024-05-09T09:49:45.262578Z", "shell.execute_reply.started": "2024-05-09T09:49:06.705796Z" }, "id": "nimOxRn-3Exj", "outputId": "7cc140fc-86ac-4fd9-e6b5-18207f916dd9", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting git+https://github.com/keras-team/keras-nlp.git\n", " Cloning https://github.com/keras-team/keras-nlp.git to /tmp/pip-req-build-gc9g1ft6\n", " Running command git clone --filter=blob:none --quiet https://github.com/keras-team/keras-nlp.git /tmp/pip-req-build-gc9g1ft6\n", " Resolved https://github.com/keras-team/keras-nlp.git to commit 294304b94f8ab9355beeeae6965e26f6c4fc9286\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting keras-core (from keras-nlp==0.10.0)\n", " Downloading keras_core-0.1.7-py3-none-any.whl (950 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m950.8/950.8 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (1.4.0)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (1.25.2)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (24.0)\n", "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (2023.12.25)\n", "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (13.7.1)\n", "Requirement already satisfied: dm-tree in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (0.1.8)\n", "Requirement already satisfied: kagglehub in /usr/local/lib/python3.10/dist-packages (from keras-nlp==0.10.0) (0.2.5)\n", "Collecting tensorflow-text (from keras-nlp==0.10.0)\n", " Downloading tensorflow_text-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.2/5.2 MB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kagglehub->keras-nlp==0.10.0) (2.31.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kagglehub->keras-nlp==0.10.0) (4.66.4)\n", "Collecting namex (from keras-core->keras-nlp==0.10.0)\n", " Downloading namex-0.0.8-py3-none-any.whl (5.8 kB)\n", "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from keras-core->keras-nlp==0.10.0) (3.9.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->keras-nlp==0.10.0) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->keras-nlp==0.10.0) (2.16.1)\n", "Collecting tensorflow<2.17,>=2.16.1 (from tensorflow-text->keras-nlp==0.10.0)\n", " Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m589.8/589.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->keras-nlp==0.10.0) (0.1.2)\n", "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (1.6.3)\n", "Requirement already satisfied: flatbuffers>=23.5.26 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (24.3.25)\n", "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (0.5.4)\n", "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (0.2.0)\n", "Collecting h5py 
(from keras-core->keras-nlp==0.10.0)\n", " Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m76.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (18.1.1)\n", "Collecting ml-dtypes~=0.3.1 (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0)\n", " Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m72.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (3.3.0)\n", "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (3.20.3)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (67.7.2)\n", "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (1.16.0)\n", "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (2.4.0)\n", "Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (4.11.0)\n", "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (1.14.1)\n", "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (1.63.0)\n", "Collecting tensorboard<2.17,>=2.16 (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0)\n", " Downloading tensorboard-2.16.2-py3-none-any.whl (5.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m110.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting keras>=3.0.0 (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0)\n", " Downloading keras-3.3.3-py3-none-any.whl (1.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m79.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (0.37.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub->keras-nlp==0.10.0) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub->keras-nlp==0.10.0) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in 
/usr/local/lib/python3.10/dist-packages (from requests->kagglehub->keras-nlp==0.10.0) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub->keras-nlp==0.10.0) (2024.2.2)\n", "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (0.43.0)\n", "Collecting optree (from keras>=3.0.0->tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0)\n", " Downloading optree-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (311 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.2/311.2 kB\u001b[0m \u001b[31m42.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (3.6)\n", "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (0.7.2)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (3.0.3)\n", "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.17,>=2.16->tensorflow<2.17,>=2.16.1->tensorflow-text->keras-nlp==0.10.0) (2.1.5)\n", "Building wheels for collected packages: keras-nlp\n", " Building wheel for keras-nlp (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for keras-nlp: filename=keras_nlp-0.10.0-py3-none-any.whl size=876028 sha256=50475c779ae0ad3ea3540421eda08f6ae3a9911c19ee71bbd0b45eaa05d73934\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-htfqggzu/wheels/aa/8c/45/112235850203b00e1d4942afbaa83677a6d8a775618e72a132\n", "Successfully built keras-nlp\n", "Installing collected packages: namex, optree, ml-dtypes, h5py, tensorboard, keras-core, keras, tensorflow, tensorflow-text, keras-nlp\n", " Attempting uninstall: ml-dtypes\n", " Found existing installation: ml-dtypes 0.2.0\n", " Uninstalling ml-dtypes-0.2.0:\n", " Successfully uninstalled ml-dtypes-0.2.0\n", " Attempting uninstall: h5py\n", " Found existing installation: h5py 3.9.0\n", " Uninstalling h5py-3.9.0:\n", " Successfully uninstalled h5py-3.9.0\n", " Attempting uninstall: tensorboard\n", " Found existing installation: tensorboard 2.15.2\n", " Uninstalling tensorboard-2.15.2:\n", " Successfully uninstalled tensorboard-2.15.2\n", " Attempting uninstall: keras\n", " Found existing installation: keras 2.15.0\n", " Uninstalling keras-2.15.0:\n", " Successfully uninstalled keras-2.15.0\n", " Attempting uninstall: tensorflow\n", " Found existing installation: tensorflow 2.15.0\n", " Uninstalling tensorflow-2.15.0:\n", " Successfully uninstalled tensorflow-2.15.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "tf-keras 2.15.1 requires tensorflow<2.16,>=2.15, but you have tensorflow 2.16.1 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed h5py-3.11.0 keras-3.3.3 keras-core-0.1.7 keras-nlp-0.10.0 ml-dtypes-0.3.2 namex-0.0.8 optree-0.11.0 tensorboard-2.16.2 tensorflow-2.16.1 tensorflow-text-2.16.1\n" ] } ], "source": [ "!pip install git+https://github.com/keras-team/keras-nlp.git\n", "#!pip install tensorflow-datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T10:05:55.489940Z", "iopub.status.busy": "2024-05-09T10:05:55.489524Z", "iopub.status.idle": "2024-05-09T10:05:56.570119Z", "shell.execute_reply": "2024-05-09T10:05:56.569321Z", "shell.execute_reply.started": "2024-05-09T10:05:55.489901Z" }, "id": "K0VA4CEi3Exj", "trusted": true }, "outputs": [], "source": [ "import pandas as pd\n", "from rouge import Rouge\n", "from rouge_score import rouge_scorer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:49:45.265569Z", "iopub.status.busy": "2024-05-09T09:49:45.265247Z", "iopub.status.idle": "2024-05-09T09:49:45.270747Z", "shell.execute_reply": "2024-05-09T09:49:45.269768Z", "shell.execute_reply.started": "2024-05-09T09:49:45.265529Z" }, "id": "Bl0lFL403Exj", "trusted": true }, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:49:45.274090Z", "iopub.status.busy": "2024-05-09T09:49:45.273663Z", "iopub.status.idle": "2024-05-09T09:49:45.287300Z", "shell.execute_reply": "2024-05-09T09:49:45.286459Z", "shell.execute_reply.started": "2024-05-09T09:49:45.274058Z" }, "id": "xeg1AjzE3Exj", "outputId": "aa6ed29e-fbd8-4a53-f83f-cd4c225bf94f", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Default GPU Device: /device:GPU:0\n" ] } ], "source": [ "import tensorflow as tf\n", "\n", "if tf.test.gpu_device_name():\n", " print(\"Default GPU Device: {}\".format(tf.test.gpu_device_name()))\n", "else:\n", " print(\"Please install GPU version of TF\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:49:45.288758Z", "iopub.status.busy": "2024-05-09T09:49:45.288453Z", "iopub.status.idle": "2024-05-09T09:49:45.299372Z", "shell.execute_reply": "2024-05-09T09:49:45.298486Z", "shell.execute_reply.started": "2024-05-09T09:49:45.288733Z" }, "id": "RcbPNILp3Exj", "trusted": true }, "outputs": [], "source": [ "import time\n", "import keras_nlp\n", "import tensorflow as tf\n", "#import tensorflow_datasets as tfds\n", "from tensorflow.keras.callbacks import EarlyStopping\n", "\n", "import time" ] }, { "cell_type": "markdown", "metadata": { "id": "wiN0UPm5K-O5" }, "source": [ "## Define Training Variables" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:49:45.300787Z", "iopub.status.busy": "2024-05-09T09:49:45.300497Z", "iopub.status.idle": "2024-05-09T09:49:45.309685Z", "shell.execute_reply": "2024-05-09T09:49:45.308865Z", "shell.execute_reply.started": "2024-05-09T09:49:45.300763Z" }, "id": "4gZv4CRH3Exk", "trusted": true }, "outputs": [], "source": [ "BATCH_SIZE = 8\n", "NUM_BATCHES = 50\n", 
"EPOCHS = 10\n", "MAX_ENCODER_SEQUENCE_LENGTH = 1024\n", "MAX_DECODER_SEQUENCE_LENGTH = 256" ] }, { "cell_type": "markdown", "metadata": { "id": "NOnEa7zSLHLd" }, "source": [ "## Collecting Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 365, "referenced_widgets": [ "16bb79ba14df4dcfaa17886d862d866c", "ffcfe7124903419b8903d684f7b68d7a", "c9f52b6c001943919e39602f3cf525f6", "19d839b21cf74c9a8d390e7a9e20c44e", "4960e7d2fd184b94a4d76d2415d8f0eb", "4e90dce6ce6a4a98bbd6946d89090a02", "d48adfb6987c495aa486953bdf516b9d", "5a78ffc298ef48e0b5cfa54f85630763", "57230f13e02848a9a7b1a8b10ca68f17", "9a6d9395189b4e329ad4bd28d99c58d2", "fb86e141b3ca42868b63c1ff3f310756", "fa1da835ebf54e4d954a5d6e7ebe5102", "72175117e8ab4bfb9378443a04168851", "0a166bbdb4fd4e3bb5c595695ced2654", "d0bf5db2b7df49a9ad56cc36cde663b4", "e83e35f4e2ad499b94b23a35ea71ffc5", "2b02d5f362884e658bdabe9d59d8d1b7", "a5db5c3caa3046d7b4bfbf6241542e73", "cbf258c05361409aa3d4ffd8da7161b2", "456b79a7dad644389fb8020e07855466", "29cae4230ba94241b972b1f7a7472bc4", "d35121b675a745f895437114264d6072", "f6281a2b2e57465ba37caf40b50f142d", "83fbd0bc25de422a98324ffdf03e99af", "4abb44f5e96945e6a8973cda11f79650", "0668eda6cc764cf89eb4a4b3caa2e0eb", "51f066a3149543ef9286fcfa769c9a7d", "a7d92db025444a209bba79bd316bf148", "17752425c1c148f69ee28558f4b611b4", "cee49a6054be48878521025d3b296062", "d8183f1c0c4845c791daa19436970457", "dc5e9dc9cec646739c399980ca9d9e86", "09f08e0bd0c34cfe95de707f95bef8d9", "d288b6d3a4a44ebdb313981c713346ab", "50c9c5e846ba436a8ed5129094abfaaa", "fc5d497b8e264af284d98215c2d9fb85", "8b55a23b8f8f46dd85a381e0a658dc82", "197f7ab5c5f542b185085e6ac5ec6bb2", "e6b375f404a14f7998d08cd7c360bb9b", "d94ec244bc5948cbb7869876baccc605", "f87770efd7204186a8bef817039d02c6", "feb4e900998942fd858d86023a80a875", "51f929b36e5847fa92c20415784484b1", "8fceaa333b59423283aee94f68cf927b", "0c92582a118f4208a86ba74198b80c33", "cb6e51e4420f45beb5e634745f9c15c5", "7df6b34be9ad4de6bc64092fc6894f32", "bf42d6467a244a11b5c3b7e207d7f163", "43c794452ff34076b9241530f76d46bb", "32e321edca7f49e39c12073c086993e2", "d8699434eabb4a7a8fea8e2dc84f299c", "99c465772c66496a9754b30f1640e694", "66745442e2154e8a85e915bf0b318037", "b52306f0cd2d4cfb82a41dedc6b0ec2f", "a12372db054b4b7fa06694047379ceab", "c3cb27dc53934fdb9292037038a2ad4b", "4ed42dd6709e465b8812ea3f495e96fb", "b7e763a9637343c2bb4f72b9542103e9", "f2ca4e5466c34ecfb972ee359e126e55", "97a92e800a6d4121b0a1e1f566209cdf", "ab4b7b018cbf4ef3bfce404234f122b9", "1ab3db35a49b4b2f85e591b1cfd73507", "2a331347a10549a4967e85f5452331af", "4f6f6c5e6f2845a4a5063817618880f2", "89e56c9363fa4bd6affa6940c7810485", "5a4ccf7cd7574c689ecdf0e1b40eaf9f", "96451d09b07045a08218c07c76ba3840", "d934a4c44c904b379fa88c3160f85d73", "081cb47c5c374f69ad4ccf92ecd19fbc", "277ee952e0b741cd8bd074dba7eb34e2", "df73458c056d4c9681395091a3d64ea5", "1dbeccc6c69f44ff96576f74d01f7b63", "44e2770f24f549f799f673df33df7b8f", "f8648ee19d69479283e29b1f6682f8c4", "b151c915bdd049d1972846de07060ea2", "f3434897092c402480a4adb1baae6adb", "888e380377ba4c7a8e9ff037eed66b49" ] }, "execution": { "iopub.execute_input": "2024-05-09T09:49:45.310973Z", "iopub.status.busy": "2024-05-09T09:49:45.310703Z", "iopub.status.idle": "2024-05-09T09:49:47.511434Z", "shell.execute_reply": "2024-05-09T09:49:47.510538Z", "shell.execute_reply.started": "2024-05-09T09:49:45.310951Z" }, "id": "xhhh_WYk3Exk", "outputId": "dccfe1ec-5d59-4e96-b000-a0506c714a9e", "trusted": true }, "outputs": [ { "name": "stderr", 
"output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "16bb79ba14df4dcfaa17886d862d866c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/295M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fa1da835ebf54e4d954a5d6e7ebe5102", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/28.3M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f6281a2b2e57465ba37caf40b50f142d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/39.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d288b6d3a4a44ebdb313981c713346ab", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/40.1M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0c92582a118f4208a86ba74198b80c33", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0%| | 0/44972 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c3cb27dc53934fdb9292037038a2ad4b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating validation split: 0%| | 0/5622 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "96451d09b07045a08218c07c76ba3840", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating test split: 0%| | 0/5622 [00:00, ? 
examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from datasets import load_dataset\n", "\n", "dataset_dict = load_dataset(\"multi_news\")\n", "\n", "train_dataset = dataset_dict['train']\n", "validation_dataset = dataset_dict['validation']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:49:47.512821Z", "iopub.status.busy": "2024-05-09T09:49:47.512514Z", "iopub.status.idle": "2024-05-09T09:49:47.521186Z", "shell.execute_reply": "2024-05-09T09:49:47.520276Z", "shell.execute_reply.started": "2024-05-09T09:49:47.512795Z" }, "id": "6RsYpKdY3Exk", "trusted": true }, "outputs": [], "source": [ "def check_len_every_split(dataset_dict):\n", " for split in ['train', 'validation', 'test']:\n", " print(f\"Number of rows in {split} dataset: {len(dataset_dict[split])}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:49:47.522890Z", "iopub.status.busy": "2024-05-09T09:49:47.522544Z", "iopub.status.idle": "2024-05-09T09:49:47.533028Z", "shell.execute_reply": "2024-05-09T09:49:47.531967Z", "shell.execute_reply.started": "2024-05-09T09:49:47.522859Z" }, "id": "rdJ370PQ3Exk", "outputId": "847bdcb6-903e-4a2f-dd36-20f2575b3553", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of rows in train dataset: 44972\n", "Number of rows in validation dataset: 5622\n", "Number of rows in test dataset: 5622\n" ] } ], "source": [ "check_len_every_split(dataset_dict)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:49:47.537292Z", "iopub.status.busy": "2024-05-09T09:49:47.536878Z", "iopub.status.idle": "2024-05-09T09:49:47.544102Z", "shell.execute_reply": "2024-05-09T09:49:47.543168Z", "shell.execute_reply.started": "2024-05-09T09:49:47.537261Z" }, "id": "aUWFLncU3Exl", "outputId": "9a224733-427e-429b-da8c-5bf8cc4deb78", "trusted": true }, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['document', 'summary'],\n", " num_rows: 44972\n", " })\n", " validation: Dataset({\n", " features: ['document', 'summary'],\n", " num_rows: 5622\n", " })\n", " test: Dataset({\n", " features: ['document', 'summary'],\n", " num_rows: 5622\n", " })\n", "})" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_dict" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:49:47.545458Z", "iopub.status.busy": "2024-05-09T09:49:47.545201Z", "iopub.status.idle": "2024-05-09T09:49:49.351128Z", "shell.execute_reply": "2024-05-09T09:49:49.350226Z", "shell.execute_reply.started": "2024-05-09T09:49:47.545436Z" }, "id": "gIfbZ2nL3Exl", "trusted": true }, "outputs": [], "source": [ "dataset_dict = load_dataset(\"multi_news\")" ] }, { "cell_type": "markdown", "metadata": { "id": "KbsdoDIbLXdt" }, "source": [ "## Data Preprocessing" ] }, { "cell_type": "markdown", "metadata": { "id": "SJeYO1K6LayM" }, "source": [ "Delete rows that have a summary longer than the document" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 209, "referenced_widgets": [ "2ec7afd73a4a4003ae2aeec34d1bd03b", "926c114dd76c4b2ba8c889a0686c44aa", 
"8cd6103269ae489eb26d7da34d487f4a", "b66bd50016c341ae8d7efcf4900fd1ef", "090d2a901e414e1b861ca1559603a6d7", "71a4f21197a649afbfe6331334156747", "d02144ae039b42e58c05d485fd116887", "335e27525bb6421b8aacfbd89dcea2f9", "5ceaad9422e24bbd8aafa0d1c3ad551b", "2cdf784120734b419ce1d35032e6ddef", "ba41a320cb8e40569f17423ffec95061", "c32b62b8e9354f11ac08c27843856030", "1fb1ab7919d748c68ba4d7a1c06e901a", "ab95cb8635394233a3e70e9ab13978ef", "58e36fd119e44d289fa14c978d8cb38c", "526f7dc303e14b8da3d442e8e6cc1655", "307bcaacd03149a58958c4f4cf30fec9", "8f1a4603111943efab924c14189aede9", "6f7b08e5d7cd49ccbad1ca7f1af09174", "af38af6adba94ce9abdaab8436a68331", "5028ad4de9ea48199bb7b035f30643cb", "e4fa54cb886d45c7b17b083426d4d622", "3fd3851bcb284b9bb5c99dad2bab4ea5", "d548a7320bdc43f88eeeaf7f0963e202", "4bb3448ce30848a5a50c8a051b0475ed", "5cfbc4eae434424aa6172c1e733eaf95", "3fee7619e7ac45aeba62382bf0388eaf", "4830502a50e442edaf03ba6bc217db9a", "db3561f6c22e4f5a946eb4f6ba6d94a8", "9f94c3d7d2c64bd2afa86fa096133bea", "7d1dc5d4dbad4c0497458984c68a4488", "1e7e7607d4ef4462af7335cd2646c9db", "e33ce42164ef4a08ac4a254e712c2119", "5c84cde33fcb4427b9a9f3814639f7a0", "1e4d477cd41e4971a0a2fe86085caec8", "9ea15c6618d94fb8bb816aae94fa5e13", "2068e67827eb469592536669dbd995c0", "e69689a25c874b6ab164269472bda7e8", "09c27d98813647dfaec9b5da915aea54", "39960896f9564515864c4ac3b6a5c90e", "7a844543d71d466bbc6bc2678367b197", "03bfb1471c78499aa9f22b10009d328e", "7142ca0c1fe34afbb9766e731a6773c7", "9ba085cef6c946e1b78da7dc050c0c4f", "866f8ec85cf442c4a47f6352c5f9de89", "3ef375bcec674fbdbfe6cf23f74aada0", "59510bd0289e418eb8ad0020887d8435", "2a6fc0f6316e404bbba07cb8a4929079", "036b8fac6ac1491c9a3788beaed3bc45", "2d44f1dc6b014c4899a8d0341d88fc70", "f33a8a115ea246c69df95de11d7bf000", "761db9ccdeea49cc8dd2c36ae29e3621", "21233417016d479c98812f0f047533ea", "bf92c0898a51449ebe13be6143e5c175", "89b2e4f03c8c413a9956a3abcf7cd4f9", "9c68ef783d8b4a20a9a2cca43d3ac8ab", "5d267509b2a4413a8e5c5f2c4af1561b", "0854e2de2443419a946b47b9e79f5194", "e3c71e64cd0a48ccbf584e5a02e7dbb2", "6118360abbc94f398f53c450f6ca7499", "b0a58b8f37fa473a9201fca09314e52b", "0d785dd11064488298656573da1763d1", "a9b7359a0da5422587b1126ff9e098f8", "04f97b5aee1742b4b82d276018692365", "08d4e87829fc409299e8804a4d92d45c", "deaa676fe3af4076a7aee7e9cb1f557c" ] }, "execution": { "iopub.execute_input": "2024-05-09T09:49:49.353173Z", "iopub.status.busy": "2024-05-09T09:49:49.352646Z", "iopub.status.idle": "2024-05-09T09:49:49.383166Z", "shell.execute_reply": "2024-05-09T09:49:49.382302Z", "shell.execute_reply.started": "2024-05-09T09:49:49.353130Z" }, "id": "H6pCAYAN3Exl", "outputId": "22de87ab-8652-416c-a140-fa2a8384b50c", "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2ec7afd73a4a4003ae2aeec34d1bd03b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/44972 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c32b62b8e9354f11ac08c27843856030", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Filter: 0%| | 0/44972 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3fd3851bcb284b9bb5c99dad2bab4ea5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/5622 [00:00, ? 
examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5c84cde33fcb4427b9a9f3814639f7a0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Filter: 0%| | 0/5622 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "866f8ec85cf442c4a47f6352c5f9de89", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/5622 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9c68ef783d8b4a20a9a2cca43d3ac8ab", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Filter: 0%| | 0/5622 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "for split in ['train', 'validation', 'test']:\n", " dataset = dataset_dict[split]\n", " dataset = dataset.map(lambda example: {'document': example['document'], 'summary': example['summary'].strip(\"–\").strip()})\n", " dataset = dataset.filter(lambda example: len(example['summary']) <= len(example['document']))\n", "# dataset = dataset.filter(lambda example: len(example['document']) <= MAX_DECODER_SEQUENCE_LENGTH)\n", "# dataset = dataset.filter(lambda example: len(example['summary']) <= MAX_ENCODER_SEQUENCE_LENGTH)\n", " dataset_dict[split] = dataset\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:49:49.384608Z", "iopub.status.busy": "2024-05-09T09:49:49.384330Z", "iopub.status.idle": "2024-05-09T09:49:49.389156Z", "shell.execute_reply": "2024-05-09T09:49:49.388289Z", "shell.execute_reply.started": "2024-05-09T09:49:49.384564Z" }, "id": "QC5g4U9a3Exl", "outputId": "ca0cb5cf-74c0-452b-e08d-3b477507e94c", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of rows in train dataset: 44556\n", "Number of rows in validation dataset: 5574\n", "Number of rows in test dataset: 5564\n" ] } ], "source": [ "check_len_every_split(dataset_dict)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2024-05-09T09:49:49.390612Z", "iopub.status.busy": "2024-05-09T09:49:49.390322Z", "iopub.status.idle": "2024-05-09T09:49:49.930767Z", "shell.execute_reply": "2024-05-09T09:49:49.929893Z", "shell.execute_reply.started": "2024-05-09T09:49:49.390570Z" }, "id": "cds8jrbz3Exl", "outputId": "b3d5a250-e1b9-4be0-c828-cedc2b2a31c3", "trusted": true }, "outputs": [ { "data": { "text/plain": [ "['The unemployment rate dropped to 8.2% last month, but the economy only added 120,000 jobs, when 203,000 new jobs had been predicted, according to today\\'s jobs report. Reaction on the Wall Street Journal\\'s MarketBeat Blog was swift: \"Woah!!! Bad number.\" The unemployment rate, however, is better news; it had been expected to hold steady at 8.3%. But the AP notes that the dip is mostly due to more Americans giving up on seeking employment.',\n", " 'Shelly Sterling plans \"eventually\" to divorce her estranged husband Donald, she tells Barbara Walters at ABC News. As for her stake in the Los Angeles Clippers, she plans to keep it, the AP notes. Sterling says she would \"absolutely\" fight any NBA decision to force her to sell the team. The team is her \"legacy\" to her family, she says. 
\"To be honest with you, I\\'m wondering if a wife of one of the owners … said those racial slurs, would they oust the husband? Or would they leave the husband in?\"']" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_dict['train']['summary'][:2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:49:49.932082Z", "iopub.status.busy": "2024-05-09T09:49:49.931821Z", "iopub.status.idle": "2024-05-09T09:50:02.993549Z", "shell.execute_reply": "2024-05-09T09:50:02.992436Z", "shell.execute_reply.started": "2024-05-09T09:49:49.932059Z" }, "id": "4yGRYAch3Exm", "trusted": true }, "outputs": [], "source": [ "train_ds = tf.data.Dataset.from_tensor_slices({\n", " \"encoder_text\":dataset_dict['train']['document'],\n", " \"decoder_text\":dataset_dict['train']['summary'],\n", "})\n", "train_ds = train_ds.batch(BATCH_SIZE)\n", "train_ds = train_ds.take(NUM_BATCHES)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:50:02.995262Z", "iopub.status.busy": "2024-05-09T09:50:02.994953Z", "iopub.status.idle": "2024-05-09T09:50:04.749637Z", "shell.execute_reply": "2024-05-09T09:50:04.748833Z", "shell.execute_reply.started": "2024-05-09T09:50:02.995236Z" }, "id": "cxZSEzLe3Exm", "trusted": true }, "outputs": [], "source": [ "val_ds = tf.data.Dataset.from_tensor_slices({\n", " \"encoder_text\":dataset_dict['validation']['document'],\n", " \"decoder_text\":dataset_dict['validation']['summary'],\n", "})\n", "val_ds = val_ds.batch(BATCH_SIZE)\n", "val_ds = val_ds.take(NUM_BATCHES)" ] }, { "cell_type": "markdown", "metadata": { "id": "82RqWN0yLrml" }, "source": [ "## Creating Model" ] }, { "cell_type": "markdown", "metadata": { "id": "PxcjuKyWL0GL" }, "source": [ "Creating model using Keras-NLP + Tensorflow Keras Utility" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:50:04.751226Z", "iopub.status.busy": "2024-05-09T09:50:04.750878Z", "iopub.status.idle": "2024-05-09T09:50:08.564764Z", "shell.execute_reply": "2024-05-09T09:50:08.563973Z", "shell.execute_reply.started": "2024-05-09T09:50:04.751194Z" }, "id": "dqCGT09p3Exm", "outputId": "9fa24d7f-d918-4e0c-d8fb-d806fc43d22d", "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Attaching 'metadata.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'metadata.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'tokenizer.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'tokenizer.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'assets/tokenizer/vocabulary.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'assets/tokenizer/merges.txt' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n" ] } ], "source": [ "preprocessor = keras_nlp.models.BartSeq2SeqLMPreprocessor(\n", " tokenizer=keras_nlp.models.BartTokenizer.from_preset(\"bart_large_en_cnn\"),\n", " encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,\n", " decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:50:08.566211Z", 
"iopub.status.busy": "2024-05-09T09:50:08.565919Z", "iopub.status.idle": "2024-05-09T09:50:33.704936Z", "shell.execute_reply": "2024-05-09T09:50:33.704057Z", "shell.execute_reply.started": "2024-05-09T09:50:08.566185Z" }, "id": "kodbIpyw3Exm", "outputId": "bf052e51-2c26-4757-91f0-205e764536fa", "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Attaching 'metadata.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'metadata.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'config.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'config.json' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n", "Attaching 'model.weights.h5' from model 'keras/bart/keras/bart_large_en_cnn/2' to your Kaggle notebook...\n" ] }, { "data": { "text/html": [ "
Preprocessor: \"bart_seq2_seq_lm_preprocessor\"\n",
"\n"
],
"text/plain": [
"\u001b[1mPreprocessor: \"bart_seq2_seq_lm_preprocessor\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
"┃ Tokenizer (type) ┃ Vocab # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
"│ bart_tokenizer (BartTokenizer) │ 50,265 │\n",
"└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mTokenizer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Vocab #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
"│ bart_tokenizer (\u001b[38;5;33mBartTokenizer\u001b[0m) │ \u001b[38;5;34m50,265\u001b[0m │\n",
"└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Model: \"bart_seq2_seq_lm\"\n",
"\n"
],
"text/plain": [
"\u001b[1mModel: \"bart_seq2_seq_lm\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
"│ decoder_padding_mask (InputLayer) │ (None, None) │ 0 │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ decoder_token_ids (InputLayer) │ (None, None) │ 0 │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ encoder_padding_mask (InputLayer) │ (None, None) │ 0 │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ encoder_token_ids (InputLayer) │ (None, None) │ 0 │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ bart_backbone (BartBackbone) │ {encoder_sequence_output: (None, None, │ 406,286,336 │\n",
"│ │ 1024), decoder_sequence_output: (None, │ │\n",
"│ │ None, 1024)} │ │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ token_embedding (ReversibleEmbedding) │ (None, None, 50264) │ 51,470,336 │\n",
"└───────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
"│ decoder_padding_mask (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ decoder_token_ids (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ encoder_padding_mask (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ encoder_token_ids (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ bart_backbone (\u001b[38;5;33mBartBackbone\u001b[0m) │ {encoder_sequence_output: (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, │ \u001b[38;5;34m406,286,336\u001b[0m │\n",
"│ │ \u001b[38;5;34m1024\u001b[0m), decoder_sequence_output: (\u001b[38;5;45mNone\u001b[0m, │ │\n",
"│ │ \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1024\u001b[0m)} │ │\n",
"├───────────────────────────────────────────────┼────────────────────────────────────────┼─────────────────┤\n",
"│ token_embedding (\u001b[38;5;33mReversibleEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m50264\u001b[0m) │ \u001b[38;5;34m51,470,336\u001b[0m │\n",
"└───────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Total params: 406,286,336 (1.51 GB)\n", "\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m406,286,336\u001b[0m (1.51 GB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Trainable params: 406,286,336 (1.51 GB)\n", "\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m406,286,336\u001b[0m (1.51 GB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Non-trainable params: 0 (0.00 B)\n", "\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "bart_lm = keras_nlp.models.BartSeq2SeqLM(\n", " backbone = keras_nlp.models.BartBackbone.from_preset(\"bart_large_en_cnn\"),\n", " preprocessor=preprocessor,\n", ")\n", "\n", "bart_lm.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2024-05-09T09:50:33.706356Z", "iopub.status.busy": "2024-05-09T09:50:33.706034Z", "iopub.status.idle": "2024-05-09T09:50:33.744651Z", "shell.execute_reply": "2024-05-09T09:50:33.743964Z", "shell.execute_reply.started": "2024-05-09T09:50:33.706329Z" }, "id": "IGCxbG953Exm", "trusted": true }, "outputs": [], "source": [ "optimizer = tf.keras.optimizers.AdamW(\n", " learning_rate=5e-5,\n", " weight_decay=0.01,\n", " epsilon=1e-6,\n", " global_clipnorm=1.0,\n", ")\n", "\n", "optimizer.exclude_from_weight_decay(var_names=[\"bias\"])\n", "optimizer.exclude_from_weight_decay(var_names=[\"gamma\"])\n", "optimizer.exclude_from_weight_decay(var_names=[\"beta\"])\n", "\n", "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", "\n", "bart_lm.compile(\n", " optimizer=optimizer,\n", " loss=loss,\n", " weighted_metrics=[\"accuracy\"],\n", ")\n", "\n", "early_stopping = EarlyStopping(\n", " monitor='val_loss',\n", " patience=5,\n", " restore_best_weights=True\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ZZOhqszS3Exn", "outputId": "0b2bd34c-0538-46be-c1de-3a527fdfcf16" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "\n", "I0000 00:00:1715177964.524744 9643 service.cc:145] XLA service 0x7f126c0400a0 initialized for platform CUDA (this does not guarantee that XLA will be used). 
Devices:\n", "\n", "I0000 00:00:1715177964.524819 9643 service.cc:153] StreamExecutor device (0): NVIDIA A100-SXM4-40GB MIG 7g.40gb, Compute Capability 8.0\n", "\n", "2024-05-08 21:19:27.103587: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n", "\n", "W0000 00:00:1715177970.706905 9643 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert\n", "\n", "2024-05-08 21:19:36.820027: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907\n", "\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "\n", "I0000 00:00:1715178075.666352 9762 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_289', 24 bytes spill stores, 24 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178076.165019 9766 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_289', 1456 bytes spill stores, 1840 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178076.234660 9745 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_1185', 296 bytes spill stores, 480 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178076.242935 9769 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_289', 100 bytes spill stores, 100 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178076.981695 9764 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_277', 100 bytes spill stores, 100 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178077.316521 9749 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_1185', 100 bytes spill stores, 100 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178077.366223 9745 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_575', 1456 bytes spill stores, 1840 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178077.611185 9759 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_575', 24 bytes spill stores, 24 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178077.711387 9748 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_1088', 648 bytes spill stores, 944 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178077.901690 9750 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_1186', 648 bytes spill stores, 944 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178078.200723 9762 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_277', 1456 bytes spill stores, 1840 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178078.372503 9755 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_277', 24 bytes spill stores, 24 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178078.789418 9744 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_575', 100 bytes spill stores, 100 bytes spill loads\n", 
"\n", "\n", "\n", "I0000 00:00:1715178078.943988 9761 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'triton_gemm_dot_1186', 16 bytes spill stores, 16 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178274.308223 9643 asm_compiler.cc:369] ptxas warning : Registers are spilled to local memory in function 'loop_add_subtract_fusion_125', 116 bytes spill stores, 224 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function 'loop_add_subtract_fusion_124', 8 bytes spill stores, 8 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function 'copy_fusion_49', 136 bytes spill stores, 136 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function 'copy_fusion_43', 244 bytes spill stores, 400 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function 'copy_fusion_42', 3036 bytes spill stores, 2676 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function '__cuda_sm3x_div_rn_noftz_f32_slowpath', 24 bytes spill stores, 24 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function 'copy_fusion_37', 24 bytes spill stores, 24 bytes spill loads\n", "\n", "ptxas warning : Registers are spilled to local memory in function 'copy_fusion_32', 8 bytes spill stores, 8 bytes spill loads\n", "\n", "\n", "\n", "I0000 00:00:1715178274.587572 9643 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 345ms/step - accuracy: 0.4865 - loss: 2.2594" ] }, { "name": "stderr", "output_type": "stream", "text": [ "W0000 00:00:1715178302.055912 9648 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m412s\u001b[0m 762ms/step - accuracy: 0.4868 - loss: 2.2574 - val_accuracy: 0.5168 - val_loss: 2.0858\n", "\n", "Epoch 2/10\n", "\n", "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m30s\u001b[0m 569ms/step - accuracy: 0.5760 - loss: 1.7116 - val_accuracy: 0.5122 - val_loss: 2.1271\n", "\n", "Epoch 3/10\n", "\n", "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m30s\u001b[0m 571ms/step - accuracy: 0.6440 - loss: 1.3491 - val_accuracy: 0.5041 - val_loss: 2.2663\n", "\n", "Epoch 4/10\n", "\n", "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m30s\u001b[0m 570ms/step - accuracy: 0.7051 - loss: 1.0527 - val_accuracy: 0.4910 - val_loss: 2.5058\n", "\n", "Epoch 5/10\n", "\n", "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m32s\u001b[0m 597ms/step - accuracy: 0.7595 - loss: 0.8171 - val_accuracy: 0.4903 - val_loss: 2.7663\n", "\n", "Epoch 6/10\n", "\n", "\u001b[1m50/50\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m31s\u001b[0m 574ms/step - accuracy: 0.8056 - loss: 0.6320 - val_accuracy: 0.4936 - val_loss: 2.9751\n" ] }, { "data": { "text/plain": [ "
| \n", " | id | \n", "rouge1_f1 | \n", "rouge2_f1 | \n", "rougeL_f1 | \n", "
|---|---|---|---|---|
| 1108 | \n", "1108 | \n", "0.529293 | \n", "0.190669 | \n", "0.210101 | \n", "
| 1109 | \n", "1109 | \n", "0.518950 | \n", "0.246334 | \n", "0.303207 | \n", "
| 1110 | \n", "1110 | \n", "0.465011 | \n", "0.208617 | \n", "0.257336 | \n", "
| 1111 | \n", "1111 | \n", "0.350711 | \n", "0.109524 | \n", "0.199052 | \n", "
| 1112 | \n", "1112 | \n", "0.454023 | \n", "0.121387 | \n", "0.201149 | \n", "
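The cell that produces the per-example ROUGE table above lies outside this excerpt. Purely as an illustration, the sketch below shows one way such scores could be computed with the fine-tuned `bart_lm` and the `rouge_score` package imported earlier; the names `NUM_EVAL_EXAMPLES`, `test_docs`, `test_refs`, and `rouge_df`, as well as the choice of `max_length`, are assumptions, not taken from the notebook.

```python
# Hedged sketch (not the notebook's own evaluation cell): generate summaries with the
# fine-tuned BART model and score them with rouge_score, collecting per-example F1
# values into a DataFrame shaped like the table above.
import pandas as pd
from rouge_score import rouge_scorer

NUM_EVAL_EXAMPLES = 1113  # assumption: how many test examples to evaluate

test_docs = dataset_dict['test']['document'][:NUM_EVAL_EXAMPLES]
test_refs = dataset_dict['test']['summary'][:NUM_EVAL_EXAMPLES]

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

rows = []
for i, (doc, ref) in enumerate(zip(test_docs, test_refs)):
    # Seq2SeqLM.generate accepts a raw string; max_length caps the generated summary.
    prediction = bart_lm.generate(doc, max_length=MAX_DECODER_SEQUENCE_LENGTH)
    scores = scorer.score(ref, prediction)  # score(target, prediction)
    rows.append({
        'id': i,
        'rouge1_f1': scores['rouge1'].fmeasure,
        'rouge2_f1': scores['rouge2'].fmeasure,
        'rougeL_f1': scores['rougeL'].fmeasure,
    })

rouge_df = pd.DataFrame(rows)
rouge_df.tail()
```

In practice, `generate()` can also be called on a list of documents at once, which is usually much faster than looping one example at a time.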