{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "gpuClass": "standard",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "288c96b917a94903a6a550db86eb3e0e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_535b9ac6e879412285c2e661660d842a",
              "IPY_MODEL_88557cbe4553406794b1ab38d0398e2e",
              "IPY_MODEL_50c7b4aef7c448ca92064226881ffbd1"
            ],
            "layout": "IPY_MODEL_57b31bbd6e934adfbc7e10d882edba0c"
          }
        },
        "535b9ac6e879412285c2e661660d842a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0cb7936baadd4245826a63e9e76009ef",
            "placeholder": "​",
            "style": "IPY_MODEL_ea20bd4e8df84d548f961628980c2bd1",
            "value": ""
          }
        },
        "88557cbe4553406794b1ab38d0398e2e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_38ac1ccc2f7f42399311de83ba19376a",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_008fc9f2c5ab40bd84176926234882b9",
            "value": 0
          }
        },
        "50c7b4aef7c448ca92064226881ffbd1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_559a880554b14200b4b1ab24ab40d41e",
            "placeholder": "​",
            "style": "IPY_MODEL_7df74532ac57413cb55207099dbffaa7",
            "value": " 0/0 [00:00&lt;?, ?it/s]"
          }
        },
        "57b31bbd6e934adfbc7e10d882edba0c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0cb7936baadd4245826a63e9e76009ef": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ea20bd4e8df84d548f961628980c2bd1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "38ac1ccc2f7f42399311de83ba19376a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": "20px"
          }
        },
        "008fc9f2c5ab40bd84176926234882b9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "559a880554b14200b4b1ab24ab40d41e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7df74532ac57413cb55207099dbffaa7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VtIzUxEFu35b",
        "outputId": "fd9638e1-0374-4bb5-c65d-f3310c83ef76"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'RWKV-LM'...\n",
            "remote: Enumerating objects: 1092, done.\u001b[K\n",
            "remote: Counting objects: 100% (211/211), done.\u001b[K\n",
            "remote: Compressing objects: 100% (81/81), done.\u001b[K\n",
            "remote: Total 1092 (delta 132), reused 185 (delta 130), pack-reused 881\u001b[K\n",
            "Receiving objects: 100% (1092/1092), 5.97 MiB | 30.87 MiB/s, done.\n",
            "Resolving deltas: 100% (663/663), done.\n",
            "--2022-09-21 06:57:37--  https://huggingface.co/BlinkDL/rwkv-4-pile-1b5/resolve/main/RWKV-4-Pile-1B5-20220903-8040.pth\n",
            "Resolving huggingface.co (huggingface.co)... 54.173.5.192, 44.195.102.200, 52.5.62.33, ...\n",
            "Connecting to huggingface.co (huggingface.co)|54.173.5.192|:443... connected.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://cdn-lfs.huggingface.co/repos/d6/95/d69583b06567422d104d5413e7926ae97bcf0d541619db6e61fe10133d91582d/4e215be3b4f86dc2f145835b47a2c432306c373cbf625375b7721bb474512bad?response-content-disposition=attachment%3B%20filename%3D%22RWKV-4-Pile-1B5-20220903-8040.pth%22 [following]\n",
            "--2022-09-21 06:57:37--  https://cdn-lfs.huggingface.co/repos/d6/95/d69583b06567422d104d5413e7926ae97bcf0d541619db6e61fe10133d91582d/4e215be3b4f86dc2f145835b47a2c432306c373cbf625375b7721bb474512bad?response-content-disposition=attachment%3B%20filename%3D%22RWKV-4-Pile-1B5-20220903-8040.pth%22\n",
            "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 13.226.52.13, 13.226.52.128, 13.226.52.14, ...\n",
            "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|13.226.52.13|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 3030279587 (2.8G) [application/zip]\n",
            "Saving to: ‘./RWKV-LM/RWKV-v4/500.pth’\n",
            "\n",
            "./RWKV-LM/RWKV-v4/5 100%[===================>]   2.82G  81.0MB/s    in 35s     \n",
            "\n",
            "2022-09-21 06:58:12 (82.9 MB/s) - ‘./RWKV-LM/RWKV-v4/500.pth’ saved [3030279587/3030279587]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "!git clone https://github.com/BlinkDL/RWKV-LM.git\n",
        "#!wget https://huggingface.co/BlinkDL/rwkv-4-pile-3b/resolve/main/RWKV-4-Pile-3B-20220915-1207.pth -O ./RWKV-LM/RWKV-v4/500.pth 3B needs more vram then google offers\n",
        "!wget https://huggingface.co/BlinkDL/rwkv-4-pile-1b5/resolve/main/RWKV-4-Pile-1B5-20220903-8040.pth -O ./RWKV-LM/RWKV-v4/500.pth"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%cd ./RWKV-LM/RWKV-v4/"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "g5JdHvg2zjom",
        "outputId": "35230cee-1081-46e9-d9c6-1d682660d61b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/RWKV-LM/RWKV-v4\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install transformers\n",
        "!pip install ninja"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "F02bGykkyicB",
        "outputId": "fbbb2802-050a-4480-bf99-d08f4c7cd11e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting transformers\n",
            "  Downloading transformers-4.22.1-py3-none-any.whl (4.9 MB)\n",
            "\u001b[K     |████████████████████████████████| 4.9 MB 4.0 MB/s \n",
            "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
            "Collecting huggingface-hub<1.0,>=0.9.0\n",
            "  Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)\n",
            "\u001b[K     |████████████████████████████████| 120 kB 66.4 MB/s \n",
            "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.8.0)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.12.0)\n",
            "Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n",
            "  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n",
            "\u001b[K     |████████████████████████████████| 6.6 MB 48.2 MB/s \n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.6)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.64.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2022.6.2)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.9.0->transformers) (4.1.1)\n",
            "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.9)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.8.1)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2022.6.15)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
            "Installing collected packages: tokenizers, huggingface-hub, transformers\n",
            "Successfully installed huggingface-hub-0.9.1 tokenizers-0.12.1 transformers-4.22.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting ninja\n",
            "  Downloading ninja-1.10.2.3-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (108 kB)\n",
            "\u001b[K     |████████████████████████████████| 108 kB 4.0 MB/s \n",
            "\u001b[?25hInstalling collected packages: ninja\n",
            "Successfully installed ninja-1.10.2.3\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "########################################################################################################\n",
        "# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM\n",
        "########################################################################################################\n",
        "import numpy as np\n",
        "import math, os\n",
        "import time\n",
        "import types\n",
        "import copy\n",
        "import torch\n",
        "from torch.nn import functional as F\n",
        "from src.utils import TOKENIZER, Dataset\n",
        "torch.backends.cudnn.benchmark = True\n",
        "torch.backends.cudnn.allow_tf32 = True\n",
        "torch.backends.cuda.matmul.allow_tf32 = True\n",
        "np.set_printoptions(precision=4, suppress=True, linewidth=200)\n",
        "\n",
        "########################################################################################################\n",
        "# Step 1: set model\n",
        "# \n",
        "# Set TOKEN_MODE to 'char' or 'bpe' if the model is trained by 'train.py' from scratch.\n",
        "#\n",
        "# Set TOKEN_MODE to 'pile' if you want to test pre-trained pile models.\n",
        "########################################################################################################\n",
        "\n",
        "TOKEN_MODE = 'pile' # char / bpe / pile\n",
        "\n",
        "n_layer = 6\n",
        "n_embd = 512\n",
        "ctx_len = 10024\n",
        "\n",
        "if TOKEN_MODE == 'char':\n",
        "    MODEL_NAME = 'trained-500'  # your trained model\n",
        "    WORD_NAME = 'vocab'         # the .json vocab (generated by train.py)\n",
        "    # set UNKNOWN_CHAR to the rarest token in your vocab.json, and all unknown tokens in your prompt will be denoted by it\n",
        "    UNKNOWN_CHAR = ' '          # here we just set it to ' ' for simplicity\n",
        "\n",
        "elif TOKEN_MODE == 'bpe':\n",
        "    MODEL_NAME = 'trained-500'  # your trained model\n",
        "    WORD_NAME = ['model-vocab.json', 'model-merges.txt'] # [vocab, merge] for your BPE model\n",
        "    UNKNOWN_CHAR = None\n",
        "\n",
        "elif TOKEN_MODE == 'pile':\n",
        "    WORD_NAME = ['20B_tokenizer.json', '20B_tokenizer.json']\n",
        "    UNKNOWN_CHAR = None\n",
        "\n",
        "    #---> you can set MODEL_NAME to your fine-tuned model <---\n",
        "\n",
        "    MODEL_NAME = '500'\n",
        "   \n",
        "    # for 3b\n",
        "    #n_layer = 32\n",
        "    #n_embd = 2560\n",
        "    #ctx_len = 10024\n",
        "\n",
        "    # for 1b5'\n",
        "    n_layer = 24\n",
        "    n_embd = 2048\n",
        "    ctx_len = 1024\n",
        "\n",
        "os.environ['RWKV_FLOAT_MODE'] = 'bf16'  # 'bf16' / 'fp16' / 'fp32' (note: only using fp32 at this moment)\n",
        "os.environ['RWKV_RUN_DEVICE'] = 'cuda'   # 'cpu' (already very fast) or 'cuda'\n",
        "model_type = 'RWKV' # 'RWKV' or 'RWKV-ffnPre'\n",
        "\n",
        "########################################################################################################\n",
        "# Step 2: set prompt & sampling stuffs\n",
        "########################################################################################################\n",
        "\n",
        "# context = 'A'\n",
        "# context = \"\\nIn the\"\n",
        "# context = '\\nSugar:'\n",
        "\n",
        "NUM_TRIALS = 5\n",
        "LENGTH_PER_TRIAL = 3330\n",
        "\n",
        "DEBUG_DEBUG = False  # True False --> show softmax output\n",
        "\n",
        "########################################################################################################\n",
        "\n",
        "print(f'Loading {MODEL_NAME}...')\n",
        "from src.model_run import RWKV_RNN\n",
        "model = RWKV_RNN(MODEL_NAME, os.environ['RWKV_RUN_DEVICE'], model_type, n_layer, n_embd, ctx_len)\n",
        "tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)\n"
      ],
      "metadata": {
        "id": "a7P7ISTa1XgC",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 275,
          "referenced_widgets": [
            "288c96b917a94903a6a550db86eb3e0e",
            "535b9ac6e879412285c2e661660d842a",
            "88557cbe4553406794b1ab38d0398e2e",
            "50c7b4aef7c448ca92064226881ffbd1",
            "57b31bbd6e934adfbc7e10d882edba0c",
            "0cb7936baadd4245826a63e9e76009ef",
            "ea20bd4e8df84d548f961628980c2bd1",
            "38ac1ccc2f7f42399311de83ba19376a",
            "008fc9f2c5ab40bd84176926234882b9",
            "559a880554b14200b4b1ab24ab40d41e",
            "7df74532ac57413cb55207099dbffaa7"
          ]
        },
        "outputId": "af73daec-3a23-4bfb-b960-6feccb5436b1"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Loading 500...\n",
            "\n",
            "RWKV_HEAD_QK_DIM 0\n",
            "\n",
            "Using /root/.cache/torch_extensions/py37_cu113 as PyTorch extensions root...\n",
            "Creating extension directory /root/.cache/torch_extensions/py37_cu113/wkv...\n",
            "Detected CUDA files, patching ldflags\n",
            "Emitting ninja build file /root/.cache/torch_extensions/py37_cu113/wkv/build.ninja...\n",
            "Building extension module wkv...\n",
            "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
            "Loading extension module wkv...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Moving 0 files to the new cache system\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "0it [00:00, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "288c96b917a94903a6a550db86eb3e0e"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!nvidia-smi"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8Qw1y-qAwTzU",
        "outputId": "807eeb47-4d59-4616-d869-22c2cc297621"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Wed Sep 21 06:59:29 2022       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   50C    P0    28W /  70W |   9282MiB / 15109MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "+-----------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "########################################################################################################\n",
        "context = \"You are an AI running a house.\\ngiven the following commands: volumeUp(int amount),volumeDown(int amount),setVolume(int percent),setLights([r,g,b]),playSong(string url)\\nand the given instruction 'Please make the room romantic'\\nList the commands, and the parameters they should have, that should be done to fullfil the command\\nGive the commands in the format [command(parameter)]\\n\\nTask: list the commands and a reasonable value for the parameter\\nResponse:\"\n",
        "\n",
        "TEMPERATURE = 0.9\n",
        "top_p = 0.8\n",
        "top_p_newline = 0.9 # only used in TOKEN_MODE = char\n",
        "\n",
        "if tokenizer.charMode:\n",
        "    context = tokenizer.refine_context(context)\n",
        "    ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]\n",
        "else:\n",
        "    ctx = tokenizer.tokenizer.encode(context)\n",
        "src_len = len(ctx)\n",
        "src_ctx = ctx.copy()\n",
        "\n",
        "print('\\nYour prompt has ' + str(src_len) + ' tokens.')\n",
        "print('\\n--> Currently the first run takes a while if your prompt is long, as we are using RNN to process the prompt. Use GPT to build the hidden state for better speed. <--\\n')\n",
        "\n",
        "for TRIAL in range(1 if DEBUG_DEBUG else NUM_TRIALS):\n",
        "    t_begin = time.time_ns()\n",
        "    print(('-' * 30) + context, end='')\n",
        "    ctx = src_ctx.copy()\n",
        "    model.clear()\n",
        "    if TRIAL == 0:\n",
        "        init_state = types.SimpleNamespace()\n",
        "        for i in range(src_len):\n",
        "            x = ctx[:i+1]\n",
        "            if i == src_len - 1:\n",
        "                init_state.out = model.run(x)\n",
        "            else:\n",
        "                model.run(x)\n",
        "        model.save(init_state)\n",
        "    else:\n",
        "        model.load(init_state)\n",
        "\n",
        "    for i in range(src_len, src_len + (1 if DEBUG_DEBUG else LENGTH_PER_TRIAL)):\n",
        "        x = ctx[:i+1]\n",
        "        x = x[-ctx_len:]\n",
        "\n",
        "        if i == src_len:\n",
        "            out = copy.deepcopy(init_state.out)\n",
        "        else:\n",
        "            out = model.run(x)\n",
        "        if DEBUG_DEBUG:\n",
        "            print('model', np.array(x), '==>', np.array(\n",
        "                out), np.max(out), np.min(out))\n",
        "\n",
        "        if TOKEN_MODE == 'pile':\n",
        "            out[0] = -999999999  # disable <|endoftext|>\n",
        "\n",
        "        char = tokenizer.sample_logits(out, x, ctx_len, temperature=TEMPERATURE,\n",
        "                                       top_p_usual=top_p, top_p_newline=top_p_newline)\n",
        "        char = char.item()\n",
        "        if tokenizer.charMode:\n",
        "            print(tokenizer.itos[int(char)], end='', flush=True)\n",
        "        else:\n",
        "            print(tokenizer.tokenizer.decode(int(char)), end='', flush=True)\n",
        "        ctx += [char]\n",
        "\n",
        "    t_end = time.time_ns()\n",
        "    print(\"\\n----------\", round((t_end - t_begin) / (10 ** 9), 2), end='s ')\n"
      ],
      "metadata": {
        "id": "Co9eLstRwRZ_",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 990
        },
        "outputId": "010231cc-abfe-4356-ff11-1ec3abac295e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "Your prompt has 110 tokens.\n",
            "\n",
            "--> Currently the first run takes a while if your prompt is long, as we are using RNN to process the prompt. Use GPT to build the hidden state for better speed. <--\n",
            "\n",
            "------------------------------You are an AI running a house.\n",
            "given the following commands: volumeUp(int amount),volumeDown(int amount),setVolume(int percent),setLights([r,g,b]),playSong(string url)\n",
            "and the given instruction 'Please make the room romantic'\n",
            "List the commands, and the parameters they should have, that should be done to fullfil the command\n",
            "Give the commands in the format [command(parameter)]\n",
            "\n",
            "Task: list the commands and a reasonable value for the parameter\n",
            "Response:\n",
            "\n",
            "answers:\n",
            "-  answer 1:\n",
            "-  answer 2:\n",
            "-  answer 3:\n",
            "-  answer 4:\n",
            "-  answer 5:\n",
            "\n",
            "The first two commands (room.increase volume and command.setVolume) work.\n",
            "\n",
            "But the others do not. The command \"volumeUp(int percent)\" seems to do nothing.\n",
            "I have tried to change the command by \"volumeUp(1)\" but the result is still the same.\n",
            "\n",
            "What command should I use to set the volume to full?\n",
            "\n",
            "I have an idea about what happens in the first command, but I don't know how it works.\n",
            "This is the full code of the game:\n",
            "#include <iostream>\n",
            "#include <vector>\n",
            "#include <list>\n",
            "#include <string>\n",
            "#include <"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-7-ce677f0ef442>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     41\u001b[0m             \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minit_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     42\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 43\u001b[0;31m             \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     44\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mDEBUG_DEBUG\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     45\u001b[0m             print('model', np.array(x), '==>', np.array(\n",
            "\u001b[0;32m/content/RWKV-LM/RWKV-v4/src/model_run.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, ctx)\u001b[0m\n\u001b[1;32m    365\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    366\u001b[0m                 \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSA\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mln1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf'att.{i}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 367\u001b[0;31m             \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mln2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mffn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf'ffn.{i}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    368\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    369\u001b[0m         \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mln_out\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/content/RWKV-LM/RWKV-v4/src/model_run.py\u001b[0m in \u001b[0;36mLN\u001b[0;34m(self, xx, w)\u001b[0m\n\u001b[1;32m    301\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    302\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mLN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 303\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayer_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_embd\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    304\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    305\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mFF\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlayer_norm\u001b[0;34m(input, normalized_shape, weight, bias, eps)\u001b[0m\n\u001b[1;32m   2501\u001b[0m             \u001b[0mlayer_norm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnormalized_shape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0meps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2502\u001b[0m         )\n\u001b[0;32m-> 2503\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayer_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnormalized_shape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackends\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcudnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menabled\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2504\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2505\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ]
    }
  ]
}