{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ca5fe93f-2da8-47ed-ade2-cf2a3810d53e",
   "metadata": {},
   "source": [
    "# Finetuning falcon-1b with Axolotl+QLoRA on RunPod\n",
    "\n",
    "This notebook makes it easy to try out finetuning falcon-1b with Axolotl+QLo on RunPod.\n",
    "\n",
    "If you run into any issues, welcome to report [here](https://github.com/OpenAccess-AI-Collective/axolotl/pull/132) .\n",
    "\n",
    "To run this notebook on RunPod, use [this RunPod template](https://runpod.io/gsc?template=tkb65a1zcb&ref=km0th85l) to deploy a pod with GPU."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "539e223c",
   "metadata": {},
   "source": [
    "## Step 1. Generate default config for accelerate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "650cb9dd-d311-40c1-8bca-549b81b6da52",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace/axolotl\n"
     ]
    }
   ],
   "source": [
    "%cd axolotl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "164de0ff-6b77-4a22-9a90-57931c9246a7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Setting ds_accelerator to cuda (auto detect)\n",
      "accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml\n"
     ]
    }
   ],
   "source": [
    "!accelerate config --config_file configs/accelerate/default_config.yaml default"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2. Use a well-tested falcon-7b qlora config and adjust it to 1b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d3a4401f-36a7-40a7-8f09-bac9cb19bab4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2023-06-05 14:10:49--  https://raw.githubusercontent.com/utensil/axolotl/falcon-7b-qlora/examples/falcon/config-7b-qlora.yml\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 2238 (2.2K) [text/plain]\n",
      "Saving to: ‘config-7b-qlora.yml’\n",
      "\n",
      "config-7b-qlora.yml 100%[===================>]   2.19K  --.-KB/s    in 0s      \n",
      "\n",
      "2023-06-05 14:10:49 (30.8 MB/s) - ‘config-7b-qlora.yml’ saved [2238/2238]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!wget https://raw.githubusercontent.com/utensil/axolotl/falcon-7b-qlora/examples/falcon/config-7b-qlora.yml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "518fd376-f42f-4bd6-8f6a-de43433e9848",
   "metadata": {},
   "outputs": [],
   "source": [
    "!cp config-7b-qlora.yml config-1b-qlora.yml\n",
    "!sed -i -e 's/falcon-7b/falcon-rw-1b/g' -e 's/wandb_project: falcon-qlora/wandb_project: /g' config-1b-qlora.yml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "21ab97e9-b1e0-402f-bfb1-e2846395dea1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# 1b: tiiuae/falcon-rw-1b\n",
      "# 40b: tiiuae/falcon-40b\n",
      "base_model: tiiuae/falcon-rw-1b\n",
      "base_model_config: tiiuae/falcon-rw-1b\n",
      "# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-rw-1b/tree/main \n",
      "trust_remote_code: true\n",
      "model_type: AutoModelForCausalLM\n",
      "tokenizer_type: AutoTokenizer\n",
      "load_in_8bit: false\n",
      "# enable 4bit for QLoRA\n",
      "load_in_4bit: true\n",
      "gptq: false\n",
      "strict: false\n",
      "push_dataset_to_hub:\n",
      "datasets:\n",
      "  - path: QingyiSi/Alpaca-CoT\n",
      "    data_files:\n",
      "      - Chain-of-Thought/formatted_cot_data/gsm8k_train.json\n",
      "    type: \"alpaca:chat\"\n",
      "dataset_prepared_path: last_run_prepared\n",
      "val_set_size: 0.01\n",
      "# enable QLoRA\n",
      "adapter: qlora\n",
      "lora_model_dir:\n",
      "sequence_len: 2048\n",
      "max_packed_sequence_len:\n",
      "\n",
      "# hyperparameters from QLoRA paper Appendix B.2\n",
      "# \"We find hyperparameters to be largely robust across datasets\"\n",
      "lora_r: 64\n",
      "lora_alpha: 16\n",
      "# 0.1 for models up to 13B\n",
      "# 0.05 for 33B and 65B models\n",
      "lora_dropout: 0.05\n",
      "# add LoRA modules on all linear layers of the base model\n",
      "lora_target_modules:\n",
      "lora_target_linear: true\n",
      "lora_fan_in_fan_out:\n",
      "\n",
      "wandb_project: \n",
      "wandb_watch:\n",
      "wandb_run_id:\n",
      "wandb_log_model:\n",
      "output_dir: ./qlora-out\n",
      "\n",
      "# QLoRA paper Table 9\n",
      "# - 16 for 7b & 13b\n",
      "# - 32 for 33b, 64 for 64b\n",
      "# Max size tested on A6000\n",
      "# - 7b: 40\n",
      "# - 40b: 4\n",
      "# decrease if OOM, increase for max VRAM utilization\n",
      "micro_batch_size: 1\n",
      "gradient_accumulation_steps: 2\n",
      "num_epochs: 3\n",
      "# Optimizer for QLoRA\n",
      "optimizer: paged_adamw_32bit\n",
      "torchdistx_path:\n",
      "lr_scheduler: cosine\n",
      "# QLoRA paper Table 9\n",
      "# - 2e-4 for 7b & 13b\n",
      "# - 1e-4 for 33b & 64b\n",
      "learning_rate: 0.0002\n",
      "train_on_inputs: false\n",
      "group_by_length: false\n",
      "bf16: true\n",
      "fp16: false\n",
      "tf32: true\n",
      "gradient_checkpointing: true\n",
      "# stop training after this many evaluation losses have increased in a row\n",
      "# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback\n",
      "early_stopping_patience: 3\n",
      "resume_from_checkpoint:\n",
      "auto_resume_from_checkpoints: true\n",
      "local_rank:\n",
      "logging_steps: 1\n",
      "xformers_attention: true\n",
      "flash_attention:\n",
      "gptq_groupsize:\n",
      "gptq_model_v1:\n",
      "warmup_steps: 10\n",
      "eval_steps: 5\n",
      "save_steps: 10\n",
      "debug:\n",
      "deepspeed:\n",
      "weight_decay: 0.000001\n",
      "fsdp:\n",
      "fsdp_config:\n",
      "special_tokens:\n",
      "  pad_token: \"<|endoftext|>\"\n",
      "  bos_token: \">>ABSTRACT<<\"\n",
      "  eos_token: \"<|endoftext|>\"\n"
     ]
    }
   ],
   "source": [
    "!cat config-1b-qlora.yml"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3. Set W&B to offline mode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ee36f84c-3538-4272-88a7-fe475e9d5fbe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "env: WANDB_MODE=offline\n"
     ]
    }
   ],
   "source": [
    "%env WANDB_MODE=offline"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is to skip some extra setup steps, you can also choose to login to your W&B account  before training."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4. Start training and enjoy!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba26536c-576f-43a5-b67e-d1b2996607b6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Setting ds_accelerator to cuda (auto detect)\n",
      "\n",
      "===================================BUG REPORT===================================\n",
      "Welcome to bitsandbytes. For bug reports, please run\n",
      "\n",
      "python -m bitsandbytes\n",
      "\n",
      " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
      "================================================================================\n",
      "bin /root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/libbitsandbytes_cuda118.so\n",
      "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/local/nvidia/lib'), PosixPath('/usr/local/nvidia/lib64')}\n",
      "  warn(msg)\n",
      "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n",
      "  warn(msg)\n",
      "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//matplotlib_inline.backend_inline')}\n",
      "  warn(msg)\n",
      "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDvnE4umHheXhWsDJbbukYvvyc47/mC4z8syS93btA72T90WDrQagOy5O+DrhdXOvr5i/JwsTlAImy57eLRrtRFOrQq73jyi7Dzo0tvrAiNLVgX2q2dFLoplRyXDXiVYLPmPieMWQOeUCLeSb8FC5zzllcocZwjMXpxScDerZqnlAR0ccpSkGyKIod4ZMkn/29A/C5kHEb/wT8cOAq+MWJ/2okZZgbiR0AMV4DynAkrtcx9JnJnTs9chiMyH+dyCS42Ai24sHWJBkQo6TfxXkyKo9GOpu3Y2WLgrHyaot9Lk5mA1mujyIWdlReD2nvjeCQKjl3KW3xZ73m4nD97MydWSWoJfEWlr+VZvk8EWsZk3CYLZCIBLdod6xXJJ0DD0pvTIq11c8VB7XkgVjapuU/sC8M6HFzHW/NBeE+xX/txPkZkIGqrnxeQ0AtBXdN9ukyNGhGzTkPYJNliiYpY0dCvVuz/BJ2FawFTQGnD1EHOenUCRajREFGCbKoYZqi40j8= utensil@Utensils-MacBook-Pro.local')}\n",
      "  warn(msg)\n",
      "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n",
      "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so'), PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0')}.. We'll flip a coin and try one of these, in order to fail forward.\n",
      "Either way, this might cause trouble in the future:\n",
      "If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.\n",
      "  warn(msg)\n",
      "CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so\n",
      "CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
      "CUDA SETUP: Detected CUDA version 118\n",
      "CUDA SETUP: Loading binary /root/miniconda3/envs/py3.9/lib/python3.9/site-packages/bitsandbytes-0.39.0-py3.9.egg/bitsandbytes/libbitsandbytes_cuda118.so...\n",
      "Setting ds_accelerator to cuda (auto detect)\n",
      "WARNING:root:`trust_remote_code` is set to true. Please make sure that you reviewed the remote code/model.\n",
      "INFO:root:loading tokenizer... tiiuae/falcon-rw-1b\n",
      "Downloading (…)okenizer_config.json: 100%|█████| 255/255 [00:00<00:00, 48.1kB/s]\n",
      "Downloading (…)olve/main/vocab.json: 100%|███| 798k/798k [00:00<00:00, 54.2MB/s]\n",
      "Downloading (…)olve/main/merges.txt: 100%|███| 456k/456k [00:00<00:00, 11.4MB/s]\n",
      "Downloading (…)/main/tokenizer.json: 100%|█| 2.11M/2.11M [00:00<00:00, 51.1MB/s]\n",
      "Downloading (…)cial_tokens_map.json: 100%|████| 99.0/99.0 [00:00<00:00, 200kB/s]\n",
      "Using pad_token, but it is not set yet.\n",
      "INFO:root:Unable to find prepared dataset in last_run_prepared/0ecc5b78e3ce4254b22e749b093712b4\n",
      "INFO:root:Loading raw datasets...\n",
      "Downloading readme: 100%|██████████████████| 8.26k/8.26k [00:00<00:00, 23.9MB/s]\n",
      "Downloading and preparing dataset json/QingyiSi--Alpaca-CoT to /root/.cache/huggingface/datasets/QingyiSi___json/QingyiSi--Alpaca-CoT-2953efcfeb19f105/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...\n",
      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]\n",
      "Downloading data:   0%|                             | 0.00/4.45M [00:00<?, ?B/s]\u001b[A\n",
      "Downloading data: 100%|████████████████████| 4.45M/4.45M [00:00<00:00, 25.2MB/s]\u001b[A\n",
      "Downloading data files: 100%|█████████████████████| 1/1 [00:00<00:00,  2.18it/s]\n",
      "Extracting data files: 100%|████████████████████| 1/1 [00:00<00:00, 2031.14it/s]\n",
      "Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/QingyiSi___json/QingyiSi--Alpaca-CoT-2953efcfeb19f105/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4. Subsequent calls will reuse this data.\n",
      "100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 830.88it/s]\n",
      "INFO:root:tokenizing, merging, and shuffling master dataset\n",
      "INFO:root:Saving merged prepared dataset to disk... last_run_prepared/0ecc5b78e3ce4254b22e749b093712b4\n",
      "INFO:root:loading model and peft_config...                                      \n",
      "Downloading (…)lve/main/config.json: 100%|██████| 665/665 [00:00<00:00, 452kB/s]\n",
      "Downloading (…)/configuration_RW.py: 100%|█| 2.61k/2.61k [00:00<00:00, 3.79MB/s]\n",
      "A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-rw-1b:\n",
      "- configuration_RW.py\n",
      ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
      "Downloading (…)main/modelling_RW.py: 100%|█| 47.5k/47.5k [00:00<00:00, 48.6MB/s]\n",
      "A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-rw-1b:\n",
      "- modelling_RW.py\n",
      ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
      "Downloading pytorch_model.bin: 100%|████████| 2.62G/2.62G [00:23<00:00, 113MB/s]\n",
      "Downloading (…)neration_config.json: 100%|█████| 111/111 [00:00<00:00, 28.3kB/s]\n",
      "INFO:root:converting PEFT model w/ prepare_model_for_int8_training\n",
      "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/peft/utils/other.py:76: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.\n",
      "  warnings.warn(\n",
      "INFO:root:found linear modules: ['dense_h_to_4h', 'dense', 'dense_4h_to_h', 'query_key_value']\n",
      "trainable params: 50331648 || all params: 757911552 || trainable%: 6.6408339953630895\n",
      "INFO:root:Compiling torch model\n",
      "INFO:root:Pre-saving adapter config to ./qlora-out\n",
      "INFO:root:Starting trainer...\n",
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.3\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: W&B syncing is set to \u001b[1m`offline`\u001b[0m in this directory.  \n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb online`\u001b[0m or set \u001b[1mWANDB_MODE=online\u001b[0m to enable cloud syncing.\n",
      "  0%|                                                 | 0/11097 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
      "{'loss': 1.8589, 'learning_rate': 2e-05, 'epoch': 0.0}                          \n",
      "{'loss': 1.6525, 'learning_rate': 4e-05, 'epoch': 0.0}                          \n",
      "{'loss': 1.6585, 'learning_rate': 6e-05, 'epoch': 0.0}                          \n",
      "{'loss': 1.8112, 'learning_rate': 8e-05, 'epoch': 0.0}                          \n",
      "{'loss': 1.7954, 'learning_rate': 0.0001, 'epoch': 0.0}                         \n",
      "  0%|                                       | 5/11097 [00:02<1:25:34,  2.16it/s]\n",
      "  0%|                                                    | 0/75 [00:00<?, ?it/s]\u001b[A\n",
      " 97%|█████████████████████████████████████████▊ | 73/75 [00:03<00:00, 19.84it/s]\u001b[A\n",
      "                                                                                \u001b[A\n",
      "\u001b[A{'eval_loss': 1.9517160654067993, 'eval_runtime': 3.8321, 'eval_samples_per_second': 19.572, 'eval_steps_per_second': 19.572, 'epoch': 0.0}\n"
     ]
    }
   ],
   "source": [
    "!accelerate launch scripts/finetune.py config-1b-qlora.yml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96b7c709-4ae5-4146-a879-89809a60494b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}