{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"32e7669cd82042cbbb419e25db606c1d":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b6698be32bf74c4087e129fab6e13fdd","IPY_MODEL_ff7333b35c1c472482df6550f6e43be2","IPY_MODEL_da4df56a1ba440dbb69087d0019cab1d"],"layout":"IPY_MODEL_ad598693c58549e0a83a1328d77b8f83"}},"b6698be32bf74c4087e129fab6e13fdd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_de2f7a60851f4681877a4c8dccba29cc","placeholder":"​","style":"IPY_MODEL_02b296efbff143f4bfbb904cbc7b1109","value":"Loading checkpoint shards: 100%"}},"ff7333b35c1c472482df6550f6e43be2":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_72ac83e43e2b4d4498070a5b701a5572","max":3,"min":0,"orientation":"horizontal","style":"IPY_MODEL_320fa615d4de4652ac34fc2518f7749e","value":3}},"da4df56a1ba440dbb69087d0019cab1d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_75280ef205a245be92da268e0752dc71","placeholder":"​","style":"IPY_MODEL_3f33eabd6f7f46ef8138abe748d8fbb1","value":" 3/3 [01:06<00:00, 
18.14s/it]"}},"ad598693c58549e0a83a1328d77b8f83":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"de2f7a60851f4681877a4c8dccba29cc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"02b296efbff143f4bfbb904cbc7b1109":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"72ac83e43e2b4d4498070a5b701a5572":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"320fa615d4de4652a
c34fc2518f7749e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"75280ef205a245be92da268e0752dc71":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3f33eabd6f7f46ef8138abe748d8fbb1":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"markdown","source":["**This notebook is based on the example provided by Google on how to fine-tune the [Gemma-7B](https://huggingface.co/google/gemma-7b) model, as found in this [example notebook](https://github.com/huggingface/notebooks/blob/main/peft/gemma_7b_english_quotes.ipynb).**\n","\n","**此筆記乃基於 Google 所提供如何微調 [Gemma-7B](https://huggingface.co/google/gemma-7b) 模型的[範例筆記](https://github.com/huggingface/notebooks/blob/main/peft/gemma_7b_english_quotes.ipynb)所寫成。**"],"metadata":{"id":"m5mIsLyNgZUi"}},{"cell_type":"markdown","source":["# **How to Fine-Tune Large Language Models (LLMs) Efficiently and Cost-Effectively?**\n","In recent years, Large Language Models (LLM) have attracted significant attention due to their ability to solve a wide range of problems and their outstanding performance. These models are typically trained using massive datasets and huge numbers of parameters. Many big-tech companies offer pre-trained models, called base or foundational models, but to utilize them in specific domains, fine-tuning is required. Although ChatGPT offers online model fine-tuning features, users may prefer to fine-tune models in a local environment for privacy or customization reasons.\n","\n","Fine-tuning large models primarily falls into the following two methods:\n","\n","1. Full Parameters Fine-Tuning: Adjusts all parameters of the pre-trained large model. However, due to the massive number of parameters in large models, full parameters fine-tuning requires substantial computational resources, making it impractical for many users.\n","2. 
{"cell_type":"markdown","source":["## **Tutorial: Fine-Tuning Large Models with Hugging Face**\n","In this tutorial, we use Google Gemma as our foundation model to demonstrate how to fine-tune models with Hugging Face. Although Google Gemma is publicly available, you must accept specific terms before using it. Please obtain permission and a token [here](https://huggingface.co/google/gemma-7b), and then store this token in the `HF_TOKEN` environment variable."],"metadata":{"collapsed":false,"id":"T9FzqhZGfvC3"}},
{"cell_type":"code","execution_count":null,"metadata":{"id":"mi50mprVsU_P"},"outputs":[],"source":["import os\n","# On Colab, the token can be read from the secrets manager instead:\n","# from google.colab import userdata\n","# os.environ[\"HF_TOKEN\"] = userdata.get('HF_TOKEN')\n","os.environ[\"HF_TOKEN\"] = \"API_TOKEN\"  # replace with your Hugging Face access token"]},
{"cell_type":"markdown","source":["### **Common Libraries Used When Fine-Tuning with Hugging Face**\n","\n","* `transformers`: the core Hugging Face library, providing easy access to state-of-the-art pre-trained models.\n","* `datasets`: provides commonly used datasets.\n","* `bitsandbytes`: offers quantization functionality, helping to reduce model memory usage and improve computational efficiency.\n","* `accelerate`: speeds up model computation.\n","* `trl` and `peft`: Transformer Reinforcement Learning and Parameter-Efficient Fine-Tuning; together they provide efficient model fine-tuning capabilities."],"metadata":{"collapsed":false,"id":"d8OEpFqbfvC5"}},
{"cell_type":"code","source":["# !pip3 install -q -U transformers==4.38.1\n","# !pip3 install -q -U datasets==2.17.0\n","# !pip3 install -q -U bitsandbytes==0.42.0\n","# !pip3 install -q -U accelerate==0.27.1\n","# !pip3 install -q -U trl==0.7.10\n","# !pip3 install -q -U peft==0.8.2"],"metadata":{"id":"-5gJk3W_s0RY"},"execution_count":null,"outputs":[]},
{"cell_type":"markdown","source":["### **What is Quantization?**\n","Quantization reduces model size and speeds up computation by converting model parameters to a lower-precision format. Applied judiciously, it saves memory with minimal impact on model performance; excessive quantization, however, can degrade performance. In essence, quantization stores each parameter in fewer bits, reducing both memory footprint and computational cost (most of the computation is matrix multiplication, so cheaper arithmetic pays off directly). A rough memory estimate is sketched in the cell below.\n","\n","* `bnb_config`: optional quantization configuration.\n","* `tokenizer`: converts text into numbers, the format the model can understand.\n","* The `model`'s `device_map`: specifies the device the model runs on, where `0` denotes GPU 0."],"metadata":{"collapsed":false,"id":"XpYHjv1YfvC5"}},
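{"cell_type":"markdown","source":["As a back-of-the-envelope illustration (added for this tutorial, not part of the original example), the next cell estimates the weight-only memory footprint of a 7B-parameter model at different precisions. Actual usage is higher because of activations, optimizer state, and quantization metadata, so treat these numbers as lower bounds."],"metadata":{}},
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Weight-only memory estimate for a 7B-parameter model at various precisions.\n","n_params = 7e9\n","for name, bits in [('fp32', 32), ('fp16/bf16', 16), ('int8', 8), ('nf4 (4-bit)', 4)]:\n","    gib = n_params * bits / 8 / 2**30\n","    print(f'{name:>12}: ~{gib:.1f} GiB')\n","# 4-bit quantization is what makes a 7B model fit comfortably on a single 16 GB T4."]},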
{"cell_type":"code","source":["import torch\n","from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n","\n","model_id = \"google/gemma-7b\"\n","# Load weights in 4-bit NF4, with computation carried out in bfloat16\n","bnb_config = BitsAndBytesConfig(\n","    load_in_4bit=True,\n","    bnb_4bit_quant_type=\"nf4\",\n","    bnb_4bit_compute_dtype=torch.bfloat16\n",")\n","\n","tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_TOKEN'])\n","model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={\"\":0}, token=os.environ['HF_TOKEN'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EVEotZX8s-v6","outputId":"e378234f-f56f-483e-c569-f3a196c02370"},"execution_count":null,"outputs":[{"data":{"text/plain":"Loading checkpoint shards: 100%|██████████| 3/3 [01:06<00:00, 18.14s/it]"},"metadata":{},"output_type":"display_data"}]},
{"cell_type":"markdown","source":["### **Setting the Output Format**\n","In the `SFTTrainer` configuration, `formatting_func` lets you customize the format of the training text, which should be the format in which you want the model to learn to generate text. For example, if we want the model's output to be a quote followed by the author's name, we can process the `quote` and `author` fields into the following format:\n","\n","> Example: `\"Quote: Be yourself; everyone else is already taken.\\nAuthor: Oscar Wilde\"`\n","\n","Thus, during fine-tuning, the data will be presented in this format, enabling the model to learn and mimic this specific output style. The LoRA configuration and training data that the trainer needs are set up in the cell below."],"metadata":{"collapsed":false,"id":"cdcQiwoJfvC7"}},
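{"cell_type":"markdown","source":["The next cell sets up the LoRA configuration and the training data that the trainer below relies on (`lora_config` and `data[\"train\"]`). It is a minimal sketch following the upstream Gemma example: the hyperparameters (`r`, `target_modules`) and the `Abirate/english_quotes` dataset are taken from that example and should be treated as assumptions rather than values preserved in this notebook."],"metadata":{}},
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from datasets import load_dataset\n","from peft import LoraConfig\n","\n","# Disable Weights & Biases logging for this quick demo\n","os.environ[\"WANDB_DISABLED\"] = \"true\"\n","\n","# Rank-8 LoRA adapters on the attention and MLP projection layers\n","lora_config = LoraConfig(\n","    r=8,\n","    target_modules=[\"q_proj\", \"o_proj\", \"k_proj\", \"v_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"],\n","    task_type=\"CAUSAL_LM\",\n",")\n","\n","# A small dataset of English quotes with 'quote' and 'author' fields\n","data = load_dataset(\"Abirate/english_quotes\")"]},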
{"cell_type":"code","source":["import transformers\n","from trl import SFTTrainer\n","\n","# Format each example as \"Quote: ...\\nAuthor: ...\"; formatting_func receives a\n","# batched example, so [0] selects the single record in each batch of size 1\n","def formatting_func(example):\n","    text = f\"Quote: {example['quote'][0]}\\nAuthor: {example['author'][0]}\"\n","    return [text]\n","\n","trainer = SFTTrainer(\n","    model=model,\n","    train_dataset=data[\"train\"],\n","    args=transformers.TrainingArguments(\n","        per_device_train_batch_size=1,\n","        gradient_accumulation_steps=4,\n","        warmup_steps=2,\n","        max_steps=10,\n","        learning_rate=2e-4,\n","        fp16=True,\n","        logging_steps=1,\n","        output_dir=\"outputs\",\n","        optim=\"paged_adamw_8bit\"\n","    ),\n","    peft_config=lora_config,\n","    formatting_func=formatting_func,\n",")\n","trainer.train()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":530},"id":"HFbR2FIgVfiT","outputId":"ba27fbda-54be-415c-ee47-78632e4ad4c6"},"execution_count":null,"outputs":[{"name":"stderr","output_type":"stream","text":["Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n","...\\site-packages\\trl\\trainer\\sft_trainer.py:245: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n","...\\site-packages\\trl\\trainer\\sft_trainer.py:317: UserWarning: You passed a tokenizer with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `tokenizer.padding_side = 'right'` to your code.\n","...\\site-packages\\accelerate\\accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead.\n"]},{"data":{"text/plain":"[2/10, Epoch 1/10 | progress table with columns: Step, Training Loss]"},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":"TrainOutput(global_step=10, training_loss=0.5222328573465347, metrics={'train_runtime': 26.3684, 'train_samples_per_second': 1.517, 'train_steps_per_second': 0.379, 'total_flos': 21555767439360.0, 'train_loss': 0.5222328573465347, 'epoch': 6.67})"},"execution_count":48,"metadata":{},"output_type":"execute_result"}]},
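{"cell_type":"markdown","source":["Because only the LoRA adapter weights were trained, they can be saved separately from the (unchanged) base model. The next cell is an optional sketch added for this tutorial, not part of the original example; the output path is an arbitrary choice."],"metadata":{}},
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Save just the trained LoRA adapter (megabytes, not the full multi-GB model).\n","# The path 'outputs/lora-adapter' is an arbitrary choice for this sketch.\n","trainer.model.save_pretrained(\"outputs/lora-adapter\")\n","tokenizer.save_pretrained(\"outputs/lora-adapter\")"]},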
"},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":"TrainOutput(global_step=10, training_loss=0.5222328573465347, metrics={'train_runtime': 26.3684, 'train_samples_per_second': 1.517, 'train_steps_per_second': 0.379, 'total_flos': 21555767439360.0, 'train_loss': 0.5222328573465347, 'epoch': 6.67})"},"execution_count":48,"metadata":{},"output_type":"execute_result"}]},{"cell_type":"markdown","source":["# **Results**\n","Although there are repetition issues, the format indeed meets our expectations.\n","\n","# **結果**\n","雖然有重覆的問題,但格式的確符合我們的預期。"],"metadata":{"collapsed":false,"id":"oR_3FxTHfvC8"}},{"cell_type":"code","source":["text = \"Quote: Imagination is\"\n","device = \"cuda:0\"\n","inputs = tokenizer(text, return_tensors=\"pt\").to(device)\n","\n","outputs = model.generate(**inputs, max_new_tokens=20)\n","print(tokenizer.decode(outputs[0], skip_special_tokens=True))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"f5Mim0lNViwe","outputId":"4534ee26-63e3-4ced-ee27-673f0b9d7afb"},"execution_count":null,"outputs":[{"name":"stdout","output_type":"stream","text":["Quote: Imagination is more important than knowledge\n","Author: Albert Einstein\n","Author: Albert Einstein\n","Author: Albert Einstein\n","\n"]}]}]}