{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# Train Yolo-NAS model with 3LC Metrics Collection\n", "\n", "This notebook shows how to train a Yolo-NAS detection model with 3LC Metrics Collection on an Object Detection 3LC `Table`.\n", "\n", "\n", "\n", "We recommend using Python 3.10 and making sure that `super-gradients` is installed. Note that installing `super-gradients` will downgrade the versions of several dependencies of `3lc`. We therefore install `termcolor==3.1.0` afterwards." ] }, { "cell_type": "markdown", "id": "1", "metadata": {}, "source": [ "## Install dependencies" ] }, { "cell_type": "code", "execution_count": null, "id": "2", "metadata": {}, "outputs": [], "source": [ "%pip install 3lc\n", "%pip install super-gradients\n", "%pip install termcolor==3.1.0" ] }, { "cell_type": "markdown", "id": "3", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "4", "metadata": {}, "outputs": [], "source": [ "import tlc\n", "from super_gradients.common.object_names import Models\n", "from super_gradients.training import Trainer, models\n", "from super_gradients.training.losses import PPYoloELoss\n", "from super_gradients.training.metrics import DetectionMetrics_050\n", "from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback\n", "from super_gradients.training.transforms.transforms import (\n", " DetectionHorizontalFlip,\n", " DetectionHSV,\n", " DetectionPaddedRescale,\n", " DetectionRandomAffine,\n", " DetectionStandardize,\n", " DetectionTargetsFormatTransform,\n", ")\n", "from super_gradients.training.utils.collate_fn.detection_collate_fn import DetectionCollateFN\n", "from tlc.integration.super_gradients import (\n", " DetectionDataset,\n", " DetectionMetricsCollectionCallback,\n", " PipelineParams,\n", ")\n", "from torch.utils.data import DataLoader" ] }, { "cell_type": "markdown", "id": "5", "metadata": {}, "source": [ "## Project 
Setup" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": {}, "outputs": [], "source": [ "PROJECT_NAME = \"3LC Tutorials\"\n", "DATASET_NAME = \"COCO128\"" ] }, { "cell_type": "markdown", "id": "7", "metadata": {}, "source": [ "We reuse the COCO128 tables created in [create-table-from-coco-detection.ipynb](../1-create-tables/object%20detection/create-table-from-coco-detection.ipynb), so make sure to run that notebook first. Note that in this example the same `initial` table is loaded for both training and validation." ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": {}, "outputs": [], "source": [ "train_table = tlc.Table.from_names(\"initial\", DATASET_NAME, PROJECT_NAME)\n", "val_table = tlc.Table.from_names(\"initial\", DATASET_NAME, PROJECT_NAME)" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [ "simple_value_map = train_table.get_simple_value_map(\"bbs.bb_list.label\")\n", "class_names = list(simple_value_map.values())\n", "num_classes = len(class_names)\n", "class_names" ] }, { "cell_type": "markdown", "id": "10", "metadata": {}, "source": [ "Pass the training and validation tables to a `DetectionDataset`, which loads images and annotations from the `Table` and retains image and label preprocessing functionality available in the parent SuperGradients `DetectionDataset`." 
] }, { "cell_type": "code", "execution_count": null, "id": "11", "metadata": {}, "outputs": [], "source": [ "train_dataset = DetectionDataset(\n", " train_table,\n", " input_dim=(640, 640),\n", " transforms=[\n", " DetectionRandomAffine(\n", " degrees=0.0,\n", " scales=(0.5, 1.5),\n", " shear=0.0,\n", " target_size=(640, 640),\n", " filter_box_candidates=False,\n", " border_value=128,\n", " ),\n", " DetectionHSV(prob=1.0, hgain=5, vgain=30, sgain=30),\n", " DetectionHorizontalFlip(prob=0.5),\n", " DetectionPaddedRescale(input_dim=(640, 640)),\n", " DetectionStandardize(max_value=255),\n", " DetectionTargetsFormatTransform(input_dim=(640, 640), output_format=\"LABEL_CXCYWH\"),\n", " ],\n", ")\n", "val_dataset = DetectionDataset(\n", " val_table,\n", " input_dim=(640, 640),\n", " transforms=[\n", " DetectionPaddedRescale(input_dim=(640, 640), max_targets=300),\n", " DetectionStandardize(max_value=255),\n", " DetectionTargetsFormatTransform(input_dim=(640, 640), output_format=\"LABEL_CXCYWH\"),\n", " ],\n", ")\n", "\n", "train_dataloader: DataLoader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=DetectionCollateFN())\n", "val_dataloader: DataLoader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=DetectionCollateFN())\n", "\n", "model = models.get(Models.YOLO_NAS_S, num_classes=num_classes, pretrained_weights=\"coco\")\n", "\n", "trainer = Trainer(\n", " experiment_name=\"fine-tune-yolo-nas-detect-3lc\",\n", " ckpt_root_dir=\"checkpoints\",\n", ")" ] }, { "cell_type": "markdown", "id": "12", "metadata": {}, "source": [ "Use `PipelineParams` to customize the SuperGradients `Pipeline` used for inference when collecting metrics, and a `DetectionMetricsCollectionCallback` to invoke 3LC Metrics Collection, Hyperparameter and Run logging. Make sure to pass the callback to the `phase_callbacks` in the training parameters passed to the SuperGradients `Trainer`." 
] }, { "cell_type": "code", "execution_count": null, "id": "13", "metadata": {}, "outputs": [], "source": [ "pipeline_params = PipelineParams(\n", " iou=0.5, # IoU threshold for NMS matching\n", " conf=0.3, # Confidence threshold for predictions to keep\n", " max_predictions=10, # Max number of predictions for each image\n", " class_agnostic_nms=True,\n", ")\n", "\n", "callback = DetectionMetricsCollectionCallback(\n", " project_name=PROJECT_NAME,\n", " run_description=\"Fine-tune Yolo-NAS detection model\",\n", " collect_val_only=True,\n", " pipeline_params=pipeline_params,\n", ")\n", "\n", "train_params = {\n", " \"warmup_initial_lr\": 5e-5,\n", " \"initial_lr\": 5e-5,\n", " \"lr_mode\": \"cosine\",\n", " \"cosine_final_lr_ratio\": 0.5,\n", " \"optimizer\": \"AdamW\",\n", " \"zero_weight_decay_on_bias_and_bn\": True,\n", " \"lr_warmup_epochs\": 1,\n", " \"warmup_mode\": \"LinearEpochLRWarmup\",\n", " \"optimizer_params\": {\"weight_decay\": 0.0001},\n", " \"ema\": False,\n", " \"average_best_models\": False,\n", " \"ema_params\": {\"beta\": 25, \"decay_type\": \"exp\"},\n", " \"max_epochs\": 5,\n", " \"mixed_precision\": True,\n", " \"loss\": PPYoloELoss(use_static_assigner=False, num_classes=num_classes, reg_max=16),\n", " \"valid_metrics_list\": [\n", " DetectionMetrics_050(\n", " score_thres=0.1,\n", " top_k_predictions=300,\n", " num_cls=num_classes,\n", " normalize_targets=True,\n", " include_classwise_ap=True,\n", " class_names=class_names,\n", " post_prediction_callback=PPYoloEPostPredictionCallback(\n", " score_threshold=0.01, nms_top_k=1000, max_predictions=300, nms_threshold=0.7\n", " ),\n", " )\n", " ],\n", " \"metric_to_watch\": \"mAP@0.50\",\n", " \"phase_callbacks\": [callback],\n", "}\n", "\n", "run = tlc.init(project_name=PROJECT_NAME)\n", "\n", "trainer.train(\n", " model=model,\n", " training_params=train_params,\n", " train_loader=train_dataloader,\n", " valid_loader=val_dataloader,\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": 
".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.22" }, "test_marks": [ "dependent" ] }, "nbformat": 4, "nbformat_minor": 5 }