{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Register Augmented Samples From a Training Loop\n",
    "\n",
    "In this notebook, we will demonstrate how to register augmented samples from a training loop as 3LC metrics.\n",
    "\n",
    "![](../images/augmentations.jpg)\n",
    "\n",
    "The notebook walks through the following steps:\n",
    "\n",
    "- Define some image augmentations\n",
    "- Register an image folder as a `tlc.Table`\n",
    "- Create a `tlc.Run` to store the augmented samples\n",
    "- Iterate a number of times through the Table using a dataloader, writing batches of augmented images as 3LC metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Project setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "PROJECT_NAME = \"3LC Tutorials - Augmentation Explorer\"\n",
    "DATASET_NAME = \"COCO128\"\n",
    "TABLE_NAME = \"images-only\"\n",
    "RUN_NAME = \"register-augmented-samples\"\n",
    "RUN_DESCRIPTION = \"Inspecting augmentations on COCO-128\"\n",
    "DATA_PATH = \"../../data\"\n",
    "BATCH_SIZE = 32\n",
    "EPOCHS = 10\n",
    "INSTALL_DEPENDENCIES = True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if INSTALL_DEPENDENCIES:\n",
    "    %pip install 3lc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import tlc\n",
    "import torch\n",
    "import torchvision.transforms.v2 as T\n",
    "from torch.utils.data import DataLoader"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Augmentations and Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Augmentation pipeline applied to every sample read from the table.\n",
    "# The random transforms run first; the last three steps convert the result\n",
    "# to a float32 tensor and resize it to a fixed 128x128 so that samples can\n",
    "# be stacked into batches by the data loader.\n",
    "augmentations = T.Compose(\n",
    "    [\n",
    "        # Force three channels so every sample has the same layout\n",
    "        T.Lambda(lambda x: x.convert(\"RGB\")),\n",
    "        T.RandomAffine(degrees=20, translate=(0.1, 0.1)),\n",
    "        T.RandomHorizontalFlip(),\n",
    "        T.RandomVerticalFlip(),\n",
    "        T.RandomAdjustSharpness(0.5),\n",
    "        T.RandomAutocontrast(0.5),\n",
    "        T.RandomEqualize(0.5),\n",
    "        T.RandomInvert(0.5),\n",
    "        T.RandomPosterize(4),\n",
    "        T.RandomSolarize(0.5),\n",
    "        T.ToImage(),\n",
    "        T.ToDtype(torch.float32, scale=True),\n",
    "        T.Resize((128, 128), antialias=True),\n",
    "    ]\n",
    ")\n",
    "\n",
    "# Fail early with a clear message if the dataset has not been downloaded\n",
    "image_folder_path = Path(DATA_PATH).absolute() / \"coco128\"\n",
    "assert image_folder_path.exists(), f\"Path {image_folder_path} does not exist\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Register the Dataset as a Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table = tlc.Table.from_image_folder(\n",
    "    image_folder_path,\n",
    "    table_name=TABLE_NAME,\n",
    "    dataset_name=DATASET_NAME,\n",
    "    project_name=PROJECT_NAME,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the augmentations whenever a sample is read from the table.\n",
    "# NOTE(review): x[0] is presumably the image of an (image, label) sample - confirm\n",
    "table.map(lambda x: augmentations(x[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a Run and Register Augmented Samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "AUGMENTED_IMAGE_COLUMN_NAME = \"augmented_img\"\n",
    "\n",
    "run = tlc.init(\n",
    "    PROJECT_NAME,\n",
    "    RUN_NAME,\n",
    "    description=RUN_DESCRIPTION,\n",
    "    parameters={\"augmentations\": str(augmentations)},\n",
    "    if_exists=\"overwrite\",\n",
    ")\n",
    "\n",
    "# Create a metrics table writer to store the augmented images\n",
    "metrics_writer = tlc.MetricsTableWriter(\n",
    "    run.url,\n",
    "    table.url,\n",
    "    column_schemas={AUGMENTED_IMAGE_COLUMN_NAME: tlc.PILImage},\n",
    ")\n",
    "\n",
    "# Create a data loader. It is important to set shuffle=False to ensure we can match the\n",
    "# augmented images with the input table rows\n",
    "dl = DataLoader(table, batch_size=BATCH_SIZE, shuffle=False)\n",
    "\n",
    "# Build the tensor -> PIL converter once instead of once per image\n",
    "to_pil = T.ToPILImage()\n",
    "\n",
    "for epoch in range(EPOCHS):\n",
    "    for batch_idx, batch in enumerate(dl):\n",
    "        # The final batch holds fewer than BATCH_SIZE samples when the table length is\n",
    "        # not a multiple of BATCH_SIZE, so size every column from the actual batch\n",
    "        num_samples = len(batch)\n",
    "\n",
    "        # Provide sample indices to identify written metrics-images with rows of the input table.\n",
    "        # With shuffle=False every batch before this one is full, so it starts at this offset\n",
    "        first_index = batch_idx * BATCH_SIZE\n",
    "        sample_indices = list(range(first_index, first_index + num_samples))\n",
    "\n",
    "        # Convert the batch to PIL images\n",
    "        images_batch = [to_pil(img) for img in batch]\n",
    "\n",
    "        # Write the batch to the metrics table; all columns must have the same length\n",
    "        metrics_writer.add_batch(\n",
    "            {\n",
    "                AUGMENTED_IMAGE_COLUMN_NAME: images_batch,\n",
    "                tlc.EXAMPLE_ID: sample_indices,\n",
    "                tlc.EPOCH: [epoch] * num_samples,  # Add a constant epoch column\n",
    "            }\n",
    "        )\n",
    "\n",
    "# Finalize writes the metrics table to disk\n",
    "metrics_table = metrics_writer.finalize()\n",
    "\n",
    "# Ensure the metrics table is associated with the run\n",
    "run.add_metrics_table(metrics_table)\n",
    "\n",
    "# Mark the run as completed\n",
    "run.set_status_completed()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}