{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Collect SAM image embeddings\n", "\n", "In this example, we will show how to create a Run containing embeddings extracted from SAM for a set of images.\n", "\n", "![](../images/sam-embeddings.png)\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup project" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "parameters" ] }, "outputs": [], "source": [ "PROJECT_NAME = \"3LC Tutorials - COCO128\"\n", "MODEL_TYPE = \"vit_b\"\n", "MODEL_URL = \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth\"\n", "DOWNLOAD_PATH = \"../../transient_data\"\n", "EMBEDDING_DIM = 3\n", "REDUCTION_METHOD = \"umap\"\n", "BATCH_SIZE = 4" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Install dependencies" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%pip install 3lc[umap]\n", "%pip install git+https://github.com/facebookresearch/segment-anything\n", "%pip install git+https://github.com/3lc-ai/3lc-examples" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "import cv2\n", "import tlc\n", "import torch\n", "from segment_anything import sam_model_registry\n", "from segment_anything.utils.transforms import ResizeLongestSide\n", "\n", "from tlc_tools.common import infer_torch_device" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download model weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "CHECKPOINT = DOWNLOAD_PATH + \"/sam_vit_b_01ec64.pth\"\n", "\n", "if not Path(CHECKPOINT).exists():\n", " torch.hub.download_url_to_file(MODEL_URL, CHECKPOINT)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Set up model and preprocessing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "device = infer_torch_device()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def create_model():\n", " sam_model = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT)\n", " sam_model.to(device)\n", " sam_model.eval()\n", " return sam_model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sam_model = create_model()\n", "RESIZE_TRANSFORM = ResizeLongestSide(sam_model.image_encoder.img_size)\n", "PREPROCESS_TRANSFORM = sam_model.preprocess\n", "\n", "\n", "def transform_to_sam_format(sample):\n", " image = cv2.cvtColor(cv2.imread(sample[\"image\"]), cv2.COLOR_BGR2RGB)\n", " image = RESIZE_TRANSFORM.apply_image(image)\n", " image = torch.as_tensor(image, device=device).permute(2, 0, 1).contiguous()\n", " image = PREPROCESS_TRANSFORM(image)\n", "\n", " return {\"image\": image}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create 3LC Table and Run" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Reuse the COCO128 table from ../1-create-tables/create-table-from-coco and apply the transformation defined above\n", "table = tlc.Table.from_names(\"initial\", \"COCO128\", PROJECT_NAME).map(transform_to_sam_format)\n", "\n", "# Initialize a 3LC Run\n", "run = tlc.init(\n", " project_name=PROJECT_NAME,\n", " run_name=\"Collect SAM embeddings\",\n", " description=\"Collect embeddings for the COCO128 dataset using the SAM model\",\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Collect embeddings using SAM" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "embeddings_metrics_collector = tlc.EmbeddingsMetricsCollector(layers=[0])\n", "\n", "predictor = tlc.Predictor(\n", " sam_model.image_encoder,\n", " layers=[0],\n", " unpack_dicts=True,\n", " device=device,\n", ")\n", "\n", "tlc.collect_metrics(\n", " table,\n", " embeddings_metrics_collector,\n", " predictor,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reduce dimensionality of embeddings" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "run.reduce_embeddings_by_foreign_table_url(\n", " table.url,\n", " method=REDUCTION_METHOD,\n", " n_components=EMBEDDING_DIM,\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" }, "test_marks": [ "slow" ] }, "nbformat": 4, "nbformat_minor": 2 }