{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Collect SAM image embeddings\n",
    "\n",
    "In this example, we will show how to create a Run containing embeddings extracted from SAM for a set of images.\n",
    "\n",
    "![](../images/sam-embeddings.png)\n",
    "\n",
    "<!-- Tags: [\"SAM\", \"embeddings\", \"coco\"] -->"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "PROJECT_NAME = \"3LC Tutorials - COCO128\"\n",
    "MODEL_TYPE = \"vit_b\"\n",
    "MODEL_URL = \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth\"\n",
    "TMP_PATH = \"../../transient_data\"\n",
    "EMBEDDING_DIM = 3\n",
    "REDUCTION_METHOD = \"umap\"\n",
    "BATCH_SIZE = 4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -q 3lc[umap]\n",
    "%pip install -q git+https://github.com/facebookresearch/segment-anything\n",
    "%pip install -q git+https://github.com/3lc-ai/3lc-examples"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import cv2\n",
    "import tlc\n",
    "import torch\n",
    "from segment_anything import sam_model_registry\n",
    "from segment_anything.utils.transforms import ResizeLongestSide\n",
    "\n",
    "from tlc_tools.common import infer_torch_device"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download model weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CHECKPOINT = TMP_PATH + \"/sam_vit_b_01ec64.pth\"\n",
    "\n",
    "if not Path(CHECKPOINT).exists():\n",
    "    torch.hub.download_url_to_file(MODEL_URL, CHECKPOINT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up model and preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = infer_torch_device()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_model():\n",
    "    sam_model = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT)\n",
    "    sam_model.to(device)\n",
    "    sam_model.eval()\n",
    "    return sam_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sam_model = create_model()\n",
    "RESIZE_TRANSFORM = ResizeLongestSide(sam_model.image_encoder.img_size)\n",
    "PREPROCESS_TRANSFORM = sam_model.preprocess\n",
    "\n",
    "\n",
    "def transform_to_sam_format(sample):\n",
    "    image = cv2.cvtColor(cv2.imread(sample[\"image\"]), cv2.COLOR_BGR2RGB)\n",
    "    image = RESIZE_TRANSFORM.apply_image(image)\n",
    "    image = torch.as_tensor(image, device=device).permute(2, 0, 1).contiguous()\n",
    "    image = PREPROCESS_TRANSFORM(image)\n",
    "\n",
    "    return {\"image\": image}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create 3LC Table and Run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the COCO128 table from ../1-create-tables/create-table-from-coco and apply the transformation defined above\n",
    "table = tlc.Table.from_names(\"initial\", \"COCO128\", PROJECT_NAME).map(transform_to_sam_format)\n",
    "\n",
    "# Initialize a 3LC Run\n",
    "run = tlc.init(\n",
    "    project_name=PROJECT_NAME,\n",
    "    run_name=\"Collect SAM embeddings\",\n",
    "    description=\"Collect embeddings for the COCO128 dataset using the SAM model\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Collect embeddings using SAM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings_metrics_collector = tlc.EmbeddingsMetricsCollector(layers=[0])\n",
    "\n",
    "predictor = tlc.Predictor(\n",
    "    sam_model.image_encoder,\n",
    "    layers=[0],\n",
    "    unpack_dicts=True,\n",
    "    device=device,\n",
    ")\n",
    "\n",
    "tlc.collect_metrics(\n",
    "    table,\n",
    "    embeddings_metrics_collector,\n",
    "    predictor,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reduce dimensionality of embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run.reduce_embeddings_by_foreign_table_url(\n",
    "    table.url,\n",
    "    method=REDUCTION_METHOD,\n",
    "    n_components=EMBEDDING_DIM,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  },
  "test_marks": [
   "slow"
  ]
 },
 "nbformat": 4,
 "nbformat_minor": 2
}