{ "cells": [ { "cell_type": "markdown", "id": "3dd485fa", "metadata": {}, "source": [ "# Exploring Video Analytics with TwelveLabs and FiftyOne\n", "\n", "Welcome to this hands-on workshop, where we will learn how to load and explore video datasets using FiftyOne.\n", "This notebook will guide you through programmatic interaction via the **FiftyOne SDK** and visualization using the **FiftyOne App**.\n", "\n", "![video_analytics_twelvelabs](https://cdn.voxel51.com/getting_started_manufacturing/notebook11/video_analytics_twelvelabs.webp)\n", "\n", "## Learning Objectives:\n", "- Load video datasets into FiftyOne.\n", "- Use plugins to connect external APIs to the FiftyOne workflow.\n", "- Compute video embeddings with TwelveLabs.\n", "- Visualize clip embeddings in FiftyOne to curate and filter your datasets.\n", "\n", "In this example, we load the dataset from a local directory on disk.\n", "\n", "- [Dataset Paper](https://www.sciencedirect.com/science/article/pii/S235234092400756X#abs0001)\n", "- [Dataset Link](https://data.mendeley.com/datasets/xjmtb22pff/1)\n", "\n", "Download, extract, and place the dataset in the root folder of this repo. In this notebook, the dataset folder is called \"Video Dataset for Safe and Unsafe Behaviours\"." ] },
{ "cell_type": "code", "execution_count": null, "id": "30a3c7cd", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Replace with your TwelveLabs API key\n", "os.environ[\"TL_API_KEY\"] = \"your_twelve_labs_api_key\"\n", "os.environ[\"FIFTYONE_ALLOW_LEGACY_ORCHESTRATORS\"] = \"true\"" ] },
{ "cell_type": "code", "execution_count": null, "id": "31976b09", "metadata": {}, "outputs": [], "source": [ "import fiftyone as fo\n", "import twelvelabs  # verifies that the TwelveLabs SDK is installed\n", "import os\n", "\n", "# Check if the TwelveLabs API key is set\n", "api_key = os.getenv(\"TL_API_KEY\")\n", "if api_key:\n", "    print(\"TwelveLabs API key found.\")\n", "else:\n", "    print(\"TwelveLabs API key is missing.\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "dc7424a4", "metadata": {}, "outputs": [], "source": [ "import os\n", "import fiftyone as fo\n", "\n", "# Define the path to your dataset and the dataset name\n", "dataset_path = os.getenv(\"VIDEO_DATASET_PATH\", \"path/to/your/video/dataset\")  # Set environment variable or replace with your dataset path\n", "dataset_name = os.getenv(\"FIFTYONE_DATASET_NAME\", \"my_video_dataset\")  # Set environment variable or replace with your dataset name\n", "\n", "# Delete the dataset if it already exists\n", "if fo.dataset_exists(dataset_name):\n", "    fo.delete_dataset(dataset_name)\n", "\n", "# Create a fresh FiftyOne dataset\n", "dataset = fo.Dataset(dataset_name)" ] },
{ "cell_type": "code", "execution_count": null, "id": "e6303b9b", "metadata": {}, "outputs": [], "source": [ "print(dataset)" ] },
{ "cell_type": "code", "execution_count": null, "id": "478c9805", "metadata": {}, "outputs": [], "source": [ "import fiftyone as fo\n", "import os\n", "from fiftyone.core.labels import Classification\n", "\n", "# Define the path to your dataset (set via environment variable or replace with your dataset path)\n", "dataset_path = os.getenv(\"VIDEO_DATASET_PATH\", \"path/to/your/video/dataset\")" ] },
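{ "cell_type": "markdown", "id": "a1f0c2d7", "metadata": {}, "source": [ "The ingestion loop below assumes the extracted dataset follows a `split/label/video` layout: `train/` and `test/` folders, each containing one folder per behaviour label, with the video files inside. The next cell is a minimal sanity check of that assumed layout so you can confirm `dataset_path` points at the right place before ingesting." ] },
{ "cell_type": "code", "execution_count": null, "id": "b7e41c9a", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Sanity-check the assumed layout: <dataset_path>/<split>/<label>/<video files>\n", "for split in [\"train\", \"test\"]:\n", "    split_dir = os.path.join(dataset_path, split)\n", "    if not os.path.isdir(split_dir):\n", "        print(f\"Missing split directory: {split_dir}\")\n", "        continue\n", "\n", "    labels = sorted(\n", "        d for d in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, d))\n", "    )\n", "    print(f\"{split}: {len(labels)} label folders -> {labels}\")" ] },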
{ "cell_type": "code", "execution_count": null, "id": "bd5e30e7", "metadata": {}, "outputs": [], "source": [ "# Add the label field as a Classification label\n", "dataset.add_sample_field(\n", "    \"label\",\n", "    fo.EmbeddedDocumentField,\n", "    embedded_doc_type=Classification,\n", ")\n", "\n", "# Iterate over the train and test directories\n", "for split in ['train', 'test']:\n", "    split_dir = os.path.join(dataset_path, split)\n", "\n", "    for label_folder in os.listdir(split_dir):\n", "        folder_path = os.path.join(split_dir, label_folder)\n", "\n", "        if os.path.isdir(folder_path):\n", "            for video_file in os.listdir(folder_path):\n", "                video_path = os.path.join(folder_path, video_file)\n", "\n", "                if video_path.endswith(('.mp4', '.avi', '.mov')):\n", "                    sample = fo.Sample(filepath=video_path)\n", "\n", "                    # Assign the label using the Classification type\n", "                    sample[\"label\"] = Classification(label=label_folder)\n", "\n", "                    # Tag the sample based on its split (train/test)\n", "                    sample.tags = [split]\n", "\n", "                    dataset.add_sample(sample)" ] },
{ "cell_type": "code", "execution_count": null, "id": "a456615c", "metadata": {}, "outputs": [], "source": [ "session = fo.launch_app(dataset, auto=False, port=5151)" ] },
{ "cell_type": "code", "execution_count": null, "id": "738c618b", "metadata": {}, "outputs": [], "source": [ "from collections import defaultdict\n", "import fiftyone as fo\n", "\n", "# Select 5 videos per label from the given split (train or test)\n", "def select_5_videos_per_label(dataset, split):\n", "    # Dictionary to store videos per label\n", "    label_videos = defaultdict(list)\n", "\n", "    # Filter the dataset by split (train or test)\n", "    if split == \"train\":\n", "        split_view = dataset.match_tags(\"train\")\n", "    elif split == \"test\":\n", "        split_view = dataset.match_tags(\"test\")\n", "    else:\n", "        raise ValueError(\"Invalid split. Choose 'train' or 'test'.\")\n", "\n", "    # Iterate through the samples and group them by label\n", "    for sample in split_view:\n", "        # Get the label text from the Classification field\n", "        label = sample[\"label\"].label\n", "        label_videos[label].append(sample)\n", "\n", "    # List to hold the selected samples (up to 5 per label)\n", "    selected_samples = []\n", "\n", "    # Select the first 5 videos for each label\n", "    for label, videos in label_videos.items():\n", "        selected_samples.extend(videos[:5])\n", "\n", "    # Limit to 40 videos in total (5 videos x 8 labels)\n", "    selected_samples = selected_samples[:40]\n", "\n", "    # Create a filtered view with the selected samples\n", "    selected_view = dataset.select([sample.id for sample in selected_samples])\n", "\n", "    return selected_view\n", "\n", "# If needed, load an existing dataset instead\n", "# dataset = fo.load_dataset(\"my_video_dataset\")  # Replace with your actual dataset name\n", "\n", "# Select 5 videos per label for the train and test splits\n", "train_selected_view = select_5_videos_per_label(dataset, \"train\")\n", "test_selected_view = select_5_videos_per_label(dataset, \"test\")\n", "\n", "# Optionally launch the FiftyOne App to visualize the selected data\n", "# session = fo.launch_app(train_selected_view, port=5153, auto=False)\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "5c28b978", "metadata": {}, "outputs": [], "source": [ "dataset.persistent = True" ] },
{ "cell_type": "code", "execution_count": null, "id": "9562acd8", "metadata": {}, "outputs": [], "source": [ "# Set export directory\n", "export_dir = \"./Safe_Unsafe_Train\"\n", "\n", "# Export in FiftyOne format\n", "train_selected_view.export(\n", "    export_dir=export_dir,\n", "    dataset_type=fo.types.FiftyOneDataset,\n", "    overwrite=True,\n", ")" ] },
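{ "cell_type": "markdown", "id": "c3d9e0f1", "metadata": {}, "source": [ "Exporting in `fo.types.FiftyOneDataset` format writes the media (or references to it) and all sample fields to `export_dir`, so the curated view can be reloaded later or on another machine. The next cell is a minimal sketch of reloading the export from disk." ] },
{ "cell_type": "code", "execution_count": null, "id": "d4e5f6a7", "metadata": {}, "outputs": [], "source": [ "# Reload the exported training subset from disk as a new dataset\n", "reloaded_train = fo.Dataset.from_dir(\n", "    dataset_dir=export_dir,\n", "    dataset_type=fo.types.FiftyOneDataset,\n", ")\n", "\n", "print(reloaded_train)" ] },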
{ "cell_type": "code", "execution_count": null, "id": "c5c92ee9", "metadata": {}, "outputs": [], "source": [ "from fiftyone.utils.huggingface import push_to_hub\n", "\n", "# Push the selected training view to the Hugging Face Hub\n", "# (the second argument is used as the repo name on the Hub)\n", "push_to_hub(train_selected_view, \"Safe_Unsafe_Train\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "54f2aeff", "metadata": {}, "outputs": [], "source": [ "# Set export directory\n", "export_dir = \"./exported_test_dataset\"\n", "\n", "# Export in FiftyOne format\n", "test_selected_view.export(\n", "    export_dir=export_dir,\n", "    dataset_type=fo.types.FiftyOneDataset,\n", "    overwrite=True,\n", ")\n", "\n", "# Clone the selected test view into its own dataset\n", "test_dataset_name = f\"{dataset_name}_test\"\n", "\n", "# Delete the dataset if it already exists\n", "if fo.dataset_exists(test_dataset_name):\n", "    fo.delete_dataset(test_dataset_name)\n", "\n", "dataset_test = test_selected_view.clone(test_dataset_name)" ] },
{ "cell_type": "code", "execution_count": null, "id": "06c73d0d", "metadata": {}, "outputs": [], "source": [ "print(dataset_test)" ] },
{ "cell_type": "code", "execution_count": null, "id": "1751ef8c", "metadata": {}, "outputs": [], "source": [ "from fiftyone.utils.huggingface import push_to_hub\n", "\n", "# Push the selected test view to the Hugging Face Hub\n", "push_to_hub(test_selected_view, \"Safe_Unsafe_Test\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "2b40fb69", "metadata": {}, "outputs": [], "source": [ "!fiftyone plugins download https://github.com/danielgural/semantic_video_search" ] },
{ "cell_type": "code", "execution_count": null, "id": "bbd39ed8", "metadata": {}, "outputs": [], "source": [ "# Optionally launch the FiftyOne app to visualize the selected data\n", "session = fo.launch_app(train_selected_view, port=5152, auto=False)" ] },
{ "cell_type": "code", "execution_count": null, "id": "23451b62", "metadata": {}, "outputs": [], "source": [ "# Run this in your terminal to start a delegated operations service\n", "# export TL_API_KEY=\"your_twelvelabs_api_key\"\n", "# export FIFTYONE_ALLOW_LEGACY_ORCHESTRATORS=\"true\"\n", "# fiftyone delegated launch" ] },
{ "cell_type": "code", "execution_count": null, "id": "2faf783d", "metadata": {}, "outputs": [], "source": [ "import fiftyone.utils.video as fouv\n", "\n", "def create_clip_dataset(\n", "    dataset: fo.Dataset,\n", "    clip_field: str,\n", "    new_dataset_name: str = \"clips\",\n", "    overwrite: bool = True,\n", ") -> fo.Dataset:\n", "    \"\"\"Extracts each clip in `clip_field` to its own video file and returns a\n", "    new dataset of those clips, carrying over the TwelveLabs embeddings.\"\"\"\n", "    clip_view = dataset.to_clips(clip_field)\n", "    clip_dataset = fo.Dataset(name=new_dataset_name, overwrite=overwrite)\n", "\n", "    samples = []\n", "    i = 0\n", "    last_file = \"\"\n", "    for clip in clip_view:\n", "        # Number the clips per source video so extracted files don't collide\n", "        if clip.filepath == last_file:\n", "            i += 1\n", "        else:\n", "            i = 0\n", "            last_file = clip.filepath\n", "\n", "        out_path = clip.filepath.rsplit(\".\", 1)[0] + f\"_{i}.mp4\"\n", "\n", "        # Extract the clip's frames to its own video file\n", "        fouv.extract_clip(clip.filepath, output_path=out_path, support=clip.support)\n", "\n", "        clip.filepath = out_path\n", "        samples.append(clip)\n", "\n", "    clip_dataset.add_samples(samples)\n", "\n", "    # Copy the TwelveLabs embeddings from the clip view onto the new dataset\n", "    clip_dataset.add_sample_field(\"Twelve Labs Marengo-retrieval-27 Embeddings\", fo.VectorField)\n", "    clip_dataset.set_values(\n", "        \"Twelve Labs Marengo-retrieval-27 Embeddings\",\n", "        clip_view.values(\"Twelve Labs Marengo-retrieval-27.embedding\"),\n", "    )\n", "\n", "    return clip_dataset" ] },
{ "cell_type": "code", "execution_count": null, "id": "f936a7f3", "metadata": {}, "outputs": [], "source": [ "clip_dataset = create_clip_dataset(dataset_test, \"Twelve Labs Marengo-retrieval-27\", overwrite=True)" ] }, {
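"cell_type": "markdown", "id": "e8f9a0b1", "metadata": {}, "source": [ "With the clip-level dataset built, the TwelveLabs embeddings copied onto it can be projected to 2D with the FiftyOne Brain and explored in the App's Embeddings panel. The next cell is a minimal sketch: it assumes the TwelveLabs embedding operator has already populated the `Twelve Labs Marengo-retrieval-27` field (so the embeddings field on `clip_dataset` is non-empty), and the `brain_key` is just an example name." ] },
{ "cell_type": "code", "execution_count": null, "id": "f1a2b3c4", "metadata": {}, "outputs": [], "source": [ "import fiftyone.brain as fob\n", "\n", "# Project the TwelveLabs clip embeddings to 2D for the Embeddings panel\n", "results = fob.compute_visualization(\n", "    clip_dataset,\n", "    embeddings=\"Twelve Labs Marengo-retrieval-27 Embeddings\",\n", "    brain_key=\"twelvelabs_marengo_viz\",\n", ")\n", "\n", "# Explore the clips and their embeddings in the App\n", "session = fo.launch_app(clip_dataset, auto=False, port=5154)" ] },
{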
"cell_type": "code", "execution_count": null, "id": "712cf581", "metadata": {}, "outputs": [], "source": [ "# Need this to grab embeddings \n", "clip_view = dataset_test.to_clips(\"Twelve Labs Marengo-retrieval-27\")" ] } ], "metadata": { "kernelspec": { "display_name": "manu_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }