{ "cells": [ { "cell_type": "markdown", "id": "3dd485fa", "metadata": {}, "source": [ "# Exploring Video Analytics with TwelveLabs and FiftyOne\n", "\n", "Welcome to this hands-on workshop, where we will learn how to load and explore video datasets using FiftyOne.\n", "This notebook will guide you through programmatic interaction via the **FiftyOne SDK** and visualization using the **FiftyOne App**.\n", "\n", "![video_analytics_twelvelabs](https://cdn.voxel51.com/getting_started_manufacturing/notebook11/video_analytics_twelvelabs.webp)\n", "\n", "## Learning Objectives:\n", "- Load video datasets into FiftyOne.\n", "- Use plugins to connect external APIs to the FiftyOne workflow.\n", "- Compute video embeddings with TwelveLabs.\n", "- Visualize clip embeddings in FiftyOne to curate and filter your datasets.\n", "\n", "In this example, we load the dataset from a local directory on disk.\n", "\n", "- [Dataset Paper](https://www.sciencedirect.com/science/article/pii/S235234092400756X#abs0001)\n", "- [Dataset Link](https://data.mendeley.com/datasets/xjmtb22pff/1)\n", "\n", "Download, extract, and place the dataset in the root folder of this repo. In this notebook, the dataset folder is called \"Video Dataset for Safe and Unsafe Behaviours\"." ] },
{ "cell_type": "code", "execution_count": null, "id": "30a3c7cd", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Replace with your TwelveLabs API key\n", "os.environ[\"TL_API_KEY\"] = \"your_twelve_labs_api_key\"\n", "os.environ[\"FIFTYONE_ALLOW_LEGACY_ORCHESTRATORS\"] = \"true\"" ] },
{ "cell_type": "code", "execution_count": null, "id": "31976b09", "metadata": {}, "outputs": [], "source": [ "import fiftyone as fo\n", "import twelvelabs  # verifies that the TwelveLabs SDK is installed\n", "import os\n", "\n", "# Check if the TwelveLabs API key is set\n", "api_key = os.getenv(\"TL_API_KEY\")\n", "if api_key:\n", "    print(\"TwelveLabs API key found.\")\n", "else:\n", "    print(\"TwelveLabs API key is missing.\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "dc7424a4", "metadata": {}, "outputs": [], "source": [ "import os\n", "import fiftyone as fo\n", "\n", "# Define the path to your dataset and the dataset name\n", "dataset_path = os.getenv(\"VIDEO_DATASET_PATH\", \"path/to/your/video/dataset\")  # Set environment variable or replace with your dataset path\n", "dataset_name = os.getenv(\"FIFTYONE_DATASET_NAME\", \"my_video_dataset\")  # Set environment variable or replace with your dataset name\n", "\n", "# Delete the dataset if it already exists\n", "if fo.dataset_exists(dataset_name):\n", "    fo.delete_dataset(dataset_name)\n", "\n", "# Create a fresh FiftyOne dataset\n", "dataset = fo.Dataset(dataset_name)" ] },
{ "cell_type": "code", "execution_count": null, "id": "e6303b9b", "metadata": {}, "outputs": [], "source": [ "print(dataset)" ] },
{ "cell_type": "code", "execution_count": null, "id": "478c9805", "metadata": {}, "outputs": [], "source": [ "import fiftyone as fo\n", "import os\n", "from fiftyone.core.labels import Classification\n", "\n", "# Define the path to your dataset (set via environment variable or replace with your dataset path)\n", "dataset_path = os.getenv(\"VIDEO_DATASET_PATH\", \"path/to/your/video/dataset\")" ] },
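{ "cell_type": "markdown", "id": "a1f0c2d7", "metadata": {}, "source": [ "The ingestion loop below assumes the extracted dataset follows a `split/label/video` layout: `train/` and `test/` folders, each containing one folder per behaviour label, with the video files inside. The next cell is a minimal sanity check of that assumed layout so you can confirm `dataset_path` points at the right place before ingesting." ] },
{ "cell_type": "code", "execution_count": null, "id": "b7e41c9a", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# Sanity-check the assumed layout: <dataset_path>/<split>/<label>/<video files>\n", "for split in [\"train\", \"test\"]:\n", "    split_dir = os.path.join(dataset_path, split)\n", "    if not os.path.isdir(split_dir):\n", "        print(f\"Missing split directory: {split_dir}\")\n", "        continue\n", "\n", "    labels = sorted(\n", "        d for d in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, d))\n", "    )\n", "    print(f\"{split}: {len(labels)} label folders -> {labels}\")" ] },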
{ "cell_type": "code", "execution_count": null, "id": "bd5e30e7", "metadata": {}, "outputs": [], "source": [ "# Add the label field as a Classification label\n", "dataset.add_sample_field(\n", "    \"label\",\n", "    fo.EmbeddedDocumentField,\n", "    embedded_doc_type=Classification,\n", ")\n", "\n", "# Iterate over the train and test directories\n", "for split in ['train', 'test']:\n", "    split_dir = os.path.join(dataset_path, split)\n", "\n", "    for label_folder in os.listdir(split_dir):\n", "        folder_path = os.path.join(split_dir, label_folder)\n", "\n", "        if os.path.isdir(folder_path):\n", "            for video_file in os.listdir(folder_path):\n", "                video_path = os.path.join(folder_path, video_file)\n", "\n", "                if video_path.endswith(('.mp4', '.avi', '.mov')):\n", "                    sample = fo.Sample(filepath=video_path)\n", "\n", "                    # Assign the label using the Classification type\n", "                    sample[\"label\"] = Classification(label=label_folder)\n", "\n", "                    # Tag the sample based on its split (train/test)\n", "                    sample.tags = [split]\n", "\n", "                    dataset.add_sample(sample)" ] },
{ "cell_type": "code", "execution_count": null, "id": "a456615c", "metadata": {}, "outputs": [], "source": [ "session = fo.launch_app(dataset, auto=False, port=5151)" ] },
{ "cell_type": "code", "execution_count": null, "id": "738c618b", "metadata": {}, "outputs": [], "source": [ "from collections import defaultdict\n", "import fiftyone as fo\n", "\n", "# Select 5 videos per label from the given split (train or test)\n", "def select_5_videos_per_label(dataset, split):\n", "    # Dictionary to store videos per label\n", "    label_videos = defaultdict(list)\n", "\n", "    # Filter the dataset by split (train or test)\n", "    if split == \"train\":\n", "        split_view = dataset.match_tags(\"train\")\n", "    elif split == \"test\":\n", "        split_view = dataset.match_tags(\"test\")\n", "    else:\n", "        raise ValueError(\"Invalid split. Choose 'train' or 'test'.\")\n", "\n", "    # Iterate through the samples and group them by label\n", "    for sample in split_view:\n", "        # Get the label text from the Classification field\n", "        label = sample[\"label\"].label\n", "        label_videos[label].append(sample)\n", "\n", "    # List to hold the selected samples (up to 5 per label)\n", "    selected_samples = []\n", "\n", "    # Select the first 5 videos for each label\n", "    for label, videos in label_videos.items():\n", "        selected_samples.extend(videos[:5])\n", "\n", "    # Limit to 40 videos in total (5 videos x 8 labels)\n", "    selected_samples = selected_samples[:40]\n", "\n", "    # Create a filtered view with the selected samples\n", "    selected_view = dataset.select([sample.id for sample in selected_samples])\n", "\n", "    return selected_view\n", "\n", "# If needed, load an existing dataset instead\n", "# dataset = fo.load_dataset(\"my_video_dataset\")  # Replace with your actual dataset name\n", "\n", "# Select 5 videos per label for the train and test splits\n", "train_selected_view = select_5_videos_per_label(dataset, \"train\")\n", "test_selected_view = select_5_videos_per_label(dataset, \"test\")\n", "\n", "# Optionally launch the FiftyOne App to visualize the selected data\n", "# session = fo.launch_app(train_selected_view, port=5153, auto=False)\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "5c28b978", "metadata": {}, "outputs": [], "source": [ "dataset.persistent = True" ] },
{ "cell_type": "code", "execution_count": null, "id": "9562acd8", "metadata": {}, "outputs": [], "source": [ "# Set export directory\n", "export_dir = \"./Safe_Unsafe_Train\"\n", "\n", "# Export in FiftyOne format\n", "train_selected_view.export(\n", "    export_dir=export_dir,\n", "    dataset_type=fo.types.FiftyOneDataset,\n", "    overwrite=True,\n", ")" ] },
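{ "cell_type": "markdown", "id": "c3d9e0f1", "metadata": {}, "source": [ "Exporting in `fo.types.FiftyOneDataset` format writes the media (or references to it) and all sample fields to `export_dir`, so the curated view can be reloaded later or on another machine. The next cell is a minimal sketch of reloading the export from disk." ] },
{ "cell_type": "code", "execution_count": null, "id": "d4e5f6a7", "metadata": {}, "outputs": [], "source": [ "# Reload the exported training subset from disk as a new dataset\n", "reloaded_train = fo.Dataset.from_dir(\n", "    dataset_dir=export_dir,\n", "    dataset_type=fo.types.FiftyOneDataset,\n", ")\n", "\n", "print(reloaded_train)" ] },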
{ "cell_type": "code", "execution_count": null, "id": "c5c92ee9", "metadata": {}, "outputs": [], "source": [ "from fiftyone.utils.huggingface import push_to_hub\n", "\n", "# Push the selected training view to the Hugging Face Hub\n", "# (the second argument is used as the repo name on the Hub)\n", "push_to_hub(train_selected_view, \"Safe_Unsafe_Train\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "54f2aeff", "metadata": {}, "outputs": [], "source": [ "# Set export directory\n", "export_dir = \"./exported_test_dataset\"\n", "\n", "# Export in FiftyOne format\n", "test_selected_view.export(\n", "    export_dir=export_dir,\n", "    dataset_type=fo.types.FiftyOneDataset,\n", "    overwrite=True,\n", ")\n", "\n", "# Clone the selected test view into its own dataset\n", "test_dataset_name = f\"{dataset_name}_test\"\n", "\n", "# Delete the dataset if it already exists\n", "if fo.dataset_exists(test_dataset_name):\n", "    fo.delete_dataset(test_dataset_name)\n", "\n", "dataset_test = test_selected_view.clone(test_dataset_name)" ] },
{ "cell_type": "code", "execution_count": null, "id": "06c73d0d", "metadata": {}, "outputs": [], "source": [ "print(dataset_test)" ] },
{ "cell_type": "code", "execution_count": null, "id": "1751ef8c", "metadata": {}, "outputs": [], "source": [ "from fiftyone.utils.huggingface import push_to_hub\n", "\n", "# Push the selected test view to the Hugging Face Hub\n", "push_to_hub(test_selected_view, \"Safe_Unsafe_Test\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "2b40fb69", "metadata": {}, "outputs": [], "source": [ "!fiftyone plugins download https://github.com/danielgural/semantic_video_search" ] },
{ "cell_type": "code", "execution_count": null, "id": "bbd39ed8", "metadata": {}, "outputs": [], "source": [ "# Optionally launch the FiftyOne app to visualize the selected data\n", "session = fo.launch_app(train_selected_view, port=5152, auto=False)" ] },
{ "cell_type": "code", "execution_count": null, "id": "23451b62", "metadata": {}, "outputs": [], "source": [ "# Run this in your terminal to start a delegated operations service\n", "# export TL_API_KEY=\"your_twelvelabs_api_key\"\n", "# export FIFTYONE_ALLOW_LEGACY_ORCHESTRATORS=\"true\"\n", "# fiftyone delegated launch" ] },
{ "cell_type": "code", "execution_count": null, "id": "2faf783d", "metadata": {}, "outputs": [], "source": [ "import fiftyone.utils.video as fouv\n", "\n", "def create_clip_dataset(\n", "    dataset: fo.Dataset,\n", "    clip_field: str,\n", "    new_dataset_name: str = \"clips\",\n", "    overwrite: bool = True,\n", ") -> fo.Dataset:\n", "    \"\"\"Extracts each clip in `clip_field` to its own video file and returns a\n", "    new dataset of those clips, carrying over the TwelveLabs embeddings.\"\"\"\n", "    clip_view = dataset.to_clips(clip_field)\n", "    clip_dataset = fo.Dataset(name=new_dataset_name, overwrite=overwrite)\n", "\n", "    samples = []\n", "    i = 0\n", "    last_file = \"\"\n", "    for clip in clip_view:\n", "        # Number the clips per source video so extracted files don't collide\n", "        if clip.filepath == last_file:\n", "            i += 1\n", "        else:\n", "            i = 0\n", "            last_file = clip.filepath\n", "\n", "        out_path = clip.filepath.rsplit(\".\", 1)[0] + f\"_{i}.mp4\"\n", "\n", "        # Extract the clip's frames to its own video file\n", "        fouv.extract_clip(clip.filepath, output_path=out_path, support=clip.support)\n", "\n", "        clip.filepath = out_path\n", "        samples.append(clip)\n", "\n", "    clip_dataset.add_samples(samples)\n", "\n", "    # Copy the TwelveLabs embeddings from the clip view onto the new dataset\n", "    clip_dataset.add_sample_field(\"Twelve Labs Marengo-retrieval-27 Embeddings\", fo.VectorField)\n", "    clip_dataset.set_values(\n", "        \"Twelve Labs Marengo-retrieval-27 Embeddings\",\n", "        clip_view.values(\"Twelve Labs Marengo-retrieval-27.embedding\"),\n", "    )\n", "\n", "    return clip_dataset" ] },
{ "cell_type": "code", "execution_count": null, "id": "f936a7f3", "metadata": {}, "outputs": [], "source": [ "clip_dataset = create_clip_dataset(dataset_test, \"Twelve Labs Marengo-retrieval-27\", overwrite=True)" ] }, {
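"cell_type": "markdown", "id": "e8f9a0b1", "metadata": {}, "source": [ "With the clip-level dataset built, the TwelveLabs embeddings copied onto it can be projected to 2D with the FiftyOne Brain and explored in the App's Embeddings panel. The next cell is a minimal sketch: it assumes the TwelveLabs embedding operator has already populated the `Twelve Labs Marengo-retrieval-27` field (so the embeddings field on `clip_dataset` is non-empty), and the `brain_key` is just an example name." ] },
{ "cell_type": "code", "execution_count": null, "id": "f1a2b3c4", "metadata": {}, "outputs": [], "source": [ "import fiftyone.brain as fob\n", "\n", "# Project the TwelveLabs clip embeddings to 2D for the Embeddings panel\n", "results = fob.compute_visualization(\n", "    clip_dataset,\n", "    embeddings=\"Twelve Labs Marengo-retrieval-27 Embeddings\",\n", "    brain_key=\"twelvelabs_marengo_viz\",\n", ")\n", "\n", "# Explore the clips and their embeddings in the App\n", "session = fo.launch_app(clip_dataset, auto=False, port=5154)" ] },
{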
"cell_type": "code", "execution_count": null, "id": "712cf581", "metadata": {}, "outputs": [], "source": [ "# Need this to grab embeddings \n", "clip_view = dataset_test.to_clips(\"Twelve Labs Marengo-retrieval-27\")" ] } ], "metadata": { "kernelspec": { "display_name": "manu_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }