{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# Ingest FHIBE Dataset\n", "\n", "This tutorial is still a work in progress. Check back shortly for a more complete, well documented example!" ] }, { "cell_type": "markdown", "id": "1", "metadata": {}, "source": [ "## Install dependencies" ] }, { "cell_type": "code", "execution_count": null, "id": "2", "metadata": {}, "outputs": [], "source": [ "%pip install -q 3lc" ] }, { "cell_type": "markdown", "id": "3", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "4", "metadata": {}, "outputs": [], "source": [ "import itertools\n", "import json\n", "from collections import defaultdict\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import tlc\n", "from tlc.core.helpers._value_map_builder import _ValueMapBuilder" ] }, { "cell_type": "markdown", "id": "5", "metadata": {}, "source": [ "## Project setup" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": {}, "outputs": [], "source": [ "PROJECT_NAME = \"3LC Tutorials - FHIBE\"\n", "DATASET_NAME = \"FHIBE\"\n", "TABLE_NAME = \"full-improved\"\n", "MAX_SAMPLES = 1_000_000" ] }, { "cell_type": "code", "execution_count": null, "id": "7", "metadata": {}, "outputs": [], "source": [ "FHIBE_ROOT = Path(\n", " \"D:/Data/fhibe.20250716.u.gT5_rFTA_downsampled_public_raw_only/fhibe.20250716.u.gT5_rFTA_downsampled_public_raw_only\"\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": {}, "outputs": [], "source": [ "DATA_ROOT = FHIBE_ROOT / \"data/raw/fhibe_downsampled\"\n", "METADATA_ROOT = FHIBE_ROOT / \"data/processed/\"\n", "CSV_FILE = METADATA_ROOT / \"fhibe_downsampled\" / \"fhibe_downsampled.csv\"" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [ "csv_file = pd.read_csv(CSV_FILE)\n", "csv_file = csv_file.loc[:, ~csv_file.columns.str.contains(\"annotator_id\")]\n", "csv_file.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "10", "metadata": {}, "outputs": [], "source": [ "def to_natural(x):\n", " # NaN stays NaN\n", " if pd.isna(x):\n", " return np.nan\n", "\n", " # Already a python type you want to keep\n", " if isinstance(x, (list, dict, int, float, bool)):\n", " return x\n", "\n", " # Strings: maybe JSON, maybe not\n", " if isinstance(x, str):\n", " s = x.strip().replace(\"'\", '\"')\n", " try:\n", " return json.loads(s)\n", " except Exception:\n", " return s" ] }, { "cell_type": "code", "execution_count": null, "id": "11", "metadata": {}, "outputs": [], "source": [ "IGNORE_KEYS = set([\"filepath\", \"image_height\", \"image_width\", \"keypoints\", \"face_bbox\", \"person_bbox\", \"segments\"])\n", "\n", "for col_name in set(csv_file.columns) - IGNORE_KEYS:\n", " csv_file[col_name] = csv_file[col_name].apply(to_natural)" ] }, { "cell_type": "code", "execution_count": null, "id": "12", "metadata": {}, "outputs": [], "source": [ "# Handle nan-issues\n", "for col_name in set(csv_file.columns) - IGNORE_KEYS:\n", " if \"annotator_id\" in col_name:\n", " continue\n", " col = csv_file[col_name]\n", "\n", " if col.isna().sum() == 0:\n", " continue\n", "\n", " non_nan = col.dropna()\n", " types = non_nan.map(type).value_counts()\n", " dominant = types.index[0] if len(types) else None\n", " print(f\"{col_name} has {col.isna().sum()} nan-values. 
{ "cell_type": "code", "execution_count": null, "id": "11", "metadata": {}, "outputs": [], "source": [
"IGNORE_KEYS = {\"filepath\", \"image_height\", \"image_width\", \"keypoints\", \"face_bbox\", \"person_bbox\", \"segments\"}\n",
"\n",
"for col_name in set(csv_file.columns) - IGNORE_KEYS:\n",
"    csv_file[col_name] = csv_file[col_name].apply(to_natural)"
] },
{ "cell_type": "code", "execution_count": null, "id": "12", "metadata": {}, "outputs": [], "source": [
"# Handle NaN values: fill them with an empty value matching each column's dominant non-NaN type\n",
"for col_name in set(csv_file.columns) - IGNORE_KEYS:\n",
"    if \"annotator_id\" in col_name:\n",
"        continue\n",
"    col = csv_file[col_name]\n",
"\n",
"    if col.isna().sum() == 0:\n",
"        continue\n",
"\n",
"    non_nan = col.dropna()\n",
"    types = non_nan.map(type).value_counts()\n",
"    dominant = types.index[0] if len(types) else None\n",
"    print(f\"{col_name} has {col.isna().sum()} nan-values. Dominant type: {dominant}\")\n",
"    empty_for = {\n",
"        str: \"\",\n",
"        float: np.nan,\n",
"    }\n",
"    empty_value = empty_for.get(dominant)\n",
"\n",
"    # Use a default argument in the lambda to bind the current value of empty_value\n",
"    col = col.apply(lambda x, empty_value=empty_value: empty_value if pd.isna(x) else x)\n",
"    csv_file[col_name] = col"
] },
{ "cell_type": "code", "execution_count": null, "id": "13", "metadata": {}, "outputs": [], "source": [
"override_schemas = {\n",
"    \"image_id\": tlc.StringSchema(default_visible=False, writable=False),\n",
"    \"subject_id\": tlc.StringSchema(default_visible=False, writable=False),\n",
"    \"json_path\": tlc.StringSchema(default_visible=False, writable=False),\n",
"    \"user_date_captured\": tlc.StringSchema(default_visible=False, writable=False),\n",
"    \"model\": tlc.StringSchema(default_visible=False, writable=False),\n",
"    \"location_region\": tlc.StringSchema(default_visible=False, writable=False),\n",
"}\n",
"\n",
"\n",
"def normalize_string(s):\n",
"    return s.replace(\".\", \"\").replace(\":\", \";\").strip(\", \")\n",
"\n",
"\n",
"def schema_for_column(col_name, col_value):\n",
"    if col_name in override_schemas:\n",
"        return override_schemas[col_name], col_value.apply(str)\n",
"\n",
"    is_list = False\n",
"    if isinstance(col_value[0], float):\n",
"        return tlc.Float32Schema(default_visible=False), None\n",
"    elif isinstance(col_value[0], (bool, np.bool_)):\n",
"        return tlc.BoolSchema(default_visible=False), None\n",
"    elif isinstance(col_value[0], (int, np.int32, np.int64)):\n",
"        return tlc.Int32Schema(default_visible=False), None\n",
"    elif isinstance(col_value[0], (list, np.ndarray)):\n",
"        is_list = True\n",
"\n",
"    # Default behaviour for strings and lists of strings is to convert to categoricals.\n",
"    element = col_value[0] if not is_list else col_value[0][0]\n",
"    if not isinstance(element, str):\n",
"        return None, None\n",
"\n",
"    # 1. Build the vocabulary of normalized string values\n",
"    vals = col_value.apply(\n",
"        lambda x: x\n",
"        if isinstance(x, (list, tuple, np.ndarray))\n",
"        else ([] if (x is None or (isinstance(x, float) and np.isnan(x))) else [x])\n",
"    )\n",
"\n",
"    vocab = sorted({normalize_string(str(s)) for s in itertools.chain.from_iterable(vals)})\n",
"    str_to_id = {s: i for i, s in enumerate(vocab)}\n",
"    id_to_str = {i: s for s, i in str_to_id.items()}\n",
"\n",
"    # These columns embed an RGB triple in each value; parse it and attach it as the display color\n",
"    if col_name in [\"apparent_skin_color\", \"natural_skin_color\"]:\n",
"        for k, v in id_to_str.items():\n",
"            hex_color = tlc.rgb_tuple_to_hex(json.loads(v[v.index(\"[\") : v.index(\"]\") + 1]))\n",
"            id_to_str[k] = tlc.MapElement(v, display_color=hex_color)\n",
"\n",
"    # 2. Transform the column values into categorical ids\n",
"    def encode(x):\n",
"        if isinstance(x, (list, tuple, np.ndarray)):\n",
"            return [str_to_id[normalize_string(str(s))] for s in x]\n",
"        return str_to_id[normalize_string(str(x))]\n",
"\n",
"    transformed = col_value.apply(encode)\n",
"\n",
"    # 3. Build the schema\n",
"    schema_type = tlc.CategoricalLabelListSchema if is_list else tlc.CategoricalLabelSchema\n",
"    return schema_type(classes=id_to_str, default_visible=False, writable=False), transformed"
] },
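{ "cell_type": "markdown", "id": "24", "metadata": {}, "source": [ "As a quick illustration of the categorical conversion, the next cell runs `schema_for_column` on a tiny hand-made `pandas.Series` (not a real FHIBE column). It shows how values are normalized into a vocabulary and how the column is re-encoded as integer ids." ] },
{ "cell_type": "code", "execution_count": null, "id": "25", "metadata": {}, "outputs": [], "source": [
"# Illustrative only: a tiny hand-made string column, not a real FHIBE column\n",
"demo_col = pd.Series([\"glasses\", \"hat, \", \"glasses\"])\n",
"demo_schema, demo_encoded = schema_for_column(\"demo_accessories\", demo_col)\n",
"print(list(demo_encoded))  # categorical ids into the inferred value map\n",
"demo_schema"
] },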
{ "cell_type": "code", "execution_count": null, "id": "14", "metadata": {}, "outputs": [], "source": [
"def infer_schemas_and_transform_categoricals(df) -> dict[str, tlc.Schema]:\n",
"    column_schemas = {}\n",
"    for col in set(df.columns) - IGNORE_KEYS:\n",
"        schema, transformed = schema_for_column(col, df[col])\n",
"        column_schemas[col] = schema\n",
"        if transformed is not None:\n",
"            df[col] = transformed\n",
"    return column_schemas\n",
"\n",
"\n",
"column_schemas = infer_schemas_and_transform_categoricals(csv_file)"
] },
{ "cell_type": "markdown", "id": "15", "metadata": {}, "source": [ "## Define annotation transforms" ] },
{ "cell_type": "code", "execution_count": null, "id": "16", "metadata": {}, "outputs": [], "source": [
"NUM_KEYPOINTS = 33\n",
"\n",
"KPTS = [\n",
"    \"0. Nose\",\n",
"    \"1. Right eye inner\",\n",
"    \"2. Right eye\",\n",
"    \"3. Right eye outer\",\n",
"    \"4. Left eye inner\",\n",
"    \"5. Left eye\",\n",
"    \"6. Left eye outer\",\n",
"    \"7. Right ear\",\n",
"    \"8. Left ear\",\n",
"    \"9. Mouth right\",\n",
"    \"10. Mouth left\",\n",
"    \"11. Right shoulder\",\n",
"    \"12. Left shoulder\",\n",
"    \"13. Right elbow\",\n",
"    \"14. Left elbow\",\n",
"    \"15. Right wrist\",\n",
"    \"16. Left wrist\",\n",
"    \"17. Right pinky knuckle\",\n",
"    \"18. Left pinky knuckle\",\n",
"    \"19. Right index knuckle\",\n",
"    \"20. Left index knuckle\",\n",
"    \"21. Right thumb knuckle\",\n",
"    \"22. Left thumb knuckle\",\n",
"    \"23. Right hip\",\n",
"    \"24. Left hip\",\n",
"    \"25. Right knee\",\n",
"    \"26. Left knee\",\n",
"    \"27. Right ankle\",\n",
"    \"28. Left ankle\",\n",
"    \"29. Right heel\",\n",
"    \"30. Left heel\",\n",
"    \"31. Right foot index\",\n",
"    \"32. Left foot index\",\n",
"]\n",
"\n",
"# Skeleton edges as a flat list of (start, end) keypoint index pairs\n",
"SKELETON = [\n",
"    11, 12,\n",
"    11, 13,\n",
"    13, 15,\n",
"    12, 14,\n",
"    14, 16,\n",
"    12, 24,\n",
"    11, 23,\n",
"    23, 24,\n",
"    24, 26,\n",
"    26, 28,\n",
"    23, 25,\n",
"    25, 27,\n",
"    27, 29,\n",
"    29, 31,\n",
"    28, 30,\n",
"    30, 32,\n",
"]\n",
"\n",
"\n",
"def transform_keypoints(keypoints, image_width, image_height) -> tlc.Keypoints2DInstances:\n",
"    kpts = json.loads(keypoints.replace(\"'\", '\"'))\n",
"    kpts_arr = np.zeros((NUM_KEYPOINTS, 3), dtype=np.float32)\n",
"    for i, kpt_name in enumerate(KPTS):\n",
"        if kpt_name not in kpts:\n",
"            continue\n",
"        kpts_arr[i, :] = kpts[kpt_name]\n",
"        kpts_arr[i, 2] = 2  # mark annotated keypoints as visible\n",
"\n",
"    instances = tlc.Keypoints2DInstances.create_empty(\n",
"        image_width=image_width,\n",
"        image_height=image_height,\n",
"        include_keypoint_visibilities=True,\n",
"        include_instance_bbs=False,\n",
"    )\n",
"\n",
"    instances.add_instance(\n",
"        keypoints=kpts_arr,\n",
"        label=0,\n",
"    )\n",
"\n",
"    return instances\n",
"\n",
"\n",
"# Maps segmentation class names to integer labels as they are first encountered\n",
"builder = _ValueMapBuilder[str]()\n",
"\n",
"\n",
"def transform_segments(segments, image_width, image_height):\n",
"    segments = json.loads(segments.replace(\"'\", '\"'))\n",
"\n",
"    polygons = []\n",
"    labels = []\n",
"\n",
"    for segment in segments:\n",
"        class_name = segment[\"class_name\"]\n",
"        polygon = segment[\"polygon\"]\n",
"        poly_2_tuples = [[p[\"x\"], p[\"y\"]] for p in polygon]\n",
"        flattened_poly = [item for sublist in poly_2_tuples for item in sublist]\n",
"        polygons.append(flattened_poly)\n",
"        labels.append(builder(class_name))\n",
"\n",
"    segs = tlc.SegmentationPolygonsDict(\n",
"        image_width=image_width,\n",
"        image_height=image_height,\n",
"        polygons=polygons,\n",
"        instance_properties={\"label\": labels},\n",
"    )\n",
"    return segs\n",
"\n",
"\n",
"def transform_bboxes(face_bbox, person_bbox, image_width, image_height):\n",
"    face_bbox = json.loads(face_bbox)\n",
"    person_bbox = json.loads(person_bbox)\n",
"\n",
"    bboxes = {\n",
"        tlc.IMAGE_WIDTH: image_width,\n",
"        tlc.IMAGE_HEIGHT: image_height,\n",
"        tlc.BOUNDING_BOX_LIST: [\n",
"            {\n",
"                tlc.X0: face_bbox[0],\n",
"                tlc.Y0: face_bbox[1],\n",
"                tlc.X1: face_bbox[2],\n",
"                tlc.Y1: face_bbox[3],\n",
"                tlc.LABEL: 0,\n",
"            },\n",
"            {\n",
"                tlc.X0: person_bbox[0],\n",
"                tlc.Y0: person_bbox[1],\n",
"                tlc.X1: person_bbox[2],\n",
"                tlc.Y1: person_bbox[3],\n",
"                tlc.LABEL: 1,\n",
"            },\n",
"        ],\n",
"    }\n",
"\n",
"    return bboxes"
] },
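{ "cell_type": "markdown", "id": "26", "metadata": {}, "source": [ "The next cell is a small, hand-made example (not real FHIBE annotations) showing the dictionary produced by `transform_bboxes`. The four raw values of each box are stored as `X0`/`Y0`/`X1`/`Y1`; how the last two are interpreted is governed by the number roles set on the bounding-box schema passed to the `TableWriter` below." ] },
{ "cell_type": "code", "execution_count": null, "id": "27", "metadata": {}, "outputs": [], "source": [
"# Illustrative only: hand-made face and person boxes, not real FHIBE annotations\n",
"demo_bbs = transform_bboxes(\"[10, 20, 40, 60]\", \"[0, 0, 200, 400]\", image_width=200, image_height=400)\n",
"demo_bbs[tlc.BOUNDING_BOX_LIST]"
] },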
{ "cell_type": "markdown", "id": "17", "metadata": {}, "source": [ "## Load data" ] },
{ "cell_type": "code", "execution_count": null, "id": "18", "metadata": {}, "outputs": [], "source": [
"data = defaultdict(list)\n",
"\n",
"for index, row in csv_file.iterrows():\n",
"    input_row = row.to_dict()\n",
"    image_path = FHIBE_ROOT / input_row[\"filepath\"]\n",
"\n",
"    # Extract and convert annotations to 3LC format\n",
"    image_height = input_row[\"image_height\"]\n",
"    image_width = input_row[\"image_width\"]\n",
"    keypoints = transform_keypoints(input_row[\"keypoints\"], image_width, image_height)\n",
"    segments = transform_segments(input_row[\"segments\"], image_width, image_height)\n",
"    bboxes = transform_bboxes(input_row[\"face_bbox\"], input_row[\"person_bbox\"], image_width, image_height)\n",
"    data[\"image\"].append(image_path.as_posix())\n",
"    data[\"keypoints\"].append(keypoints.to_row())\n",
"    data[\"bbs\"].append(bboxes)\n",
"    data[\"segments\"].append(segments)\n",
"\n",
"    # Extract the remaining metadata columns as-is\n",
"    for key in set(input_row.keys()) - IGNORE_KEYS:\n",
"        data[key].append(input_row[key])\n",
"\n",
"    if index > MAX_SAMPLES:\n",
"        break"
] },
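{ "cell_type": "markdown", "id": "28", "metadata": {}, "source": [ "Before writing the Table, a quick look at what was collected: the number of rows, a sample of the column names, and the first converted bounding-box entry. This cell is purely for inspection and has no side effects." ] },
{ "cell_type": "code", "execution_count": null, "id": "29", "metadata": {}, "outputs": [], "source": [
"# Quick inspection of the collected rows before writing the Table\n",
"print(f\"Collected {len(data['image'])} rows\")\n",
"print(f\"Columns: {sorted(data.keys())[:8]} ...\")\n",
"data[\"bbs\"][0]"
] },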
{ "cell_type": "markdown", "id": "19", "metadata": {}, "source": [ "## Write 3LC Table" ] },
{ "cell_type": "code", "execution_count": null, "id": "20", "metadata": {}, "outputs": [], "source": [
"table_writer = tlc.TableWriter(\n",
"    table_name=TABLE_NAME,\n",
"    dataset_name=DATASET_NAME,\n",
"    project_name=PROJECT_NAME,\n",
"    column_schemas={\n",
"        \"image\": tlc.ImageUrlSchema(),\n",
"        \"keypoints\": tlc.Keypoints2DSchema(\n",
"            classes=[\"person\"],\n",
"            num_keypoints=NUM_KEYPOINTS,\n",
"            lines=SKELETON,\n",
"            point_attributes=[kpt.split(\". \")[1] for kpt in KPTS],\n",
"            include_per_point_visibility=True,\n",
"        ),\n",
"        \"bbs\": tlc.BoundingBoxListSchema(\n",
"            label_value_map={0: tlc.MapElement(\"face\"), 1: tlc.MapElement(\"person\")},\n",
"            include_segmentation=False,\n",
"            x1_number_role=tlc.NUMBER_ROLE_BB_SIZE_X,\n",
"            y1_number_role=tlc.NUMBER_ROLE_BB_SIZE_Y,\n",
"        ),\n",
"        \"segments\": tlc.SegmentationSchema(\n",
"            # Label map built from the class names collected by `builder` in transform_segments\n",
"            label_value_map={i: tlc.MapElement(v.split(\". \")[1]) for i, v in enumerate(builder._values)},\n",
"        ),\n",
"        **column_schemas,\n",
"    },\n",
")\n",
"table_writer.add_batch(data)\n",
"table = table_writer.finalize()"
] },
{ "cell_type": "code", "execution_count": null, "id": "21", "metadata": {}, "outputs": [], "source": [ "table.table_rows[0]" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 5 }