import marimo

__generated_with = "0.23.8"
app = marimo.App()


@app.cell
def _():
    import marimo as mo
    import torch

    # Device priority: CUDA (NVIDIA) first, then MPS (Apple Silicon), else CPU.
    # The chosen string is handed to Detectorv2(device=...) in a later cell.
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
    return device, mo


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # 2. Detecting facial expressions from videos

    In this tutorial we'll use **`Detectorv2`** — Py-Feat's single multi-task model — to
    process a video file, first one frame at a time and then in batches to speed things
    up on a GPU.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## 2.1 Setting up the detector

    We create a `Detectorv2` instance just like in the previous tutorial. One network
    predicts Action Units, emotions, valence/arousal, gaze, head pose, a 478-point 3D
    FaceMesh, and blendshapes in a single forward pass.
    """)
    return


@app.cell
def _(device):
    from feat import Detectorv2

    # Build the detector on whichever device the first cell picked (cuda/mps/cpu).
    detector = Detectorv2(device=device)
    return (detector,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## 2.2 Processing a video

    Detecting facial expressions in a video uses the same `.detect()` method with
    `data_type="video"`. This sample video included in Py-Feat is by
    [Wolfgang Langer](https://www.pexels.com/@wolfgang-langer-1415383?utm_content=attributionCopyText&utm_medium=referral&utm_source=pexels)
    from
    [Pexels](https://www.pexels.com/video/a-woman-exhibits-different-emotions-through-facial-expressions-3063838/).
    """)
    return


@app.cell
def _():
    import os
    from feat.utils.io import get_test_data_path

    # The sample clip sits inside the test-data directory reported by Py-Feat.
    test_data_dir = get_test_data_path()
    test_video_path = os.path.join(test_data_dir, "WolfgangLanger_Pexels.mp4")

    # No inline preview of the input video here: it is left out of the static
    # docs. Download the notebook or open it in molab to watch the clip.
    test_video_path
    return (test_video_path,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    We pass `skip_frames=24` to process only every 24th frame for speed, and
    `face_detection_threshold=0.95` to be conservative about what counts as a face — we
    know this clip is a continuous front-on shot of one person, so raising it from the
    default `0.5` avoids spurious extra detections.

    By default `.detect()` processes **one frame at a time** (`batch_size=1`):
    """)
    return


@app.cell
def _(detector, test_video_path):
    # Baseline run: batch_size is not passed, so .detect() falls back to its
    # default, which the text above describes as one frame per forward pass.
    video_prediction = detector.detect(
        test_video_path,
        data_type="video",
        face_detection_threshold=0.95,
        skip_frames=24,
    )
    video_prediction.head()
    return (video_prediction,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    Our 20-second clip recorded at 24 fps yields 20 predictions because of
    `skip_frames=24`:
    """)
    return


@app.cell
def _(video_prediction):
    video_prediction.shape
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## 2.3 Speeding things up with batching

    Passing `batch_size > 1` runs several frames through the network in a single forward
    pass instead of one at a time. This is **much faster on a GPU** (CUDA or MPS) and is
    the recommended way to process video. On CUDA you can squeeze out a bit more by also
    passing `pin_memory=True`, which page-locks host memory for faster CPU→GPU
    transfers. The predictions are identical — only throughput changes:
    """)
    return


@app.cell
def _(detector, test_video_path):
    # Same call as the baseline run, plus batch_size=8 so eight frames share one
    # forward pass. pin_memory is intentionally not passed here; per the text
    # above it is an optional extra for CUDA host->device transfers.
    video_prediction_batched = detector.detect(
        test_video_path,
        data_type="video",
        face_detection_threshold=0.95,
        skip_frames=24,
        batch_size=8,
    )
    video_prediction_batched.shape
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## 2.4 Visualizing predictions

    You can plot detection results from a video.
    The frames aren't extracted from the video (that would produce thousands of images),
    so the visualization shows the detected face geometry without the underlying image.
    The clip runs at 24 fps; the actress shows sadness around 0:02 and happiness around
    0:14.
    """)
    return


@app.cell
def _(mo, video_prediction):
    # Two frames are plotted: 48 (~0:02, sadness) and 408 (~0:14, happiness).
    # NOTE(review): at the stated 24 fps, frame 408 would fall at 0:17 rather
    # than 0:14 — confirm the clip's frame rate or the quoted timestamp.
    _selected = video_prediction.query("frame in [48, 408]")
    _figs = _selected.plot_detections(add_titles=False, faceboxes=False)
    mo.vstack(_figs)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    We can also use pandas plotting to show how emotions unfold over time — the shift
    from sadness to happiness is clearly visible:
    """)
    return


@app.cell
def _(video_prediction):
    # The plot call hands back an axes object; its parent figure is the cell output.
    video_prediction.emotions.plot().figure
    return


if __name__ == "__main__":
    app.run()