import marimo __generated_with = "0.23.8" app = marimo.App() @app.cell def _(): import marimo as mo import torch # Use the best available device: CUDA (NVIDIA) > MPS (Apple Silicon) > CPU. # Pass this to Detectorv1(device=...) so the tutorial uses your GPU when present. device = ( "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu" ) return device, mo @app.cell(hide_code=True) def _(mo): mo.md(r""" # 1. Detecting facial expressions from images In this tutorial we'll use **`Detectorv2`** — Py-Feat's single multi-task model — to detect faces, landmarks, action units, emotions, valence/arousal, gaze, and more from images, and to visualize the results. At the end we'll cover the modular `Detectorv1` for when you want to swap or disable individual models. """) return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.1 Setting up a detector The recommended way to extract facial features in Py-Feat 0.7+ is **`Detectorv2`** — a single **multi-task neural network** that, in one forward pass, predicts Action Units, emotions, **valence/arousal**, **gaze**, head pose, 68-point landmarks, and a **478-point 3D MediaPipe FaceMesh**. It's fast (especially on single frames) and is what the rest of this tutorial uses. Passing `identity_model="arcface"` also adds a face-identity embedding. The first time you initialize a detector, Py-Feat downloads the required pretrained weights from [our HuggingFace Repository](https://huggingface.co/py-feat) and caches them to disk; subsequent runs reuse the cached weights. You can find a list of default models [on this page](../pages/models.md). For the older modular detector, see the [Using the modular `Detectorv1`](#using-the-modular-detectorv1) section at the end of this tutorial. """) return @app.cell def _(device): from feat import Detectorv2 # One multi-task model: AUs, emotions, valence/arousal, gaze, head pose, # 68-pt landmarks, and a 478-pt 3D FaceMesh. identity_model="arcface" adds a # face-identity embedding. device was selected above (cuda/mps/cpu). detector_v2 = Detectorv2(device=device, identity_model="arcface") return (detector_v2,) @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.2 Processing a single image """) return @app.cell(hide_code=True) def _(mo): mo.md(r""" Let's process a single image with a single face. Py-feat includes a demo image for this purpose called `single_face.jpg` so lets use that. You can also use the convenient `imshow` function which will automatically load an image into a numpy array if provided a path unlike matplotlib: """) return @app.cell def _(): from feat.utils.io import get_test_data_path from feat.plotting import imshow import os # Helper to point to the test data folder test_data_dir = get_test_data_path() # Get the full path single_face_img_path = os.path.join(test_data_dir, "single_face.jpg") # Plot it imshow(single_face_img_path) return os, single_face_img_path, test_data_dir @app.cell(hide_code=True) def _(mo): mo.md(r""" Now we use our initialized `detector` instance to make predictions with the `.detect()` method, passing `data_type="image"`. This is the main workhorse method that will perform face, landmark, au, and emotion detection using the loaded models. It always returns a `Fex` data instance: """) return @app.cell def _(detector_v2, single_face_img_path): single_face_prediction = detector_v2.detect(single_face_img_path, data_type="image") type(single_face_prediction) # instance of a Fex class # Show results single_face_prediction return (single_face_prediction,) @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.3 Working with `Fex` outputs """) return @app.cell(hide_code=True) def _(mo): mo.md(r""" The output of any detection always returns a `Fex` data class instance. This class is a lightweight wrapper around a pandas dataframe that contains columns with values for detection type. So you can use any pandas methods you're already familiar with: """) return @app.cell def _(single_face_prediction): # We always return a dataframe even if there's just a single row, # i.e. no Series single_face_prediction.head() return @app.cell(hide_code=True) def _(mo): mo.md(r""" `Fex` provides convenient attributes to access specific groups of columns so you don't have to write a bunch of pandas code to get the data you need: """) return @app.cell def _(single_face_prediction): single_face_prediction.faceboxes return @app.cell def _(single_face_prediction): single_face_prediction.aus return @app.cell def _(single_face_prediction): single_face_prediction.emotions return @app.cell(hide_code=True) def _(mo): mo.md(r""" `Detectorv2` also predicts continuous **valence** (unpleasant → pleasant) and **arousal** (calm → excited) — the two affective dimensions the modular v1 `Detectorv1` does not produce. They're plain `Fex` columns: """) return @app.cell def _(single_face_prediction): single_face_prediction[["valence", "arousal"]] return @app.cell def _(single_face_prediction): single_face_prediction.poses return @app.cell def _(single_face_prediction): single_face_prediction.identities return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.4 Saving and Loading detections from a file Since a `Fex` object is just a sub-classed `DataFrames` we can use the `.to_csv` method to save our detections toa file: """) return @app.cell def _(single_face_prediction): single_face_prediction.to_csv("output.csv", index=False) return @app.cell(hide_code=True) def _(mo): mo.md(r""" To create a new `Fex` instance from a csv file use our custom `read_feat()` function instead pf `pd.read_csv`: """) return @app.cell def _(): from feat.utils.io import read_feat input_prediction = read_feat("output.csv") # We can quick access features like before input_prediction.aus return @app.cell(hide_code=True) def _(mo): mo.md(r""" ### Real-time saving during detection (low-memory mode) You can also write `Fex` outputs to a file during detection by passing a `save` argument to `detect`. This will save the `Fex` output to a csv file every time a face is detected. This can be useful when processing multiple images or videos (as we'll see later). """) return @app.cell def _(detector_v2, single_face_img_path): fex = detector_v2.detect(inputs=single_face_img_path, data_type="image", save='detections.csv') fex.head() return @app.cell(hide_code=True) def _(mo): mo.md(r""" We can use our terminal to see that `detections.csv` exists and contains the same content as `fex` """) return @app.cell def _(): import subprocess subprocess.run('head detections.csv', shell=True) return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.5 Visualizing detection results. `Fex` objects have a method called `.plot_detections()` to generate a summary figure of detected faces, action units and emotions. It always returns a list of matplotlib figures: """) return @app.cell def _(single_face_prediction): _figs = single_face_prediction.plot_detections(poses=True) _figs[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" ### Overlaying gaze direction `plot_detections(gazes=True)` overlays a yellow arrow on each detected face showing where it's looking. The arrow direction comes from `gaze_pitch` / `gaze_yaw` columns produced by whichever gaze model is active — in v0.7+ the default is **L2CS** (Abdelrahman et al. 2022, a ResNet50 trained on Gaze360 + MPIIGaze). Angles are in radians, head-centric: positive pitch = looking up, positive yaw = subject's gaze drifts toward the viewer's right. """) return @app.cell def _(single_face_prediction): # fex.gaze_columns lists which columns hold the gaze model's output; # for L2CS that's gaze_pitch and gaze_yaw (radians). print('gaze columns:', single_face_prediction.gaze_columns) print(single_face_prediction[['gaze_pitch', 'gaze_yaw']]) _figs = single_face_prediction.plot_detections(faces='landmarks', gazes=True, muscles=False) _figs[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.6 Detecting multiple faces from a single image A `Detectorv1` will automatically find multiple faces in a single image and will create 1 row per detected face in the `Fex` object it outputs. Notice how `image_prediction` is now a `Fex` instance with 5 rows, one for each detected face. We can confirm this by plotting our detection results like before: """) return @app.cell def _(detector_v2, os, test_data_dir): multi_face_image_path = os.path.join(test_data_dir, "multi_face.jpg") multi_face_prediction = detector_v2.detect(multi_face_image_path, data_type="image") # Show results multi_face_prediction return multi_face_image_path, multi_face_prediction @app.cell def _(multi_face_prediction): _figs = multi_face_prediction.plot_detections(add_titles=False) _figs[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## 1.7 Working with multiple images `Detectorv1` is also flexible enough to process multiple image files if `.detect()` is passed a list of images. By default images will be processed serially, but you can set `batch_size > 1` to process multiple images in a *batch* and speed up processing. **NOTE: All images in a batch must have the same dimensions for batch processing.** This is because behind the scenes, `Detectorv1` is assembling a *tensor* by stacking images together. You can ask `Detectorv1` to rescale images by padding and preserving proportions using the `output_size` in conjunction with `batch_size`. For example, the following would process a list of images in batches of 5 images at a time resizing each so one axis is 512: `detector_v2.detect(img_list, batch_size=5, output_size=512) # without output_size this would raise an error if image sizes differ!` In the example below we keep things simple, by process both our single and multi-face example serislly by setting `batch_size = 1`. Notice how the returned Fex data class instance has 6 rows: 1 for the first face in the first image, and 5 for the faces in the second image: """) return @app.cell(hide_code=True) def _(mo): mo.md(r""" **NOTE: Currently batch processing images gives slightly different AU detection results due to the way that py-feat integrates the underlying models. You can examine the degree of tolerance by checking out the results of `test_detection_and_batching_with_diff_img_sizes` in our test-suite** """) return @app.cell def _(detector_v2, multi_face_image_path, single_face_img_path): img_list = [single_face_img_path, multi_face_image_path] mixed_prediction = detector_v2.detect(img_list, batch_size=1, data_type="image") mixed_prediction return (mixed_prediction,) @app.cell(hide_code=True) def _(mo): mo.md(r""" Calling `.plot_detections()` will now plot detections for all images the detector was passed: """) return @app.cell def _(mixed_prediction): _figs = mixed_prediction.plot_detections() _figs[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" However, it's easy to use pandas slicing syntax to just grab predictions for the image you want. For example you can use `.loc` and chain it to `.plot_detections()`: """) return @app.cell def _(mixed_prediction): # Just plot the detection corresponding to the first row in the Fex data _figs = mixed_prediction.loc[0].plot_detections() _figs[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" Likewise you can use `.query()` and chain it to `.plot_detections()`. `Fex` data classes store each file path in the `'input'` column. So we can use regular pandas methods like `.unique()` to get all the unique images (2 in our case) and pick the second one. """) return @app.cell def _(mixed_prediction): # Choose plot based on image file name img_name = mixed_prediction["input"].unique()[1] axes = mixed_prediction.query("input == @img_name").plot_detections() axes[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" ## Using the modular `Detectorv1` Before `Detectorv2`, Py-Feat used **`Detectorv1`** — a *modular* pipeline that glues together a **separate pre-trained model per sub-task** (face, landmarks, Action Units, emotion, head pose, identity). Reach for it when you want to **swap or disable a specific model** (e.g. `Detectorv1(emotion_model='svm')`) or need the classic modular behavior. It exposes the **same `.detect()` API** and returns the same kind of `Fex` object, so everything above works with either detector. `Detectorv2` is the recommended default for new work; see the [two-detector overview](/#two-detectors-detectorv1-and-detectorv2) for a full comparison. """) return @app.cell def _(device): from feat import Detectorv1 # The modular Detectorv1. Swap individual models via kwargs, e.g. # Detectorv1(emotion_model='svm'). device was selected above (cuda/mps/cpu). detector = Detectorv1(device=device) return (detector,) @app.cell(hide_code=True) def _(mo): mo.md(r""" ### AU-projection visualization By default `.plot_detections()` will overlay facial lines on top of the input image. However, it's also possible to visualize a face using Py-Feat's standardized AU landmark model, which takes the detected AUs and projects them onto a template face. You can control this by setting `faces='aus'` instead of the default `faces='landmarks'`. For more details about this kind of visualization see the [visualizing facial expressions](Plotting.md) tutorial: """) return @app.cell def _(detector, single_face_img_path): # AU-projection visualization (faces='aus') uses Detectorv1's named xgb # AU model and its trained landmark viz model; Detectorv2's AUs have no # projection model, so we use the legacy detector here. See tutorial 03 for # more on AU visualization. _v1_fex = detector.detect(single_face_img_path, data_type="image") _figs = _v1_fex.plot_detections(faces='aus', muscles=True) _figs[0] return @app.cell(hide_code=True) def _(mo): mo.md(r""" ### Interactive Plotting You can also use the `.iplot_detections()` method to generate an interactive plotly figure that lets you interactively enable/disable various detector outputs: """) return @app.cell def _(detector, single_face_img_path): # Interactive plotting uses the v1 detector here: Detectorv2's emotion # columns (Neutral/Happy/...) aren't yet wired into iplot_detections. _v1_fex = detector.detect(single_face_img_path, data_type="image") _v1_fex.iplot_detections(bounding_boxes=True, emotions=True) return if __name__ == "__main__": app.run()