#!/usr/bin/env python3
import time

import cv2
import depthai as dai

# This example shows how to crop the biggest detected person out of the camera's
# full-resolution stream and set the autofocus and autoexposure regions to the same ROI.


def getBiggestPerson(imgDetections: dai.ImgDetections):
    # Return the person detection with the largest normalized bounding-box area, or None
    biggestDetection = None
    biggestDetectionSize = 0
    for detection in imgDetections.detections:
        if detection.label == 0:  # Person
            size = (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin)
            if size > biggestDetectionSize:
                biggestDetection = detection
                biggestDetectionSize = size
    return biggestDetection


def displayFrame(name: str, frame: dai.ImgFrame, imgDetections: dai.ImgDetections, labelMap: list):
    color = (0, 255, 0)
    assert imgDetections.getTransformation() is not None
    cvFrame = frame.getFrame() if frame.getType() == dai.ImgFrame.Type.RAW16 else frame.getCvFrame()
    for detection in imgDetections.detections:
        # Get the shape of the frame from which the detections originated, for denormalization
        normShape = imgDetections.getTransformation().getSize()
        # Create a rotated rectangle to remap
        rotRect = dai.RotatedRect(
            dai.Rect(
                dai.Point2f(detection.xmin, detection.ymin),
                dai.Point2f(detection.xmax, detection.ymax),
            ).denormalize(normShape[0], normShape[1]),
            0,
        )
        # Remap the detection rectangle to the target frame
        remapped = imgDetections.getTransformation().remapRectTo(frame.getTransformation(), rotRect)
        # The remapped rectangle could be rotated, so take its outer bounding box
        bbox = [int(coord) for coord in remapped.getOuterRect()]
        cv2.putText(
            cvFrame,
            labelMap[detection.label],
            (bbox[0] + 10, bbox[1] + 20),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.putText(
            cvFrame,
            f"{int(detection.confidence * 100)}%",
            (bbox[0] + 10, bbox[1] + 40),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.rectangle(cvFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    # Show the frame
    cv2.imshow(name, cvFrame)


def transformDetectionToSource(imgDetections: dai.ImgDetections, detection: dai.ImgDetection):
    # Map a normalized detection back to pixel coordinates in the full-resolution
    # source frame, clamping the result to the frame bounds
    normShape = imgDetections.getTransformation().getSize()
    sourceSize = imgDetections.getTransformation().getSourceSize()
    width, height = sourceSize
    rotRect = dai.RotatedRect(
        dai.Rect(
            dai.Point2f(detection.xmin, detection.ymin),
            dai.Point2f(detection.xmax, detection.ymax),
        ).denormalize(normShape[0], normShape[1]),
        0,
    )
    rotRect = imgDetections.getTransformation().invTransformRect(rotRect)
    outerRect = rotRect.getOuterRect()
    firstPoint = dai.Point2f(max(0, min(outerRect[0], width)), max(0, min(outerRect[1], height)))
    secondPoint = dai.Point2f(max(0, min(outerRect[2], width)), max(0, min(outerRect[3], height)))
    return dai.Rect(firstPoint, secondPoint)


device = dai.Device()
modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString()))
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()

# Resolve the frame type the model expects; fall back to a platform default
daiType = modelArchive.getConfig().model.inputs[0].preprocessing.daiType
if daiType:
    try:
        frameType = getattr(dai.ImgFrame.Type, daiType)
    except AttributeError:
        daiType = None
if not daiType:
    if device.getPlatform() == dai.Platform.RVC2:
        frameType = dai.ImgFrame.Type.BGR888p
    else:
        frameType = dai.ImgFrame.Type.BGR888i

# Create pipeline
with dai.Pipeline(device) as pipeline:
    # Define source and output
    cam = pipeline.create(dai.node.Camera).build()
    cameraControlQueue = cam.inputControl.createInputQueue()
    fullResStream = cam.requestFullResolutionOutput(useHighestResolution=True)

    imageManip = pipeline.create(dai.node.ImageManip)
    imageManip.initialConfig.setOutputSize(inputSize[0], inputSize[1])
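    # The resize above matches the network's input tensor size; the frame type set
    # below matches the pixel format resolved from the NNArchive (or the platform default)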
    imageManip.initialConfig.setFrameType(frameType)
    fullResStream.link(imageManip.inputImage)

    detectionNetwork = pipeline.create(dai.node.DetectionNetwork).build(imageManip.out, modelArchive)
    labelMap = detectionNetwork.getClasses()

    # Second ImageManip crops the person ROI out of the full-resolution stream
    imageManipCropOut = pipeline.create(dai.node.ImageManip)
    imageManipCropOut.setMaxOutputFrameSize(round(1000 * 1000 * 1.5) + 300)  # fits up to a 1000x1000 NV12 frame
    imageManipCropOut.initialConfig.setOutputSize(800, 800)
    imageManipCropOut.inputImage.setBlocking(False)
    imageManipCropOut.inputImage.setMaxSize(1)
    imageManipConfigQueue = imageManipCropOut.inputConfig.createInputQueue()
    imageManipCropOutQueue = imageManipCropOut.out.createOutputQueue()
    fullResStream.link(imageManipCropOut.inputImage)

    videoQueue = detectionNetwork.passthrough.createOutputQueue()
    detectionQueue = detectionNetwork.out.createOutputQueue()

    # Connect to device and start pipeline
    pipeline.start()
    lastTimeToAutoFocus = time.time()
    while pipeline.isRunning():
        videoIn = videoQueue.get()
        detections = detectionQueue.get()
        biggestDetection = getBiggestPerson(detections)
        if biggestDetection:
            sourceRect = transformDetectionToSource(detections, biggestDetection)

            # Crop the ROI out of the full-resolution stream and letterbox it to 800x800;
            # the rectangle is given in pixels, so normalized coordinates are disabled
            manipConfig = dai.ImageManipConfig()
            manipConfig.addCrop(sourceRect, False)
            manipConfig.setOutputSize(800, 800, dai.ImageManipConfig.ResizeMode.LETTERBOX)
            imageManipConfigQueue.send(manipConfig)

            # Point autoexposure at the ROI on every frame, but re-target autofocus at
            # most every 5 seconds to avoid constant focus hunting
            cameraControl = dai.CameraControl()
            cameraControl.setAutoExposureRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            if time.time() - lastTimeToAutoFocus > 5:
                lastTimeToAutoFocus = time.time()
                cameraControl.setAutoFocusRegion(int(sourceRect.x), int(sourceRect.y), int(sourceRect.width), int(sourceRect.height))
            cameraControlQueue.send(cameraControl)

        imageManipCropOutFrame = imageManipCropOutQueue.tryGet()
        if imageManipCropOutFrame is not None:
            assert isinstance(imageManipCropOutFrame, dai.ImgFrame)
            cv2.imshow("Cropped out frame", imageManipCropOutFrame.getCvFrame())

        assert isinstance(videoIn, dai.ImgFrame)
        assert isinstance(detections, dai.ImgDetections)
        displayFrame("Full view video", videoIn, detections, labelMap)

        key = cv2.waitKey(1)
        if key == ord("q"):
            break
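
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original example: the crop above follows
# the raw detection box, so it can jitter from frame to frame. One common fix is
# to smooth the ROI with an exponential moving average before sending it to the
# ImageManip and CameraControl queues. smoothRect is a hypothetical helper; it
# relies only on the dai.Rect / dai.Point2f accessors already used above.
def smoothRect(prev: dai.Rect, new: dai.Rect, alpha: float = 0.3) -> dai.Rect:
    """Blend the previous ROI toward the new one; alpha=1.0 disables smoothing."""
    def lerp(a: float, b: float) -> float:
        return a + alpha * (b - a)

    topLeft = dai.Point2f(lerp(prev.x, new.x), lerp(prev.y, new.y))
    bottomRight = dai.Point2f(
        lerp(prev.x + prev.width, new.x + new.width),
        lerp(prev.y + prev.height, new.y + new.height),
    )
    return dai.Rect(topLeft, bottomRight)
# Inside the loop one would keep the previous smoothed rect, e.g.:
#     sourceRect = smoothRect(lastRect, sourceRect) if lastRect else sourceRect
#     lastRect = sourceRect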