#include #include #include #include #include #include "depthai/depthai.hpp" constexpr float NEURAL_FPS = 8.0f; constexpr float STEREO_DEFAULT_FPS = 20.0f; // Custom host node for spatial visualization class SpatialVisualizer : public dai::NodeCRTP { public: Input& depthInput = inputs["depth"]; Input& detectionsInput = inputs["detections"]; Input& rgbInput = inputs["rgb"]; std::vector labelMap; std::shared_ptr build(Output& depth, Output& detections, Output& rgb) { depth.link(depthInput); detections.link(detectionsInput); rgb.link(rgbInput); sendProcessingToPipeline(true); return std::static_pointer_cast(this->shared_from_this()); } std::shared_ptr processGroup(std::shared_ptr in) override { auto depthFrame = in->get("depth"); auto detections = in->get("detections"); auto rgbFrame = in->get("rgb"); cv::Mat depthCv = depthFrame->getCvFrame(); cv::Mat rgbCv = rgbFrame->getCvFrame(); cv::Mat depthFrameColor = processDepthFrame(depthCv); displayResults(rgbCv, depthFrameColor, detections->detections); return nullptr; } private: cv::Mat processDepthFrame(const cv::Mat& depthFrame) { // Downscale depth frame cv::Mat depthDownscaled; cv::resize(depthFrame, depthDownscaled, cv::Size(), 0.25, 0.25); // Find min and max depth values double minDepth = 0, maxDepth = 0; cv::Mat mask = (depthDownscaled != 0); if(cv::countNonZero(mask) > 0) { cv::minMaxLoc(depthDownscaled, &minDepth, &maxDepth, nullptr, nullptr, mask); } // Normalize depth frame cv::Mat depthFrameColor; depthFrame.convertTo(depthFrameColor, CV_8UC1, 255.0 / (maxDepth - minDepth), -minDepth * 255.0 / (maxDepth - minDepth)); // Apply color map cv::Mat colorized; cv::applyColorMap(depthFrameColor, colorized, cv::COLORMAP_HOT); return colorized; } void displayResults(cv::Mat& rgbFrame, cv::Mat& depthFrameColor, const std::vector& detections) { int height = rgbFrame.rows; int width = rgbFrame.cols; for(const auto& detection : detections) { drawBoundingBoxes(depthFrameColor, detection); drawDetections(rgbFrame, detection, width, height); } cv::imshow("depth", depthFrameColor); cv::imshow("rgb", rgbFrame); if(cv::waitKey(1) == 'q') { stopPipeline(); } } void drawBoundingBoxes(cv::Mat& depthFrameColor, const dai::SpatialImgDetection& detection) { auto roi = detection.boundingBoxMapping.roi; roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows); auto topLeft = roi.topLeft(); auto bottomRight = roi.bottomRight(); cv::rectangle(depthFrameColor, cv::Point(static_cast(topLeft.x), static_cast(topLeft.y)), cv::Point(static_cast(bottomRight.x), static_cast(bottomRight.y)), cv::Scalar(255, 255, 255), 1); } void drawDetections(cv::Mat& frame, const dai::SpatialImgDetection& detection, int frameWidth, int frameHeight) { int x1 = static_cast(detection.xmin * frameWidth); int x2 = static_cast(detection.xmax * frameWidth); int y1 = static_cast(detection.ymin * frameHeight); int y2 = static_cast(detection.ymax * frameHeight); std::string label; try { label = labelMap[detection.label]; } catch(...) { label = std::to_string(detection.label); } cv::Scalar color(255, 255, 255); cv::putText(frame, label, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::putText(frame, std::to_string(detection.confidence * 100), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::putText(frame, "X: " + std::to_string(static_cast(detection.spatialCoordinates.x)) + " mm", cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::putText(frame, "Y: " + std::to_string(static_cast(detection.spatialCoordinates.y)) + " mm", cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::putText(frame, "Z: " + std::to_string(static_cast(detection.spatialCoordinates.z)) + " mm", cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::rectangle(frame, cv::Point(x1, y1), cv::Point(x2, y2), color, 1); } }; int main(int argc, char** argv) { // Initialize argument parser argparse::ArgumentParser program("spatial_detection", "1.0.0"); program.add_description("Spatial detection network example with configurable depth source"); program.add_argument("--depthSource").default_value(std::string("stereo")).help("Depth source: stereo, neural, tof"); // Parse arguments try { program.parse_args(argc, argv); } catch(const std::runtime_error& err) { std::cerr << err.what() << '\n'; std::cerr << program; return EXIT_FAILURE; } // Get arguments std::string depthSourceArg = program.get("--depthSource"); // Validate depth source argument if(depthSourceArg != "stereo" && depthSourceArg != "neural" && depthSourceArg != "tof") { std::cerr << "Invalid depth source: " << depthSourceArg << '\n'; std::cerr << "Valid options are: stereo, neural, tof" << '\n'; return EXIT_FAILURE; } try { float fps = STEREO_DEFAULT_FPS; if(depthSourceArg == "neural") { fps = NEURAL_FPS; } // Create pipeline dai::Pipeline pipeline; const std::pair size = {640, 400}; // Define sources and outputs auto camRgb = pipeline.create(); camRgb->build(dai::CameraBoardSocket::CAM_A, std::nullopt, fps); auto platform = pipeline.getDefaultDevice()->getPlatform(); // Create depth source based on argument dai::node::DepthSource depthSource; if(depthSourceArg == "stereo") { auto monoLeft = pipeline.create(); auto monoRight = pipeline.create(); auto stereo = pipeline.create(); monoLeft->build(dai::CameraBoardSocket::CAM_B, std::nullopt, fps); monoRight->build(dai::CameraBoardSocket::CAM_C, std::nullopt, fps); stereo->setExtendedDisparity(true); monoLeft->requestOutput(size, std::nullopt, dai::ImgResizeMode::CROP)->link(stereo->left); monoRight->requestOutput(size, std::nullopt, dai::ImgResizeMode::CROP)->link(stereo->right); depthSource = stereo; } else if(depthSourceArg == "neural") { auto monoLeft = pipeline.create(); auto monoRight = pipeline.create(); monoLeft->build(dai::CameraBoardSocket::CAM_B, std::nullopt, fps); monoRight->build(dai::CameraBoardSocket::CAM_C, std::nullopt, fps); auto neuralDepth = pipeline.create(); neuralDepth->build(*monoLeft->requestFullResolutionOutput(), *monoRight->requestFullResolutionOutput(), dai::DeviceModelZoo::NEURAL_DEPTH_LARGE); depthSource = neuralDepth; } else if(depthSourceArg == "tof") { auto tof = pipeline.create(); depthSource = tof; } // Create spatial detection network using the unified build method with DepthSource variant auto spatialDetectionNetwork = pipeline.create(); auto visualizer = pipeline.create(); // Configure spatial detection network spatialDetectionNetwork->input.setBlocking(false); spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5f); spatialDetectionNetwork->setDepthLowerThreshold(100); spatialDetectionNetwork->setDepthUpperThreshold(5000); // Set up model and build with DepthSource variant dai::NNModelDescription modelDesc; // For better results on OAK4, use a segmentation model like "luxonis/yolov8-instance-segmentation-large:coco-640x480" // for depth estimation over the objects mask instead of the full bounding box. modelDesc.model = "yolov6-nano"; spatialDetectionNetwork->build(camRgb, depthSource, modelDesc); // Set label map visualizer->labelMap = spatialDetectionNetwork->getClasses().value(); spatialDetectionNetwork->spatialLocationCalculator->initialConfig->setSegmentationPassthrough(false); // Linking visualizer->build(spatialDetectionNetwork->passthroughDepth, spatialDetectionNetwork->out, spatialDetectionNetwork->passthrough); std::cout << "Pipeline starting with depth source: " << depthSourceArg << '\n'; // Start pipeline pipeline.run(); } catch(const std::exception& e) { std::cerr << "Error: " << e.what() << '\n'; return EXIT_FAILURE; } return EXIT_SUCCESS; }