#include #include // Includes common necessary includes for development using depthai library #include "depthai/depthai.hpp" static const std::vector labelMap = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; static std::atomic syncNN{true}; int main(int argc, char** argv) { using namespace std; using namespace std::chrono; std::string nnPath(BLOB_PATH); // If path to blob specified, use that if(argc > 1) { nnPath = std::string(argv[1]); } // Print which blob we are using printf("Using blob at path: %s\n", nnPath.c_str()); // Create pipeline dai::Pipeline pipeline; // Define sources and outputs auto camRgb = pipeline.create(); auto spatialDetectionNetwork = pipeline.create(); auto monoLeft = pipeline.create(); auto monoRight = pipeline.create(); auto stereo = pipeline.create(); auto xoutRgb = pipeline.create(); auto xoutNN = pipeline.create(); auto xoutDepth = pipeline.create(); xoutRgb->setStreamName("rgb"); xoutNN->setStreamName("detections"); xoutDepth->setStreamName("depth"); // Properties camRgb->setPreviewSize(300, 300); camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); camRgb->setInterleaved(false); camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR); monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P); monoLeft->setCamera("left"); monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P); monoRight->setCamera("right"); // Setting node configs stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY); // Align depth map to the perspective of RGB camera, on which inference is done stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A); stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight()); spatialDetectionNetwork->setBlobPath(nnPath); spatialDetectionNetwork->setConfidenceThreshold(0.5f); spatialDetectionNetwork->input.setBlocking(false); spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5); spatialDetectionNetwork->setDepthLowerThreshold(100); spatialDetectionNetwork->setDepthUpperThreshold(5000); // Linking monoLeft->out.link(stereo->left); monoRight->out.link(stereo->right); camRgb->preview.link(spatialDetectionNetwork->input); if(syncNN) { spatialDetectionNetwork->passthrough.link(xoutRgb->input); } else { camRgb->preview.link(xoutRgb->input); } spatialDetectionNetwork->out.link(xoutNN->input); stereo->depth.link(spatialDetectionNetwork->inputDepth); spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input); // Connect to device and start pipeline dai::Device device(pipeline); auto previewQueue = device.getOutputQueue("rgb", 4, false); auto detectionNNQueue = device.getOutputQueue("detections", 4, false); auto depthQueue = device.getOutputQueue("depth", 4, false); auto startTime = steady_clock::now(); int counter = 0; float fps = 0; auto color = cv::Scalar(255, 255, 255); while(true) { auto inPreview = previewQueue->get(); auto inDet = detectionNNQueue->get(); auto depth = depthQueue->get(); counter++; auto currentTime = steady_clock::now(); auto elapsed = duration_cast>(currentTime - startTime); if(elapsed > seconds(1)) { fps = counter / elapsed.count(); counter = 0; startTime = currentTime; } cv::Mat frame = inPreview->getCvFrame(); cv::Mat depthFrame = depth->getFrame(); // depthFrame values are in millimeters cv::Mat depthFrameColor; cv::normalize(depthFrame, depthFrameColor, 255, 0, cv::NORM_INF, CV_8UC1); cv::equalizeHist(depthFrameColor, depthFrameColor); cv::applyColorMap(depthFrameColor, depthFrameColor, cv::COLORMAP_HOT); auto detections = inDet->detections; for(const auto& detection : detections) { auto roiData = detection.boundingBoxMapping; auto roi = roiData.roi; roi = roi.denormalize(depthFrameColor.cols, depthFrameColor.rows); auto topLeft = roi.topLeft(); auto bottomRight = roi.bottomRight(); auto xmin = (int)topLeft.x; auto ymin = (int)topLeft.y; auto xmax = (int)bottomRight.x; auto ymax = (int)bottomRight.y; cv::rectangle(depthFrameColor, cv::Rect(cv::Point(xmin, ymin), cv::Point(xmax, ymax)), color, cv::FONT_HERSHEY_SIMPLEX); int x1 = detection.xmin * frame.cols; int y1 = detection.ymin * frame.rows; int x2 = detection.xmax * frame.cols; int y2 = detection.ymax * frame.rows; uint32_t labelIndex = detection.label; std::string labelStr = to_string(labelIndex); if(labelIndex < labelMap.size()) { labelStr = labelMap[labelIndex]; } cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream confStr; confStr << std::fixed << std::setprecision(2) << detection.confidence * 100; cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream depthX; depthX << "X: " << (int)detection.spatialCoordinates.x << " mm"; cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream depthY; depthY << "Y: " << (int)detection.spatialCoordinates.y << " mm"; cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream depthZ; depthZ << "Z: " << (int)detection.spatialCoordinates.z << " mm"; cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX); } std::stringstream fpsStr; fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps; cv::putText(frame, fpsStr.str(), cv::Point(2, inPreview->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color); cv::imshow("depth", depthFrameColor); cv::imshow("preview", frame); int key = cv::waitKey(1); if(key == 'q' || key == 'Q') { return 0; } } return 0; }