#include #include // Includes common necessary includes for development using depthai library #include "depthai/depthai.hpp" /* The code is the same as for Tiny-yolo-V3, the only difference is the blob file. The blob was compiled following this tutorial: https://github.com/TNTWEN/OpenVINO-YOLOV4 */ static const std::vector labelMap = { "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; static std::atomic syncNN{true}; int main(int argc, char** argv) { using namespace std; using namespace std::chrono; std::string nnPath(BLOB_PATH); // If path to blob specified, use that if(argc > 1) { nnPath = std::string(argv[1]); } // Print which blob we are using printf("Using blob at path: %s\n", nnPath.c_str()); // Create pipeline dai::Pipeline pipeline; // Define sources and outputs auto camRgb = pipeline.create(); auto detectionNetwork = pipeline.create(); auto xoutRgb = pipeline.create(); auto nnOut = pipeline.create(); xoutRgb->setStreamName("rgb"); nnOut->setStreamName("detections"); // Properties camRgb->setPreviewSize(416, 416); camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); camRgb->setInterleaved(false); camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR); camRgb->setFps(40); // Network specific settings detectionNetwork->setConfidenceThreshold(0.5f); detectionNetwork->setNumClasses(80); detectionNetwork->setCoordinateSize(4); detectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319}); detectionNetwork->setAnchorMasks({{"side26", {1, 2, 3}}, {"side13", {3, 4, 5}}}); detectionNetwork->setIouThreshold(0.5f); detectionNetwork->setBlobPath(nnPath); detectionNetwork->setNumInferenceThreads(2); detectionNetwork->input.setBlocking(false); // Linking camRgb->preview.link(detectionNetwork->input); if(syncNN) { detectionNetwork->passthrough.link(xoutRgb->input); } else { camRgb->preview.link(xoutRgb->input); } detectionNetwork->out.link(nnOut->input); // Connect to device and start pipeline dai::Device device(pipeline); // Output queues will be used to get the rgb frames and nn data from the outputs defined above auto qRgb = device.getOutputQueue("rgb", 4, false); auto qDet = device.getOutputQueue("detections", 4, false); cv::Mat frame; std::vector detections; auto startTime = steady_clock::now(); int counter = 0; float fps = 0; auto color2 = cv::Scalar(255, 255, 255); // Add bounding boxes and text to the frame and show it to the user auto displayFrame = [](std::string name, cv::Mat frame, std::vector& detections) { auto color = cv::Scalar(255, 0, 0); // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height for(auto& detection : detections) { int x1 = detection.xmin * frame.cols; int y1 = detection.ymin * frame.rows; int x2 = detection.xmax * frame.cols; int y2 = detection.ymax * frame.rows; uint32_t labelIndex = detection.label; std::string labelStr = to_string(labelIndex); if(labelIndex < labelMap.size()) { labelStr = labelMap[labelIndex]; } cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream confStr; confStr << std::fixed << std::setprecision(2) << detection.confidence * 100; cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX); } // Show the frame cv::imshow(name, frame); }; while(true) { std::shared_ptr inRgb; std::shared_ptr inDet; if(syncNN) { inRgb = qRgb->get(); inDet = qDet->get(); } else { inRgb = qRgb->tryGet(); inDet = qDet->tryGet(); } counter++; auto currentTime = steady_clock::now(); auto elapsed = duration_cast>(currentTime - startTime); if(elapsed > seconds(1)) { fps = counter / elapsed.count(); counter = 0; startTime = currentTime; } if(inRgb) { frame = inRgb->getCvFrame(); std::stringstream fpsStr; fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps; cv::putText(frame, fpsStr.str(), cv::Point(2, inRgb->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color2); } if(inDet) { detections = inDet->detections; } if(!frame.empty()) { displayFrame("rgb", frame, detections); } int key = cv::waitKey(1); if(key == 'q' || key == 'Q') { return 0; } } return 0; }