#include #include #include "utility.hpp" // Includes common necessary includes for development using depthai library #include "depthai/depthai.hpp" static const std::vector labelMap = {"", "person"}; static std::atomic fullFrameTracking{false}; int main(int argc, char** argv) { using namespace std; using namespace std::chrono; std::string nnPath(BLOB_PATH); std::string videoPath(VIDEO_PATH); // If path to blob specified, use that if(argc > 2) { nnPath = std::string(argv[1]); videoPath = std::string(argv[2]); } // Print which blob we are using printf("Using blob at path: %s\n", nnPath.c_str()); printf("Using video at path: %s\n", videoPath.c_str()); // Create pipeline dai::Pipeline pipeline; // Define sources and outputs auto manip = pipeline.create(); auto objectTracker = pipeline.create(); auto detectionNetwork = pipeline.create(); auto manipOut = pipeline.create(); auto xinFrame = pipeline.create(); auto trackerOut = pipeline.create(); auto xlinkOut = pipeline.create(); auto nnOut = pipeline.create(); manipOut->setStreamName("manip"); xinFrame->setStreamName("inFrame"); xlinkOut->setStreamName("trackerFrame"); trackerOut->setStreamName("tracklets"); nnOut->setStreamName("nn"); // Properties xinFrame->setMaxDataSize(1920 * 1080 * 3); manip->initialConfig.setResizeThumbnail(544, 320); // manip->initialConfig.setResize(384, 384); // manip->initialConfig.setKeepAspectRatio(false); //squash the image to not lose FOV // The NN model expects BGR input. By default ImageManip output type would be same as input (gray in this case) manip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p); manip->inputImage.setBlocking(true); // setting node configs detectionNetwork->setBlobPath(nnPath); detectionNetwork->setConfidenceThreshold(0.5); detectionNetwork->input.setBlocking(true); objectTracker->inputTrackerFrame.setBlocking(true); objectTracker->inputDetectionFrame.setBlocking(true); objectTracker->inputDetections.setBlocking(true); objectTracker->setDetectionLabelsToTrack({1}); // track only person // possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF objectTracker->setTrackerType(dai::TrackerType::ZERO_TERM_COLOR_HISTOGRAM); // take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID objectTracker->setTrackerIdAssignmentPolicy(dai::TrackerIdAssignmentPolicy::SMALLEST_ID); // Linking manip->out.link(manipOut->input); manip->out.link(detectionNetwork->input); xinFrame->out.link(manip->inputImage); xinFrame->out.link(objectTracker->inputTrackerFrame); detectionNetwork->out.link(nnOut->input); detectionNetwork->out.link(objectTracker->inputDetections); detectionNetwork->passthrough.link(objectTracker->inputDetectionFrame); objectTracker->out.link(trackerOut->input); objectTracker->passthroughTrackerFrame.link(xlinkOut->input); // Connect to device and start pipeline dai::Device device(pipeline); auto qIn = device.getInputQueue("inFrame", 4); auto trackerFrameQ = device.getOutputQueue("trackerFrame", 4); auto tracklets = device.getOutputQueue("tracklets", 4); auto qManip = device.getOutputQueue("manip", 4); auto qDet = device.getOutputQueue("nn", 4); auto startTime = steady_clock::now(); int counter = 0; float fps = 0; cv::Mat frame; cv::Mat manipFrame; std::vector detections; // Add bounding boxes and text to the frame and show it to the user auto displayFrame = [](std::string name, cv::Mat frame, std::vector& detections) { auto color = cv::Scalar(255, 0, 0); // nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height for(auto& detection : detections) { int x1 = detection.xmin * frame.cols; int y1 = detection.ymin * frame.rows; int x2 = detection.xmax * frame.cols; int y2 = detection.ymax * frame.rows; uint32_t labelIndex = detection.label; std::string labelStr = to_string(labelIndex); if(labelIndex < labelMap.size()) { labelStr = labelMap[labelIndex]; } cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); std::stringstream confStr; confStr << std::fixed << std::setprecision(2) << detection.confidence * 100; cv::putText(frame, confStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX); } // Show the frame cv::imshow(name, frame); }; cv::VideoCapture cap(videoPath); auto baseTs = steady_clock::now(); float simulatedFps = 30; while(cap.isOpened()) { // Read frame from video cap >> frame; if(frame.empty()) break; auto img = std::make_shared(); frame = resizeKeepAspectRatio(frame, cv::Size(1920, 1080), cv::Scalar(0)); toPlanar(frame, img->getData()); img->setTimestamp(baseTs); baseTs += steady_clock::duration(static_cast((1000 * 1000 * 1000 / simulatedFps))); img->setWidth(1920); img->setHeight(1080); img->setType(dai::ImgFrame::Type::BGR888p); qIn->send(img); auto trackFrame = trackerFrameQ->tryGet(); if(!trackFrame) { continue; } auto track = tracklets->get(); auto inManip = qManip->get(); auto inDet = qDet->get(); counter++; auto currentTime = steady_clock::now(); auto elapsed = duration_cast>(currentTime - startTime); if(elapsed > seconds(1)) { fps = counter / elapsed.count(); counter = 0; startTime = currentTime; } detections = inDet->detections; manipFrame = inManip->getCvFrame(); displayFrame("nn", manipFrame, detections); auto color = cv::Scalar(255, 0, 0); cv::Mat trackerFrame = trackFrame->getCvFrame(); auto trackletsData = track->tracklets; for(auto& t : trackletsData) { auto roi = t.roi.denormalize(trackerFrame.cols, trackerFrame.rows); int x1 = roi.topLeft().x; int y1 = roi.topLeft().y; int x2 = roi.bottomRight().x; int y2 = roi.bottomRight().y; uint32_t labelIndex = t.label; std::string labelStr = to_string(labelIndex); if(labelIndex < labelMap.size()) { labelStr = labelMap[labelIndex]; } cv::putText(trackerFrame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); std::stringstream idStr; idStr << "ID: " << t.id; cv::putText(trackerFrame, idStr.str(), cv::Point(x1 + 10, y1 + 40), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); std::stringstream statusStr; statusStr << "Status: " << t.status; cv::putText(trackerFrame, statusStr.str(), cv::Point(x1 + 10, y1 + 60), cv::FONT_HERSHEY_TRIPLEX, 0.5, color); cv::rectangle(trackerFrame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX); } std::stringstream fpsStr; fpsStr << "NN fps:" << std::fixed << std::setprecision(2) << fps; cv::putText(trackerFrame, fpsStr.str(), cv::Point(2, trackFrame->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color); cv::imshow("tracker", trackerFrame); int key = cv::waitKey(1); if(key == 'q' || key == 'Q') { return 0; } } return 0; }