#include #include "utility.hpp" // Includes common necessary includes for development using depthai library #include "depthai/depthai.hpp" static const std::vector labelMap = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; static std::atomic fullFrameTracking{false}; int main(int argc, char** argv) { using namespace std; using namespace std::chrono; std::string nnPath(BLOB_PATH); // If path to blob specified, use that if(argc > 1) { nnPath = std::string(argv[1]); } // Print which blob we are using printf("Using blob at path: %s\n", nnPath.c_str()); // Create pipeline dai::Pipeline pipeline; // Define sources and outputs auto camRgb = pipeline.create(); auto spatialDetectionNetwork = pipeline.create(); auto monoLeft = pipeline.create(); auto monoRight = pipeline.create(); auto stereo = pipeline.create(); auto objectTracker = pipeline.create(); auto xoutRgb = pipeline.create(); auto trackerOut = pipeline.create(); xoutRgb->setStreamName("preview"); trackerOut->setStreamName("tracklets"); // Properties camRgb->setPreviewSize(300, 300); camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); camRgb->setInterleaved(false); camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR); monoLeft->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P); monoLeft->setCamera("left"); monoRight->setResolution(dai::MonoCameraProperties::SensorResolution::THE_400_P); monoRight->setCamera("right"); // setting node configs stereo->setDefaultProfilePreset(dai::node::StereoDepth::PresetMode::HIGH_DENSITY); // Align depth map to the perspective of RGB camera, on which inference is done stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A); stereo->setOutputSize(monoLeft->getResolutionWidth(), monoLeft->getResolutionHeight()); spatialDetectionNetwork->setBlobPath(nnPath); spatialDetectionNetwork->setConfidenceThreshold(0.5f); spatialDetectionNetwork->input.setBlocking(false); spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5); spatialDetectionNetwork->setDepthLowerThreshold(100); spatialDetectionNetwork->setDepthUpperThreshold(5000); objectTracker->setDetectionLabelsToTrack({15}); // track only person // possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF objectTracker->setTrackerType(dai::TrackerType::ZERO_TERM_COLOR_HISTOGRAM); // take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID objectTracker->setTrackerIdAssignmentPolicy(dai::TrackerIdAssignmentPolicy::SMALLEST_ID); // Linking monoLeft->out.link(stereo->left); monoRight->out.link(stereo->right); camRgb->preview.link(spatialDetectionNetwork->input); objectTracker->passthroughTrackerFrame.link(xoutRgb->input); objectTracker->out.link(trackerOut->input); if(fullFrameTracking) { camRgb->setPreviewKeepAspectRatio(false); camRgb->video.link(objectTracker->inputTrackerFrame); objectTracker->inputTrackerFrame.setBlocking(false); // do not block the pipeline if it's too slow on full frame objectTracker->inputTrackerFrame.setQueueSize(2); } else { spatialDetectionNetwork->passthrough.link(objectTracker->inputTrackerFrame); } spatialDetectionNetwork->passthrough.link(objectTracker->inputDetectionFrame); spatialDetectionNetwork->out.link(objectTracker->inputDetections); stereo->depth.link(spatialDetectionNetwork->inputDepth); // Connect to device and start pipeline dai::Device device(pipeline); auto preview = device.getOutputQueue("preview", 4, false); auto tracklets = device.getOutputQueue("tracklets", 4, false); auto startTime = steady_clock::now(); int counter = 0; float fps = 0; auto color = cv::Scalar(255, 255, 255); while(true) { auto imgFrame = preview->get(); auto track = tracklets->get(); counter++; auto currentTime = steady_clock::now(); auto elapsed = duration_cast>(currentTime - startTime); if(elapsed > seconds(1)) { fps = counter / elapsed.count(); counter = 0; startTime = currentTime; } cv::Mat frame = imgFrame->getCvFrame(); auto trackletsData = track->tracklets; for(auto& t : trackletsData) { auto roi = t.roi.denormalize(frame.cols, frame.rows); int x1 = roi.topLeft().x; int y1 = roi.topLeft().y; int x2 = roi.bottomRight().x; int y2 = roi.bottomRight().y; uint32_t labelIndex = t.label; std::string labelStr = to_string(labelIndex); if(labelIndex < labelMap.size()) { labelStr = labelMap[labelIndex]; } cv::putText(frame, labelStr, cv::Point(x1 + 10, y1 + 20), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream idStr; idStr << "ID: " << t.id; cv::putText(frame, idStr.str(), cv::Point(x1 + 10, y1 + 35), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream statusStr; statusStr << "Status: " << t.status; cv::putText(frame, statusStr.str(), cv::Point(x1 + 10, y1 + 50), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream depthX; depthX << "X: " << (int)t.spatialCoordinates.x << " mm"; cv::putText(frame, depthX.str(), cv::Point(x1 + 10, y1 + 65), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream depthY; depthY << "Y: " << (int)t.spatialCoordinates.y << " mm"; cv::putText(frame, depthY.str(), cv::Point(x1 + 10, y1 + 80), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); std::stringstream depthZ; depthZ << "Z: " << (int)t.spatialCoordinates.z << " mm"; cv::putText(frame, depthZ.str(), cv::Point(x1 + 10, y1 + 95), cv::FONT_HERSHEY_TRIPLEX, 0.5, 255); cv::rectangle(frame, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), color, cv::FONT_HERSHEY_SIMPLEX); } std::stringstream fpsStr; fpsStr << "NN fps: " << std::fixed << std::setprecision(2) << fps; cv::putText(frame, fpsStr.str(), cv::Point(2, imgFrame->getHeight() - 4), cv::FONT_HERSHEY_TRIPLEX, 0.4, color); cv::imshow("tracker", frame); int key = cv::waitKey(1); if(key == 'q') { return 0; } } return 0; }