Given an input image forward the image through the YOLO model to run inference on the PyTorch model, then parse and repackage the output tensor data into a vector of easy-to-use Detection structs.
309 {
310
311 torch::set_num_threads(1);
312
313 std::vector<Detection> vObjects;
314
315
316 torch::Tensor trTensorImage =
PreprocessImage(cvInputFrame, m_trDevice);
317
318
319 std::vector<torch::jit::IValue> vInputs;
320 vInputs.push_back(trTensorImage);
321 torch::Tensor trOutputTensor;
322 try
323 {
324 trOutputTensor = m_trModel.forward(vInputs).toTensor();
325 }
326 catch (const c10::Error& trError)
327 {
328 LOG_ERROR(logging::g_qSharedLogger, "Error running inference: {}", trError.what());
329 return vObjects;
330 }
331
332
333 int nImgSize = m_cvModelInputSize.
height;
334 int nP3Stride = std::pow((nImgSize / 8), 2);
335 int nP4Stride = std::pow((nImgSize / 16), 2);
336 int nP5Stride = std::pow((nImgSize / 32), 2);
337
338 int nYOLOv5AnchorsPerGridPoint = 3;
339 int nYOLOv8AnchorsPerGridPoint = 1;
340 int nYOLOv5TotalPredictionLength =
341 (nP3Stride * nYOLOv5AnchorsPerGridPoint) + (nP4Stride * nYOLOv5AnchorsPerGridPoint) + (nP5Stride * nYOLOv5AnchorsPerGridPoint);
342 int nYOLOv8TotalPredictionLength =
343 (nP3Stride * nYOLOv8AnchorsPerGridPoint) + (nP4Stride * nYOLOv8AnchorsPerGridPoint) + (nP5Stride * nYOLOv8AnchorsPerGridPoint);
344
345
346 std::vector<int> vClassIDs;
347 std::vector<std::string> vClassLabels;
348 std::vector<float> vClassConfidences;
349 std::vector<cv::Rect> vBoundingBoxes;
350
351
352 int nLargestDimension = *std::max_element(trOutputTensor.sizes().begin(), trOutputTensor.sizes().end());
353
354 if (nLargestDimension == nYOLOv5TotalPredictionLength)
355 {
356
357 this->
ParseTensorOutputYOLOv5(trOutputTensor, vClassIDs, vClassConfidences, vBoundingBoxes, cvInputFrame.
size(), fMinObjectConfidence);
358 }
359
360 else if (nLargestDimension == nYOLOv8TotalPredictionLength)
361 {
362
363 this->
ParseTensorOutputYOLOv8(trOutputTensor, vClassIDs, vClassConfidences, vBoundingBoxes, cvInputFrame.
size(), fMinObjectConfidence);
364 }
365
366
367 NonMaxSuppression(vObjects, vClassIDs, vClassConfidences, vBoundingBoxes, fMinObjectConfidence, fNMSThreshold);
368
369
370 for (size_t nIter = 0; nIter < vObjects.size(); ++nIter)
371 {
372
373 if (vClassIDs[nIter] >= 0 && vClassIDs[nIter] < static_cast<int>(m_vClassLabels.size()))
374 {
375 vObjects[nIter].szClassName = m_vClassLabels[vClassIDs[nIter]];
376 }
377 else
378 {
379 vObjects[nIter].szClassName = "UnknownClass";
380 }
381 }
382
383 return vObjects;
384 }
void ParseTensorOutputYOLOv8(const torch::Tensor &trOutput, std::vector< int > &vClassIDs, std::vector< float > &vClassConfidences, std::vector< cv::Rect > &vBoundingBoxes, const cv::Size &cvInputFrameSize, const float fMinObjectConfidence)
Given a tensor output from a YOLOv5 model, parse it's output into something more usable.
Definition YOLOModel.hpp:550
void ParseTensorOutputYOLOv5(const torch::Tensor &trOutput, std::vector< int > &vClassIDs, std::vector< float > &vClassConfidences, std::vector< cv::Rect > &vBoundingBoxes, const cv::Size &cvInputFrameSize, const float fMinObjectConfidence)
Given a tensor output from a YOLOv5 model, parse it's output into something more usable.
Definition YOLOModel.hpp:442
torch::Tensor PreprocessImage(const cv::Mat &cvInputFrame, const torch::Device &trDevice)
Given an input image, preprocess the image to match the input tensor shape of the model,...
Definition YOLOModel.hpp:413
void NonMaxSuppression(std::vector< Detection > &vObjects, std::vector< int > &vClassIDs, std::vector< float > &vClassConfidences, std::vector< cv::Rect > &vBoundingBoxes, float fMinObjectConfidence, float fNMSThreshold)
Perform non max suppression for the given predictions. This eliminates/combines predictions that over...
Definition YOLOModel.hpp:69