Jetson Inference
DNN Vision Library

Abstract class for loading a tensor network with TensorRT. More...

#include <tensorNet.h>

Inheritance diagram for tensorNet:
actionNet, backgroundNet, depthNet, detectNet, imageNet, poseNet, segNet

Classes

struct  layerInfo
 
class  Logger
 Logger class for TensorRT (GIE) info/warning/errors. More...
 
class  Profiler
 Profiler interface for measuring layer timings. More...
 

Public Member Functions

virtual ~tensorNet ()
 Destroy. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean=NULL, const char *input_blob="data", const char *output_blob="prob", uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const char *input_blob, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance with multiple output layers. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance with multiple input layers. More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const char *input_blob, const Dims3 &input_dims, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance (this variant is used for UFF models). More...
 
bool LoadNetwork (const char *prototxt, const char *model, const char *mean, const std::vector< std::string > &input_blobs, const std::vector< Dims3 > &input_dims, const std::vector< std::string > &output_blobs, uint32_t maxBatchSize=DEFAULT_MAX_BATCH_SIZE, precisionType precision=TYPE_FASTEST, deviceType device=DEVICE_GPU, bool allowGPUFallback=true, nvinfer1::IInt8Calibrator *calibrator=NULL, cudaStream_t stream=NULL)
 Load a new network instance with multiple input layers (used for UFF models). More...
 
bool LoadEngine (const char *engine_filename, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, nvinfer1::IPluginFactory *pluginFactory=NULL, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)
 Load a network instance from a serialized engine plan file. More...
 
bool LoadEngine (char *engine_stream, size_t engine_size, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, nvinfer1::IPluginFactory *pluginFactory=NULL, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)
 Load a network instance from a serialized engine plan in memory. More...
 
bool LoadEngine (nvinfer1::ICudaEngine *engine, const std::vector< std::string > &input_blobs, const std::vector< std::string > &output_blobs, deviceType device=DEVICE_GPU, cudaStream_t stream=NULL)
 Load network resources from an existing TensorRT engine instance. More...
 
bool LoadEngine (const char *filename, char **stream, size_t *size)
 Load a serialized engine plan file into memory. More...
 
void EnableLayerProfiler ()
 Manually enable profiling of per-layer execution times. More...
 
void EnableDebug ()
 Manually enable debug messages and synchronization. More...
 
bool AllowGPUFallback () const
 Return true if GPU fallback is enabled. More...
 
deviceType GetDevice () const
 Retrieve the device being used for execution. More...
 
precisionType GetPrecision () const
 Retrieve the type of precision being used. More...
 
bool IsPrecision (precisionType type) const
 Check if a particular precision is being used. More...
 
cudaStream_t GetStream () const
 Retrieve the stream that the device is operating on. More...
 
cudaStream_t CreateStream (bool nonBlocking=true)
 Create and use a new stream for execution. More...
 
void SetStream (cudaStream_t stream)
 Set the stream that the device is operating on. More...
 
const char * GetPrototxtPath () const
 Retrieve the path to the network prototxt file. More...
 
const char * GetModelPath () const
 Retrieve the full path to the model file, including the filename. More...
 
const char * GetModelFilename () const
 Retrieve the filename of the model file, excluding the directory. More...
 
modelType GetModelType () const
 Retrieve the format of the network model. More...
 
bool IsModelType (modelType type) const
 Return true if the model is of the specified format. More...
 
uint32_t GetInputLayers () const
 Retrieve the number of input layers to the network. More...
 
uint32_t GetOutputLayers () const
 Retrieve the number of output layers to the network. More...
 
Dims3 GetInputDims (uint32_t layer=0) const
 Retrieve the dimensions of the network input layer. More...
 
uint32_t GetInputWidth (uint32_t layer=0) const
 Retrieve the width of the network input layer. More...
 
uint32_t GetInputHeight (uint32_t layer=0) const
 Retrieve the height of the network input layer. More...
 
uint32_t GetInputSize (uint32_t layer=0) const
 Retrieve the size (in bytes) of the network input layer. More...
 
float * GetInputPtr (uint32_t layer=0) const
 Get the CUDA pointer to the input layer's memory. More...
 
Dims3 GetOutputDims (uint32_t layer=0) const
 Retrieve the dimensions of the network output layer. More...
 
uint32_t GetOutputWidth (uint32_t layer=0) const
 Retrieve the width of the network output layer. More...
 
uint32_t GetOutputHeight (uint32_t layer=0) const
 Retrieve the height of the network output layer. More...
 
uint32_t GetOutputSize (uint32_t layer=0) const
 Retrieve the size (in bytes) of the network output layer. More...
 
float * GetOutputPtr (uint32_t layer=0) const
 Get the CUDA pointer to the output memory. More...
 
float GetNetworkFPS ()
 Retrieve the network frames per second (FPS). More...
 
float GetNetworkTime ()
 Retrieve the network runtime (in milliseconds). More...
 
const char * GetNetworkName () const
 Retrieve the network name (its filename). More...
 
float2 GetProfilerTime (profilerQuery query)
 Retrieve the profiler runtime (in milliseconds). More...
 
float GetProfilerTime (profilerQuery query, profilerDevice device)
 Retrieve the profiler runtime (in milliseconds). More...
 
void PrintProfilerTimes ()
 Print the profiler times (in milliseconds). More...
 

Static Public Member Functions

static bool LoadClassLabels (const char *filename, std::vector< std::string > &descriptions, int expectedClasses=-1)
 Load class descriptions from a label file. More...
 
static bool LoadClassLabels (const char *filename, std::vector< std::string > &descriptions, std::vector< std::string > &synsets, int expectedClasses=-1)
 Load class descriptions and synset strings from a label file. More...
 
static bool LoadClassColors (const char *filename, float4 *colors, int expectedClasses, float defaultAlpha=255.0f)
 Load class colors from a text file. More...
 
static bool LoadClassColors (const char *filename, float4 **colors, int expectedClasses, float defaultAlpha=255.0f)
 Load class colors from a text file. More...
 
static float4 GenerateColor (uint32_t classID, float alpha=255.0f)
 Procedurally generate a color for a given class index with the specified alpha value. More...
 
static precisionType SelectPrecision (precisionType precision, deviceType device=DEVICE_GPU, bool allowInt8=true)
 Resolve a desired precision to a specific one that's available. More...
 
static precisionType FindFastestPrecision (deviceType device=DEVICE_GPU, bool allowInt8=true)
 Determine the fastest native precision on a device. More...
 
static std::vector< precisionType > DetectNativePrecisions (deviceType device=DEVICE_GPU)
 Detect the precisions supported natively on a device. More...
 
static bool DetectNativePrecision (const std::vector< precisionType > &nativeTypes, precisionType type)
 Detect if a particular precision is supported natively. More...
 
static bool DetectNativePrecision (precisionType precision, deviceType device=DEVICE_GPU)
 Detect if a particular precision is supported natively. More...
 

Protected Member Functions

 tensorNet ()
 Constructor. More...
 
bool ProcessNetwork (bool sync=true)
 Execute processing of the network. More...
 
bool ProfileModel (const std::string &deployFile, const std::string &modelFile, const std::vector< std::string > &inputs, const std::vector< Dims3 > &inputDims, const std::vector< std::string > &outputs, uint32_t maxBatchSize, precisionType precision, deviceType device, bool allowGPUFallback, nvinfer1::IInt8Calibrator *calibrator, char **engineStream, size_t *engineSize)
 Create and output an optimized network model. More...
 
bool ConfigureBuilder (nvinfer1::IBuilder *builder, uint32_t maxBatchSize, uint32_t workspaceSize, precisionType precision, deviceType device, bool allowGPUFallback, nvinfer1::IInt8Calibrator *calibrator)
 Configure builder options. More...
 
bool ValidateEngine (const char *model_path, const char *cache_path, const char *checksum_path)
 Validate that a built TensorRT engine already exists for the model and doesn't need updating. More...
 
void PROFILER_BEGIN (profilerQuery query)
 Begin a profiling query, before the network is run. More...
 
void PROFILER_END (profilerQuery query)
 End a profiling query, after the network is run. More...
 
bool PROFILER_QUERY (profilerQuery query)
 Query the CUDA part of a profiler query. More...
 

Protected Attributes

tensorNet::Logger gLogger
 
tensorNet::Profiler gProfiler
 
std::string mPrototxtPath
 
std::string mModelPath
 
std::string mModelFile
 
std::string mMeanPath
 
std::string mCacheEnginePath
 
std::string mCacheCalibrationPath
 
std::string mChecksumPath
 
deviceType mDevice
 
precisionType mPrecision
 
modelType mModelType
 
cudaStream_t mStream
 
cudaEvent_t mEventsGPU [PROFILER_TOTAL *2]
 
timespec mEventsCPU [PROFILER_TOTAL *2]
 
nvinfer1::IRuntime * mInfer
 
nvinfer1::ICudaEngine * mEngine
 
nvinfer1::IExecutionContext * mContext
 
float2 mProfilerTimes [PROFILER_TOTAL+1]
 
uint32_t mProfilerQueriesUsed
 
uint32_t mProfilerQueriesDone
 
uint32_t mWorkspaceSize
 
uint32_t mMaxBatchSize
 
bool mEnableProfiler
 
bool mEnableDebug
 
bool mAllowGPUFallback
 
void ** mBindings
 
std::vector< layerInfo > mInputs
 
std::vector< layerInfo > mOutputs
 

Detailed Description

Abstract class for loading a tensor network with TensorRT.

For example implementations, see imageNet and detectNet.
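
As a rough sketch (not part of the library itself), a minimal subclass might look like the following. tensorNet's constructor is protected, so applications use it through a derived class; the model path and blob names below are hypothetical placeholders.

    #include <tensorNet.h>

    // Hypothetical minimal wrapper around tensorNet; real applications
    // would typically use a derived class like imageNet or detectNet.
    class myNet : public tensorNet
    {
    public:
        myNet() : tensorNet()    { }

        bool Load()
        {
            // prototxt/mean are NULL for non-caffe models;
            // the model path and blob names are placeholders
            return LoadNetwork(NULL, "myModel.onnx", NULL, "input_0", "output_0");
        }

        bool Process()
        {
            // input data is written to GetInputPtr() beforehand, and the
            // results are read from GetOutputPtr() after this returns
            return ProcessNetwork();    // synchronous by default
        }
    };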

Constructor & Destructor Documentation

◆ ~tensorNet()

virtual tensorNet::~tensorNet ( )
virtual

Destroy.

◆ tensorNet()

tensorNet::tensorNet ( )
protected

Constructor.

Member Function Documentation

◆ AllowGPUFallback()

bool tensorNet::AllowGPUFallback ( ) const
inline

Return true if GPU fallback is enabled.

◆ ConfigureBuilder()

bool tensorNet::ConfigureBuilder ( nvinfer1::IBuilder *  builder,
uint32_t  maxBatchSize,
uint32_t  workspaceSize,
precisionType  precision,
deviceType  device,
bool  allowGPUFallback,
nvinfer1::IInt8Calibrator *  calibrator 
)
protected

Configure builder options.

◆ CreateStream()

cudaStream_t tensorNet::CreateStream ( bool  nonBlocking = true)

Create and use a new stream for execution.
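
For illustration only, a sketch of giving a loaded network its own non-blocking stream; net is assumed to be any tensorNet-derived instance created elsewhere.

    #include <tensorNet.h>
    #include <stdio.h>

    // sketch: create a dedicated non-blocking CUDA stream for inference,
    // so it can overlap with other GPU work on the default stream
    void useDedicatedStream( tensorNet* net )
    {
        cudaStream_t stream = net->CreateStream(true);    // nonBlocking=true

        if( !stream )
            printf("failed to create CUDA stream\n");

        // the network adopts this stream; GetStream() will now return it
    }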

◆ DetectNativePrecision() [1/2]

static bool tensorNet::DetectNativePrecision ( const std::vector< precisionType > &  nativeTypes,
precisionType  type 
)
static

Detect if a particular precision is supported natively.

◆ DetectNativePrecision() [2/2]

static bool tensorNet::DetectNativePrecision ( precisionType  precision,
deviceType  device = DEVICE_GPU 
)
static

Detect if a particular precision is supported natively.

◆ DetectNativePrecisions()

static std::vector<precisionType> tensorNet::DetectNativePrecisions ( deviceType  device = DEVICE_GPU)
static

Detect the precisions supported natively on a device.

◆ EnableDebug()

void tensorNet::EnableDebug ( )

Manually enable debug messages and synchronization.

◆ EnableLayerProfiler()

void tensorNet::EnableLayerProfiler ( )

Manually enable profiling of per-layer execution times.


◆ FindFastestPrecision()

static precisionType tensorNet::FindFastestPrecision ( deviceType  device = DEVICE_GPU,
bool  allowInt8 = true 
)
static

Determine the fastest native precision on a device.

◆ GenerateColor()

static float4 tensorNet::GenerateColor ( uint32_t  classID,
float  alpha = 255.0f 
)
static

Procedurally generate a color for a given class index with the specified alpha value.

This function can be used to generate a range of colors when a colors.txt file isn't available.
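
A brief sketch of how this might be used to color an arbitrary number of classes; the array and class count are assumptions for illustration.

    #include <tensorNet.h>

    // sketch: procedurally assign a color to each class when no
    // colors.txt file is available
    void generateClassColors( float4* colors, uint32_t numClasses )
    {
        for( uint32_t n=0; n < numClasses; n++ )
            colors[n] = tensorNet::GenerateColor(n, 255.0f);    // opaque alpha
    }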

◆ GetDevice()

deviceType tensorNet::GetDevice ( ) const
inline

Retrieve the device being used for execution.

◆ GetInputDims()

Dims3 tensorNet::GetInputDims ( uint32_t  layer = 0) const
inline

Retrieve the dimensions of the network input layer.

◆ GetInputHeight()

uint32_t tensorNet::GetInputHeight ( uint32_t  layer = 0) const
inline

Retrieve the height of the network input layer.

◆ GetInputLayers()

uint32_t tensorNet::GetInputLayers ( ) const
inline

Retrieve the number of input layers to the network.

◆ GetInputPtr()

float* tensorNet::GetInputPtr ( uint32_t  layer = 0) const
inline

Get the CUDA pointer to the input layer's memory.

◆ GetInputSize()

uint32_t tensorNet::GetInputSize ( uint32_t  layer = 0) const
inline

Retrieve the size (in bytes) of the network input layer.

◆ GetInputWidth()

uint32_t tensorNet::GetInputWidth ( uint32_t  layer = 0) const
inline

Retrieve the width of the network input layer.

◆ GetModelFilename()

const char* tensorNet::GetModelFilename ( ) const
inline

Retrieve the filename of the model file, excluding the directory.

◆ GetModelPath()

const char* tensorNet::GetModelPath ( ) const
inline

Retrieve the full path to the model file, including the filename.

◆ GetModelType()

modelType tensorNet::GetModelType ( ) const
inline

Retrieve the format of the network model.

◆ GetNetworkFPS()

float tensorNet::GetNetworkFPS ( )
inline

Retrieve the network frames per second (FPS).

◆ GetNetworkName()

const char* tensorNet::GetNetworkName ( ) const
inline

Retrieve the network name (its filename).

◆ GetNetworkTime()

float tensorNet::GetNetworkTime ( )
inline

Retrieve the network runtime (in milliseconds).

◆ GetOutputDims()

Dims3 tensorNet::GetOutputDims ( uint32_t  layer = 0) const
inline

Retrieve the dimensions of the network output layer.

◆ GetOutputHeight()

uint32_t tensorNet::GetOutputHeight ( uint32_t  layer = 0) const
inline

Retrieve the height of the network output layer.

◆ GetOutputLayers()

uint32_t tensorNet::GetOutputLayers ( ) const
inline

Retrieve the number of output layers to the network.

◆ GetOutputPtr()

float* tensorNet::GetOutputPtr ( uint32_t  layer = 0) const
inline

Get the CUDA pointer to the output memory.

◆ GetOutputSize()

uint32_t tensorNet::GetOutputSize ( uint32_t  layer = 0) const
inline

Retrieve the size (in bytes) of the network output layer.

◆ GetOutputWidth()

uint32_t tensorNet::GetOutputWidth ( uint32_t  layer = 0) const
inline

Retrieve the width of the network output layer.

◆ GetPrecision()

precisionType tensorNet::GetPrecision ( ) const
inline

Retrieve the type of precision being used.

◆ GetProfilerTime() [1/2]

float2 tensorNet::GetProfilerTime ( profilerQuery  query)
inline

Retrieve the profiler runtime (in milliseconds).

◆ GetProfilerTime() [2/2]

float tensorNet::GetProfilerTime ( profilerQuery  query,
profilerDevice  device 
)
inline

Retrieve the profiler runtime (in milliseconds).
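
A sketch of reading the timings after a frame has been processed, assuming the float2 packs the CPU time in x and the CUDA time in y, and that net is a tensorNet-derived instance created elsewhere.

    #include <tensorNet.h>
    #include <stdio.h>

    // sketch: report how long the network phase of the last frame took
    void reportNetworkTime( tensorNet* net )
    {
        // assumed layout: x = CPU time, y = CUDA time (in milliseconds)
        const float2 t = net->GetProfilerTime(PROFILER_NETWORK);
        printf("network:  CPU %.2f ms,  CUDA %.2f ms\n", t.x, t.y);

        // or query one device at a time
        const float cuda = net->GetProfilerTime(PROFILER_NETWORK, PROFILER_CUDA);
        printf("network CUDA time:  %.2f ms\n", cuda);
    }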

◆ GetPrototxtPath()

const char* tensorNet::GetPrototxtPath ( ) const
inline

Retrieve the path to the network prototxt file.

◆ GetStream()

cudaStream_t tensorNet::GetStream ( ) const
inline

Retrieve the stream that the device is operating on.

◆ IsModelType()

bool tensorNet::IsModelType ( modelType  type) const
inline

Return true if the model is of the specified format.

◆ IsPrecision()

bool tensorNet::IsPrecision ( precisionType  type) const
inline

Check if a particular precision is being used.

◆ LoadClassColors() [1/2]

static bool tensorNet::LoadClassColors ( const char *  filename,
float4 **  colors,
int  expectedClasses,
float  defaultAlpha = 255.0f 
)
static

Load class colors from a text file.

If the expected number of colors isn't parsed from the file, the remaining colors will be generated. The float4 color array will automatically be allocated in shared CPU/GPU memory by cudaAllocMapped(). If a line in the text file only has RGB values, the defaultAlpha value will be used for the alpha channel.
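
A sketch of using this allocating overload; the filename and class count are placeholders for illustration.

    #include <tensorNet.h>
    #include <stdio.h>

    // sketch: load (or procedurally fill in) the colors for a hypothetical
    // 21-class segmentation model; the array is allocated by the call
    float4* loadColors()
    {
        float4* colors = NULL;

        if( !tensorNet::LoadClassColors("colors.txt", &colors, 21, 255.0f) )
            printf("failed to load class colors\n");

        return colors;    // shared CPU/GPU memory from cudaAllocMapped()
    }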

◆ LoadClassColors() [2/2]

static bool tensorNet::LoadClassColors ( const char *  filename,
float4 *  colors,
int  expectedClasses,
float  defaultAlpha = 255.0f 
)
static

Load class colors from a text file.

If the expected number of colors isn't parsed from the file, the remaining colors will be generated. The float4 color array should be expectedClasses long, and would typically reside in shared CPU/GPU memory. If a line in the text file only has RGB values, the defaultAlpha value will be used for the alpha channel.

◆ LoadClassLabels() [1/2]

static bool tensorNet::LoadClassLabels ( const char *  filename,
std::vector< std::string > &  descriptions,
int  expectedClasses = -1 
)
static

Load class descriptions from a label file.


Each line of the text file should include one class label (and optionally a synset). If the expected number of labels isn't parsed from the file, the remaining labels will be automatically generated.
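
For illustration, a sketch of loading the labels for a hypothetical 1000-class classifier; the filename is a placeholder.

    #include <tensorNet.h>
    #include <stdio.h>
    #include <string>
    #include <vector>

    // sketch: load class descriptions from a label file
    void loadLabels()
    {
        std::vector<std::string> labels;

        if( !tensorNet::LoadClassLabels("labels.txt", labels, 1000) )
            printf("failed to load class labels\n");

        for( size_t n=0; n < labels.size(); n++ )
            printf("class %04zu  %s\n", n, labels[n].c_str());
    }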

◆ LoadClassLabels() [2/2]

static bool tensorNet::LoadClassLabels ( const char *  filename,
std::vector< std::string > &  descriptions,
std::vector< std::string > &  synsets,
int  expectedClasses = -1 
)
static

Load class descriptions and synset strings from a label file.

Each line of the text file should include one class label (and optionally a synset). If the expected number of labels isn't parsed from the file, the remaining labels will be automatically generated.

◆ LoadEngine() [1/4]

bool tensorNet::LoadEngine ( char *  engine_stream,
size_t  engine_size,
const std::vector< std::string > &  input_blobs,
const std::vector< std::string > &  output_blobs,
nvinfer1::IPluginFactory *  pluginFactory = NULL,
deviceType  device = DEVICE_GPU,
cudaStream_t  stream = NULL 
)

Load a network instance from a serialized engine plan in memory.

Parameters
engine_stream - Memory containing the serialized engine plan file.
engine_size - Size of the serialized engine stream (in bytes).
input_blobs - List of names of the input blob data to the network.
output_blobs - List of names of the output blobs from the network.

◆ LoadEngine() [2/4]

bool tensorNet::LoadEngine ( const char *  engine_filename,
const std::vector< std::string > &  input_blobs,
const std::vector< std::string > &  output_blobs,
nvinfer1::IPluginFactory *  pluginFactory = NULL,
deviceType  device = DEVICE_GPU,
cudaStream_t  stream = NULL 
)

Load a network instance from a serialized engine plan file.

Parameters
engine_filename - Path to the serialized engine plan file.
input_blobs - List of names of the input blob data to the network.
output_blobs - List of names of the output blobs from the network.
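
A sketch of loading a pre-built plan instead of rebuilding it with LoadNetwork(); the filename and blob names are placeholders, and net is any tensorNet-derived instance.

    #include <tensorNet.h>
    #include <string>
    #include <vector>

    // sketch: load a previously-serialized TensorRT engine plan file
    bool loadPlan( tensorNet* net )
    {
        const std::vector<std::string> inputs  = { "input_0" };
        const std::vector<std::string> outputs = { "output_0" };

        return net->LoadEngine("myModel.engine", inputs, outputs);
    }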

◆ LoadEngine() [3/4]

bool tensorNet::LoadEngine ( const char *  filename,
char **  stream,
size_t *  size 
)

Load a serialized engine plan file into memory.

◆ LoadEngine() [4/4]

bool tensorNet::LoadEngine ( nvinfer1::ICudaEngine *  engine,
const std::vector< std::string > &  input_blobs,
const std::vector< std::string > &  output_blobs,
deviceType  device = DEVICE_GPU,
cudaStream_t  stream = NULL 
)

Load network resources from an existing TensorRT engine instance.

Parameters
engine - Existing TensorRT engine instance to load the network resources from.
input_blobs - List of names of the input blob data to the network.
output_blobs - List of names of the output blobs from the network.

◆ LoadNetwork() [1/5]

bool tensorNet::LoadNetwork ( const char *  prototxt,
const char *  model,
const char *  mean,
const char *  input_blob,
const Dims3 input_dims,
const std::vector< std::string > &  output_blobs,
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true,
nvinfer1::IInt8Calibrator *  calibrator = NULL,
cudaStream_t  stream = NULL 
)

Load a new network instance (this variant is used for UFF models).

Parameters
prototxt - File path to the deployable network prototxt
model - File path to the caffemodel
mean - File path to the mean value binary proto (NULL if none)
input_blob - The name of the input blob data to the network.
input_dims - The dimensions of the input blob (used for UFF).
output_blobs - List of names of the output blobs from the network.
maxBatchSize - The maximum batch size that the network will be optimized for.

◆ LoadNetwork() [2/5]

bool tensorNet::LoadNetwork ( const char *  prototxt,
const char *  model,
const char *  mean,
const char *  input_blob,
const std::vector< std::string > &  output_blobs,
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true,
nvinfer1::IInt8Calibrator *  calibrator = NULL,
cudaStream_t  stream = NULL 
)

Load a new network instance with multiple output layers.

Parameters
prototxt - File path to the deployable network prototxt
model - File path to the caffemodel
mean - File path to the mean value binary proto (NULL if none)
input_blob - The name of the input blob data to the network.
output_blobs - List of names of the output blobs from the network.
maxBatchSize - The maximum batch size that the network will be optimized for.

◆ LoadNetwork() [3/5]

bool tensorNet::LoadNetwork ( const char *  prototxt,
const char *  model,
const char *  mean,
const std::vector< std::string > &  input_blobs,
const std::vector< Dims3 > &  input_dims,
const std::vector< std::string > &  output_blobs,
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true,
nvinfer1::IInt8Calibrator *  calibrator = NULL,
cudaStream_t  stream = NULL 
)

Load a new network instance with multiple input layers (used for UFF models).

Parameters
prototxt - File path to the deployable network prototxt
model - File path to the caffemodel
mean - File path to the mean value binary proto (NULL if none)
input_blobs - List of names of the input blob data to the network.
input_dims - List of the dimensions of the input blobs (used for UFF).
output_blobs - List of names of the output blobs from the network.
maxBatchSize - The maximum batch size that the network will be optimized for.

◆ LoadNetwork() [4/5]

bool tensorNet::LoadNetwork ( const char *  prototxt,
const char *  model,
const char *  mean,
const std::vector< std::string > &  input_blobs,
const std::vector< std::string > &  output_blobs,
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true,
nvinfer1::IInt8Calibrator *  calibrator = NULL,
cudaStream_t  stream = NULL 
)

Load a new network instance with multiple input layers.

Parameters
prototxt - File path to the deployable network prototxt
model - File path to the caffemodel
mean - File path to the mean value binary proto (NULL if none)
input_blobs - List of names of the input blob data to the network.
output_blobs - List of names of the output blobs from the network.
maxBatchSize - The maximum batch size that the network will be optimized for.

◆ LoadNetwork() [5/5]

bool tensorNet::LoadNetwork ( const char *  prototxt,
const char *  model,
const char *  mean = NULL,
const char *  input_blob = "data",
const char *  output_blob = "prob",
uint32_t  maxBatchSize = DEFAULT_MAX_BATCH_SIZE,
precisionType  precision = TYPE_FASTEST,
deviceType  device = DEVICE_GPU,
bool  allowGPUFallback = true,
nvinfer1::IInt8Calibrator *  calibrator = NULL,
cudaStream_t  stream = NULL 
)

Load a new network instance.

Parameters
prototxt - File path to the deployable network prototxt
model - File path to the caffemodel
mean - File path to the mean value binary proto (NULL if none)
input_blob - The name of the input blob data to the network.
output_blob - The name of the output blob data from the network.
maxBatchSize - The maximum batch size that the network will be optimized for.

◆ PrintProfilerTimes()

void tensorNet::PrintProfilerTimes ( )
inline

Print the profiler times (in milliseconds).

◆ ProcessNetwork()

bool tensorNet::ProcessNetwork ( bool  sync = true)
protected

Execute processing of the network.

Parameters
sync - If true (default), the device will be synchronized after processing and the calling thread will block until processing is complete. If false, the function will return immediately after the processing has been enqueued to the CUDA stream indicated by GetStream().
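
As a sketch only, since ProcessNetwork() is protected, asynchronous processing would live inside a subclass (for example, a method added to the hypothetical myNet wrapper shown earlier):

    // sketch: enqueue inference without blocking, overlap CPU work,
    // then synchronize on the network's stream before reading results
    bool myNet::ProcessAsync()
    {
        if( !ProcessNetwork(false) )    // enqueue only; returns immediately
            return false;

        // ...other CPU-side work can overlap with the GPU here...

        // wait for the results before reading GetOutputPtr()
        return cudaStreamSynchronize(GetStream()) == cudaSuccess;
    }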

◆ ProfileModel()

bool tensorNet::ProfileModel ( const std::string &  deployFile,
const std::string &  modelFile,
const std::vector< std::string > &  inputs,
const std::vector< Dims3 > &  inputDims,
const std::vector< std::string > &  outputs,
uint32_t  maxBatchSize,
precisionType  precision,
deviceType  device,
bool  allowGPUFallback,
nvinfer1::IInt8Calibrator *  calibrator,
char **  engineStream,
size_t *  engineSize 
)
protected

Create and output an optimized network model.

Note
This function is used automatically by LoadNetwork(), but it can also be used on its own to build the optimized network offline.
Parameters
deployFile - name for network prototxt
modelFile - name for model
outputs - network outputs
maxBatchSize - maximum batch size
engineStream - output engine stream

◆ PROFILER_BEGIN()

void tensorNet::PROFILER_BEGIN ( profilerQuery  query)
inlineprotected

Begin a profiling query, before the network is run.

◆ PROFILER_END()

void tensorNet::PROFILER_END ( profilerQuery  query)
inlineprotected

End a profiling query, after the network is run.

◆ PROFILER_QUERY()

bool tensorNet::PROFILER_QUERY ( profilerQuery  query)
inlineprotected

Query the CUDA part of a profiler query.

◆ SelectPrecision()

static precisionType tensorNet::SelectPrecision ( precisionType  precision,
deviceType  device = DEVICE_GPU,
bool  allowInt8 = true 
)
static

Resolve a desired precision to a specific one that's available.
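
A sketch of resolving TYPE_FASTEST to a concrete precision ahead of time; precisionTypeToStr() is assumed to be the string helper declared in tensorNet.h.

    #include <tensorNet.h>
    #include <stdio.h>

    // sketch: see which precision TYPE_FASTEST resolves to on the GPU,
    // with INT8 disallowed (e.g. when no calibration data is available)
    void checkPrecision()
    {
        const precisionType chosen =
            tensorNet::SelectPrecision(TYPE_FASTEST, DEVICE_GPU, false);

        printf("selected precision: %s\n", precisionTypeToStr(chosen));
    }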

◆ SetStream()

void tensorNet::SetStream ( cudaStream_t  stream)

Set the stream that the device is operating on.

◆ ValidateEngine()

bool tensorNet::ValidateEngine ( const char *  model_path,
const char *  cache_path,
const char *  checksum_path 
)
protected

Validate that a built TensorRT engine already exists for the model and doesn't need updating.

Member Data Documentation

◆ gLogger

tensorNet::Logger tensorNet::gLogger
protected

◆ gProfiler

tensorNet::Profiler tensorNet::gProfiler
protected

◆ mAllowGPUFallback

bool tensorNet::mAllowGPUFallback
protected

◆ mBindings

void** tensorNet::mBindings
protected

◆ mCacheCalibrationPath

std::string tensorNet::mCacheCalibrationPath
protected

◆ mCacheEnginePath

std::string tensorNet::mCacheEnginePath
protected

◆ mChecksumPath

std::string tensorNet::mChecksumPath
protected

◆ mContext

nvinfer1::IExecutionContext* tensorNet::mContext
protected

◆ mDevice

deviceType tensorNet::mDevice
protected

◆ mEnableDebug

bool tensorNet::mEnableDebug
protected

◆ mEnableProfiler

bool tensorNet::mEnableProfiler
protected

◆ mEngine

nvinfer1::ICudaEngine* tensorNet::mEngine
protected

◆ mEventsCPU

timespec tensorNet::mEventsCPU[PROFILER_TOTAL *2]
protected

◆ mEventsGPU

cudaEvent_t tensorNet::mEventsGPU[PROFILER_TOTAL *2]
protected

◆ mInfer

nvinfer1::IRuntime* tensorNet::mInfer
protected

◆ mInputs

std::vector<layerInfo> tensorNet::mInputs
protected

◆ mMaxBatchSize

uint32_t tensorNet::mMaxBatchSize
protected

◆ mMeanPath

std::string tensorNet::mMeanPath
protected

◆ mModelFile

std::string tensorNet::mModelFile
protected

◆ mModelPath

std::string tensorNet::mModelPath
protected

◆ mModelType

modelType tensorNet::mModelType
protected

◆ mOutputs

std::vector<layerInfo> tensorNet::mOutputs
protected

◆ mPrecision

precisionType tensorNet::mPrecision
protected

◆ mProfilerQueriesDone

uint32_t tensorNet::mProfilerQueriesDone
protected

◆ mProfilerQueriesUsed

uint32_t tensorNet::mProfilerQueriesUsed
protected

◆ mProfilerTimes

float2 tensorNet::mProfilerTimes[PROFILER_TOTAL+1]
protected

◆ mPrototxtPath

std::string tensorNet::mPrototxtPath
protected

◆ mStream

cudaStream_t tensorNet::mStream
protected

◆ mWorkspaceSize

uint32_t tensorNet::mWorkspaceSize
protected

The documentation for this class was generated from the following file:
tensorNet.h